Commit e48a456d authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

optimized lapack' SVD for noticeably better performance on small matrices

parent fea66d93
......@@ -37,11 +37,28 @@ static __inline double r_sign(real *a, real *b)
return *b >= 0 ? x : -x;
}
extern const unsigned char lapack_toupper_tab[];
#define lapack_toupper(c) ((char)lapack_toupper_tab[(unsigned char)(c)])
extern const unsigned char lapack_lamch_tab[];
extern const doublereal lapack_dlamch_tab[];
extern const doublereal lapack_slamch_tab[];
static __inline logical lsame_(char *ca, char *cb)
{
return toupper(ca[0]) == toupper(cb[0]);
return lapack_toupper(ca[0]) == lapack_toupper(cb[0]);
}
static __inline doublereal dlamch_(char* cmach)
{
return lapack_dlamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]];
}
static __inline doublereal slamch_(char* cmach)
{
return lapack_slamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]];
}
static __inline integer i_nint(real *x)
{
return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x));
......
......@@ -3680,8 +3680,6 @@ doublereal dsecnd_();
doublereal second_();
doublereal slamch_(char *cmach);
/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical
*ieee1);
......@@ -3696,8 +3694,6 @@ doublereal slamc3_(real *a, real *b);
logical *ieee, integer *emax, real *rmax);
doublereal dlamch_(char *cmach);
/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical
*ieee1);
......@@ -3712,9 +3708,6 @@ doublereal dlamc3_(doublereal *a, doublereal *b);
/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin,
logical *ieee, integer *emax, doublereal *rmax);
integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1,
integer *n2, integer *n3, integer *n4);
#ifdef __cplusplus
}
#endif
......
......@@ -7,6 +7,7 @@
#ifndef F2C_INCLUDE
#define F2C_INCLUDE
#include <assert.h>
#include <math.h>
#include <ctype.h>
#include <stdlib.h>
......@@ -17,6 +18,10 @@
#include <string.h>
#include <stdio.h>
#if __SSE2__ || defined _M_X64
#include "emmintrin.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
......
/* dgemv.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal *
alpha, doublereal *a, integer *lda, doublereal *x, integer *incx,
doublereal *beta, doublereal *y, integer *incy)
/* Subroutine */ int dgemv_(char *_trans, integer *_m, integer *_n, doublereal *
_alpha, doublereal *a, integer *_lda, doublereal *x, integer *_incx,
doublereal *_beta, doublereal *y, integer *_incy)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
/* Local variables */
integer i__, j, ix, iy, jx, jy, kx, ky, info;
doublereal temp;
integer lenx, leny;
extern logical lsame_(char *, char *);
extern /* Subroutine */ int xerbla_(char *, integer *);
/* .. Scalar Arguments .. */
/* .. */
/* .. Array Arguments .. */
......@@ -136,175 +114,123 @@
/* Test the input parameters. */
/* Parameter adjustments */
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--x;
--y;
/* Function Body */
info = 0;
if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
) {
info = 1;
} else if (*m < 0) {
info = 2;
} else if (*n < 0) {
info = 3;
} else if (*lda < max(1,*m)) {
info = 6;
} else if (*incx == 0) {
info = 8;
} else if (*incy == 0) {
info = 11;
}
if (info != 0) {
xerbla_("DGEMV ", &info);
return 0;
}
/* Quick return if possible. */
if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
return 0;
}
/* Set LENX and LENY, the lengths of the vectors x and y, and set */
/* up the start points in X and Y. */
if (lsame_(trans, "N")) {
lenx = *n;
leny = *m;
} else {
lenx = *m;
leny = *n;
}
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (lenx - 1) * *incx;
char trans = lapack_toupper(_trans[0]);
integer i, j, m = *_m, n = *_n, lda = *_lda, incx = *_incx, incy = *_incy;
integer leny = trans == 'N' ? m : n, lenx = trans == 'N' ? n : m;
real alpha = *_alpha, beta = *_beta;
integer info = 0;
if (trans != 'N' && trans != 'T' && trans != 'C')
info = 1;
else if (m < 0)
info = 2;
else if (n < 0)
info = 3;
else if (lda < max(1,m))
info = 6;
else if (incx == 0)
info = 8;
else if (incy == 0)
info = 11;
if (info != 0)
{
xerbla_("SGEMV ", &info);
return 0;
}
if (*incy > 0) {
ky = 1;
} else {
ky = 1 - (leny - 1) * *incy;
if( incy < 0 )
y -= incy*(leny - 1);
if( incx < 0 )
x -= incx*(lenx - 1);
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
if( beta != 1. )
{
if( incy == 1 )
{
if( beta == 0. )
for( i = 0; i < leny; i++ )
y[i] = 0.;
else
for( i = 0; i < leny; i++ )
y[i] *= beta;
}
else
{
if( beta == 0. )
for( i = 0; i < leny; i++ )
y[i*incy] = 0.;
else
for( i = 0; i < leny; i++ )
y[i*incy] *= beta;
}
}
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
/* First form y := beta*y. */
if (*beta != 1.) {
if (*incy == 1) {
if (*beta == 0.) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = 0.;
/* L10: */
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = *beta * y[i__];
/* L20: */
}
}
} else {
iy = ky;
if (*beta == 0.) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = 0.;
iy += *incy;
/* L30: */
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = *beta * y[iy];
iy += *incy;
/* L40: */
}
}
}
if( alpha == 0. )
;
else if( trans == 'N' )
{
if( incy == 1 )
{
for( i = 0; i < n; i++, a += lda )
{
doublereal s = x[i*incx];
if( s == 0. )
continue;
s *= alpha;
for( j = 0; j <= m - 2; j += 2 )
{
doublereal t0 = y[j] + s*a[j];
doublereal t1 = y[j+1] + s*a[j+1];
y[j] = t0; y[j+1] = t1;
}
for( ; j < m; j++ )
y[j] += s*a[j];
}
}
else
{
for( i = 0; i < n; i++, a += lda )
{
doublereal s = x[i*incx];
if( s == 0. )
continue;
s *= alpha;
for( j = 0; j < m; j++ )
y[j*incy] += s*a[j];
}
}
}
if (*alpha == 0.) {
return 0;
else
{
if( incx == 1 )
{
for( i = 0; i < n; i++, a += lda )
{
doublereal s = 0;
for( j = 0; j <= m - 2; j += 2 )
s += x[j]*a[j] + x[j+1]*a[j+1];
for( ; j < m; j++ )
s += x[j]*a[j];
y[i*incy] += alpha*s;
}
}
else
{
for( i = 0; i < n; i++, a += lda )
{
doublereal s = 0;
for( j = 0; j < m; j++ )
s += x[j*incx]*a[j];
y[i*incy] += alpha*s;
}
}
}
if (lsame_(trans, "N")) {
/* Form y := alpha*A*x + y. */
jx = kx;
if (*incy == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (x[jx] != 0.) {
temp = *alpha * x[jx];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[i__] += temp * a[i__ + j * a_dim1];
/* L50: */
}
}
jx += *incx;
/* L60: */
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (x[jx] != 0.) {
temp = *alpha * x[jx];
iy = ky;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[iy] += temp * a[i__ + j * a_dim1];
iy += *incy;
/* L70: */
}
}
jx += *incx;
/* L80: */
}
}
} else {
/* Form y := alpha*A'*x + y. */
jy = ky;
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[i__];
/* L90: */
}
y[jy] += *alpha * temp;
jy += *incy;
/* L100: */
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.;
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[ix];
ix += *incx;
/* L110: */
}
y[jy] += *alpha * temp;
jy += *incy;
/* L120: */
}
}
}
return 0;
/* End of DGEMV . */
......
/* dger.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha,
doublereal *x, integer *incx, doublereal *y, integer *incy,
doublereal *a, integer *lda)
/* Subroutine */ int dger_(integer *_m, integer *_n, doublereal *_alpha,
doublereal *x, integer *_incx, doublereal *y, integer *_incy,
doublereal *a, integer *_lda)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
/* Local variables */
integer i__, j, ix, jy, kx, info;
doublereal temp;
extern /* Subroutine */ int xerbla_(char *, integer *);
/* .. Scalar Arguments .. */
/* .. */
......@@ -111,80 +92,70 @@
/* Test the input parameters. */
/* Parameter adjustments */
--x;
--y;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
/* Function Body */
info = 0;
if (*m < 0) {
info = 1;
} else if (*n < 0) {
info = 2;
} else if (*incx == 0) {
info = 5;
} else if (*incy == 0) {
info = 7;
} else if (*lda < max(1,*m)) {
info = 9;
}
if (info != 0) {
xerbla_("DGER ", &info);
return 0;
integer i, j, m = *_m, n = *_n, incx = *_incx, incy = *_incy, lda = *_lda;
doublereal alpha = *_alpha;
integer info = 0;
if (m < 0)
info = 1;
else if (n < 0)
info = 2;
else if (incx == 0)
info = 5;
else if (incy == 0)
info = 7;
else if (lda < max(1,m))
info = 9;
if (info != 0)
{
xerbla_("DGER ", &info);
return 0;
}
/* Quick return if possible. */
if (*m == 0 || *n == 0 || *alpha == 0.) {
return 0;
}
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
if (*incy > 0) {
jy = 1;
} else {
jy = 1 - (*n - 1) * *incy;
if (incx < 0)
x -= (m-1)*incx;
if (incy < 0)
y -= (n-1)*incy;
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
if( alpha == 0 )
;
else if( incx == 1 )
{
for( j = 0; j < n; j++, a += lda )
{
doublereal s = y[j*incy];
if( s == 0 )
continue;
s *= alpha;
for( i = 0; i <= m - 2; i += 2 )
{
doublereal t0 = a[i] + x[i]*s;
doublereal t1 = a[i+1] + x[i+1]*s;
a[i] = t0; a[i+1] = t1;
}
for( ; i < m; i++ )
a[i] += x[i]*s;
}
}
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.) {
temp = *alpha * y[jy];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[i__] * temp;
/* L10: */
}
}
jy += *incy;
/* L20: */
}
} else {
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (*m - 1) * *incx;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.) {
temp = *alpha * y[jy];
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[ix] * temp;
ix += *incx;
/* L30: */
}
}
jy += *incy;
/* L40: */
}
else
{
for( j = 0; j < n; j++, a += lda )
{
doublereal s = y[j*incy];
if( s == 0 )
continue;
s *= alpha;
for( i = 0; i < m; i++ )
a[i] += x[i*incx]*s;
}
}
return 0;
......
#include "clapack.h"
#include <float.h>
#include <stdio.h>
/* *********************************************************************** */
doublereal dlamc3_(doublereal *a, doublereal *b)
{
/* System generated locals */
doublereal ret_val;
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* DLAMC3 is intended to force A and B to be stored prior to doing */
/* the addition of A and B , for use in situations where optimizers */
/* might hold one of these in a register. */
/* Arguments */
/* ========= */
/* A (input) DOUBLE PRECISION */
/* B (input) DOUBLE PRECISION */
/* The values A and B. */
/* ===================================================================== */
/* .. Executable Statements .. */
ret_val = *a + *b;
return ret_val;
/* End of DLAMC3 */
} /* dlamc3_ */
#if 1
/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
#ifndef DBL_DIGITS
#define DBL_DIGITS 53
#endif
doublereal
dlamch_(char *cmach) {
char ch = cmach[0];
double eps = DBL_EPSILON, sfmin, small;
if ('B' == ch || 'b' == ch) {
return FLT_RADIX;
} else if ('E' == ch || 'e' == ch) {
return eps;
} else if ('L' == ch || 'l' == ch) {
return DBL_MAX_EXP;
} else if ('M' == ch || 'm' == ch) {
return DBL_MIN_EXP;
} else if ('N' == ch || 'n' == ch) {
return DBL_DIGITS;
} else if ('O' == ch || 'o' == ch) {
return DBL_MAX;
} else if ('P' == ch || 'p' == ch) {
return eps * FLT_RADIX;
} else if ('R' == ch || 'r' == ch) {
return FLT_ROUNDS < 2;
} else if ('S' == ch || 's' == ch) {
/* Use SMALL plus a bit, to avoid the possibility of rounding causing overflow
when computing 1/sfmin. */
sfmin = DBL_MIN;
small = 2. / DBL_MAX;
if (small <= sfmin) small = sfmin * (1 + eps);
return small;
} else if ('U' == ch || 'u' == ch) {
return DBL_MIN;
}
return 0;
}
#else
/* Table of constant values */
static integer c__1 = 1;
static doublereal c_b32 = 0.;
doublereal dlamch_(char *cmach)
{
/* Initialized data */
static logical first = TRUE_;
/* System generated locals */
integer i__1;
doublereal ret_val;
/* Builtin functions */
double pow_di(doublereal *, integer *);
/* Local variables */
static doublereal t;
integer it;
static doublereal rnd, eps, base;
integer beta;
static doublereal emin, prec, emax;
integer imin, imax;
logical lrnd;
static doublereal rmin, rmax;
doublereal rmach;
extern logical lsame_(char *, char *);
doublereal small;
static doublereal sfmin;
extern /* Subroutine */ int dlamc2_(integer *, integer *, logical *,
doublereal *, integer *, doublereal *, integer *, doublereal *);
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* DLAMCH determines double precision machine parameters. */
/* Arguments */
/* ========= */
/* CMACH (input) CHARACTER*1 */
/* Specifies the value to be returned by DLAMCH: */
/* = 'E' or 'e', DLAMCH := eps */
/* = 'S' or 's , DLAMCH := sfmin */
/* = 'B' or 'b', DLAMCH := base */
/* = 'P' or 'p', DLAMCH := eps*base */
/* = 'N' or 'n', DLAMCH := t */
/* = 'R' or 'r', DLAMCH := rnd */
/* = 'M' or 'm', DLAMCH := emin */
/* = 'U' or 'u', DLAMCH := rmin */
/* = 'L' or 'l', DLAMCH := emax */
/* = 'O' or 'o', DLAMCH := rmax */
/* where */
/* eps = relative machine precision */
/* sfmin = safe minimum, such that 1/sfmin does not overflow */
/* base = base of the machine */
/* prec = eps*base */
/* t = number of (base) digits in the mantissa */
/* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */
/* emin = minimum exponent before (gradual) underflow */
/* rmin = underflow threshold - base**(emin-1) */
/* emax = largest exponent before overflow */
/* rmax = overflow threshold - (base**emax)*(1-eps) */
/* ===================================================================== */
/* .. Parameters .. */
/* .. */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. External Subroutines .. */
/* .. */
/* .. Save statement .. */
/* .. */
/* .. Data statements .. */
/* .. */
/* .. Executable Statements .. */
if (first) {
dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
base = (doublereal) beta;
t = (doublereal) it;
if (lrnd) {
rnd = 1.;
i__1 = 1 - it;
eps = pow_di(&base, &i__1) / 2;
} else {
rnd = 0.;
i__1 = 1 - it;
eps = pow_di(&base, &i__1);
}
prec = eps * base;
emin = (doublereal) imin;
emax = (doublereal) imax;
sfmin = rmin;
small = 1. / rmax;
if (small >= sfmin) {
/* Use SMALL plus a bit, to avoid the possibility of rounding */
/* causing overflow when computing 1/sfmin. */
sfmin = small * (eps + 1.);
}
}
if (lsame_(cmach, "E")) {
rmach = eps;
} else if (lsame_(cmach, "S")) {
rmach = sfmin;
} else if (lsame_(cmach, "B")) {
rmach = base;
} else if (lsame_(cmach, "P")) {
rmach = prec;
} else if (lsame_(cmach, "N")) {
rmach = t;
} else if (lsame_(cmach, "R")) {
rmach = rnd;
} else if (lsame_(cmach, "M")) {
rmach = emin;
} else if (lsame_(cmach, "U")) {
rmach = rmin;
} else if (lsame_(cmach, "L")) {
rmach = emax;
} else if (lsame_(cmach, "O")) {
rmach = rmax;
}
ret_val = rmach;
first = FALSE_;
return ret_val;
/* End of DLAMCH */
} /* dlamch_ */
/* *********************************************************************** */
/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical
*ieee1)
{
/* Initialized data */
static logical first = TRUE_;
/* System generated locals */
doublereal d__1, d__2;
/* Local variables */
doublereal a, b, c__, f, t1, t2;
static integer lt;
doublereal one, qtr;
static logical lrnd;
static integer lbeta;
doublereal savec;
extern doublereal dlamc3_(doublereal *, doublereal *);
static logical lieee1;
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* DLAMC1 determines the machine parameters given by BETA, T, RND, and */
/* IEEE1. */
/* Arguments */
/* ========= */
/* BETA (output) INTEGER */
/* The base of the machine. */
/* T (output) INTEGER */
/* The number of ( BETA ) digits in the mantissa. */
/* RND (output) LOGICAL */
/* Specifies whether proper rounding ( RND = .TRUE. ) or */
/* chopping ( RND = .FALSE. ) occurs in addition. This may not */
/* be a reliable guide to the way in which the machine performs */
/* its arithmetic. */
/* IEEE1 (output) LOGICAL */
/* Specifies whether rounding appears to be done in the IEEE */
/* 'round to nearest' style. */
/* Further Details */
/* =============== */
/* The routine is based on the routine ENVRON by Malcolm and */
/* incorporates suggestions by Gentleman and Marovich. See */
/* Malcolm M. A. (1972) Algorithms to reveal properties of */
/* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */
/* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */
/* that reveal properties of floating point arithmetic units. */
/* Comms. of the ACM, 17, 276-277. */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Save statement .. */
/* .. */
/* .. Data statements .. */
/* .. */
/* .. Executable Statements .. */
if (first) {
one = 1.;
/* LBETA, LIEEE1, LT and LRND are the local values of BETA, */
/* IEEE1, T and RND. */
/* Throughout this routine we use the function DLAMC3 to ensure */
/* that relevant values are stored and not held in registers, or */
/* are not affected by optimizers. */
/* Compute a = 2.0**m with the smallest positive integer m such */
/* that */
/* fl( a + 1.0 ) = a. */
a = 1.;
c__ = 1.;
/* + WHILE( C.EQ.ONE )LOOP */
L10:
if (c__ == one) {
a *= 2;
c__ = dlamc3_(&a, &one);
d__1 = -a;
c__ = dlamc3_(&c__, &d__1);
goto L10;
}
/* + END WHILE */
/* Now compute b = 2.0**m with the smallest positive integer m */
/* such that */
/* fl( a + b ) .gt. a. */
b = 1.;
c__ = dlamc3_(&a, &b);
/* + WHILE( C.EQ.A )LOOP */
L20:
if (c__ == a) {
b *= 2;
c__ = dlamc3_(&a, &b);
goto L20;
}
/* + END WHILE */
/* Now compute the base. a and c are neighbouring floating point */
/* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */
/* their difference is beta. Adding 0.25 to c is to ensure that it */
/* is truncated to beta and not ( beta - 1 ). */
qtr = one / 4;
savec = c__;
d__1 = -a;
c__ = dlamc3_(&c__, &d__1);
lbeta = (integer) (c__ + qtr);
/* Now determine whether rounding or chopping occurs, by adding a */
/* bit less than beta/2 and a bit more than beta/2 to a. */
b = (doublereal) lbeta;
d__1 = b / 2;
d__2 = -b / 100;
f = dlamc3_(&d__1, &d__2);
c__ = dlamc3_(&f, &a);
if (c__ == a) {
lrnd = TRUE_;
} else {
lrnd = FALSE_;
}
d__1 = b / 2;
d__2 = b / 100;
f = dlamc3_(&d__1, &d__2);
c__ = dlamc3_(&f, &a);
if (lrnd && c__ == a) {
lrnd = FALSE_;
}
/* Try and decide whether rounding is done in the IEEE 'round to */
/* nearest' style. B/2 is half a unit in the last place of the two */
/* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */
/* zero, and SAVEC is odd. Thus adding B/2 to A should not change */
/* A, but adding B/2 to SAVEC should change SAVEC. */
d__1 = b / 2;
t1 = dlamc3_(&d__1, &a);
d__1 = b / 2;
t2 = dlamc3_(&d__1, &savec);
lieee1 = t1 == a && t2 > savec && lrnd;
/* Now find the mantissa, t. It should be the integer part of */
/* log to the base beta of a, however it is safer to determine t */
/* by powering. So we find t as the smallest positive integer for */
/* which */
/* fl( beta**t + 1.0 ) = 1.0. */
lt = 0;
a = 1.;
c__ = 1.;
/* + WHILE( C.EQ.ONE )LOOP */
L30:
if (c__ == one) {
++lt;
a *= lbeta;
c__ = dlamc3_(&a, &one);
d__1 = -a;
c__ = dlamc3_(&c__, &d__1);
goto L30;
}
/* + END WHILE */
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*ieee1 = lieee1;
first = FALSE_;
return 0;
/* End of DLAMC1 */
} /* dlamc1_ */
/* *********************************************************************** */
/* Subroutine */ int dlamc2_(integer *beta, integer *t, logical *rnd,
doublereal *eps, integer *emin, doublereal *rmin, integer *emax,
doublereal *rmax)
{
/* Initialized data */
static logical first = TRUE_;
static logical iwarn = FALSE_;
/* Format strings */
static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre"
"ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va"
"lue EMIN looks\002,\002 acceptable please comment out \002,/\002"
" the IF block as marked within the code of routine\002,\002 DLAM"
"C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)";
/* System generated locals */
integer i__1;
doublereal d__1, d__2, d__3, d__4, d__5;
/* Builtin functions */
double pow_di(doublereal *, integer *);
//integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
/* Local variables */
doublereal a, b, c__;
integer i__;
static integer lt;
doublereal one, two;
logical ieee;
doublereal half;
logical lrnd;
static doublereal leps;
doublereal zero;
static integer lbeta;
doublereal rbase;
static integer lemin, lemax;
integer gnmin;
doublereal small;
integer gpmin;
doublereal third;
static doublereal lrmin, lrmax;
doublereal sixth;
extern /* Subroutine */ int dlamc1_(integer *, integer *, logical *,
logical *);
extern doublereal dlamc3_(doublereal *, doublereal *);
logical lieee1;
extern /* Subroutine */ int dlamc4_(integer *, doublereal *, integer *),
dlamc5_(integer *, integer *, integer *, logical *, integer *,
doublereal *);
integer ngnmin, ngpmin;
/* Fortran I/O blocks */
static cilist io___58 = { 0, 6, 0, fmt_9999, 0 };
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* DLAMC2 determines the machine parameters specified in its argument */
/* list. */
/* Arguments */
/* ========= */
/* BETA (output) INTEGER */
/* The base of the machine. */
/* T (output) INTEGER */
/* The number of ( BETA ) digits in the mantissa. */
/* RND (output) LOGICAL */
/* Specifies whether proper rounding ( RND = .TRUE. ) or */
/* chopping ( RND = .FALSE. ) occurs in addition. This may not */
/* be a reliable guide to the way in which the machine performs */
/* its arithmetic. */
/* EPS (output) DOUBLE PRECISION */
/* The smallest positive number such that */
/* fl( 1.0 - EPS ) .LT. 1.0, */
/* where fl denotes the computed value. */
/* EMIN (output) INTEGER */
/* The minimum exponent before (gradual) underflow occurs. */
/* RMIN (output) DOUBLE PRECISION */
/* The smallest normalized number for the machine, given by */
/* BASE**( EMIN - 1 ), where BASE is the floating point value */
/* of BETA. */
/* EMAX (output) INTEGER */
/* The maximum exponent before overflow occurs. */
/* RMAX (output) DOUBLE PRECISION */
/* The largest positive number for the machine, given by */
/* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */
/* value of BETA. */
/* Further Details */
/* =============== */
/* The computation of EPS is based on a routine PARANOIA by */
/* W. Kahan of the University of California at Berkeley. */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. External Subroutines .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* .. Save statement .. */
/* .. */
/* .. Data statements .. */
/* .. */
/* .. Executable Statements .. */
if (first) {
zero = 0.;
one = 1.;
two = 2.;
/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */
/* BETA, T, RND, EPS, EMIN and RMIN. */
/* Throughout this routine we use the function DLAMC3 to ensure */
/* that relevant values are stored and not held in registers, or */
/* are not affected by optimizers. */
/* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */
dlamc1_(&lbeta, &lt, &lrnd, &lieee1);
/* Start to find EPS. */
b = (doublereal) lbeta;
i__1 = -lt;
a = pow_di(&b, &i__1);
leps = a;
/* Try some tricks to see whether or not this is the correct EPS. */
b = two / 3;
half = one / 2;
d__1 = -half;
sixth = dlamc3_(&b, &d__1);
third = dlamc3_(&sixth, &sixth);
d__1 = -half;
b = dlamc3_(&third, &d__1);
b = dlamc3_(&b, &sixth);
b = abs(b);
if (b < leps) {
b = leps;
}
leps = 1.;
/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
L10:
if (leps > b && b > zero) {
leps = b;
d__1 = half * leps;
/* Computing 5th power */
d__3 = two, d__4 = d__3, d__3 *= d__3;
/* Computing 2nd power */
d__5 = leps;
d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
c__ = dlamc3_(&d__1, &d__2);
d__1 = -c__;
c__ = dlamc3_(&half, &d__1);
b = dlamc3_(&half, &c__);
d__1 = -b;
c__ = dlamc3_(&half, &d__1);
b = dlamc3_(&half, &c__);
goto L10;
}
/* + END WHILE */
if (a < leps) {
leps = a;
}
/* Computation of EPS complete. */
/* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */
/* Keep dividing A by BETA until (gradual) underflow occurs. This */
/* is detected when we cannot recover the previous A. */
rbase = one / lbeta;
small = one;
for (i__ = 1; i__ <= 3; ++i__) {
d__1 = small * rbase;
small = dlamc3_(&d__1, &zero);
/* L20: */
}
a = dlamc3_(&one, &small);
dlamc4_(&ngpmin, &one, &lbeta);
d__1 = -one;
dlamc4_(&ngnmin, &d__1, &lbeta);
dlamc4_(&gpmin, &a, &lbeta);
d__1 = -a;
dlamc4_(&gnmin, &d__1, &lbeta);
ieee = FALSE_;
if (ngpmin == ngnmin && gpmin == gnmin) {
if (ngpmin == gpmin) {
lemin = ngpmin;
/* ( Non twos-complement machines, no gradual underflow; */
/* e.g., VAX ) */
} else if (gpmin - ngpmin == 3) {
lemin = ngpmin - 1 + lt;
ieee = TRUE_;
/* ( Non twos-complement machines, with gradual underflow; */
/* e.g., IEEE standard followers ) */
} else {
lemin = min(ngpmin,gpmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else if (ngpmin == gpmin && ngnmin == gnmin) {
if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
lemin = max(ngpmin,ngnmin);
/* ( Twos-complement machines, no gradual underflow; */
/* e.g., CYBER 205 ) */
} else {
lemin = min(ngpmin,ngnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
{
if (gpmin - min(ngpmin,ngnmin) == 3) {
lemin = max(ngpmin,ngnmin) - 1 + lt;
/* ( Twos-complement machines with gradual underflow; */
/* no known machine ) */
} else {
lemin = min(ngpmin,ngnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else {
/* Computing MIN */
i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
lemin = min(i__1,gnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
first = FALSE_;
/* ** */
/* Comment out this if block if EMIN is ok */
if (iwarn) {
first = TRUE_;
printf("\n\n WARNING. The value EMIN may be incorrect:- ");
printf("EMIN = %8i\n",lemin);
printf("If, after inspection, the value EMIN looks acceptable");
printf("please comment out \n the IF block as marked within the");
printf("code of routine DLAMC2, \n otherwise supply EMIN");
printf("explicitly.\n");
/*
s_wsfe(&io___58);
do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer));
e_wsfe();
*/
}
/* ** */
/* Assume IEEE arithmetic if we found denormalised numbers above, */
/* or if arithmetic seems to round in the IEEE style, determined */
/* in routine DLAMC1. A true IEEE machine should have both things */
/* true; however, faulty machines may have one or the other. */
ieee = ieee || lieee1;
/* Compute RMIN by successive division by BETA. We could compute */
/* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */
/* this computation. */
lrmin = 1.;
i__1 = 1 - lemin;
for (i__ = 1; i__ <= i__1; ++i__) {
d__1 = lrmin * rbase;
lrmin = dlamc3_(&d__1, &zero);
/* L30: */
}
/* Finally, call DLAMC5 to compute EMAX and RMAX. */
dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*eps = leps;
*emin = lemin;
*rmin = lrmin;
*emax = lemax;
*rmax = lrmax;
return 0;
/* End of DLAMC2 */
} /* dlamc2_ */
/* *********************************************************************** */
/* Subroutine */ int dlamc4_(integer *emin, doublereal *start, integer *base)
{
/* System generated locals */
integer i__1;
doublereal d__1;
/* Local variables */
doublereal a;
integer i__;
doublereal b1, b2, c1, c2, d1, d2, one, zero, rbase;
extern doublereal dlamc3_(doublereal *, doublereal *);
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* DLAMC4 is a service routine for DLAMC2. */
/* Arguments */
/* ========= */
/* EMIN (output) INTEGER */
/* The minimum exponent before (gradual) underflow, computed by */
/* setting A = START and dividing by BASE until the previous A */
/* can not be recovered. */
/* START (input) DOUBLE PRECISION */
/* The starting point for determining EMIN. */
/* BASE (input) INTEGER */
/* The base of the machine. */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Executable Statements .. */
a = *start;
one = 1.;
rbase = one / *base;
zero = 0.;
*emin = 1;
d__1 = a * rbase;
b1 = dlamc3_(&d__1, &zero);
c1 = a;
c2 = a;
d1 = a;
d2 = a;
/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */
/* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */
L10:
if (c1 == a && c2 == a && d1 == a && d2 == a) {
--(*emin);
a = b1;
d__1 = a / *base;
b1 = dlamc3_(&d__1, &zero);
d__1 = b1 * *base;
c1 = dlamc3_(&d__1, &zero);
d1 = zero;
i__1 = *base;
for (i__ = 1; i__ <= i__1; ++i__) {
d1 += b1;
/* L20: */
}
d__1 = a * rbase;
b2 = dlamc3_(&d__1, &zero);
d__1 = b2 / rbase;
c2 = dlamc3_(&d__1, &zero);
d2 = zero;
i__1 = *base;
for (i__ = 1; i__ <= i__1; ++i__) {
d2 += b2;
/* L30: */
}
goto L10;
}
/* + END WHILE */
return 0;
/* End of DLAMC4 */
} /* dlamc4_ */
/* *********************************************************************** */
/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin,
logical *ieee, integer *emax, doublereal *rmax)
{
/* System generated locals */
integer i__1;
doublereal d__1;
/* Local variables */
integer i__;
doublereal y, z__;
integer try__, lexp;
doublereal oldy;
integer uexp, nbits;
extern doublereal dlamc3_(doublereal *, doublereal *);
doublereal recbas;
integer exbits, expsum;
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* DLAMC5 attempts to compute RMAX, the largest machine floating-point */
/* number, without overflow. It assumes that EMAX + abs(EMIN) sum */
/* approximately to a power of 2. It will fail on machines where this */
/* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */
/* EMAX = 28718). It will also fail if the value supplied for EMIN is */
/* too large (i.e. too close to zero), probably with overflow. */
/* Arguments */
/* ========= */
/* BETA (input) INTEGER */
/* The base of floating-point arithmetic. */
/* P (input) INTEGER */
/* The number of base BETA digits in the mantissa of a */
/* floating-point value. */
/* EMIN (input) INTEGER */
/* The minimum exponent before (gradual) underflow. */
/* IEEE (input) LOGICAL */
/* A logical flag specifying whether or not the arithmetic */
/* system is thought to comply with the IEEE standard. */
/* EMAX (output) INTEGER */
/* The largest exponent before overflow */
/* RMAX (output) DOUBLE PRECISION */
/* The largest machine floating-point number. */
/* ===================================================================== */
/* .. Parameters .. */
/* .. */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* .. Executable Statements .. */
/* First compute LEXP and UEXP, two powers of 2 that bound */
/* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */
/* approximately to the bound that is closest to abs(EMIN). */
/* (EMAX is the exponent of the required number RMAX). */
lexp = 1;
exbits = 1;
L10:
try__ = lexp << 1;
if (try__ <= -(*emin)) {
lexp = try__;
++exbits;
goto L10;
}
if (lexp == -(*emin)) {
uexp = lexp;
} else {
uexp = try__;
++exbits;
}
/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */
/* than or equal to EMIN. EXBITS is the number of bits needed to */
/* store the exponent. */
if (uexp + *emin > -lexp - *emin) {
expsum = lexp << 1;
} else {
expsum = uexp << 1;
}
/* EXPSUM is the exponent range, approximately equal to */
/* EMAX - EMIN + 1 . */
*emax = expsum + *emin - 1;
nbits = exbits + 1 + *p;
/* NBITS is the total number of bits needed to store a */
/* floating-point number. */
if (nbits % 2 == 1 && *beta == 2) {
/* Either there are an odd number of bits used to store a */
/* floating-point number, which is unlikely, or some bits are */
/* not used in the representation of numbers, which is possible, */
/* (e.g. Cray machines) or the mantissa has an implicit bit, */
/* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */
/* most likely. We have to assume the last alternative. */
/* If this is true, then we need to reduce EMAX by one because */
/* there must be some way of representing zero in an implicit-bit */
/* system. On machines like Cray, we are reducing EMAX by one */
/* unnecessarily. */
--(*emax);
}
if (*ieee) {
/* Assume we are on an IEEE machine which reserves one exponent */
/* for infinity and NaN. */
--(*emax);
}
/* Now create RMAX, the largest machine number, which should */
/* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */
/* First compute 1.0 - BETA**(-P), being careful that the */
/* result is less than 1.0 . */
recbas = 1. / *beta;
z__ = *beta - 1.;
y = 0.;
i__1 = *p;
for (i__ = 1; i__ <= i__1; ++i__) {
z__ *= recbas;
if (y < 1.) {
oldy = y;
}
y = dlamc3_(&y, &z__);
/* L20: */
}
if (y >= 1.) {
y = oldy;
}
/* Now multiply by BETA**EMAX to get RMAX. */
i__1 = *emax;
for (i__ = 1; i__ <= i__1; ++i__) {
d__1 = y * *beta;
y = dlamc3_(&d__1, &c_b32);
/* L30: */
}
*rmax = y;
return 0;
/* End of DLAMC5 */
} /* dlamc5_ */
#endif
#include "clapack.h"
#include <float.h>
#include <stdio.h>
/* *********************************************************************** */
doublereal dlamc3_(doublereal *a, doublereal *b)
{
/* System generated locals */
doublereal ret_val;
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* DLAMC3 is intended to force A and B to be stored prior to doing */
/* the addition of A and B , for use in situations where optimizers */
/* might hold one of these in a register. */
/* Arguments */
/* ========= */
/* A (input) DOUBLE PRECISION */
/* B (input) DOUBLE PRECISION */
/* The values A and B. */
/* ===================================================================== */
/* .. Executable Statements .. */
ret_val = *a + *b;
return ret_val;
/* End of DLAMC3 */
} /* dlamc3_ */
/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
#ifndef DBL_DIGITS
#define DBL_DIGITS 53
#endif
const doublereal lapack_dlamch_tab[] =
{
0, FLT_RADIX, DBL_EPSILON, DBL_MAX_EXP, DBL_MIN_EXP, DBL_DIGITS, DBL_MAX,
DBL_EPSILON*FLT_RADIX, 1, DBL_MIN*(1 + DBL_EPSILON), DBL_MIN
};
/* dlartg.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
......@@ -20,17 +8,14 @@
integer i__1;
doublereal d__1, d__2;
/* Builtin functions */
double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal);
/* Local variables */
integer i__;
doublereal f1, g1, eps, scale;
integer count;
doublereal safmn2, safmx2;
extern doublereal dlamch_(char *);
doublereal safmin;
static doublereal safmn2, safmx2;
static doublereal safmin;
static volatile logical FIRST = TRUE_;
/* -- LAPACK auxiliary routine (version 3.2) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
......@@ -97,15 +82,16 @@
/* .. */
/* .. Executable Statements .. */
/* IF( FIRST ) THEN */
safmin = dlamch_("S");
eps = dlamch_("E");
d__1 = dlamch_("B");
i__1 = (integer) (log(safmin / eps) / log(dlamch_("B")) / 2.);
safmn2 = pow_di(&d__1, &i__1);
safmx2 = 1. / safmn2;
/* FIRST = .FALSE. */
/* END IF */
if( FIRST )
{
safmin = dlamch_("S");
eps = dlamch_("E");
d__1 = dlamch_("B");
i__1 = (integer) (log(safmin / eps) / log(dlamch_("B")) / 2.);
safmn2 = pow_di(&d__1, &i__1);
safmx2 = 1. / safmn2;
FIRST = FALSE_;
}
if (*g == 0.) {
*cs = 1.;
*sn = 0.;
......
/* ilaenv.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
#include "string.h"
/* Table of constant values */
static integer c__1 = 1;
static real c_b163 = 0.f;
static real c_b164 = 1.f;
static integer c__0 = 0;
integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1,
integer *n2, integer *n3, integer *n4)
{
/* System generated locals */
integer ret_val;
/* Builtin functions */
/* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
integer s_cmp(char *, char *, ftnlen, ftnlen);
/* Local variables */
integer i__;
char c1[1], c2[2], c3[3], c4[2];
integer ic, nb, iz, nx;
logical cname;
integer nbmin;
logical sname;
extern integer ieeeck_(integer *, real *, real *);
char subnam[6];
extern integer iparmq_(integer *, char *, char *, integer *, integer *,
integer *, integer *);
ftnlen name_len, opts_len;
name_len = (ftnlen)strlen (name__);
opts_len = (ftnlen)strlen (opts);
/* -- LAPACK auxiliary routine (version 3.2) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* January 2007 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* ILAENV is called from the LAPACK routines to choose problem-dependent */
/* parameters for the local environment. See ISPEC for a description of */
/* the parameters. */
/* ILAENV returns an INTEGER */
/* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
/* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */
/* This version provides a set of parameters which should give good, */
/* but not optimal, performance on many of the currently available */
/* computers. Users are encouraged to modify this subroutine to set */
/* the tuning parameters for their particular machine using the option */
/* and problem size information in the arguments. */
/* This routine will not function correctly if it is converted to all */
/* lower case. Converting it to all upper case is allowed. */
/* Arguments */
/* ========= */
/* ISPEC (input) INTEGER */
/* Specifies the parameter to be returned as the value of */
/* ILAENV. */
/* = 1: the optimal blocksize; if this value is 1, an unblocked */
/* algorithm will give the best performance. */
/* = 2: the minimum block size for which the block routine */
/* should be used; if the usable block size is less than */
/* this value, an unblocked routine should be used. */
/* = 3: the crossover point (in a block routine, for N less */
/* than this value, an unblocked routine should be used) */
/* = 4: the number of shifts, used in the nonsymmetric */
/* eigenvalue routines (DEPRECATED) */
/* = 5: the minimum column dimension for blocking to be used; */
/* rectangular blocks must have dimension at least k by m, */
/* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
/* = 6: the crossover point for the SVD (when reducing an m by n */
/* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
/* this value, a QR factorization is used first to reduce */
/* the matrix to a triangular form.) */
/* = 7: the number of processors */
/* = 8: the crossover point for the multishift QR method */
/* for nonsymmetric eigenvalue problems (DEPRECATED) */
/* = 9: maximum size of the subproblems at the bottom of the */
/* computation tree in the divide-and-conquer algorithm */
/* (used by xGELSD and xGESDD) */
/* =10: ieee NaN arithmetic can be trusted not to trap */
/* =11: infinity arithmetic can be trusted not to trap */
/* 12 <= ISPEC <= 16: */
/* xHSEQR or one of its subroutines, */
/* see IPARMQ for detailed explanation */
/* NAME (input) CHARACTER*(*) */
/* The name of the calling subroutine, in either upper case or */
/* lower case. */
/* OPTS (input) CHARACTER*(*) */
/* The character options to the subroutine NAME, concatenated */
/* into a single character string. For example, UPLO = 'U', */
/* TRANS = 'T', and DIAG = 'N' for a triangular routine would */
/* be specified as OPTS = 'UTN'. */
/* N1 (input) INTEGER */
/* N2 (input) INTEGER */
/* N3 (input) INTEGER */
/* N4 (input) INTEGER */
/* Problem dimensions for the subroutine NAME; these may not all */
/* be required. */
/* Further Details */
/* =============== */
/* The following conventions have been used when calling ILAENV from the */
/* LAPACK routines: */
/* 1) OPTS is a concatenation of all of the character options to */
/* subroutine NAME, in the same order that they appear in the */
/* argument list for NAME, even if they are not used in determining */
/* the value of the parameter specified by ISPEC. */
/* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */
/* that they appear in the argument list for NAME. N1 is used */
/* first, N2 second, and so on, and unused problem dimensions are */
/* passed a value of -1. */
/* 3) The parameter value returned by ILAENV is checked for validity in */
/* the calling subroutine. For example, ILAENV is used to retrieve */
/* the optimal blocksize for STRTRI as follows: */
/* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
/* IF( NB.LE.1 ) NB = MAX( 1, N ) */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Executable Statements .. */
switch (*ispec) {
case 1: goto L10;
case 2: goto L10;
case 3: goto L10;
case 4: goto L80;
case 5: goto L90;
case 6: goto L100;
case 7: goto L110;
case 8: goto L120;
case 9: goto L130;
case 10: goto L140;
case 11: goto L150;
case 12: goto L160;
case 13: goto L160;
case 14: goto L160;
case 15: goto L160;
case 16: goto L160;
}
/* Invalid value for ISPEC */
ret_val = -1;
return ret_val;
L10:
/* Convert NAME to upper case if the first character is lower case. */
ret_val = 1;
s_copy(subnam, name__, (ftnlen)1, name_len);
ic = *(unsigned char *)subnam;
iz = 'Z';
if (iz == 90 || iz == 122) {
/* ASCII character set */
if (ic >= 97 && ic <= 122) {
*(unsigned char *)subnam = (char) (ic - 32);
for (i__ = 2; i__ <= 6; ++i__) {
ic = *(unsigned char *)&subnam[i__ - 1];
if (ic >= 97 && ic <= 122) {
*(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
}
/* L20: */
}
}
} else if (iz == 233 || iz == 169) {
/* EBCDIC character set */
if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 &&
ic <= 169) {
*(unsigned char *)subnam = (char) (ic + 64);
for (i__ = 2; i__ <= 6; ++i__) {
ic = *(unsigned char *)&subnam[i__ - 1];
if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >=
162 && ic <= 169) {
*(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64);
}
/* L30: */
}
}
} else if (iz == 218 || iz == 250) {
/* Prime machines: ASCII+128 */
if (ic >= 225 && ic <= 250) {
*(unsigned char *)subnam = (char) (ic - 32);
for (i__ = 2; i__ <= 6; ++i__) {
ic = *(unsigned char *)&subnam[i__ - 1];
if (ic >= 225 && ic <= 250) {
*(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
}
/* L40: */
}
}
}
*(unsigned char *)c1 = *(unsigned char *)subnam;
sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D';
cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z';
if (! (cname || sname)) {
return ret_val;
}
s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2);
s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3);
s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2);
switch (*ispec) {
case 1: goto L50;
case 2: goto L60;
case 3: goto L70;
}
L50:
/* ISPEC = 1: block size */
/* In these examples, separate code is provided for setting NB for */
/* real and complex. We assume that NB will take the same value in */
/* single or double precision. */
nb = 1;
if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 64;
} else {
nb = 64;
}
} else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3,
"RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)
1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3)
== 0) {
if (sname) {
nb = 32;
} else {
nb = 32;
}
} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 32;
} else {
nb = 32;
}
} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 32;
} else {
nb = 32;
}
} else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 64;
} else {
nb = 64;
}
}
} else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 64;
} else {
nb = 64;
}
}
} else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 64;
} else {
nb = 64;
}
} else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
nb = 32;
} else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) {
nb = 64;
}
} else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
nb = 64;
} else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
nb = 32;
} else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) {
nb = 64;
}
} else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
if (*(unsigned char *)c3 == 'G') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nb = 32;
}
} else if (*(unsigned char *)c3 == 'M') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nb = 32;
}
}
} else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
if (*(unsigned char *)c3 == 'G') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nb = 32;
}
} else if (*(unsigned char *)c3 == 'M') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nb = 32;
}
}
} else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
if (*n4 <= 64) {
nb = 1;
} else {
nb = 32;
}
} else {
if (*n4 <= 64) {
nb = 1;
} else {
nb = 32;
}
}
}
} else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
if (*n2 <= 64) {
nb = 1;
} else {
nb = 32;
}
} else {
if (*n2 <= 64) {
nb = 1;
} else {
nb = 32;
}
}
}
} else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 64;
} else {
nb = 64;
}
}
} else if (s_cmp(c2, "LA", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nb = 64;
} else {
nb = 64;
}
}
} else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) {
nb = 1;
}
}
ret_val = nb;
return ret_val;
L60:
/* ISPEC = 2: minimum block size */
nbmin = 2;
if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, (
ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0)
{
if (sname) {
nbmin = 2;
} else {
nbmin = 2;
}
} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nbmin = 2;
} else {
nbmin = 2;
}
} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nbmin = 2;
} else {
nbmin = 2;
}
} else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nbmin = 2;
} else {
nbmin = 2;
}
}
} else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nbmin = 8;
} else {
nbmin = 8;
}
} else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
nbmin = 2;
}
} else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
nbmin = 2;
}
} else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
if (*(unsigned char *)c3 == 'G') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nbmin = 2;
}
} else if (*(unsigned char *)c3 == 'M') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nbmin = 2;
}
}
} else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
if (*(unsigned char *)c3 == 'G') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nbmin = 2;
}
} else if (*(unsigned char *)c3 == 'M') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nbmin = 2;
}
}
}
ret_val = nbmin;
return ret_val;
L70:
/* ISPEC = 3: crossover point */
nx = 0;
if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, (
ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0)
{
if (sname) {
nx = 128;
} else {
nx = 128;
}
} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nx = 128;
} else {
nx = 128;
}
} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
if (sname) {
nx = 128;
} else {
nx = 128;
}
}
} else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
nx = 32;
}
} else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
nx = 32;
}
} else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
if (*(unsigned char *)c3 == 'G') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nx = 128;
}
}
} else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
if (*(unsigned char *)c3 == 'G') {
if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ",
(ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
ftnlen)1, (ftnlen)2) == 0) {
nx = 128;
}
}
}
ret_val = nx;
return ret_val;
L80:
/* ISPEC = 4: number of shifts (used by xHSEQR) */
ret_val = 6;
return ret_val;
L90:
/* ISPEC = 5: minimum column dimension (not used) */
ret_val = 2;
return ret_val;
L100:
/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */
ret_val = (integer) ((real) min(*n1,*n2) * 1.6f);
return ret_val;
L110:
/* ISPEC = 7: number of processors (not used) */
ret_val = 1;
return ret_val;
L120:
/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */
ret_val = 50;
return ret_val;
L130:
/* ISPEC = 9: maximum size of the subproblems at the bottom of the */
/* computation tree in the divide-and-conquer algorithm */
/* (used by xGELSD and xGESDD) */
ret_val = 25;
return ret_val;
L140:
/* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
/* ILAENV = 0 */
ret_val = 1;
if (ret_val == 1) {
ret_val = ieeeck_(&c__1, &c_b163, &c_b164);
}
return ret_val;
L150:
/* ISPEC = 11: infinity arithmetic can be trusted not to trap */
/* ILAENV = 0 */
ret_val = 1;
if (ret_val == 1) {
ret_val = ieeeck_(&c__0, &c_b163, &c_b164);
}
return ret_val;
L160:
/* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
ret_val = iparmq_(ispec, name__, opts, n1, n2, n3, n4)
;
return ret_val;
/* End of ILAENV */
} /* ilaenv_ */
/* ilaenv.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
#include "string.h"
/* Table of constant values */
static integer c__1 = 1;
static real c_b163 = 0.f;
static real c_b164 = 1.f;
static integer c__0 = 0;
integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1,
integer *n2, integer *n3, integer *n4)
{
/* -- LAPACK auxiliary routine (version 3.2) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* January 2007 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* ILAENV is called from the LAPACK routines to choose problem-dependent */
/* parameters for the local environment. See ISPEC for a description of */
/* the parameters. */
/* ILAENV returns an INTEGER */
/* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
/* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */
/* This version provides a set of parameters which should give good, */
/* but not optimal, performance on many of the currently available */
/* computers. Users are encouraged to modify this subroutine to set */
/* the tuning parameters for their particular machine using the option */
/* and problem size information in the arguments. */
/* This routine will not function correctly if it is converted to all */
/* lower case. Converting it to all upper case is allowed. */
/* Arguments */
/* ========= */
/* ISPEC (input) INTEGER */
/* Specifies the parameter to be returned as the value of */
/* ILAENV. */
/* = 1: the optimal blocksize; if this value is 1, an unblocked */
/* algorithm will give the best performance. */
/* = 2: the minimum block size for which the block routine */
/* should be used; if the usable block size is less than */
/* this value, an unblocked routine should be used. */
/* = 3: the crossover point (in a block routine, for N less */
/* than this value, an unblocked routine should be used) */
/* = 4: the number of shifts, used in the nonsymmetric */
/* eigenvalue routines (DEPRECATED) */
/* = 5: the minimum column dimension for blocking to be used; */
/* rectangular blocks must have dimension at least k by m, */
/* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
/* = 6: the crossover point for the SVD (when reducing an m by n */
/* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
/* this value, a QR factorization is used first to reduce */
/* the matrix to a triangular form.) */
/* = 7: the number of processors */
/* = 8: the crossover point for the multishift QR method */
/* for nonsymmetric eigenvalue problems (DEPRECATED) */
/* = 9: maximum size of the subproblems at the bottom of the */
/* computation tree in the divide-and-conquer algorithm */
/* (used by xGELSD and xGESDD) */
/* =10: ieee NaN arithmetic can be trusted not to trap */
/* =11: infinity arithmetic can be trusted not to trap */
/* 12 <= ISPEC <= 16: */
/* xHSEQR or one of its subroutines, */
/* see IPARMQ for detailed explanation */
/* NAME (input) CHARACTER*(*) */
/* The name of the calling subroutine, in either upper case or */
/* lower case. */
/* OPTS (input) CHARACTER*(*) */
/* The character options to the subroutine NAME, concatenated */
/* into a single character string. For example, UPLO = 'U', */
/* TRANS = 'T', and DIAG = 'N' for a triangular routine would */
/* be specified as OPTS = 'UTN'. */
/* N1 (input) INTEGER */
/* N2 (input) INTEGER */
/* N3 (input) INTEGER */
/* N4 (input) INTEGER */
/* Problem dimensions for the subroutine NAME; these may not all */
/* be required. */
/* Further Details */
/* =============== */
/* The following conventions have been used when calling ILAENV from the */
/* LAPACK routines: */
/* 1) OPTS is a concatenation of all of the character options to */
/* subroutine NAME, in the same order that they appear in the */
/* argument list for NAME, even if they are not used in determining */
/* the value of the parameter specified by ISPEC. */
/* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */
/* that they appear in the argument list for NAME. N1 is used */
/* first, N2 second, and so on, and unused problem dimensions are */
/* passed a value of -1. */
/* 3) The parameter value returned by ILAENV is checked for validity in */
/* the calling subroutine. For example, ILAENV is used to retrieve */
/* the optimal blocksize for STRTRI as follows: */
/* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
/* IF( NB.LE.1 ) NB = MAX( 1, N ) */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Executable Statements .. */
switch (*ispec) {
case 1:
/* ISPEC = 1: block size */
/* In these examples, separate code is provided for setting NB for */
/* real and complex. We assume that NB will take the same value in */
/* single or double precision. */
return 1;
case 2:
/* ISPEC = 2: minimum block size */
return 2;
case 3:
/* ISPEC = 3: crossover point */
return 3;
case 4:
/* ISPEC = 4: number of shifts (used by xHSEQR) */
return 6;
case 5:
/* ISPEC = 5: minimum column dimension (not used) */
return 2;
case 6:
/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */
return (integer) ((real) min(*n1,*n2) * 1.6f);
case 7:
/* ISPEC = 7: number of processors (not used) */
return 1;
case 8:
/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */
return 50;
case 9:
/* ISPEC = 9: maximum size of the subproblems at the bottom of the */
/* computation tree in the divide-and-conquer algorithm */
/* (used by xGELSD and xGESDD) */
return 25;
case 10:
/* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
return ieeeck_(&c__1, &c_b163, &c_b164);
case 11:
/* ISPEC = 11: infinity arithmetic can be trusted not to trap */
return ieeeck_(&c__0, &c_b163, &c_b164);
case 12:
case 13:
case 14:
case 15:
case 16:
/* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
return iparmq_(ispec, name__, opts, n1, n2, n3, n4);
default:
break;
}
/* Invalid value for ISPEC */
return -1;
/* End of ILAENV */
} /* ilaenv_ */
/* sgemv.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha,
real *a, integer *lda, real *x, integer *incx, real *beta, real *y,
integer *incy)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
/* Local variables */
integer i__, j, ix, iy, jx, jy, kx, ky, info;
real temp;
integer lenx, leny;
extern logical lsame_(char *, char *);
extern /* Subroutine */ int xerbla_(char *, integer *);
/* .. Scalar Arguments .. */
/* .. */
/* .. Array Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SGEMV performs one of the matrix-vector operations */
/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */
/* where alpha and beta are scalars, x and y are vectors and A is an */
/* m by n matrix. */
/* Arguments */
/* ========== */
/* TRANS - CHARACTER*1. */
/* On entry, TRANS specifies the operation to be performed as */
/* follows: */
/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */
/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */
/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */
/* Unchanged on exit. */
/* M - INTEGER. */
/* On entry, M specifies the number of rows of the matrix A. */
/* M must be at least zero. */
/* Unchanged on exit. */
/* N - INTEGER. */
/* On entry, N specifies the number of columns of the matrix A. */
/* N must be at least zero. */
/* Unchanged on exit. */
/* ALPHA - REAL . */
/* On entry, ALPHA specifies the scalar alpha. */
/* Unchanged on exit. */
/* A - REAL array of DIMENSION ( LDA, n ). */
/* Before entry, the leading m by n part of the array A must */
/* contain the matrix of coefficients. */
/* Unchanged on exit. */
/* LDA - INTEGER. */
/* On entry, LDA specifies the first dimension of A as declared */
/* in the calling (sub) program. LDA must be at least */
/* max( 1, m ). */
/* Unchanged on exit. */
/* X - REAL array of DIMENSION at least */
/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
/* and at least */
/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
/* Before entry, the incremented array X must contain the */
/* vector x. */
/* Unchanged on exit. */
/* INCX - INTEGER. */
/* On entry, INCX specifies the increment for the elements of */
/* X. INCX must not be zero. */
/* Unchanged on exit. */
/* BETA - REAL . */
/* On entry, BETA specifies the scalar beta. When BETA is */
/* supplied as zero then Y need not be set on input. */
/* Unchanged on exit. */
/* Y - REAL array of DIMENSION at least */
/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
/* and at least */
/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
/* Before entry with BETA non-zero, the incremented array Y */
/* must contain the vector y. On exit, Y is overwritten by the */
/* updated vector y. */
/* INCY - INTEGER. */
/* On entry, INCY specifies the increment for the elements of */
/* Y. INCY must not be zero. */
/* Unchanged on exit. */
/* Level 2 Blas routine. */
/* -- Written on 22-October-1986. */
/* Jack Dongarra, Argonne National Lab. */
/* Jeremy Du Croz, Nag Central Office. */
/* Sven Hammarling, Nag Central Office. */
/* Richard Hanson, Sandia National Labs. */
/* .. Parameters .. */
/* .. */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. External Subroutines .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* Test the input parameters. */
/* Parameter adjustments */
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
--x;
--y;
/* Function Body */
info = 0;
if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
) {
info = 1;
} else if (*m < 0) {
info = 2;
} else if (*n < 0) {
info = 3;
} else if (*lda < max(1,*m)) {
info = 6;
} else if (*incx == 0) {
info = 8;
} else if (*incy == 0) {
info = 11;
}
if (info != 0) {
xerbla_("SGEMV ", &info);
return 0;
}
/* Quick return if possible. */
if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
return 0;
}
/* Set LENX and LENY, the lengths of the vectors x and y, and set */
/* up the start points in X and Y. */
if (lsame_(trans, "N")) {
lenx = *n;
leny = *m;
} else {
lenx = *m;
leny = *n;
}
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (lenx - 1) * *incx;
}
if (*incy > 0) {
ky = 1;
} else {
ky = 1 - (leny - 1) * *incy;
}
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
/* First form y := beta*y. */
if (*beta != 1.f) {
if (*incy == 1) {
if (*beta == 0.f) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = 0.f;
/* L10: */
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[i__] = *beta * y[i__];
/* L20: */
}
}
} else {
iy = ky;
if (*beta == 0.f) {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = 0.f;
iy += *incy;
/* L30: */
}
} else {
i__1 = leny;
for (i__ = 1; i__ <= i__1; ++i__) {
y[iy] = *beta * y[iy];
iy += *incy;
/* L40: */
}
}
}
}
if (*alpha == 0.f) {
return 0;
}
if (lsame_(trans, "N")) {
/* Form y := alpha*A*x + y. */
jx = kx;
if (*incy == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (x[jx] != 0.f) {
temp = *alpha * x[jx];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[i__] += temp * a[i__ + j * a_dim1];
/* L50: */
}
}
jx += *incx;
/* L60: */
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (x[jx] != 0.f) {
temp = *alpha * x[jx];
iy = ky;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
y[iy] += temp * a[i__ + j * a_dim1];
iy += *incy;
/* L70: */
}
}
jx += *incx;
/* L80: */
}
}
} else {
/* Form y := alpha*A'*x + y. */
jy = ky;
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.f;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[i__];
/* L90: */
}
y[jy] += *alpha * temp;
jy += *incy;
/* L100: */
}
} else {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
temp = 0.f;
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
temp += a[i__ + j * a_dim1] * x[ix];
ix += *incx;
/* L110: */
}
y[jy] += *alpha * temp;
jy += *incy;
/* L120: */
}
}
}
return 0;
/* End of SGEMV . */
} /* sgemv_ */
#include "clapack.h"
#include <assert.h>
/* Subroutine */ int sgemv_(char *_trans, integer *_m, integer *_n, real *_alpha,
real *a, integer *_lda, real *x, integer *_incx, real *_beta, real *y,
integer *_incy)
{
/* .. Scalar Arguments .. */
/* .. */
/* .. Array Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SGEMV performs one of the matrix-vector operations */
/* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */
/* where alpha and beta are scalars, x and y are vectors and A is an */
/* m by n matrix. */
/* Arguments */
/* ========== */
/* TRANS - CHARACTER*1. */
/* On entry, TRANS specifies the operation to be performed as */
/* follows: */
/* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */
/* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */
/* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */
/* Unchanged on exit. */
/* M - INTEGER. */
/* On entry, M specifies the number of rows of the matrix A. */
/* M must be at least zero. */
/* Unchanged on exit. */
/* N - INTEGER. */
/* On entry, N specifies the number of columns of the matrix A. */
/* N must be at least zero. */
/* Unchanged on exit. */
/* ALPHA - REAL . */
/* On entry, ALPHA specifies the scalar alpha. */
/* Unchanged on exit. */
/* A - REAL array of DIMENSION ( LDA, n ). */
/* Before entry, the leading m by n part of the array A must */
/* contain the matrix of coefficients. */
/* Unchanged on exit. */
/* LDA - INTEGER. */
/* On entry, LDA specifies the first dimension of A as declared */
/* in the calling (sub) program. LDA must be at least */
/* max( 1, m ). */
/* Unchanged on exit. */
/* X - REAL array of DIMENSION at least */
/* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
/* and at least */
/* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
/* Before entry, the incremented array X must contain the */
/* vector x. */
/* Unchanged on exit. */
/* INCX - INTEGER. */
/* On entry, INCX specifies the increment for the elements of */
/* X. INCX must not be zero. */
/* Unchanged on exit. */
/* BETA - REAL . */
/* On entry, BETA specifies the scalar beta. When BETA is */
/* supplied as zero then Y need not be set on input. */
/* Unchanged on exit. */
/* Y - REAL array of DIMENSION at least */
/* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
/* and at least */
/* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
/* Before entry with BETA non-zero, the incremented array Y */
/* must contain the vector y. On exit, Y is overwritten by the */
/* updated vector y. */
/* INCY - INTEGER. */
/* On entry, INCY specifies the increment for the elements of */
/* Y. INCY must not be zero. */
/* Unchanged on exit. */
/* Level 2 Blas routine. */
/* -- Written on 22-October-1986. */
/* Jack Dongarra, Argonne National Lab. */
/* Jeremy Du Croz, Nag Central Office. */
/* Sven Hammarling, Nag Central Office. */
/* Richard Hanson, Sandia National Labs. */
/* Test the input parameters. */
/* Function Body */
char trans = lapack_toupper(_trans[0]);
integer i, j, m = *_m, n = *_n, lda = *_lda, incx = *_incx, incy = *_incy;
integer leny = trans == 'N' ? m : n, lenx = trans == 'N' ? n : m;
real alpha = *_alpha, beta = *_beta;
integer info = 0;
if (trans != 'N' && trans != 'T' && trans != 'C')
info = 1;
else if (m < 0)
info = 2;
else if (n < 0)
info = 3;
else if (lda < max(1,m))
info = 6;
else if (incx == 0)
info = 8;
else if (incy == 0)
info = 11;
if (info != 0)
{
xerbla_("SGEMV ", &info);
return 0;
}
if( incy < 0 )
y -= incy*(leny - 1);
if( incx < 0 )
x -= incx*(lenx - 1);
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
if( beta != 1.f )
{
if( incy == 1 )
{
if( beta == 0.f )
for( i = 0; i < leny; i++ )
y[i] = 0.f;
else
for( i = 0; i < leny; i++ )
y[i] *= beta;
}
else
{
if( beta == 0.f )
for( i = 0; i < leny; i++ )
y[i*incy] = 0.f;
else
for( i = 0; i < leny; i++ )
y[i*incy] *= beta;
}
}
if( alpha == 0.f )
;
else if( trans == 'N' )
{
for( i = 0; i < n; i++, a += lda )
{
real s = x[i*incx];
if( s == 0.f )
continue;
s *= alpha;
for( j = 0; j <= m - 4; j += 4 )
{
real t0 = y[j] + s*a[j];
real t1 = y[j+1] + s*a[j+1];
y[j] = t0; y[j+1] = t1;
t0 = y[j+2] + s*a[j+2];
t1 = y[j+3] + s*a[j+3];
y[j+2] = t0; y[j+3] = t1;
}
for( ; j < m; j++ )
y[j] += s*a[j];
}
}
else
{
for( i = 0; i < n; i++, a += lda )
{
real s = 0;
for( j = 0; j <= m - 4; j += 4 )
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
for( ; j < m; j++ )
s += x[j]*a[j];
y[i*incy] += alpha*s;
}
}
return 0;
/* End of SGEMV . */
} /* sgemv_ */
/* sger.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x,
integer *incx, real *y, integer *incy, real *a, integer *lda)
/* Subroutine */ int sger_(integer *_m, integer *_n, real *_alpha,
real *x, integer *_incx, real *y, integer *_incy,
real *a, integer *_lda)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
/* Local variables */
integer i__, j, ix, jy, kx, info;
real temp;
extern /* Subroutine */ int xerbla_(char *, integer *);
/* .. Scalar Arguments .. */
/* .. */
......@@ -52,11 +33,11 @@
/* N must be at least zero. */
/* Unchanged on exit. */
/* ALPHA - REAL . */
/* ALPHA - SINGLE PRECISION. */
/* On entry, ALPHA specifies the scalar alpha. */
/* Unchanged on exit. */
/* X - REAL array of dimension at least */
/* X - SINGLE PRECISION array of dimension at least */
/* ( 1 + ( m - 1 )*abs( INCX ) ). */
/* Before entry, the incremented array X must contain the m */
/* element vector x. */
......@@ -67,7 +48,7 @@
/* X. INCX must not be zero. */
/* Unchanged on exit. */
/* Y - REAL array of dimension at least */
/* Y - SINGLE PRECISION array of dimension at least */
/* ( 1 + ( n - 1 )*abs( INCY ) ). */
/* Before entry, the incremented array Y must contain the n */
/* element vector y. */
......@@ -78,7 +59,7 @@
/* Y. INCY must not be zero. */
/* Unchanged on exit. */
/* A - REAL array of DIMENSION ( LDA, n ). */
/* A - SINGLE PRECISION array of DIMENSION ( LDA, n ). */
/* Before entry, the leading m by n part of the array A must */
/* contain the matrix of coefficients. On exit, A is */
/* overwritten by the updated matrix. */
......@@ -110,80 +91,70 @@
/* Test the input parameters. */
/* Parameter adjustments */
--x;
--y;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
/* Function Body */
info = 0;
if (*m < 0) {
info = 1;
} else if (*n < 0) {
info = 2;
} else if (*incx == 0) {
info = 5;
} else if (*incy == 0) {
info = 7;
} else if (*lda < max(1,*m)) {
info = 9;
}
if (info != 0) {
xerbla_("SGER ", &info);
return 0;
integer i, j, m = *_m, n = *_n, incx = *_incx, incy = *_incy, lda = *_lda;
real alpha = *_alpha;
integer info = 0;
if (m < 0)
info = 1;
else if (n < 0)
info = 2;
else if (incx == 0)
info = 5;
else if (incy == 0)
info = 7;
else if (lda < max(1,m))
info = 9;
if (info != 0)
{
xerbla_("SGER ", &info);
return 0;
}
/* Quick return if possible. */
if (*m == 0 || *n == 0 || *alpha == 0.f) {
return 0;
}
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
if (*incy > 0) {
jy = 1;
} else {
jy = 1 - (*n - 1) * *incy;
if (incx < 0)
x -= (m-1)*incx;
if (incy < 0)
y -= (n-1)*incy;
/* Start the operations. In this version the elements of A are */
/* accessed sequentially with one pass through A. */
if( alpha == 0 )
;
else if( incx == 1 )
{
for( j = 0; j < n; j++, a += lda )
{
real s = y[j*incy];
if( s == 0 )
continue;
s *= alpha;
for( i = 0; i <= m - 2; i += 2 )
{
real t0 = a[i] + x[i]*s;
real t1 = a[i+1] + x[i+1]*s;
a[i] = t0; a[i+1] = t1;
}
for( ; i < m; i++ )
a[i] += x[i]*s;
}
}
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.f) {
temp = *alpha * y[jy];
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[i__] * temp;
/* L10: */
}
}
jy += *incy;
/* L20: */
}
} else {
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (*m - 1) * *incx;
}
i__1 = *n;
for (j = 1; j <= i__1; ++j) {
if (y[jy] != 0.f) {
temp = *alpha * y[jy];
ix = kx;
i__2 = *m;
for (i__ = 1; i__ <= i__2; ++i__) {
a[i__ + j * a_dim1] += x[ix] * temp;
ix += *incx;
/* L30: */
}
}
jy += *incy;
/* L40: */
}
else
{
for( j = 0; j < n; j++, a += lda )
{
real s = y[j*incy];
if( s == 0 )
continue;
s *= alpha;
for( i = 0; i < m; i++ )
a[i] += x[i*incx]*s;
}
}
return 0;
......
#include "clapack.h"
#include <float.h>
#include <stdio.h>
/* *********************************************************************** */
doublereal slamc3_(real *a, real *b)
{
/* System generated locals */
real ret_val;
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLAMC3 is intended to force A and B to be stored prior to doing */
/* the addition of A and B , for use in situations where optimizers */
/* might hold one of these in a register. */
/* Arguments */
/* ========= */
/* A (input) REAL */
/* B (input) REAL */
/* The values A and B. */
/* ===================================================================== */
/* .. Executable Statements .. */
ret_val = *a + *b;
return ret_val;
/* End of SLAMC3 */
} /* slamc3_ */
#if 1
/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
#ifndef FLT_DIGITS
#define FLT_DIGITS 24
#endif
doublereal
slamch_(char *cmach) {
char ch = cmach[0];
float eps=FLT_EPSILON, sfmin, small;
if ('B' == ch || 'b' == ch) {
return FLT_RADIX;
} else if ('E' == ch || 'e' == ch) {
return eps;
} else if ('L' == ch || 'l' == ch) {
return FLT_MAX_EXP;
} else if ('M' == ch || 'm' == ch) {
return FLT_MIN_EXP;
} else if ('N' == ch || 'n' == ch) {
return FLT_DIGITS;
} else if ('O' == ch || 'o' == ch) {
return FLT_MAX;
} else if ('P' == ch || 'p' == ch) {
return eps * FLT_RADIX;
} else if ('R' == ch || 'r' == ch) {
return FLT_ROUNDS < 2;
} else if ('S' == ch || 's' == ch) {
/* Use SMALL plus a bit, to avoid the possibility of rounding causing overflow
when computing 1/sfmin. */
sfmin = FLT_MIN;
small = (float)(2. / FLT_MAX);
if (small <= sfmin) small = sfmin * (1 + eps);
return small;
} else if ('U' == ch || 'u' == ch) {
return FLT_MIN;
}
return 0;
}
#else
/* Table of constant values */
static integer c__1 = 1;
static real c_b32 = 0.f;
doublereal slamch_(char *cmach)
{
/* Initialized data */
static logical first = TRUE_;
/* System generated locals */
integer i__1;
real ret_val;
/* Builtin functions */
double pow_ri(real *, integer *);
/* Local variables */
static real t;
integer it;
static real rnd, eps, base;
integer beta;
static real emin, prec, emax;
integer imin, imax;
logical lrnd;
static real rmin, rmax;
real rmach;
extern logical lsame_(char *, char *);
real small;
static real sfmin;
extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real
*, integer *, real *, integer *, real *);
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLAMCH determines single precision machine parameters. */
/* Arguments */
/* ========= */
/* CMACH (input) CHARACTER*1 */
/* Specifies the value to be returned by SLAMCH: */
/* = 'E' or 'e', SLAMCH := eps */
/* = 'S' or 's , SLAMCH := sfmin */
/* = 'B' or 'b', SLAMCH := base */
/* = 'P' or 'p', SLAMCH := eps*base */
/* = 'N' or 'n', SLAMCH := t */
/* = 'R' or 'r', SLAMCH := rnd */
/* = 'M' or 'm', SLAMCH := emin */
/* = 'U' or 'u', SLAMCH := rmin */
/* = 'L' or 'l', SLAMCH := emax */
/* = 'O' or 'o', SLAMCH := rmax */
/* where */
/* eps = relative machine precision */
/* sfmin = safe minimum, such that 1/sfmin does not overflow */
/* base = base of the machine */
/* prec = eps*base */
/* t = number of (base) digits in the mantissa */
/* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */
/* emin = minimum exponent before (gradual) underflow */
/* rmin = underflow threshold - base**(emin-1) */
/* emax = largest exponent before overflow */
/* rmax = overflow threshold - (base**emax)*(1-eps) */
/* ===================================================================== */
/* .. Parameters .. */
/* .. */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. External Subroutines .. */
/* .. */
/* .. Save statement .. */
/* .. */
/* .. Data statements .. */
/* .. */
/* .. Executable Statements .. */
if (first) {
slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
base = (real) beta;
t = (real) it;
if (lrnd) {
rnd = 1.f;
i__1 = 1 - it;
eps = pow_ri(&base, &i__1) / 2;
} else {
rnd = 0.f;
i__1 = 1 - it;
eps = pow_ri(&base, &i__1);
}
prec = eps * base;
emin = (real) imin;
emax = (real) imax;
sfmin = rmin;
small = 1.f / rmax;
if (small >= sfmin) {
/* Use SMALL plus a bit, to avoid the possibility of rounding */
/* causing overflow when computing 1/sfmin. */
sfmin = small * (eps + 1.f);
}
}
if (lsame_(cmach, "E")) {
rmach = eps;
} else if (lsame_(cmach, "S")) {
rmach = sfmin;
} else if (lsame_(cmach, "B")) {
rmach = base;
} else if (lsame_(cmach, "P")) {
rmach = prec;
} else if (lsame_(cmach, "N")) {
rmach = t;
} else if (lsame_(cmach, "R")) {
rmach = rnd;
} else if (lsame_(cmach, "M")) {
rmach = emin;
} else if (lsame_(cmach, "U")) {
rmach = rmin;
} else if (lsame_(cmach, "L")) {
rmach = emax;
} else if (lsame_(cmach, "O")) {
rmach = rmax;
}
ret_val = rmach;
first = FALSE_;
return ret_val;
/* End of SLAMCH */
} /* slamch_ */
/* *********************************************************************** */
/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical
*ieee1)
{
/* Initialized data */
static logical first = TRUE_;
/* System generated locals */
real r__1, r__2;
/* Local variables */
real a, b, c__, f, t1, t2;
static integer lt;
real one, qtr;
static logical lrnd;
static integer lbeta;
real savec;
static logical lieee1;
extern doublereal slamc3_(real *, real *);
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLAMC1 determines the machine parameters given by BETA, T, RND, and */
/* IEEE1. */
/* Arguments */
/* ========= */
/* BETA (output) INTEGER */
/* The base of the machine. */
/* T (output) INTEGER */
/* The number of ( BETA ) digits in the mantissa. */
/* RND (output) LOGICAL */
/* Specifies whether proper rounding ( RND = .TRUE. ) or */
/* chopping ( RND = .FALSE. ) occurs in addition. This may not */
/* be a reliable guide to the way in which the machine performs */
/* its arithmetic. */
/* IEEE1 (output) LOGICAL */
/* Specifies whether rounding appears to be done in the IEEE */
/* 'round to nearest' style. */
/* Further Details */
/* =============== */
/* The routine is based on the routine ENVRON by Malcolm and */
/* incorporates suggestions by Gentleman and Marovich. See */
/* Malcolm M. A. (1972) Algorithms to reveal properties of */
/* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */
/* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */
/* that reveal properties of floating point arithmetic units. */
/* Comms. of the ACM, 17, 276-277. */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Save statement .. */
/* .. */
/* .. Data statements .. */
/* .. */
/* .. Executable Statements .. */
if (first) {
one = 1.f;
/* LBETA, LIEEE1, LT and LRND are the local values of BETA, */
/* IEEE1, T and RND. */
/* Throughout this routine we use the function SLAMC3 to ensure */
/* that relevant values are stored and not held in registers, or */
/* are not affected by optimizers. */
/* Compute a = 2.0**m with the smallest positive integer m such */
/* that */
/* fl( a + 1.0 ) = a. */
a = 1.f;
c__ = 1.f;
/* + WHILE( C.EQ.ONE )LOOP */
L10:
if (c__ == one) {
a *= 2;
c__ = slamc3_(&a, &one);
r__1 = -a;
c__ = slamc3_(&c__, &r__1);
goto L10;
}
/* + END WHILE */
/* Now compute b = 2.0**m with the smallest positive integer m */
/* such that */
/* fl( a + b ) .gt. a. */
b = 1.f;
c__ = slamc3_(&a, &b);
/* + WHILE( C.EQ.A )LOOP */
L20:
if (c__ == a) {
b *= 2;
c__ = slamc3_(&a, &b);
goto L20;
}
/* + END WHILE */
/* Now compute the base. a and c are neighbouring floating point */
/* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */
/* their difference is beta. Adding 0.25 to c is to ensure that it */
/* is truncated to beta and not ( beta - 1 ). */
qtr = one / 4;
savec = c__;
r__1 = -a;
c__ = slamc3_(&c__, &r__1);
lbeta = c__ + qtr;
/* Now determine whether rounding or chopping occurs, by adding a */
/* bit less than beta/2 and a bit more than beta/2 to a. */
b = (real) lbeta;
r__1 = b / 2;
r__2 = -b / 100;
f = slamc3_(&r__1, &r__2);
c__ = slamc3_(&f, &a);
if (c__ == a) {
lrnd = TRUE_;
} else {
lrnd = FALSE_;
}
r__1 = b / 2;
r__2 = b / 100;
f = slamc3_(&r__1, &r__2);
c__ = slamc3_(&f, &a);
if (lrnd && c__ == a) {
lrnd = FALSE_;
}
/* Try and decide whether rounding is done in the IEEE 'round to */
/* nearest' style. B/2 is half a unit in the last place of the two */
/* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */
/* zero, and SAVEC is odd. Thus adding B/2 to A should not change */
/* A, but adding B/2 to SAVEC should change SAVEC. */
r__1 = b / 2;
t1 = slamc3_(&r__1, &a);
r__1 = b / 2;
t2 = slamc3_(&r__1, &savec);
lieee1 = t1 == a && t2 > savec && lrnd;
/* Now find the mantissa, t. It should be the integer part of */
/* log to the base beta of a, however it is safer to determine t */
/* by powering. So we find t as the smallest positive integer for */
/* which */
/* fl( beta**t + 1.0 ) = 1.0. */
lt = 0;
a = 1.f;
c__ = 1.f;
/* + WHILE( C.EQ.ONE )LOOP */
L30:
if (c__ == one) {
++lt;
a *= lbeta;
c__ = slamc3_(&a, &one);
r__1 = -a;
c__ = slamc3_(&c__, &r__1);
goto L30;
}
/* + END WHILE */
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*ieee1 = lieee1;
first = FALSE_;
return 0;
/* End of SLAMC1 */
} /* slamc1_ */
/* *********************************************************************** */
/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real *
eps, integer *emin, real *rmin, integer *emax, real *rmax)
{
/* Initialized data */
static logical first = TRUE_;
static logical iwarn = FALSE_;
/* Format strings */
static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre"
"ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va"
"lue EMIN looks\002,\002 acceptable please comment out \002,/\002"
" the IF block as marked within the code of routine\002,\002 SLAM"
"C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)";
/* System generated locals */
integer i__1;
real r__1, r__2, r__3, r__4, r__5;
/* Builtin functions */
double pow_ri(real *, integer *);
//integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
/* Local variables */
real a, b, c__;
integer i__;
static integer lt;
real one, two;
logical ieee;
real half;
logical lrnd;
static real leps;
real zero;
static integer lbeta;
real rbase;
static integer lemin, lemax;
integer gnmin;
real small;
integer gpmin;
real third;
static real lrmin, lrmax;
real sixth;
logical lieee1;
extern /* Subroutine */ int slamc1_(integer *, integer *, logical *,
logical *);
extern doublereal slamc3_(real *, real *);
extern /* Subroutine */ int slamc4_(integer *, real *, integer *),
slamc5_(integer *, integer *, integer *, logical *, integer *,
real *);
integer ngnmin, ngpmin;
/* Fortran I/O blocks */
static cilist io___58 = { 0, 6, 0, fmt_9999, 0 };
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLAMC2 determines the machine parameters specified in its argument */
/* list. */
/* Arguments */
/* ========= */
/* BETA (output) INTEGER */
/* The base of the machine. */
/* T (output) INTEGER */
/* The number of ( BETA ) digits in the mantissa. */
/* RND (output) LOGICAL */
/* Specifies whether proper rounding ( RND = .TRUE. ) or */
/* chopping ( RND = .FALSE. ) occurs in addition. This may not */
/* be a reliable guide to the way in which the machine performs */
/* its arithmetic. */
/* EPS (output) REAL */
/* The smallest positive number such that */
/* fl( 1.0 - EPS ) .LT. 1.0, */
/* where fl denotes the computed value. */
/* EMIN (output) INTEGER */
/* The minimum exponent before (gradual) underflow occurs. */
/* RMIN (output) REAL */
/* The smallest normalized number for the machine, given by */
/* BASE**( EMIN - 1 ), where BASE is the floating point value */
/* of BETA. */
/* EMAX (output) INTEGER */
/* The maximum exponent before overflow occurs. */
/* RMAX (output) REAL */
/* The largest positive number for the machine, given by */
/* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */
/* value of BETA. */
/* Further Details */
/* =============== */
/* The computation of EPS is based on a routine PARANOIA by */
/* W. Kahan of the University of California at Berkeley. */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. External Subroutines .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* .. Save statement .. */
/* .. */
/* .. Data statements .. */
/* .. */
/* .. Executable Statements .. */
if (first) {
zero = 0.f;
one = 1.f;
two = 2.f;
/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */
/* BETA, T, RND, EPS, EMIN and RMIN. */
/* Throughout this routine we use the function SLAMC3 to ensure */
/* that relevant values are stored and not held in registers, or */
/* are not affected by optimizers. */
/* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */
slamc1_(&lbeta, &lt, &lrnd, &lieee1);
/* Start to find EPS. */
b = (real) lbeta;
i__1 = -lt;
a = pow_ri(&b, &i__1);
leps = a;
/* Try some tricks to see whether or not this is the correct EPS. */
b = two / 3;
half = one / 2;
r__1 = -half;
sixth = slamc3_(&b, &r__1);
third = slamc3_(&sixth, &sixth);
r__1 = -half;
b = slamc3_(&third, &r__1);
b = slamc3_(&b, &sixth);
b = dabs(b);
if (b < leps) {
b = leps;
}
leps = 1.f;
/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
L10:
if (leps > b && b > zero) {
leps = b;
r__1 = half * leps;
/* Computing 5th power */
r__3 = two, r__4 = r__3, r__3 *= r__3;
/* Computing 2nd power */
r__5 = leps;
r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5);
c__ = slamc3_(&r__1, &r__2);
r__1 = -c__;
c__ = slamc3_(&half, &r__1);
b = slamc3_(&half, &c__);
r__1 = -b;
c__ = slamc3_(&half, &r__1);
b = slamc3_(&half, &c__);
goto L10;
}
/* + END WHILE */
if (a < leps) {
leps = a;
}
/* Computation of EPS complete. */
/* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */
/* Keep dividing A by BETA until (gradual) underflow occurs. This */
/* is detected when we cannot recover the previous A. */
rbase = one / lbeta;
small = one;
for (i__ = 1; i__ <= 3; ++i__) {
r__1 = small * rbase;
small = slamc3_(&r__1, &zero);
/* L20: */
}
a = slamc3_(&one, &small);
slamc4_(&ngpmin, &one, &lbeta);
r__1 = -one;
slamc4_(&ngnmin, &r__1, &lbeta);
slamc4_(&gpmin, &a, &lbeta);
r__1 = -a;
slamc4_(&gnmin, &r__1, &lbeta);
ieee = FALSE_;
if (ngpmin == ngnmin && gpmin == gnmin) {
if (ngpmin == gpmin) {
lemin = ngpmin;
/* ( Non twos-complement machines, no gradual underflow; */
/* e.g., VAX ) */
} else if (gpmin - ngpmin == 3) {
lemin = ngpmin - 1 + lt;
ieee = TRUE_;
/* ( Non twos-complement machines, with gradual underflow; */
/* e.g., IEEE standard followers ) */
} else {
lemin = min(ngpmin,gpmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else if (ngpmin == gpmin && ngnmin == gnmin) {
if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
lemin = max(ngpmin,ngnmin);
/* ( Twos-complement machines, no gradual underflow; */
/* e.g., CYBER 205 ) */
} else {
lemin = min(ngpmin,ngnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
{
if (gpmin - min(ngpmin,ngnmin) == 3) {
lemin = max(ngpmin,ngnmin) - 1 + lt;
/* ( Twos-complement machines with gradual underflow; */
/* no known machine ) */
} else {
lemin = min(ngpmin,ngnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
} else {
/* Computing MIN */
i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
lemin = min(i__1,gnmin);
/* ( A guess; no known machine ) */
iwarn = TRUE_;
}
first = FALSE_;
/* ** */
/* Comment out this if block if EMIN is ok */
if (iwarn) {
first = TRUE_;
printf("\n\n WARNING. The value EMIN may be incorrect:- ");
printf("EMIN = %8i\n",lemin);
printf("If, after inspection, the value EMIN looks acceptable");
printf("please comment out \n the IF block as marked within the");
printf("code of routine SLAMC2, \n otherwise supply EMIN");
printf("explicitly.\n");
/*
s_wsfe(&io___58);
do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer));
e_wsfe();
*/
}
/* ** */
/* Assume IEEE arithmetic if we found denormalised numbers above, */
/* or if arithmetic seems to round in the IEEE style, determined */
/* in routine SLAMC1. A true IEEE machine should have both things */
/* true; however, faulty machines may have one or the other. */
ieee = ieee || lieee1;
/* Compute RMIN by successive division by BETA. We could compute */
/* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */
/* this computation. */
lrmin = 1.f;
i__1 = 1 - lemin;
for (i__ = 1; i__ <= i__1; ++i__) {
r__1 = lrmin * rbase;
lrmin = slamc3_(&r__1, &zero);
/* L30: */
}
/* Finally, call SLAMC5 to compute EMAX and RMAX. */
slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
}
*beta = lbeta;
*t = lt;
*rnd = lrnd;
*eps = leps;
*emin = lemin;
*rmin = lrmin;
*emax = lemax;
*rmax = lrmax;
return 0;
/* End of SLAMC2 */
} /* slamc2_ */
/* *********************************************************************** */
/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base)
{
/* System generated locals */
integer i__1;
real r__1;
/* Local variables */
real a;
integer i__;
real b1, b2, c1, c2, d1, d2, one, zero, rbase;
extern doublereal slamc3_(real *, real *);
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLAMC4 is a service routine for SLAMC2. */
/* Arguments */
/* ========= */
/* EMIN (output) INTEGER */
/* The minimum exponent before (gradual) underflow, computed by */
/* setting A = START and dividing by BASE until the previous A */
/* can not be recovered. */
/* START (input) REAL */
/* The starting point for determining EMIN. */
/* BASE (input) INTEGER */
/* The base of the machine. */
/* ===================================================================== */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Executable Statements .. */
a = *start;
one = 1.f;
rbase = one / *base;
zero = 0.f;
*emin = 1;
r__1 = a * rbase;
b1 = slamc3_(&r__1, &zero);
c1 = a;
c2 = a;
d1 = a;
d2 = a;
/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */
/* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */
L10:
if (c1 == a && c2 == a && d1 == a && d2 == a) {
--(*emin);
a = b1;
r__1 = a / *base;
b1 = slamc3_(&r__1, &zero);
r__1 = b1 * *base;
c1 = slamc3_(&r__1, &zero);
d1 = zero;
i__1 = *base;
for (i__ = 1; i__ <= i__1; ++i__) {
d1 += b1;
/* L20: */
}
r__1 = a * rbase;
b2 = slamc3_(&r__1, &zero);
r__1 = b2 / rbase;
c2 = slamc3_(&r__1, &zero);
d2 = zero;
i__1 = *base;
for (i__ = 1; i__ <= i__1; ++i__) {
d2 += b2;
/* L30: */
}
goto L10;
}
/* + END WHILE */
return 0;
/* End of SLAMC4 */
} /* slamc4_ */
/* *********************************************************************** */
/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin,
logical *ieee, integer *emax, real *rmax)
{
/* System generated locals */
integer i__1;
real r__1;
/* Local variables */
integer i__;
real y, z__;
integer try__, lexp;
real oldy;
integer uexp, nbits;
extern doublereal slamc3_(real *, real *);
real recbas;
integer exbits, expsum;
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLAMC5 attempts to compute RMAX, the largest machine floating-point */
/* number, without overflow. It assumes that EMAX + abs(EMIN) sum */
/* approximately to a power of 2. It will fail on machines where this */
/* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */
/* EMAX = 28718). It will also fail if the value supplied for EMIN is */
/* too large (i.e. too close to zero), probably with overflow. */
/* Arguments */
/* ========= */
/* BETA (input) INTEGER */
/* The base of floating-point arithmetic. */
/* P (input) INTEGER */
/* The number of base BETA digits in the mantissa of a */
/* floating-point value. */
/* EMIN (input) INTEGER */
/* The minimum exponent before (gradual) underflow. */
/* IEEE (input) LOGICAL */
/* A logical flag specifying whether or not the arithmetic */
/* system is thought to comply with the IEEE standard. */
/* EMAX (output) INTEGER */
/* The largest exponent before overflow */
/* RMAX (output) REAL */
/* The largest machine floating-point number. */
/* ===================================================================== */
/* .. Parameters .. */
/* .. */
/* .. Local Scalars .. */
/* .. */
/* .. External Functions .. */
/* .. */
/* .. Intrinsic Functions .. */
/* .. */
/* .. Executable Statements .. */
/* First compute LEXP and UEXP, two powers of 2 that bound */
/* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */
/* approximately to the bound that is closest to abs(EMIN). */
/* (EMAX is the exponent of the required number RMAX). */
lexp = 1;
exbits = 1;
L10:
try__ = lexp << 1;
if (try__ <= -(*emin)) {
lexp = try__;
++exbits;
goto L10;
}
if (lexp == -(*emin)) {
uexp = lexp;
} else {
uexp = try__;
++exbits;
}
/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */
/* than or equal to EMIN. EXBITS is the number of bits needed to */
/* store the exponent. */
if (uexp + *emin > -lexp - *emin) {
expsum = lexp << 1;
} else {
expsum = uexp << 1;
}
/* EXPSUM is the exponent range, approximately equal to */
/* EMAX - EMIN + 1 . */
*emax = expsum + *emin - 1;
nbits = exbits + 1 + *p;
/* NBITS is the total number of bits needed to store a */
/* floating-point number. */
if (nbits % 2 == 1 && *beta == 2) {
/* Either there are an odd number of bits used to store a */
/* floating-point number, which is unlikely, or some bits are */
/* not used in the representation of numbers, which is possible, */
/* (e.g. Cray machines) or the mantissa has an implicit bit, */
/* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */
/* most likely. We have to assume the last alternative. */
/* If this is true, then we need to reduce EMAX by one because */
/* there must be some way of representing zero in an implicit-bit */
/* system. On machines like Cray, we are reducing EMAX by one */
/* unnecessarily. */
--(*emax);
}
if (*ieee) {
/* Assume we are on an IEEE machine which reserves one exponent */
/* for infinity and NaN. */
--(*emax);
}
/* Now create RMAX, the largest machine number, which should */
/* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */
/* First compute 1.0 - BETA**(-P), being careful that the */
/* result is less than 1.0 . */
recbas = 1.f / *beta;
z__ = *beta - 1.f;
y = 0.f;
i__1 = *p;
for (i__ = 1; i__ <= i__1; ++i__) {
z__ *= recbas;
if (y < 1.f) {
oldy = y;
}
y = slamc3_(&y, &z__);
/* L20: */
}
if (y >= 1.f) {
y = oldy;
}
/* Now multiply by BETA**EMAX to get RMAX. */
i__1 = *emax;
for (i__ = 1; i__ <= i__1; ++i__) {
r__1 = y * *beta;
y = slamc3_(&r__1, &c_b32);
/* L30: */
}
*rmax = y;
return 0;
/* End of SLAMC5 */
} /* slamc5_ */
#endif
#include "clapack.h"
#include <float.h>
#include <stdio.h>
/* *********************************************************************** */
doublereal slamc3_(real *a, real *b)
{
/* System generated locals */
real ret_val;
/* -- LAPACK auxiliary routine (version 3.1) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/* November 2006 */
/* .. Scalar Arguments .. */
/* .. */
/* Purpose */
/* ======= */
/* SLAMC3 is intended to force A and B to be stored prior to doing */
/* the addition of A and B , for use in situations where optimizers */
/* might hold one of these in a register. */
/* Arguments */
/* ========= */
/* A (input) REAL */
/* B (input) REAL */
/* The values A and B. */
/* ===================================================================== */
/* .. Executable Statements .. */
ret_val = *a + *b;
return ret_val;
/* End of SLAMC3 */
} /* slamc3_ */
const unsigned char lapack_toupper_tab[] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203,
204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,
222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
#ifndef FLT_DIGITS
#define FLT_DIGITS 24
#endif
const unsigned char lapack_lamch_tab[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 0, 8, 9, 0, 10, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 0, 8, 9,
0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
const doublereal lapack_slamch_tab[] =
{
0, FLT_RADIX, FLT_EPSILON, FLT_MAX_EXP, FLT_MIN_EXP, FLT_DIGITS, FLT_MAX,
FLT_EPSILON*FLT_RADIX, 1, FLT_MIN*(1 + FLT_EPSILON), FLT_MIN
};
/* slartg.f -- translated by f2c (version 20061008).
You must link the resulting object file with libf2c:
on Microsoft Windows system, link with libf2c.lib;
on Linux or Unix systems, link with .../path/to/libf2c.a -lm
or, if you install libf2c.a in a standard place, with -lf2c -lm
-- in that order, at the end of the command line, as in
cc *.o -lf2c -lm
Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
http://www.netlib.org/f2c/libf2c.zip
*/
#include "clapack.h"
......@@ -19,17 +7,13 @@
integer i__1;
real r__1, r__2;
/* Builtin functions */
double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal);
/* Local variables */
integer i__;
real f1, g1, eps, scale;
integer count;
real safmn2, safmx2;
extern doublereal slamch_(char *);
real safmin;
static real safmn2, safmx2;
static real safmin;
static logical FIRST = TRUE_;
/* -- LAPACK auxiliary routine (version 3.2) -- */
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
......@@ -96,15 +80,16 @@
/* .. */
/* .. Executable Statements .. */
/* IF( FIRST ) THEN */
safmin = slamch_("S");
eps = slamch_("E");
r__1 = slamch_("B");
i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f);
safmn2 = pow_ri(&r__1, &i__1);
safmx2 = 1.f / safmn2;
/* FIRST = .FALSE. */
/* END IF */
if(FIRST)
{
safmin = slamch_("S");
eps = slamch_("E");
r__1 = slamch_("B");
i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f);
safmn2 = pow_ri(&r__1, &i__1);
safmx2 = 1.f / safmn2;
FIRST = FALSE_;
}
if (*g == 0.f) {
*cs = 1.f;
*sn = 0.f;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment