Commit bf94db5b authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

fixed 2 bugs in the recently modified Lapack functions

parent e65234b8
...@@ -181,11 +181,14 @@ ...@@ -181,11 +181,14 @@
if( s == 0. ) if( s == 0. )
continue; continue;
s *= alpha; s *= alpha;
for( j = 0; j <= m - 2; j += 2 ) for( j = 0; j <= m - 4; j += 4 )
{ {
doublereal t0 = y[j] + s*a[j]; doublereal t0 = y[j] + s*a[j];
doublereal t1 = y[j+1] + s*a[j+1]; doublereal t1 = y[j+1] + s*a[j+1];
y[j] = t0; y[j+1] = t1; y[j] = t0; y[j+1] = t1;
t0 = y[j+2] + s*a[j+2];
t1 = y[j+3] + s*a[j+3];
y[j+2] = t0; y[j+3] = t1;
} }
for( ; j < m; j++ ) for( ; j < m; j++ )
...@@ -212,8 +215,8 @@ ...@@ -212,8 +215,8 @@
for( i = 0; i < n; i++, a += lda ) for( i = 0; i < n; i++, a += lda )
{ {
doublereal s = 0; doublereal s = 0;
for( j = 0; j <= m - 2; j += 2 ) for( j = 0; j <= m - 4; j += 4 )
s += x[j]*a[j] + x[j+1]*a[j+1]; s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
for( ; j < m; j++ ) for( ; j < m; j++ )
s += x[j]*a[j]; s += x[j]*a[j];
y[i*incy] += alpha*s; y[i*incy] += alpha*s;
......
...@@ -163,37 +163,65 @@ ...@@ -163,37 +163,65 @@
; ;
else if( trans == 'N' ) else if( trans == 'N' )
{ {
for( i = 0; i < n; i++, a += lda ) if( incy == 1 )
{ {
real s = x[i*incx]; for( i = 0; i < n; i++, a += lda )
if( s == 0.f )
continue;
s *= alpha;
for( j = 0; j <= m - 4; j += 4 )
{ {
real t0 = y[j] + s*a[j]; real s = x[i*incx];
real t1 = y[j+1] + s*a[j+1]; if( s == 0.f )
y[j] = t0; y[j+1] = t1; continue;
t0 = y[j+2] + s*a[j+2]; s *= alpha;
t1 = y[j+3] + s*a[j+3];
y[j+2] = t0; y[j+3] = t1; for( j = 0; j <= m - 4; j += 4 )
} {
real t0 = y[j] + s*a[j];
real t1 = y[j+1] + s*a[j+1];
y[j] = t0; y[j+1] = t1;
t0 = y[j+2] + s*a[j+2];
t1 = y[j+3] + s*a[j+3];
y[j+2] = t0; y[j+3] = t1;
}
for( ; j < m; j++ ) for( ; j < m; j++ )
y[j] += s*a[j]; y[j] += s*a[j];
}
}
else
{
for( i = 0; i < n; i++, a += lda )
{
real s = x[i*incx];
if( s == 0. )
continue;
s *= alpha;
for( j = 0; j < m; j++ )
y[j*incy] += s*a[j];
}
} }
} }
else else
{ {
for( i = 0; i < n; i++, a += lda ) if( incx == 1 )
{ {
real s = 0; for( i = 0; i < n; i++, a += lda )
for( j = 0; j <= m - 4; j += 4 ) {
s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3]; real s = 0;
for( ; j < m; j++ ) for( j = 0; j <= m - 4; j += 4 )
s += x[j]*a[j]; s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
y[i*incy] += alpha*s; for( ; j < m; j++ )
s += x[j]*a[j];
y[i*incy] += alpha*s;
}
}
else
{
for( i = 0; i < n; i++, a += lda )
{
real s = 0;
for( j = 0; j < m; j++ )
s += x[j*incx]*a[j];
y[i*incy] += alpha*s;
}
} }
} }
......
...@@ -1327,6 +1327,7 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags ) ...@@ -1327,6 +1327,7 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
if(u) u->release(); if(u) u->release();
if(vt) vt->release(); if(vt) vt->release();
u = vt = 0; u = vt = 0;
compute_uv = false;
} }
if( compute_uv ) if( compute_uv )
...@@ -1417,13 +1418,13 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags ) ...@@ -1417,13 +1418,13 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
if( type == CV_32F ) if( type == CV_32F )
{ {
sgesdd_(mode, &n, &m, (float*)_a.data, &lda, (float*)w.data, sgesdd_(mode, &n, &m, (float*)_a.data, &lda, (float*)w.data,
(float*)vt->data, &ldv, (float*)u->data, &ldu, vt ? (float*)vt->data : (float*)&v1, &ldv, u ? (float*)u->data : (float*)&u1, &ldu,
(float*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info ); (float*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
} }
else else
{ {
dgesdd_(mode, &n, &m, (double*)_a.data, &lda, (double*)w.data, dgesdd_(mode, &n, &m, (double*)_a.data, &lda, (double*)w.data,
(double*)vt->data, &ldv, (double*)u->data, &ldu, vt ? (double*)vt->data : &v1, &ldv, u ? (double*)u->data : &u1, &ldu,
(double*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info ); (double*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
} }
CV_Assert(info >= 0); CV_Assert(info >= 0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment