Commit dec0af8d authored by Vadim Pisarevsky

implemented invert(A, B, DECOMP_EIG)

parent abf42e20
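The commit adds a DECOMP_EIG branch to cv::invert(): a symmetric matrix is inverted through its eigendecomposition, and the legacy CV_SVD_SYM flag in cvInvert/cvSolve is re-routed to this path. A minimal usage sketch, not part of the commit, with illustrative matrix values:

#include <opencv2/core/core.hpp>
#include <cstdio>

int main()
{
    // Symmetric positive-definite 3x3 matrix (illustrative values).
    double a[] = { 4, 1, 1,
                   1, 3, 0,
                   1, 0, 2 };
    cv::Mat A(3, 3, CV_64F, a), Ainv;

    // New path: invert via eigendecomposition. Per the diff below, the return
    // value is the ratio of the smallest to the largest eigenvalue
    // (0 for a degenerate matrix).
    double r = cv::invert(A, Ainv, cv::DECOMP_EIG);

    // A*Ainv should be numerically close to the identity.
    cv::Mat E = A*Ainv - cv::Mat::eye(3, 3, CV_64F);
    std::printf("eigenvalue ratio: %g, |A*inv(A) - I| = %g\n", r, cv::norm(E));
    return 0;
}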
@@ -2050,18 +2050,13 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     if( code == CMP_GT || code == CMP_LE )
     {
         int m = code == CMP_GT ? 0 : 255;
-    #if CV_SSE2
-        __m128i m128, c128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
-            c128 = _mm_set1_epi8 (0x7f);
-        }
-    #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
         #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
+                __m128i c128 = _mm_set1_epi8 (0x7f);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2085,17 +2080,12 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
     else if( code == CMP_EQ || code == CMP_NE )
     {
         int m = code == CMP_EQ ? 0 : 255;
-    #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
-        }
-    #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
         #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (0xff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2141,17 +2131,12 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     if( code == CMP_GT || code == CMP_LE )
    {
         int m = code == CMP_GT ? 0 : 255;
-    #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
-        }
-    #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x =0;
         #if CV_SSE2
             if( USE_SSE2){//
+                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
@@ -2184,17 +2169,12 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
     else if( code == CMP_EQ || code == CMP_NE )
     {
         int m = code == CMP_EQ ? 0 : 255;
-    #if CV_SSE2
-        __m128i m128;
-        if( USE_SSE2 ){
-            m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
-        }
-    #endif
         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
         {
             int x = 0;
         #if CV_SSE2
             if( USE_SSE2 ){
+                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (0xffff);
                 for( ; x <= size.width - 16; x += 16 )
                 {
                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
...
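The comparison-kernel hunks above do not change the computation: the m128/c128 SSE2 constants are simply moved from before the row loop into the if( USE_SSE2 ) body, so they are scoped to the vectorized branch. For background, here is a self-contained sketch of the trick such constants serve, assuming the usual sign-bit-flip approach rather than the exact constants in this file: SSE2 has no unsigned 8-bit greater-than, so both operands are xor'ed with 0x80 before _mm_cmpgt_epi8, and the mask constant optionally negates the result to turn CMP_GT into CMP_LE.

#include <emmintrin.h>

// Illustrative sketch only, not the committed kernel.
static void cmp_gt_u8(const unsigned char* a, const unsigned char* b,
                      unsigned char* dst, int n, int invert_result /* nonzero for CMP_LE */)
{
    const __m128i sign = _mm_set1_epi8((char)0x80);             // maps unsigned order onto signed order
    const __m128i mask = invert_result ? _mm_set1_epi8((char)0xff)
                                       : _mm_setzero_si128();   // xor with 0xff negates the 0/255 result
    int x = 0;
    for( ; x <= n - 16; x += 16 )
    {
        __m128i r0 = _mm_loadu_si128((const __m128i*)(a + x));
        __m128i r1 = _mm_loadu_si128((const __m128i*)(b + x));
        r0 = _mm_cmpgt_epi8(_mm_xor_si128(r0, sign), _mm_xor_si128(r1, sign));
        _mm_storeu_si128((__m128i*)(dst + x), _mm_xor_si128(r0, mask));
    }
    for( ; x < n; x++ )                                          // scalar tail
        dst[x] = (unsigned char)((a[x] > b[x] ? 255 : 0) ^ (invert_result ? 255 : 0));
}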
@@ -615,20 +615,14 @@ cvtScale_<short, short, float>( const short* src, size_t sstep,
     sstep /= sizeof(src[0]);
     dstep /= sizeof(dst[0]);
-    #if CV_SSE2
-    __m128 scale128, shift128;
-    if(USE_SSE2){
-        scale128 = _mm_set1_ps (scale);
-        shift128 = _mm_set1_ps (shift);
-    }
-    #endif
     for( ; size.height--; src += sstep, dst += dstep )
     {
         int x = 0;
         #if CV_SSE2
         if(USE_SSE2)
         {
+            __m128 scale128 = _mm_set1_ps (scale);
+            __m128 shift128 = _mm_set1_ps (shift);
             for(; x <= size.width - 8; x += 8 )
             {
                 __m128i r0 = _mm_loadl_epi64((const __m128i*)(src + x));
...
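The cvtScale_ hunk applies the same scoping change: scale128/shift128 are now built inside the if(USE_SSE2) branch, with no change to the arithmetic. As a reference for what the vectorized body computes per element, a hedged scalar sketch (function name and rounding detail are illustrative, not taken from the source):

#include <cmath>

// Scalar reference for one row: dst[x] = saturate_cast<short>(src[x]*scale + shift),
// with the multiply/add done in float.
static void cvt_scale_row_ref(const short* src, short* dst, int width,
                              float scale, float shift)
{
    for( int x = 0; x < width; x++ )
    {
        float v = src[x]*scale + shift;
        // clamp to the short range before rounding (illustrative saturation)
        v = v < -32768.f ? -32768.f : (v > 32767.f ? 32767.f : v);
        dst[x] = (short)std::lround(v);
    }
}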
@@ -951,34 +951,61 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
     bool result = false;
     Mat src = _src.getMat();
     int type = src.type();
-
-    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY || method == DECOMP_SVD );
-
-    _dst.create( src.cols, src.rows, type );
-    Mat dst = _dst.getMat();
+    size_t esz = CV_ELEM_SIZE(type);
+    int m = src.rows, n = src.cols;
 
     if( method == DECOMP_SVD )
     {
-        int n = std::min(src.rows, src.cols);
-        SVD svd(src);
-        svd.backSubst(Mat(), dst);
+        int nm = std::min(m, n);
+
+        AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
+        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        Mat u(m, nm, type, buf);
+        Mat w(nm, 1, type, u.data + m*nm*esz);
+        Mat vt(nm, n, type, w.data + nm*esz);
+
+        SVD::compute(src, w, u, vt);
+        SVD::backSubst(w, u, vt, Mat(), _dst);
 
         return type == CV_32F ?
-            (((float*)svd.w.data)[0] >= FLT_EPSILON ?
-             ((float*)svd.w.data)[n-1]/((float*)svd.w.data)[0] : 0) :
-            (((double*)svd.w.data)[0] >= DBL_EPSILON ?
-             ((double*)svd.w.data)[n-1]/((double*)svd.w.data)[0] : 0);
+            (((float*)w.data)[0] >= FLT_EPSILON ?
+             ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
+            (((double*)w.data)[0] >= DBL_EPSILON ?
+             ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
     }
 
-    CV_Assert( src.rows == src.cols && (type == CV_32F || type == CV_64F));
-    if( src.rows <= 3 )
+    CV_Assert( m == n && (type == CV_32F || type == CV_64F));
+
+    if( method == DECOMP_EIG )
+    {
+        AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
+        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
+        Mat u(n, n, type, buf);
+        Mat w(n, 1, type, u.data + n*n*esz);
+        Mat vt(n, n, type, w.data + n*esz);
+
+        eigen(src, w, vt);
+        transpose(vt, u);
+        SVD::backSubst(w, u, vt, Mat(), _dst);
+        return type == CV_32F ?
+            (((float*)w.data)[0] >= FLT_EPSILON ?
+             ((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
+            (((double*)w.data)[0] >= DBL_EPSILON ?
+             ((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
+    }
+
+    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
+
+    _dst.create( n, n, type );
+    Mat dst = _dst.getMat();
+
+    if( n <= 3 )
     {
         uchar* srcdata = src.data;
         uchar* dstdata = dst.data;
         size_t srcstep = src.step;
         size_t dststep = dst.step;
-        if( src.rows == 2 )
+
+        if( n == 2 )
         {
             if( type == CV_32FC1 )
             {
@@ -1017,7 +1044,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
             }
         }
     }
-    else if( src.rows == 3 )
+    else if( n == 3 )
     {
         if( type == CV_32FC1 )
         {
@@ -1074,7 +1101,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
        }
        else
        {
-            assert( src.rows == 1 );
+            assert( n == 1 );
            if( type == CV_32FC1 )
            {
@@ -1100,7 +1127,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
         return result;
     }
 
-    int n = dst.cols, elem_size = CV_ELEM_SIZE(type);
+    int elem_size = CV_ELEM_SIZE(type);
     AutoBuffer<uchar> buf(n*n*elem_size);
     Mat src1(n, n, type, (uchar*)buf);
     src.copyTo(src1);
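The DECOMP_EIG branch works because eigen() returns the eigenvalues w in descending order and the eigenvectors as the rows of vt, so src = vt^T * diag(w) * vt; passing u = vt^T to SVD::backSubst with an empty right-hand side then assembles vt^T * diag(1/w) * vt, i.e. the inverse of the symmetric matrix, and the returned w[n-1]/w[0] is the spread of the eigenvalues. The same computation spelled out with plain Mat arithmetic, a hedged sketch rather than the committed code (assumes A is symmetric with no near-zero eigenvalues):

#include <opencv2/core/core.hpp>

// Illustrative re-statement of the DECOMP_EIG branch.
static cv::Mat invert_symmetric_ref(const cv::Mat& A)
{
    cv::Mat w, vt;
    cv::eigen(A, w, vt);            // w: eigenvalues, descending; vt: eigenvectors as rows,
                                    // so A = vt.t() * diag(w) * vt
    cv::Mat winv;
    cv::divide(1.0, w, winv);       // element-wise reciprocals of the eigenvalues
    return vt.t() * cv::Mat::diag(winv) * vt;   // A^-1 = vt^T * diag(1/w) * vt
}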
@@ -1621,7 +1648,8 @@ cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
     CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
     return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
-        method == CV_SVD || method == CV_SVD_SYM ? cv::DECOMP_SVD : cv::DECOMP_LU );
+        method == CV_SVD ? cv::DECOMP_SVD :
+        method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
 }
@@ -1634,7 +1662,8 @@ cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
     bool is_normal = (method & CV_NORMAL) != 0;
     method &= ~CV_NORMAL;
     return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
-        method == CV_SVD || method == CV_SVD_SYM ? cv::DECOMP_SVD :
+        method == CV_SVD ? cv::DECOMP_SVD :
+        method == CV_SVD_SYM ? cv::DECOMP_EIG :
         A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
 }
...
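With the cvInvert/cvSolve hunks above, CV_SVD_SYM no longer aliases plain CV_SVD but routes to the new DECOMP_EIG path. A hedged sketch of a legacy C-API call (values illustrative):

#include <opencv2/core/core_c.h>

void legacy_invert_example()
{
    double a[9]    = { 4, 1, 1,
                       1, 3, 0,
                       1, 0, 2 };        // symmetric input (illustrative)
    double ainv[9];
    CvMat A    = cvMat(3, 3, CV_64F, a);
    CvMat Ainv = cvMat(3, 3, CV_64F, ainv);
    cvInvert(&A, &Ainv, CV_SVD_SYM);     // now dispatches to cv::invert(..., DECOMP_EIG)
}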