Commit 008abd28 authored by Maksim Shabunin's avatar Maksim Shabunin

Extracted HAL interfaces for DFT/DCT, added new test

parent 5877debb
...@@ -187,6 +187,25 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, ...@@ -187,6 +187,25 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2,
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
struct DftContext
{
void * impl;
bool useReplacement;
DftContext() : impl(0), useReplacement(false) {}
};
CV_EXPORTS void dftInit2D(DftContext & c, int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows = 0);
CV_EXPORTS void dftRun2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step);
CV_EXPORTS void dftFree2D(DftContext & c);
CV_EXPORTS void dftInit(DftContext & c, int len, int count, int depth, int flags, bool * useBuffer = 0);
CV_EXPORTS void dftRun(const DftContext & c, const void * src, void * dst);
CV_EXPORTS void dftFree(DftContext & c);
CV_EXPORTS void dctInit(DftContext & c, int width, int height, int depth, int flags);
CV_EXPORTS void dctRun(const DftContext & c, const void * src, int src_step, void * dst, int dst_step);
CV_EXPORTS void dctFree(DftContext & c);
//! @} core_hal //! @} core_hal
//============================================================================= //=============================================================================
......
...@@ -12,6 +12,16 @@ ...@@ -12,6 +12,16 @@
//! @} //! @}
#define CV_HAL_DFT_INVERSE 1
#define CV_HAL_DFT_SCALE 2
#define CV_HAL_DFT_ROWS 4
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
#define CV_HAL_DFT_REAL_OUTPUT 32
#define CV_HAL_DFT_TWO_STAGE 64
#define CV_HAL_DFT_STAGE_COLS 128
#define CV_HAL_DFT_IS_CONTINUOUS 512
#define CV_HAL_DFT_IS_INPLACE 1024
#ifdef __cplusplus #ifdef __cplusplus
#include <cstddef> #include <cstddef>
#else #else
......
...@@ -173,7 +173,7 @@ DFTFactorize( int n, int* factors ) ...@@ -173,7 +173,7 @@ DFTFactorize( int n, int* factors )
} }
static void static void
DFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab ) DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
{ {
int digits[34], radix[34]; int digits[34], radix[34];
int n = factors[0], m = 0; int n = factors[0], m = 0;
...@@ -519,19 +519,59 @@ static IppStatus ippsDFTInv_PackToR( const double* src, double* dst, ...@@ -519,19 +519,59 @@ static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
} }
#endif #endif
enum { DFT_NO_PERMUTE=256, DFT_COMPLEX_INPUT_OR_OUTPUT=512 }; struct OcvDftOptions;
// mixed-radix complex discrete Fourier transform: double-precision version typedef void (*DFTFunc)(const OcvDftOptions & c, const void* src, void* dst);
template<typename T> static void
DFT( const Complex<T>* src, Complex<T>* dst, int n, struct OcvDftOptions {
int nf, const int* factors, const int* itab, int nf;
const Complex<T>* wave, int tab_size, int *factors;
const void* double scale;
int* itab;
void* wave;
int tab_size;
int n;
bool isInverse;
bool noPermute;
bool isComplex;
bool haveSSE3;
DFTFunc dft_func;
bool useIpp;
#ifdef USE_IPP_DFT
uchar* ipp_spec;
uchar* ipp_work;
#endif
OcvDftOptions()
{
nf = 0;
factors = 0;
scale = 0;
itab = 0;
wave = 0;
tab_size = 0;
n = 0;
isInverse = false;
noPermute = false;
isComplex = false;
useIpp = false;
#ifdef USE_IPP_DFT #ifdef USE_IPP_DFT
spec ipp_spec = 0;
ipp_work = 0;
#endif #endif
, Complex<T>* buf, dft_func = 0;
int flags, double _scale ) haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
}
};
// mixed-radix complex discrete Fourier transform: double-precision version
template<typename T> static void
DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
{ {
static const T sin_120 = (T)0.86602540378443864676372317075294; static const T sin_120 = (T)0.86602540378443864676372317075294;
static const T fft5_2 = (T)0.559016994374947424102293417182819; static const T fft5_2 = (T)0.559016994374947424102293417182819;
...@@ -539,20 +579,23 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -539,20 +579,23 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
static const T fft5_4 = (T)-1.538841768587626701285145288018455; static const T fft5_4 = (T)-1.538841768587626701285145288018455;
static const T fft5_5 = (T)0.363271264002680442947733378740309; static const T fft5_5 = (T)0.363271264002680442947733378740309;
int n0 = n, f_idx, nx; const Complex<T>* wave = (Complex<T>*)c.wave;
int inv = flags & DFT_INVERSE; const int * itab = c.itab;
int dw0 = tab_size, dw;
int n = c.n;
int f_idx, nx;
int inv = c.isInverse;
int dw0 = c.tab_size, dw;
int i, j, k; int i, j, k;
Complex<T> t; Complex<T> t;
T scale = (T)_scale; T scale = (T)c.scale;
int tab_step;
#ifdef USE_IPP_DFT if( c.useIpp )
if( spec )
{ {
#ifdef USE_IPP_DFT
if( !inv ) if( !inv )
{ {
if (ippsDFTFwd_CToC( src, dst, spec, (uchar*)buf ) >= 0) if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
{ {
CV_IMPL_ADD(CV_IMPL_IPP); CV_IMPL_ADD(CV_IMPL_IPP);
return; return;
...@@ -560,22 +603,22 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -560,22 +603,22 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
} }
else else
{ {
if (ippsDFTInv_CToC( src, dst, spec, (uchar*)buf ) >= 0) if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
{ {
CV_IMPL_ADD(CV_IMPL_IPP); CV_IMPL_ADD(CV_IMPL_IPP);
return; return;
} }
} }
setIppErrorStatus(); setIppErrorStatus();
}
#endif #endif
}
tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n; int tab_step = c.tab_size == n ? 1 : c.tab_size == n*2 ? 2 : c.tab_size/n;
// 0. shuffle data // 0. shuffle data
if( dst != src ) if( dst != src )
{ {
assert( (flags & DFT_NO_PERMUTE) == 0 ); assert( !c.noPermute );
if( !inv ) if( !inv )
{ {
for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step ) for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
...@@ -609,10 +652,10 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -609,10 +652,10 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
} }
else else
{ {
if( (flags & DFT_NO_PERMUTE) == 0 ) if( !c.noPermute )
{ {
CV_Assert( factors[0] == factors[nf-1] ); CV_Assert( c.factors[0] == c.factors[c.nf-1] );
if( nf == 1 ) if( c.nf == 1 )
{ {
if( (n & 3) == 0 ) if( (n & 3) == 0 )
{ {
...@@ -662,22 +705,22 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -662,22 +705,22 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
n = 1; n = 1;
// 1. power-2 transforms // 1. power-2 transforms
if( (factors[0] & 1) == 0 ) if( (c.factors[0] & 1) == 0 )
{ {
if( factors[0] >= 4 && checkHardwareSupport(CV_CPU_SSE3)) if( c.factors[0] >= 4 && c.haveSSE3)
{ {
DFT_VecR4<T> vr4; DFT_VecR4<T> vr4;
n = vr4(dst, factors[0], n0, dw0, wave); n = vr4(dst, c.factors[0], c.n, dw0, wave);
} }
// radix-4 transform // radix-4 transform
for( ; n*4 <= factors[0]; ) for( ; n*4 <= c.factors[0]; )
{ {
nx = n; nx = n;
n *= 4; n *= 4;
dw0 /= 4; dw0 /= 4;
for( i = 0; i < n0; i += n ) for( i = 0; i < c.n; i += n )
{ {
Complex<T> *v0, *v1; Complex<T> *v0, *v1;
T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4; T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
...@@ -729,14 +772,14 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -729,14 +772,14 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
} }
} }
for( ; n < factors[0]; ) for( ; n < c.factors[0]; )
{ {
// do the remaining radix-2 transform // do the remaining radix-2 transform
nx = n; nx = n;
n *= 2; n *= 2;
dw0 /= 2; dw0 /= 2;
for( i = 0; i < n0; i += n ) for( i = 0; i < c.n; i += n )
{ {
Complex<T>* v = dst + i; Complex<T>* v = dst + i;
T r0 = v[0].re + v[nx].re; T r0 = v[0].re + v[nx].re;
...@@ -761,9 +804,9 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -761,9 +804,9 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
} }
// 2. all the other transforms // 2. all the other transforms
for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ ) for( f_idx = (c.factors[0]&1) ? 0 : 1; f_idx < c.nf; f_idx++ )
{ {
int factor = factors[f_idx]; int factor = c.factors[f_idx];
nx = n; nx = n;
n *= factor; n *= factor;
dw0 /= factor; dw0 /= factor;
...@@ -771,7 +814,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -771,7 +814,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
if( factor == 3 ) if( factor == 3 )
{ {
// radix-3 // radix-3
for( i = 0; i < n0; i += n ) for( i = 0; i < c.n; i += n )
{ {
Complex<T>* v = dst + i; Complex<T>* v = dst + i;
...@@ -807,7 +850,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -807,7 +850,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
else if( factor == 5 ) else if( factor == 5 )
{ {
// radix-5 // radix-5
for( i = 0; i < n0; i += n ) for( i = 0; i < c.n; i += n )
{ {
for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
{ {
...@@ -863,11 +906,12 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -863,11 +906,12 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
{ {
// radix-"factor" - an odd number // radix-"factor" - an odd number
int p, q, factor2 = (factor - 1)/2; int p, q, factor2 = (factor - 1)/2;
int d, dd, dw_f = tab_size/factor; int d, dd, dw_f = c.tab_size/factor;
AutoBuffer<Complex<T> > buf(factor2 * 2);
Complex<T>* a = buf; Complex<T>* a = buf;
Complex<T>* b = buf + factor2; Complex<T>* b = a + factor2;
for( i = 0; i < n0; i += n ) for( i = 0; i < c.n; i += n )
{ {
for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
{ {
...@@ -931,7 +975,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -931,7 +975,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
s1.im += r1 - i1; s0.im += r1 + i1; s1.im += r1 - i1; s0.im += r1 + i1;
d += dd; d += dd;
d -= -(d >= tab_size) & tab_size; d -= -(d >= c.tab_size) & c.tab_size;
} }
v[k] = s0; v[k] = s0;
...@@ -948,7 +992,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -948,7 +992,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
if( inv ) if( inv )
im_scale = -im_scale; im_scale = -im_scale;
for( i = 0; i < n0; i++ ) for( i = 0; i < c.n; i++ )
{ {
T t0 = dst[i].re*re_scale; T t0 = dst[i].re*re_scale;
T t1 = dst[i].im*im_scale; T t1 = dst[i].im*im_scale;
...@@ -958,7 +1002,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -958,7 +1002,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
} }
else if( inv ) else if( inv )
{ {
for( i = 0; i <= n0 - 2; i += 2 ) for( i = 0; i <= c.n - 2; i += 2 )
{ {
T t0 = -dst[i].im; T t0 = -dst[i].im;
T t1 = -dst[i+1].im; T t1 = -dst[i+1].im;
...@@ -966,8 +1010,8 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -966,8 +1010,8 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
dst[i+1].im = t1; dst[i+1].im = t1;
} }
if( i < n0 ) if( i < c.n )
dst[n0-1].im = -dst[n0-1].im; dst[c.n-1].im = -dst[c.n-1].im;
} }
} }
...@@ -977,23 +1021,18 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n, ...@@ -977,23 +1021,18 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ... re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */ re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
template<typename T> static void template<typename T> static void
RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, RealDFT(const OcvDftOptions & c, const T* src, T* dst)
const Complex<T>* wave, int tab_size, const void*
#ifdef USE_IPP_DFT
spec
#endif
,
Complex<T>* buf, int flags, double _scale )
{ {
int complex_output = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0; int n = c.n;
T scale = (T)_scale; int complex_output = c.isComplex;
int j, n2 = n >> 1; T scale = (T)c.scale;
int j;
dst += complex_output; dst += complex_output;
#ifdef USE_IPP_DFT if( c.useIpp )
if( spec )
{ {
if (ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ) >=0) #ifdef USE_IPP_DFT
if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=0)
{ {
if( complex_output ) if( complex_output )
{ {
...@@ -1006,9 +1045,9 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1006,9 +1045,9 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
return; return;
} }
setIppErrorStatus(); setIppErrorStatus();
}
#endif #endif
assert( tab_size == n ); }
assert( c.tab_size == n );
if( n == 1 ) if( n == 1 )
{ {
...@@ -1028,15 +1067,19 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1028,15 +1067,19 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
_dst[0].im = 0; _dst[0].im = 0;
for( j = 1; j < n; j += 2 ) for( j = 1; j < n; j += 2 )
{ {
T t0 = src[itab[j]]*scale; T t0 = src[c.itab[j]]*scale;
T t1 = src[itab[j+1]]*scale; T t1 = src[c.itab[j+1]]*scale;
_dst[j].re = t0; _dst[j].re = t0;
_dst[j].im = 0; _dst[j].im = 0;
_dst[j+1].re = t1; _dst[j+1].re = t1;
_dst[j+1].im = 0; _dst[j+1].im = 0;
} }
DFT( _dst, _dst, n, nf, factors, itab, wave, OcvDftOptions sub_c = c;
tab_size, 0, buf, DFT_NO_PERMUTE, 1 ); sub_c.isComplex = false;
sub_c.isInverse = false;
sub_c.noPermute = true;
sub_c.scale = 1.;
DFT(sub_c, _dst, _dst);
if( !complex_output ) if( !complex_output )
dst[1] = dst[0]; dst[1] = dst[0];
} }
...@@ -1045,12 +1088,22 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1045,12 +1088,22 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
T t0, t; T t0, t;
T h1_re, h1_im, h2_re, h2_im; T h1_re, h1_im, h2_re, h2_im;
T scale2 = scale*(T)0.5; T scale2 = scale*(T)0.5;
factors[0] >>= 1; int n2 = n >> 1;
c.factors[0] >>= 1;
OcvDftOptions sub_c = c;
sub_c.factors += (c.factors[0] == 1);
sub_c.nf -= (c.factors[0] == 1);
sub_c.isComplex = false;
sub_c.isInverse = false;
sub_c.noPermute = false;
sub_c.scale = 1.;
sub_c.n = n2;
DFT( (Complex<T>*)src, (Complex<T>*)dst, n2, nf - (factors[0] == 1), DFT(sub_c, (Complex<T>*)src, (Complex<T>*)dst);
factors + (factors[0] == 1),
itab, wave, tab_size, 0, buf, 0, 1 ); c.factors[0] <<= 1;
factors[0] <<= 1;
t = dst[0] - dst[1]; t = dst[0] - dst[1];
dst[0] = (dst[0] + dst[1])*scale; dst[0] = (dst[0] + dst[1])*scale;
...@@ -1060,6 +1113,8 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1060,6 +1113,8 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
t = dst[n-1]; t = dst[n-1];
dst[n-1] = dst[1]; dst[n-1] = dst[1];
const Complex<T> *wave = (const Complex<T>*)c.wave;
for( j = 2, wave++; j < n2; j += 2, wave++ ) for( j = 2, wave++; j < n2; j += 2, wave++ )
{ {
/* calc odd */ /* calc odd */
...@@ -1103,22 +1158,16 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1103,22 +1158,16 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */ re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
template<typename T> static void template<typename T> static void
CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, CCSIDFT(const OcvDftOptions & c, const T* src, T* dst)
const Complex<T>* wave, int tab_size,
const void*
#ifdef USE_IPP_DFT
spec
#endif
, Complex<T>* buf,
int flags, double _scale )
{ {
int complex_input = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0; int n = c.n;
int j, k, n2 = (n+1) >> 1; int complex_input = c.isComplex;
T scale = (T)_scale; int j, k;
T scale = (T)c.scale;
T save_s1 = 0.; T save_s1 = 0.;
T t0, t1, t2, t3, t; T t0, t1, t2, t3, t;
assert( tab_size == n ); assert( c.tab_size == n );
if( complex_input ) if( complex_input )
{ {
...@@ -1127,10 +1176,10 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1127,10 +1176,10 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
((T*)src)[1] = src[0]; ((T*)src)[1] = src[0];
src++; src++;
} }
#ifdef USE_IPP_DFT if( c.useIpp )
if( spec )
{ {
if (ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ) >=0) #ifdef USE_IPP_DFT
if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=0)
{ {
if( complex_input ) if( complex_input )
((T*)src)[0] = (T)save_s1; ((T*)src)[0] = (T)save_s1;
...@@ -1139,8 +1188,8 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1139,8 +1188,8 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
} }
setIppErrorStatus(); setIppErrorStatus();
}
#endif #endif
}
if( n == 1 ) if( n == 1 )
{ {
dst[0] = (T)(src[0]*scale); dst[0] = (T)(src[0]*scale);
...@@ -1158,16 +1207,25 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1158,16 +1207,25 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
_dst[0].re = src[0]; _dst[0].re = src[0];
_dst[0].im = 0; _dst[0].im = 0;
int n2 = (n+1) >> 1;
for( j = 1; j < n2; j++ ) for( j = 1; j < n2; j++ )
{ {
int k0 = itab[j], k1 = itab[n-j]; int k0 = c.itab[j], k1 = c.itab[n-j];
t0 = _src[j].re; t1 = _src[j].im; t0 = _src[j].re; t1 = _src[j].im;
_dst[k0].re = t0; _dst[k0].im = -t1; _dst[k0].re = t0; _dst[k0].im = -t1;
_dst[k1].re = t0; _dst[k1].im = t1; _dst[k1].re = t0; _dst[k1].im = t1;
} }
DFT( _dst, _dst, n, nf, factors, itab, wave, OcvDftOptions sub_c = c;
tab_size, 0, buf, DFT_NO_PERMUTE, 1. ); sub_c.isComplex = false;
sub_c.isInverse = false;
sub_c.noPermute = true;
sub_c.scale = 1.;
sub_c.n = n;
DFT(sub_c, _dst, _dst);
dst[0] *= scale; dst[0] *= scale;
for( j = 1; j < n; j += 2 ) for( j = 1; j < n; j += 2 )
{ {
...@@ -1180,7 +1238,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1180,7 +1238,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
else else
{ {
int inplace = src == dst; int inplace = src == dst;
const Complex<T>* w = wave; const Complex<T>* w = (const Complex<T>*)c.wave;
t = src[1]; t = src[1];
t0 = (src[0] + src[n-1]); t0 = (src[0] + src[n-1]);
...@@ -1188,6 +1246,8 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1188,6 +1246,8 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
dst[0] = t0; dst[0] = t0;
dst[1] = t1; dst[1] = t1;
int n2 = (n+1) >> 1;
for( j = 2, w++; j < n2; j += 2, w++ ) for( j = 2, w++; j < n2; j += 2, w++ )
{ {
T h1_re, h1_im, h2_re, h2_im; T h1_re, h1_im, h2_re, h2_im;
...@@ -1218,10 +1278,10 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1218,10 +1278,10 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
else else
{ {
int j2 = j >> 1; int j2 = j >> 1;
k = itab[j2]; k = c.itab[j2];
dst[k] = t0; dst[k] = t0;
dst[k+1] = t1; dst[k+1] = t1;
k = itab[n2-j2]; k = c.itab[n2-j2];
dst[k] = t2; dst[k] = t2;
dst[k+1]= t3; dst[k+1]= t3;
} }
...@@ -1239,19 +1299,26 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ...@@ -1239,19 +1299,26 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
} }
else else
{ {
k = itab[n2]; k = c.itab[n2];
dst[k*2] = t0; dst[k*2] = t0;
dst[k*2+1] = t1; dst[k*2+1] = t1;
} }
} }
factors[0] >>= 1; c.factors[0] >>= 1;
DFT( (Complex<T>*)dst, (Complex<T>*)dst, n2,
nf - (factors[0] == 1), OcvDftOptions sub_c = c;
factors + (factors[0] == 1), itab, sub_c.factors += (c.factors[0] == 1);
wave, tab_size, 0, buf, sub_c.nf -= (c.factors[0] == 1);
inplace ? 0 : DFT_NO_PERMUTE, 1. ); sub_c.isComplex = false;
factors[0] <<= 1; sub_c.isInverse = false;
sub_c.noPermute = !inplace;
sub_c.scale = 1.;
sub_c.n = n2;
DFT(sub_c, (Complex<T>*)dst, (Complex<T>*)dst);
c.factors[0] <<= 1;
for( j = 0; j < n; j += 2 ) for( j = 0; j < n; j += 2 )
{ {
...@@ -1436,57 +1503,35 @@ ExpandCCS( uchar* _ptr, int n, int elem_size ) ...@@ -1436,57 +1503,35 @@ ExpandCCS( uchar* _ptr, int n, int elem_size )
} }
} }
static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst)
typedef void (*DFTFunc)(
const void* src, void* dst, int n, int nf, int* factors,
const int* itab, const void* wave, int tab_size,
const void* spec, void* buf, int inv, double scale );
static void DFT_32f( const Complexf* src, Complexf* dst, int n,
int nf, const int* factors, const int* itab,
const Complexf* wave, int tab_size,
const void* spec, Complexf* buf,
int flags, double scale )
{ {
DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); DFT(c, src, dst);
} }
static void DFT_64f( const Complexd* src, Complexd* dst, int n, static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst)
int nf, const int* factors, const int* itab,
const Complexd* wave, int tab_size,
const void* spec, Complexd* buf,
int flags, double scale )
{ {
DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); DFT(c, src, dst);
} }
static void RealDFT_32f( const float* src, float* dst, int n, int nf, int* factors, static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
const int* itab, const Complexf* wave, int tab_size, const void* spec,
Complexf* buf, int flags, double scale )
{ {
RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); RealDFT(c, src, dst);
} }
static void RealDFT_64f( const double* src, double* dst, int n, int nf, int* factors, static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
const int* itab, const Complexd* wave, int tab_size, const void* spec,
Complexd* buf, int flags, double scale )
{ {
RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); RealDFT(c, src, dst);
} }
static void CCSIDFT_32f( const float* src, float* dst, int n, int nf, int* factors, static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
const int* itab, const Complexf* wave, int tab_size, const void* spec,
Complexf* buf, int flags, double scale )
{ {
CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); CCSIDFT(c, src, dst);
} }
static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* factors, static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
const int* itab, const Complexd* wave, int tab_size, const void* spec,
Complexd* buf, int flags, double scale )
{ {
CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); CCSIDFT(c, src, dst);
} }
} }
...@@ -1508,8 +1553,11 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody ...@@ -1508,8 +1553,11 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
{ {
public: public:
Dft_C_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) : Dft_C_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width,
ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) const Dft& _ippidft, int _norm_flag, bool *_ok) :
ParallelLoopBody(),
src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
{ {
*ok = true; *ok = true;
} }
...@@ -1523,7 +1571,7 @@ public: ...@@ -1523,7 +1571,7 @@ public:
int sizeSpec=0; int sizeSpec=0;
int sizeInit=0; int sizeInit=0;
IppiSize srcRoiSize = {src.cols, 1}; IppiSize srcRoiSize = {width, 1};
status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 ) if ( status < 0 )
...@@ -1555,7 +1603,8 @@ public: ...@@ -1555,7 +1603,8 @@ public:
} }
for( int i = range.start; i < range.end; ++i) for( int i = range.start; i < range.end; ++i)
if(!ippidft(src.ptr<Ipp32fc>(i), (int)src.step,dst.ptr<Ipp32fc>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer)) if(!ippidft((Ipp32fc*)(src + src_step * i), src_step, (Ipp32fc*)(dst + dst_step * i), dst_step,
pDFTSpec, (Ipp8u*)pBuffer))
{ {
*ok = false; *ok = false;
} }
...@@ -1568,8 +1617,11 @@ public: ...@@ -1568,8 +1617,11 @@ public:
} }
private: private:
const Mat& src; uchar * src;
Mat& dst; int src_step;
uchar * dst;
int dst_step;
int width;
const Dft& ippidft; const Dft& ippidft;
int norm_flag; int norm_flag;
bool *ok; bool *ok;
...@@ -1582,8 +1634,11 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody ...@@ -1582,8 +1634,11 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
{ {
public: public:
Dft_R_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) : Dft_R_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width,
ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) const Dft& _ippidft, int _norm_flag, bool *_ok) :
ParallelLoopBody(),
src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
{ {
*ok = true; *ok = true;
} }
...@@ -1597,7 +1652,7 @@ public: ...@@ -1597,7 +1652,7 @@ public:
int sizeSpec=0; int sizeSpec=0;
int sizeInit=0; int sizeInit=0;
IppiSize srcRoiSize = {src.cols, 1}; IppiSize srcRoiSize = {width, 1};
status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 ) if ( status < 0 )
...@@ -1629,7 +1684,8 @@ public: ...@@ -1629,7 +1684,8 @@ public:
} }
for( int i = range.start; i < range.end; ++i) for( int i = range.start; i < range.end; ++i)
if(!ippidft(src.ptr<float>(i), (int)src.step,dst.ptr<float>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer)) if(!ippidft((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step,
pDFTSpec, (Ipp8u*)pBuffer))
{ {
*ok = false; *ok = false;
} }
...@@ -1642,8 +1698,11 @@ public: ...@@ -1642,8 +1698,11 @@ public:
} }
private: private:
const Mat& src; uchar * src;
Mat& dst; int src_step;
uchar * dst;
int dst_step;
int width;
const Dft& ippidft; const Dft& ippidft;
int norm_flag; int norm_flag;
bool *ok; bool *ok;
...@@ -1652,18 +1711,18 @@ private: ...@@ -1652,18 +1711,18 @@ private:
}; };
template <typename Dft> template <typename Dft>
bool Dft_C_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag) bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag)
{ {
bool ok; bool ok;
parallel_for_(Range(0, src.rows), Dft_C_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) ); parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
return ok; return ok;
} }
template <typename Dft> template <typename Dft>
bool Dft_R_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag) bool Dft_R_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag)
{ {
bool ok; bool ok;
parallel_for_(Range(0, src.rows), Dft_R_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) ); parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
return ok; return ok;
} }
...@@ -1691,7 +1750,7 @@ private: ...@@ -1691,7 +1750,7 @@ private:
ippiDFT_R_Func func; ippiDFT_R_Func func;
}; };
static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag)
{ {
IppStatus status; IppStatus status;
Ipp8u* pBuffer = 0; Ipp8u* pBuffer = 0;
...@@ -1700,7 +1759,7 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) ...@@ -1700,7 +1759,7 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
int sizeSpec=0; int sizeSpec=0;
int sizeInit=0; int sizeInit=0;
IppiSize srcRoiSize = {src.cols, src.rows}; IppiSize srcRoiSize = {width, height};
status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 ) if ( status < 0 )
...@@ -1728,9 +1787,9 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) ...@@ -1728,9 +1787,9 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
} }
if (!inv) if (!inv)
status = ippiDFTFwd_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer ); status = ippiDFTFwd_CToC_32fc_C1R( (Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer );
else else
status = ippiDFTInv_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer ); status = ippiDFTInv_CToC_32fc_C1R( (Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer );
if ( sizeBuffer > 0 ) if ( sizeBuffer > 0 )
ippFree( pBuffer ); ippFree( pBuffer );
...@@ -1745,7 +1804,7 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) ...@@ -1745,7 +1804,7 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
return false; return false;
} }
static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) static bool ippi_DFT_R_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag)
{ {
IppStatus status; IppStatus status;
Ipp8u* pBuffer = 0; Ipp8u* pBuffer = 0;
...@@ -1754,7 +1813,7 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) ...@@ -1754,7 +1813,7 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
int sizeSpec=0; int sizeSpec=0;
int sizeInit=0; int sizeInit=0;
IppiSize srcRoiSize = {src.cols, src.rows}; IppiSize srcRoiSize = {width, height};
status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
if ( status < 0 ) if ( status < 0 )
...@@ -1782,9 +1841,9 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) ...@@ -1782,9 +1841,9 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
} }
if (!inv) if (!inv)
status = ippiDFTFwd_RToPack_32f_C1R( src.ptr<float>(), (int)(src.step), dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer ); status = ippiDFTFwd_RToPack_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer );
else else
status = ippiDFTInv_PackToR_32f_C1R( src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer ); status = ippiDFTInv_PackToR_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer );
if ( sizeBuffer > 0 ) if ( sizeBuffer > 0 )
ippFree( pBuffer ); ippFree( pBuffer );
...@@ -2426,18 +2485,16 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags) ...@@ -2426,18 +2485,16 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
namespace cv namespace cv
{ {
static void complementComplexOutput(Mat& dst, int len, int dft_dims)
template <typename T>
static void complementComplex(T * ptr, int step, int n, int len, int dft_dims)
{ {
int i, n = dst.cols; T* p0 = (T*)ptr;
size_t elem_size = dst.elemSize1(); size_t dstep = step/sizeof(p0[0]);
if( elem_size == sizeof(float) ) for(int i = 0; i < len; i++ )
{
float* p0 = dst.ptr<float>();
size_t dstep = dst.step/sizeof(p0[0]);
for( i = 0; i < len; i++ )
{ {
float* p = p0 + dstep*i; T* p = p0 + dstep*i;
float* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i); T* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
for( int j = 1; j < (n+1)/2; j++ ) for( int j = 1; j < (n+1)/2; j++ )
{ {
...@@ -2445,92 +2502,307 @@ static void complementComplexOutput(Mat& dst, int len, int dft_dims) ...@@ -2445,92 +2502,307 @@ static void complementComplexOutput(Mat& dst, int len, int dft_dims)
p[(n-j)*2+1] = -q[j*2+1]; p[(n-j)*2+1] = -q[j*2+1];
} }
} }
} }
static void complementComplexOutput(int depth, uchar * ptr, int step, int count, int len, int dft_dims)
{
if( depth == CV_32F )
complementComplex((float*)ptr, step, count, len, dft_dims);
else else
complementComplex((double*)ptr, step, count, len, dft_dims);
}
enum DftMode {
InvalidDft = 0,
FwdRealToCCS,
FwdRealToComplex,
FwdComplexToComplex,
InvCCSToReal,
InvComplexToReal,
InvComplexToComplex,
};
enum DftDims {
InvalidDim = 0,
OneDim,
OneDimColWise,
TwoDims
};
inline const char * modeName(DftMode m)
{
switch (m)
{ {
double* p0 = dst.ptr<double>(); case InvalidDft: return "InvalidDft";
size_t dstep = dst.step/sizeof(p0[0]); case FwdRealToCCS: return "FwdRealToCCS";
for( i = 0; i < len; i++ ) case FwdRealToComplex: return "FwdRealToComplex";
case FwdComplexToComplex: return "FwdComplexToComplex";
case InvCCSToReal: return "InvCCSToReal";
case InvComplexToReal: return "InvComplexToReal";
case InvComplexToComplex: return "InvComplexToComplex";
}
return 0;
}
inline const char * dimsName(DftDims d)
{
switch (d)
{ {
double* p = p0 + dstep*i; case InvalidDim: return "InvalidDim";
double* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i); case OneDim: return "OneDim";
case OneDimColWise: return "OneDimColWise";
case TwoDims: return "TwoDims";
};
return 0;
}
for( int j = 1; j < (n+1)/2; j++ ) template <typename T>
inline bool isInv(T mode)
{
switch ((DftMode)mode)
{ {
p[(n-j)*2] = q[j*2]; case InvCCSToReal:
p[(n-j)*2+1] = -q[j*2+1]; case InvComplexToReal:
case InvComplexToComplex: return true;
default: return false;
} }
}
inline DftMode determineMode(bool inv, int cn1, int cn2)
{
if (!inv)
{
if (cn1 == 1 && cn2 == 1)
return FwdRealToCCS;
else if (cn1 == 1 && cn2 == 2)
return FwdRealToComplex;
else if (cn1 == 2 && cn2 == 2)
return FwdComplexToComplex;
} }
else
{
if (cn1 == 1 && cn2 == 1)
return InvCCSToReal;
else if (cn1 == 2 && cn2 == 1)
return InvComplexToReal;
else if (cn1 == 2 && cn2 == 2)
return InvComplexToComplex;
} }
return InvalidDft;
} }
inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous)
{
// printf("%d x %d (%d, %d)\n", rows, cols, isRowWise, isContinuous);
if (isRowWise)
return OneDim;
if (cols == 1 && rows > 1) // one-column-shaped input
{
if (isContinuous)
return OneDim;
else
return OneDimColWise;
}
if (rows == 1)
return OneDim;
if (cols > 1 && rows > 1)
return TwoDims;
return InvalidDim;
} }
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) class OcvDftImpl
{ {
#ifdef HAVE_CLAMDFFT protected:
CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU && hal::DftContext contextA;
_dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0, hal::DftContext contextB;
ocl_dft_amdfft(_src0, _dst, flags)) bool needBufferA;
#endif bool needBufferB;
bool inv;
int width;
int height;
DftMode mode;
int elem_size;
int complex_elem_size;
int depth;
bool real_transform;
int nonzero_rows;
bool isRowTransform;
bool isScaled;
std::vector<int> stages;
bool useIpp;
int src_channels;
int dst_channels;
AutoBuffer<uchar> tmp_bufA;
AutoBuffer<uchar> tmp_bufB;
AutoBuffer<uchar> buf0;
AutoBuffer<uchar> buf1;
#ifdef HAVE_OPENCL public:
CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2, OcvDftImpl()
ocl_dft(_src0, _dst, flags, nonzero_rows)) {
#endif needBufferA = false;
needBufferB = false;
inv = false;
width = 0;
height = 0;
mode = InvalidDft;
elem_size = 0;
complex_elem_size = 0;
depth = 0;
real_transform = false;
nonzero_rows = 0;
isRowTransform = false;
isScaled = false;
useIpp = false;
src_channels = 0;
dst_channels = 0;
}
void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows)
{
bool isComplex = _src_channels != _dst_channels;
nonzero_rows = _nonzero_rows;
width = _width;
height = _height;
depth = _depth;
src_channels = _src_channels;
dst_channels = _dst_channels;
bool isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != 0;
bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
mode = determineMode(isInverse, _src_channels, _dst_channels);
inv = isInverse;
isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
isScaled = (flags & CV_HAL_DFT_SCALE) != 0;
needBufferA = false;
needBufferB = false;
real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex);
elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
complex_elem_size = elem_size * 2;
if( !real_transform )
elem_size = complex_elem_size;
static DFTFunc dft_tbl[6] = #if defined USE_IPP_DFT
CV_IPP_CHECK()
{ {
(DFTFunc)DFT_32f, if (nonzero_rows == 0 && depth == CV_32F && ((width * height)>(int)(1<<6)))
(DFTFunc)RealDFT_32f, {
(DFTFunc)CCSIDFT_32f, if (mode == FwdComplexToComplex || mode == InvComplexToComplex || mode == FwdRealToCCS || mode == InvCCSToReal)
(DFTFunc)DFT_64f, {
(DFTFunc)RealDFT_64f, useIpp = true;
(DFTFunc)CCSIDFT_64f return;
}; }
AutoBuffer<uchar> buf; }
Mat src0 = _src0.getMat(), src = src0; }
int prev_len = 0, stage = 0;
bool inv = (flags & DFT_INVERSE) != 0;
int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0);
int type = src.type(), depth = src.depth();
int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
int factors[34];
bool inplace_transform = false;
#ifdef USE_IPP_DFT
AutoBuffer<uchar> ippbuf;
int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
#endif #endif
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); DftDims dims = determineDims(height, width, isRowTransform, isContinuous);
if (dims == TwoDims)
{
stages.resize(2);
if (mode == InvCCSToReal || mode == InvComplexToReal)
{
stages[0] = 1;
stages[1] = 0;
}
else
{
stages[0] = 0;
stages[1] = 1;
}
}
else
{
stages.resize(1);
if (dims == OneDimColWise)
stages[0] = 1;
else
stages[0] = 0;
}
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) ) for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
_dst.create( src.size(), CV_MAKETYPE(depth, 2) ); {
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) ) if (stageIndex == 1)
_dst.create( src.size(), depth ); {
isInplace = true;
isComplex = false;
}
int stage = stages[stageIndex];
bool isLastStage = (stageIndex + 1 == stages.size());
int len, count;
int f = 0;
if (inv)
f |= CV_HAL_DFT_INVERSE;
if (isScaled)
f |= CV_HAL_DFT_SCALE;
if (isRowTransform)
f |= CV_HAL_DFT_ROWS;
if (isComplex)
f |= CV_HAL_DFT_COMPLEX_OUTPUT;
if (real_transform)
f |= CV_HAL_DFT_REAL_OUTPUT;
if (!isLastStage)
f |= CV_HAL_DFT_TWO_STAGE;
if( stage == 0 ) // row-wise transform
{
if (width == 1 && !isRowTransform )
{
len = height;
count = width;
}
else else
_dst.create( src.size(), type ); {
len = width;
count = height;
}
needBufferA = isInplace;
hal::dftInit(contextA, len, count, depth, f, &needBufferA);
if (needBufferA)
tmp_bufA.allocate(len * complex_elem_size);
}
else
{
len = height;
count = width;
f |= CV_HAL_DFT_STAGE_COLS;
needBufferB = isInplace;
hal::dftInit(contextB, len, count, depth, f, &needBufferB);
if (needBufferB)
tmp_bufB.allocate(len * complex_elem_size);
Mat dst = _dst.getMat(); buf0.allocate(len * complex_elem_size);
buf1.allocate(len * complex_elem_size);
}
}
}
#if defined USE_IPP_DFT void run(uchar * src, int src_step, uchar * dst, int dst_step)
CV_IPP_CHECK()
{ {
if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0) #if defined USE_IPP_DFT
if (useIpp)
{ {
if ((flags & DFT_ROWS) == 0) int ipp_norm_flag = !isScaled ? 8 : inv ? 2 : 1;
if (!isRowTransform)
{ {
if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT))) if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
{ {
if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag)) if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
{ {
CV_IMPL_ADD(CV_IMPL_IPP); CV_IMPL_ADD(CV_IMPL_IPP);
return; return;
} }
setIppErrorStatus(); setIppErrorStatus();
} }
if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT))) else if (mode == FwdRealToCCS || mode == InvCCSToReal)
{ {
if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag)) if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
{ {
CV_IMPL_ADD(CV_IMPL_IPP); CV_IMPL_ADD(CV_IMPL_IPP);
return; return;
...@@ -2540,20 +2812,20 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) ...@@ -2540,20 +2812,20 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
} }
else else
{ {
if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT))) if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
{ {
ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R; ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag)) if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
{ {
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
return; return;
} }
setIppErrorStatus(); setIppErrorStatus();
} }
if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT))) else if (mode == FwdRealToCCS || mode == InvCCSToReal)
{ {
ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R; ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag)) if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
{ {
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
return; return;
...@@ -2561,222 +2833,114 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) ...@@ -2561,222 +2833,114 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
setIppErrorStatus(); setIppErrorStatus();
} }
} }
} return;
} }
#endif #endif
if( !real_transform ) for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
elem_size = complex_elem_size;
if( src.cols == 1 && nonzero_rows > 0 )
CV_Error( CV_StsNotImplemented,
"This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
"For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
// determine, which transform to do first - row-wise
// (stage 0) or column-wise (stage 1) transform
if( !(flags & DFT_ROWS) && src.rows > 1 &&
((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) ||
(src.cols > 1 && inv && real_transform)) )
stage = 1;
for(;;)
{
double scale = 1;
uchar* wave = 0;
int* itab = 0;
uchar* ptr;
int i, len, count, sz = 0;
int use_buf = 0, odd_real = 0;
DFTFunc dft_func;
if( stage == 0 ) // row-wise transform
{
len = !inv ? src.cols : dst.cols;
count = src.rows;
if( len == 1 && !(flags & DFT_ROWS) )
{ {
len = !inv ? src.rows : dst.rows; int stage_src_channels = src_channels;
count = 1; int stage_dst_channels = dst_channels;
}
odd_real = real_transform && (len & 1); if (stageIndex == 1)
}
else
{ {
len = dst.rows; src = dst;
count = !inv ? src0.cols : dst.cols; src_step = dst_step;
sz = 2*len*complex_elem_size; stage_src_channels = stage_dst_channels;
} }
void *spec = 0; int stage = stages[stageIndex];
#ifdef USE_IPP_DFT bool isLastStage = (stageIndex + 1 == stages.size());
if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available bool isComplex = stage_src_channels != stage_dst_channels;
{
int specsize=0, initsize=0, worksize=0;
IppDFTGetSizeFunc getSizeFunc = 0;
IppDFTInitFunc initFunc = 0;
if( real_transform && stage == 0 ) if( stage == 0 )
{ rowDft(src, src_step, dst, dst_step, isComplex, isLastStage);
if( depth == CV_32F )
{
getSizeFunc = ippsDFTGetSize_R_32f;
initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
}
else
{
getSizeFunc = ippsDFTGetSize_R_64f;
initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
}
}
else
{
if( depth == CV_32F )
{
getSizeFunc = ippsDFTGetSize_C_32fc;
initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
}
else else
{ colDft(src, src_step, dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage);
getSizeFunc = ippsDFTGetSize_C_64fc;
initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
} }
} }
if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
void free()
{ {
ippbuf.allocate(specsize + initsize + 64); if (useIpp)
spec = alignPtr(&ippbuf[0], 32); return;
uchar* initbuf = alignPtr((uchar*)spec + specsize, 32); hal::dftFree(contextA);
if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 ) hal::dftFree(contextB);
spec = 0;
sz += worksize;
}
else
setIppErrorStatus();
} }
else
#endif
{
if( len != prev_len )
nf = DFTFactorize( len, factors );
inplace_transform = factors[0] == factors[nf-1]; protected:
sz += len*(complex_elem_size + sizeof(int));
i = nf > 1 && (factors[0] & 1) == 0;
if( (factors[i] & 1) != 0 && factors[i] > 5 )
sz += (factors[i]+1)*complex_elem_size;
if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) || void rowDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage)
(stage == 1 && !inplace_transform) )
{ {
use_buf = 1; int len, count;
sz += len*complex_elem_size; if (width == 1 && !isRowTransform )
}
}
ptr = (uchar*)buf;
buf.allocate( sz + 32 );
if( ptr != (uchar*)buf )
prev_len = 0; // because we release the buffer,
// force recalculation of
// twiddle factors and permutation table
ptr = (uchar*)buf;
if( !spec )
{ {
wave = ptr; len = height;
ptr += len*complex_elem_size; count = width;
itab = (int*)ptr;
ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
if( len != prev_len || (!inplace_transform && inv && real_transform))
DFTInit( len, nf, factors, itab, complex_elem_size,
wave, stage == 0 && inv && real_transform );
// otherwise reuse the tables calculated on the previous stage
} }
else
if( stage == 0 )
{ {
uchar* tmp_buf = 0; len = width;
count = height;
}
int dptr_offset = 0; int dptr_offset = 0;
int dst_full_len = len*elem_size; int dst_full_len = len*elem_size;
int _flags = (int)inv + (src.channels() != dst.channels() ?
DFT_COMPLEX_INPUT_OR_OUTPUT : 0); if( needBufferA )
if( use_buf ) {
{ if (mode == FwdRealToCCS && (len & 1) && len > 1)
tmp_buf = ptr;
ptr += len*complex_elem_size;
if( odd_real && !inv && len > 1 &&
!(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT))
dptr_offset = elem_size; dptr_offset = elem_size;
} }
if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) ) if( !inv && isComplex )
dst_full_len += (len & 1) ? elem_size : complex_elem_size; dst_full_len += (len & 1) ? elem_size : complex_elem_size;
dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3]; int nz = nonzero_rows;
if( nz <= 0 || nz > count )
nz = count;
if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) ) int i;
stage = 1; for( i = 0; i < nz; i++ )
else if( flags & CV_DXT_SCALE )
scale = 1./(len * (flags & DFT_ROWS ? 1 : count));
if( nonzero_rows <= 0 || nonzero_rows > count )
nonzero_rows = count;
for( i = 0; i < nonzero_rows; i++ )
{ {
const uchar* sptr = src.ptr(i); const uchar* sptr = src_data + src_step * i;
uchar* dptr0 = dst.ptr(i); uchar* dptr0 = dst_data + dst_step * i;
uchar* dptr = dptr0; uchar* dptr = dptr0;
if( tmp_buf ) if( needBufferA )
dptr = tmp_buf; dptr = tmp_bufA;
hal::dftRun(contextA, sptr, dptr);
dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale ); if( needBufferA )
if( dptr != dptr0 )
memcpy( dptr0, dptr + dptr_offset, dst_full_len ); memcpy( dptr0, dptr + dptr_offset, dst_full_len );
} }
for( ; i < count; i++ ) for( ; i < count; i++ )
{ {
uchar* dptr0 = dst.ptr(i); uchar* dptr0 = dst_data + dst_step * i;
memset( dptr0, 0, dst_full_len ); memset( dptr0, 0, dst_full_len );
} }
if(isLastStage && mode == FwdRealToComplex)
if( stage != 1 ) complementComplexOutput(depth, dst_data, dst_step, len, nz, 1);
{
if( !inv && real_transform && dst.channels() == 2 )
complementComplexOutput(dst, nonzero_rows, 1);
break;
} }
src = dst;
} void colDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
else
{ {
int len = height;
int count = width;
int a = 0, b = count; int a = 0, b = count;
uchar *buf0, *buf1, *dbuf0, *dbuf1; uchar *dbuf0, *dbuf1;
const uchar* sptr0 = src.ptr(); const uchar* sptr0 = src_data;
uchar* dptr0 = dst.ptr(); uchar* dptr0 = dst_data;
buf0 = ptr;
ptr += len*complex_elem_size;
buf1 = ptr;
ptr += len*complex_elem_size;
dbuf0 = buf0, dbuf1 = buf1; dbuf0 = buf0, dbuf1 = buf1;
if( use_buf ) if( needBufferB )
{ {
dbuf1 = ptr; dbuf1 = tmp_bufB;
dbuf0 = buf1; dbuf0 = buf1;
ptr += len*complex_elem_size;
} }
dft_func = dft_tbl[(depth == CV_64F)*3];
if( real_transform && inv && src.cols > 1 )
stage = 0;
else if( flags & CV_DXT_SCALE )
scale = 1./(len * count);
if( real_transform ) if( real_transform )
{ {
int even; int even;
...@@ -2786,22 +2950,22 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) ...@@ -2786,22 +2950,22 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
if( !inv ) if( !inv )
{ {
memset( buf0, 0, len*complex_elem_size ); memset( buf0, 0, len*complex_elem_size );
CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size ); CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, elem_size );
sptr0 += dst.channels()*elem_size; sptr0 += stage_dst_channels*elem_size;
if( even ) if( even )
{ {
memset( buf1, 0, len*complex_elem_size ); memset( buf1, 0, len*complex_elem_size );
CopyColumn( sptr0 + (count-2)*elem_size, src.step, CopyColumn( sptr0 + (count-2)*elem_size, src_step,
buf1, complex_elem_size, len, elem_size ); buf1, complex_elem_size, len, elem_size );
} }
} }
else if( src.channels() == 1 ) else if( stage_src_channels == 1 )
{ {
CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size ); CopyColumn( sptr0, src_step, buf0, elem_size, len, elem_size );
ExpandCCS( buf0, len, elem_size ); ExpandCCS( buf0, len, elem_size );
if( even ) if( even )
{ {
CopyColumn( sptr0 + (count-1)*elem_size, src.step, CopyColumn( sptr0 + (count-1)*elem_size, src_step,
buf1, elem_size, len, elem_size ); buf1, elem_size, len, elem_size );
ExpandCCS( buf1, len, elem_size ); ExpandCCS( buf1, len, elem_size );
} }
...@@ -2809,22 +2973,20 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) ...@@ -2809,22 +2973,20 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
} }
else else
{ {
CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size ); CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size );
if( even ) if( even )
{ {
CopyColumn( sptr0 + b*complex_elem_size, src.step, CopyColumn( sptr0 + b*complex_elem_size, src_step,
buf1, complex_elem_size, len, complex_elem_size ); buf1, complex_elem_size, len, complex_elem_size );
} }
sptr0 += complex_elem_size; sptr0 += complex_elem_size;
} }
if( even ) if( even )
dft_func( buf1, dbuf1, len, nf, factors, itab, hal::dftRun(contextB, buf1, dbuf1);
wave, len, spec, ptr, inv, scale ); hal::dftRun(contextB, buf0, dbuf0);
dft_func( buf0, dbuf0, len, nf, factors, itab,
wave, len, spec, ptr, inv, scale );
if( dst.channels() == 1 ) if( stage_dst_channels == 1 )
{ {
if( !inv ) if( !inv )
{ {
...@@ -2832,23 +2994,23 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) ...@@ -2832,23 +2994,23 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
// before doing that, defgragment the vector // before doing that, defgragment the vector
memcpy( dbuf0 + elem_size, dbuf0, elem_size ); memcpy( dbuf0 + elem_size, dbuf0, elem_size );
CopyColumn( dbuf0 + elem_size, elem_size, dptr0, CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
dst.step, len, elem_size ); dst_step, len, elem_size );
if( even ) if( even )
{ {
memcpy( dbuf1 + elem_size, dbuf1, elem_size ); memcpy( dbuf1 + elem_size, dbuf1, elem_size );
CopyColumn( dbuf1 + elem_size, elem_size, CopyColumn( dbuf1 + elem_size, elem_size,
dptr0 + (count-1)*elem_size, dptr0 + (count-1)*elem_size,
dst.step, len, elem_size ); dst_step, len, elem_size );
} }
dptr0 += elem_size; dptr0 += elem_size;
} }
else else
{ {
// copy the real part of the complex vector to the first/last column // copy the real part of the complex vector to the first/last column
CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size ); CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, elem_size );
if( even ) if( even )
CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size, CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
dst.step, len, elem_size ); dst_step, len, elem_size );
dptr0 += elem_size; dptr0 += elem_size;
} }
} }
...@@ -2856,46 +3018,374 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) ...@@ -2856,46 +3018,374 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{ {
assert( !inv ); assert( !inv );
CopyColumn( dbuf0, complex_elem_size, dptr0, CopyColumn( dbuf0, complex_elem_size, dptr0,
dst.step, len, complex_elem_size ); dst_step, len, complex_elem_size );
if( even ) if( even )
CopyColumn( dbuf1, complex_elem_size, CopyColumn( dbuf1, complex_elem_size,
dptr0 + b*complex_elem_size, dptr0 + b*complex_elem_size,
dst.step, len, complex_elem_size ); dst_step, len, complex_elem_size );
dptr0 += complex_elem_size; dptr0 += complex_elem_size;
} }
} }
for( i = a; i < b; i += 2 ) for(int i = a; i < b; i += 2 )
{ {
if( i+1 < b ) if( i+1 < b )
{ {
CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size ); CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size );
dft_func( buf1, dbuf1, len, nf, factors, itab, hal::dftRun(contextB, buf1, dbuf1);
wave, len, spec, ptr, inv, scale );
} }
else else
CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size ); CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size );
dft_func( buf0, dbuf0, len, nf, factors, itab, hal::dftRun(contextB, buf0, dbuf0);
wave, len, spec, ptr, inv, scale );
if( i+1 < b ) if( i+1 < b )
CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size ); CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size );
else else
CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size ); CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, complex_elem_size );
sptr0 += 2*complex_elem_size; sptr0 += 2*complex_elem_size;
dptr0 += 2*complex_elem_size; dptr0 += 2*complex_elem_size;
} }
if(isLastStage && mode == FwdRealToComplex)
complementComplexOutput(depth, dst_data, dst_step, count, len, 2);
}
};
class OcvDftBasicImpl
{
public:
OcvDftOptions opt;
int _factors[34];
AutoBuffer<uchar> wave_buf;
AutoBuffer<int> itab_buf;
#ifdef USE_IPP_DFT
AutoBuffer<uchar> ippbuf;
AutoBuffer<uchar> ippworkbuf;
#endif
if( stage != 0 ) public:
OcvDftBasicImpl()
{ {
if( !inv && real_transform && dst.channels() == 2 && len > 1 ) opt.factors = _factors;
complementComplexOutput(dst, len, 2);
break;
} }
src = dst; OcvDftBasicImpl & operator=(const OcvDftBasicImpl & other)
{
this->opt = other.opt;
return *this;
}
void init(int len, int count, int depth, int flags, bool *needBuffer)
{
int prev_len = opt.n;
int stage = (flags & CV_HAL_DFT_STAGE_COLS) != 0 ? 1 : 0;
int complex_elem_size = depth == CV_32F ? sizeof(Complex<float>) : sizeof(Complex<double>);
opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != 0;
opt.isComplex = (stage == 0) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != 0;
bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != 0;
opt.scale = 1;
opt.tab_size = len;
opt.n = len;
opt.useIpp = false;
#ifdef USE_IPP_DFT
opt.ipp_spec = 0;
opt.ipp_work = 0;
if( CV_IPP_CHECK_COND && (opt.n*count >= 64) ) // use IPP DFT if available
{
int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == 0 ? 8 : opt.isInverse ? 2 : 1;
int specsize=0, initsize=0, worksize=0;
IppDFTGetSizeFunc getSizeFunc = 0;
IppDFTInitFunc initFunc = 0;
if( real_transform && stage == 0 )
{
if( depth == CV_32F )
{
getSizeFunc = ippsDFTGetSize_R_32f;
initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
}
else
{
getSizeFunc = ippsDFTGetSize_R_64f;
initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
}
}
else
{
if( depth == CV_32F )
{
getSizeFunc = ippsDFTGetSize_C_32fc;
initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
}
else
{
getSizeFunc = ippsDFTGetSize_C_64fc;
initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
}
}
if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
{
ippbuf.allocate(specsize + initsize + 64);
opt.ipp_spec = alignPtr(&ippbuf[0], 32);
ippworkbuf.allocate(worksize + 32);
opt.ipp_work = alignPtr(&ippworkbuf[0], 32);
uchar* initbuf = alignPtr((uchar*)opt.ipp_spec + specsize, 32);
if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= 0 )
opt.useIpp = true;
}
else
setIppErrorStatus();
}
#endif
if (!opt.useIpp)
{
if (len != prev_len)
{
opt.nf = DFTFactorize( opt.n, opt.factors );
}
bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform))
{
wave_buf.allocate(opt.n*complex_elem_size);
opt.wave = wave_buf;
itab_buf.allocate(opt.n);
opt.itab = itab_buf;
DFTInit( opt.n, opt.nf, opt.factors, opt.itab, complex_elem_size,
opt.wave, stage == 0 && opt.isInverse && real_transform );
}
// otherwise reuse the tables calculated on the previous stage
if (needBuffer)
{
if( (stage == 0 && ((*needBuffer && !inplace_transform) || (real_transform && (len & 1)))) ||
(stage == 1 && !inplace_transform) )
{
*needBuffer = true;
}
}
}
else
{
if (needBuffer)
{
*needBuffer = false;
}
}
{
static DFTFunc dft_tbl[6] =
{
(DFTFunc)DFT_32f,
(DFTFunc)RealDFT_32f,
(DFTFunc)CCSIDFT_32f,
(DFTFunc)DFT_64f,
(DFTFunc)RealDFT_64f,
(DFTFunc)CCSIDFT_64f
};
int idx = 0;
if (stage == 0)
{
if (real_transform)
{
if (!opt.isInverse)
idx = 1;
else
idx = 2;
}
}
if (depth == CV_64F)
idx += 3;
opt.dft_func = dft_tbl[idx];
}
if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != 0)
{
int rowCount = count;
if (stage == 0 && (flags & CV_HAL_DFT_ROWS) != 0)
rowCount = 1;
opt.scale = 1./(len * rowCount);
}
}
void run(const void * src, void * dst)
{
opt.dft_func(opt, src, dst);
}
void free() {}
};
namespace hal {
//================== 1D ======================
void dftInit(DftContext & context, int len, int count, int depth, int flags, bool *needBuffer)
{
int res = cv_hal_dftInit(&context.impl, len, count, depth, flags, needBuffer);
if (res == CV_HAL_ERROR_OK)
{
context.useReplacement = true;
return;
}
context.useReplacement = false;
OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
if (!c)
{
c = new OcvDftBasicImpl();
context.impl = (void*)c;
}
c->init(len, count, depth, flags, needBuffer);
}
void dftRun(const DftContext & context, const void * src, void * dst)
{
if (context.useReplacement)
{
int res = cv_hal_dftRun(context.impl, src, dst);
if (res != CV_HAL_ERROR_OK)
{
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun");
}
return;
}
OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
c->run(src, dst);
}
void dftFree(DftContext & context)
{
if (context.useReplacement)
{
int res = cv_hal_dftFree(context.impl);
if (res != CV_HAL_ERROR_OK)
{
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree");
}
return;
}
OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
if (c)
{
c->free();
delete c;
context.impl = 0;
} }
}
//================== 2D ======================
void dftInit2D(DftContext & c,
int _width, int _height, int _depth, int _src_channels, int _dst_channels,
int flags,
int _nonzero_rows)
{
int res = cv_hal_dftInit2D(&c.impl, _width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows);
if (res == CV_HAL_ERROR_OK)
{
c.useReplacement = true;
return;
}
c.useReplacement = false;
if( _width == 1 && _nonzero_rows > 0 )
CV_Error( CV_StsNotImplemented,
"This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
"For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
OcvDftImpl * d = new OcvDftImpl();
d->init(_width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows);
c.impl = (void*)d;
}
void dftRun2D(const DftContext & c,
const void * src, int src_step, void * dst, int dst_step)
{
if (c.useReplacement)
{
int res = cv_hal_dftRun2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step);
if (res != CV_HAL_ERROR_OK)
{
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun2D");
}
return;
}
OcvDftImpl * d = (OcvDftImpl*)c.impl;
d->run((uchar*)src, src_step, (uchar*)dst, dst_step);
}
void dftFree2D(DftContext & c)
{
if (c.useReplacement)
{
int res = cv_hal_dftFree2D(c.impl);
if (res != CV_HAL_ERROR_OK)
{
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree2D");
}
return;
} }
OcvDftImpl * d = (OcvDftImpl*)c.impl;
d->free();
delete d;
c.impl = 0;
}
} // cv::hal::
} // cv::
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{
#ifdef HAVE_CLAMDFFT
CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
_dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
ocl_dft_amdfft(_src0, _dst, flags))
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
ocl_dft(_src0, _dst, flags, nonzero_rows))
#endif
Mat src0 = _src0.getMat(), src = src0;
bool inv = (flags & DFT_INVERSE) != 0;
int type = src.type();
int depth = src.depth();
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
_dst.create( src.size(), depth );
else
_dst.create( src.size(), type );
Mat dst = _dst.getMat();
int f = 0;
if (src.isContinuous() && dst.isContinuous())
f |= CV_HAL_DFT_IS_CONTINUOUS;
if (inv)
f |= CV_HAL_DFT_INVERSE;
if (flags & DFT_ROWS)
f |= CV_HAL_DFT_ROWS;
if (flags & DFT_SCALE)
f |= CV_HAL_DFT_SCALE;
if (src.data == dst.data)
f |= CV_HAL_DFT_IS_INPLACE;
hal::DftContext c;
hal::dftInit2D(c, src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows);
hal::dftRun2D(c, src.data, (int)src.step, dst.data, (int)dst.step);
hal::dftFree2D(c);
} }
...@@ -3117,11 +3607,12 @@ namespace cv ...@@ -3117,11 +3607,12 @@ namespace cv
http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/: http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
*/ */
template<typename T> static void template<typename T> static void
DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave, const Complex<T>* dct_wave )
const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
{ {
static const T sin_45 = (T)0.70710678118654752440084436210485; static const T sin_45 = (T)0.70710678118654752440084436210485;
int n = c.n;
int j, n2 = n >> 1; int j, n2 = n >> 1;
src_step /= sizeof(src[0]); src_step /= sizeof(src[0]);
...@@ -3140,8 +3631,7 @@ DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, ...@@ -3140,8 +3631,7 @@ DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
dft_src[n-j-1] = src[src_step]; dft_src[n-j-1] = src[src_step];
} }
RealDFT( dft_src, dft_dst, n, nf, factors, RealDFT(c, dft_src, dft_dst);
itab, dft_wave, n, spec, buf, 0, 1.0 );
src = dft_dst; src = dft_dst;
dst[0] = (T)(src[0]*dct_wave->re*sin_45); dst[0] = (T)(src[0]*dct_wave->re*sin_45);
...@@ -3160,11 +3650,11 @@ DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, ...@@ -3160,11 +3650,11 @@ DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
template<typename T> static void template<typename T> static void
IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, IDCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave, const Complex<T>* dct_wave)
const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
{ {
static const T sin_45 = (T)0.70710678118654752440084436210485; static const T sin_45 = (T)0.70710678118654752440084436210485;
int n = c.n;
int j, n2 = n >> 1; int j, n2 = n >> 1;
src_step /= sizeof(src[0]); src_step /= sizeof(src[0]);
...@@ -3189,8 +3679,7 @@ IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, ...@@ -3189,8 +3679,7 @@ IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
} }
dft_src[n-1] = (T)(src[0]*2*dct_wave->re); dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
CCSIDFT( dft_src, dft_dst, n, nf, factors, itab, CCSIDFT(c, dft_src, dft_dst);
dft_wave, n, spec, buf, 0, 1.0 );
for( j = 0; j < n2; j++, dst += dst_step*2 ) for( j = 0; j < n2; j++, dst += dst_step*2 )
{ {
...@@ -3279,41 +3768,31 @@ DCTInit( int n, int elem_size, void* _wave, int inv ) ...@@ -3279,41 +3768,31 @@ DCTInit( int n, int elem_size, void* _wave, int inv )
} }
typedef void (*DCTFunc)(const void* src, int src_step, void* dft_src, typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, int src_step, void* dft_src,
void* dft_dst, void* dst, int dst_step, int n, void* dft_dst, void* dst, int dst_step, const void* dct_wave);
int nf, int* factors, const int* itab, const void* dft_wave,
const void* dct_wave, const void* spec, void* buf );
static void DCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst, static void DCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst,
float* dst, int dst_step, int n, int nf, int* factors, const int* itab, float* dst, int dst_step, const Complexf* dct_wave)
const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
{ {
DCT(src, src_step, dft_src, dft_dst, dst, dst_step, DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
} }
static void IDCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst, static void IDCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst,
float* dst, int dst_step, int n, int nf, int* factors, const int* itab, float* dst, int dst_step, const Complexf* dct_wave)
const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
{ {
IDCT(src, src_step, dft_src, dft_dst, dst, dst_step, IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
} }
static void DCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst, static void DCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst,
double* dst, int dst_step, int n, int nf, int* factors, const int* itab, double* dst, int dst_step, const Complexd* dct_wave)
const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
{ {
DCT(src, src_step, dft_src, dft_dst, dst, dst_step, DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
} }
static void IDCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst, static void IDCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst,
double* dst, int dst_step, int n, int nf, int* factors, const int* itab, double* dst, int dst_step, const Complexd* dct_wave)
const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
{ {
IDCT(src, src_step, dft_src, dft_dst, dst, dst_step, IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
} }
} }
...@@ -3336,8 +3815,8 @@ typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*); ...@@ -3336,8 +3815,8 @@ typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
class DctIPPLoop_Invoker : public ParallelLoopBody class DctIPPLoop_Invoker : public ParallelLoopBody
{ {
public: public:
DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, bool _inv, bool *_ok) : DctIPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, bool _inv, bool *_ok) :
ParallelLoopBody(), src(&_src), dst(&_dst), inv(_inv), ok(_ok) ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok)
{ {
*ok = true; *ok = true;
} }
...@@ -3348,7 +3827,7 @@ public: ...@@ -3348,7 +3827,7 @@ public:
return; return;
#if IPP_VERSION_X100 >= 900 #if IPP_VERSION_X100 >= 900
IppiSize srcRoiSize = {src->cols, 1}; IppiSize srcRoiSize = {width, 1};
int specSize = 0; int specSize = 0;
int initSize = 0; int initSize = 0;
...@@ -3405,7 +3884,7 @@ public: ...@@ -3405,7 +3884,7 @@ public:
for(int i = range.start; i < range.end; ++i) for(int i = range.start; i < range.end; ++i)
{ {
if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, pBuffer) < 0) if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, pBuffer) < 0)
{ {
*ok = false; *ok = false;
IPP_RETURN IPP_RETURN
...@@ -3419,7 +3898,7 @@ public: ...@@ -3419,7 +3898,7 @@ public:
uchar* pBuffer = 0; uchar* pBuffer = 0;
int bufSize=0; int bufSize=0;
IppiSize srcRoiSize = {src->cols, 1}; IppiSize srcRoiSize = {width, 1};
CV_SUPPRESS_DEPRECATED_START CV_SUPPRESS_DEPRECATED_START
...@@ -3435,7 +3914,7 @@ public: ...@@ -3435,7 +3914,7 @@ public:
for( int i = range.start; i < range.end; ++i) for( int i = range.start; i < range.end; ++i)
{ {
if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer) < 0) if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, (Ipp8u*)pBuffer) < 0)
{ {
*ok = false; *ok = false;
break; break;
...@@ -3456,27 +3935,30 @@ public: ...@@ -3456,27 +3935,30 @@ public:
} }
private: private:
const Mat* src; const uchar * src;
Mat* dst; int src_step;
uchar * dst;
int dst_step;
int width;
bool inv; bool inv;
bool *ok; bool *ok;
}; };
static bool DctIPPLoop(const Mat& src, Mat& dst, bool inv) static bool DctIPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv)
{ {
bool ok; bool ok;
parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker(src, dst, inv, &ok), src.rows/(double)(1<<4) ); parallel_for_(Range(0, height), DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), height/(double)(1<<4) );
return ok; return ok;
} }
static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, bool row)
{ {
if(row) if(row)
return DctIPPLoop(src, dst, inv); return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv);
else else
{ {
#if IPP_VERSION_X100 >= 900 #if IPP_VERSION_X100 >= 900
IppiSize srcRoiSize = {src.cols, src.rows}; IppiSize srcRoiSize = {width, height};
int specSize = 0; int specSize = 0;
int initSize = 0; int initSize = 0;
...@@ -3524,7 +4006,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) ...@@ -3524,7 +4006,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
return false; return false;
} }
if(ippDctFun(src.ptr<float>(), (int)src.step,dst.ptr<float>(), (int)dst.step, pDCTSpec, pBuffer) < 0) if(ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, pBuffer) < 0)
{ {
IPP_RELEASE IPP_RELEASE
return false; return false;
...@@ -3540,7 +4022,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) ...@@ -3540,7 +4022,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
uchar* pBuffer = 0; uchar* pBuffer = 0;
int bufSize=0; int bufSize=0;
IppiSize srcRoiSize = {src.cols, src.rows}; IppiSize srcRoiSize = {width, height};
CV_SUPPRESS_DEPRECATED_START CV_SUPPRESS_DEPRECATED_START
...@@ -3556,7 +4038,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) ...@@ -3556,7 +4038,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
buf.allocate( bufSize ); buf.allocate( bufSize );
pBuffer = (uchar*)buf; pBuffer = (uchar*)buf;
status = ippDctFun(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer); status = ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, (Ipp8u*)pBuffer);
} }
if (pDCTSpec) if (pDCTSpec)
...@@ -3574,8 +4056,35 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) ...@@ -3574,8 +4056,35 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
} }
#endif #endif
void cv::dct( InputArray _src0, OutputArray _dst, int flags ) namespace cv {
class OcvDctImpl
{ {
public:
OcvDftOptions opt;
int _factors[34];
AutoBuffer<uint> wave_buf;
AutoBuffer<int> itab_buf;
DCTFunc dct_func;
bool isRowTransform;
bool isInverse;
bool isContinuous;
int start_stage;
int end_stage;
int width;
int height;
int depth;
void init(int _width, int _height, int _depth, int flags)
{
width = _width;
height = _height;
depth = _depth;
isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
static DCTFunc dct_tbl[4] = static DCTFunc dct_tbl[4] =
{ {
(DCTFunc)DCT_32f, (DCTFunc)DCT_32f,
...@@ -3583,131 +4092,183 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags ) ...@@ -3583,131 +4092,183 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
(DCTFunc)DCT_64f, (DCTFunc)DCT_64f,
(DCTFunc)IDCT_64f (DCTFunc)IDCT_64f
}; };
dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*2];
opt.nf = 0;
opt.isComplex = false;
opt.isInverse = false;
opt.noPermute = false;
opt.scale = 1.;
opt.factors = _factors;
bool inv = (flags & DCT_INVERSE) != 0; if (isRowTransform || height == 1 || (width == 1 && isContinuous))
Mat src0 = _src0.getMat(), src = src0;
int type = src.type(), depth = src.depth();
void *spec = 0;
double scale = 1.;
int prev_len = 0, nf = 0, stage, end_stage;
uchar *src_dft_buf = 0, *dst_dft_buf = 0;
uchar *dft_wave = 0, *dct_wave = 0;
int* itab = 0;
uchar* ptr = 0;
int elem_size = (int)src.elemSize(), complex_elem_size = elem_size*2;
int factors[34], inplace_transform;
int i, len, count;
AutoBuffer<uchar> buf;
CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
_dst.create( src.rows, src.cols, type );
Mat dst = _dst.getMat();
CV_IPP_RUN(IPP_VERSION_X100 >= 700 && src.type() == CV_32F, ippi_DCT_32f(src, dst, inv, ((flags & DCT_ROWS) != 0)))
DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
if( (flags & DCT_ROWS) || src.rows == 1 ||
(src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
{ {
stage = end_stage = 0; start_stage = end_stage = 0;
} }
else else
{ {
stage = src.cols == 1; start_stage = (width == 1);
end_stage = 1; end_stage = 1;
} }
}
void run(uchar * src, int src_step, uchar * dst, int dst_step)
{
CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform))
for( ; stage <= end_stage; stage++ ) AutoBuffer<uchar> dct_wave;
AutoBuffer<uchar> src_buf, dst_buf;
uchar *src_dft_buf = 0, *dst_dft_buf = 0;
int prev_len = 0;
int elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
int complex_elem_size = elem_size*2;
for(int stage = start_stage ; stage <= end_stage; stage++ )
{ {
const uchar* sptr = src.ptr(); const uchar* sptr = src;
uchar* dptr = dst.ptr(); uchar* dptr = dst;
size_t sstep0, sstep1, dstep0, dstep1; size_t sstep0, sstep1, dstep0, dstep1;
int len, count;
if( stage == 0 ) if( stage == 0 )
{ {
len = src.cols; len = width;
count = src.rows; count = height;
if( len == 1 && !(flags & DCT_ROWS) ) if( len == 1 && !isRowTransform )
{ {
len = src.rows; len = height;
count = 1; count = 1;
} }
sstep0 = src.step; sstep0 = src_step;
dstep0 = dst.step; dstep0 = dst_step;
sstep1 = dstep1 = elem_size; sstep1 = dstep1 = elem_size;
} }
else else
{ {
len = dst.rows; len = height;
count = dst.cols; count = width;
sstep1 = src.step; sstep1 = src_step;
dstep1 = dst.step; dstep1 = dst_step;
sstep0 = dstep0 = elem_size; sstep0 = dstep0 = elem_size;
} }
opt.n = len;
opt.tab_size = len;
if( len != prev_len ) if( len != prev_len )
{ {
int sz;
if( len > 1 && (len & 1) ) if( len > 1 && (len & 1) )
CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" ); CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
sz = len*elem_size; opt.nf = DFTFactorize( len, opt.factors );
sz += (len/2 + 1)*complex_elem_size; bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
spec = 0; wave_buf.allocate(len*complex_elem_size);
inplace_transform = 1; opt.wave = wave_buf;
{ itab_buf.allocate(len);
sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size; opt.itab = itab_buf;
DFTInit( len, opt.nf, opt.factors, opt.itab, complex_elem_size, opt.wave, isInverse );
nf = DFTFactorize( len, factors );
inplace_transform = factors[0] == factors[nf-1];
i = nf > 1 && (factors[0] & 1) == 0; dct_wave.allocate((len/2 + 1)*complex_elem_size);
if( (factors[i] & 1) != 0 && factors[i] > 5 ) src_buf.allocate(len*elem_size);
sz += (factors[i]+1)*complex_elem_size; src_dft_buf = src_buf;
if(!inplace_transform)
{
dst_buf.allocate(len*elem_size);
dst_dft_buf = dst_buf;
}
else
{
dst_dft_buf = src_buf;
}
DCTInit( len, complex_elem_size, dct_wave, isInverse);
prev_len = len;
}
// otherwise reuse the tables calculated on the previous stage
for(int i = 0; i < count; i++ )
{
dct_func( opt, sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf,
dptr + i*dstep0, (int)dstep1, dct_wave);
}
src = dst;
src_step = dst_step;
}
if( !inplace_transform )
sz += len*elem_size;
} }
void free() {}
};
buf.allocate( sz + 32 ); namespace hal {
ptr = (uchar*)buf;
if( !spec ) void dctInit(DftContext & c, int width, int height, int depth, int flags)
{
int res = cv_hal_dctInit(&c.impl, width, height, depth, flags);
if (res == CV_HAL_ERROR_OK)
{ {
dft_wave = ptr; c.useReplacement = true;
ptr += len*complex_elem_size; return;
itab = (int*)ptr;
ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
DFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv );
} }
c.useReplacement = false;
OcvDctImpl * impl = new OcvDctImpl();
impl->init(width, height, depth, flags);
c.impl = impl;
}
dct_wave = ptr; void dctRun(const DftContext & c, const void * src, int src_step, void * dst, int dst_step)
ptr += (len/2 + 1)*complex_elem_size; {
src_dft_buf = dst_dft_buf = ptr; if (c.useReplacement)
ptr += len*elem_size; {
if( !inplace_transform ) int res = cv_hal_dctRun(c.impl, src, src_step, dst, dst_step);
if (res != CV_HAL_ERROR_OK)
{ {
dst_dft_buf = ptr; CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctRun");
ptr += len*elem_size;
} }
DCTInit( len, complex_elem_size, dct_wave, inv ); return;
if( !inv )
scale += scale;
prev_len = len;
} }
// otherwise reuse the tables calculated on the previous stage OcvDctImpl * impl = (OcvDctImpl*)c.impl;
for( i = 0; i < count; i++ ) impl->run((uchar*)src, src_step, (uchar*)dst, dst_step);
}
void dctFree(DftContext & c)
{
if (c.useReplacement)
{
int res = cv_hal_dctFree(c.impl);
if (res != CV_HAL_ERROR_OK)
{ {
dct_func( sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf, CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctFree");
dptr + i*dstep0, (int)dstep1, len, nf, factors,
itab, dft_wave, dct_wave, spec, ptr );
} }
src = dst; return;
} }
OcvDctImpl * impl = (OcvDctImpl*)c.impl;
impl->free();
delete impl;
c.impl = 0;
}
} // cv::hal::
} // cv::
void cv::dct( InputArray _src0, OutputArray _dst, int flags )
{
Mat src0 = _src0.getMat(), src = src0;
int type = src.type(), depth = src.depth();
CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
_dst.create( src.rows, src.cols, type );
Mat dst = _dst.getMat();
int f = 0;
if ((flags & DFT_ROWS) != 0)
f |= CV_HAL_DFT_ROWS;
if ((flags & DCT_INVERSE) != 0)
f |= CV_HAL_DFT_INVERSE;
if (src.isContinuous() && dst.isContinuous())
f |= CV_HAL_DFT_IS_CONTINUOUS;
hal::DftContext c;
hal::dctInit(c, src.cols, src.rows, depth, f);
hal::dctRun(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step);
hal::dctFree(c);
} }
......
...@@ -384,6 +384,31 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int ...@@ -384,6 +384,31 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int
# pragma warning( pop ) # pragma warning( pop )
#endif #endif
inline int hal_ni_dftInit(void**, int, int, int, int, bool*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
inline int hal_ni_dftRun(const void*, const void*, void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
inline int hal_ni_dftFree(void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
#define cv_hal_dftInit hal_ni_dftInit
#define cv_hal_dftRun hal_ni_dftRun
#define cv_hal_dftFree hal_ni_dftFree
inline int hal_ni_dftInit2D(void **, int, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
inline int hal_ni_dftRun2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
inline int hal_ni_dftFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
#define cv_hal_dftInit2D hal_ni_dftInit2D
#define cv_hal_dftRun2D hal_ni_dftRun2D
#define cv_hal_dftFree2D hal_ni_dftFree2D
inline int hal_ni_dctInit(void **, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
inline int hal_ni_dctRun(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
inline int hal_ni_dctFree(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
#define cv_hal_dctInit hal_ni_dctInit
#define cv_hal_dctRun hal_ni_dctRun
#define cv_hal_dctFree hal_ni_dctFree
#include "custom_hal.hpp" #include "custom_hal.hpp"
#endif #endif
...@@ -887,3 +887,79 @@ TEST(Core_DFT, complex_output2) ...@@ -887,3 +887,79 @@ TEST(Core_DFT, complex_output2)
} }
} }
} }
class Core_DXTReverseTest : public cvtest::BaseTest
{
public:
enum Mode
{
ModeDFT,
ModeDCT
};
Core_DXTReverseTest(Mode m) : mode(m) {}
private:
Mode mode;
protected:
void run(int)
{
for (int i = 0; i < 3; ++i)
{
if (mode == ModeDCT && i != 0)
continue;
int flags = 0;
int flags_inv = DFT_INVERSE | DFT_SCALE;
int cn_in = 0;
int cn_out = 0;
switch (i)
{
case 0: cn_in = 1; cn_out = 1; break;
case 1: cn_in = 1; cn_out = 2; flags |= DFT_COMPLEX_OUTPUT; flags_inv |= DFT_REAL_OUTPUT; break;
case 2: cn_in = 2; cn_out = 2; break;
};
for (int j = 0; j < 100; ++j)
{
RNG& rng = ts->get_rng();
int type = rng.uniform(0, 2) ? CV_64F : CV_32F;
int m = rng.uniform(1, 10);
int n = rng.uniform(1, 10);
if (mode == ModeDCT)
{
m *= 2;
n *= 2;
}
Mat one(m, n, CV_MAKETYPE(type, cn_in));
cvtest::randUni(rng, one, Scalar::all(-1.), Scalar::all(1.));
Mat out;
Mat two;
if (mode == ModeDFT)
{
cv::dft(one, out, flags);
cv::dft(out, two, flags_inv);
}
else if (mode == ModeDCT)
{
cv::dct(one, out, flags);
cv::dct(out, two, flags_inv);
}
if (out.channels() != cn_out || two.channels() != cn_in || cvtest::norm(one, two, NORM_INF) > 1e-5)
{
cout << "Test #" << j + 1 << " - "
<< "elements: " << m << " x " << n << ", "
<< "channels: "
<< one.channels() << " (" << cn_in << ")" << " -> "
<< out.channels() << " (" << cn_out << ")" << " -> "
<< two.channels() << " (" << cn_in << ")"
<< endl;
cout << "signal:\n" << one << endl << endl;
cout << "spectrum:\n" << out << endl << endl;
cout << "inverse:\n" << two << endl << endl;
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
break;
}
}
}
}
};
TEST(Core_DFT, reverse) { Core_DXTReverseTest test(Core_DXTReverseTest::ModeDFT); test.safe_run(); }
TEST(Core_DCT, reverse) { Core_DXTReverseTest test(Core_DXTReverseTest::ModeDCT); test.safe_run(); }
...@@ -632,6 +632,8 @@ static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst) ...@@ -632,6 +632,8 @@ static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst)
#endif #endif
#include "opencv2/core/hal/hal.hpp"
void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
Size corrsize, int ctype, Size corrsize, int ctype,
Point anchor, double delta, int borderType ) Point anchor, double delta, int borderType )
...@@ -698,6 +700,9 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -698,6 +700,9 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
buf.resize(bufSize); buf.resize(bufSize);
hal::DftContext c;
hal::dftInit2D(c, dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows);
// compute DFT of each template plane // compute DFT of each template plane
for( k = 0; k < tcn; k++ ) for( k = 0; k < tcn; k++ )
{ {
...@@ -721,9 +726,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -721,9 +726,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols)); Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
part = Scalar::all(0); part = Scalar::all(0);
} }
dft(dst, dst, 0, templ.rows); hal::dftRun2D(c, dst.data, (int)dst.step, dst.data, (int)dst.step);
} }
hal::dftFree2D(c);
int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width; int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height; int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
int tileCount = tileCountX * tileCountY; int tileCount = tileCountX * tileCountY;
...@@ -740,6 +747,16 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -740,6 +747,16 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
} }
borderType |= BORDER_ISOLATED; borderType |= BORDER_ISOLATED;
bool useHalDft = tileCount > 1;
hal::DftContext cF, cR;
if (useHalDft)
{
int f = CV_HAL_DFT_IS_INPLACE;
int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE;
hal::dftInit2D(cF, dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1);
hal::dftInit2D(cR, dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height);
}
// calculate correlation by blocks // calculate correlation by blocks
for( i = 0; i < tileCount; i++ ) for( i = 0; i < tileCount; i++ )
{ {
...@@ -777,10 +794,18 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -777,10 +794,18 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0), copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
if (useHalDft && bsz.height == blocksize.height)
hal::dftRun2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
else
dft( dftImg, dftImg, 0, dsz.height ); dft( dftImg, dftImg, 0, dsz.height );
Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0, Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
dftsize.width, dftsize.height)); dftsize.width, dftsize.height));
mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
if (useHalDft && bsz.height == blocksize.height)
hal::dftRun2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
else
dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
src = dftImg(Rect(0, 0, bsz.width, bsz.height)); src = dftImg(Rect(0, 0, bsz.width, bsz.height));
...@@ -813,6 +838,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -813,6 +838,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
} }
} }
} }
if (useHalDft)
{
hal::dftFree2D(cF);
hal::dftFree2D(cR);
}
} }
static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask ) static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment