Commit 233612ef authored by Maksim Shabunin's avatar Maksim Shabunin

Reworked HAL dft/dct interface, added replacement documentation

parent f40d7014
...@@ -187,24 +187,28 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, ...@@ -187,24 +187,28 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2,
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
struct DftContext struct CV_EXPORTS DFT1D
{ {
void * impl; static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
bool useReplacement; virtual void apply(const uchar *src, uchar *dst) = 0;
DftContext() : impl(0), useReplacement(false) {} virtual ~DFT1D() {}
}; };
CV_EXPORTS void dftInit2D(DftContext & c, int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows = 0); struct CV_EXPORTS DFT2D
CV_EXPORTS void dft2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); {
CV_EXPORTS void dftFree2D(DftContext & c); static Ptr<DFT2D> create(int width, int height, int depth,
int src_channels, int dst_channels,
CV_EXPORTS void dftInit1D(DftContext & c, int len, int count, int depth, int flags, bool * useBuffer = 0); int flags, int nonzero_rows = 0);
CV_EXPORTS void dft1D(const DftContext & c, const void * src, void * dst); virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
CV_EXPORTS void dftFree1D(DftContext & c); virtual ~DFT2D() {}
};
CV_EXPORTS void dctInit2D(DftContext & c, int width, int height, int depth, int flags); struct CV_EXPORTS DCT2D
CV_EXPORTS void dct2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); {
CV_EXPORTS void dctFree2D(DftContext & c); static Ptr<DCT2D> create(int width, int height, int depth, int flags);
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
virtual ~DCT2D() {}
};
//! @} core_hal //! @} core_hal
......
...@@ -11,21 +11,11 @@ ...@@ -11,21 +11,11 @@
#define CV_HAL_ERROR_UNKNOWN -1 #define CV_HAL_ERROR_UNKNOWN -1
//! @} //! @}
#define CV_HAL_DFT_INVERSE 1
#define CV_HAL_DFT_SCALE 2
#define CV_HAL_DFT_ROWS 4
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
#define CV_HAL_DFT_REAL_OUTPUT 32
#define CV_HAL_DFT_TWO_STAGE 64
#define CV_HAL_DFT_STAGE_COLS 128
#define CV_HAL_DFT_IS_CONTINUOUS 512
#define CV_HAL_DFT_IS_INPLACE 1024
#ifdef __cplusplus #ifdef __cplusplus
#include <cstddef> #include <cstddef>
#else #else
#include <stddef.h> #include <stddef.h>
#include <stdbool.h>
#endif #endif
//! @name Data types //! @name Data types
...@@ -155,6 +145,19 @@ typedef signed char schar; ...@@ -155,6 +145,19 @@ typedef signed char schar;
#define CV_HAL_BORDER_ISOLATED 16 #define CV_HAL_BORDER_ISOLATED 16
//! @} //! @}
//! @name DFT flags
//! @{
#define CV_HAL_DFT_INVERSE 1
#define CV_HAL_DFT_SCALE 2
#define CV_HAL_DFT_ROWS 4
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
#define CV_HAL_DFT_REAL_OUTPUT 32
#define CV_HAL_DFT_TWO_STAGE 64
#define CV_HAL_DFT_STAGE_COLS 128
#define CV_HAL_DFT_IS_CONTINUOUS 512
#define CV_HAL_DFT_IS_INPLACE 1024
//! @}
//! @} //! @}
#endif #endif
...@@ -1553,7 +1553,7 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody ...@@ -1553,7 +1553,7 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
{ {
public: public:
Dft_C_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, Dft_C_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width,
const Dft& _ippidft, int _norm_flag, bool *_ok) : const Dft& _ippidft, int _norm_flag, bool *_ok) :
ParallelLoopBody(), ParallelLoopBody(),
src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
...@@ -1617,7 +1617,7 @@ public: ...@@ -1617,7 +1617,7 @@ public:
} }
private: private:
uchar * src; const uchar * src;
int src_step; int src_step;
uchar * dst; uchar * dst;
int dst_step; int dst_step;
...@@ -1634,7 +1634,7 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody ...@@ -1634,7 +1634,7 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
{ {
public: public:
Dft_R_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, Dft_R_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width,
const Dft& _ippidft, int _norm_flag, bool *_ok) : const Dft& _ippidft, int _norm_flag, bool *_ok) :
ParallelLoopBody(), ParallelLoopBody(),
src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
...@@ -1698,7 +1698,7 @@ public: ...@@ -1698,7 +1698,7 @@ public:
} }
private: private:
uchar * src; const uchar * src;
int src_step; int src_step;
uchar * dst; uchar * dst;
int dst_step; int dst_step;
...@@ -1711,7 +1711,7 @@ private: ...@@ -1711,7 +1711,7 @@ private:
}; };
template <typename Dft> template <typename Dft>
bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) bool Dft_C_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag)
{ {
bool ok; bool ok;
parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
...@@ -1719,7 +1719,7 @@ bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int wid ...@@ -1719,7 +1719,7 @@ bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int wid
} }
template <typename Dft> template <typename Dft>
bool Dft_R_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) bool Dft_R_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag)
{ {
bool ok; bool ok;
parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
...@@ -1750,7 +1750,7 @@ private: ...@@ -1750,7 +1750,7 @@ private:
ippiDFT_R_Func func; ippiDFT_R_Func func;
}; };
static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) static bool ippi_DFT_C_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag)
{ {
IppStatus status; IppStatus status;
Ipp8u* pBuffer = 0; Ipp8u* pBuffer = 0;
...@@ -1804,7 +1804,7 @@ static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, ...@@ -1804,7 +1804,7 @@ static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step,
return false; return false;
} }
static bool ippi_DFT_R_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) static bool ippi_DFT_R_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag)
{ {
IppStatus status; IppStatus status;
Ipp8u* pBuffer = 0; Ipp8u* pBuffer = 0;
...@@ -2611,11 +2611,11 @@ inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuo ...@@ -2611,11 +2611,11 @@ inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuo
return InvalidDim; return InvalidDim;
} }
class OcvDftImpl class OcvDftImpl : public hal::DFT2D
{ {
protected: protected:
hal::DftContext contextA; Ptr<hal::DFT1D> contextA;
hal::DftContext contextB; Ptr<hal::DFT1D> contextB;
bool needBufferA; bool needBufferA;
bool needBufferB; bool needBufferB;
bool inv; bool inv;
...@@ -2763,7 +2763,7 @@ public: ...@@ -2763,7 +2763,7 @@ public:
count = height; count = height;
} }
needBufferA = isInplace; needBufferA = isInplace;
hal::dftInit1D(contextA, len, count, depth, f, &needBufferA); contextA = hal::DFT1D::create(len, count, depth, f, &needBufferA);
if (needBufferA) if (needBufferA)
tmp_bufA.allocate(len * complex_elem_size); tmp_bufA.allocate(len * complex_elem_size);
} }
...@@ -2773,7 +2773,7 @@ public: ...@@ -2773,7 +2773,7 @@ public:
count = width; count = width;
f |= CV_HAL_DFT_STAGE_COLS; f |= CV_HAL_DFT_STAGE_COLS;
needBufferB = isInplace; needBufferB = isInplace;
hal::dftInit1D(contextB, len, count, depth, f, &needBufferB); contextB = hal::DFT1D::create(len, count, depth, f, &needBufferB);
if (needBufferB) if (needBufferB)
tmp_bufB.allocate(len * complex_elem_size); tmp_bufB.allocate(len * complex_elem_size);
...@@ -2783,7 +2783,7 @@ public: ...@@ -2783,7 +2783,7 @@ public:
} }
} }
void run(uchar * src, int src_step, uchar * dst, int dst_step) void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step)
{ {
#if defined USE_IPP_DFT #if defined USE_IPP_DFT
if (useIpp) if (useIpp)
...@@ -2860,17 +2860,9 @@ public: ...@@ -2860,17 +2860,9 @@ public:
} }
} }
void free()
{
if (useIpp)
return;
hal::dftFree1D(contextA);
hal::dftFree1D(contextB);
}
protected: protected:
void rowDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage) void rowDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage)
{ {
int len, count; int len, count;
if (width == 1 && !isRowTransform ) if (width == 1 && !isRowTransform )
...@@ -2909,7 +2901,7 @@ protected: ...@@ -2909,7 +2901,7 @@ protected:
if( needBufferA ) if( needBufferA )
dptr = tmp_bufA; dptr = tmp_bufA;
hal::dft1D(contextA, sptr, dptr); contextA->apply(sptr, dptr);
if( needBufferA ) if( needBufferA )
memcpy( dptr0, dptr + dptr_offset, dst_full_len ); memcpy( dptr0, dptr + dptr_offset, dst_full_len );
...@@ -2924,7 +2916,7 @@ protected: ...@@ -2924,7 +2916,7 @@ protected:
complementComplexOutput(depth, dst_data, dst_step, len, nz, 1); complementComplexOutput(depth, dst_data, dst_step, len, nz, 1);
} }
void colDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) void colDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
{ {
int len = height; int len = height;
int count = width; int count = width;
...@@ -2983,8 +2975,8 @@ protected: ...@@ -2983,8 +2975,8 @@ protected:
} }
if( even ) if( even )
hal::dft1D(contextB, buf1, dbuf1); contextB->apply(buf1, dbuf1);
hal::dft1D(contextB, buf0, dbuf0); contextB->apply(buf0, dbuf0);
if( stage_dst_channels == 1 ) if( stage_dst_channels == 1 )
{ {
...@@ -3032,12 +3024,12 @@ protected: ...@@ -3032,12 +3024,12 @@ protected:
if( i+1 < b ) if( i+1 < b )
{ {
CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size ); CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size );
hal::dft1D(contextB, buf1, dbuf1); contextB->apply(buf1, dbuf1);
} }
else else
CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size ); CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size );
hal::dft1D(contextB, buf0, dbuf0); contextB->apply(buf0, dbuf0);
if( i+1 < b ) if( i+1 < b )
CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size ); CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size );
...@@ -3051,7 +3043,7 @@ protected: ...@@ -3051,7 +3043,7 @@ protected:
} }
}; };
class OcvDftBasicImpl class OcvDftBasicImpl : public hal::DFT1D
{ {
public: public:
OcvDftOptions opt; OcvDftOptions opt;
...@@ -3068,11 +3060,6 @@ public: ...@@ -3068,11 +3060,6 @@ public:
{ {
opt.factors = _factors; opt.factors = _factors;
} }
OcvDftBasicImpl & operator=(const OcvDftBasicImpl & other)
{
this->opt = other.opt;
return *this;
}
void init(int len, int count, int depth, int flags, bool *needBuffer) void init(int len, int count, int depth, int flags, bool *needBuffer)
{ {
int prev_len = opt.n; int prev_len = opt.n;
...@@ -3211,7 +3198,7 @@ public: ...@@ -3211,7 +3198,7 @@ public:
} }
} }
void run(const void * src, void * dst) void apply(const uchar *src, uchar *dst)
{ {
opt.dft_func(opt, src, dst); opt.dft_func(opt, src, dst);
} }
...@@ -3219,126 +3206,113 @@ public: ...@@ -3219,126 +3206,113 @@ public:
void free() {} void free() {}
}; };
namespace hal { struct ReplacementDFT1D : public hal::DFT1D
//================== 1D ======================
void dftInit1D(DftContext & context, int len, int count, int depth, int flags, bool *needBuffer)
{ {
int res = cv_hal_dftInit1D(&context.impl, len, count, depth, flags, needBuffer); cvhalDFT *context;
if (res == CV_HAL_ERROR_OK) bool isInitialized;
ReplacementDFT1D() : context(0), isInitialized(false) {}
bool init(int len, int count, int depth, int flags, bool *needBuffer)
{ {
context.useReplacement = true; int res = cv_hal_dftInit1D(&context, len, count, depth, flags, needBuffer);
return; isInitialized = (res == CV_HAL_ERROR_OK);
return isInitialized;
} }
void apply(const uchar *src, uchar *dst)
context.useReplacement = false;
OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
if (!c)
{ {
c = new OcvDftBasicImpl(); if (isInitialized)
context.impl = (void*)c; {
CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst);
}
} }
c->init(len, count, depth, flags, needBuffer); ~ReplacementDFT1D()
}
void dft1D(const DftContext & context, const void * src, void * dst)
{
if (context.useReplacement)
{ {
int res = cv_hal_dft1D(context.impl, src, dst); if (isInitialized)
if (res != CV_HAL_ERROR_OK)
{ {
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun"); CALL_HAL(dftFree1D, cv_hal_dftFree1D, context);
} }
return;
} }
OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; };
c->run(src, dst);
}
void dftFree1D(DftContext & context) struct ReplacementDFT2D : public hal::DFT2D
{ {
if (context.useReplacement) cvhalDFT *context;
bool isInitialized;
ReplacementDFT2D() : context(0), isInitialized(false) {}
bool init(int width, int height, int depth,
int src_channels, int dst_channels,
int flags, int nonzero_rows)
{
int res = cv_hal_dftInit2D(&context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
isInitialized = (res == CV_HAL_ERROR_OK);
return isInitialized;
}
void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step)
{ {
int res = cv_hal_dftFree1D(context.impl); if (isInitialized)
if (res != CV_HAL_ERROR_OK)
{ {
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree"); CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step);
} }
return;
} }
~ReplacementDFT2D()
OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl;
if (c)
{ {
c->free(); if (isInitialized)
delete c; {
context.impl = 0; CALL_HAL(dftFree2D, cv_hal_dftFree1D, context);
}
} }
} };
namespace hal {
//================== 2D ====================== //================== 1D ======================
void dftInit2D(DftContext & c, Ptr<DFT1D> DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer)
int _width, int _height, int _depth, int _src_channels, int _dst_channels,
int flags,
int _nonzero_rows)
{ {
int res = cv_hal_dftInit2D(&c.impl, _width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows);
if (res == CV_HAL_ERROR_OK)
{ {
c.useReplacement = true; ReplacementDFT1D *impl = new ReplacementDFT1D();
return; if (impl->init(len, count, depth, flags, needBuffer))
{
return Ptr<DFT1D>(impl);
}
delete impl;
}
{
OcvDftBasicImpl *impl = new OcvDftBasicImpl();
impl->init(len, count, depth, flags, needBuffer);
return Ptr<DFT1D>(impl);
} }
c.useReplacement = false;
if( _width == 1 && _nonzero_rows > 0 )
CV_Error( CV_StsNotImplemented,
"This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
"For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
OcvDftImpl * d = new OcvDftImpl();
d->init(_width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows);
c.impl = (void*)d;
} }
void dft2D(const DftContext & c, //================== 2D ======================
const void * src, int src_step, void * dst, int dst_step)
Ptr<DFT2D> DFT2D::create(int width, int height, int depth,
int src_channels, int dst_channels,
int flags, int nonzero_rows)
{ {
if (c.useReplacement)
{ {
int res = cv_hal_dft2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step); ReplacementDFT2D *impl = new ReplacementDFT2D();
if (res != CV_HAL_ERROR_OK) if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows))
{ {
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun2D"); return Ptr<DFT2D>(impl);
} }
return; delete impl;
} }
OcvDftImpl * d = (OcvDftImpl*)c.impl;
d->run((uchar*)src, src_step, (uchar*)dst, dst_step);
}
void dftFree2D(DftContext & c)
{
if (c.useReplacement)
{ {
int res = cv_hal_dftFree2D(c.impl); if(width == 1 && nonzero_rows > 0 )
if (res != CV_HAL_ERROR_OK)
{ {
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree2D"); CV_Error( CV_StsNotImplemented,
"This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
"For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
} }
return; OcvDftImpl *impl = new OcvDftImpl();
impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
return Ptr<DFT2D>(impl);
} }
OcvDftImpl * d = (OcvDftImpl*)c.impl;
d->free();
delete d;
c.impl = 0;
} }
} // cv::hal:: } // cv::hal::
} // cv:: } // cv::
...@@ -3382,10 +3356,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) ...@@ -3382,10 +3356,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
f |= CV_HAL_DFT_SCALE; f |= CV_HAL_DFT_SCALE;
if (src.data == dst.data) if (src.data == dst.data)
f |= CV_HAL_DFT_IS_INPLACE; f |= CV_HAL_DFT_IS_INPLACE;
hal::DftContext c; Ptr<hal::DFT2D> c = hal::DFT2D::create(src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows);
hal::dftInit2D(c, src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows); c->apply(src.data, src.step, dst.data, dst.step);
hal::dft2D(c, src.data, (int)src.step, dst.data, (int)dst.step);
hal::dftFree2D(c);
} }
...@@ -3607,7 +3579,7 @@ namespace cv ...@@ -3607,7 +3579,7 @@ namespace cv
http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/: http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
*/ */
template<typename T> static void template<typename T> static void
DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
const Complex<T>* dct_wave ) const Complex<T>* dct_wave )
{ {
static const T sin_45 = (T)0.70710678118654752440084436210485; static const T sin_45 = (T)0.70710678118654752440084436210485;
...@@ -3650,7 +3622,7 @@ DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst ...@@ -3650,7 +3622,7 @@ DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst
template<typename T> static void template<typename T> static void
IDCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
const Complex<T>* dct_wave) const Complex<T>* dct_wave)
{ {
static const T sin_45 = (T)0.70710678118654752440084436210485; static const T sin_45 = (T)0.70710678118654752440084436210485;
...@@ -3768,29 +3740,29 @@ DCTInit( int n, int elem_size, void* _wave, int inv ) ...@@ -3768,29 +3740,29 @@ DCTInit( int n, int elem_size, void* _wave, int inv )
} }
typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, int src_step, void* dft_src, typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, size_t src_step, void* dft_src,
void* dft_dst, void* dst, int dst_step, const void* dct_wave); void* dft_dst, void* dst, size_t dst_step, const void* dct_wave);
static void DCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst, static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
float* dst, int dst_step, const Complexf* dct_wave) float* dst, size_t dst_step, const Complexf* dct_wave)
{ {
DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
} }
static void IDCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst, static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
float* dst, int dst_step, const Complexf* dct_wave) float* dst, size_t dst_step, const Complexf* dct_wave)
{ {
IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
} }
static void DCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
double* dst, int dst_step, const Complexd* dct_wave) double* dst, size_t dst_step, const Complexd* dct_wave)
{ {
DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
} }
static void IDCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
double* dst, int dst_step, const Complexd* dct_wave) double* dst, size_t dst_step, const Complexd* dct_wave)
{ {
IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
} }
...@@ -4058,7 +4030,7 @@ static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_s ...@@ -4058,7 +4030,7 @@ static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_s
namespace cv { namespace cv {
class OcvDctImpl class OcvDctImpl : public hal::DCT2D
{ {
public: public:
OcvDftOptions opt; OcvDftOptions opt;
...@@ -4110,7 +4082,7 @@ public: ...@@ -4110,7 +4082,7 @@ public:
end_stage = 1; end_stage = 1;
} }
} }
void run(uchar * src, int src_step, uchar * dst, int dst_step) void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step)
{ {
CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform)) CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform))
...@@ -4183,69 +4155,65 @@ public: ...@@ -4183,69 +4155,65 @@ public:
prev_len = len; prev_len = len;
} }
// otherwise reuse the tables calculated on the previous stage // otherwise reuse the tables calculated on the previous stage
for(int i = 0; i < count; i++ ) for(unsigned i = 0; i < static_cast<unsigned>(count); i++ )
{ {
dct_func( opt, sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf, dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
dptr + i*dstep0, (int)dstep1, dct_wave); dptr + i*dstep0, dstep1, dct_wave);
} }
src = dst; src = dst;
src_step = dst_step; src_step = dst_step;
} }
} }
void free() {}
}; };
namespace hal { struct ReplacementDCT2D : public hal::DCT2D
void dctInit2D(DftContext & c, int width, int height, int depth, int flags)
{ {
int res = cv_hal_dctInit2D(&c.impl, width, height, depth, flags); cvhalDFT *context;
if (res == CV_HAL_ERROR_OK) bool isInitialized;
ReplacementDCT2D() : context(0), isInitialized(false) {}
bool init(int width, int height, int depth, int flags)
{ {
c.useReplacement = true; int res = hal_ni_dctInit2D(&context, width, height, depth, flags);
return; isInitialized = (res == CV_HAL_ERROR_OK);
return isInitialized;
} }
c.useReplacement = false; void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step)
OcvDctImpl * impl = new OcvDctImpl();
impl->init(width, height, depth, flags);
c.impl = impl;
}
void dct2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step)
{
if (c.useReplacement)
{ {
int res = cv_hal_dct2D(c.impl, src, src_step, dst, dst_step); if (isInitialized)
if (res != CV_HAL_ERROR_OK)
{ {
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctRun"); CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step);
} }
return;
} }
OcvDctImpl * impl = (OcvDctImpl*)c.impl; ~ReplacementDCT2D()
impl->run((uchar*)src, src_step, (uchar*)dst, dst_step); {
} if (isInitialized)
{
CALL_HAL(dctFree2D, cv_hal_dctFree2D, context);
}
}
};
namespace hal {
void dctFree2D(DftContext & c) Ptr<DCT2D> DCT2D::create(int width, int height, int depth, int flags)
{ {
if (c.useReplacement)
{ {
int res = cv_hal_dctFree2D(c.impl); ReplacementDCT2D *impl = new ReplacementDCT2D();
if (res != CV_HAL_ERROR_OK) if (impl->init(width, height, depth, flags))
{ {
CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctFree"); return Ptr<DCT2D>(impl);
} }
return; delete impl;
}
{
OcvDctImpl *impl = new OcvDctImpl();
impl->init(width, height, depth, flags);
return Ptr<DCT2D>(impl);
} }
OcvDctImpl * impl = (OcvDctImpl*)c.impl;
impl->free();
delete impl;
c.impl = 0;
} }
} // cv::hal:: } // cv::hal::
} // cv:: } // cv::
void cv::dct( InputArray _src0, OutputArray _dst, int flags ) void cv::dct( InputArray _src0, OutputArray _dst, int flags )
...@@ -4265,10 +4233,8 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags ) ...@@ -4265,10 +4233,8 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
if (src.isContinuous() && dst.isContinuous()) if (src.isContinuous() && dst.isContinuous())
f |= CV_HAL_DFT_IS_CONTINUOUS; f |= CV_HAL_DFT_IS_CONTINUOUS;
hal::DftContext c; Ptr<hal::DCT2D> c = hal::DCT2D::create(src.cols, src.rows, depth, f);
hal::dctInit2D(c, src.cols, src.rows, depth, f); c->apply(src.data, src.step, dst.data, dst.step);
hal::dct2D(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step);
hal::dctFree2D(c);
} }
......
...@@ -376,38 +376,109 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int ...@@ -376,38 +376,109 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int
#define cv_hal_merge64s hal_ni_merge64s #define cv_hal_merge64s hal_ni_merge64s
//! @endcond //! @endcond
//! @} /**
@brief Dummy structure storing DFT/DCT context
#if defined __GNUC__
# pragma GCC diagnostic pop Users can cast this pointer to any type they want. Initialisation and destruction should be performed in the Init and Free function implementations, respectively.
#elif defined _MSC_VER Example:
# pragma warning( pop ) @code{.cpp}
#endif int my_hal_dftInit2D(cvhalDFT **context, ...) {
*context = reinterpret_cast<cvhalDFT*>(new MyFilterData());
//... init
}
int my_hal_dftFree2D(cvhalDFT *context) {
MyFilterData *c = reinterpret_cast<MyFilterData*>(context);
delete c;
}
@endcode
*/
struct cvhalDFT {};
inline int hal_ni_dftInit1D(void**, int, int, int, int, bool*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } /**
inline int hal_ni_dft1D(const void*, const void*, void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } @param context double pointer to context storing all necessary data
inline int hal_ni_dftFree1D(void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } @param len transformed array length
@param count estimated transformation count
@param depth array type (CV_32F or CV_64F)
@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, CV_HAL_DFT_SCALE, ...)
@param needBuffer pointer to a boolean variable; if a valid pointer is provided, the variable should be set to true to signal that an additional memory buffer is needed for the operation
*/
inline int hal_ni_dftInit1D(cvhalDFT **context, int len, int count, int depth, int flags, bool *needBuffer) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@param context pointer to context storing all necessary data
@param src source data
@param dst destination data
*/
inline int hal_ni_dft1D(cvhalDFT *context, const uchar *src, uchar *dst) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@param context pointer to context storing all necessary data
*/
inline int hal_ni_dftFree1D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
//! @cond IGNORED
#define cv_hal_dftInit1D hal_ni_dftInit1D #define cv_hal_dftInit1D hal_ni_dftInit1D
#define cv_hal_dft1D hal_ni_dft1D #define cv_hal_dft1D hal_ni_dft1D
#define cv_hal_dftFree1D hal_ni_dftFree1D #define cv_hal_dftFree1D hal_ni_dftFree1D
//! @endcond
inline int hal_ni_dftInit2D(void **, int, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } /**
inline int hal_ni_dft2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } @param context double pointer to context storing all necessary data
inline int hal_ni_dftFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } @param width,height image dimensions
@param depth image type (CV_32F or CV64F)
@param src_channels number of channels in input image
@param dst_channels number of channels in output image
@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...)
@param nonzero_rows number of nonzero rows in image, can be used for optimization
*/
inline int hal_ni_dftInit2D(cvhalDFT **context, int width, int height, int depth, int src_channels, int dst_channels, int flags, int nonzero_rows) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@param context pointer to context storing all necessary data
@param src_data,src_step source image data and step
@param dst_data,dst_step destination image data and step
*/
inline int hal_ni_dft2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@param context pointer to context storing all necessary data
*/
inline int hal_ni_dftFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
//! @cond IGNORED
#define cv_hal_dftInit2D hal_ni_dftInit2D #define cv_hal_dftInit2D hal_ni_dftInit2D
#define cv_hal_dft2D hal_ni_dft2D #define cv_hal_dft2D hal_ni_dft2D
#define cv_hal_dftFree2D hal_ni_dftFree2D #define cv_hal_dftFree2D hal_ni_dftFree2D
//! @endcond
/**
@param context double pointer to context storing all necessary data
@param width,height image dimensions
@param depth image type (CV_32F or CV_64F)
@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...)
*/
inline int hal_ni_dctInit2D(cvhalDFT **context, int width, int height, int depth, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@param context pointer to context storing all necessary data
@param src_data,src_step source image data and step
@param dst_data,dst_step destination image data and step
*/
inline int hal_ni_dct2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
   @brief Not-implemented stub that cv_hal_dctFree2D is bound to by default; the argument is not used.
   @param context pointer to the context that stores all necessary data
   @return always CV_HAL_ERROR_NOT_IMPLEMENTED
 */
inline int hal_ni_dctFree2D(cvhalDFT *context)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Overload taking an untyped (void **) first argument and four ints; all
// parameters are unnamed and ignored, and the "not implemented" status is returned.
// NOTE(review): looks like the pre-cvhalDFT form of this hook — confirm whether it is still needed.
inline int hal_ni_dctInit2D(void **, int, int, int, int)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @cond IGNORED
// Overload working on untyped (void *) pointers with int-sized steps; all
// parameters are unnamed and ignored, and the "not implemented" status is returned.
// NOTE(review): looks like the pre-cvhalDFT form of this hook — confirm whether it is still needed.
inline int hal_ni_dct2D(const void *, const void *, int, void *, int)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Overload taking an untyped (void *) argument; the parameter is unnamed and
// ignored, and the "not implemented" status is returned.
// NOTE(review): looks like the pre-cvhalDFT form of this hook — confirm whether it is still needed.
inline int hal_ni_dctFree2D(void *)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Default bindings for the 2D DCT entry points: each cv_hal_* name expands to
// exactly one identifier, the matching hal_ni_* stub. Each directive is written
// once per line so the replacement list holds nothing but that identifier.
#define cv_hal_dctInit2D hal_ni_dctInit2D
#define cv_hal_dct2D hal_ni_dct2D
#define cv_hal_dctFree2D hal_ni_dctFree2D
//! @endcond
//! @}
#if defined __GNUC__
# pragma GCC diagnostic pop
#elif defined _MSC_VER
# pragma warning( pop )
#endif
#include "custom_hal.hpp" #include "custom_hal.hpp"
......
...@@ -700,8 +700,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -700,8 +700,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
buf.resize(bufSize); buf.resize(bufSize);
hal::DftContext c; Ptr<hal::DFT2D> c = hal::DFT2D::create(dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows);
hal::dftInit2D(c, dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows);
// compute DFT of each template plane // compute DFT of each template plane
for( k = 0; k < tcn; k++ ) for( k = 0; k < tcn; k++ )
...@@ -726,11 +725,9 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -726,11 +725,9 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols)); Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
part = Scalar::all(0); part = Scalar::all(0);
} }
hal::dft2D(c, dst.data, (int)dst.step, dst.data, (int)dst.step); c->apply(dst.data, (int)dst.step, dst.data, (int)dst.step);
} }
hal::dftFree2D(c);
int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width; int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height; int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
int tileCount = tileCountX * tileCountY; int tileCount = tileCountX * tileCountY;
...@@ -747,11 +744,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -747,11 +744,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
} }
borderType |= BORDER_ISOLATED; borderType |= BORDER_ISOLATED;
hal::DftContext cF, cR; Ptr<hal::DFT2D> cF, cR;
int f = CV_HAL_DFT_IS_INPLACE; int f = CV_HAL_DFT_IS_INPLACE;
int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE; int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE;
hal::dftInit2D(cF, dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); cF = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1);
hal::dftInit2D(cR, dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); cR = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height);
// calculate correlation by blocks // calculate correlation by blocks
for( i = 0; i < tileCount; i++ ) for( i = 0; i < tileCount; i++ )
...@@ -791,7 +788,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -791,7 +788,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
if (bsz.height == blocksize.height) if (bsz.height == blocksize.height)
hal::dft2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); cF->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
else else
dft( dftImg, dftImg, 0, dsz.height ); dft( dftImg, dftImg, 0, dsz.height );
...@@ -800,7 +797,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -800,7 +797,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
if (bsz.height == blocksize.height) if (bsz.height == blocksize.height)
hal::dft2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); cR->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
else else
dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
...@@ -834,8 +831,6 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, ...@@ -834,8 +831,6 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
} }
} }
} }
hal::dftFree2D(cF);
hal::dftFree2D(cR);
} }
static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask ) static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment