Commit c8cbfe53 authored by Ilya Lavrenov's avatar Ilya Lavrenov

added cv::dft T-API test

parent bd91b395
...@@ -49,6 +49,7 @@ namespace cv { namespace ocl { ...@@ -49,6 +49,7 @@ namespace cv { namespace ocl {
CV_EXPORTS bool haveOpenCL(); CV_EXPORTS bool haveOpenCL();
CV_EXPORTS bool useOpenCL(); CV_EXPORTS bool useOpenCL();
CV_EXPORTS bool haveAmdBlas(); CV_EXPORTS bool haveAmdBlas();
CV_EXPORTS bool haveAmdFft();
CV_EXPORTS void setUseOpenCL(bool flag); CV_EXPORTS void setUseOpenCL(bool flag);
CV_EXPORTS void finish2(); CV_EXPORTS void finish2();
......
...@@ -95,7 +95,7 @@ ...@@ -95,7 +95,7 @@
#undef clAmdFftSetPlanOutStride #undef clAmdFftSetPlanOutStride
#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn #define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn
#undef clAmdFftSetPlanPrecision #undef clAmdFftSetPlanPrecision
//#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn #define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
#undef clAmdFftSetPlanScale #undef clAmdFftSetPlanScale
#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn #define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn
#undef clAmdFftSetPlanTransposeResult #undef clAmdFftSetPlanTransposeResult
...@@ -134,7 +134,7 @@ extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanH ...@@ -134,7 +134,7 @@ extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanH
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths); //extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed); //extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness); extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness);
......
...@@ -40,6 +40,8 @@ ...@@ -40,6 +40,8 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
namespace cv namespace cv
{ {
...@@ -1473,8 +1475,261 @@ typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, in ...@@ -1473,8 +1475,261 @@ typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, in
typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*); typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
#endif #endif
#ifdef HAVE_CLAMDFFT
namespace cv {
// Evaluates a clAmdFft call exactly once and raises through CV_Assert unless the
// returned status equals CLFFT_SUCCESS.
// The expansion is a brace-enclosed block, so call sites in this file use it both
// with and without a trailing semicolon. It declares a local named `s`; the
// expression passed as `func` must therefore not refer to an outer variable `s`.
#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}
// Kind of transform, encoded as two bits so that ocl_dft() can build the value
// directly: bit 0 is set for a complex (2-channel) input, bit 1 is set when
// DFT_COMPLEX_OUTPUT is requested. The numeric values must therefore not change.
enum FftType
{
R2R = 0, // real input, real output (rejected by FftPlan with StsNotImplemented)
C2R = 1, // complex input (clAmdFft HERMITIAN_INTERLEAVED layout) to real output
R2C = 2, // real input to complex output (clAmdFft HERMITIAN_INTERLEAVED layout)
C2C = 3 // complex interleaved input to complex interleaved output
};
class PlanCache
{
struct FftPlan
{
FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType), plHandle(0)
{
bool dft_inverse = (flags & DFT_INVERSE) != 0;
bool dft_scale = (flags & DFT_SCALE) != 0;
bool dft_rows = (flags & DFT_ROWS) != 0;
clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
size_t batchSize = dft_rows ? dft_size.height : 1;
size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 };
size_t clStridesIn[3] = { 1, 1, 1 };
size_t clStridesOut[3] = { 1, 1, 1 };
int elemSize = doubleFP ? sizeof(double) : sizeof(float);
switch (fftType)
{
case C2C:
inLayout = CLFFT_COMPLEX_INTERLEAVED;
outLayout = CLFFT_COMPLEX_INTERLEAVED;
clStridesIn[1] = src_step / (elemSize << 1);
clStridesOut[1] = dst_step / (elemSize << 1);
break;
case R2C:
inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / elemSize;
clStridesOut[1] = dst_step / (elemSize << 1);
break;
case C2R:
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / (elemSize << 1);
clStridesOut[1] = dst_step / elemSize;
break;
case R2R:
default:
CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
break;
}
clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
// TODO remove all plans if context changed
CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context2::getDefault().ptr(), dim, clLengthsIn))
// setting plan properties
CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
// ready to bake
cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr();
CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &commandQueue, NULL, NULL))
}
~FftPlan()
{
// clAmdFftDestroyPlan(&plHandle);
}
friend class PlanCache;
private:
Size dft_size;
int src_step, dst_step;
bool doubleFP;
bool inplace;
int flags;
FftType fftType;
cl_context context;
clAmdFftPlanHandle plHandle;
};
public:
static PlanCache & getInstance()
{
static PlanCache planCache;
return planCache;
}
clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
bool inplace, int flags, FftType fftType)
{
cl_context currentContext = (cl_context)ocl::Context2::getDefault().ptr();
for (size_t i = 0, size = planStorage.size(); i < size; i ++)
{
const FftPlan * const plan = planStorage[i];
if (plan->dft_size == dft_size &&
plan->flags == flags &&
plan->src_step == src_step &&
plan->dst_step == dst_step &&
plan->doubleFP == doubleFP &&
plan->fftType == fftType &&
plan->inplace == inplace)
{
if (plan->context != currentContext)
{
planStorage.erase(planStorage.begin() + i);
break;
}
return plan->plHandle;
}
}
// no baked plan is found, so let's create a new one
FftPlan * newPlan = new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType);
planStorage.push_back(newPlan);
return newPlan->plHandle;
}
~PlanCache()
{
for (std::vector<FftPlan *>::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i)
delete (*i);
planStorage.clear();
}
protected:
PlanCache() :
planStorage()
{
}
std::vector<FftPlan *> planStorage;
};
extern "C" {

// OpenCL event callback: drops one reference on the UMatData passed as user data,
// deallocates it through its current allocator when that was the last reference,
// and finally releases the event itself. A null user-data pointer is tolerated.
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
{
    UMatData * const data = (UMatData *)p;

    if (data)
    {
        // CV_XADD yields the counter value from before the decrement
        const int refsBefore = CV_XADD(&data->urefcount, -1);
        if (refsBefore == 1)
            data->currAllocator->deallocate(data);
    }

    clReleaseEvent(e);
}

}
// OpenCL (clAmdFft) implementation of cv::dft.
//
// Returns true when the transform has been enqueued on the default OpenCL queue,
// false when the input/flag combination is not handled here so the caller can
// fall back to another implementation. Failing clAmdFft calls raise through
// CV_Assert.
//
//   _src  - input array; only CV_32FC1/CV_32FC2/CV_64FC1/CV_64FC2 with zero offset
//           pass the checks, and only the complex-to-complex case is executed
//   _dst  - output array, (re)created with the source size and two channels
//   flags - cv::dft flags
static bool ocl_dft(InputArray _src, OutputArray _dst, int flags)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    Size ssize = _src.size();

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if ( (!doubleSupport && depth == CV_64F) ||
         !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
         _src.offset() != 0)
        return false;

    // bail out unless the element count is already an "optimal" DFT size,
    // i.e. a product of the primes 2, 3 and 5 only
    if (ssize.area() != getOptimalDFTSize(ssize.area()))
        return false;

    int dst_complex_input = cn == 2 ? 1 : 0;
    bool dft_inverse = (flags & DFT_INVERSE) != 0;
    int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
    bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;

    // the two output flags are mutually exclusive
    CV_Assert(dft_complex_output + dft_real_output < 2);
    // bit 0: complex input, bit 1: complex output (matches the FftType values)
    FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);

    switch (fftType)
    {
    case C2C:
        _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
        break;
    case R2C: // TODO implement it if possible
    case C2R: // TODO implement it if possible
    case R2R: // AMD Fft does not support this type
    default:
        return false;
    }

    UMat src = _src.getUMat(), dst = _dst.getUMat();
    bool inplace = src.u == dst.u;

    clAmdFftPlanHandle plHandle = PlanCache::getInstance().
            getPlanHandle(ssize, (int)src.step, (int)dst.step,
                          depth == CV_64F, inplace, flags, fftType);

    // get the bufferSize
    size_t bufferSize = 0;
    CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
    UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);

    cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
    cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);

    cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr();
    cl_event e = 0;

    CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
                                             1, &commandQueue, 0, NULL, &e,
                                             &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))

    // Keep the temporary buffer alive until the transform has completed: take an
    // extra reference now and let the completion callback drop it.
    tmpBuffer.addref();
    if (clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u) != CL_SUCCESS)
    {
        // The callback could not be registered, so nobody would ever release the
        // extra reference or the event. Wait for the transform synchronously and
        // perform the cleanup directly instead.
        clWaitForEvents(1, &e);
        oclCleanupCallback(e, CL_COMPLETE, tmpBuffer.u);
    }

    return true;
}

// the helper macro is local to the clAmdFft section
#undef CLAMDDFT_Assert
}
#endif // HAVE_CLAMDFFT
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{ {
#ifdef HAVE_CLAMDFFT
if (ocl::useOpenCL() && ocl::haveAmdFft() && _dst.isUMat() && _src0.dims() <= 2
&& nonzero_rows == 0 && ocl_dft(_src0, _dst, flags))
return;
#endif
static DFTFunc dft_tbl[6] = static DFTFunc dft_tbl[6] =
{ {
(DFTFunc)DFT_32f, (DFTFunc)DFT_32f,
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <map> #include <map>
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
#include "opencv2/core/opencl/runtime/opencl_core.hpp" #include "opencv2/core/opencl/runtime/opencl_core.hpp"
...@@ -1423,6 +1424,83 @@ bool haveAmdBlas() ...@@ -1423,6 +1424,83 @@ bool haveAmdBlas()
#endif #endif
#ifdef HAVE_CLAMDFFT
// Singleton that performs the one-time clAmdFft setup-data initialisation and
// remembers whether it succeeded.
class AmdFftHelper
{
public:
    // Returns the single helper instance (constructed on first use).
    static AmdFftHelper & getInstance()
    {
        static AmdFftHelper amdFft;
        return amdFft;
    }

    // True when OpenCL is present and clAmdFftInitSetupData() reported success.
    bool isAvailable() const
    {
        return g_isAmdFftAvailable;
    }

    // Library teardown is deliberately left disabled, as in the original code.
    ~AmdFftHelper()
    {
        try
        {
//            clAmdFftTeardown();
        }
        catch (...) { }
    }

protected:
    // Initialises the static state exactly once. The flag is checked before and
    // after taking the mutex; a constructor that finds the state already
    // initialised leaves the recorded availability untouched.
    AmdFftHelper()
    {
        if (!g_isAmdFftInitialized)
        {
            AutoLock lock(m);

            if (!g_isAmdFftInitialized)
            {
                g_isAmdFftAvailable = false;

                if (haveOpenCL())
                {
                    try
                    {
                        CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
                        g_isAmdFftAvailable = true;
                    }
                    catch (const Exception &)
                    {
                        // initialisation failed: report clAmdFft as unavailable
                        g_isAmdFftAvailable = false;
                    }
                }

                g_isAmdFftInitialized = true;
            }
        }
    }

private:
    static clAmdFftSetupData setupData;
    static Mutex m;
    static bool g_isAmdFftInitialized;
    static bool g_isAmdFftAvailable;
};
// Storage for the static members of AmdFftHelper.
// setup data filled in by clAmdFftInitSetupData() in the constructor
clAmdFftSetupData AmdFftHelper::setupData;
// result of the one-time initialisation; false until it has succeeded
bool AmdFftHelper::g_isAmdFftAvailable = false;
// set once the constructor has finished its initialisation attempt
bool AmdFftHelper::g_isAmdFftInitialized = false;
// guards the initialisation performed in the constructor
Mutex AmdFftHelper::m;
bool haveAmdFft()
{
return AmdFftHelper::getInstance().isAvailable();
}
#else
// Built without HAVE_CLAMDFFT: clAmdFft is never available.
bool haveAmdFft()
{
    const bool clAmdFftCompiledIn = false;
    return clAmdFftCompiledIn;
}
#endif
void finish2() void finish2()
{ {
Queue::getDefault().finish(); Queue::getDefault().finish();
......
...@@ -33,7 +33,7 @@ enum OPENCLAMDFFT_FN_ID { ...@@ -33,7 +33,7 @@ enum OPENCLAMDFFT_FN_ID {
OPENCLAMDFFT_FN_clAmdFftSetPlanInStride = 23, OPENCLAMDFFT_FN_clAmdFftSetPlanInStride = 23,
// OPENCLAMDFFT_FN_clAmdFftSetPlanLength = 24, // OPENCLAMDFFT_FN_clAmdFftSetPlanLength = 24,
OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride = 25, OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride = 25,
// OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26, OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26,
OPENCLAMDFFT_FN_clAmdFftSetPlanScale = 27, OPENCLAMDFFT_FN_clAmdFftSetPlanScale = 27,
// OPENCLAMDFFT_FN_clAmdFftSetPlanTransposeResult = 28, // OPENCLAMDFFT_FN_clAmdFftSetPlanTransposeResult = 28,
OPENCLAMDFFT_FN_clAmdFftSetResultLocation = 29, OPENCLAMDFFT_FN_clAmdFftSetResultLocation = 29,
...@@ -334,9 +334,9 @@ clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle, const clAmdFftDim ...@@ -334,9 +334,9 @@ clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle, const clAmdFftDim
openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride, clAmdFftStatus, clAmdFftPlanHandle, const clAmdFftDim, size_t*>::switch_fn; openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride, clAmdFftStatus, clAmdFftPlanHandle, const clAmdFftDim, size_t*>::switch_fn;
static const struct DynamicFnEntry clAmdFftSetPlanOutStride_definition = { "clAmdFftSetPlanOutStride", (void**)&clAmdFftSetPlanOutStride}; static const struct DynamicFnEntry clAmdFftSetPlanOutStride_definition = { "clAmdFftSetPlanOutStride", (void**)&clAmdFftSetPlanOutStride};
//clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) = clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) =
// openclamdfft_fn2<OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftPrecision>::switch_fn; openclamdfft_fn2<OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftPrecision>::switch_fn;
//static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision}; static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision};
clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle, clAmdFftDirection, cl_float) = clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle, clAmdFftDirection, cl_float) =
openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanScale, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftDirection, cl_float>::switch_fn; openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanScale, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftDirection, cl_float>::switch_fn;
...@@ -387,7 +387,7 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = { ...@@ -387,7 +387,7 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = {
&clAmdFftSetPlanInStride_definition, &clAmdFftSetPlanInStride_definition,
NULL/*&clAmdFftSetPlanLength_definition*/, NULL/*&clAmdFftSetPlanLength_definition*/,
&clAmdFftSetPlanOutStride_definition, &clAmdFftSetPlanOutStride_definition,
NULL/*&clAmdFftSetPlanPrecision_definition*/, &clAmdFftSetPlanPrecision_definition,
&clAmdFftSetPlanScale_definition, &clAmdFftSetPlanScale_definition,
NULL/*&clAmdFftSetPlanTransposeResult_definition*/, NULL/*&clAmdFftSetPlanTransposeResult_definition*/,
&clAmdFftSetResultLocation_definition, &clAmdFftSetResultLocation_definition,
...@@ -396,4 +396,4 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = { ...@@ -396,4 +396,4 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = {
ADDITIONAL_FN_DEFINITIONS // macro for custom functions ADDITIONAL_FN_DEFINITIONS // macro for custom functions
}; };
// number of enabled functions: 14 // number of enabled functions: 15
...@@ -5,7 +5,7 @@ clAmdFftDestroyPlan ...@@ -5,7 +5,7 @@ clAmdFftDestroyPlan
clAmdFftEnqueueTransform clAmdFftEnqueueTransform
//clAmdFftGetLayout //clAmdFftGetLayout
//clAmdFftGetPlanBatchSize //clAmdFftGetPlanBatchSize
//clAmdFftGetPlanContext clAmdFftGetPlanContext
//clAmdFftGetPlanDim //clAmdFftGetPlanDim
//clAmdFftGetPlanDistance //clAmdFftGetPlanDistance
//clAmdFftGetPlanInStride //clAmdFftGetPlanInStride
...@@ -22,9 +22,9 @@ clAmdFftSetPlanBatchSize ...@@ -22,9 +22,9 @@ clAmdFftSetPlanBatchSize
//clAmdFftSetPlanDim //clAmdFftSetPlanDim
clAmdFftSetPlanDistance clAmdFftSetPlanDistance
clAmdFftSetPlanInStride clAmdFftSetPlanInStride
//clAmdFftSetPlanLength clAmdFftSetPlanLength
clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride
//clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision
clAmdFftSetPlanScale clAmdFftSetPlanScale
//clAmdFftSetPlanTransposeResult //clAmdFftSetPlanTransposeResult
clAmdFftSetResultLocation clAmdFftSetResultLocation
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
////////////////////////////////////////////////////////////////////////////
// Dft
// Fixture for the cv::dft accuracy tests.
// Parameters: transform size, matrix depth, in-place flag, DFT_ROWS, DFT_SCALE,
// DFT_INVERSE.
PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool)
{
    cv::Size dft_size;
    int dft_flags, depth;
    bool inplace;

    TEST_DECLARE_INPUT_PARAMETER(src)
    TEST_DECLARE_OUTPUT_PARAMETER(dst)

    // Reads the test parameters and folds the three boolean ones into dft_flags.
    virtual void SetUp()
    {
        dft_size = GET_PARAM(0);
        depth = GET_PARAM(1);
        inplace = GET_PARAM(2);

        dft_flags = 0;
        if (GET_PARAM(3))
            dft_flags |= cv::DFT_ROWS;
        if (GET_PARAM(4))
            dft_flags |= cv::DFT_SCALE;
        if (GET_PARAM(5))
            dft_flags |= cv::DFT_INVERSE;
    }

    // Fills src with random values of `cn` channels and prepares the OpenCL input.
    void generateTestData(int cn = 2)
    {
        src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0);
        // Give the OpenCL path its own copy of the input instead of a UMat obtained
        // from src: in the in-place case below the reference run overwrites src
        // before the OpenCL run reads its input, so the two must not share data.
        src.copyTo(usrc);

        if (inplace)
            dst = src, udst = usrc;
    }
};
// Complex-to-complex transform: the result computed with OpenCL switched on must
// match the result computed with OpenCL switched off within a size-scaled tolerance.
OCL_TEST_P(Dft, C2C)
{
    generateTestData();

    const int flags = dft_flags | cv::DFT_COMPLEX_OUTPUT;

    OCL_OFF(cv::dft(src, dst, flags));
    OCL_ON(cv::dft(usrc, udst, flags));

    // tolerance grows linearly with the number of elements
    const double eps = src.size().area() * 1e-4;
    EXPECT_MAT_NEAR(dst, udst, eps);
}
// Runs the Dft tests for every combination of the listed sizes, both floating-point
// depths and all four boolean switches.
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20),
cv::Size(512, 1), cv::Size(1024, 768)), // transform sizes
Values(CV_32F, CV_64F), // depth
Bool(), // inplace
Bool(), // DFT_ROWS
Bool(), // DFT_SCALE
Bool()) // DFT_INVERSE
);
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
...@@ -48,43 +48,53 @@ using namespace cv; ...@@ -48,43 +48,53 @@ using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
#if !defined HAVE_CLAMDFFT #if !defined HAVE_CLAMDFFT
void cv::ocl::dft(const oclMat&, oclMat&, Size, int) void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
{ {
CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented"); CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
} }
namespace cv { namespace ocl { namespace cv { namespace ocl {
void fft_teardown(); void fft_teardown();
}} } }
void cv::ocl::fft_teardown(){}
void cv::ocl::fft_teardown() { }
#else #else
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
namespace cv namespace cv
{ {
namespace ocl namespace ocl
{ {
void fft_setup(); void fft_setup();
void fft_teardown(); void fft_teardown();
enum FftType enum FftType
{ {
C2R = 1, // complex to complex C2R = 1, // complex to complex
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // opencl HERMITIAN_INTERLEAVED to real C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
}; };
struct FftPlan struct FftPlan
{ {
protected: protected:
clAmdFftPlanHandle plHandle; clAmdFftPlanHandle plHandle;
FftPlan& operator=(const FftPlan&); FftPlan& operator=(const FftPlan&);
public: public:
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
~FftPlan(); ~FftPlan();
inline clAmdFftPlanHandle getPlanHandle() { return plHandle; } inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
const Size dft_size; const Size dft_size;
const int src_step, dst_step; const int src_step, dst_step;
const int depth;
const int flags; const int flags;
const FftType type; const FftType type;
}; };
class PlanCache class PlanCache
{ {
protected: protected:
...@@ -105,10 +115,11 @@ namespace cv ...@@ -105,10 +115,11 @@ namespace cv
planCache = new PlanCache(); planCache = new PlanCache();
return planCache; return planCache;
} }
// return a baked plan-> // return a baked plan->
// if there is one matched plan, return it // if there is one matched plan, return it
// if not, bake a new one, put it into the planStore and return it. // if not, bake a new one, put it into the planStore and return it.
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
// remove a single plan from the store // remove a single plan from the store
// return true if the plan is successfully removed // return true if the plan is successfully removed
...@@ -117,6 +128,7 @@ namespace cv ...@@ -117,6 +128,7 @@ namespace cv
}; };
} }
} }
PlanCache* PlanCache::planCache = NULL; PlanCache* PlanCache::planCache = NULL;
void cv::ocl::fft_setup() void cv::ocl::fft_setup()
...@@ -128,9 +140,11 @@ void cv::ocl::fft_setup() ...@@ -128,9 +140,11 @@ void cv::ocl::fft_setup()
} }
if (pCache.setupData == NULL) if (pCache.setupData == NULL)
pCache.setupData = new clAmdFftSetupData; pCache.setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( pCache.setupData )); openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
pCache.started = true; pCache.started = true;
} }
void cv::ocl::fft_teardown() void cv::ocl::fft_teardown()
{ {
PlanCache& pCache = *PlanCache::getPlanCache(); PlanCache& pCache = *PlanCache::getPlanCache();
...@@ -154,8 +168,8 @@ void cv::ocl::fft_teardown() ...@@ -154,8 +168,8 @@ void cv::ocl::fft_teardown()
} }
// bake a new plan // bake a new plan
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type) : plHandle(0), dft_size(_dft_size), src_step(_src_step), depth(_depth), dst_step(_dst_step), flags(_flags), type(_type)
{ {
fft_setup(); fft_setup();
...@@ -184,20 +198,20 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla ...@@ -184,20 +198,20 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
case C2C: case C2C:
inLayout = CLFFT_COMPLEX_INTERLEAVED; inLayout = CLFFT_COMPLEX_INTERLEAVED;
outLayout = CLFFT_COMPLEX_INTERLEAVED; outLayout = CLFFT_COMPLEX_INTERLEAVED;
clStridesIn[1] = src_step / (2*sizeof(float)); clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = clStridesIn[1]; clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
break; break;
case R2C: case R2C:
inLayout = CLFFT_REAL; inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED; outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / sizeof(float); clStridesIn[1] = src_step / CV_ELEM_SIZE(_depth);
clStridesOut[1] = dst_step / (2*sizeof(float)); clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
break; break;
case C2R: case C2R:
inLayout = CLFFT_HERMITIAN_INTERLEAVED; inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL; outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / (2*sizeof(float)); clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = dst_step / sizeof(float); clStridesOut[1] = dst_step / CV_ELEM_SIZE(_depth);
break; break;
default: default:
//std::runtime_error("does not support this convertion!"); //std::runtime_error("does not support this convertion!");
...@@ -211,6 +225,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla ...@@ -211,6 +225,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) ); openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetPlanPrecision( plHandle, depth == CV_64F ? CLFFT_DOUBLE : CLFFT_SINGLE ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) ); openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
...@@ -225,6 +240,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla ...@@ -225,6 +240,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
//ready to bake //ready to bake
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) ); openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) );
} }
cv::ocl::FftPlan::~FftPlan() cv::ocl::FftPlan::~FftPlan()
{ {
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) ); openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
...@@ -242,7 +258,7 @@ cv::ocl::PlanCache::~PlanCache() ...@@ -242,7 +258,7 @@ cv::ocl::PlanCache::~PlanCache()
fft_teardown(); fft_teardown();
} }
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
{ {
PlanCache& pCache = *PlanCache::getPlanCache(); PlanCache& pCache = *PlanCache::getPlanCache();
std::vector<FftPlan *>& pStore = pCache.planStore; std::vector<FftPlan *>& pStore = pCache.planStore;
...@@ -256,6 +272,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste ...@@ -256,6 +272,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste
plan->flags == _flags && plan->flags == _flags &&
plan->src_step == _src_step && plan->src_step == _src_step &&
plan->dst_step == _dst_step && plan->dst_step == _dst_step &&
plan->depth == _depth &&
plan->type == _type plan->type == _type
) )
{ {
...@@ -263,7 +280,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste ...@@ -263,7 +280,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste
} }
} }
// no baked plan is found // no baked plan is found
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type); FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _depth, _flags, _type);
pStore.push_back(newPlan); pStore.push_back(newPlan);
return newPlan; return newPlan;
} }
...@@ -286,6 +303,8 @@ bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle) ...@@ -286,6 +303,8 @@ bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{ {
CV_Assert(cv::ocl::haveAmdFft());
if(dft_size == Size(0, 0)) if(dft_size == Size(0, 0))
{ {
dft_size = src.size(); dft_size = src.size();
...@@ -296,9 +315,6 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) ...@@ -296,9 +315,6 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
// the two flags are not compatible // the two flags are not compatible
CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) ); CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
// similar assertions with cuda module
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
//bool is_1d_input = (src.rows == 1); //bool is_1d_input = (src.rows == 1);
//int is_row_dft = flags & DFT_ROWS; //int is_row_dft = flags & DFT_ROWS;
//int is_scaled_dft = flags & DFT_SCALE; //int is_scaled_dft = flags & DFT_SCALE;
...@@ -306,6 +322,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) ...@@ -306,6 +322,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
bool is_complex_input = src.channels() == 2; bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT); bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
int depth = src.depth();
// We don't support real-to-real transform // We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output); CV_Assert(is_complex_input || is_complex_output);
...@@ -314,14 +331,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) ...@@ -314,14 +331,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
switch(type) switch(type)
{ {
case C2C: case C2C:
dst.create(src.rows, src.cols, CV_32FC2); dst.create(src.rows, src.cols, CV_MAKE_TYPE(depth, 2));
printf("C2C\n");
break; break;
case R2C: case R2C:
dst.create(src.rows, src.cols / 2 + 1, CV_32FC2); dst.create(src.rows, src.cols / 2 + 1, CV_MAKE_TYPE(depth, 2));
printf("R2C\n");
break; break;
case C2R: case C2R:
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
dst.create(src.rows, dft_size.width, CV_32FC1); dst.create(src.rows, dft_size.width, CV_MAKE_TYPE(depth, 1));
printf("C2R\n");
break; break;
default: default:
//std::runtime_error("does not support this conversion!"); //std::runtime_error("does not support this conversion!");
...@@ -329,7 +349,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) ...@@ -329,7 +349,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
throw std::exception(); throw std::exception();
break; break;
} }
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle(); clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, depth, flags, type)->getPlanHandle();
//get the buffersize //get the buffersize
size_t buffersize = 0; size_t buffersize = 0;
...@@ -356,7 +376,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) ...@@ -356,7 +376,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{ {
openCLFree(clMedBuffer); openCLFree(clMedBuffer);
} }
//fft_teardown(); fft_teardown();
} }
#endif #endif
...@@ -75,6 +75,7 @@ ...@@ -75,6 +75,7 @@
#include "opencv2/core/utility.hpp" #include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp" #include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#define __ATI__ #define __ATI__
......
...@@ -50,32 +50,36 @@ using namespace std; ...@@ -50,32 +50,36 @@ using namespace std;
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Dft // Dft
PARAM_TEST_CASE(Dft, cv::Size, int) PARAM_TEST_CASE(Dft, cv::Size, int, bool)
{ {
cv::Size dft_size; cv::Size dft_size;
int dft_flags; int dft_flags;
bool doubleFP;
virtual void SetUp() virtual void SetUp()
{ {
dft_size = GET_PARAM(0); dft_size = GET_PARAM(0);
dft_flags = GET_PARAM(1); dft_flags = GET_PARAM(1);
doubleFP = GET_PARAM(2);
} }
}; };
OCL_TEST_P(Dft, C2C) OCL_TEST_P(Dft, C2C)
{ {
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0); cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC2 : CV_32FC2, 0.0, 100.0);
cv::Mat b_gold; cv::Mat b_gold;
cv::ocl::oclMat d_b; cv::ocl::oclMat d_b;
cv::dft(a, b_gold, dft_flags); cv::dft(a, b_gold, dft_flags);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags); cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4); EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4);
} }
OCL_TEST_P(Dft, R2C) OCL_TEST_P(Dft, R2C)
{ {
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0); cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 100.0);
cv::Mat b_gold, b_gold_roi; cv::Mat b_gold, b_gold_roi;
cv::ocl::oclMat d_b, d_c; cv::ocl::oclMat d_b, d_c;
...@@ -92,7 +96,7 @@ OCL_TEST_P(Dft, R2C) ...@@ -92,7 +96,7 @@ OCL_TEST_P(Dft, R2C)
OCL_TEST_P(Dft, R2CthenC2R) OCL_TEST_P(Dft, R2CthenC2R)
{ {
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 10.0);
cv::ocl::oclMat d_b, d_c; cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0); cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
...@@ -102,7 +106,7 @@ OCL_TEST_P(Dft, R2CthenC2R) ...@@ -102,7 +106,7 @@ OCL_TEST_P(Dft, R2CthenC2R)
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)), testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) )); testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE), testing::Bool()));
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// MulSpectrums // MulSpectrums
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment