Commit 1d2cf0e2 authored by Alexander Karsakov

Added nonzero_rows support

parent 52f76a32
...@@ -2034,19 +2034,19 @@ enum FftType ...@@ -2034,19 +2034,19 @@ enum FftType
C2C = 3 C2C = 3
}; };
static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix) static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
{ {
int factors[34]; int factors[34];
int nf = DFTFactorize( cols, factors ); int nf = DFTFactorize(cols, factors);
int n = 1; int n = 1;
int factor_index = 0; int factor_index = 0;
min_radix = INT_MAX; min_radix = INT_MAX;
// 2^n transforms // 2^n transforms
if ( (factors[factor_index] & 1) == 0 ) if ((factors[factor_index] & 1) == 0)
{ {
for( ; n < factors[factor_index]; ) for( ; n < factors[factor_index];)
{ {
int radix = 2, block = 1; int radix = 2, block = 1;
if (8*n <= factors[0]) if (8*n <= factors[0])
...@@ -2080,7 +2080,7 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std: ...@@ -2080,7 +2080,7 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std:
} }
// all the other transforms // all the other transforms
for( ; factor_index < nf; factor_index++ ) for( ; factor_index < nf; factor_index++)
{ {
int radix = factors[factor_index], block = 1; int radix = factors[factor_index], block = 1;
if (radix == 3) if (radix == 3)
...@@ -2101,7 +2101,6 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std: ...@@ -2101,7 +2101,6 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std:
blocks.push_back(block); blocks.push_back(block);
min_radix = min(min_radix, block*radix); min_radix = min(min_radix, block*radix);
} }
return radixes;
} }
struct OCL_FftPlan struct OCL_FftPlan
...@@ -2111,14 +2110,13 @@ struct OCL_FftPlan ...@@ -2111,14 +2110,13 @@ struct OCL_FftPlan
int thread_count; int thread_count;
int dft_size; int dft_size;
int flags;
bool status; bool status;
OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags), status(true) OCL_FftPlan(int _size): dft_size(_size), status(true)
{ {
int min_radix; int min_radix;
std::vector<int> radixes, blocks; std::vector<int> radixes, blocks;
ocl_getRadixes(dft_size, radixes, blocks, min_radix); ocl_getRadixes(dft_size, radixes, blocks, min_radix);
thread_count = (dft_size + min_radix-1) / min_radix; thread_count = dft_size / min_radix;
if (thread_count > ocl::Device::getDefault().maxWorkGroupSize()) if (thread_count > ocl::Device::getDefault().maxWorkGroupSize())
{ {
...@@ -2140,8 +2138,7 @@ struct OCL_FftPlan ...@@ -2140,8 +2138,7 @@ struct OCL_FftPlan
n *= radix; n *= radix;
} }
twiddles.create(1, twiddle_size, CV_32FC2); Mat tw(1, twiddle_size, CV_32FC2);
Mat tw = twiddles.getMat(ACCESS_WRITE);
float* ptr = tw.ptr<float>(); float* ptr = tw.ptr<float>();
int ptr_index = 0; int ptr_index = 0;
...@@ -2162,6 +2159,7 @@ struct OCL_FftPlan ...@@ -2162,6 +2159,7 @@ struct OCL_FftPlan
} }
} }
} }
twiddles = tw.getUMat(ACCESS_READ);
buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s", buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s",
dft_size, dft_size/thread_count, radix_processing.c_str()); dft_size, dft_size/thread_count, radix_processing.c_str());
...@@ -2185,10 +2183,10 @@ struct OCL_FftPlan ...@@ -2185,10 +2183,10 @@ struct OCL_FftPlan
if (rows) if (rows)
{ {
globalsize[0] = thread_count; globalsize[1] = dft_size; globalsize[0] = thread_count; globalsize[1] = src.rows;
localsize[0] = thread_count; localsize[1] = 1; localsize[0] = thread_count; localsize[1] = 1;
kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows"; kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
if (is1d && (flags & DFT_SCALE)) if ((is1d || inv) && (flags & DFT_SCALE))
options += " -D DFT_SCALE"; options += " -D DFT_SCALE";
} }
else else
...@@ -2200,14 +2198,9 @@ struct OCL_FftPlan ...@@ -2200,14 +2198,9 @@ struct OCL_FftPlan
options += " -D DFT_SCALE"; options += " -D DFT_SCALE";
} }
if (src.channels() == 1) options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
options += " -D REAL_INPUT"; options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
else options += is1d ? " -D IS_1D" : "";
options += " -D COMPLEX_INPUT";
if (dst.channels() == 1)
options += " -D REAL_OUTPUT";
if (is1d)
options += " -D IS_1D";
if (!inv) if (!inv)
{ {
...@@ -2216,10 +2209,10 @@ struct OCL_FftPlan ...@@ -2216,10 +2209,10 @@ struct OCL_FftPlan
} }
else else
{ {
if (is1d && fftType == C2R || (rows && fftType == R2R)) if (rows && (fftType == C2R || fftType == R2R))
options += " -D NO_CONJUGATE"; options += " -D NO_CONJUGATE";
if (dst.cols % 2 == 0) if (dst.cols % 2 == 0)
options += " -D EVEN"; options += " -D EVEN";
} }
ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options); ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
...@@ -2240,7 +2233,7 @@ public: ...@@ -2240,7 +2233,7 @@ public:
return planCache; return planCache;
} }
OCL_FftPlan* getFftPlan(int dft_size, int flags) OCL_FftPlan* getFftPlan(int dft_size)
{ {
for (size_t i = 0, size = planStorage.size(); i < size; ++i) for (size_t i = 0, size = planStorage.size(); i < size; ++i)
{ {
...@@ -2252,7 +2245,7 @@ public: ...@@ -2252,7 +2245,7 @@ public:
} }
} }
OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size, flags); OCL_FftPlan * newPlan = new OCL_FftPlan(dft_size);
planStorage.push_back(newPlan); planStorage.push_back(newPlan);
return newPlan; return newPlan;
} }
...@@ -2275,13 +2268,13 @@ protected: ...@@ -2275,13 +2268,13 @@ protected:
static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
{ {
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags); const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols());
return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true); return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
} }
static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
{ {
const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags); const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows());
return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false); return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
} }
...@@ -2385,7 +2378,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro ...@@ -2385,7 +2378,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
} }
else else
{ {
int nonzero_cols = src.cols/2 + 1;// : src.cols; int nonzero_cols = src.cols/2 + 1;
if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType)) if (!ocl_dft_C2C_cols(src, output, nonzero_cols, flags, fftType))
return false; return false;
......
...@@ -3002,7 +3002,8 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], ...@@ -3002,7 +3002,8 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
sync ? 0 : &p->e); sync ? 0 : &p->e);
if( sync || retval != CL_SUCCESS ) if( sync || retval != CL_SUCCESS )
{ {
CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS); int a = clFinish(qq);
CV_OclDbgAssert(a == CL_SUCCESS);
p->cleanupUMats(); p->cleanupUMats();
} }
else else
...@@ -3898,8 +3899,9 @@ public: ...@@ -3898,8 +3899,9 @@ public:
if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
{ {
AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, int a = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
CV_Assert( a == CL_SUCCESS );
u->markHostCopyObsolete(false); u->markHostCopyObsolete(false);
} }
} }
......
This diff is collapsed.
...@@ -66,7 +66,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) ...@@ -66,7 +66,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
{ {
cv::Size dft_size; cv::Size dft_size;
int dft_flags, depth, cn, dft_type; int dft_flags, depth, cn, dft_type;
bool inplace; bool hint;
bool is1d; bool is1d;
TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_INPUT_PARAMETER(src);
...@@ -93,9 +93,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) ...@@ -93,9 +93,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
dft_flags |= cv::DFT_ROWS; dft_flags |= cv::DFT_ROWS;
if (GET_PARAM(4)) if (GET_PARAM(4))
dft_flags |= cv::DFT_SCALE; dft_flags |= cv::DFT_SCALE;
inplace = GET_PARAM(5); hint = GET_PARAM(5);
is1d = (dft_flags & DFT_ROWS) != 0 || dft_size.height == 1; is1d = (dft_flags & DFT_ROWS) != 0 || dft_size.height == 1;
} }
...@@ -103,9 +101,6 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) ...@@ -103,9 +101,6 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
{ {
src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0); src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0);
usrc = src.getUMat(ACCESS_READ); usrc = src.getUMat(ACCESS_READ);
if (inplace)
dst = src, udst = usrc;
} }
}; };
...@@ -113,8 +108,9 @@ OCL_TEST_P(Dft, Mat) ...@@ -113,8 +108,9 @@ OCL_TEST_P(Dft, Mat)
{ {
generateTestData(); generateTestData();
OCL_OFF(cv::dft(src, dst, dft_flags)); int nonzero_rows = hint ? src.cols - randomInt(1, src.rows-1) : 0;
OCL_ON(cv::dft(usrc, udst, dft_flags)); OCL_OFF(cv::dft(src, dst, dft_flags, nonzero_rows));
OCL_ON(cv::dft(usrc, udst, dft_flags, nonzero_rows));
if (dft_type == R2C && is1d && (dft_flags & cv::DFT_INVERSE) == 0) if (dft_type == R2C && is1d && (dft_flags & cv::DFT_INVERSE) == 0)
{ {
...@@ -122,15 +118,16 @@ OCL_TEST_P(Dft, Mat) ...@@ -122,15 +118,16 @@ OCL_TEST_P(Dft, Mat)
udst = udst(cv::Range(0, udst.rows), cv::Range(0, udst.cols/2 + 1)); udst = udst(cv::Range(0, udst.rows), cv::Range(0, udst.cols/2 + 1));
} }
Mat gpu = udst.getMat(ACCESS_READ); //Mat gpu = udst.getMat(ACCESS_READ);
std::cout << src << std::endl; //std::cout << dst << std::endl;
std::cout << dst << std::endl; //std::cout << gpu << std::endl;
std::cout << gpu << std::endl;
//int cn = udst.channels(); //int cn = udst.channels();
// //
//Mat dst1ch = dst.reshape(1);
//Mat gpu1ch = gpu.reshape(1);
//Mat df; //Mat df;
//absdiff(dst, gpu, df); //absdiff(dst1ch, gpu1ch, df);
//std::cout << Mat_<int>(df) << std::endl; //std::cout << Mat_<int>(df) << std::endl;
double eps = src.size().area() * 1e-4; double eps = src.size().area() * 1e-4;
...@@ -188,13 +185,12 @@ OCL_TEST_P(MulSpectrums, Mat) ...@@ -188,13 +185,12 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(4, 1), cv::Size(5, 8), cv::Size(6, 6), OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)),
cv::Size(512, 1), cv::Size(1280, 768)), Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R),
Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R),
Bool(), // DFT_INVERSE Bool(), // DFT_INVERSE
Bool(), // DFT_ROWS Bool(), // DFT_ROWS
Bool(), // DFT_SCALE Bool(), // DFT_SCALE
Bool() // inplace Bool() // hint
) )
); );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment