Commit 66ac4621 authored by Alexander Karsakov

Final refactoring, fixes

parent 1d2cf0e2
...@@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine( ...@@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
typedef Size_MatType TransposeFixture; typedef Size_MatType TransposeFixture;
OCL_PERF_TEST_P(TransposeFixture, Transpose, ::testing::Combine( OCL_PERF_TEST_P(TransposeFixture, Transpose, ::testing::Combine(
OCL_TEST_SIZES, Values(CV_8UC1, CV_32FC1, CV_8UC2, CV_32FC2, CV_8UC4, CV_32FC4))) OCL_TEST_SIZES, OCL_TEST_TYPES_134))
{ {
const Size_MatType_t params = GetParam(); const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params); const Size srcSize = get<0>(params);
......
...@@ -54,40 +54,21 @@ namespace ocl { ...@@ -54,40 +54,21 @@ namespace ocl {
///////////// dft //////////////////////// ///////////// dft ////////////////////////
enum OCL_FFT_TYPE typedef tuple<Size, int> DftParams;
{
R2R = 0, // real to real (CCS)
C2R = 1, // complex to real
R2C = 2, // real to complex
C2C = 3 // complex to complex
};
typedef tuple<OCL_FFT_TYPE, Size, int> DftParams;
typedef TestBaseWithParam<DftParams> DftFixture; typedef TestBaseWithParam<DftParams> DftFixture;
OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(C2C, R2R, C2R, R2C), OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, Size(1024, 1024), Size(512, 512), Size(2048, 2048)), Values((int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE,
Values((int) 0, (int)DFT_ROWS, (int)DFT_SCALE/*, (int)DFT_INVERSE, (int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE)))
(int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/)))
{ {
const DftParams params = GetParam(); const DftParams params = GetParam();
const int dft_type = get<0>(params); const Size srcSize = get<0>(params);
const Size srcSize = get<1>(params); const int flags = get<1>(params);
int flags = get<2>(params);
UMat src(srcSize, CV_32FC2), dst(srcSize, CV_32FC2);
int in_cn, out_cn;
switch (dft_type)
{
case R2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 1; out_cn = 1; break;
case C2R: flags |= cv::DFT_REAL_OUTPUT; in_cn = 2; out_cn = 2; break;
case R2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 1; out_cn = 2; break;
case C2C: flags |= cv::DFT_COMPLEX_OUTPUT; in_cn = 2; out_cn = 2; break;
}
UMat src(srcSize, CV_MAKE_TYPE(CV_32F, in_cn)), dst(srcSize, CV_MAKE_TYPE(CV_32F, out_cn));
declare.in(src, WARMUP_RNG).out(dst); declare.in(src, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::dft(src, dst, flags); OCL_TEST_CYCLE() cv::dft(src, dst, flags | DFT_COMPLEX_OUTPUT);
SANITY_CHECK(dst, 1e-3); SANITY_CHECK(dst, 1e-3);
} }
......
This diff is collapsed.
...@@ -3002,8 +3002,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], ...@@ -3002,8 +3002,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
sync ? 0 : &p->e); sync ? 0 : &p->e);
if( sync || retval != CL_SUCCESS ) if( sync || retval != CL_SUCCESS )
{ {
int a = clFinish(qq); CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
CV_OclDbgAssert(a == CL_SUCCESS);
p->cleanupUMats(); p->cleanupUMats();
} }
else else
...@@ -3899,9 +3898,8 @@ public: ...@@ -3899,9 +3898,8 @@ public:
if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
{ {
AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
int a = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0); u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
CV_Assert( a == CL_SUCCESS );
u->markHostCopyObsolete(false); u->markHostCopyObsolete(false);
} }
} }
......
...@@ -571,10 +571,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, ...@@ -571,10 +571,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
} }
else else
{ {
// fill with zero other rows
#ifdef COMPLEX_OUTPUT
__global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset)); __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset));
#else
__global float* dst = (__global float*)(dst_ptr + mad24(y, dst_step, dst_offset));
#endif
#pragma unroll #pragma unroll
for (int i=x; i<dst_cols; i+=block_size) for (int i=x; i<dst_cols; i+=block_size)
dst[i] = (float2) 0.f; dst[i] = 0.f;
} }
} }
...@@ -667,12 +672,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, ...@@ -667,12 +672,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
smem[x+i*block_size].y = -src[i*block_size].y; smem[x+i*block_size].y = -src[i*block_size].y;
} }
#else #else
__global const float2* src;
#if !defined(REAL_INPUT) && defined(NO_CONJUGATE) #if !defined(REAL_INPUT) && defined(NO_CONJUGATE)
src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(2, (int)sizeof(float), src_offset))); __global const float2* src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(2, (int)sizeof(float), src_offset)));
#else
src = (__global const float2*)(src_ptr + mad24(y, src_step, mad24(1, (int)sizeof(float), src_offset)));
#endif
#pragma unroll #pragma unroll
for (int i=x; i<(LOCAL_SIZE-1)/2; i+=block_size) for (int i=x; i<(LOCAL_SIZE-1)/2; i+=block_size)
...@@ -681,6 +683,20 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, ...@@ -681,6 +683,20 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
smem[i+1].y = -src[i].y; smem[i+1].y = -src[i].y;
smem[LOCAL_SIZE-i-1] = src[i]; smem[LOCAL_SIZE-i-1] = src[i];
} }
#else
#pragma unroll
for (int i=x; i<(LOCAL_SIZE-1)/2; i+=block_size)
{
float2 src = vload2(0, (__global const float*)(src_ptr + mad24(y, src_step, mad24(2*i+1, (int)sizeof(float), src_offset))));
smem[i+1].x = src.x;
smem[i+1].y = -src.y;
smem[LOCAL_SIZE-i-1] = src;
}
#endif
if (x==0) if (x==0)
{ {
smem[0].x = *(__global const float*)(src_ptr + mad24(y, src_step, src_offset)); smem[0].x = *(__global const float*)(src_ptr + mad24(y, src_step, src_offset));
...@@ -688,7 +704,11 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, ...@@ -688,7 +704,11 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
if(LOCAL_SIZE % 2 ==0) if(LOCAL_SIZE % 2 ==0)
{ {
#if !defined(REAL_INPUT) && defined(NO_CONJUGATE)
smem[LOCAL_SIZE/2].x = src[LOCAL_SIZE/2-1].x; smem[LOCAL_SIZE/2].x = src[LOCAL_SIZE/2-1].x;
#else
smem[LOCAL_SIZE/2].x = *(__global const float*)(src_ptr + mad24(y, src_step, mad24(LOCAL_SIZE-1, (int)sizeof(float), src_offset)));
#endif
smem[LOCAL_SIZE/2].y = 0.f; smem[LOCAL_SIZE/2].y = 0.f;
} }
} }
...@@ -718,10 +738,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, ...@@ -718,10 +738,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
} }
else else
{ {
__global float2* dst = (__global float*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); // fill with zero other rows
#ifdef COMPLEX_OUTPUT
__global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset));
#else
__global float* dst = (__global float*)(dst_ptr + mad24(y, dst_step, dst_offset));
#endif
#pragma unroll #pragma unroll
for (int i=0; i<kercn; i++) for (int i=x; i<dst_cols; i+=block_size)
dst[i*block_size] = (float2) 0.f; dst[i] = 0.f;
} }
} }
...@@ -781,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, ...@@ -781,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#pragma unroll #pragma unroll
for (int i=0; i<kercn; i++) for (int i=0; i<kercn; i++)
{ {
float2 temp = *((__global const float2*)(src + i*block_size*src_step)); float2 temp = vload2(0, (__global const float*)(src + i*block_size*src_step));
smem[y+i*block_size].x = temp.x; smem[y+i*block_size].x = temp.x;
smem[y+i*block_size].y = -temp.y; smem[y+i*block_size].y = -temp.y;
} }
......
...@@ -48,26 +48,17 @@ ...@@ -48,26 +48,17 @@
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
enum OCL_FFT_TYPE
{
R2R = 0,
C2R = 1,
R2C = 2,
C2C = 3
};
namespace cvtest { namespace cvtest {
namespace ocl { namespace ocl {
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Dft // Dft
PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool)
{ {
cv::Size dft_size; cv::Size dft_size;
int dft_flags, depth, cn, dft_type; int dft_flags, depth;
bool hint; bool inplace;
bool is1d;
TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst); TEST_DECLARE_OUTPUT_PARAMETER(dst);
...@@ -75,60 +66,34 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool) ...@@ -75,60 +66,34 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
virtual void SetUp() virtual void SetUp()
{ {
dft_size = GET_PARAM(0); dft_size = GET_PARAM(0);
dft_type = GET_PARAM(1); depth = GET_PARAM(1);
depth = CV_32F; inplace = GET_PARAM(2);
dft_flags = 0; dft_flags = 0;
switch (dft_type)
{
case R2R: dft_flags |= cv::DFT_REAL_OUTPUT; cn = 1; break;
case C2R: dft_flags |= cv::DFT_REAL_OUTPUT; cn = 2; break;
case R2C: dft_flags |= cv::DFT_COMPLEX_OUTPUT; cn = 1; break;
case C2C: dft_flags |= cv::DFT_COMPLEX_OUTPUT; cn = 2; break;
}
if (GET_PARAM(2))
dft_flags |= cv::DFT_INVERSE;
if (GET_PARAM(3)) if (GET_PARAM(3))
dft_flags |= cv::DFT_ROWS; dft_flags |= cv::DFT_ROWS;
if (GET_PARAM(4)) if (GET_PARAM(4))
dft_flags |= cv::DFT_SCALE; dft_flags |= cv::DFT_SCALE;
hint = GET_PARAM(5); if (GET_PARAM(5))
is1d = (dft_flags & DFT_ROWS) != 0 || dft_size.height == 1; dft_flags |= cv::DFT_INVERSE;
} }
void generateTestData() void generateTestData(int cn = 2)
{ {
src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0); src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0);
usrc = src.getUMat(ACCESS_READ); usrc = src.getUMat(ACCESS_READ);
if (inplace)
dst = src, udst = usrc;
} }
}; };
OCL_TEST_P(Dft, Mat) OCL_TEST_P(Dft, C2C)
{ {
generateTestData(); generateTestData();
int nonzero_rows = hint ? src.cols - randomInt(1, src.rows-1) : 0; OCL_OFF(cv::dft(src, dst, dft_flags | cv::DFT_COMPLEX_OUTPUT));
OCL_OFF(cv::dft(src, dst, dft_flags, nonzero_rows)); OCL_ON(cv::dft(usrc, udst, dft_flags | cv::DFT_COMPLEX_OUTPUT));
OCL_ON(cv::dft(usrc, udst, dft_flags, nonzero_rows));
if (dft_type == R2C && is1d && (dft_flags & cv::DFT_INVERSE) == 0)
{
dst = dst(cv::Range(0, dst.rows), cv::Range(0, dst.cols/2 + 1));
udst = udst(cv::Range(0, udst.rows), cv::Range(0, udst.cols/2 + 1));
}
//Mat gpu = udst.getMat(ACCESS_READ);
//std::cout << dst << std::endl;
//std::cout << gpu << std::endl;
//int cn = udst.channels();
//
//Mat dst1ch = dst.reshape(1);
//Mat gpu1ch = gpu.reshape(1);
//Mat df;
//absdiff(dst1ch, gpu1ch, df);
//std::cout << Mat_<int>(df) << std::endl;
double eps = src.size().area() * 1e-4; double eps = src.size().area() * 1e-4;
EXPECT_MAT_NEAR(dst, udst, eps); EXPECT_MAT_NEAR(dst, udst, eps);
...@@ -185,13 +150,13 @@ OCL_TEST_P(MulSpectrums, Mat) ...@@ -185,13 +150,13 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)), OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20),
Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), cv::Size(512, 1), cv::Size(1024, 768)),
Bool(), // DFT_INVERSE Values(CV_32F, CV_64F),
Bool(), // inplace
Bool(), // DFT_ROWS Bool(), // DFT_ROWS
Bool(), // DFT_SCALE Bool(), // DFT_SCALE
Bool() // hint Bool()) // DFT_INVERSE
)
); );
} } // namespace cvtest::ocl } } // namespace cvtest::ocl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment