Commit 0a136334 authored by Alexander Alekhin

Merge pull request #15444 from alalek:ocl_fix_fft_kernel

parents 76e403cf 8bd2720c
...@@ -536,9 +536,9 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, ...@@ -536,9 +536,9 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
const int x = get_global_id(0); const int x = get_global_id(0);
const int y = get_group_id(1); const int y = get_group_id(1);
const int block_size = LOCAL_SIZE/kercn; const int block_size = LOCAL_SIZE/kercn;
__local CT smem[LOCAL_SIZE]; // used in (y < nz) code branch only, but should be declared in the outermost scope of a kernel function
if (y < nz) if (y < nz)
{ {
__local CT smem[LOCAL_SIZE];
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); __global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset);
const int ind = x; const int ind = x;
#ifdef IS_1D #ifdef IS_1D
...@@ -615,9 +615,9 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step, ...@@ -615,9 +615,9 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
const int x = get_group_id(0); const int x = get_group_id(0);
const int y = get_global_id(1); const int y = get_global_id(1);
__local CT smem[LOCAL_SIZE]; // used in (x < nz) code branch only, but should be declared in the outermost scope of a kernel function
if (x < nz) if (x < nz)
{ {
__local CT smem[LOCAL_SIZE];
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(CT)), src_offset)); __global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(CT)), src_offset));
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); __global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset);
const int ind = y; const int ind = y;
...@@ -682,9 +682,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step, ...@@ -682,9 +682,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
const FT scale = (FT) 1/(dst_cols*dst_rows); const FT scale = (FT) 1/(dst_cols*dst_rows);
#endif #endif
__local CT smem[LOCAL_SIZE]; // used in (y < nz) code branch only, but should be declared in the outermost scope of a kernel function
if (y < nz) if (y < nz)
{ {
__local CT smem[LOCAL_SIZE];
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); __global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset);
const int ind = x; const int ind = x;
...@@ -782,10 +782,10 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, ...@@ -782,10 +782,10 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
const int x = get_group_id(0); const int x = get_group_id(0);
const int y = get_global_id(1); const int y = get_global_id(1);
#ifdef COMPLEX_INPUT __local CT smem[LOCAL_SIZE]; // used in (x < nz) code branch only, but should be declared in the outermost scope of a kernel function
if (x < nz) if (x < nz)
{ {
__local CT smem[LOCAL_SIZE]; #ifdef COMPLEX_INPUT
__global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(CT)), src_offset)); __global const uchar* src = src_ptr + mad24(y, src_step, mad24(x, (int)(sizeof(CT)), src_offset));
__global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(CT)), dst_offset)); __global uchar* dst = dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(CT)), dst_offset));
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); __global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset);
...@@ -812,15 +812,11 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, ...@@ -812,15 +812,11 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
res[0].x = smem[y + i*block_size].x; res[0].x = smem[y + i*block_size].x;
res[0].y = -smem[y + i*block_size].y; res[0].y = -smem[y + i*block_size].y;
} }
}
#else #else
if (x < nz)
{
__global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset); __global const CT* twiddles = (__global const CT*)(twiddles_ptr + twiddles_offset);
const int ind = y; const int ind = y;
const int block_size = LOCAL_SIZE/kercn; const int block_size = LOCAL_SIZE/kercn;
__local CT smem[LOCAL_SIZE];
#ifdef EVEN #ifdef EVEN
if (x!=0 && (x!=(nz-1))) if (x!=0 && (x!=(nz-1)))
#else #else
...@@ -877,6 +873,6 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step, ...@@ -877,6 +873,6 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
res[0].x = smem[y + i*block_size].x; res[0].x = smem[y + i*block_size].x;
res[0].y = -smem[y + i*block_size].y; res[0].y = -smem[y + i*block_size].y;
} }
}
#endif #endif
} }
\ No newline at end of file }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment