Commit 719e8674 authored by yao's avatar yao

fix the compile errors on Mac

parent 656594ad
...@@ -77,7 +77,7 @@ namespace cv ...@@ -77,7 +77,7 @@ namespace cv
size_t wave_size = 0; size_t wave_size = 0;
queryDeviceInfo(WAVEFRONT_SIZE, &wave_size); queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
std::sprintf(pSURF_OPTIONS, " -D WAVE_SIZE=%d", static_cast<int>(wave_size)); std::sprintf(pSURF_OPTIONS, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
OPTION_INIT = true; OPTION_INIT = true;
} }
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS); openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS);
......
...@@ -277,8 +277,7 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, ...@@ -277,8 +277,7 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
char compile_option[128]; char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s", sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
rectKernel?"-D RECTKERNEL":"", s, rectKernel?"-D RECTKERNEL":"");
s);
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
......
...@@ -44,7 +44,11 @@ ...@@ -44,7 +44,11 @@
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif #endif
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
...@@ -60,8 +64,11 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of ...@@ -60,8 +64,11 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
if (x < cols && y < thread_rows) if (x < cols && y < thread_rows)
{ {
x = x << 2; x = x << 2;
#define dst_align (dst_offset & 3) #ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align); int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
...@@ -115,8 +122,11 @@ __kernel void arithm_flip_rows_D1 (__global char *src, int src_step, int src_off ...@@ -115,8 +122,11 @@ __kernel void arithm_flip_rows_D1 (__global char *src, int src_step, int src_off
if (x < cols && y < thread_rows) if (x < cols && y < thread_rows)
{ {
x = x << 2; x = x << 2;
#define dst_align (dst_offset & 3) #ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align); int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
...@@ -157,8 +167,11 @@ __kernel void arithm_flip_rows_D2 (__global ushort *src, int src_step, int src_o ...@@ -157,8 +167,11 @@ __kernel void arithm_flip_rows_D2 (__global ushort *src, int src_step, int src_o
if (x < cols && y < thread_rows) if (x < cols && y < thread_rows)
{ {
x = x << 2; x = x << 2;
#define dst_align (((dst_offset >> 1) & 3) << 1) #ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align); int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
...@@ -199,8 +212,11 @@ __kernel void arithm_flip_rows_D3 (__global short *src, int src_step, int src_of ...@@ -199,8 +212,11 @@ __kernel void arithm_flip_rows_D3 (__global short *src, int src_step, int src_of
if (x < cols && y < thread_rows) if (x < cols && y < thread_rows)
{ {
x = x << 2; x = x << 2;
#define dst_align (((dst_offset >> 1) & 3) << 1) #ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align); int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
...@@ -314,16 +330,14 @@ __kernel void arithm_flip_cols_C1_D0 (__global uchar *src, int src_step, int src ...@@ -314,16 +330,14 @@ __kernel void arithm_flip_cols_C1_D0 (__global uchar *src, int src_step, int src
if (x < thread_cols && y < rows) if (x < thread_cols && y < rows)
{ {
int src_index_0 = mad24(y, src_step, (x) + src_offset); int src_index_0 = mad24(y, src_step, (x) + src_offset);
int src_index_1 = mad24(y, src_step, (cols - x -1) + src_offset);
int dst_index_0 = mad24(y, dst_step, (x) + dst_offset);
int dst_index_1 = mad24(y, dst_step, (cols - x -1) + dst_offset); int dst_index_1 = mad24(y, dst_step, (cols - x -1) + dst_offset);
uchar data0 = *(src + src_index_0); uchar data0 = *(src + src_index_0);
uchar data1 = *(src + src_index_1); *(dst + dst_index_1) = data0;
int src_index_1 = mad24(y, src_step, (cols - x -1) + src_offset);
int dst_index_0 = mad24(y, dst_step, (x) + dst_offset);
uchar data1 = *(src + src_index_1);
*(dst + dst_index_0) = data1; *(dst + dst_index_0) = data1;
*(dst + dst_index_1) = data0;
} }
} }
__kernel void arithm_flip_cols_C1_D1 (__global char *src, int src_step, int src_offset, __kernel void arithm_flip_cols_C1_D1 (__global char *src, int src_step, int src_offset,
......
This diff is collapsed.
...@@ -44,7 +44,11 @@ ...@@ -44,7 +44,11 @@
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif #endif
#define LSIZE 256 #define LSIZE 256
#define LSIZE_1 255 #define LSIZE_1 255
...@@ -71,13 +75,13 @@ kernel void integral_cols(__global uchar4 *src,__global int *sum ,__global float ...@@ -71,13 +75,13 @@ kernel void integral_cols(__global uchar4 *src,__global int *sum ,__global float
gid = gid << 1; gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1) for(int i = 0; i < rows; i =i + LSIZE_1)
{ {
src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid]) : 0); src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid, (uint)cols - 1)]) : 0);
src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid + 1]) : 0); src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, (uint)cols - 1)]) : 0);
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]); sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? 0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]); sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]); sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? 0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]); sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid); int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
...@@ -127,7 +131,8 @@ kernel void integral_cols(__global uchar4 *src,__global int *sum ,__global float ...@@ -127,7 +131,8 @@ kernel void integral_cols(__global uchar4 *src,__global int *sum ,__global float
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ; int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
if(lid > 0 && (i+lid) <= rows){ if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0]; lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1]; lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0]; lm_sqsum[0][bf_loc] += sqsum_t[0];
...@@ -169,15 +174,15 @@ kernel void integral_rows(__global int4 *srcsum,__global float4 * srcsqsum,__glo ...@@ -169,15 +174,15 @@ kernel void integral_rows(__global int4 *srcsum,__global float4 * srcsqsum,__glo
src_step = src_step >> 4; src_step = src_step >> 4;
for(int i = 0; i < rows; i =i + LSIZE_1) for(int i = 0; i < rows; i =i + LSIZE_1)
{ {
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : 0; src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (int4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : 0; sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : (float4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : 0; src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (int4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : 0; sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]); sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? 0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]); sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]); sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? 0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]); sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid); int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
...@@ -228,14 +233,14 @@ kernel void integral_rows(__global int4 *srcsum,__global float4 * srcsqsum,__glo ...@@ -228,14 +233,14 @@ kernel void integral_rows(__global int4 *srcsum,__global float4 * srcsqsum,__glo
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows) if(gid == 0 && (i + lid) <= rows)
{ {
sum[sum_offset + i + lid] = 0; sum[sum_offset + i + lid] = 0;
sqsum[sqsum_offset + i + lid] = 0; sqsum[sqsum_offset + i + lid] = 0;
} }
if(i + lid == 0) if(i + lid == 0)
{ {
int loc0 = gid * 2 * sum_step; int loc0 = gid * 2 * sum_step;
int loc1 = gid * 2 * sqsum_step; int loc1 = gid * 2 * sqsum_step;
for(int k = 1;k <= 8;k++) for(int k = 1; k <= 8; k++)
{ {
if(gid * 8 + k > cols) break; if(gid * 8 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0; sum[sum_offset + loc0 + k * sum_step / 4] = 0;
...@@ -244,7 +249,8 @@ kernel void integral_rows(__global int4 *srcsum,__global float4 * srcsqsum,__glo ...@@ -244,7 +249,8 @@ kernel void integral_rows(__global int4 *srcsum,__global float4 * srcsqsum,__glo
} }
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ; int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ; int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
if(lid > 0 && (i+lid) <= rows){ if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0]; lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1]; lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0]; lm_sqsum[0][bf_loc] += sqsum_t[0];
......
...@@ -447,10 +447,10 @@ void matchTemplate_Naive_CCORR_C1_D0 ...@@ -447,10 +447,10 @@ void matchTemplate_Naive_CCORR_C1_D0
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); __global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++) for(j = 0; j < tpl_cols; j ++)
{ {
sum = mad24(img_ptr[j], tpl_ptr[j], sum); sum = mad24(convert_int(img_ptr[j]), convert_int(tpl_ptr[j]), sum);
} }
} }
res[res_idx] = sum; res[res_idx] = (float)sum;
} }
} }
...@@ -548,7 +548,7 @@ void matchTemplate_Naive_CCORR_C4_D0 ...@@ -548,7 +548,7 @@ void matchTemplate_Naive_CCORR_C4_D0
sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum); sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum);
} }
} }
res[res_idx] = sum.x + sum.y + sum.z + sum.w; res[res_idx] = (float)(sum.x + sum.y + sum.z + sum.w);
} }
} }
...@@ -633,9 +633,8 @@ void matchTemplate_Prepared_CCOFF_C1_D0 ...@@ -633,9 +633,8 @@ void matchTemplate_Prepared_CCOFF_C1_D0
if(gidx < res_cols && gidy < res_rows) if(gidx < res_cols && gidy < res_rows)
{ {
float sum = (float)( float sum = (float)((img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)]) -(img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
res[res_idx] -= sum * tpl_sum; res[res_idx] -= sum * tpl_sum;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment