Commit 96121a66 authored by Ilya Lavrenov's avatar Ilya Lavrenov

kernel warnings on AMD

parent a81efdbb
...@@ -34,9 +34,13 @@ ...@@ -34,9 +34,13 @@
// //
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
__kernel void LUT_C1( __global const srcT * src, __global const dstT *lut, __kernel void LUT_C1( __global const srcT * src, __global const dstT *lut,
__global dstT *dst, __global dstT *dst,
......
...@@ -44,11 +44,11 @@ ...@@ -44,11 +44,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -44,11 +44,11 @@ ...@@ -44,11 +44,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,14 +43,6 @@ ...@@ -43,14 +43,6 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary//////////////////////////////////////////// ////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,24 +43,21 @@ ...@@ -43,24 +43,21 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#pragma OPENCL EXTENSION cl_khr_fp64:enable #ifdef cl_amd_fp64
#define CV_PI 3.1415926535897932384626433832795 #pragma OPENCL EXTENSION cl_amd_fp64:enable
#ifndef DBL_EPSILON #elif defined (cl_khr_fp64)
#define DBL_EPSILON 0x1.0p-52 #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#define CV_PI M_PI
#else #else
#define CV_PI 3.1415926535897932384626433832795f #define CV_PI M_PI_F
#ifndef DBL_EPSILON
#define DBL_EPSILON 0x1.0p-52f
#endif
#endif #endif
__kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset, __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset, __global float *src2, int src2_step, int src2_offset,
__global float *dst1, int dst1_step, int dst1_offset, //magnitude __global float *dst1, int dst1_step, int dst1_offset, // magnitude
__global float *dst2, int dst2_step, int dst2_offset, //cartToPolar __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
int rows, int cols, int angInDegree) int rows, int cols, int angInDegree)
{ {
int x = get_global_id(0); int x = get_global_id(0);
...@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr ...@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
float y2 = y * y; float y2 = y * y;
float magnitude = sqrt(x2 + y2); float magnitude = sqrt(x2 + y2);
float cartToPolar;
float tmp = y >= 0 ? 0 : CV_PI*2; float tmp = y >= 0 ? 0 : CV_PI*2;
tmp = x < 0 ? CV_PI : tmp; tmp = x < 0 ? CV_PI : tmp;
float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f; float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp : float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON); tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI); cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
*((__global float *)((__global char *)dst1 + dst1_index)) = magnitude; *((__global float *)((__global char *)dst1 + dst1_index)) = magnitude;
*((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar; *((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar;
...@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr ...@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *src2, int src2_step, int src2_offset, __global double *src2, int src2_step, int src2_offset,
__global double *dst1, int dst1_step, int dst1_offset, __global double *dst1, int dst1_step, int dst1_offset,
...@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s ...@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
double y2 = y * y; double y2 = y * y;
double magnitude = sqrt(x2 + y2); double magnitude = sqrt(x2 + y2);
double cartToPolar;
float tmp = y >= 0 ? 0 : CV_PI*2; float tmp = y >= 0 ? 0 : CV_PI*2;
tmp = x < 0 ? CV_PI : tmp; tmp = x < 0 ? CV_PI : tmp;
float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5; float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5;
cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + (float)DBL_EPSILON) + tmp : double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp :
tmp1 - x*y/(y2 + 0.28f*x2 + (float)DBL_EPSILON); tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI); cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
*((__global double *)((__global char *)dst1 + dst1_index)) = magnitude; *((__global double *)((__global char *)dst1 + dst1_index)) = magnitude;
*((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar; *((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar;
} }
} }
#endif #endif
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,9 +43,13 @@ ...@@ -43,9 +43,13 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////LOG///////////////////////////////////////////////////// /////////////////////////////////////////////LOG/////////////////////////////////////////////////////
......
...@@ -43,9 +43,13 @@ ...@@ -43,9 +43,13 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
__kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset, __kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset, __global float *src2, int src2_step, int src2_offset,
......
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
/**************************************PUBLICFUNC*************************************/ /**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
......
...@@ -44,8 +44,13 @@ ...@@ -44,8 +44,13 @@
//M*/ //M*/
/**************************************PUBLICFUNC*************************************/ /**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT)
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define RES_TYPE double4 #define RES_TYPE double4
#define CONVERT_RES_TYPE convert_double4 #define CONVERT_RES_TYPE convert_double4
#else #else
......
...@@ -44,8 +44,13 @@ ...@@ -44,8 +44,13 @@
//M*/ //M*/
/**************************************PUBLICFUNC*************************************/ /**************************************PUBLICFUNC*************************************/
#if defined (DOUBLE_SUPPORT)
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define RES_TYPE double4 #define RES_TYPE double4
#define CONVERT_RES_TYPE convert_double4 #define CONVERT_RES_TYPE convert_double4
#else #else
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
......
...@@ -44,17 +44,17 @@ ...@@ -44,17 +44,17 @@
// //
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#define CV_PI M_PI #define CV_PI M_PI
#define CV_2PI (2 * CV_PI) #define CV_2PI (2 * CV_PI)
#else #else
#define CV_PI M_PI_F #define CV_PI M_PI_F
#define CV_2PI (2 * CV_PI) #define CV_2PI (2 * CV_PI)
#endif #endif
/**************************************phase inradians**************************************/ /**************************************phase inradians**************************************/
...@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, ...@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
double data1 = src1[src1_index]; double data1 = src1[src1_index];
double data2 = src2[src2_index]; double data2 = src2[src2_index];
double tmp = atan2(src2[src2_index], src1[src1_index]); double tmp = atan2(data2, data1);
tmp = 180 * tmp / CV_PI; tmp = 180 * tmp / CV_PI;
if (tmp < 0) if (tmp < 0)
......
...@@ -44,14 +44,14 @@ ...@@ -44,14 +44,14 @@
//M*/ //M*/
#ifdef DOUBLE_SUPPORT #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#define CV_PI M_PI #define CV_PI M_PI
#else #else
#define CV_PI M_PI_F #define CV_PI M_PI_F
#endif #endif
///////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -43,21 +43,22 @@ ...@@ -43,21 +43,22 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
typedef double F; #endif
typedef double4 F4; #define F double
#define convert_F4 convert_double4;
#else #else
typedef float F; #define F float
typedef float4 F4;
#define convert_F4 convert_float4;
#endif #endif
/************************************** pow **************************************/ /************************************** pow **************************************/
__kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset, __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *dst, int dst_step, int dst_offset, __global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1, int rows, int cols, int dst_step1, F p)
F p)
{ {
int x = get_global_id(0); int x = get_global_id(0);
...@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse ...@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse
*((__global float *)((__global char *)dst + dst_index)) = tmp; *((__global float *)((__global char *)dst + dst_index)) = tmp;
} }
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *dst, int dst_step, int dst_offset, __global double *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1, int rows, int cols, int dst_step1, F p)
F p)
{ {
int x = get_global_id(0); int x = get_global_id(0);
...@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs ...@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs
double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data)))); double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
*((__global double *)((__global char *)dst + dst_index)) = tmp; *((__global double *)((__global char *)dst + dst_index)) = tmp;
} }
} }
#endif #endif
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,7 +43,7 @@ ...@@ -43,7 +43,7 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
......
...@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar) ...@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
{ {
return fmax(var + learningRate * (diff * diff - var), minVar); return fmax(var + learningRate * (diff * diff - var), minVar);
} }
#else #else
#define T_FRAME uchar4 #define T_FRAME uchar4
#define T_MEAN_VAR float4 #define T_MEAN_VAR float4
#define CONVERT_TYPE convert_uchar4_sat #define CONVERT_TYPE convert_uchar4_sat
#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f) #define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
inline float4 cvt(const uchar4 val) inline float4 cvt(const uchar4 val)
{ {
float4 result; float4 result;
...@@ -93,6 +96,14 @@ inline float sum(const float4 val) ...@@ -93,6 +96,14 @@ inline float sum(const float4 val)
return (val.x + val.y + val.z); return (val.x + val.y + val.z);
} }
static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
{
float4 val = ptr[(k * rows + y) * ptr_step + x];
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
ptr[((k + 1) * rows + y) * ptr_step + x] = val;
}
static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar) static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
{ {
float4 result; float4 result;
...@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl ...@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
result.w = 0.0f; result.w = 0.0f;
return result; return result;
} }
#endif #endif
typedef struct typedef struct
...@@ -114,7 +126,7 @@ typedef struct ...@@ -114,7 +126,7 @@ typedef struct
float c_varMax; float c_varMax;
float c_tau; float c_tau;
uchar c_shadowVal; uchar c_shadowVal;
}con_srtuct_t; } con_srtuct_t;
static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step) static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
{ {
...@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste ...@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
ptr[((k + 1) * rows + y) * ptr_step + x] = val; ptr[((k + 1) * rows + y) * ptr_step + x] = val;
} }
static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
{
float4 val = ptr[(k * rows + y) * ptr_step + x];
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
ptr[((k + 1) * rows + y) * ptr_step + x] = val;
}
__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask, __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
__global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var, __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
int frame_row, int frame_col, int frame_step, int fgmask_step, int frame_row, int frame_col, int frame_step, int fgmask_step,
......
...@@ -43,7 +43,7 @@ ...@@ -43,7 +43,7 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
......
...@@ -63,14 +63,6 @@ ...@@ -63,14 +63,6 @@
#define DIST_TYPE 0 #define DIST_TYPE 0
#endif #endif
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static int bit1Count(int v)
{
v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
}
// dirty fix for non-template support // dirty fix for non-template support
#if (DIST_TYPE == 0) // L1Dist #if (DIST_TYPE == 0) // L1Dist
# ifdef T_FLOAT # ifdef T_FLOAT
...@@ -89,6 +81,13 @@ typedef float value_type; ...@@ -89,6 +81,13 @@ typedef float value_type;
typedef float result_type; typedef float result_type;
#define DIST_RES(x) sqrt(x) #define DIST_RES(x) sqrt(x)
#elif (DIST_TYPE == 2) // Hamming #elif (DIST_TYPE == 2) // Hamming
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static int bit1Count(int v)
{
v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
}
#define DIST(x, y) bit1Count( (x) ^ (y) ) #define DIST(x, y) bit1Count( (x) ^ (y) )
typedef int value_type; typedef int value_type;
typedef int result_type; typedef int result_type;
......
...@@ -33,12 +33,17 @@ ...@@ -33,12 +33,17 @@
// //
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows, __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
int dstStep_in_piexl,int pixel_end) int cols, int rows,
int dstStep_in_piexl, int pixel_end)
{ {
int id = get_global_id(0); int id = get_global_id(0);
int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2)); int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2));
...@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY ...@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
dst[addr.y] = outpix1; dst[addr.y] = outpix1;
} }
else if(outx.x<cols && outy.x<rows) else if(outx.x<cols && outy.x<rows)
{
dst[addr.x] = outpix0; dst[addr.x] = outpix0;
}
} }
__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows, __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
int srcStep_in_pixel,int pixel_end) int cols, int rows,
int srcStep_in_pixel, int pixel_end)
{ {
int id = get_global_id(0)<<2; int id = get_global_id(0)<<2;
int y = id / cols; int y = id / cols;
...@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY ...@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
dst[outaddr.y] = outpixel1; dst[outaddr.y] = outpixel1;
} }
else if(outaddr.x <= pixel_end) else if(outaddr.x <= pixel_end)
{
dst[outaddr.x] = pixel0; dst[outaddr.x] = pixel0;
}
} }
...@@ -146,7 +146,11 @@ ...@@ -146,7 +146,11 @@
#endif #endif
#if USE_DOUBLE #if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double #define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE) #define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else #else
......
...@@ -143,7 +143,11 @@ ...@@ -143,7 +143,11 @@
#endif #endif
#if USE_DOUBLE #if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double #define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE) #define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else #else
......
...@@ -45,8 +45,6 @@ ...@@ -45,8 +45,6 @@
// //
//M*/ //M*/
// Enter your kernel in this window
//#pragma OPENCL EXTENSION cl_amd_printf:enable
#define CV_HAAR_FEATURE_MAX 3 #define CV_HAAR_FEATURE_MAX 3
typedef int sumtype; typedef int sumtype;
typedef float sqsumtype; typedef float sqsumtype;
...@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH ...@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
int counter = get_global_id(0); int counter = get_global_id(0);
int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0; int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
GpuHidHaarTreeNode t1 = *(orinode + counter); GpuHidHaarTreeNode t1 = *(orinode + counter);
#pragma unroll
#pragma unroll
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f); tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
...@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH ...@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]); t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
counter += nodenum; counter += nodenum;
#pragma unroll
#pragma unroll
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
newnode[counter].p[i][0] = tr_x[i]; newnode[counter].p[i][0] = tr_x[i];
......
...@@ -43,11 +43,13 @@ ...@@ -43,11 +43,13 @@
// //
//M*/ //M*/
#if defined (__ATI__) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__) #elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
/************************************** convolve **************************************/ /************************************** convolve **************************************/
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
// //
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
......
...@@ -43,13 +43,14 @@ ...@@ -43,13 +43,14 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
#define LSIZE 256 #define LSIZE 256
#define LSIZE_1 255 #define LSIZE_1 255
#define LSIZE_2 254 #define LSIZE_2 254
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -43,11 +43,11 @@ ...@@ -43,11 +43,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -48,8 +48,12 @@ ...@@ -48,8 +48,12 @@
// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported. // Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported.
// We shall support other types later if necessary. // We shall support other types later if necessary.
#if defined DOUBLE_SUPPORT #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define F double #define F double
#else #else
#define F float #define F float
......
...@@ -43,7 +43,7 @@ ...@@ -43,7 +43,7 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64) #elif defined (cl_khr_fp64)
......
...@@ -47,11 +47,11 @@ ...@@ -47,11 +47,11 @@
//warpAffine kernel //warpAffine kernel
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic. //support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
typedef double F; typedef double F;
typedef double4 F4; typedef double4 F4;
......
...@@ -47,11 +47,11 @@ ...@@ -47,11 +47,11 @@
//wrapPerspective kernel //wrapPerspective kernel
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic. //support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
typedef double F; typedef double F;
typedef double4 F4; typedef double4 F4;
......
...@@ -61,35 +61,6 @@ ...@@ -61,35 +61,6 @@
#define my_comp(x,y) ((x) < (y)) #define my_comp(x,y) ((x) < (y))
#endif #endif
///////////// parallel merge sort ///////////////
// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
{
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
uint firstIndex = left;
uint lastIndex = right;
// This loops through [firstIndex, lastIndex)
// Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
// this while loop will be divergent within a wavefront
while( firstIndex < lastIndex )
{
K_T dataVal = data[ firstIndex ];
// This branch will create divergent wavefronts
if( my_comp( dataVal, searchVal ) )
{
firstIndex = firstIndex+1;
}
else
{
break;
}
}
return firstIndex;
}
// This implements a binary search routine to look for an 'insertion point' in a sequence, denoted // This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
// by a base pointer and left and right index for a particular candidate value. The comparison operator is // by a base pointer and left and right index for a particular candidate value. The comparison operator is
// passed as a functor parameter my_comp // passed as a functor parameter my_comp
......
...@@ -42,8 +42,13 @@ ...@@ -42,8 +42,13 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT)
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define TYPE double #define TYPE double
#else #else
#define TYPE float #define TYPE float
......
...@@ -43,14 +43,12 @@ ...@@ -43,14 +43,12 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#define TYPE_IMAGE_SQSUM double #define TYPE_IMAGE_SQSUM double
#else #else
#define TYPE_IMAGE_SQSUM float #define TYPE_IMAGE_SQSUM float
......
...@@ -43,15 +43,19 @@ ...@@ -43,15 +43,19 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////optimized code using vector roi////////////////////////// //////////////////////////////////optimized code using vector roi//////////////////////////
////////////vector fuction name format: merge_vector_C(channels number)D_(data type depth)////// ////////////vector fuction name format: merge_vector_C(channels number)D_(data type depth)//////
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void merge_vector_C2_D0(__global uchar *mat_dst, int dst_step, int dst_offset, __kernel void merge_vector_C2_D0(__global uchar *mat_dst, int dst_step, int dst_offset,
__global uchar *mat_src0, int src0_step, int src0_offset, __global uchar *mat_src0, int src0_step, int src0_offset,
__global uchar *mat_src1, int src1_step, int src1_offset, __global uchar *mat_src1, int src1_step, int src1_offset,
......
...@@ -44,11 +44,11 @@ ...@@ -44,11 +44,11 @@
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
typedef double T; typedef double T;
#else #else
......
...@@ -35,8 +35,12 @@ ...@@ -35,8 +35,12 @@
// //
#ifdef DOUBLE_SUPPORT #ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
__kernel void convert_to( __kernel void convert_to(
__global const srcT* restrict srcMat, __global const srcT* restrict srcMat,
......
...@@ -34,11 +34,11 @@ ...@@ -34,11 +34,11 @@
// //
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -34,11 +34,11 @@ ...@@ -34,11 +34,11 @@
// //
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -34,11 +34,11 @@ ...@@ -34,11 +34,11 @@
// //
// //
#if defined (DOUBLE_SUPPORT) #ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64 #ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
......
...@@ -45,8 +45,6 @@ ...@@ -45,8 +45,6 @@
// //
//M*/ //M*/
//#pragma OPENCL EXTENSION cl_amd_printf : enable
#define BUFFER 64 #define BUFFER 64
#define BUFFER2 BUFFER>>1 #define BUFFER2 BUFFER>>1
#ifndef WAVE_SIZE #ifndef WAVE_SIZE
......
...@@ -38,9 +38,14 @@ ...@@ -38,9 +38,14 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M*/
#if defined (DOUBLE_SUPPORT)
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif
#if DATA_DEPTH == 0 #if DATA_DEPTH == 0
#define BASE_TYPE uchar #define BASE_TYPE uchar
......
...@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz) ...@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
{ {
unsigned int cache = cols[0]; unsigned int cache = cols[0];
#pragma unroll
for(int i = 1; i <= winsz; i++) for(int i = 1; i <= winsz; i++)
cache += cols[i]; cache += cols[i];
......
...@@ -45,13 +45,11 @@ ...@@ -45,13 +45,11 @@
//M*/ //M*/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_amd_fp64
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#endif #endif
#ifdef T_FLOAT #ifdef T_FLOAT
......
...@@ -44,19 +44,10 @@ ...@@ -44,19 +44,10 @@
// //
//M*/ //M*/
#ifndef FLT_MAX
#define FLT_MAX CL_FLT_MAX
#endif
#ifndef SHRT_MAX
#define SHRT_MAX CL_SHORT_MAX
#endif
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////get_first_k_initial_global////////////////////////////// ////////////////////////////////////////get_first_k_initial_global//////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////
__kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr, __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
__global short *ctemp, int h, int w, int nr_plane, __global short *ctemp, int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp) int cmsg_step1, int cdisp_step1, int cndisp)
...@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, ...@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
} }
} }
} }
__kernel void get_first_k_initial_global_1(__global float *data_cost_selected_, __global float *selected_disp_pyr, __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
__global float *ctemp, int h, int w, int nr_plane, __global float *ctemp, int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp) int cmsg_step1, int cdisp_step1, int cndisp)
...@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_, ...@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
//////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local//////////////////////////////////// ///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr, __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
__global short *ctemp,int h, int w, int nr_plane, __global short *ctemp,int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp) int cmsg_step1, int cdisp_step1, int cndisp)
...@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _ ...@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
/////////////////////// init data cost //////////////////////// /////////////////////// init data cost ////////////////////////
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
inline float compute_3(__global uchar* left, __global uchar* right, inline float compute_3(__global uchar* left, __global uchar* right,
float cdata_weight, float cmax_data_term) float cdata_weight, float cmax_data_term)
{ {
...@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right, ...@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term); return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
} }
inline float compute_1(__global uchar* left, __global uchar* right, inline float compute_1(__global uchar* left, __global uchar* right,
float cdata_weight, float cmax_data_term) float cdata_weight, float cmax_data_term)
{ {
...@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g ...@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
} }
} }
} }
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright, __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels, int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1, int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
...@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g ...@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
} }
} }
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce////////////////////////////////////////////////// //////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright, __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels, __local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth, int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
...@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle ...@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
////////////////////// compute data cost ////////////////////// ////////////////////// compute data cost //////////////////////
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_, __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels, int h, int w, int level, int nr_plane, int channels,
...@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo ...@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
} }
} }
} }
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_, __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels, int h, int w, int level, int nr_plane, int channels,
...@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo ...@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
} }
} }
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce////////////////////////////////////////// ////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_, __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem, __global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane, int level, int rows, int cols, int h, int nr_plane,
...@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short ...@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
} }
} }
static void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
__global float *r_new, __global const float *u_cur, __global const float *d_cur,
__global const float *l_cur, __global const float *r_cur,
__global float *data_cost_selected, __global float *disparity_selected_new,
__global float *data_cost_new, __global const float *data_cost_cur,
__global const float *disparity_selected_cur,
int nr_plane, int nr_plane2,
int cdisp_step1, int cdisp_step2)
{
for(int i = 0; i < nr_plane; i++)
{
float minimum = FLT_MAX;
int id = 0;
for(int j = 0; j < nr_plane2; j++)
{
float cur = data_cost_new[j * cdisp_step1];
if(cur < minimum)
{
minimum = cur;
id = j;
}
}
data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
data_cost_new[id * cdisp_step1] = FLT_MAX;
}
}
__kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_, __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
__global short *r_new_, __global short *u_cur_, __global const short *d_cur_, __global short *r_new_, __global short *u_cur_, __global const short *d_cur_,
__global const short *l_cur_, __global const short *r_cur_, __global short *ctemp, __global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
...@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g ...@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
cdisp_step1, cdisp_step2); cdisp_step1, cdisp_step2);
} }
} }
__kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_, __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
__global float *r_new_, __global const float *u_cur_, __global const float *d_cur_, __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
__global const float *l_cur_, __global const float *r_cur_, __global float *ctemp, __global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
......
...@@ -33,11 +33,12 @@ ...@@ -33,11 +33,12 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
// //
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64 #ifdef DOUBLE_SUPPORT
#pragma OPENCL EXTENSION cl_khr_fp64:enable #ifdef cl_amd_fp64
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable #pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif #endif
#define TYPE double #define TYPE double
#else #else
...@@ -53,7 +54,6 @@ ...@@ -53,7 +54,6 @@
#else #else
#define POW(X,Y) X #define POW(X,Y) X
#endif #endif
#define FLT_MAX 3.402823466e+38F
#define MAX_VAL (FLT_MAX*1e-3) #define MAX_VAL (FLT_MAX*1e-3)
__kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols, __kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment