Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
96121a66
Commit
96121a66
authored
Nov 11, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
kernel warnings on AMD
parent
a81efdbb
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
57 changed files
with
265 additions
and
286 deletions
+265
-286
arithm_LUT.cl
modules/ocl/src/opencl/arithm_LUT.cl
+5
-1
arithm_absdiff_nonsaturate.cl
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+4
-4
arithm_add.cl
modules/ocl/src/opencl/arithm_add.cl
+4
-4
arithm_addWeighted.cl
modules/ocl/src/opencl/arithm_addWeighted.cl
+4
-4
arithm_add_mask.cl
modules/ocl/src/opencl/arithm_add_mask.cl
+4
-4
arithm_add_scalar.cl
modules/ocl/src/opencl/arithm_add_scalar.cl
+4
-4
arithm_add_scalar_mask.cl
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+4
-4
arithm_bitwise_binary_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+0
-8
arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+4
-4
arithm_cartToPolar.cl
modules/ocl/src/opencl/arithm_cartToPolar.cl
+18
-21
arithm_compare.cl
modules/ocl/src/opencl/arithm_compare.cl
+4
-4
arithm_exp.cl
modules/ocl/src/opencl/arithm_exp.cl
+4
-4
arithm_flip.cl
modules/ocl/src/opencl/arithm_flip.cl
+4
-4
arithm_log.cl
modules/ocl/src/opencl/arithm_log.cl
+5
-1
arithm_magnitude.cl
modules/ocl/src/opencl/arithm_magnitude.cl
+5
-1
arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+1
-1
arithm_minMaxLoc.cl
modules/ocl/src/opencl/arithm_minMaxLoc.cl
+6
-1
arithm_minMaxLoc_mask.cl
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+6
-1
arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+1
-1
arithm_phase.cl
modules/ocl/src/opencl/arithm_phase.cl
+11
-11
arithm_polarToCart.cl
modules/ocl/src/opencl/arithm_polarToCart.cl
+7
-7
arithm_pow.cl
modules/ocl/src/opencl/arithm_pow.cl
+13
-13
arithm_setidentity.cl
modules/ocl/src/opencl/arithm_setidentity.cl
+4
-4
arithm_sum.cl
modules/ocl/src/opencl/arithm_sum.cl
+4
-4
arithm_transpose.cl
modules/ocl/src/opencl/arithm_transpose.cl
+1
-1
bgfg_mog.cl
modules/ocl/src/opencl/bgfg_mog.cl
+13
-8
blend_linear.cl
modules/ocl/src/opencl/blend_linear.cl
+1
-1
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+7
-8
convertC3C4.cl
modules/ocl/src/opencl/convertC3C4.cl
+11
-9
filtering_boxFilter.cl
modules/ocl/src/opencl/filtering_boxFilter.cl
+4
-0
filtering_filter2D.cl
modules/ocl/src/opencl/filtering_filter2D.cl
+4
-0
haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+2
-4
imgproc_convolve.cl
modules/ocl/src/opencl/imgproc_convolve.cl
+4
-2
imgproc_copymakeboder.cl
modules/ocl/src/opencl/imgproc_copymakeboder.cl
+1
-1
imgproc_integral.cl
modules/ocl/src/opencl/imgproc_integral.cl
+5
-4
imgproc_integral_sum.cl
modules/ocl/src/opencl/imgproc_integral_sum.cl
+4
-4
imgproc_remap.cl
modules/ocl/src/opencl/imgproc_remap.cl
+4
-4
imgproc_resize.cl
modules/ocl/src/opencl/imgproc_resize.cl
+5
-1
imgproc_threshold.cl
modules/ocl/src/opencl/imgproc_threshold.cl
+1
-1
imgproc_warpAffine.cl
modules/ocl/src/opencl/imgproc_warpAffine.cl
+4
-4
imgproc_warpPerspective.cl
modules/ocl/src/opencl/imgproc_warpPerspective.cl
+4
-4
kernel_stablesort_by_key.cl
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+0
-29
knearest.cl
modules/ocl/src/opencl/knearest.cl
+6
-1
match_template.cl
modules/ocl/src/opencl/match_template.cl
+4
-6
merge_mat.cl
modules/ocl/src/opencl/merge_mat.cl
+6
-2
moments.cl
modules/ocl/src/opencl/moments.cl
+4
-4
operator_convertTo.cl
modules/ocl/src/opencl/operator_convertTo.cl
+4
-0
operator_copyToM.cl
modules/ocl/src/opencl/operator_copyToM.cl
+4
-4
operator_setTo.cl
modules/ocl/src/opencl/operator_setTo.cl
+4
-4
operator_setToM.cl
modules/ocl/src/opencl/operator_setToM.cl
+4
-4
pyrlk.cl
modules/ocl/src/opencl/pyrlk.cl
+0
-2
split_mat.cl
modules/ocl/src/opencl/split_mat.cl
+6
-1
stereobm.cl
modules/ocl/src/opencl/stereobm.cl
+0
-1
stereobp.cl
modules/ocl/src/opencl/stereobp.cl
+3
-5
stereocsbp.cl
modules/ocl/src/opencl/stereocsbp.cl
+13
-45
svm.cl
modules/ocl/src/opencl/svm.cl
+6
-7
tvl1flow.cl
modules/ocl/src/opencl/tvl1flow.cl
+0
-0
No files found.
modules/ocl/src/opencl/arithm_LUT.cl
View file @
96121a66
...
...
@@ -34,9 +34,13 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
LUT_C1
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
__global
dstT
*dst,
...
...
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
View file @
96121a66
...
...
@@ -44,11 +44,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add.cl
View file @
96121a66
...
...
@@ -44,11 +44,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_addWeighted.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_mask.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_scalar.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
View file @
96121a66
...
...
@@ -43,14 +43,6 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_bitwise_not.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_cartToPolar.cl
View file @
96121a66
...
...
@@ -43,24 +43,21 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
define
CV_PI
3.1415926535897932384626433832795
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52
#
endif
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
else
#
define
CV_PI
3.1415926535897932384626433832795f
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52f
#
endif
#
define
CV_PI
M_PI_F
#
endif
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//magnitude
__global
float
*dst2,
int
dst2_step,
int
dst2_offset,
//cartToPolar
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//
magnitude
__global
float
*dst2,
int
dst2_step,
int
dst2_offset,
//
cartToPolar
int
rows,
int
cols,
int
angInDegree
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
float
y2
=
y
*
y
;
float
magnitude
=
sqrt
(
x2
+
y2
)
;
float
cartToPolar
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5f
:
CV_PI*1.5f
;
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
DBL
_EPSILON
)
;
float
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
FLT_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
FLT
_EPSILON
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
180/CV_PI
)
;
*
((
__global
float
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
float
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
...
...
@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_cartToPolar_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__global
double
*src2,
int
src2_step,
int
src2_offset,
__global
double
*dst1,
int
dst1_step,
int
dst1_offset,
...
...
@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
double
y2
=
y
*
y
;
double
magnitude
=
sqrt
(
x2
+
y2
)
;
double
cartToPolar
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5
:
CV_PI*1.5
;
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
(
float
)
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
(
float
)
DBL_EPSILON
)
;
double
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
DBL_EPSILON
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
180/CV_PI
)
;
*
((
__global
double
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
double
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
}
}
#
endif
modules/ocl/src/opencl/arithm_compare.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_exp.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_flip.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_log.cl
View file @
96121a66
...
...
@@ -43,9 +43,13 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////LOG/////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_magnitude.cl
View file @
96121a66
...
...
@@ -43,9 +43,13 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
arithm_magnitude_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
...
...
modules/ocl/src/opencl/arithm_minMax.cl
View file @
96121a66
...
...
@@ -45,7 +45,7 @@
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/arithm_minMaxLoc.cl
View file @
96121a66
...
...
@@ -44,8 +44,13 @@
//M*/
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
RES_TYPE
double4
#
define
CONVERT_RES_TYPE
convert_double4
#
else
...
...
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
View file @
96121a66
...
...
@@ -44,8 +44,13 @@
//M*/
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
RES_TYPE
double4
#
define
CONVERT_RES_TYPE
convert_double4
#
else
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
View file @
96121a66
...
...
@@ -42,7 +42,7 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/arithm_phase.cl
View file @
96121a66
...
...
@@ -44,17 +44,17 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
define
CV_2PI
(
2
*
CV_PI
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
define
CV_2PI
(
2
*
CV_PI
)
#
else
#
define
CV_PI
M_PI_F
#
define
CV_2PI
(
2
*
CV_PI
)
#
define
CV_PI
M_PI_F
#
define
CV_2PI
(
2
*
CV_PI
)
#
endif
/**************************************phase
inradians**************************************/
...
...
@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
double
data1
=
src1[src1_index]
;
double
data2
=
src2[src2_index]
;
double
tmp
=
atan2
(
src2[src2_index],
src1[src1_index]
)
;
double
tmp
=
atan2
(
data2,
data1
)
;
tmp
=
180
*
tmp
/
CV_PI
;
if
(
tmp
<
0
)
...
...
modules/ocl/src/opencl/arithm_polarToCart.cl
View file @
96121a66
...
...
@@ -44,14 +44,14 @@
//M*/
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
else
#
define
CV_PI
M_PI_F
#
define
CV_PI
M_PI_F
#
endif
/////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_pow.cl
View file @
96121a66
...
...
@@ -43,21 +43,22 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
typedef
double
F
;
typedef
double4
F4
;
#
define
convert_F4
convert_double4
;
#
endif
#
define
F
double
#
else
typedef
float
F
;
typedef
float4
F4
;
#
define
convert_F4
convert_float4
;
#
define
F
float
#
endif
/**************************************
pow
**************************************
/
__kernel
void
arithm_pow_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
F
p
)
int
rows,
int
cols,
int
dst_step1,
F
p
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse
*
((
__global
float
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_pow_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__global
double
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
F
p
)
int
rows,
int
cols,
int
dst_step1,
F
p
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs
double
tmp
=
src1_data
>
0
?
exp
(
p
*
log
(
src1_data
))
:
(
src1_data
==
0
?
0
:
exp
(
p
*
log
(
fabs
(
src1_data
))))
;
*
((
__global
double
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
#
endif
modules/ocl/src/opencl/arithm_setidentity.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_sum.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_transpose.cl
View file @
96121a66
...
...
@@ -43,7 +43,7 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/bgfg_mog.cl
View file @
96121a66
...
...
@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
{
return
fmax
(
var
+
learningRate
*
(
diff
*
diff
-
var
)
,
minVar
)
;
}
#
else
#
define
T_FRAME
uchar4
#
define
T_MEAN_VAR
float4
#
define
CONVERT_TYPE
convert_uchar4_sat
#
define
F_ZERO
(
0.0f,
0.0f,
0.0f,
0.0f
)
inline
float4
cvt
(
const
uchar4
val
)
{
float4
result
;
...
...
@@ -93,6 +96,14 @@ inline float sum(const float4 val)
return
(
val.x
+
val.y
+
val.z
)
;
}
static
void
swap4
(
__global
float4*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
float4
val
=
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
=
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
static
float4
clamp1
(
const
float4
var,
float
learningRate,
const
float4
diff,
float
minVar
)
{
float4
result
;
...
...
@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
result.w
=
0.0f
;
return
result
;
}
#
endif
typedef
struct
...
...
@@ -114,7 +126,7 @@ typedef struct
float
c_varMax
;
float
c_tau
;
uchar
c_shadowVal
;
}con_srtuct_t
;
}
con_srtuct_t
;
static
void
swap
(
__global
float*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
...
...
@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
static
void
swap4
(
__global
float4*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
float4
val
=
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
=
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
__kernel
void
mog_withoutLearning_kernel
(
__global
T_FRAME*
frame,
__global
uchar*
fgmask,
__global
float*
weight,
__global
T_MEAN_VAR*
mean,
__global
T_MEAN_VAR*
var,
int
frame_row,
int
frame_col,
int
frame_step,
int
fgmask_step,
...
...
modules/ocl/src/opencl/blend_linear.cl
View file @
96121a66
...
...
@@ -43,7 +43,7 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/brute_force_match.cl
View file @
96121a66
...
...
@@ -63,14 +63,6 @@
#
define
DIST_TYPE
0
#
endif
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static
int
bit1Count
(
int
v
)
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
return
((
v
+
(
v
>>
4
)
&
0xF0F0F0F
)
*
0x1010101
)
>>
24
; // count
}
//
dirty
fix
for
non-template
support
#
if
(
DIST_TYPE
==
0
)
//
L1Dist
#
ifdef
T_FLOAT
...
...
@@ -89,6 +81,13 @@ typedef float value_type;
typedef
float
result_type
;
#
define
DIST_RES
(
x
)
sqrt
(
x
)
#
elif
(
DIST_TYPE
==
2
)
//
Hamming
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static
int
bit1Count
(
int
v
)
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
return
((
v
+
(
v
>>
4
)
&
0xF0F0F0F
)
*
0x1010101
)
>>
24
; // count
}
#
define
DIST
(
x,
y
)
bit1Count
(
(
x
)
^
(
y
)
)
typedef
int
value_type
;
typedef
int
result_type
;
...
...
modules/ocl/src/opencl/convertC3C4.cl
View file @
96121a66
...
...
@@ -33,12 +33,17 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
dstStep_in_piexl,int
pixel_end
)
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
dstStep_in_piexl,
int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
;
int3
pixelid
=
(
int3
)(
mul24
(
id,3
)
,
mad24
(
id,3,1
)
,
mad24
(
id,3,2
))
;
...
...
@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
dst[addr.y]
=
outpix1
;
}
else
if
(
outx.x<cols
&&
outy.x<rows
)
{
dst[addr.x]
=
outpix0
;
}
}
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
srcStep_in_pixel,int
pixel_end
)
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
srcStep_in_pixel,
int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
<<2
;
int
y
=
id
/
cols
;
...
...
@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
dst[outaddr.y]
=
outpixel1
;
}
else
if
(
outaddr.x
<=
pixel_end
)
{
dst[outaddr.x]
=
pixel0
;
}
}
modules/ocl/src/opencl/filtering_boxFilter.cl
View file @
96121a66
...
...
@@ -146,7 +146,11 @@
#endif
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
...
...
modules/ocl/src/opencl/filtering_filter2D.cl
View file @
96121a66
...
...
@@ -143,7 +143,11 @@
#endif
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
...
...
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
View file @
96121a66
...
...
@@ -45,8 +45,6 @@
//
//M*/
//
Enter
your
kernel
in
this
window
//#pragma
OPENCL
EXTENSION
cl_amd_printf:enable
#
define
CV_HAAR_FEATURE_MAX
3
typedef
int
sumtype
;
typedef
float
sqsumtype
;
...
...
@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
int
counter
=
get_global_id
(
0
)
;
int
tr_x[3],
tr_y[3],
tr_h[3],
tr_w[3],
i
=
0
;
GpuHidHaarTreeNode
t1
=
*
(
orinode
+
counter
)
;
#
pragma
unroll
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
{
tr_x[i]
=
(
int
)(
t1.p[i][0]
*
scale
+
0.5f
)
;
...
...
@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
t1.weight[0]
=
-
(
t1.weight[1]
*
tr_h[1]
*
tr_w[1]
+
t1.weight[2]
*
tr_h[2]
*
tr_w[2]
)
/
(
tr_h[0]
*
tr_w[0]
)
;
counter
+=
nodenum
;
#
pragma
unroll
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
{
newnode[counter].p[i][0]
=
tr_x[i]
;
...
...
modules/ocl/src/opencl/imgproc_convolve.cl
View file @
96121a66
...
...
@@ -43,11 +43,13 @@
//
//M*/
#
if
defined
(
__ATI__
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
/**************************************
convolve
**************************************
/
...
...
modules/ocl/src/opencl/imgproc_copymakeboder.cl
View file @
96121a66
...
...
@@ -34,7 +34,7 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/imgproc_integral.cl
View file @
96121a66
...
...
@@ -43,13 +43,14 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
LSIZE
256
#
define
LSIZE_1
255
#
define
LSIZE_2
254
...
...
modules/ocl/src/opencl/imgproc_integral_sum.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/imgproc_remap.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/imgproc_resize.cl
View file @
96121a66
...
...
@@ -48,8 +48,12 @@
//
Currently,
CV_8UC1
CV_8UC4
CV_32FC1
and
CV_32FC4are
supported.
//
We
shall
support
other
types
later
if
necessary.
#
if
defined
DOUBLE_SUPPORT
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
F
double
#
else
#
define
F
float
...
...
modules/ocl/src/opencl/imgproc_threshold.cl
View file @
96121a66
...
...
@@ -43,7 +43,7 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/imgproc_warpAffine.cl
View file @
96121a66
...
...
@@ -47,11 +47,11 @@
//warpAffine
kernel
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
typedef
double
F
;
typedef
double4
F4
;
...
...
modules/ocl/src/opencl/imgproc_warpPerspective.cl
View file @
96121a66
...
...
@@ -47,11 +47,11 @@
//wrapPerspective
kernel
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
typedef
double
F
;
typedef
double4
F4
;
...
...
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
View file @
96121a66
...
...
@@ -61,35 +61,6 @@
#
define
my_comp
(
x,y
)
((
x
)
<
(
y
))
#
endif
///////////// parallel merge sort ///////////////
// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
{
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
uint firstIndex = left;
uint lastIndex = right;
// This loops through [firstIndex, lastIndex)
// Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
// this while loop will be divergent within a wavefront
while( firstIndex < lastIndex )
{
K_T dataVal = data[ firstIndex ];
// This branch will create divergent wavefronts
if( my_comp( dataVal, searchVal ) )
{
firstIndex = firstIndex+1;
}
else
{
break;
}
}
return firstIndex;
}
//
This
implements
a
binary
search
routine
to
look
for
an
'insertion
point
'
in
a
sequence,
denoted
//
by
a
base
pointer
and
left
and
right
index
for
a
particular
candidate
value.
The
comparison
operator
is
//
passed
as
a
functor
parameter
my_comp
...
...
modules/ocl/src/opencl/knearest.cl
View file @
96121a66
...
...
@@ -42,8 +42,13 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
TYPE
double
#
else
#
define
TYPE
float
...
...
modules/ocl/src/opencl/match_template.cl
View file @
96121a66
...
...
@@ -43,14 +43,12 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
TYPE_IMAGE_SQSUM
double
#
else
#
define
TYPE_IMAGE_SQSUM
float
...
...
modules/ocl/src/opencl/merge_mat.cl
View file @
96121a66
...
...
@@ -43,15 +43,19 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////optimized
code
using
vector
roi//////////////////////////
////////////vector
fuction
name
format:
merge_vector_C
(
channels
number
)
D_
(
data
type
depth
)
//////
////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
merge_vector_C2_D0
(
__global
uchar
*mat_dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mat_src0,
int
src0_step,
int
src0_offset,
__global
uchar
*mat_src1,
int
src1_step,
int
src1_offset,
...
...
modules/ocl/src/opencl/moments.cl
View file @
96121a66
...
...
@@ -44,11 +44,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
typedef
double
T
;
#
else
...
...
modules/ocl/src/opencl/operator_convertTo.cl
View file @
96121a66
...
...
@@ -35,8 +35,12 @@
//
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
convert_to
(
__global
const
srcT*
restrict
srcMat,
...
...
modules/ocl/src/opencl/operator_copyToM.cl
View file @
96121a66
...
...
@@ -34,11 +34,11 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/operator_setTo.cl
View file @
96121a66
...
...
@@ -34,11 +34,11 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/operator_setToM.cl
View file @
96121a66
...
...
@@ -34,11 +34,11 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/pyrlk.cl
View file @
96121a66
...
...
@@ -45,8 +45,6 @@
//
//M*/
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
define
BUFFER
64
#
define
BUFFER2
BUFFER>>1
#
ifndef
WAVE_SIZE
...
...
modules/ocl/src/opencl/split_mat.cl
View file @
96121a66
...
...
@@ -38,9 +38,14 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
if
DATA_DEPTH
==
0
#
define
BASE_TYPE
uchar
...
...
modules/ocl/src/opencl/stereobm.cl
View file @
96121a66
...
...
@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
{
unsigned
int
cache
=
cols[0]
;
#
pragma
unroll
for
(
int
i
=
1
; i <= winsz; i++)
cache
+=
cols[i]
;
...
...
modules/ocl/src/opencl/stereobp.cl
View file @
96121a66
...
...
@@ -45,13 +45,11 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
ifdef
T_FLOAT
...
...
modules/ocl/src/opencl/stereocsbp.cl
View file @
96121a66
...
...
@@ -44,19 +44,10 @@
//
//M*/
#
ifndef
FLT_MAX
#
define
FLT_MAX
CL_FLT_MAX
#
endif
#
ifndef
SHRT_MAX
#
define
SHRT_MAX
CL_SHORT_MAX
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_global_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
...
@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
}
}
}
__kernel
void
get_first_k_initial_global_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
...
@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_local_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
...
@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
///////////////////////////////////////////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////////////////////////////////////////////
inline
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
{
...
...
@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
}
inline
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
{
...
...
@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
}
}
}
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
...
...
@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
...
...
@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
...
...
@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
}
}
}
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
...
...
@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane,
...
...
@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
}
}
static
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
__global
float
*data_cost_new,
__global
const
float
*data_cost_cur,
__global
const
float
*disparity_selected_cur,
int
nr_plane,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2
)
{
for
(
int
i
=
0
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
float
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost_cur[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
__kernel
void
init_message_0
(
__global
short
*u_new_,
__global
short
*d_new_,
__global
short
*l_new_,
__global
short
*r_new_,
__global
short
*u_cur_,
__global
const
short
*d_cur_,
__global
const
short
*l_cur_,
__global
const
short
*r_cur_,
__global
short
*ctemp,
...
...
@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
cdisp_step1,
cdisp_step2
)
;
}
}
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__global
float
*r_new_,
__global
const
float
*u_cur_,
__global
const
float
*d_cur_,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
...
...
modules/ocl/src/opencl/svm.cl
View file @
96121a66
...
...
@@ -33,11 +33,12 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
TYPE
double
#
else
...
...
@@ -53,7 +54,6 @@
#
else
#
define
POW
(
X,Y
)
X
#
endif
#
define
FLT_MAX
3.402823466e+38F
#
define
MAX_VAL
(
FLT_MAX*1e-3
)
__kernel
void
svm_linear
(
__global
float*
src,
int
src_step,
__global
float*
src2,
int
src2_step,
__global
TYPE*
dst,
int
dst_step,
int
src_rows,
int
src2_cols,
...
...
@@ -206,4 +206,4 @@ __kernel void svm_rbf(__global float* src, int src_step, __global float* src2, i
dst[row
*
dst_step
+
col]
=
temp1
;
}
}
}
\ No newline at end of file
}
modules/ocl/src/opencl/tvl1flow.cl
View file @
96121a66
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment