Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
96121a66
Commit
96121a66
authored
Nov 11, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
kernel warnings on AMD
parent
a81efdbb
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
57 changed files
with
264 additions
and
284 deletions
+264
-284
arithm_LUT.cl
modules/ocl/src/opencl/arithm_LUT.cl
+5
-1
arithm_absdiff_nonsaturate.cl
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+4
-4
arithm_add.cl
modules/ocl/src/opencl/arithm_add.cl
+4
-4
arithm_addWeighted.cl
modules/ocl/src/opencl/arithm_addWeighted.cl
+4
-4
arithm_add_mask.cl
modules/ocl/src/opencl/arithm_add_mask.cl
+4
-4
arithm_add_scalar.cl
modules/ocl/src/opencl/arithm_add_scalar.cl
+4
-4
arithm_add_scalar_mask.cl
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+4
-4
arithm_bitwise_binary_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+0
-8
arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+4
-4
arithm_cartToPolar.cl
modules/ocl/src/opencl/arithm_cartToPolar.cl
+18
-21
arithm_compare.cl
modules/ocl/src/opencl/arithm_compare.cl
+4
-4
arithm_exp.cl
modules/ocl/src/opencl/arithm_exp.cl
+4
-4
arithm_flip.cl
modules/ocl/src/opencl/arithm_flip.cl
+4
-4
arithm_log.cl
modules/ocl/src/opencl/arithm_log.cl
+5
-1
arithm_magnitude.cl
modules/ocl/src/opencl/arithm_magnitude.cl
+5
-1
arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+1
-1
arithm_minMaxLoc.cl
modules/ocl/src/opencl/arithm_minMaxLoc.cl
+6
-1
arithm_minMaxLoc_mask.cl
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+6
-1
arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+1
-1
arithm_phase.cl
modules/ocl/src/opencl/arithm_phase.cl
+11
-11
arithm_polarToCart.cl
modules/ocl/src/opencl/arithm_polarToCart.cl
+7
-7
arithm_pow.cl
modules/ocl/src/opencl/arithm_pow.cl
+13
-13
arithm_setidentity.cl
modules/ocl/src/opencl/arithm_setidentity.cl
+4
-4
arithm_sum.cl
modules/ocl/src/opencl/arithm_sum.cl
+4
-4
arithm_transpose.cl
modules/ocl/src/opencl/arithm_transpose.cl
+1
-1
bgfg_mog.cl
modules/ocl/src/opencl/bgfg_mog.cl
+13
-8
blend_linear.cl
modules/ocl/src/opencl/blend_linear.cl
+1
-1
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+7
-8
convertC3C4.cl
modules/ocl/src/opencl/convertC3C4.cl
+11
-9
filtering_boxFilter.cl
modules/ocl/src/opencl/filtering_boxFilter.cl
+4
-0
filtering_filter2D.cl
modules/ocl/src/opencl/filtering_filter2D.cl
+4
-0
haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+2
-4
imgproc_convolve.cl
modules/ocl/src/opencl/imgproc_convolve.cl
+4
-2
imgproc_copymakeboder.cl
modules/ocl/src/opencl/imgproc_copymakeboder.cl
+1
-1
imgproc_integral.cl
modules/ocl/src/opencl/imgproc_integral.cl
+5
-4
imgproc_integral_sum.cl
modules/ocl/src/opencl/imgproc_integral_sum.cl
+4
-4
imgproc_remap.cl
modules/ocl/src/opencl/imgproc_remap.cl
+4
-4
imgproc_resize.cl
modules/ocl/src/opencl/imgproc_resize.cl
+5
-1
imgproc_threshold.cl
modules/ocl/src/opencl/imgproc_threshold.cl
+1
-1
imgproc_warpAffine.cl
modules/ocl/src/opencl/imgproc_warpAffine.cl
+4
-4
imgproc_warpPerspective.cl
modules/ocl/src/opencl/imgproc_warpPerspective.cl
+4
-4
kernel_stablesort_by_key.cl
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+0
-29
knearest.cl
modules/ocl/src/opencl/knearest.cl
+6
-1
match_template.cl
modules/ocl/src/opencl/match_template.cl
+4
-6
merge_mat.cl
modules/ocl/src/opencl/merge_mat.cl
+6
-2
moments.cl
modules/ocl/src/opencl/moments.cl
+4
-4
operator_convertTo.cl
modules/ocl/src/opencl/operator_convertTo.cl
+4
-0
operator_copyToM.cl
modules/ocl/src/opencl/operator_copyToM.cl
+4
-4
operator_setTo.cl
modules/ocl/src/opencl/operator_setTo.cl
+4
-4
operator_setToM.cl
modules/ocl/src/opencl/operator_setToM.cl
+4
-4
pyrlk.cl
modules/ocl/src/opencl/pyrlk.cl
+0
-2
split_mat.cl
modules/ocl/src/opencl/split_mat.cl
+6
-1
stereobm.cl
modules/ocl/src/opencl/stereobm.cl
+0
-1
stereobp.cl
modules/ocl/src/opencl/stereobp.cl
+3
-5
stereocsbp.cl
modules/ocl/src/opencl/stereocsbp.cl
+13
-45
svm.cl
modules/ocl/src/opencl/svm.cl
+5
-5
tvl1flow.cl
modules/ocl/src/opencl/tvl1flow.cl
+0
-0
No files found.
modules/ocl/src/opencl/arithm_LUT.cl
View file @
96121a66
...
@@ -34,9 +34,13 @@
...
@@ -34,9 +34,13 @@
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
__kernel
void
LUT_C1
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
__kernel
void
LUT_C1
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
__global
dstT
*dst,
__global
dstT
*dst,
...
...
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
View file @
96121a66
...
@@ -44,11 +44,11 @@
...
@@ -44,11 +44,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add.cl
View file @
96121a66
...
@@ -44,11 +44,11 @@
...
@@ -44,11 +44,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_addWeighted.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_mask.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_scalar.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
View file @
96121a66
...
@@ -43,14 +43,6 @@
...
@@ -43,14 +43,6 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_bitwise_not.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_cartToPolar.cl
View file @
96121a66
...
@@ -43,24 +43,21 @@
...
@@ -43,24 +43,21 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
ifdef
cl_amd_fp64
#
define
CV_PI
3.1415926535897932384626433832795
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
ifndef
DBL_EPSILON
#
elif
defined
(
cl_khr_fp64
)
#
define
DBL_EPSILON
0x1.0p-52
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
CV_PI
M_PI
#
else
#
else
#
define
CV_PI
3.1415926535897932384626433832795f
#
define
CV_PI
M_PI_F
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52f
#
endif
#
endif
#
endif
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//magnitude
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//
magnitude
__global
float
*dst2,
int
dst2_step,
int
dst2_offset,
//cartToPolar
__global
float
*dst2,
int
dst2_step,
int
dst2_offset,
//
cartToPolar
int
rows,
int
cols,
int
angInDegree
)
int
rows,
int
cols,
int
angInDegree
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
...
@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
...
@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
float
y2
=
y
*
y
;
float
y2
=
y
*
y
;
float
magnitude
=
sqrt
(
x2
+
y2
)
;
float
magnitude
=
sqrt
(
x2
+
y2
)
;
float
cartToPolar
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5f
:
CV_PI*1.5f
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5f
:
CV_PI*1.5f
;
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
DBL_EPSILON
)
+
tmp
:
float
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
FLT_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
DBL
_EPSILON
)
;
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
FLT
_EPSILON
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
180/CV_PI
)
;
*
((
__global
float
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
float
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
float
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
*
((
__global
float
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
...
@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
...
@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_cartToPolar_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_cartToPolar_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__global
double
*src2,
int
src2_step,
int
src2_offset,
__global
double
*src2,
int
src2_step,
int
src2_offset,
__global
double
*dst1,
int
dst1_step,
int
dst1_offset,
__global
double
*dst1,
int
dst1_step,
int
dst1_offset,
...
@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
...
@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
double
y2
=
y
*
y
;
double
y2
=
y
*
y
;
double
magnitude
=
sqrt
(
x2
+
y2
)
;
double
magnitude
=
sqrt
(
x2
+
y2
)
;
double
cartToPolar
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5
:
CV_PI*1.5
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5
:
CV_PI*1.5
;
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
(
float
)
DBL_EPSILON
)
+
tmp
:
double
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
(
float
)
DBL_EPSILON
)
;
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
DBL_EPSILON
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
180/CV_PI
)
;
*
((
__global
double
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
double
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
double
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
*
((
__global
double
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
}
}
}
}
#
endif
#
endif
modules/ocl/src/opencl/arithm_compare.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_exp.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_flip.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_log.cl
View file @
96121a66
...
@@ -43,9 +43,13 @@
...
@@ -43,9 +43,13 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////LOG/////////////////////////////////////////////////////
/////////////////////////////////////////////LOG/////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_magnitude.cl
View file @
96121a66
...
@@ -43,9 +43,13 @@
...
@@ -43,9 +43,13 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
__kernel
void
arithm_magnitude_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_magnitude_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
...
...
modules/ocl/src/opencl/arithm_minMax.cl
View file @
96121a66
...
@@ -45,7 +45,7 @@
...
@@ -45,7 +45,7 @@
/**************************************PUBLICFUNC*************************************/
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/arithm_minMaxLoc.cl
View file @
96121a66
...
@@ -44,8 +44,13 @@
...
@@ -44,8 +44,13 @@
//M*/
//M*/
/**************************************PUBLICFUNC*************************************/
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
RES_TYPE
double4
#
define
RES_TYPE
double4
#
define
CONVERT_RES_TYPE
convert_double4
#
define
CONVERT_RES_TYPE
convert_double4
#
else
#
else
...
...
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
View file @
96121a66
...
@@ -44,8 +44,13 @@
...
@@ -44,8 +44,13 @@
//M*/
//M*/
/**************************************PUBLICFUNC*************************************/
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
RES_TYPE
double4
#
define
RES_TYPE
double4
#
define
CONVERT_RES_TYPE
convert_double4
#
define
CONVERT_RES_TYPE
convert_double4
#
else
#
else
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
View file @
96121a66
...
@@ -42,7 +42,7 @@
...
@@ -42,7 +42,7 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/arithm_phase.cl
View file @
96121a66
...
@@ -44,17 +44,17 @@
...
@@ -44,17 +44,17 @@
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
CV_PI
M_PI
#
define
CV_PI
M_PI
#
define
CV_2PI
(
2
*
CV_PI
)
#
define
CV_2PI
(
2
*
CV_PI
)
#
else
#
else
#
define
CV_PI
M_PI_F
#
define
CV_PI
M_PI_F
#
define
CV_2PI
(
2
*
CV_PI
)
#
define
CV_2PI
(
2
*
CV_PI
)
#
endif
#
endif
/**************************************phase
inradians**************************************/
/**************************************phase
inradians**************************************/
...
@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
...
@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
double
data1
=
src1[src1_index]
;
double
data1
=
src1[src1_index]
;
double
data2
=
src2[src2_index]
;
double
data2
=
src2[src2_index]
;
double
tmp
=
atan2
(
src2[src2_index],
src1[src1_index]
)
;
double
tmp
=
atan2
(
data2,
data1
)
;
tmp
=
180
*
tmp
/
CV_PI
;
tmp
=
180
*
tmp
/
CV_PI
;
if
(
tmp
<
0
)
if
(
tmp
<
0
)
...
...
modules/ocl/src/opencl/arithm_polarToCart.cl
View file @
96121a66
...
@@ -44,14 +44,14 @@
...
@@ -44,14 +44,14 @@
//M*/
//M*/
#
ifdef
DOUBLE_SUPPORT
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
CV_PI
M_PI
#
define
CV_PI
M_PI
#
else
#
else
#
define
CV_PI
M_PI_F
#
define
CV_PI
M_PI_F
#
endif
#
endif
/////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_pow.cl
View file @
96121a66
...
@@ -43,21 +43,22 @@
...
@@ -43,21 +43,22 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
typedef
double
F
;
#
endif
typedef
double4
F4
;
#
define
F
double
#
define
convert_F4
convert_double4
;
#
else
#
else
typedef
float
F
;
#
define
F
float
typedef
float4
F4
;
#
define
convert_F4
convert_float4
;
#
endif
#
endif
/**************************************
pow
**************************************
/
/**************************************
pow
**************************************
/
__kernel
void
arithm_pow_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_pow_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*dst,
int
dst_step,
int
dst_offset,
__global
float
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
int
rows,
int
cols,
int
dst_step1,
F
p
)
F
p
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
...
@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse
...
@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse
*
((
__global
float
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
*
((
__global
float
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_pow_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_pow_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__global
double
*dst,
int
dst_step,
int
dst_offset,
__global
double
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
int
rows,
int
cols,
int
dst_step1,
F
p
)
F
p
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
...
@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs
...
@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs
double
tmp
=
src1_data
>
0
?
exp
(
p
*
log
(
src1_data
))
:
(
src1_data
==
0
?
0
:
exp
(
p
*
log
(
fabs
(
src1_data
))))
;
double
tmp
=
src1_data
>
0
?
exp
(
p
*
log
(
src1_data
))
:
(
src1_data
==
0
?
0
:
exp
(
p
*
log
(
fabs
(
src1_data
))))
;
*
((
__global
double
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
*
((
__global
double
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
}
}
#
endif
#
endif
modules/ocl/src/opencl/arithm_setidentity.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_sum.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_transpose.cl
View file @
96121a66
...
@@ -43,7 +43,7 @@
...
@@ -43,7 +43,7 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/bgfg_mog.cl
View file @
96121a66
...
@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
...
@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
{
{
return
fmax
(
var
+
learningRate
*
(
diff
*
diff
-
var
)
,
minVar
)
;
return
fmax
(
var
+
learningRate
*
(
diff
*
diff
-
var
)
,
minVar
)
;
}
}
#
else
#
else
#
define
T_FRAME
uchar4
#
define
T_FRAME
uchar4
#
define
T_MEAN_VAR
float4
#
define
T_MEAN_VAR
float4
#
define
CONVERT_TYPE
convert_uchar4_sat
#
define
CONVERT_TYPE
convert_uchar4_sat
#
define
F_ZERO
(
0.0f,
0.0f,
0.0f,
0.0f
)
#
define
F_ZERO
(
0.0f,
0.0f,
0.0f,
0.0f
)
inline
float4
cvt
(
const
uchar4
val
)
inline
float4
cvt
(
const
uchar4
val
)
{
{
float4
result
;
float4
result
;
...
@@ -93,6 +96,14 @@ inline float sum(const float4 val)
...
@@ -93,6 +96,14 @@ inline float sum(const float4 val)
return
(
val.x
+
val.y
+
val.z
)
;
return
(
val.x
+
val.y
+
val.z
)
;
}
}
static
void
swap4
(
__global
float4*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
float4
val
=
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
=
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
static
float4
clamp1
(
const
float4
var,
float
learningRate,
const
float4
diff,
float
minVar
)
static
float4
clamp1
(
const
float4
var,
float
learningRate,
const
float4
diff,
float
minVar
)
{
{
float4
result
;
float4
result
;
...
@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
...
@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
result.w
=
0.0f
;
result.w
=
0.0f
;
return
result
;
return
result
;
}
}
#
endif
#
endif
typedef
struct
typedef
struct
...
@@ -114,7 +126,7 @@ typedef struct
...
@@ -114,7 +126,7 @@ typedef struct
float
c_varMax
;
float
c_varMax
;
float
c_tau
;
float
c_tau
;
uchar
c_shadowVal
;
uchar
c_shadowVal
;
}con_srtuct_t
;
}
con_srtuct_t
;
static
void
swap
(
__global
float*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
static
void
swap
(
__global
float*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
{
...
@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
...
@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
}
static
void
swap4
(
__global
float4*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
float4
val
=
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
=
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
__kernel
void
mog_withoutLearning_kernel
(
__global
T_FRAME*
frame,
__global
uchar*
fgmask,
__kernel
void
mog_withoutLearning_kernel
(
__global
T_FRAME*
frame,
__global
uchar*
fgmask,
__global
float*
weight,
__global
T_MEAN_VAR*
mean,
__global
T_MEAN_VAR*
var,
__global
float*
weight,
__global
T_MEAN_VAR*
mean,
__global
T_MEAN_VAR*
var,
int
frame_row,
int
frame_col,
int
frame_step,
int
fgmask_step,
int
frame_row,
int
frame_col,
int
frame_step,
int
fgmask_step,
...
...
modules/ocl/src/opencl/blend_linear.cl
View file @
96121a66
...
@@ -43,7 +43,7 @@
...
@@ -43,7 +43,7 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/brute_force_match.cl
View file @
96121a66
...
@@ -63,14 +63,6 @@
...
@@ -63,14 +63,6 @@
#
define
DIST_TYPE
0
#
define
DIST_TYPE
0
#
endif
#
endif
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static
int
bit1Count
(
int
v
)
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
return
((
v
+
(
v
>>
4
)
&
0xF0F0F0F
)
*
0x1010101
)
>>
24
; // count
}
//
dirty
fix
for
non-template
support
//
dirty
fix
for
non-template
support
#
if
(
DIST_TYPE
==
0
)
//
L1Dist
#
if
(
DIST_TYPE
==
0
)
//
L1Dist
#
ifdef
T_FLOAT
#
ifdef
T_FLOAT
...
@@ -89,6 +81,13 @@ typedef float value_type;
...
@@ -89,6 +81,13 @@ typedef float value_type;
typedef
float
result_type
;
typedef
float
result_type
;
#
define
DIST_RES
(
x
)
sqrt
(
x
)
#
define
DIST_RES
(
x
)
sqrt
(
x
)
#
elif
(
DIST_TYPE
==
2
)
//
Hamming
#
elif
(
DIST_TYPE
==
2
)
//
Hamming
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static
int
bit1Count
(
int
v
)
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
return
((
v
+
(
v
>>
4
)
&
0xF0F0F0F
)
*
0x1010101
)
>>
24
; // count
}
#
define
DIST
(
x,
y
)
bit1Count
(
(
x
)
^
(
y
)
)
#
define
DIST
(
x,
y
)
bit1Count
(
(
x
)
^
(
y
)
)
typedef
int
value_type
;
typedef
int
value_type
;
typedef
int
result_type
;
typedef
int
result_type
;
...
...
modules/ocl/src/opencl/convertC3C4.cl
View file @
96121a66
...
@@ -33,12 +33,17 @@
...
@@ -33,12 +33,17 @@
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
dstStep_in_piexl,int
pixel_end
)
int
cols,
int
rows,
int
dstStep_in_piexl,
int
pixel_end
)
{
{
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int3
pixelid
=
(
int3
)(
mul24
(
id,3
)
,
mad24
(
id,3,1
)
,
mad24
(
id,3,2
))
;
int3
pixelid
=
(
int3
)(
mul24
(
id,3
)
,
mad24
(
id,3,1
)
,
mad24
(
id,3,2
))
;
...
@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
...
@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
dst[addr.y]
=
outpix1
;
dst[addr.y]
=
outpix1
;
}
}
else
if
(
outx.x<cols
&&
outy.x<rows
)
else
if
(
outx.x<cols
&&
outy.x<rows
)
{
dst[addr.x]
=
outpix0
;
dst[addr.x]
=
outpix0
;
}
}
}
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
srcStep_in_pixel,int
pixel_end
)
int
cols,
int
rows,
int
srcStep_in_pixel,
int
pixel_end
)
{
{
int
id
=
get_global_id
(
0
)
<<2
;
int
id
=
get_global_id
(
0
)
<<2
;
int
y
=
id
/
cols
;
int
y
=
id
/
cols
;
...
@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
...
@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
dst[outaddr.y]
=
outpixel1
;
dst[outaddr.y]
=
outpixel1
;
}
}
else
if
(
outaddr.x
<=
pixel_end
)
else
if
(
outaddr.x
<=
pixel_end
)
{
dst[outaddr.x]
=
pixel0
;
dst[outaddr.x]
=
pixel0
;
}
}
}
modules/ocl/src/opencl/filtering_boxFilter.cl
View file @
96121a66
...
@@ -146,7 +146,11 @@
...
@@ -146,7 +146,11 @@
#endif
#endif
#if USE_DOUBLE
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
#else
...
...
modules/ocl/src/opencl/filtering_filter2D.cl
View file @
96121a66
...
@@ -143,7 +143,11 @@
...
@@ -143,7 +143,11 @@
#endif
#endif
#if USE_DOUBLE
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
#else
...
...
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
View file @
96121a66
...
@@ -45,8 +45,6 @@
...
@@ -45,8 +45,6 @@
//
//
//M*/
//M*/
//
Enter
your
kernel
in
this
window
//#pragma
OPENCL
EXTENSION
cl_amd_printf:enable
#
define
CV_HAAR_FEATURE_MAX
3
#
define
CV_HAAR_FEATURE_MAX
3
typedef
int
sumtype
;
typedef
int
sumtype
;
typedef
float
sqsumtype
;
typedef
float
sqsumtype
;
...
@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
...
@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
int
counter
=
get_global_id
(
0
)
;
int
counter
=
get_global_id
(
0
)
;
int
tr_x[3],
tr_y[3],
tr_h[3],
tr_w[3],
i
=
0
;
int
tr_x[3],
tr_y[3],
tr_h[3],
tr_w[3],
i
=
0
;
GpuHidHaarTreeNode
t1
=
*
(
orinode
+
counter
)
;
GpuHidHaarTreeNode
t1
=
*
(
orinode
+
counter
)
;
#
pragma
unroll
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
for
(
i
=
0
; i < 3; i++)
{
{
tr_x[i]
=
(
int
)(
t1.p[i][0]
*
scale
+
0.5f
)
;
tr_x[i]
=
(
int
)(
t1.p[i][0]
*
scale
+
0.5f
)
;
...
@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
...
@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
t1.weight[0]
=
-
(
t1.weight[1]
*
tr_h[1]
*
tr_w[1]
+
t1.weight[2]
*
tr_h[2]
*
tr_w[2]
)
/
(
tr_h[0]
*
tr_w[0]
)
;
t1.weight[0]
=
-
(
t1.weight[1]
*
tr_h[1]
*
tr_w[1]
+
t1.weight[2]
*
tr_h[2]
*
tr_w[2]
)
/
(
tr_h[0]
*
tr_w[0]
)
;
counter
+=
nodenum
;
counter
+=
nodenum
;
#
pragma
unroll
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
for
(
i
=
0
; i < 3; i++)
{
{
newnode[counter].p[i][0]
=
tr_x[i]
;
newnode[counter].p[i][0]
=
tr_x[i]
;
...
...
modules/ocl/src/opencl/imgproc_convolve.cl
View file @
96121a66
...
@@ -43,11 +43,13 @@
...
@@ -43,11 +43,13 @@
//
//
//M*/
//M*/
#
if
defined
(
__ATI__
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
/**************************************
convolve
**************************************
/
/**************************************
convolve
**************************************
/
...
...
modules/ocl/src/opencl/imgproc_copymakeboder.cl
View file @
96121a66
...
@@ -34,7 +34,7 @@
...
@@ -34,7 +34,7 @@
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/imgproc_integral.cl
View file @
96121a66
...
@@ -43,13 +43,14 @@
...
@@ -43,13 +43,14 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
#
define
LSIZE
256
#
define
LSIZE
256
#
define
LSIZE_1
255
#
define
LSIZE_1
255
#
define
LSIZE_2
254
#
define
LSIZE_2
254
...
...
modules/ocl/src/opencl/imgproc_integral_sum.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/imgproc_remap.cl
View file @
96121a66
...
@@ -43,11 +43,11 @@
...
@@ -43,11 +43,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/imgproc_resize.cl
View file @
96121a66
...
@@ -48,8 +48,12 @@
...
@@ -48,8 +48,12 @@
//
Currently,
CV_8UC1
CV_8UC4
CV_32FC1
and
CV_32FC4are
supported.
//
Currently,
CV_8UC1
CV_8UC4
CV_32FC1
and
CV_32FC4are
supported.
//
We
shall
support
other
types
later
if
necessary.
//
We
shall
support
other
types
later
if
necessary.
#
if
defined
DOUBLE_SUPPORT
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
F
double
#
define
F
double
#
else
#
else
#
define
F
float
#
define
F
float
...
...
modules/ocl/src/opencl/imgproc_threshold.cl
View file @
96121a66
...
@@ -43,7 +43,7 @@
...
@@ -43,7 +43,7 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/imgproc_warpAffine.cl
View file @
96121a66
...
@@ -47,11 +47,11 @@
...
@@ -47,11 +47,11 @@
//warpAffine
kernel
//warpAffine
kernel
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
typedef
double
F
;
typedef
double
F
;
typedef
double4
F4
;
typedef
double4
F4
;
...
...
modules/ocl/src/opencl/imgproc_warpPerspective.cl
View file @
96121a66
...
@@ -47,11 +47,11 @@
...
@@ -47,11 +47,11 @@
//wrapPerspective
kernel
//wrapPerspective
kernel
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
typedef
double
F
;
typedef
double
F
;
typedef
double4
F4
;
typedef
double4
F4
;
...
...
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
View file @
96121a66
...
@@ -61,35 +61,6 @@
...
@@ -61,35 +61,6 @@
#
define
my_comp
(
x,y
)
((
x
)
<
(
y
))
#
define
my_comp
(
x,y
)
((
x
)
<
(
y
))
#
endif
#
endif
///////////// parallel merge sort ///////////////
// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
{
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
uint firstIndex = left;
uint lastIndex = right;
// This loops through [firstIndex, lastIndex)
// Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
// this while loop will be divergent within a wavefront
while( firstIndex < lastIndex )
{
K_T dataVal = data[ firstIndex ];
// This branch will create divergent wavefronts
if( my_comp( dataVal, searchVal ) )
{
firstIndex = firstIndex+1;
}
else
{
break;
}
}
return firstIndex;
}
//
This
implements
a
binary
search
routine
to
look
for
an
'insertion
point
'
in
a
sequence,
denoted
//
This
implements
a
binary
search
routine
to
look
for
an
'insertion
point
'
in
a
sequence,
denoted
//
by
a
base
pointer
and
left
and
right
index
for
a
particular
candidate
value.
The
comparison
operator
is
//
by
a
base
pointer
and
left
and
right
index
for
a
particular
candidate
value.
The
comparison
operator
is
//
passed
as
a
functor
parameter
my_comp
//
passed
as
a
functor
parameter
my_comp
...
...
modules/ocl/src/opencl/knearest.cl
View file @
96121a66
...
@@ -42,8 +42,13 @@
...
@@ -42,8 +42,13 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
TYPE
double
#
define
TYPE
double
#
else
#
else
#
define
TYPE
float
#
define
TYPE
float
...
...
modules/ocl/src/opencl/match_template.cl
View file @
96121a66
...
@@ -43,14 +43,12 @@
...
@@ -43,14 +43,12 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
TYPE_IMAGE_SQSUM
double
#
define
TYPE_IMAGE_SQSUM
double
#
else
#
else
#
define
TYPE_IMAGE_SQSUM
float
#
define
TYPE_IMAGE_SQSUM
float
...
...
modules/ocl/src/opencl/merge_mat.cl
View file @
96121a66
...
@@ -43,15 +43,19 @@
...
@@ -43,15 +43,19 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////optimized
code
using
vector
roi//////////////////////////
//////////////////////////////////optimized
code
using
vector
roi//////////////////////////
////////////vector
fuction
name
format:
merge_vector_C
(
channels
number
)
D_
(
data
type
depth
)
//////
////////////vector
fuction
name
format:
merge_vector_C
(
channels
number
)
D_
(
data
type
depth
)
//////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
merge_vector_C2_D0
(
__global
uchar
*mat_dst,
int
dst_step,
int
dst_offset,
__kernel
void
merge_vector_C2_D0
(
__global
uchar
*mat_dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mat_src0,
int
src0_step,
int
src0_offset,
__global
uchar
*mat_src0,
int
src0_step,
int
src0_offset,
__global
uchar
*mat_src1,
int
src1_step,
int
src1_offset,
__global
uchar
*mat_src1,
int
src1_step,
int
src1_offset,
...
...
modules/ocl/src/opencl/moments.cl
View file @
96121a66
...
@@ -44,11 +44,11 @@
...
@@ -44,11 +44,11 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
typedef
double
T
;
typedef
double
T
;
#
else
#
else
...
...
modules/ocl/src/opencl/operator_convertTo.cl
View file @
96121a66
...
@@ -35,8 +35,12 @@
...
@@ -35,8 +35,12 @@
//
//
#
ifdef
DOUBLE_SUPPORT
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
__kernel
void
convert_to
(
__kernel
void
convert_to
(
__global
const
srcT*
restrict
srcMat,
__global
const
srcT*
restrict
srcMat,
...
...
modules/ocl/src/opencl/operator_copyToM.cl
View file @
96121a66
...
@@ -34,11 +34,11 @@
...
@@ -34,11 +34,11 @@
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/operator_setTo.cl
View file @
96121a66
...
@@ -34,11 +34,11 @@
...
@@ -34,11 +34,11 @@
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/operator_setToM.cl
View file @
96121a66
...
@@ -34,11 +34,11 @@
...
@@ -34,11 +34,11 @@
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_khr_fp64
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
...
...
modules/ocl/src/opencl/pyrlk.cl
View file @
96121a66
...
@@ -45,8 +45,6 @@
...
@@ -45,8 +45,6 @@
//
//
//M*/
//M*/
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
define
BUFFER
64
#
define
BUFFER
64
#
define
BUFFER2
BUFFER>>1
#
define
BUFFER2
BUFFER>>1
#
ifndef
WAVE_SIZE
#
ifndef
WAVE_SIZE
...
...
modules/ocl/src/opencl/split_mat.cl
View file @
96121a66
...
@@ -38,9 +38,14 @@
...
@@ -38,9 +38,14 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
if
DATA_DEPTH
==
0
#
if
DATA_DEPTH
==
0
#
define
BASE_TYPE
uchar
#
define
BASE_TYPE
uchar
...
...
modules/ocl/src/opencl/stereobm.cl
View file @
96121a66
...
@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
...
@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
{
{
unsigned
int
cache
=
cols[0]
;
unsigned
int
cache
=
cols[0]
;
#
pragma
unroll
for
(
int
i
=
1
; i <= winsz; i++)
for
(
int
i
=
1
; i <= winsz; i++)
cache
+=
cols[i]
;
cache
+=
cols[i]
;
...
...
modules/ocl/src/opencl/stereobp.cl
View file @
96121a66
...
@@ -45,13 +45,11 @@
...
@@ -45,13 +45,11 @@
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_amd_fp64
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
endif
#
endif
#
ifdef
T_FLOAT
#
ifdef
T_FLOAT
...
...
modules/ocl/src/opencl/stereocsbp.cl
View file @
96121a66
...
@@ -44,19 +44,10 @@
...
@@ -44,19 +44,10 @@
//
//
//M*/
//M*/
#
ifndef
FLT_MAX
#
define
FLT_MAX
CL_FLT_MAX
#
endif
#
ifndef
SHRT_MAX
#
define
SHRT_MAX
CL_SHORT_MAX
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_global_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__kernel
void
get_first_k_initial_global_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
...
@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
}
}
}
}
}
}
__kernel
void
get_first_k_initial_global_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__kernel
void
get_first_k_initial_global_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
...
@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_local_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__kernel
void
get_first_k_initial_local_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,int
h,
int
w,
int
nr_plane,
__global
short
*ctemp,int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
...
@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
inline
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
inline
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
float
cdata_weight,
float
cmax_data_term
)
{
{
...
@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
...
@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
}
}
inline
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
inline
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
float
cdata_weight,
float
cmax_data_term
)
{
{
...
@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
...
@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
}
}
}
}
}
}
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
...
@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
...
@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
}
}
}
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
...
@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
...
@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int h, int w, int level, int nr_plane, int channels,
...
@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
...
@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
}
}
}
}
}
}
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int h, int w, int level, int nr_plane, int channels,
...
@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
...
@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
}
}
}
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem,
__global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane,
int level, int rows, int cols, int h, int nr_plane,
...
@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
...
@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
}
}
}
}
static
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
__global
float
*data_cost_new,
__global
const
float
*data_cost_cur,
__global
const
float
*disparity_selected_cur,
int
nr_plane,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2
)
{
for
(
int
i
=
0
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
float
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost_cur[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
__kernel
void
init_message_0
(
__global
short
*u_new_,
__global
short
*d_new_,
__global
short
*l_new_,
__kernel
void
init_message_0
(
__global
short
*u_new_,
__global
short
*d_new_,
__global
short
*l_new_,
__global
short
*r_new_,
__global
short
*u_cur_,
__global
const
short
*d_cur_,
__global
short
*r_new_,
__global
short
*u_cur_,
__global
const
short
*d_cur_,
__global
const
short
*l_cur_,
__global
const
short
*r_cur_,
__global
short
*ctemp,
__global
const
short
*l_cur_,
__global
const
short
*r_cur_,
__global
short
*ctemp,
...
@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
...
@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
cdisp_step1,
cdisp_step2
)
;
cdisp_step1,
cdisp_step2
)
;
}
}
}
}
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__global
float
*r_new_,
__global
const
float
*u_cur_,
__global
const
float
*d_cur_,
__global
float
*r_new_,
__global
const
float
*u_cur_,
__global
const
float
*d_cur_,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
...
...
modules/ocl/src/opencl/svm.cl
View file @
96121a66
...
@@ -33,11 +33,12 @@
...
@@ -33,11 +33,12 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
ifdef
DOUBLE_SUPPORT
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
ifdef
cl_amd_fp64
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
TYPE
double
#
define
TYPE
double
#
else
#
else
...
@@ -53,7 +54,6 @@
...
@@ -53,7 +54,6 @@
#
else
#
else
#
define
POW
(
X,Y
)
X
#
define
POW
(
X,Y
)
X
#
endif
#
endif
#
define
FLT_MAX
3.402823466e+38F
#
define
MAX_VAL
(
FLT_MAX*1e-3
)
#
define
MAX_VAL
(
FLT_MAX*1e-3
)
__kernel
void
svm_linear
(
__global
float*
src,
int
src_step,
__global
float*
src2,
int
src2_step,
__global
TYPE*
dst,
int
dst_step,
int
src_rows,
int
src2_cols,
__kernel
void
svm_linear
(
__global
float*
src,
int
src_step,
__global
float*
src2,
int
src2_step,
__global
TYPE*
dst,
int
dst_step,
int
src_rows,
int
src2_cols,
...
...
modules/ocl/src/opencl/tvl1flow.cl
View file @
96121a66
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment