Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
96121a66
Commit
96121a66
authored
Nov 11, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
kernel warnings on AMD
parent
a81efdbb
Show whitespace changes
Inline
Side-by-side
Showing
57 changed files
with
335 additions
and
370 deletions
+335
-370
arithm_LUT.cl
modules/ocl/src/opencl/arithm_LUT.cl
+5
-1
arithm_absdiff_nonsaturate.cl
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+4
-4
arithm_add.cl
modules/ocl/src/opencl/arithm_add.cl
+4
-4
arithm_addWeighted.cl
modules/ocl/src/opencl/arithm_addWeighted.cl
+4
-4
arithm_add_mask.cl
modules/ocl/src/opencl/arithm_add_mask.cl
+4
-4
arithm_add_scalar.cl
modules/ocl/src/opencl/arithm_add_scalar.cl
+4
-4
arithm_add_scalar_mask.cl
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+4
-4
arithm_bitwise_binary_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+0
-8
arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+4
-4
arithm_cartToPolar.cl
modules/ocl/src/opencl/arithm_cartToPolar.cl
+18
-21
arithm_compare.cl
modules/ocl/src/opencl/arithm_compare.cl
+4
-4
arithm_exp.cl
modules/ocl/src/opencl/arithm_exp.cl
+4
-4
arithm_flip.cl
modules/ocl/src/opencl/arithm_flip.cl
+4
-4
arithm_log.cl
modules/ocl/src/opencl/arithm_log.cl
+5
-1
arithm_magnitude.cl
modules/ocl/src/opencl/arithm_magnitude.cl
+5
-1
arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+1
-1
arithm_minMaxLoc.cl
modules/ocl/src/opencl/arithm_minMaxLoc.cl
+6
-1
arithm_minMaxLoc_mask.cl
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+6
-1
arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+1
-1
arithm_phase.cl
modules/ocl/src/opencl/arithm_phase.cl
+11
-11
arithm_polarToCart.cl
modules/ocl/src/opencl/arithm_polarToCart.cl
+7
-7
arithm_pow.cl
modules/ocl/src/opencl/arithm_pow.cl
+13
-13
arithm_setidentity.cl
modules/ocl/src/opencl/arithm_setidentity.cl
+4
-4
arithm_sum.cl
modules/ocl/src/opencl/arithm_sum.cl
+4
-4
arithm_transpose.cl
modules/ocl/src/opencl/arithm_transpose.cl
+1
-1
bgfg_mog.cl
modules/ocl/src/opencl/bgfg_mog.cl
+13
-8
blend_linear.cl
modules/ocl/src/opencl/blend_linear.cl
+1
-1
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+7
-8
convertC3C4.cl
modules/ocl/src/opencl/convertC3C4.cl
+11
-9
filtering_boxFilter.cl
modules/ocl/src/opencl/filtering_boxFilter.cl
+4
-0
filtering_filter2D.cl
modules/ocl/src/opencl/filtering_filter2D.cl
+4
-0
haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+2
-4
imgproc_convolve.cl
modules/ocl/src/opencl/imgproc_convolve.cl
+4
-2
imgproc_copymakeboder.cl
modules/ocl/src/opencl/imgproc_copymakeboder.cl
+1
-1
imgproc_integral.cl
modules/ocl/src/opencl/imgproc_integral.cl
+5
-4
imgproc_integral_sum.cl
modules/ocl/src/opencl/imgproc_integral_sum.cl
+4
-4
imgproc_remap.cl
modules/ocl/src/opencl/imgproc_remap.cl
+4
-4
imgproc_resize.cl
modules/ocl/src/opencl/imgproc_resize.cl
+5
-1
imgproc_threshold.cl
modules/ocl/src/opencl/imgproc_threshold.cl
+1
-1
imgproc_warpAffine.cl
modules/ocl/src/opencl/imgproc_warpAffine.cl
+4
-4
imgproc_warpPerspective.cl
modules/ocl/src/opencl/imgproc_warpPerspective.cl
+4
-4
kernel_stablesort_by_key.cl
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+0
-29
knearest.cl
modules/ocl/src/opencl/knearest.cl
+6
-1
match_template.cl
modules/ocl/src/opencl/match_template.cl
+4
-6
merge_mat.cl
modules/ocl/src/opencl/merge_mat.cl
+6
-2
moments.cl
modules/ocl/src/opencl/moments.cl
+4
-4
operator_convertTo.cl
modules/ocl/src/opencl/operator_convertTo.cl
+4
-0
operator_copyToM.cl
modules/ocl/src/opencl/operator_copyToM.cl
+4
-4
operator_setTo.cl
modules/ocl/src/opencl/operator_setTo.cl
+4
-4
operator_setToM.cl
modules/ocl/src/opencl/operator_setToM.cl
+4
-4
pyrlk.cl
modules/ocl/src/opencl/pyrlk.cl
+0
-2
split_mat.cl
modules/ocl/src/opencl/split_mat.cl
+6
-1
stereobm.cl
modules/ocl/src/opencl/stereobm.cl
+0
-1
stereobp.cl
modules/ocl/src/opencl/stereobp.cl
+3
-5
stereocsbp.cl
modules/ocl/src/opencl/stereocsbp.cl
+13
-45
svm.cl
modules/ocl/src/opencl/svm.cl
+5
-5
tvl1flow.cl
modules/ocl/src/opencl/tvl1flow.cl
+71
-86
No files found.
modules/ocl/src/opencl/arithm_LUT.cl
View file @
96121a66
...
...
@@ -34,9 +34,13 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
LUT_C1
(
__global
const
srcT
*
src,
__global
const
dstT
*lut,
__global
dstT
*dst,
...
...
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
View file @
96121a66
...
...
@@ -44,11 +44,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add.cl
View file @
96121a66
...
...
@@ -44,11 +44,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_addWeighted.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_mask.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_scalar.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
View file @
96121a66
...
...
@@ -43,14 +43,6 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_bitwise_not.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_cartToPolar.cl
View file @
96121a66
...
...
@@ -43,24 +43,21 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
define
CV_PI
3.1415926535897932384626433832795
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52
#
endif
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
else
#
define
CV_PI
3.1415926535897932384626433832795f
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52f
#
endif
#
define
CV_PI
M_PI_F
#
endif
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//magnitude
__global
float
*dst2,
int
dst2_step,
int
dst2_offset,
//cartToPolar
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//
magnitude
__global
float
*dst2,
int
dst2_step,
int
dst2_offset,
//
cartToPolar
int
rows,
int
cols,
int
angInDegree
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
float
y2
=
y
*
y
;
float
magnitude
=
sqrt
(
x2
+
y2
)
;
float
cartToPolar
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5f
:
CV_PI*1.5f
;
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
DBL
_EPSILON
)
;
float
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
FLT_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
FLT
_EPSILON
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
180/CV_PI
)
;
*
((
__global
float
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
float
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
...
...
@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_cartToPolar_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__global
double
*src2,
int
src2_step,
int
src2_offset,
__global
double
*dst1,
int
dst1_step,
int
dst1_offset,
...
...
@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
double
y2
=
y
*
y
;
double
magnitude
=
sqrt
(
x2
+
y2
)
;
double
cartToPolar
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5
:
CV_PI*1.5
;
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
(
float
)
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
(
float
)
DBL_EPSILON
)
;
double
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
DBL_EPSILON
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
180/CV_PI
)
;
*
((
__global
double
*
)((
__global
char
*
)
dst1
+
dst1_index
))
=
magnitude
;
*
((
__global
double
*
)((
__global
char
*
)
dst2
+
dst2_index
))
=
cartToPolar
;
}
}
#
endif
modules/ocl/src/opencl/arithm_compare.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_exp.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_flip.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_log.cl
View file @
96121a66
...
...
@@ -43,9 +43,13 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////LOG/////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_magnitude.cl
View file @
96121a66
...
...
@@ -43,9 +43,13 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
arithm_magnitude_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
...
...
modules/ocl/src/opencl/arithm_minMax.cl
View file @
96121a66
...
...
@@ -45,7 +45,7 @@
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/arithm_minMaxLoc.cl
View file @
96121a66
...
...
@@ -44,8 +44,13 @@
//M*/
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
RES_TYPE
double4
#
define
CONVERT_RES_TYPE
convert_double4
#
else
...
...
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
View file @
96121a66
...
...
@@ -44,8 +44,13 @@
//M*/
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
RES_TYPE
double4
#
define
CONVERT_RES_TYPE
convert_double4
#
else
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
View file @
96121a66
...
...
@@ -42,7 +42,7 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/arithm_phase.cl
View file @
96121a66
...
...
@@ -44,17 +44,17 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
define
CV_2PI
(
2
*
CV_PI
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
define
CV_2PI
(
2
*
CV_PI
)
#
else
#
define
CV_PI
M_PI_F
#
define
CV_2PI
(
2
*
CV_PI
)
#
define
CV_PI
M_PI_F
#
define
CV_2PI
(
2
*
CV_PI
)
#
endif
/**************************************phase
inradians**************************************/
...
...
@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
double
data1
=
src1[src1_index]
;
double
data2
=
src2[src2_index]
;
double
tmp
=
atan2
(
src2[src2_index],
src1[src1_index]
)
;
double
tmp
=
atan2
(
data2,
data1
)
;
tmp
=
180
*
tmp
/
CV_PI
;
if
(
tmp
<
0
)
...
...
modules/ocl/src/opencl/arithm_polarToCart.cl
View file @
96121a66
...
...
@@ -44,14 +44,14 @@
//M*/
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
CV_PI
M_PI
#
else
#
define
CV_PI
M_PI_F
#
define
CV_PI
M_PI_F
#
endif
/////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/arithm_pow.cl
View file @
96121a66
...
...
@@ -43,21 +43,22 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
typedef
double
F
;
typedef
double4
F4
;
#
define
convert_F4
convert_double4
;
#
endif
#
define
F
double
#
else
typedef
float
F
;
typedef
float4
F4
;
#
define
convert_F4
convert_float4
;
#
define
F
float
#
endif
/**************************************
pow
**************************************
/
__kernel
void
arithm_pow_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
F
p
)
int
rows,
int
cols,
int
dst_step1,
F
p
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse
*
((
__global
float
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_pow_D6
(
__global
double
*src1,
int
src1_step,
int
src1_offset,
__global
double
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
F
p
)
int
rows,
int
cols,
int
dst_step1,
F
p
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs
double
tmp
=
src1_data
>
0
?
exp
(
p
*
log
(
src1_data
))
:
(
src1_data
==
0
?
0
:
exp
(
p
*
log
(
fabs
(
src1_data
))))
;
*
((
__global
double
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
#
endif
modules/ocl/src/opencl/arithm_setidentity.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_sum.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/arithm_transpose.cl
View file @
96121a66
...
...
@@ -43,7 +43,7 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/bgfg_mog.cl
View file @
96121a66
...
...
@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
{
return
fmax
(
var
+
learningRate
*
(
diff
*
diff
-
var
)
,
minVar
)
;
}
#
else
#
define
T_FRAME
uchar4
#
define
T_MEAN_VAR
float4
#
define
CONVERT_TYPE
convert_uchar4_sat
#
define
F_ZERO
(
0.0f,
0.0f,
0.0f,
0.0f
)
inline
float4
cvt
(
const
uchar4
val
)
{
float4
result
;
...
...
@@ -93,6 +96,14 @@ inline float sum(const float4 val)
return
(
val.x
+
val.y
+
val.z
)
;
}
static
void
swap4
(
__global
float4*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
float4
val
=
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
=
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
static
float4
clamp1
(
const
float4
var,
float
learningRate,
const
float4
diff,
float
minVar
)
{
float4
result
;
...
...
@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
result.w
=
0.0f
;
return
result
;
}
#
endif
typedef
struct
...
...
@@ -114,7 +126,7 @@ typedef struct
float
c_varMax
;
float
c_tau
;
uchar
c_shadowVal
;
}con_srtuct_t
;
}
con_srtuct_t
;
static
void
swap
(
__global
float*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
...
...
@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
static
void
swap4
(
__global
float4*
ptr,
int
x,
int
y,
int
k,
int
rows,
int
ptr_step
)
{
float4
val
=
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
(
k
*
rows
+
y
)
*
ptr_step
+
x]
=
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
;
ptr[
((
k
+
1
)
*
rows
+
y
)
*
ptr_step
+
x]
=
val
;
}
__kernel
void
mog_withoutLearning_kernel
(
__global
T_FRAME*
frame,
__global
uchar*
fgmask,
__global
float*
weight,
__global
T_MEAN_VAR*
mean,
__global
T_MEAN_VAR*
var,
int
frame_row,
int
frame_col,
int
frame_step,
int
fgmask_step,
...
...
modules/ocl/src/opencl/blend_linear.cl
View file @
96121a66
...
...
@@ -43,7 +43,7 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/brute_force_match.cl
View file @
96121a66
...
...
@@ -63,14 +63,6 @@
#
define
DIST_TYPE
0
#
endif
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static
int
bit1Count
(
int
v
)
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
return
((
v
+
(
v
>>
4
)
&
0xF0F0F0F
)
*
0x1010101
)
>>
24
; // count
}
//
dirty
fix
for
non-template
support
#
if
(
DIST_TYPE
==
0
)
//
L1Dist
#
ifdef
T_FLOAT
...
...
@@ -89,6 +81,13 @@ typedef float value_type;
typedef
float
result_type
;
#
define
DIST_RES
(
x
)
sqrt
(
x
)
#
elif
(
DIST_TYPE
==
2
)
//
Hamming
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static
int
bit1Count
(
int
v
)
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
return
((
v
+
(
v
>>
4
)
&
0xF0F0F0F
)
*
0x1010101
)
>>
24
; // count
}
#
define
DIST
(
x,
y
)
bit1Count
(
(
x
)
^
(
y
)
)
typedef
int
value_type
;
typedef
int
result_type
;
...
...
modules/ocl/src/opencl/convertC3C4.cl
View file @
96121a66
...
...
@@ -33,12 +33,17 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
dstStep_in_piexl,int
pixel_end
)
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
dstStep_in_piexl,
int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
;
int3
pixelid
=
(
int3
)(
mul24
(
id,3
)
,
mad24
(
id,3,1
)
,
mad24
(
id,3,2
))
;
...
...
@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
dst[addr.y]
=
outpix1
;
}
else
if
(
outx.x<cols
&&
outy.x<rows
)
{
dst[addr.x]
=
outpix0
;
}
}
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
srcStep_in_pixel,int
pixel_end
)
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
srcStep_in_pixel,
int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
<<2
;
int
y
=
id
/
cols
;
...
...
@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
dst[outaddr.y]
=
outpixel1
;
}
else
if
(
outaddr.x
<=
pixel_end
)
{
dst[outaddr.x]
=
pixel0
;
}
}
modules/ocl/src/opencl/filtering_boxFilter.cl
View file @
96121a66
...
...
@@ -146,7 +146,11 @@
#endif
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
...
...
modules/ocl/src/opencl/filtering_filter2D.cl
View file @
96121a66
...
...
@@ -143,7 +143,11 @@
#endif
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
...
...
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
View file @
96121a66
...
...
@@ -45,8 +45,6 @@
//
//M*/
//
Enter
your
kernel
in
this
window
//#pragma
OPENCL
EXTENSION
cl_amd_printf:enable
#
define
CV_HAAR_FEATURE_MAX
3
typedef
int
sumtype
;
typedef
float
sqsumtype
;
...
...
@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
int
counter
=
get_global_id
(
0
)
;
int
tr_x[3],
tr_y[3],
tr_h[3],
tr_w[3],
i
=
0
;
GpuHidHaarTreeNode
t1
=
*
(
orinode
+
counter
)
;
#
pragma
unroll
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
{
tr_x[i]
=
(
int
)(
t1.p[i][0]
*
scale
+
0.5f
)
;
...
...
@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
t1.weight[0]
=
-
(
t1.weight[1]
*
tr_h[1]
*
tr_w[1]
+
t1.weight[2]
*
tr_h[2]
*
tr_w[2]
)
/
(
tr_h[0]
*
tr_w[0]
)
;
counter
+=
nodenum
;
#
pragma
unroll
#
pragma
unroll
for
(
i
=
0
; i < 3; i++)
{
newnode[counter].p[i][0]
=
tr_x[i]
;
...
...
modules/ocl/src/opencl/imgproc_convolve.cl
View file @
96121a66
...
...
@@ -43,11 +43,13 @@
//
//M*/
#
if
defined
(
__ATI__
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
/**************************************
convolve
**************************************
/
...
...
modules/ocl/src/opencl/imgproc_copymakeboder.cl
View file @
96121a66
...
...
@@ -34,7 +34,7 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/imgproc_integral.cl
View file @
96121a66
...
...
@@ -43,13 +43,14 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
LSIZE
256
#
define
LSIZE_1
255
#
define
LSIZE_2
254
...
...
modules/ocl/src/opencl/imgproc_integral_sum.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/imgproc_remap.cl
View file @
96121a66
...
...
@@ -43,11 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/imgproc_resize.cl
View file @
96121a66
...
...
@@ -48,8 +48,12 @@
//
Currently,
CV_8UC1
CV_8UC4
CV_32FC1
and
CV_32FC4are
supported.
//
We
shall
support
other
types
later
if
necessary.
#
if
defined
DOUBLE_SUPPORT
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
F
double
#
else
#
define
F
float
...
...
modules/ocl/src/opencl/imgproc_threshold.cl
View file @
96121a66
...
...
@@ -43,7 +43,7 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
...
...
modules/ocl/src/opencl/imgproc_warpAffine.cl
View file @
96121a66
...
...
@@ -47,11 +47,11 @@
//warpAffine
kernel
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
typedef
double
F
;
typedef
double4
F4
;
...
...
modules/ocl/src/opencl/imgproc_warpPerspective.cl
View file @
96121a66
...
...
@@ -47,11 +47,11 @@
//wrapPerspective
kernel
//support
data
types:
CV_8UC1,
CV_8UC4,
CV_32FC1,
CV_32FC4,
and
three
interpolation
methods:
NN,
Linear,
Cubic.
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
typedef
double
F
;
typedef
double4
F4
;
...
...
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
View file @
96121a66
...
...
@@ -61,35 +61,6 @@
#
define
my_comp
(
x,y
)
((
x
)
<
(
y
))
#
endif
///////////// parallel merge sort ///////////////
// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
{
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
uint firstIndex = left;
uint lastIndex = right;
// This loops through [firstIndex, lastIndex)
// Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
// this while loop will be divergent within a wavefront
while( firstIndex < lastIndex )
{
K_T dataVal = data[ firstIndex ];
// This branch will create divergent wavefronts
if( my_comp( dataVal, searchVal ) )
{
firstIndex = firstIndex+1;
}
else
{
break;
}
}
return firstIndex;
}
//
This
implements
a
binary
search
routine
to
look
for
an
'insertion
point
'
in
a
sequence,
denoted
//
by
a
base
pointer
and
left
and
right
index
for
a
particular
candidate
value.
The
comparison
operator
is
//
passed
as
a
functor
parameter
my_comp
...
...
modules/ocl/src/opencl/knearest.cl
View file @
96121a66
...
...
@@ -42,8 +42,13 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
TYPE
double
#
else
#
define
TYPE
float
...
...
modules/ocl/src/opencl/match_template.cl
View file @
96121a66
...
...
@@ -43,14 +43,12 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
TYPE_IMAGE_SQSUM
double
#
else
#
define
TYPE_IMAGE_SQSUM
float
...
...
modules/ocl/src/opencl/merge_mat.cl
View file @
96121a66
...
...
@@ -43,15 +43,19 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////optimized
code
using
vector
roi//////////////////////////
////////////vector
fuction
name
format:
merge_vector_C
(
channels
number
)
D_
(
data
type
depth
)
//////
////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
merge_vector_C2_D0
(
__global
uchar
*mat_dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mat_src0,
int
src0_step,
int
src0_offset,
__global
uchar
*mat_src1,
int
src1_step,
int
src1_offset,
...
...
modules/ocl/src/opencl/moments.cl
View file @
96121a66
...
...
@@ -44,11 +44,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
typedef
double
T
;
#
else
...
...
modules/ocl/src/opencl/operator_convertTo.cl
View file @
96121a66
...
...
@@ -35,8 +35,12 @@
//
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
convert_to
(
__global
const
srcT*
restrict
srcMat,
...
...
modules/ocl/src/opencl/operator_copyToM.cl
View file @
96121a66
...
...
@@ -34,11 +34,11 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/operator_setTo.cl
View file @
96121a66
...
...
@@ -34,11 +34,11 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/operator_setToM.cl
View file @
96121a66
...
...
@@ -34,11 +34,11 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
...
...
modules/ocl/src/opencl/pyrlk.cl
View file @
96121a66
...
...
@@ -45,8 +45,6 @@
//
//M*/
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
define
BUFFER
64
#
define
BUFFER2
BUFFER>>1
#
ifndef
WAVE_SIZE
...
...
modules/ocl/src/opencl/split_mat.cl
View file @
96121a66
...
...
@@ -38,9 +38,14 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
if
DATA_DEPTH
==
0
#
define
BASE_TYPE
uchar
...
...
modules/ocl/src/opencl/stereobm.cl
View file @
96121a66
...
...
@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
{
unsigned
int
cache
=
cols[0]
;
#
pragma
unroll
for
(
int
i
=
1
; i <= winsz; i++)
cache
+=
cols[i]
;
...
...
modules/ocl/src/opencl/stereobp.cl
View file @
96121a66
...
...
@@ -45,13 +45,11 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
ifdef
T_FLOAT
...
...
modules/ocl/src/opencl/stereocsbp.cl
View file @
96121a66
...
...
@@ -44,19 +44,10 @@
//
//M*/
#
ifndef
FLT_MAX
#
define
FLT_MAX
CL_FLT_MAX
#
endif
#
ifndef
SHRT_MAX
#
define
SHRT_MAX
CL_SHORT_MAX
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_global_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
...
@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
}
}
}
__kernel
void
get_first_k_initial_global_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
...
@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_local_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
...
...
@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
///////////////////////////////////////////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////////////////////////////////////////////
inline
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
{
...
...
@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
}
inline
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
{
...
...
@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
}
}
}
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
...
...
@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
...
...
@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
...
...
@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
}
}
}
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
...
...
@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane,
...
...
@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
}
}
static
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
__global
float
*data_cost_new,
__global
const
float
*data_cost_cur,
__global
const
float
*disparity_selected_cur,
int
nr_plane,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2
)
{
for
(
int
i
=
0
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
float
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost_cur[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
__kernel
void
init_message_0
(
__global
short
*u_new_,
__global
short
*d_new_,
__global
short
*l_new_,
__global
short
*r_new_,
__global
short
*u_cur_,
__global
const
short
*d_cur_,
__global
const
short
*l_cur_,
__global
const
short
*r_cur_,
__global
short
*ctemp,
...
...
@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
cdisp_step1,
cdisp_step2
)
;
}
}
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__global
float
*r_new_,
__global
const
float
*u_cur_,
__global
const
float
*d_cur_,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
...
...
modules/ocl/src/opencl/svm.cl
View file @
96121a66
...
...
@@ -33,11 +33,12 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
TYPE
double
#
else
...
...
@@ -53,7 +54,6 @@
#
else
#
define
POW
(
X,Y
)
X
#
endif
#
define
FLT_MAX
3.402823466e+38F
#
define
MAX_VAL
(
FLT_MAX*1e-3
)
__kernel
void
svm_linear
(
__global
float*
src,
int
src_step,
__global
float*
src2,
int
src2_step,
__global
TYPE*
dst,
int
dst_step,
int
src_rows,
int
src2_cols,
...
...
modules/ocl/src/opencl/tvl1flow.cl
View file @
96121a66
...
...
@@ -44,7 +44,7 @@
//M*/
__kernel
void
centeredGradientKernel
(
__global
const
float*
src,
int
src_col,
int
src_row,
int
src_step,
__global
float*
dx,
__global
float*
dy,
int
dx_step
)
__global
float*
dx,
__global
float*
dy,
int
dx_step
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
...
...
@@ -53,13 +53,6 @@ __global float* dx, __global float* dy, int dx_step)
{
int
src_x1
=
(
x
+
1
)
<
(
src_col
-1
)
?
(
x
+
1
)
:
(
src_col
-
1
)
;
int
src_x2
=
(
x
-
1
)
>
0
?
(
x
-1
)
:
0
;
//if
(
src[y
*
src_step
+
src_x1]
==
src[y
*
src_step+
src_x2]
)
//{
//
printf
(
"y = %d\n"
,
y
)
;
//
printf
(
"src_x1 = %d\n"
,
src_x1
)
;
//
printf
(
"src_x2 = %d\n"
,
src_x2
)
;
//}
dx[y
*
dx_step+
x]
=
0.5f
*
(
src[y
*
src_step
+
src_x1]
-
src[y
*
src_step+
src_x2]
)
;
int
src_y1
=
(
y+1
)
<
(
src_row
-
1
)
?
(
y
+
1
)
:
(
src_row
-
1
)
;
...
...
@@ -97,24 +90,24 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
int
u2_offset_x,
int
u2_offset_y
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
I0_col&&y
<
I0_row
)
{
//
const
float
u1Val
=
u1
(
y,
x
)
;
const
float
u1Val
=
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
//
const
float
u2Val
=
u2
(
y,
x
)
;
const
float
u2Val
=
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
//float
u1Val
=
u1
(
y,
x
)
;
float
u1Val
=
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
//float
u2Val
=
u2
(
y,
x
)
;
float
u2Val
=
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
const
float
wx
=
x
+
u1Val
;
const
float
wy
=
y
+
u2Val
;
float
wx
=
x
+
u1Val
;
float
wy
=
y
+
u2Val
;
const
int
xmin
=
ceil
(
wx
-
2.0f
)
;
const
int
xmax
=
floor
(
wx
+
2.0f
)
;
int
xmin
=
ceil
(
wx
-
2.0f
)
;
int
xmax
=
floor
(
wx
+
2.0f
)
;
const
int
ymin
=
ceil
(
wy
-
2.0f
)
;
const
int
ymax
=
floor
(
wy
+
2.0f
)
;
int
ymin
=
ceil
(
wy
-
2.0f
)
;
int
ymax
=
floor
(
wy
+
2.0f
)
;
float
sum
=
0.0f
;
float
sumx
=
0.0f
;
...
...
@@ -126,7 +119,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
{
for
(
int
cx
=
xmin
; cx <= xmax; ++cx)
{
const
float
w
=
bicubicCoeff
(
wx
-
cx
)
*
bicubicCoeff
(
wy
-
cy
)
;
float
w
=
bicubicCoeff
(
wx
-
cx
)
*
bicubicCoeff
(
wy
-
cy
)
;
//sum
+=
w
*
tex2D
(
tex_I1
,
cx,
cy
)
;
int2
cood
=
(
int2
)(
cx,
cy
)
;
...
...
@@ -140,30 +133,30 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
}
}
const
float
coeff
=
1.0f
/
wsum
;
float
coeff
=
1.0f
/
wsum
;
const
float
I1wVal
=
sum
*
coeff
;
const
float
I1wxVal
=
sumx
*
coeff
;
const
float
I1wyVal
=
sumy
*
coeff
;
float
I1wVal
=
sum
*
coeff
;
float
I1wxVal
=
sumx
*
coeff
;
float
I1wyVal
=
sumy
*
coeff
;
I1w[y
*
I1w_step
+
x]
=
I1wVal
;
I1wx[y
*
I1w_step
+
x]
=
I1wxVal
;
I1wy[y
*
I1w_step
+
x]
=
I1wyVal
;
const
float
Ix2
=
I1wxVal
*
I1wxVal
;
const
float
Iy2
=
I1wyVal
*
I1wyVal
;
float
Ix2
=
I1wxVal
*
I1wxVal
;
float
Iy2
=
I1wyVal
*
I1wyVal
;
//
store
the
|Grad(I1)|^2
grad[y
*
I1w_step
+
x]
=
Ix2
+
Iy2
;
//
compute
the
constant
part
of
the
rho
function
const
float
I0Val
=
I0[y
*
I0_step
+
x]
;
float
I0Val
=
I0[y
*
I0_step
+
x]
;
rho[y
*
I1w_step
+
x]
=
I1wVal
-
I1wxVal
*
u1Val
-
I1wyVal
*
u2Val
-
I0Val
;
}
}
static
float
readImage
(
__global
const
float
*image,
const
int
x,
const
int
y,
const
int
rows,
const
int
cols,
const
int
elemCntPerRow
)
static
float
readImage
(
__global
float
*image,
int
x,
int
y,
int
rows,
int
cols,
int
elemCntPerRow
)
{
int
i0
=
clamp
(
x,
0
,
cols
-
1
)
;
int
j0
=
clamp
(
y,
0
,
rows
-
1
)
;
...
...
@@ -185,24 +178,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
int
I1_step,
int
I1x_step
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
I0_col&&y
<
I0_row
)
{
//
const
float
u1Val
=
u1
(
y,
x
)
;
const
float
u1Val
=
u1[y
*
u1_step
+
x]
;
//
const
float
u2Val
=
u2
(
y,
x
)
;
const
float
u2Val
=
u2[y
*
u2_step
+
x]
;
//float
u1Val
=
u1
(
y,
x
)
;
float
u1Val
=
u1[y
*
u1_step
+
x]
;
//float
u2Val
=
u2
(
y,
x
)
;
float
u2Val
=
u2[y
*
u2_step
+
x]
;
const
float
wx
=
x
+
u1Val
;
const
float
wy
=
y
+
u2Val
;
float
wx
=
x
+
u1Val
;
float
wy
=
y
+
u2Val
;
const
int
xmin
=
ceil
(
wx
-
2.0f
)
;
const
int
xmax
=
floor
(
wx
+
2.0f
)
;
int
xmin
=
ceil
(
wx
-
2.0f
)
;
int
xmax
=
floor
(
wx
+
2.0f
)
;
const
int
ymin
=
ceil
(
wy
-
2.0f
)
;
const
int
ymax
=
floor
(
wy
+
2.0f
)
;
int
ymin
=
ceil
(
wy
-
2.0f
)
;
int
ymax
=
floor
(
wy
+
2.0f
)
;
float
sum
=
0.0f
;
float
sumx
=
0.0f
;
...
...
@@ -213,7 +206,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
{
for
(
int
cx
=
xmin
; cx <= xmax; ++cx)
{
const
float
w
=
bicubicCoeff
(
wx
-
cx
)
*
bicubicCoeff
(
wy
-
cy
)
;
float
w
=
bicubicCoeff
(
wx
-
cx
)
*
bicubicCoeff
(
wy
-
cy
)
;
int2
cood
=
(
int2
)(
cx,
cy
)
;
sum
+=
w
*
readImage
(
tex_I1,
cood.x,
cood.y,
I0_col,
I0_row,
I1_step
)
;
...
...
@@ -223,24 +216,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
}
}
const
float
coeff
=
1.0f
/
wsum
;
float
coeff
=
1.0f
/
wsum
;
const
float
I1wVal
=
sum
*
coeff
;
const
float
I1wxVal
=
sumx
*
coeff
;
const
float
I1wyVal
=
sumy
*
coeff
;
float
I1wVal
=
sum
*
coeff
;
float
I1wxVal
=
sumx
*
coeff
;
float
I1wyVal
=
sumy
*
coeff
;
I1w[y
*
I1w_step
+
x]
=
I1wVal
;
I1wx[y
*
I1w_step
+
x]
=
I1wxVal
;
I1wy[y
*
I1w_step
+
x]
=
I1wyVal
;
const
float
Ix2
=
I1wxVal
*
I1wxVal
;
const
float
Iy2
=
I1wyVal
*
I1wyVal
;
float
Ix2
=
I1wxVal
*
I1wxVal
;
float
Iy2
=
I1wyVal
*
I1wyVal
;
//
store
the
|Grad(I1)|^2
grad[y
*
I1w_step
+
x]
=
Ix2
+
Iy2
;
//
compute
the
constant
part
of
the
rho
function
const
float
I0Val
=
I0[y
*
I0_step
+
x]
;
float
I0Val
=
I0[y
*
I0_step
+
x]
;
rho[y
*
I1w_step
+
x]
=
I1wVal
-
I1wxVal
*
u1Val
-
I1wyVal
*
u2Val
-
I0Val
;
}
...
...
@@ -253,38 +246,35 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
__global
float*
p12,
__global
float*
p21,
__global
float*
p22,
const
float
taut,
float
taut,
int
u2_step,
int
u1_offset_x,
int
u1_offset_y,
int
u2_offset_x,
int
u2_offset_y
)
{
//const
int
x
=
blockIdx.x
*
blockDim.x
+
threadIdx.x
;
//const
int
y
=
blockIdx.y
*
blockDim.y
+
threadIdx.y
;
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
u1_col
&&
y
<
u1_row
)
{
int
src_x1
=
(
x
+
1
)
<
(
u1_col
-
1
)
?
(
x
+
1
)
:
(
u1_col
-
1
)
;
const
float
u1x
=
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
src_x1
+
u1_offset_x]
-
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
float
u1x
=
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
src_x1
+
u1_offset_x]
-
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
int
src_y1
=
(
y
+
1
)
<
(
u1_row
-
1
)
?
(
y
+
1
)
:
(
u1_row
-
1
)
;
const
float
u1y
=
u1[
(
src_y1
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
-
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
float
u1y
=
u1[
(
src_y1
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
-
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
int
src_x2
=
(
x
+
1
)
<
(
u1_col
-
1
)
?
(
x
+
1
)
:
(
u1_col
-
1
)
;
const
float
u2x
=
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
src_x2
+
u2_offset_x]
-
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
float
u2x
=
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
src_x2
+
u2_offset_x]
-
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
int
src_y2
=
(
y
+
1
)
<
(
u1_row
-
1
)
?
(
y
+
1
)
:
(
u1_row
-
1
)
;
const
float
u2y
=
u2[
(
src_y2
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
-
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
float
u2y
=
u2[
(
src_y2
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
-
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
const
float
g1
=
hypot
(
u1x,
u1y
)
;
const
float
g2
=
hypot
(
u2x,
u2y
)
;
float
g1
=
hypot
(
u1x,
u1y
)
;
float
g2
=
hypot
(
u2x,
u2y
)
;
const
float
ng1
=
1.0f
+
taut
*
g1
;
const
float
ng2
=
1.0f
+
taut
*
g2
;
float
ng1
=
1.0f
+
taut
*
g1
;
float
ng2
=
1.0f
+
taut
*
g2
;
p11[y
*
p11_step
+
x]
=
(
p11[y
*
p11_step
+
x]
+
taut
*
u1x
)
/
ng1
;
p12[y
*
p11_step
+
x]
=
(
p12[y
*
p11_step
+
x]
+
taut
*
u1y
)
/
ng1
;
...
...
@@ -299,8 +289,8 @@ static float divergence(__global const float* v1, __global const float* v2, int
if
(
x
>
0
&&
y
>
0
)
{
const
float
v1x
=
v1[y
*
v1_step
+
x]
-
v1[y
*
v1_step
+
x
-
1]
;
const
float
v2y
=
v2[y
*
v2_step
+
x]
-
v2[
(
y
-
1
)
*
v2_step
+
x]
;
float
v1x
=
v1[y
*
v1_step
+
x]
-
v1[y
*
v1_step
+
x
-
1]
;
float
v2y
=
v2[y
*
v2_step
+
x]
-
v2[
(
y
-
1
)
*
v2_step
+
x]
;
return
v1x
+
v2y
;
}
else
...
...
@@ -328,30 +318,25 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
__global
const
float*
p22,
/*int
p22_step,*/
__global
float*
u1,
int
u1_step,
__global
float*
u2,
__global
float*
error,
const
float
l_t,
const
float
theta,
int
u2_step,
__global
float*
error,
float
l_t,
float
theta,
int
u2_step,
int
u1_offset_x,
int
u1_offset_y,
int
u2_offset_x,
int
u2_offset_y,
char
calc_error
)
{
//const
int
x
=
blockIdx.x
*
blockDim.x
+
threadIdx.x
;
//const
int
y
=
blockIdx.y
*
blockDim.y
+
threadIdx.y
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
I1wx_col
&&
y
<
I1wx_row
)
{
const
float
I1wxVal
=
I1wx[y
*
I1wx_step
+
x]
;
const
float
I1wyVal
=
I1wy[y
*
I1wx_step
+
x]
;
const
float
gradVal
=
grad[y
*
I1wx_step
+
x]
;
const
float
u1OldVal
=
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
const
float
u2OldVal
=
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
float
I1wxVal
=
I1wx[y
*
I1wx_step
+
x]
;
float
I1wyVal
=
I1wy[y
*
I1wx_step
+
x]
;
float
gradVal
=
grad[y
*
I1wx_step
+
x]
;
float
u1OldVal
=
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
;
float
u2OldVal
=
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
;
const
float
rho
=
rho_c[y
*
I1wx_step
+
x]
+
(
I1wxVal
*
u1OldVal
+
I1wyVal
*
u2OldVal
)
;
float
rho
=
rho_c[y
*
I1wx_step
+
x]
+
(
I1wxVal
*
u1OldVal
+
I1wyVal
*
u2OldVal
)
;
//
estimate
the
values
of
the
variable
(
v1,
v2
)
(
thresholding
operator
TH
)
...
...
@@ -370,31 +355,31 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
}
else
if
(
gradVal
>
1.192092896e-07f
)
{
const
float
fi
=
-rho
/
gradVal
;
float
fi
=
-rho
/
gradVal
;
d1
=
fi
*
I1wxVal
;
d2
=
fi
*
I1wyVal
;
}
const
float
v1
=
u1OldVal
+
d1
;
const
float
v2
=
u2OldVal
+
d2
;
float
v1
=
u1OldVal
+
d1
;
float
v2
=
u2OldVal
+
d2
;
//
compute
the
divergence
of
the
dual
variable
(
p1,
p2
)
const
float
div_p1
=
divergence
(
p11,
p12,
y,
x,
I1wx_step,
I1wx_step
)
;
const
float
div_p2
=
divergence
(
p21,
p22,
y,
x,
I1wx_step,
I1wx_step
)
;
float
div_p1
=
divergence
(
p11,
p12,
y,
x,
I1wx_step,
I1wx_step
)
;
float
div_p2
=
divergence
(
p21,
p22,
y,
x,
I1wx_step,
I1wx_step
)
;
//
estimate
the
values
of
the
optical
flow
(
u1,
u2
)
const
float
u1NewVal
=
v1
+
theta
*
div_p1
;
const
float
u2NewVal
=
v2
+
theta
*
div_p2
;
float
u1NewVal
=
v1
+
theta
*
div_p1
;
float
u2NewVal
=
v2
+
theta
*
div_p2
;
u1[
(
y
+
u1_offset_y
)
*
u1_step
+
x
+
u1_offset_x]
=
u1NewVal
;
u2[
(
y
+
u2_offset_y
)
*
u2_step
+
x
+
u2_offset_x]
=
u2NewVal
;
if
(
calc_error
)
{
const
float
n1
=
(
u1OldVal
-
u1NewVal
)
*
(
u1OldVal
-
u1NewVal
)
;
const
float
n2
=
(
u2OldVal
-
u2NewVal
)
*
(
u2OldVal
-
u2NewVal
)
;
float
n1
=
(
u1OldVal
-
u1NewVal
)
*
(
u1OldVal
-
u1NewVal
)
;
float
n2
=
(
u2OldVal
-
u2NewVal
)
*
(
u2OldVal
-
u2NewVal
)
;
error[y
*
I1wx_step
+
x]
=
n1
+
n2
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment