Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a9a39a3a
Commit
a9a39a3a
authored
Nov 01, 2013
by
Roman Donchenko
Committed by
OpenCV Buildbot
Nov 01, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1730 from ilya-lavrenov:ocl_kern_warn
parents
b1bed14e
1f9ab2e0
Hide whitespace changes
Inline
Side-by-side
Showing
39 changed files
with
578 additions
and
604 deletions
+578
-604
arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+0
-2
arithm_cartToPolar.cl
modules/ocl/src/opencl/arithm_cartToPolar.cl
+13
-9
arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+100
-100
arithm_minMaxLoc.cl
modules/ocl/src/opencl/arithm_minMaxLoc.cl
+101
-105
arithm_minMaxLoc_mask.cl
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+91
-91
arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+4
-4
arithm_phase.cl
modules/ocl/src/opencl/arithm_phase.cl
+10
-8
arithm_polarToCart.cl
modules/ocl/src/opencl/arithm_polarToCart.cl
+7
-6
arithm_sum.cl
modules/ocl/src/opencl/arithm_sum.cl
+29
-29
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+6
-12
cvt_color.cl
modules/ocl/src/opencl/cvt_color.cl
+4
-2
haarobjectdetect.cl
modules/ocl/src/opencl/haarobjectdetect.cl
+0
-1
haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+0
-1
imgproc_calcHarris.cl
modules/ocl/src/opencl/imgproc_calcHarris.cl
+4
-5
imgproc_calcMinEigenVal.cl
modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
+4
-5
imgproc_canny.cl
modules/ocl/src/opencl/imgproc_canny.cl
+7
-5
imgproc_clahe.cl
modules/ocl/src/opencl/imgproc_clahe.cl
+6
-6
imgproc_integral.cl
modules/ocl/src/opencl/imgproc_integral.cl
+12
-12
imgproc_integral_sum.cl
modules/ocl/src/opencl/imgproc_integral_sum.cl
+8
-8
imgproc_median.cl
modules/ocl/src/opencl/imgproc_median.cl
+16
-16
imgproc_remap.cl
modules/ocl/src/opencl/imgproc_remap.cl
+6
-12
imgproc_resize.cl
modules/ocl/src/opencl/imgproc_resize.cl
+12
-12
imgproc_threshold.cl
modules/ocl/src/opencl/imgproc_threshold.cl
+10
-10
imgproc_warpAffine.cl
modules/ocl/src/opencl/imgproc_warpAffine.cl
+4
-4
imgproc_warpPerspective.cl
modules/ocl/src/opencl/imgproc_warpPerspective.cl
+17
-17
kernel_sort_by_key.cl
modules/ocl/src/opencl/kernel_sort_by_key.cl
+0
-1
kernel_stablesort_by_key.cl
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+9
-12
knearest.cl
modules/ocl/src/opencl/knearest.cl
+34
-39
match_template.cl
modules/ocl/src/opencl/match_template.cl
+2
-4
meanShift.cl
modules/ocl/src/opencl/meanShift.cl
+1
-2
moments.cl
modules/ocl/src/opencl/moments.cl
+5
-19
objdetect_hog.cl
modules/ocl/src/opencl/objdetect_hog.cl
+1
-3
optical_flow_farneback.cl
modules/ocl/src/opencl/optical_flow_farneback.cl
+2
-2
pyr_down.cl
modules/ocl/src/opencl/pyr_down.cl
+6
-6
pyrlk.cl
modules/ocl/src/opencl/pyrlk.cl
+15
-14
split_mat.cl
modules/ocl/src/opencl/split_mat.cl
+2
-2
stereobm.cl
modules/ocl/src/opencl/stereobm.cl
+7
-7
stereobp.cl
modules/ocl/src/opencl/stereobp.cl
+4
-4
stereocsbp.cl
modules/ocl/src/opencl/stereocsbp.cl
+19
-7
No files found.
modules/ocl/src/opencl/arithm_bitwise_not.cl
View file @
a9a39a3a
...
@@ -67,7 +67,6 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
...
@@ -67,7 +67,6 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
x
=
x
<<
2
;
x
=
x
<<
2
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
...
@@ -97,7 +96,6 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
...
@@ -97,7 +96,6 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
x
=
x
<<
2
;
x
=
x
<<
2
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
...
...
modules/ocl/src/opencl/arithm_cartToPolar.cl
View file @
a9a39a3a
...
@@ -44,14 +44,18 @@
...
@@ -44,14 +44,18 @@
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
define
CV_PI
3.1415926535897932384626433832795
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52
#
endif
#
else
#
define
CV_PI
3.1415926535897932384626433832795f
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52f
#
endif
#
endif
#
endif
#
define
CV_PI
3.1415926535897932384626433832795
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52
#
endif
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
...
@@ -82,9 +86,9 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
...
@@ -82,9 +86,9 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
float
tmp
=
y
>=
0
?
0
:
CV_PI*2
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
tmp
=
x
<
0
?
CV_PI
:
tmp
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5
:
CV_PI*1.5
;
float
tmp1
=
y
>=
0
?
CV_PI*0.5
f
:
CV_PI*1.5f
;
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
(
float
)
DBL_EPSILON
)
+
tmp
:
cartToPolar
=
y2
<=
x2
?
x*y/
(
x2
+
0.28f*y2
+
DBL_EPSILON
)
+
tmp
:
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
(
float
)
DBL_EPSILON
)
;
tmp1
-
x*y/
(
y2
+
0.28f*x2
+
DBL_EPSILON
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
cartToPolar
=
angInDegree
==
0
?
cartToPolar
:
cartToPolar
*
(
float
)(
180/CV_PI
)
;
...
...
modules/ocl/src/opencl/arithm_minMax.cl
View file @
a9a39a3a
...
@@ -66,53 +66,53 @@
...
@@ -66,53 +66,53 @@
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
__global
T
*
dst,
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
)
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
temp
=
src[idx]
;
minval
=
min
(
minval,
temp
)
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
}
if
(
lid
>
127
)
if
(
lid
>
127
)
{
{
localmem_min[lid
-
128]
=
minval
;
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
localmem_max[lid
-
128]
=
maxval
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
if
(
lid
<
128
)
{
{
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
{
if
(
lid
<
lsize
)
if
(
lid
<
lsize
)
{
{
int
lid2
=
lsize
+
lid
;
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
lid
==
0
)
if
(
lid
==
0
)
{
{
dst[gid]
=
localmem_min[0]
;
dst[gid]
=
localmem_min[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
}
}
}
}
__kernel
void
arithm_op_minMax_mask
(
__global
const
T
*
src,
__global
T
*
dst,
__kernel
void
arithm_op_minMax_mask
(
__global
const
T
*
src,
__global
T
*
dst,
...
@@ -120,57 +120,57 @@ __kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
...
@@ -120,57 +120,57 @@ __kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
int
elemnum,
int
groupnum,
int
elemnum,
int
groupnum,
const
__global
uchar
*
mask,
int
minvalid_cols,
int
moffset
)
const
__global
uchar
*
mask,
int
minvalid_cols,
int
moffset
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
unsigned
int
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
int
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
if
(
mask[midx]
)
if
(
mask[midx]
)
{
{
temp
=
src[idx]
;
temp
=
src[idx]
;
minval
=
min
(
minval,
temp
)
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
}
}
}
if
(
lid
>
127
)
if
(
lid
>
127
)
{
{
localmem_min[lid
-
128]
=
minval
;
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
localmem_max[lid
-
128]
=
maxval
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
if
(
lid
<
128
)
{
{
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
{
if
(
lid
<
lsize
)
if
(
lid
<
lsize
)
{
{
int
lid2
=
lsize
+
lid
;
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
lid
==
0
)
if
(
lid
==
0
)
{
{
dst[gid]
=
localmem_min[0]
;
dst[gid]
=
localmem_min[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
}
}
}
}
modules/ocl/src/opencl/arithm_minMaxLoc.cl
View file @
a9a39a3a
...
@@ -137,118 +137,114 @@
...
@@ -137,118 +137,114 @@
#
define
repeat_e
(
a
)
a.s3
=
a.s0
;a.s2 = a.s0;a.s1 = a.s0;
#
define
repeat_e
(
a
)
a.s3
=
a.s0
;a.s2 = a.s0;a.s1 = a.s0;
#
endif
#
endif
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_extended_atomics:enable
/**************************************Array
minMax**************************************/
/**************************************Array
minMax**************************************/
__kernel
void
arithm_op_minMaxLoc
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
__kernel
void
arithm_op_minMaxLoc
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
__global
VEC_TYPE
*src,
__global
RES_TYPE
*dst
)
__global
VEC_TYPE
*src,
__global
RES_TYPE
*dst
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
VEC_TYPE
localmem_max[128],
localmem_min[128]
;
__local
VEC_TYPE
localmem_max[128],
localmem_min[128]
;
VEC_TYPE
minval,
maxval,
temp
;
VEC_TYPE
minval,
maxval,
temp
;
__local
VEC_TYPE_LOC
localmem_maxloc[128],
localmem_minloc[128]
;
__local
VEC_TYPE_LOC
localmem_maxloc[128],
localmem_minloc[128]
;
VEC_TYPE_LOC
minloc,
maxloc,
temploc,
negative
=
-1
;
VEC_TYPE_LOC
minloc,
maxloc,
temploc,
negative
=
-1
;
int
idx_c
;
int
idx_c
;
if
(
id
<
elemnum
)
if
(
id
<
elemnum
)
{
{
temp
=
src[idx]
;
temp
=
src[idx]
;
idx_c
=
idx
<<
2
;
idx_c
=
idx
<<
2
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,
idx_c
+
1
,
idx_c
+
2
,
idx_c
+
3
)
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,
idx_c
+
1
,
idx_c
+
2
,
idx_c
+
3
)
;
if
(
id
%
cols
==
0
)
if
(
id
%
cols
==
0
)
{
{
repeat_s
(
temp
)
;
repeat_s
(
temp
)
;
repeat_s
(
temploc
)
;
repeat_s
(
temploc
)
;
}
}
if
(
id
%
cols
==
cols
-
1
)
if
(
id
%
cols
==
cols
-
1
)
{
{
repeat_e
(
temp
)
;
repeat_e
(
temp
)
;
repeat_e
(
temploc
)
;
repeat_e
(
temploc
)
;
}
}
minval
=
temp
;
minval
=
temp
;
maxval
=
temp
;
maxval
=
temp
;
minloc
=
temploc
;
minloc
=
temploc
;
maxloc
=
temploc
;
maxloc
=
temploc
;
}
}
else
else
{
{
minval
=
MAX_VAL
;
minval
=
MAX_VAL
;
maxval
=
MIN_VAL
;
maxval
=
MIN_VAL
;
minloc
=
negative
;
minloc
=
negative
;
maxloc
=
negative
;
maxloc
=
negative
;
}
}
int
grainSize
=
(
groupnum
<<
8
)
;
int
grainSize
=
(
groupnum
<<
8
)
;
for
(
id
=
id
+
grainSize
; id < elemnum; id = id + grainSize)
for
(
id
=
id
+
grainSize
; id < elemnum; id = id + grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
temp
=
src[idx]
;
idx_c
=
idx
<<
2
;
idx_c
=
idx
<<
2
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,
idx_c+1,
idx_c+2,
idx_c+3
)
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,
idx_c+1,
idx_c+2,
idx_c+3
)
;
if
(
id
%
cols
==
0
)
if
(
id
%
cols
==
0
)
{
{
repeat_s
(
temp
)
;
repeat_s
(
temp
)
;
repeat_s
(
temploc
)
;
repeat_s
(
temploc
)
;
}
}
if
(
id
%
cols
==
cols
-
1
)
if
(
id
%
cols
==
cols
-
1
)
{
{
repeat_e
(
temp
)
;
repeat_e
(
temp
)
;
repeat_e
(
temploc
)
;
repeat_e
(
temploc
)
;
}
}
minval
=
min
(
minval,
temp
)
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
minloc
=
CONDITION_FUNC
(
minval
==
temp,
temploc,
minloc
)
;
minloc
=
CONDITION_FUNC
(
minval
==
temp,
temploc,
minloc
)
;
maxloc
=
CONDITION_FUNC
(
maxval
==
temp,
temploc,
maxloc
)
;
maxloc
=
CONDITION_FUNC
(
maxval
==
temp,
temploc,
maxloc
)
;
}
}
if
(
lid
>
127
)
if
(
lid
>
127
)
{
{
localmem_min[lid
-
128]
=
minval
;
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
localmem_max[lid
-
128]
=
maxval
;
localmem_minloc[lid
-
128]
=
minloc
;
localmem_minloc[lid
-
128]
=
minloc
;
localmem_maxloc[lid
-
128]
=
maxloc
;
localmem_maxloc[lid
-
128]
=
maxloc
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
if
(
lid
<
128
)
{
{
localmem_min[lid]
=
min
(
minval,localmem_min[lid]
)
;
localmem_min[lid]
=
min
(
minval,localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,localmem_max[lid]
)
;
localmem_max[lid]
=
max
(
maxval,localmem_max[lid]
)
;
localmem_minloc[lid]
=
CONDITION_FUNC
(
localmem_min[lid]
==
minval,
minloc,
localmem_minloc[lid]
)
;
localmem_minloc[lid]
=
CONDITION_FUNC
(
localmem_min[lid]
==
minval,
minloc,
localmem_minloc[lid]
)
;
localmem_maxloc[lid]
=
CONDITION_FUNC
(
localmem_max[lid]
==
maxval,
maxloc,
localmem_maxloc[lid]
)
;
localmem_maxloc[lid]
=
CONDITION_FUNC
(
localmem_max[lid]
==
maxval,
maxloc,
localmem_maxloc[lid]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
{
if
(
lid
<
lsize
)
if
(
lid
<
lsize
)
{
{
int
lid2
=
lsize
+
lid
;
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
localmem_minloc[lid]
=
CONDITION_FUNC
(
localmem_min[lid]
==
localmem_min[lid2],
localmem_minloc[lid2],
localmem_minloc[lid]
)
;
localmem_minloc[lid]
=
CONDITION_FUNC
(
localmem_min[lid]
==
localmem_min[lid2],
localmem_minloc[lid2],
localmem_minloc[lid]
)
;
localmem_maxloc[lid]
=
CONDITION_FUNC
(
localmem_max[lid]
==
localmem_max[lid2],
localmem_maxloc[lid2],
localmem_maxloc[lid]
)
;
localmem_maxloc[lid]
=
CONDITION_FUNC
(
localmem_max[lid]
==
localmem_max[lid2],
localmem_maxloc[lid2],
localmem_maxloc[lid]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
lid
==
0
)
if
(
lid
==
0
)
{
{
dst[gid]
=
CONVERT_RES_TYPE
(
localmem_min[0]
)
;
dst[gid]
=
CONVERT_RES_TYPE
(
localmem_min[0]
)
;
dst[gid
+
groupnum]
=
CONVERT_RES_TYPE
(
localmem_max[0]
)
;
dst[gid
+
groupnum]
=
CONVERT_RES_TYPE
(
localmem_max[0]
)
;
dst[gid
+
2
*
groupnum]
=
CONVERT_RES_TYPE
(
localmem_minloc[0]
)
;
dst[gid
+
2
*
groupnum]
=
CONVERT_RES_TYPE
(
localmem_minloc[0]
)
;
dst[gid
+
3
*
groupnum]
=
CONVERT_RES_TYPE
(
localmem_maxloc[0]
)
;
dst[gid
+
3
*
groupnum]
=
CONVERT_RES_TYPE
(
localmem_maxloc[0]
)
;
}
}
}
}
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
View file @
a9a39a3a
...
@@ -147,96 +147,96 @@
...
@@ -147,96 +147,96 @@
__kernel
void
arithm_op_minMaxLoc_mask
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum,__global
TYPE
*src,
__kernel
void
arithm_op_minMaxLoc_mask
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum,__global
TYPE
*src,
int
minvalid_cols,int
moffset,__global
uchar
*mask,__global
RES_TYPE
*dst
)
int
minvalid_cols,int
moffset,__global
uchar
*mask,__global
RES_TYPE
*dst
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
id
+
(
id
/
cols
)
*
invalid_cols
;
int
idx
=
id
+
(
id
/
cols
)
*
invalid_cols
;
unsigned
int
midx
=
id
+
(
id
/
cols
)
*
minvalid_cols
;
int
midx
=
id
+
(
id
/
cols
)
*
minvalid_cols
;
__local
VEC_TYPE
lm_max[128],lm_min[128]
;
__local
VEC_TYPE
lm_max[128],lm_min[128]
;
VEC_TYPE
minval,maxval,temp,m_temp
;
VEC_TYPE
minval,maxval,temp,m_temp
;
__local
VEC_TYPE_LOC
lm_maxloc[128],lm_minloc[128]
;
__local
VEC_TYPE_LOC
lm_maxloc[128],lm_minloc[128]
;
VEC_TYPE_LOC
minloc,maxloc,temploc,negative
=
-1
,
one
=
1
,
zero
=
0
;
VEC_TYPE_LOC
minloc,maxloc,temploc,negative
=
-1
,
one
=
1
,
zero
=
0
;
if
(
id
<
elemnum
)
if
(
id
<
elemnum
)
{
{
temp
=
vload4
(
idx,
&src[offset]
)
;
temp
=
vload4
(
idx,
&src[offset]
)
;
m_temp
=
CONVERT_TYPE
(
vload4
(
midx,&mask[moffset]
))
;
m_temp
=
CONVERT_TYPE
(
vload4
(
midx,&mask[moffset]
))
;
int
idx_c
=
(
idx
<<
2
)
+
offset
;
int
idx_c
=
(
idx
<<
2
)
+
offset
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,idx_c+1,idx_c+2,idx_c+3
)
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,idx_c+1,idx_c+2,idx_c+3
)
;
if
(
id
%
cols
==
cols
-
1
)
if
(
id
%
cols
==
cols
-
1
)
{
{
repeat_me
(
m_temp
)
;
repeat_me
(
m_temp
)
;
repeat_e
(
temploc
)
;
repeat_e
(
temploc
)
;
}
}
minval
=
m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
(
VEC_TYPE
)
MAX_VAL
;
minval
=
m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
(
VEC_TYPE
)
MAX_VAL
;
maxval
=
m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
(
VEC_TYPE
)
MIN_VAL
;
maxval
=
m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
(
VEC_TYPE
)
MIN_VAL
;
minloc
=
CONDITION_FUNC
(
m_temp
!=
(
VEC_TYPE
)
0
,
temploc
,
negative
)
;
minloc
=
CONDITION_FUNC
(
m_temp
!=
(
VEC_TYPE
)
0
,
temploc
,
negative
)
;
maxloc
=
minloc
;
maxloc
=
minloc
;
}
}
else
else
{
{
minval
=
MAX_VAL
;
minval
=
MAX_VAL
;
maxval
=
MIN_VAL
;
maxval
=
MIN_VAL
;
minloc
=
negative
;
minloc
=
negative
;
maxloc
=
negative
;
maxloc
=
negative
;
}
}
for
(
id=id
+
(
groupnum
<<
8
)
; id < elemnum;id = id + (groupnum << 8))
for
(
id=id
+
(
groupnum
<<
8
)
; id < elemnum;id = id + (groupnum << 8))
{
{
idx
=
id
+
(
id
/
cols
)
*
invalid_cols
;
idx
=
id
+
(
id
/
cols
)
*
invalid_cols
;
midx
=
id
+
(
id
/
cols
)
*
minvalid_cols
;
midx
=
id
+
(
id
/
cols
)
*
minvalid_cols
;
temp
=
vload4
(
idx,
&src[offset]
)
;
temp
=
vload4
(
idx,
&src[offset]
)
;
m_temp
=
CONVERT_TYPE
(
vload4
(
midx,&mask[moffset]
))
;
m_temp
=
CONVERT_TYPE
(
vload4
(
midx,&mask[moffset]
))
;
int
idx_c
=
(
idx
<<
2
)
+
offset
;
int
idx_c
=
(
idx
<<
2
)
+
offset
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,idx_c+1,idx_c+2,idx_c+3
)
;
temploc
=
(
VEC_TYPE_LOC
)(
idx_c,idx_c+1,idx_c+2,idx_c+3
)
;
if
(
id
%
cols
==
cols
-
1
)
if
(
id
%
cols
==
cols
-
1
)
{
{
repeat_me
(
m_temp
)
;
repeat_me
(
m_temp
)
;
repeat_e
(
temploc
)
;
repeat_e
(
temploc
)
;
}
}
minval
=
min
(
minval,m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
minval
)
;
minval
=
min
(
minval,m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
minval
)
;
maxval
=
max
(
maxval,m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
maxval
)
;
maxval
=
max
(
maxval,m_temp
!=
(
VEC_TYPE
)
0
?
temp
:
maxval
)
;
minloc
=
CONDITION_FUNC
((
minval
==
temp
)
&&
(
m_temp
!=
(
VEC_TYPE
)
0
)
,
temploc
,
minloc
)
;
minloc
=
CONDITION_FUNC
((
minval
==
temp
)
&&
(
m_temp
!=
(
VEC_TYPE
)
0
)
,
temploc
,
minloc
)
;
maxloc
=
CONDITION_FUNC
((
maxval
==
temp
)
&&
(
m_temp
!=
(
VEC_TYPE
)
0
)
,
temploc
,
maxloc
)
;
maxloc
=
CONDITION_FUNC
((
maxval
==
temp
)
&&
(
m_temp
!=
(
VEC_TYPE
)
0
)
,
temploc
,
maxloc
)
;
}
}
if
(
lid
>
127
)
if
(
lid
>
127
)
{
{
lm_min[lid
-
128]
=
minval
;
lm_min[lid
-
128]
=
minval
;
lm_max[lid
-
128]
=
maxval
;
lm_max[lid
-
128]
=
maxval
;
lm_minloc[lid
-
128]
=
minloc
;
lm_minloc[lid
-
128]
=
minloc
;
lm_maxloc[lid
-
128]
=
maxloc
;
lm_maxloc[lid
-
128]
=
maxloc
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
if
(
lid
<
128
)
{
{
lm_min[lid]
=
min
(
minval,lm_min[lid]
)
;
lm_min[lid]
=
min
(
minval,lm_min[lid]
)
;
lm_max[lid]
=
max
(
maxval,lm_max[lid]
)
;
lm_max[lid]
=
max
(
maxval,lm_max[lid]
)
;
VEC_TYPE
con_min
=
CONVERT_TYPE
(
minloc
!=
negative
?
one
:
zero
)
;
VEC_TYPE
con_min
=
CONVERT_TYPE
(
minloc
!=
negative
?
one
:
zero
)
;
VEC_TYPE
con_max
=
CONVERT_TYPE
(
maxloc
!=
negative
?
one
:
zero
)
;
VEC_TYPE
con_max
=
CONVERT_TYPE
(
maxloc
!=
negative
?
one
:
zero
)
;
lm_minloc[lid]
=
CONDITION_FUNC
((
lm_min[lid]
==
minval
)
&&
(
con_min
!=
(
VEC_TYPE
)
0
)
,
minloc
,
lm_minloc[lid]
)
;
lm_minloc[lid]
=
CONDITION_FUNC
((
lm_min[lid]
==
minval
)
&&
(
con_min
!=
(
VEC_TYPE
)
0
)
,
minloc
,
lm_minloc[lid]
)
;
lm_maxloc[lid]
=
CONDITION_FUNC
((
lm_max[lid]
==
maxval
)
&&
(
con_max
!=
(
VEC_TYPE
)
0
)
,
maxloc
,
lm_maxloc[lid]
)
;
lm_maxloc[lid]
=
CONDITION_FUNC
((
lm_max[lid]
==
maxval
)
&&
(
con_max
!=
(
VEC_TYPE
)
0
)
,
maxloc
,
lm_maxloc[lid]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
{
if
(
lid
<
lsize
)
if
(
lid
<
lsize
)
{
{
int
lid2
=
lsize
+
lid
;
int
lid2
=
lsize
+
lid
;
lm_min[lid]
=
min
(
lm_min[lid]
,
lm_min[lid2]
)
;
lm_min[lid]
=
min
(
lm_min[lid]
,
lm_min[lid2]
)
;
lm_max[lid]
=
max
(
lm_max[lid]
,
lm_max[lid2]
)
;
lm_max[lid]
=
max
(
lm_max[lid]
,
lm_max[lid2]
)
;
VEC_TYPE
con_min
=
CONVERT_TYPE
(
lm_minloc[lid2]
!=
negative
?
one
:
zero
)
;
VEC_TYPE
con_min
=
CONVERT_TYPE
(
lm_minloc[lid2]
!=
negative
?
one
:
zero
)
;
VEC_TYPE
con_max
=
CONVERT_TYPE
(
lm_maxloc[lid2]
!=
negative
?
one
:
zero
)
;
VEC_TYPE
con_max
=
CONVERT_TYPE
(
lm_maxloc[lid2]
!=
negative
?
one
:
zero
)
;
lm_minloc[lid]
=
lm_minloc[lid]
=
CONDITION_FUNC
((
lm_min[lid]
==
lm_min[lid2]
)
&&
(
con_min
!=
(
VEC_TYPE
)
0
)
,
lm_minloc[lid2]
,
lm_minloc[lid]
)
;
CONDITION_FUNC
((
lm_min[lid]
==
lm_min[lid2]
)
&&
(
con_min
!=
(
VEC_TYPE
)
0
)
,
lm_minloc[lid2]
,
lm_minloc[lid]
)
;
lm_maxloc[lid]
=
lm_maxloc[lid]
=
CONDITION_FUNC
((
lm_max[lid]
==
lm_max[lid2]
)
&&
(
con_max
!=
(
VEC_TYPE
)
0
)
,
lm_maxloc[lid2]
,
lm_maxloc[lid]
)
;
CONDITION_FUNC
((
lm_max[lid]
==
lm_max[lid2]
)
&&
(
con_max
!=
(
VEC_TYPE
)
0
)
,
lm_maxloc[lid2]
,
lm_maxloc[lid]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
lid
==
0
)
if
(
lid
==
0
)
{
{
dst[gid]
=
CONVERT_RES_TYPE
(
lm_min[0]
)
;
dst[gid]
=
CONVERT_RES_TYPE
(
lm_min[0]
)
;
dst[gid
+
groupnum]
=
CONVERT_RES_TYPE
(
lm_max[0]
)
;
dst[gid
+
groupnum]
=
CONVERT_RES_TYPE
(
lm_max[0]
)
;
dst[gid
+
2
*
groupnum]
=
CONVERT_RES_TYPE
(
lm_minloc[0]
)
;
dst[gid
+
2
*
groupnum]
=
CONVERT_RES_TYPE
(
lm_minloc[0]
)
;
dst[gid
+
3
*
groupnum]
=
CONVERT_RES_TYPE
(
lm_maxloc[0]
)
;
dst[gid
+
3
*
groupnum]
=
CONVERT_RES_TYPE
(
lm_maxloc[0]
)
;
}
}
}
}
modules/ocl/src/opencl/arithm_nonzero.cl
View file @
a9a39a3a
...
@@ -55,11 +55,11 @@
...
@@ -55,11 +55,11 @@
__kernel
void
arithm_op_nonzero
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
__kernel
void
arithm_op_nonzero
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
__global
srcT
*src,
__global
dstT
*dst
)
__global
srcT
*src,
__global
dstT
*dst
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_nonzero[128]
;
__local
dstT
localmem_nonzero[128]
;
dstT
nonzero
=
(
dstT
)(
0
)
;
dstT
nonzero
=
(
dstT
)(
0
)
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
...
...
modules/ocl/src/opencl/arithm_phase.cl
View file @
a9a39a3a
...
@@ -45,15 +45,17 @@
...
@@ -45,15 +45,17 @@
//
//
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
define
CV_PI
3.1415926535897932384626433832795
#
define
CV_2PI
2*CV_PI
#
else
#
define
CV_PI
3.1415926535897932384626433832795f
#
define
CV_2PI
2*CV_PI
#
endif
#
endif
#
endif
#
define
CV_PI
3.1415926535898
#
define
CV_2PI
2*3.1415926535898
/**************************************phase
inradians**************************************/
/**************************************phase
inradians**************************************/
...
...
modules/ocl/src/opencl/arithm_polarToCart.cl
View file @
a9a39a3a
...
@@ -43,12 +43,13 @@
...
@@ -43,12 +43,13 @@
//
//
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
DOUBLE_SUPPORT
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
define
CV_PI
3.1415926535897932384626433832795
#
else
#
define
CV_PI
3.1415926535897932384626433832795f
#
endif
#
endif
#
define
CV_PI
3.1415926535897932384626433832795
/////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////polarToCart
with
magnitude//////////////////////////////
/////////////////////////////////////////polarToCart
with
magnitude//////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
...
@@ -72,7 +73,7 @@ __kernel void arithm_polarToCart_mag_D5 (__global float *src1, int src1_step, in
...
@@ -72,7 +73,7 @@ __kernel void arithm_polarToCart_mag_D5 (__global float *src1, int src1_step, in
float
x
=
*
((
__global
float
*
)((
__global
char
*
)
src1
+
src1_index
))
;
float
x
=
*
((
__global
float
*
)((
__global
char
*
)
src1
+
src1_index
))
;
float
y
=
*
((
__global
float
*
)((
__global
char
*
)
src2
+
src2_index
))
;
float
y
=
*
((
__global
float
*
)((
__global
char
*
)
src2
+
src2_index
))
;
float
ascale
=
CV_PI/180.0
;
float
ascale
=
CV_PI/180.0
f
;
float
alpha
=
angInDegree
==
1
?
y
*
ascale
:
y
;
float
alpha
=
angInDegree
==
1
?
y
*
ascale
:
y
;
float
a
=
cos
(
alpha
)
*
x
;
float
a
=
cos
(
alpha
)
*
x
;
float
b
=
sin
(
alpha
)
*
x
;
float
b
=
sin
(
alpha
)
*
x
;
...
@@ -134,7 +135,7 @@ __kernel void arithm_polarToCart_D5 (__global float *src, int src_step, int sr
...
@@ -134,7 +135,7 @@ __kernel void arithm_polarToCart_D5 (__global float *src, int src_step, int sr
float
y
=
*
((
__global
float
*
)((
__global
char
*
)
src
+
src_index
))
;
float
y
=
*
((
__global
float
*
)((
__global
char
*
)
src
+
src_index
))
;
float
ascale
=
CV_PI/180.0
;
float
ascale
=
CV_PI/180.0
f
;
float
alpha
=
angInDegree
==
1
?
y
*
ascale
:
y
;
float
alpha
=
angInDegree
==
1
?
y
*
ascale
:
y
;
float
a
=
cos
(
alpha
)
;
float
a
=
cos
(
alpha
)
;
float
b
=
sin
(
alpha
)
;
float
b
=
sin
(
alpha
)
;
...
...
modules/ocl/src/opencl/arithm_sum.cl
View file @
a9a39a3a
...
@@ -66,39 +66,39 @@
...
@@ -66,39 +66,39 @@
__kernel
void
arithm_op_sum
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum,
__kernel
void
arithm_op_sum
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum,
__global
srcT
*src,
__global
dstT
*dst
)
__global
srcT
*src,
__global
dstT
*dst
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_sum[128]
;
__local
dstT
localmem_sum[128]
;
dstT
sum
=
(
dstT
)(
0
)
,
temp
;
dstT
sum
=
(
dstT
)(
0
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
convertToDstT
(
src[idx]
)
;
temp
=
convertToDstT
(
src[idx]
)
;
FUNC
(
temp,
sum
)
;
FUNC
(
temp,
sum
)
;
}
}
if
(
lid
>
127
)
if
(
lid
>
127
)
localmem_sum[lid
-
128]
=
sum
;
localmem_sum[lid
-
128]
=
sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
if
(
lid
<
128
)
localmem_sum[lid]
=
sum
+
localmem_sum[lid]
;
localmem_sum[lid]
=
sum
+
localmem_sum[lid]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
{
if
(
lid
<
lsize
)
if
(
lid
<
lsize
)
{
{
int
lid2
=
lsize
+
lid
;
int
lid2
=
lsize
+
lid
;
localmem_sum[lid]
=
localmem_sum[lid]
+
localmem_sum[lid2]
;
localmem_sum[lid]
=
localmem_sum[lid]
+
localmem_sum[lid2]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
lid
==
0
)
if
(
lid
==
0
)
dst[gid]
=
localmem_sum[0]
;
dst[gid]
=
localmem_sum[0]
;
}
}
modules/ocl/src/opencl/brute_force_match.cl
View file @
a9a39a3a
...
@@ -64,7 +64,7 @@
...
@@ -64,7 +64,7 @@
#
endif
#
endif
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
int
bit1Count
(
int
v
)
static
int
bit1Count
(
int
v
)
{
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
...
@@ -95,7 +95,7 @@ typedef int result_type;
...
@@ -95,7 +95,7 @@ typedef int result_type;
#
define
DIST_RES
(
x
)
(
x
)
#
define
DIST_RES
(
x
)
(
x
)
#
endif
#
endif
result_type
reduce_block
(
static
result_type
reduce_block
(
__local
value_type
*s_query,
__local
value_type
*s_query,
__local
value_type
*s_train,
__local
value_type
*s_train,
int
lidx,
int
lidx,
...
@@ -113,7 +113,7 @@ result_type reduce_block(
...
@@ -113,7 +113,7 @@ result_type reduce_block(
return
DIST_RES
(
result
)
;
return
DIST_RES
(
result
)
;
}
}
result_type
reduce_block_match
(
static
result_type
reduce_block_match
(
__local
value_type
*s_query,
__local
value_type
*s_query,
__local
value_type
*s_train,
__local
value_type
*s_train,
int
lidx,
int
lidx,
...
@@ -131,7 +131,7 @@ result_type reduce_block_match(
...
@@ -131,7 +131,7 @@ result_type reduce_block_match(
return
(
result
)
;
return
(
result
)
;
}
}
result_type
reduce_multi_block
(
static
result_type
reduce_multi_block
(
__local
value_type
*s_query,
__local
value_type
*s_query,
__local
value_type
*s_train,
__local
value_type
*s_train,
int
block_index,
int
block_index,
...
@@ -187,7 +187,6 @@ __kernel void BruteForceMatch_UnrollMatch(
...
@@ -187,7 +187,6 @@ __kernel void BruteForceMatch_UnrollMatch(
int
myBestTrainIdx
=
-1
;
int
myBestTrainIdx
=
-1
;
//
loopUnrolledCached
to
find
the
best
trainIdx
and
best
distance.
//
loopUnrolledCached
to
find
the
best
trainIdx
and
best
distance.
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
,
endt
=
(
train_rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
; t < endt; t++)
for
(
int
t
=
0
,
endt
=
(
train_rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
; t < endt; t++)
{
{
result_type
result
=
0
;
result_type
result
=
0
;
...
@@ -212,7 +211,6 @@ __kernel void BruteForceMatch_UnrollMatch(
...
@@ -212,7 +211,6 @@ __kernel void BruteForceMatch_UnrollMatch(
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
//bestImgIdx
=
imgIdx
;
myBestDistance
=
result
;
myBestDistance
=
result
;
myBestTrainIdx
=
trainIdx
;
myBestTrainIdx
=
trainIdx
;
}
}
...
@@ -304,7 +302,6 @@ __kernel void BruteForceMatch_Match(
...
@@ -304,7 +302,6 @@ __kernel void BruteForceMatch_Match(
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance
/*&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance
/*&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
//myBestImgidx
=
imgIdx
;
myBestDistance
=
result
;
myBestDistance
=
result
;
myBestTrainIdx
=
trainIdx
;
myBestTrainIdx
=
trainIdx
;
}
}
...
@@ -390,11 +387,10 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
...
@@ -390,11 +387,10 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
convert_float
(
result
)
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
convert_float
(
result
)
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx/*,
(
unsigned
int
)
-1*/
)
;
int
ind
=
atom_inc
(
nMatches
+
queryIdx/*,
(
unsigned
int
)
-1*/
)
;
if
(
ind
<
bestTrainIdx_cols
)
if
(
ind
<
bestTrainIdx_cols
)
{
{
//bestImgIdx
=
imgIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
bestDistance[queryIdx
*
(
ostep
/
sizeof
(
float
))
+
ind]
=
result
;
bestDistance[queryIdx
*
(
ostep
/
sizeof
(
float
))
+
ind]
=
result
;
}
}
...
@@ -451,11 +447,10 @@ __kernel void BruteForceMatch_RadiusMatch(
...
@@ -451,11 +447,10 @@ __kernel void BruteForceMatch_RadiusMatch(
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
convert_float
(
result
)
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
convert_float
(
result
)
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx
)
;
int
ind
=
atom_inc
(
nMatches
+
queryIdx
)
;
if
(
ind
<
bestTrainIdx_cols
)
if
(
ind
<
bestTrainIdx_cols
)
{
{
//bestImgIdx
=
imgIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
bestDistance[queryIdx
*
(
ostep
/
sizeof
(
float
))
+
ind]
=
result
;
bestDistance[queryIdx
*
(
ostep
/
sizeof
(
float
))
+
ind]
=
result
;
}
}
...
@@ -498,7 +493,6 @@ __kernel void BruteForceMatch_knnUnrollMatch(
...
@@ -498,7 +493,6 @@ __kernel void BruteForceMatch_knnUnrollMatch(
int
myBestTrainIdx2
=
-1
;
int
myBestTrainIdx2
=
-1
;
//loopUnrolledCached
//loopUnrolledCached
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
for
(
int
t
=
0
; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
{
result_type
result
=
0
;
result_type
result
=
0
;
...
...
modules/ocl/src/opencl/cvt_color.cl
View file @
a9a39a3a
...
@@ -50,8 +50,6 @@
...
@@ -50,8 +50,6 @@
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
DATA_TYPE
UNDEFINED
#
if
defined
(
DEPTH_0
)
#
if
defined
(
DEPTH_0
)
#
define
DATA_TYPE
uchar
#
define
DATA_TYPE
uchar
#
define
MAX_NUM
255
#
define
MAX_NUM
255
...
@@ -73,6 +71,10 @@
...
@@ -73,6 +71,10 @@
#
define
SAT_CAST
(
num
)
(
num
)
#
define
SAT_CAST
(
num
)
(
num
)
#
endif
#
endif
#
ifndef
DATA_TYPE
#
define
DATA_TYPE
UNDEFINED
#
endif
#
define
CV_DESCALE
(
x,n
)
(((
x
)
+
(
1
<<
((
n
)
-1
)))
>>
(
n
))
#
define
CV_DESCALE
(
x,n
)
(((
x
)
+
(
1
<<
((
n
)
-1
)))
>>
(
n
))
enum
enum
...
...
modules/ocl/src/opencl/haarobjectdetect.cl
View file @
a9a39a3a
...
@@ -37,7 +37,6 @@
...
@@ -37,7 +37,6 @@
//
//
//
//
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
define
CV_HAAR_FEATURE_MAX
3
#
define
CV_HAAR_FEATURE_MAX
3
#
define
calc_sum
(
rect,offset
)
(
sum[
(
rect
)
.
p0+offset]
-
sum[
(
rect
)
.
p1+offset]
-
sum[
(
rect
)
.
p2+offset]
+
sum[
(
rect
)
.
p3+offset]
)
#
define
calc_sum
(
rect,offset
)
(
sum[
(
rect
)
.
p0+offset]
-
sum[
(
rect
)
.
p1+offset]
-
sum[
(
rect
)
.
p2+offset]
+
sum[
(
rect
)
.
p3+offset]
)
...
...
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
View file @
a9a39a3a
...
@@ -120,7 +120,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
...
@@ -120,7 +120,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
int
grpidx
=
get_group_id
(
0
)
;
int
grpidx
=
get_group_id
(
0
)
;
int
lclidx
=
get_local_id
(
0
)
;
int
lclidx
=
get_local_id
(
0
)
;
int
lclidy
=
get_local_id
(
1
)
;
int
lclidy
=
get_local_id
(
1
)
;
int
lcl_sz
=
mul24
(
grpszx,
grpszy
)
;
int
lcl_id
=
mad24
(
lclidy,
grpszx,
lclidx
)
;
int
lcl_id
=
mad24
(
lclidy,
grpszx,
lclidx
)
;
__local
int
glboutindex[1]
;
__local
int
glboutindex[1]
;
__local
int
lclcount[1]
;
__local
int
lclcount[1]
;
...
...
modules/ocl/src/opencl/imgproc_calcHarris.cl
View file @
a9a39a3a
...
@@ -99,7 +99,6 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
...
@@ -99,7 +99,6 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
int
col
=
get_local_id
(
0
)
;
int
col
=
get_local_id
(
0
)
;
int
gX
=
get_group_id
(
0
)
;
int
gX
=
get_group_id
(
0
)
;
int
gY
=
get_group_id
(
1
)
;
int
gY
=
get_group_id
(
1
)
;
int
glx
=
get_global_id
(
0
)
;
int
gly
=
get_global_id
(
1
)
;
int
gly
=
get_global_id
(
1
)
;
int
dx_x_off
=
(
dx_offset
%
dx_step
)
>>
2
;
int
dx_x_off
=
(
dx_offset
%
dx_step
)
>>
2
;
...
@@ -126,11 +125,11 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
...
@@ -126,11 +125,11 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
{
{
dx_con
=
dx_startX+col
>=
0
&&
dx_startX+col
<
dx_whole_cols
&&
dx_startY+i
>=
0
&&
dx_startY+i
<
dx_whole_rows
;
dx_con
=
dx_startX+col
>=
0
&&
dx_startX+col
<
dx_whole_cols
&&
dx_startY+i
>=
0
&&
dx_startY+i
<
dx_whole_rows
;
dx_s
=
Dx[
(
dx_startY+i
)
*
(
dx_step>>2
)
+
(
dx_startX+col
)
]
;
dx_s
=
Dx[
(
dx_startY+i
)
*
(
dx_step>>2
)
+
(
dx_startX+col
)
]
;
dx_data[i]
=
dx_con
?
dx_s
:
0.0
;
dx_data[i]
=
dx_con
?
dx_s
:
0.0
f
;
dy_con
=
dy_startX+col
>=
0
&&
dy_startX+col
<
dy_whole_cols
&&
dy_startY+i
>=
0
&&
dy_startY+i
<
dy_whole_rows
;
dy_con
=
dy_startX+col
>=
0
&&
dy_startX+col
<
dy_whole_cols
&&
dy_startY+i
>=
0
&&
dy_startY+i
<
dy_whole_rows
;
dy_s
=
Dy[
(
dy_startY+i
)
*
(
dy_step>>2
)
+
(
dy_startX+col
)
]
;
dy_s
=
Dy[
(
dy_startY+i
)
*
(
dy_step>>2
)
+
(
dy_startX+col
)
]
;
dy_data[i]
=
dy_con
?
dy_s
:
0.0
;
dy_data[i]
=
dy_con
?
dy_s
:
0.0
f
;
data[0][i]
=
dx_data[i]
*
dx_data[i]
;
data[0][i]
=
dx_data[i]
*
dx_data[i]
;
data[1][i]
=
dx_data[i]
*
dy_data[i]
;
data[1][i]
=
dx_data[i]
*
dy_data[i]
;
...
@@ -155,7 +154,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
...
@@ -155,7 +154,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
data[2][i]
=
dy_data[i]
*
dy_data[i]
;
data[2][i]
=
dy_data[i]
*
dy_data[i]
;
}
}
#
endif
#
endif
float
sum0
=
0.0
,
sum1
=
0.0
,
sum2
=
0.0
;
float
sum0
=
0.0
f,
sum1
=
0.0f,
sum2
=
0.0f
;
for
(
int
i=1
; i < ksY; i++)
for
(
int
i=1
; i < ksY; i++)
{
{
sum0
+=
data[0][i]
;
sum0
+=
data[0][i]
;
...
@@ -183,7 +182,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
...
@@ -183,7 +182,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
int
posX
=
dst_startX
-
dst_x_off
+
col
-
anX
;
int
posX
=
dst_startX
-
dst_x_off
+
col
-
anX
;
int
posY
=
(
gly
<<
1
)
;
int
posY
=
(
gly
<<
1
)
;
int
till
=
(
ksX
+
1
)
%2
;
int
till
=
(
ksX
+
1
)
%2
;
float
tmp_sum[6]
=
{
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
}
;
float
tmp_sum[6]
=
{
0.0
f,
0.0f,
0.0f,
0.0f,
0.0f,
0.0f
}
;
for
(
int
k=0
; k<6; k++)
for
(
int
k=0
; k<6; k++)
for
(
int
i=-anX
; i<=anX - till; i++)
for
(
int
i=-anX
; i<=anX - till; i++)
tmp_sum[k]
+=
temp[k][col+i]
;
tmp_sum[k]
+=
temp[k][col+i]
;
...
...
modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
View file @
a9a39a3a
...
@@ -98,7 +98,6 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
...
@@ -98,7 +98,6 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
int
col
=
get_local_id
(
0
)
;
int
col
=
get_local_id
(
0
)
;
int
gX
=
get_group_id
(
0
)
;
int
gX
=
get_group_id
(
0
)
;
int
gY
=
get_group_id
(
1
)
;
int
gY
=
get_group_id
(
1
)
;
int
glx
=
get_global_id
(
0
)
;
int
gly
=
get_global_id
(
1
)
;
int
gly
=
get_global_id
(
1
)
;
int
dx_x_off
=
(
dx_offset
%
dx_step
)
>>
2
;
int
dx_x_off
=
(
dx_offset
%
dx_step
)
>>
2
;
...
@@ -125,10 +124,10 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
...
@@ -125,10 +124,10 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
{
{
dx_con
=
dx_startX+col
>=
0
&&
dx_startX+col
<
dx_whole_cols
&&
dx_startY+i
>=
0
&&
dx_startY+i
<
dx_whole_rows
;
dx_con
=
dx_startX+col
>=
0
&&
dx_startX+col
<
dx_whole_cols
&&
dx_startY+i
>=
0
&&
dx_startY+i
<
dx_whole_rows
;
dx_s
=
Dx[
(
dx_startY+i
)
*
(
dx_step>>2
)
+
(
dx_startX+col
)
]
;
dx_s
=
Dx[
(
dx_startY+i
)
*
(
dx_step>>2
)
+
(
dx_startX+col
)
]
;
dx_data[i]
=
dx_con
?
dx_s
:
0.0
;
dx_data[i]
=
dx_con
?
dx_s
:
0.0
f
;
dy_con
=
dy_startX+col
>=
0
&&
dy_startX+col
<
dy_whole_cols
&&
dy_startY+i
>=
0
&&
dy_startY+i
<
dy_whole_rows
;
dy_con
=
dy_startX+col
>=
0
&&
dy_startX+col
<
dy_whole_cols
&&
dy_startY+i
>=
0
&&
dy_startY+i
<
dy_whole_rows
;
dy_s
=
Dy[
(
dy_startY+i
)
*
(
dy_step>>2
)
+
(
dy_startX+col
)
]
;
dy_s
=
Dy[
(
dy_startY+i
)
*
(
dy_step>>2
)
+
(
dy_startX+col
)
]
;
dy_data[i]
=
dy_con
?
dy_s
:
0.0
;
dy_data[i]
=
dy_con
?
dy_s
:
0.0
f
;
data[0][i]
=
dx_data[i]
*
dx_data[i]
;
data[0][i]
=
dx_data[i]
*
dx_data[i]
;
data[1][i]
=
dx_data[i]
*
dy_data[i]
;
data[1][i]
=
dx_data[i]
*
dy_data[i]
;
data[2][i]
=
dy_data[i]
*
dy_data[i]
;
data[2][i]
=
dy_data[i]
*
dy_data[i]
;
...
@@ -152,7 +151,7 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
...
@@ -152,7 +151,7 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
data[2][i]
=
dy_data[i]
*
dy_data[i]
;
data[2][i]
=
dy_data[i]
*
dy_data[i]
;
}
}
#
endif
#
endif
float
sum0
=
0.0
,
sum1
=
0.0
,
sum2
=
0.0
;
float
sum0
=
0.0
f,
sum1
=
0.0f,
sum2
=
0.0f
;
for
(
int
i=1
; i < ksY; i++)
for
(
int
i=1
; i < ksY; i++)
{
{
sum0
+=
(
data[0][i]
)
;
sum0
+=
(
data[0][i]
)
;
...
@@ -180,7 +179,7 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
...
@@ -180,7 +179,7 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
int
posX
=
dst_startX
-
dst_x_off
+
col
-
anX
;
int
posX
=
dst_startX
-
dst_x_off
+
col
-
anX
;
int
posY
=
(
gly
<<
1
)
;
int
posY
=
(
gly
<<
1
)
;
int
till
=
(
ksX
+
1
)
%2
;
int
till
=
(
ksX
+
1
)
%2
;
float
tmp_sum[6]
=
{
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
}
;
float
tmp_sum[6]
=
{
0.0
f,
0.0f
,
0.0f,
0.0f,
0.0f,
0.0f
}
;
for
(
int
k=0
; k<6; k++)
for
(
int
k=0
; k<6; k++)
for
(
int
i=-anX
; i<=anX - till; i++)
for
(
int
i=-anX
; i<=anX - till; i++)
tmp_sum[k]
+=
temp[k][col+i]
;
tmp_sum[k]
+=
temp[k][col+i]
;
...
...
modules/ocl/src/opencl/imgproc_canny.cl
View file @
a9a39a3a
...
@@ -43,9 +43,6 @@
...
@@ -43,9 +43,6 @@
//
//
//M*/
//M*/
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_local_int32_base_atomics
:
enable
#
ifdef
L2GRAD
#
ifdef
L2GRAD
inline
float
calc
(
int
x,
int
y
)
inline
float
calc
(
int
x,
int
y
)
{
{
...
@@ -248,7 +245,12 @@ void calcMagnitude
...
@@ -248,7 +245,12 @@ void calcMagnitude
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
//
0.4142135623730950488016887242097
is
tan
(
22.5
)
//
0.4142135623730950488016887242097
is
tan
(
22.5
)
#
define
CANNY_SHIFT
15
#
define
CANNY_SHIFT
15
#
define
TG22
(
int
)(
0.4142135623730950488016887242097*
(
1<<CANNY_SHIFT
)
+
0.5
)
#
ifdef
DOUBLE_SUPPORT
#
define
TG22
(
int
)(
0.4142135623730950488016887242097*
(
1<<CANNY_SHIFT
)
+
0.5
)
#
else
#
define
TG22
(
int
)(
0.4142135623730950488016887242097f*
(
1<<CANNY_SHIFT
)
+
0.5f
)
#
endif
//First
pass
of
edge
detection
and
non-maximum
suppression
//First
pass
of
edge
detection
and
non-maximum
suppression
//
edgetype
is
set
to
for
each
pixel:
//
edgetype
is
set
to
for
each
pixel:
...
@@ -681,7 +683,7 @@ edgesHysteresisGlobal
...
@@ -681,7 +683,7 @@ edgesHysteresisGlobal
ind
=
s_ind
;
ind
=
s_ind
;
for
(
int
i
=
lidx
; i < s_counter; i += get_local_size(0))
for
(
int
i
=
lidx
; i <
(int)
s_counter; i += get_local_size(0))
{
{
st2[ind
+
i]
=
s_st[i]
;
st2[ind
+
i]
=
s_st[i]
;
}
}
...
...
modules/ocl/src/opencl/imgproc_clahe.cl
View file @
a9a39a3a
...
@@ -47,7 +47,7 @@
...
@@ -47,7 +47,7 @@
#
define
WAVE_SIZE
1
#
define
WAVE_SIZE
1
#
endif
#
endif
int
calc_lut
(
__local
int*
smem,
int
val,
int
tid
)
static
int
calc_lut
(
__local
int*
smem,
int
val,
int
tid
)
{
{
smem[tid]
=
val
;
smem[tid]
=
val
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -61,7 +61,7 @@ int calc_lut(__local int* smem, int val, int tid)
...
@@ -61,7 +61,7 @@ int calc_lut(__local int* smem, int val, int tid)
}
}
#
ifdef
CPU
#
ifdef
CPU
void
reduce
(
volatile
__local
int*
smem,
int
val,
int
tid
)
static
void
reduce
(
volatile
__local
int*
smem,
int
val,
int
tid
)
{
{
smem[tid]
=
val
;
smem[tid]
=
val
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -101,7 +101,7 @@ void reduce(volatile __local int* smem, int val, int tid)
...
@@ -101,7 +101,7 @@ void reduce(volatile __local int* smem, int val, int tid)
#
else
#
else
void
reduce
(
__local
volatile
int*
smem,
int
val,
int
tid
)
static
void
reduce
(
__local
volatile
int*
smem,
int
val,
int
tid
)
{
{
smem[tid]
=
val
;
smem[tid]
=
val
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -147,9 +147,9 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
...
@@ -147,9 +147,9 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
{
{
__local
int
smem[512]
;
__local
int
smem[512]
;
const
int
tx
=
get_group_id
(
0
)
;
int
tx
=
get_group_id
(
0
)
;
const
int
ty
=
get_group_id
(
1
)
;
int
ty
=
get_group_id
(
1
)
;
const
unsigned
int
tid
=
get_local_id
(
1
)
*
get_local_size
(
0
)
int
tid
=
get_local_id
(
1
)
*
get_local_size
(
0
)
+
get_local_id
(
0
)
;
+
get_local_id
(
0
)
;
smem[tid]
=
0
;
smem[tid]
=
0
;
...
...
modules/ocl/src/opencl/imgproc_integral.cl
View file @
a9a39a3a
...
@@ -63,8 +63,8 @@
...
@@ -63,8 +63,8 @@
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
float
*sqsum,
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
float
*sqsum,
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
)
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int4
src_t[2],
sum_t[2]
;
int4
src_t[2],
sum_t[2]
;
float4
sqsum_t[2]
;
float4
sqsum_t[2]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
...
@@ -75,8 +75,8 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
...
@@ -75,8 +75,8 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
gid
=
gid
<<
1
;
gid
=
gid
<<
1
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
{
{
src_t[0]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid,
(
uint
)
cols
-
1
)
]
)
:
0
)
;
src_t[0]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid,
cols
-
1
)
]
)
:
0
)
;
src_t[1]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid
+
1
,
(
uint
)
cols
-
1
)
]
)
:
0
)
;
src_t[1]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid
+
1
,
cols
-
1
)
]
)
:
0
)
;
sum_t[0]
=
(
i
==
0
?
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[0]
=
(
i
==
0
?
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
...
@@ -163,8 +163,8 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
...
@@ -163,8 +163,8 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
__global float *sqsum,int rows,int cols,int src_step,int sum_step,
__global float *sqsum,int rows,int cols,int src_step,int sum_step,
int sqsum_step,int sum_offset,int sqsum_offset)
int sqsum_step,int sum_offset,int sqsum_offset)
{
{
unsigned
int lid = get_local_id(0);
int lid = get_local_id(0);
unsigned
int gid = get_group_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
int4 src_t[2], sum_t[2];
float4 sqsrc_t[2],sqsum_t[2];
float4 sqsrc_t[2],sqsum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
...
@@ -279,8 +279,8 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
...
@@ -279,8 +279,8 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global float *sqsum,
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global float *sqsum,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
{
{
unsigned
int lid = get_local_id(0);
int lid = get_local_id(0);
unsigned
int gid = get_group_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
float4 src_t[2], sum_t[2];
float4 sqsum_t[2];
float4 sqsum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
...
@@ -291,8 +291,8 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
...
@@ -291,8 +291,8 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
gid = gid << 1;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
for(int i = 0; i < rows; i =i + LSIZE_1)
{
{
src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid,
(uint)
cols - 1)]) : (float4)0);
src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid, cols - 1)]) : (float4)0);
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1,
(uint)
cols - 1)]) : (float4)0);
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
...
@@ -379,8 +379,8 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
...
@@ -379,8 +379,8 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
__global
float
*sqsum,int
rows,int
cols,int
src_step,int
sum_step,
__global
float
*sqsum,int
rows,int
cols,int
src_step,int
sum_step,
int
sqsum_step,int
sum_offset,int
sqsum_offset
)
int
sqsum_step,int
sum_offset,int
sqsum_offset
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
float4
src_t[2],
sum_t[2]
;
float4
src_t[2],
sum_t[2]
;
float4
sqsrc_t[2],sqsum_t[2]
;
float4
sqsrc_t[2],sqsum_t[2]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
...
...
modules/ocl/src/opencl/imgproc_integral_sum.cl
View file @
a9a39a3a
...
@@ -64,8 +64,8 @@
...
@@ -64,8 +64,8 @@
kernel
void
integral_sum_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
kernel
void
integral_sum_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
)
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int4
src_t[2],
sum_t[2]
;
int4
src_t[2],
sum_t[2]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
int*
sum_p
;
__local
int*
sum_p
;
...
@@ -146,8 +146,8 @@ kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
...
@@ -146,8 +146,8 @@ kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
int rows,int cols,int src_step,int sum_step,
int rows,int cols,int src_step,int sum_step,
int sum_offset)
int sum_offset)
{
{
unsigned
int lid = get_local_id(0);
int lid = get_local_id(0);
unsigned
int gid = get_group_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
int4 src_t[2], sum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local int *sum_p;
__local int *sum_p;
...
@@ -239,8 +239,8 @@ kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
...
@@ -239,8 +239,8 @@ kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
kernel void integral_sum_cols_D5(__global uchar4 *src,__global float *sum ,
kernel void integral_sum_cols_D5(__global uchar4 *src,__global float *sum ,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
{
{
unsigned
int lid = get_local_id(0);
int lid = get_local_id(0);
unsigned
int gid = get_group_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
float4 src_t[2], sum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local float* sum_p;
__local float* sum_p;
...
@@ -321,8 +321,8 @@ kernel void integral_sum_rows_D5(__global float4 *srcsum,__global float *sum ,
...
@@ -321,8 +321,8 @@ kernel void integral_sum_rows_D5(__global float4 *srcsum,__global float *sum ,
int
rows,int
cols,int
src_step,int
sum_step,
int
rows,int
cols,int
src_step,int
sum_step,
int
sum_offset
)
int
sum_offset
)
{
{
unsigned
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
float4
src_t[2],
sum_t[2]
;
float4
src_t[2],
sum_t[2]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
*sum_p
;
__local
float
*sum_p
;
...
...
modules/ocl/src/opencl/imgproc_median.cl
View file @
a9a39a3a
...
@@ -106,10 +106,10 @@ __kernel void medianFilter3_C4_D0(__global uchar4 * src, __global uchar4 * dst,
...
@@ -106,10 +106,10 @@ __kernel void medianFilter3_C4_D0(__global uchar4 * src, __global uchar4 * dst,
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
}
}
#
undef
op
(
a,b
)
#
undef
op
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter3_C1_D0
(
__global
uchar
*
src,
__global
uchar
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
__kernel
void
medianFilter3_C1_D0
(
__global
uchar
*
src,
__global
uchar
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
...
@@ -148,10 +148,10 @@ __kernel void medianFilter3_C1_D0(__global uchar * src, __global uchar * dst, i
...
@@ -148,10 +148,10 @@ __kernel void medianFilter3_C1_D0(__global uchar * src, __global uchar * dst, i
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
}
}
#
undef
op
(
a,b
)
#
undef
op
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter3_C1_D5
(
__global
float
*
src,
__global
float
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
__kernel
void
medianFilter3_C1_D5
(
__global
float
*
src,
__global
float
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
...
@@ -190,10 +190,10 @@ __kernel void medianFilter3_C1_D5(__global float * src, __global float * dst, i
...
@@ -190,10 +190,10 @@ __kernel void medianFilter3_C1_D5(__global float * src, __global float * dst, i
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
}
}
#
undef
op
(
a,b
)
#
undef
op
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter3_C4_D5
(
__global
float4
*
src,
__global
float4
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
__kernel
void
medianFilter3_C4_D5
(
__global
float4
*
src,
__global
float4
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
...
@@ -232,10 +232,10 @@ __kernel void medianFilter3_C4_D5(__global float4 * src, __global float4 * dst,
...
@@ -232,10 +232,10 @@ __kernel void medianFilter3_C4_D5(__global float4 * src, __global float4 * dst,
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p4
;
}
}
#
undef
op
(
a,b
)
#
undef
op
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter5_C4_D0
(
__global
uchar4
*
src,
__global
uchar4
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
__kernel
void
medianFilter5_C4_D0
(
__global
uchar4
*
src,
__global
uchar4
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
...
@@ -294,10 +294,10 @@ __kernel void medianFilter5_C4_D0(__global uchar4 * src, __global uchar4 * dst,
...
@@ -294,10 +294,10 @@ __kernel void medianFilter5_C4_D0(__global uchar4 * src, __global uchar4 * dst,
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
}
}
#
undef
op
(
a,b
)
#
undef
op
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter5_C1_D0
(
__global
uchar
*
src,
__global
uchar
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
__kernel
void
medianFilter5_C1_D0
(
__global
uchar
*
src,
__global
uchar
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
...
@@ -356,10 +356,10 @@ __kernel void medianFilter5_C1_D0(__global uchar * src, __global uchar * dst, i
...
@@ -356,10 +356,10 @@ __kernel void medianFilter5_C1_D0(__global uchar * src, __global uchar * dst, i
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
}
}
#
undef
op
(
a,b
)
#
undef
op
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter5_C4_D5
(
__global
float4
*
src,
__global
float4
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
__kernel
void
medianFilter5_C4_D5
(
__global
float4
*
src,
__global
float4
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
...
@@ -418,10 +418,10 @@ __kernel void medianFilter5_C4_D5(__global float4 * src, __global float4 * dst,
...
@@ -418,10 +418,10 @@ __kernel void medianFilter5_C4_D5(__global float4 * src, __global float4 * dst,
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
}
}
#
undef
op
(
a,b
)
#
undef
op
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
#
define
op
(
a,b
)
{mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter5_C1_D5
(
__global
float
*
src,
__global
float
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
__kernel
void
medianFilter5_C1_D5
(
__global
float
*
src,
__global
float
*
dst,
int
srcOffset,
int
dstOffset,
int
cols,
...
@@ -480,7 +480,7 @@ __kernel void medianFilter5_C1_D5(__global float * src, __global float * dst, i
...
@@ -480,7 +480,7 @@ __kernel void medianFilter5_C1_D5(__global float * src, __global float * dst, i
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
if
(
get_global_id
(
1
)
<rows
&&
get_global_id
(
0
)
<cols
)
if
(
(
int
)
get_global_id
(
1
)
<rows
&&
(
int
)
get_global_id
(
0
)
<cols
)
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
dst[dstOffset
+
get_global_id
(
1
)
*dstStep
+
get_global_id
(
0
)
]=p12
;
}
}
#
undef
op
(
a,b
)
#
undef
op
modules/ocl/src/opencl/imgproc_remap.cl
View file @
a9a39a3a
...
@@ -60,7 +60,7 @@
...
@@ -60,7 +60,7 @@
#
elif
defined
BORDER_REPLICATE
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
v2,
v
)
\
#
define
EXTRAPOLATE
(
v2,
v
)
\
{
\
{
\
v2
=
max
(
min
(
v2,
(
int2
)(
src_cols
-
1
,
src_rows
-
1
))
,
zero
)
; \
v2
=
max
(
min
(
v2,
(
int2
)(
src_cols
-
1
,
src_rows
-
1
))
,
(
int2
)(
0
)
)
; \
v
=
convertToWT
(
src[mad24
(
v2.y,
src_step,
v2.x
+
src_offset
)
]
)
; \
v
=
convertToWT
(
src[mad24
(
v2.y,
src_step,
v2.x
+
src_offset
)
]
)
; \
}
}
#
elif
defined
BORDER_WRAP
#
elif
defined
BORDER_WRAP
...
@@ -139,7 +139,9 @@ __kernel void remap_2_32FC1(__global const T * restrict src, __global T * dst,
...
@@ -139,7 +139,9 @@ __kernel void remap_2_32FC1(__global const T * restrict src, __global T * dst,
if
(
NEED_EXTRAPOLATION
(
gx,
gy
))
if
(
NEED_EXTRAPOLATION
(
gx,
gy
))
{
{
int2
gxy
=
(
int2
)(
gx,
gy
)
,
zero
=
(
int2
)(
0
)
;
#
ifndef
BORDER_CONSTANT
int2
gxy
=
(
int2
)(
gx,
gy
)
;
#
endif
EXTRAPOLATE
(
gxy,
dst[dstIdx]
)
;
EXTRAPOLATE
(
gxy,
dst[dstIdx]
)
;
}
}
else
else
...
@@ -167,10 +169,7 @@ __kernel void remap_32FC2(__global const T * restrict src, __global T * dst, __g
...
@@ -167,10 +169,7 @@ __kernel void remap_32FC2(__global const T * restrict src, __global T * dst, __g
int
gx
=
gxy.x,
gy
=
gxy.y
;
int
gx
=
gxy.x,
gy
=
gxy.y
;
if
(
NEED_EXTRAPOLATION
(
gx,
gy
))
if
(
NEED_EXTRAPOLATION
(
gx,
gy
))
{
EXTRAPOLATE
(
gxy,
dst[dstIdx]
)
int2
zero
=
(
int2
)(
0
)
;
EXTRAPOLATE
(
gxy,
dst[dstIdx]
)
;
}
else
else
{
{
int
srcIdx
=
mad24
(
gy,
src_step,
gx
+
src_offset
)
;
int
srcIdx
=
mad24
(
gy,
src_step,
gx
+
src_offset
)
;
...
@@ -196,10 +195,7 @@ __kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __g
...
@@ -196,10 +195,7 @@ __kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __g
int
gx
=
gxy.x,
gy
=
gxy.y
;
int
gx
=
gxy.x,
gy
=
gxy.y
;
if
(
NEED_EXTRAPOLATION
(
gx,
gy
))
if
(
NEED_EXTRAPOLATION
(
gx,
gy
))
{
EXTRAPOLATE
(
gxy,
dst[dstIdx]
)
int2
zero
=
(
int2
)(
0
)
;
EXTRAPOLATE
(
gxy,
dst[dstIdx]
)
;
}
else
else
{
{
int
srcIdx
=
mad24
(
gy,
src_step,
gx
+
src_offset
)
;
int
srcIdx
=
mad24
(
gy,
src_step,
gx
+
src_offset
)
;
...
@@ -231,7 +227,6 @@ __kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
...
@@ -231,7 +227,6 @@ __kernel void remap_2_32FC1(__global T const * restrict src, __global T * dst,
int2
map_dataB
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
)
;
int2
map_dataB
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
)
;
int2
map_dataC
=
(
int2
)(
map_dataA.x,
map_dataA.y
+
1
)
;
int2
map_dataC
=
(
int2
)(
map_dataA.x,
map_dataA.y
+
1
)
;
int2
map_dataD
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
+1
)
;
int2
map_dataD
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
+1
)
;
int2
zero
=
(
int2
)(
0
)
;
float2
_u
=
map_data
-
convert_float2
(
map_dataA
)
;
float2
_u
=
map_data
-
convert_float2
(
map_dataA
)
;
WT2
u
=
convertToWT2
(
convert_int2_rte
(
convertToWT2
(
_u
)
*
(
WT2
)
32
))
/
(
WT2
)
32
;
WT2
u
=
convertToWT2
(
convert_int2_rte
(
convertToWT2
(
_u
)
*
(
WT2
)
32
))
/
(
WT2
)
32
;
...
@@ -285,7 +280,6 @@ __kernel void remap_32FC2(__global T const * restrict src, __global T * dst,
...
@@ -285,7 +280,6 @@ __kernel void remap_32FC2(__global T const * restrict src, __global T * dst,
int2
map_dataB
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
)
;
int2
map_dataB
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
)
;
int2
map_dataC
=
(
int2
)(
map_dataA.x,
map_dataA.y
+
1
)
;
int2
map_dataC
=
(
int2
)(
map_dataA.x,
map_dataA.y
+
1
)
;
int2
map_dataD
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
+
1
)
;
int2
map_dataD
=
(
int2
)(
map_dataA.x
+
1
,
map_dataA.y
+
1
)
;
int2
zero
=
(
int2
)(
0
)
;
float2
_u
=
map_data
-
convert_float2
(
map_dataA
)
;
float2
_u
=
map_data
-
convert_float2
(
map_dataA
)
;
WT2
u
=
convertToWT2
(
convert_int2_rte
(
convertToWT2
(
_u
)
*
(
WT2
)
32
))
/
(
WT2
)
32
;
WT2
u
=
convertToWT2
(
convert_int2_rte
(
convertToWT2
(
_u
)
*
(
WT2
)
32
))
/
(
WT2
)
32
;
...
...
modules/ocl/src/opencl/imgproc_resize.cl
View file @
a9a39a3a
...
@@ -182,10 +182,10 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
...
@@ -182,10 +182,10 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
int
x
=
floor
(
sx
)
,
y
=
floor
(
sy
)
;
int
x
=
floor
(
sx
)
,
y
=
floor
(
sy
)
;
float
u
=
sx
-
x,
v
=
sy
-
y
;
float
u
=
sx
-
x,
v
=
sy
-
y
;
x<0
?
x=0,u=0
:
x,u
;
if
(
x<0
)
x=0,u=0
;
x>=src_cols
?
x=src_cols-1,u=0
:
x,u
;
if
(
x>=src_cols
)
x=src_cols-1,u=0
;
y<0
?
y=0,v=0
:
y,v
;
if
(
y<0
)
y=0,v=0
;
y>=src_rows
?
y=src_rows-1,v=0
:
y,v
;
if
(
y>=src_rows
)
y=src_rows-1,v=0
;
u
=
u
*
INTER_RESIZE_COEF_SCALE
;
u
=
u
*
INTER_RESIZE_COEF_SCALE
;
v
=
v
*
INTER_RESIZE_COEF_SCALE
;
v
=
v
*
INTER_RESIZE_COEF_SCALE
;
...
@@ -225,10 +225,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
...
@@ -225,10 +225,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
int
x
=
floor
(
sx
)
,
y
=
floor
(
sy
)
;
int
x
=
floor
(
sx
)
,
y
=
floor
(
sy
)
;
float
u
=
sx
-
x,
v
=
sy
-
y
;
float
u
=
sx
-
x,
v
=
sy
-
y
;
x<0
?
x=0,u=0
:
x,u
;
if
(
x<0
)
x=0,u=0
;
x>=src_cols
?
x=src_cols-1,u=0
:
x,u
;
if
(
x>=src_cols
)
x=src_cols-1,u=0
;
y<0
?
y=0,v=0
:
y,v
;
if
(
y<0
)
y=0,v=0
;
y>=src_rows
?
y=src_rows-1,v=0
:
y,v
;
if
(
y>=src_rows
)
y=src_rows-1,v=0
;
int
y_
=
INC
(
y,src_rows
)
;
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int
x_
=
INC
(
x,src_cols
)
;
...
@@ -264,10 +264,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
...
@@ -264,10 +264,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
int
x
=
floor
(
sx
)
,
y
=
floor
(
sy
)
;
int
x
=
floor
(
sx
)
,
y
=
floor
(
sy
)
;
float
u
=
sx
-
x,
v
=
sy
-
y
;
float
u
=
sx
-
x,
v
=
sy
-
y
;
x<0
?
x=0,u=0
:
x
;
if
(
x<0
)
x=0,u=0
;
x>=src_cols
?
x=src_cols-1,u=0
:
x
;
if
(
x>=src_cols
)
x=src_cols-1,u=0
;
y<0
?
y=0,v=0
:
y
;
if
(
y<0
)
y=0,v=0
;
y>=src_rows
?
y=src_rows-1,v=0
:
y
;
if
(
y>=src_rows
)
y=src_rows-1,v=0
;
int
y_
=
INC
(
y,src_rows
)
;
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int
x_
=
INC
(
x,src_cols
)
;
...
...
modules/ocl/src/opencl/imgproc_threshold.cl
View file @
a9a39a3a
...
@@ -71,18 +71,18 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
...
@@ -71,18 +71,18 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
#
else
#
else
VT
sdata
=
VLOADN
(
0
,
src
+
src_index
)
;
VT
sdata
=
VLOADN
(
0
,
src
+
src_index
)
;
#
endif
#
endif
VT
vthresh
=
(
VT
)(
thresh
)
,
zero
=
(
VT
)(
0
)
;
VT
vthresh
=
(
VT
)(
thresh
)
;
#
ifdef
THRESH_BINARY
#
ifdef
THRESH_BINARY
VT
vecValue
=
sdata
>
vthresh
?
max_val
:
zero
;
VT
vecValue
=
sdata
>
vthresh
?
max_val
:
(
VT
)(
0
)
;
#
elif
defined
THRESH_BINARY_INV
#
elif
defined
THRESH_BINARY_INV
VT
vecValue
=
sdata
>
vthresh
?
zero
:
max_val
;
VT
vecValue
=
sdata
>
vthresh
?
(
VT
)(
0
)
:
max_val
;
#
elif
defined
THRESH_TRUNC
#
elif
defined
THRESH_TRUNC
VT
vecValue
=
sdata
>
vthresh
?
thresh
:
sdata
;
VT
vecValue
=
sdata
>
vthresh
?
thresh
:
sdata
;
#
elif
defined
THRESH_TOZERO
#
elif
defined
THRESH_TOZERO
VT
vecValue
=
sdata
>
vthresh
?
sdata
:
zero
;
VT
vecValue
=
sdata
>
vthresh
?
sdata
:
(
VT
)(
0
)
;
#
elif
defined
THRESH_TOZERO_INV
#
elif
defined
THRESH_TOZERO_INV
VT
vecValue
=
sdata
>
vthresh
?
zero
:
sdata
;
VT
vecValue
=
sdata
>
vthresh
?
(
VT
)(
0
)
:
sdata
;
#
endif
#
endif
if
(
gx
+
VECSIZE
<=
max_index
)
if
(
gx
+
VECSIZE
<=
max_index
)
...
@@ -117,18 +117,18 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
...
@@ -117,18 +117,18 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
int
src_index
=
mad24
(
gy,
src_step,
src_offset
+
gx
)
;
int
src_index
=
mad24
(
gy,
src_step,
src_offset
+
gx
)
;
int
dst_index
=
mad24
(
gy,
dst_step,
dst_offset
+
gx
)
;
int
dst_index
=
mad24
(
gy,
dst_step,
dst_offset
+
gx
)
;
T
sdata
=
src[src_index]
,
zero
=
(
T
)(
0
)
;
T
sdata
=
src[src_index]
;
#
ifdef
THRESH_BINARY
#
ifdef
THRESH_BINARY
dst[dst_index]
=
sdata
>
thresh
?
max_val
:
zero
;
dst[dst_index]
=
sdata
>
thresh
?
max_val
:
(
T
)(
0
)
;
#
elif
defined
THRESH_BINARY_INV
#
elif
defined
THRESH_BINARY_INV
dst[dst_index]
=
sdata
>
thresh
?
zero
:
max_val
;
dst[dst_index]
=
sdata
>
thresh
?
(
T
)(
0
)
:
max_val
;
#
elif
defined
THRESH_TRUNC
#
elif
defined
THRESH_TRUNC
dst[dst_index]
=
sdata
>
thresh
?
thresh
:
sdata
;
dst[dst_index]
=
sdata
>
thresh
?
thresh
:
sdata
;
#
elif
defined
THRESH_TOZERO
#
elif
defined
THRESH_TOZERO
dst[dst_index]
=
sdata
>
thresh
?
sdata
:
zero
;
dst[dst_index]
=
sdata
>
thresh
?
sdata
:
(
T
)(
0
)
;
#
elif
defined
THRESH_TOZERO_INV
#
elif
defined
THRESH_TOZERO_INV
dst[dst_index]
=
sdata
>
thresh
?
zero
:
sdata
;
dst[dst_index]
=
sdata
>
thresh
?
(
T
)(
0
)
:
sdata
;
#
endif
#
endif
}
}
}
}
...
...
modules/ocl/src/opencl/imgproc_warpAffine.cl
View file @
a9a39a3a
...
@@ -537,9 +537,9 @@ __kernel void warpAffineLinear_C1_D5(__global float * src, __global float * dst,
...
@@ -537,9 +537,9 @@ __kernel void warpAffineLinear_C1_D5(__global float * src, __global float * dst,
float
tab[4]
;
float
tab[4]
;
float
taby[2],
tabx[2]
;
float
taby[2],
tabx[2]
;
taby[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ay0
;
taby[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ay0
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay0
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay0
;
tabx[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ax0
;
tabx[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ax0
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax0
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax0
;
tab[0]
=
taby[0]
*
tabx[0]
;
tab[0]
=
taby[0]
*
tabx[0]
;
...
@@ -680,9 +680,9 @@ __kernel void warpAffineLinear_C4_D5(__global float4 * src, __global float4 * ds
...
@@ -680,9 +680,9 @@ __kernel void warpAffineLinear_C4_D5(__global float4 * src, __global float4 * ds
float
tab[4]
;
float
tab[4]
;
float
taby[2],
tabx[2]
;
float
taby[2],
tabx[2]
;
taby[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ay0
;
taby[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ay0
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay0
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay0
;
tabx[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ax0
;
tabx[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ax0
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax0
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax0
;
tab[0]
=
taby[0]
*
tabx[0]
;
tab[0]
=
taby[0]
*
tabx[0]
;
...
...
modules/ocl/src/opencl/imgproc_warpPerspective.cl
View file @
a9a39a3a
...
@@ -133,7 +133,7 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _
...
@@ -133,7 +133,7 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
@@ -150,9 +150,9 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _
...
@@ -150,9 +150,9 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _
short
itab[4]
;
short
itab[4]
;
float
tab1y[2],
tab1x[2]
;
float
tab1y[2],
tab1x[2]
;
tab1y[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ay
;
tab1y[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ay
;
tab1y[1]
=
1.f/INTER_TAB_SIZE*ay
;
tab1y[1]
=
1.f/INTER_TAB_SIZE*ay
;
tab1x[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ax
;
tab1x[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ax
;
tab1x[1]
=
1.f/INTER_TAB_SIZE*ax
;
tab1x[1]
=
1.f/INTER_TAB_SIZE*ax
;
#
pragma
unroll
4
#
pragma
unroll
4
...
@@ -185,7 +185,7 @@ __kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar *
...
@@ -185,7 +185,7 @@ __kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar *
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
@@ -265,7 +265,7 @@ __kernel void warpPerspectiveNN_C4_D0(__global uchar4 const * restrict src, __gl
...
@@ -265,7 +265,7 @@ __kernel void warpPerspectiveNN_C4_D0(__global uchar4 const * restrict src, __gl
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
1./W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
1.f/W
:
0.0f
;
short
sx
=
convert_short_sat_rte
(
X0*W
)
;
short
sx
=
convert_short_sat_rte
(
X0*W
)
;
short
sy
=
convert_short_sat_rte
(
Y0*W
)
;
short
sy
=
convert_short_sat_rte
(
Y0*W
)
;
...
@@ -289,7 +289,7 @@ __kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src,
...
@@ -289,7 +289,7 @@ __kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src,
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
@@ -341,7 +341,7 @@ __kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, _
...
@@ -341,7 +341,7 @@ __kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, _
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
@@ -424,7 +424,7 @@ __kernel void warpPerspectiveNN_C1_D5(__global float * src, __global float * dst
...
@@ -424,7 +424,7 @@ __kernel void warpPerspectiveNN_C1_D5(__global float * src, __global float * dst
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
1./W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
1.f/W
:
0.0f
;
short
sx
=
convert_short_sat_rte
(
X0*W
)
;
short
sx
=
convert_short_sat_rte
(
X0*W
)
;
short
sy
=
convert_short_sat_rte
(
Y0*W
)
;
short
sy
=
convert_short_sat_rte
(
Y0*W
)
;
...
@@ -447,7 +447,7 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float *
...
@@ -447,7 +447,7 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float *
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
@@ -465,9 +465,9 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float *
...
@@ -465,9 +465,9 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float *
float
tab[4]
;
float
tab[4]
;
float
taby[2],
tabx[2]
;
float
taby[2],
tabx[2]
;
taby[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ay
;
taby[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ay
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay
;
tabx[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ax
;
tabx[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ax
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax
;
tab[0]
=
taby[0]
*
tabx[0]
;
tab[0]
=
taby[0]
*
tabx[0]
;
...
@@ -497,7 +497,7 @@ __kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float *
...
@@ -497,7 +497,7 @@ __kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float *
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
@@ -557,7 +557,7 @@ __kernel void warpPerspectiveNN_C4_D5(__global float4 * src, __global float4 * d
...
@@ -557,7 +557,7 @@ __kernel void warpPerspectiveNN_C4_D5(__global float4 * src, __global float4 * d
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
1./W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
1.f/W
:
0.0f
;
short
sx
=
convert_short_sat_rte
(
X0*W
)
;
short
sx
=
convert_short_sat_rte
(
X0*W
)
;
short
sy
=
convert_short_sat_rte
(
Y0*W
)
;
short
sy
=
convert_short_sat_rte
(
Y0*W
)
;
...
@@ -583,7 +583,7 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4
...
@@ -583,7 +583,7 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
@@ -602,9 +602,9 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4
...
@@ -602,9 +602,9 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4
float
tab[4]
;
float
tab[4]
;
float
taby[2],
tabx[2]
;
float
taby[2],
tabx[2]
;
taby[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ay0
;
taby[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ay0
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay0
;
taby[1]
=
1.f/INTER_TAB_SIZE*ay0
;
tabx[0]
=
1.0
-
1.f/INTER_TAB_SIZE*ax0
;
tabx[0]
=
1.0
f
-
1.f/INTER_TAB_SIZE*ax0
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax0
;
tabx[1]
=
1.f/INTER_TAB_SIZE*ax0
;
tab[0]
=
taby[0]
*
tabx[0]
;
tab[0]
=
taby[0]
*
tabx[0]
;
...
@@ -636,7 +636,7 @@ __kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4
...
@@ -636,7 +636,7 @@ __kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
X0
=
M[0]*dx
+
M[1]*dy
+
M[2]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
Y0
=
M[3]*dx
+
M[4]*dy
+
M[5]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
F
W
=
M[6]*dx
+
M[7]*dy
+
M[8]
;
W
=
(
W
!=
0.0
)
?
INTER_TAB_SIZE/W
:
0.0
;
W
=
(
W
!=
0.0
f
)
?
INTER_TAB_SIZE/W
:
0.0f
;
int
X
=
rint
(
X0*W
)
;
int
X
=
rint
(
X0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
int
Y
=
rint
(
Y0*W
)
;
...
...
modules/ocl/src/opencl/kernel_sort_by_key.cl
View file @
a9a39a3a
...
@@ -192,7 +192,6 @@ __kernel
...
@@ -192,7 +192,6 @@ __kernel
{
{
const
int
i
=
get_local_id
(
0
)
; // index in workgroup
const
int
i
=
get_local_id
(
0
)
; // index in workgroup
const
int
numOfGroups
=
get_num_groups
(
0
)
; // index in workgroup
const
int
numOfGroups
=
get_num_groups
(
0
)
; // index in workgroup
const
int
groupID
=
get_group_id
(
0
)
;
const
int
wg
=
get_local_size
(
0
)
; // workgroup size = block size
const
int
wg
=
get_local_size
(
0
)
; // workgroup size = block size
int
pos
=
0
,
same
=
0
;
int
pos
=
0
,
same
=
0
;
const
int
offset
=
get_group_id
(
0
)
*
wg
;
const
int
offset
=
get_group_id
(
0
)
*
wg
;
...
...
modules/ocl/src/opencl/kernel_stablesort_by_key.cl
View file @
a9a39a3a
...
@@ -63,7 +63,7 @@
...
@@ -63,7 +63,7 @@
///////////// parallel merge sort ///////////////
///////////// parallel merge sort ///////////////
// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
static
uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
{
{
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
uint firstIndex = left;
uint firstIndex = left;
...
@@ -94,7 +94,7 @@ uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
...
@@ -94,7 +94,7 @@ uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
// by a base pointer and left and right index for a particular candidate value. The comparison operator is
// by a base pointer and left and right index for a particular candidate value. The comparison operator is
// passed as a functor parameter my_comp
// passed as a functor parameter my_comp
// This function returns an index that is the first index whos value would be equal to the searched value
// This function returns an index that is the first index whos value would be equal to the searched value
uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
static
uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
{
{
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
// The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
uint firstIndex = left;
uint firstIndex = left;
...
@@ -130,7 +130,7 @@ uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
...
@@ -130,7 +130,7 @@ uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
// passed as a functor parameter my_comp
// passed as a functor parameter my_comp
// This function returns an index that is the first index whos value would be greater than the searched value
// This function returns an index that is the first index whos value would be greater than the searched value
// If the search value is not found in the sequence, upperbound returns the same result as lowerbound
// If the search value is not found in the sequence, upperbound returns the same result as lowerbound
uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
static
uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
{
{
uint upperBound = lowerBoundBinary( data, left, right, searchVal );
uint upperBound = lowerBoundBinary( data, left, right, searchVal );
...
@@ -167,9 +167,6 @@ kernel void merge(
...
@@ -167,9 +167,6 @@ kernel void merge(
)
)
{
{
size_t globalID = get_global_id( 0 );
size_t globalID = get_global_id( 0 );
size_t groupID = get_group_id( 0 );
size_t localID = get_local_id( 0 );
size_t wgSize = get_local_size( 0 );
// Abort threads that are passed the end of the input vector
// Abort threads that are passed the end of the input vector
if( globalID >= srcVecSize )
if( globalID >= srcVecSize )
...
@@ -230,12 +227,12 @@ kernel void blockInsertionSort(
...
@@ -230,12 +227,12 @@ kernel void blockInsertionSort(
local V_T* val_lds
local V_T* val_lds
)
)
{
{
size_
t gloId = get_global_id( 0 );
in
t gloId = get_global_id( 0 );
size_
t groId = get_group_id( 0 );
in
t groId = get_group_id( 0 );
size_
t locId = get_local_id( 0 );
in
t locId = get_local_id( 0 );
size_
t wgSize = get_local_size( 0 );
in
t wgSize = get_local_size( 0 );
bool in_range = gloId < vecSize;
bool in_range = gloId <
(int)
vecSize;
K_T key;
K_T key;
V_T val;
V_T val;
// Abort threads that are passed the end of the input vector
// Abort threads that are passed the end of the input vector
...
@@ -254,7 +251,7 @@ kernel void blockInsertionSort(
...
@@ -254,7 +251,7 @@ kernel void blockInsertionSort(
{
{
// The last workgroup may have an irregular size, so we calculate a per-block endIndex
// The last workgroup may have an irregular size, so we calculate a per-block endIndex
// endIndex is essentially emulating a mod operator with subtraction and multiply
// endIndex is essentially emulating a mod operator with subtraction and multiply
size_
t endIndex = vecSize - ( groId * wgSize );
in
t endIndex = vecSize - ( groId * wgSize );
endIndex = min( endIndex, wgSize );
endIndex = min( endIndex, wgSize );
// printf( "Debug: endIndex[%i]=%i\n", groId, endIndex );
// printf( "Debug: endIndex[%i]=%i\n", groId, endIndex );
...
...
modules/ocl/src/opencl/knearest.cl
View file @
a9a39a3a
...
@@ -129,58 +129,53 @@ __kernel void knn_find_nearest(__global float* sample, int sample_row, int sampl
...
@@ -129,58 +129,53 @@ __kernel void knn_find_nearest(__global float* sample, int sample_row, int sampl
}
}
/*!
find_nearest_neighbor
done!*/
/*!
find_nearest_neighbor
done!*/
/*!
write_results
start!*/
/*!
write_results
start!*/
switch
(
regression
)
if
(
regression
)
{
{
case
true:
TYPE
s
;
{
TYPE
s
;
#
ifdef
DOUBLE_SUPPORT
#
ifdef
DOUBLE_SUPPORT
s
=
0.0
;
s
=
0.0
;
#
else
#
else
s
=
0.0f
;
s
=
0.0f
;
#
endif
#
endif
for
(
j
=
0
; j < K1; j++)
for
(
j
=
0
; j < K1; j++)
s
+=
nr[j
*
nThreads
+
threadY]
;
s
+=
nr[j
*
nThreads
+
threadY]
;
_results[y
*
_results_step]
=
(
float
)(
s
*
inv_scale
)
;
_results[y
*
_results_step]
=
(
float
)(
s
*
inv_scale
)
;
}
}
break
;
else
case
false:
{
{
int
prev_start
=
0
,
best_count
=
0
,
cur_count
;
int
prev_start
=
0
,
best_count
=
0
,
cur_count
;
float
best_val
;
float
best_val
;
for
(
j
=
K1
-
1
; j > 0; j--)
for
(
j
=
K1
-
1
; j > 0; j--)
{
bool
swap_f1
=
false
;
for
(
j1
=
0
; j1 < j; j1++)
{
{
bool
swap_f1
=
false
;
if
(
nr[j1
*
nThreads
+
threadY]
>
nr[
(
j1
+
1
)
*
nThreads
+
threadY]
)
for
(
j1
=
0
; j1 < j; j1++)
{
{
if
(
nr[j1
*
nThreads
+
threadY]
>
nr[
(
j1
+
1
)
*
nThreads
+
threadY]
)
int
t
;
{
CV_SWAP
(
nr[j1
*
nThreads
+
threadY],
nr[
(
j1
+
1
)
*
nThreads
+
threadY],
t
)
;
int
t
;
swap_f1
=
true
;
CV_SWAP
(
nr[j1
*
nThreads
+
threadY],
nr[
(
j1
+
1
)
*
nThreads
+
threadY],
t
)
;
swap_f1
=
true
;
}
}
}
if
(
!swap_f1
)
break
;
}
}
if
(
!swap_f1
)
break
;
}
best_val
=
0
;
best_val
=
0
;
for
(
j
=
1
; j <= K1; j++)
for
(
j
=
1
; j <= K1; j++)
if
(
j
==
K1
||
nr[j
*
nThreads
+
threadY]
!=
nr[
(
j
-
1
)
*
nThreads
+
threadY]
)
if
(
j
==
K1
||
nr[j
*
nThreads
+
threadY]
!=
nr[
(
j
-
1
)
*
nThreads
+
threadY]
)
{
cur_count
=
j
-
prev_start
;
if
(
best_count
<
cur_count
)
{
{
cur_count
=
j
-
prev_start
;
best_count
=
cur_count
;
if
(
best_count
<
cur_count
)
best_val
=
nr[
(
j
-
1
)
*
nThreads
+
threadY]
;
{
best_count
=
cur_count
;
best_val
=
nr[
(
j
-
1
)
*
nThreads
+
threadY]
;
}
prev_start
=
j
;
}
}
_results[y
*
_results_step]
=
best_val
;
prev_start
=
j
;
}
}
break
;
_results[y
*
_results_step]
=
best_val
;
}
}
///*!
write_results
done!*/
///*!
write_results
done!*/
}
}
modules/ocl/src/opencl/match_template.cl
View file @
a9a39a3a
...
@@ -43,8 +43,6 @@
...
@@ -43,8 +43,6 @@
//
//
//M*/
//M*/
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
ifdef
cl_khr_fp64
...
@@ -70,7 +68,7 @@
...
@@ -70,7 +68,7 @@
#
define
SUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sums_step,
gidx
+
img_sums_offset
+
ox
)
#
define
SUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sums_step,
gidx
+
img_sums_offset
+
ox
)
//
normAcc*
are
accurate
normalization
routines
which
make
GPU
matchTemplate
//
normAcc*
are
accurate
normalization
routines
which
make
GPU
matchTemplate
//
consistent
with
CPU
one
//
consistent
with
CPU
one
float
normAcc
(
float
num,
float
denum
)
inline
float
normAcc
(
float
num,
float
denum
)
{
{
if
(
fabs
(
num
)
<
denum
)
if
(
fabs
(
num
)
<
denum
)
{
{
...
@@ -83,7 +81,7 @@ float normAcc(float num, float denum)
...
@@ -83,7 +81,7 @@ float normAcc(float num, float denum)
return
0
;
return
0
;
}
}
float
normAcc_SQDIFF
(
float
num,
float
denum
)
inline
float
normAcc_SQDIFF
(
float
num,
float
denum
)
{
{
if
(
fabs
(
num
)
<
denum
)
if
(
fabs
(
num
)
<
denum
)
{
{
...
...
modules/ocl/src/opencl/meanShift.cl
View file @
a9a39a3a
...
@@ -46,7 +46,7 @@
...
@@ -46,7 +46,7 @@
//
//
//M*/
//M*/
short2
do_mean_shift
(
int
x0,
int
y0,
__global
uchar4*
out,int
out_step,
s
tatic
s
hort2
do_mean_shift
(
int
x0,
int
y0,
__global
uchar4*
out,int
out_step,
__global
uchar4*
in,
int
in_step,
int
dst_off,
int
src_off,
__global
uchar4*
in,
int
in_step,
int
dst_off,
int
src_off,
int
cols,
int
rows,
int
sp,
int
sr,
int
maxIter,
float
eps
)
int
cols,
int
rows,
int
sp,
int
sr,
int
maxIter,
float
eps
)
{
{
...
@@ -56,7 +56,6 @@ short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
...
@@ -56,7 +56,6 @@ short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
src_off
=
src_off
>>
2
;
src_off
=
src_off
>>
2
;
dst_off
=
dst_off
>>
2
;
dst_off
=
dst_off
>>
2
;
int
idx
=
src_off
+
y0
*
in_step
+
x0
;
int
idx
=
src_off
+
y0
*
in_step
+
x0
;
//
uchar4
c
=
vload4
(
0
,
(
__global
uchar*
)
in+idx
)
;
uchar4
c
=
in[idx]
;
uchar4
c
=
in[idx]
;
int
base
=
dst_off
+
get_global_id
(
1
)
*out_step
+
get_global_id
(
0
)
;
int
base
=
dst_off
+
get_global_id
(
1
)
*out_step
+
get_global_id
(
0
)
;
...
...
modules/ocl/src/opencl/moments.cl
View file @
a9a39a3a
...
@@ -162,7 +162,6 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -162,7 +162,6 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
WT4
x3
=
(
WT4
)(
0.f
)
;
WT4
x3
=
(
WT4
)(
0.f
)
;
__global
TT*
row
=
src_data
+
gidy
*
src_step
+
ly
*
src_step
+
gidx
*
256
;
__global
TT*
row
=
src_data
+
gidy
*
src_step
+
ly
*
src_step
+
gidx
*
256
;
bool
switchFlag
=
false
;
WT4
p
;
WT4
p
;
WT4
x
;
WT4
x
;
...
@@ -173,7 +172,7 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -173,7 +172,7 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
if
(
dy
<
src_rows
)
if
(
dy
<
src_rows
)
{
{
if
((
x_rest
>
0
)
&&
(
gidx
==
(
get_num_groups
(
0
)
-
1
)))
if
((
x_rest
>
0
)
&&
(
gidx
==
(
(
int
)
get_num_groups
(
0
)
-
1
)))
{
{
int
i
;
int
i
;
for
(
i
=
0
; i < x_rest - 4; i += 4)
for
(
i
=
0
; i < x_rest - 4; i += 4)
...
@@ -190,11 +189,8 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -190,11 +189,8 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
}
}
x0.s0
=
x0.s0
+
x0.s1
+
x0.s2
+
x0.s3
;
x0.s0
=
x0.s0
+
x0.s1
+
x0.s2
+
x0.s3
;
x1.s0
=
x1.s0
+
x1.s1
+
x1.s2
+
x1.s3
;
x1.s0
=
x1.s0
+
x1.s1
+
x1.s2
+
x1.s3
;
x2.s0
=
x2.s0
+
x2.s1
+
x2.s2
+
x2.s3
;
x2.s0
=
x2.s0
+
x2.s1
+
x2.s2
+
x2.s3
;
x3.s0
=
x3.s0
+
x3.s1
+
x3.s2
+
x3.s3
;
x3.s0
=
x3.s0
+
x3.s1
+
x3.s2
+
x3.s3
;
WT
x0_
=
0
;
WT
x0_
=
0
;
...
@@ -238,11 +234,8 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -238,11 +234,8 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
}
}
x0.s0
=
x0.s0
+
x0.s1
+
x0.s2
+
x0.s3
;
x0.s0
=
x0.s0
+
x0.s1
+
x0.s2
+
x0.s3
;
x1.s0
=
x1.s0
+
x1.s1
+
x1.s2
+
x1.s3
;
x1.s0
=
x1.s0
+
x1.s1
+
x1.s2
+
x1.s3
;
x2.s0
=
x2.s0
+
x2.s1
+
x2.s2
+
x2.s3
;
x2.s0
=
x2.s0
+
x2.s1
+
x2.s2
+
x2.s3
;
x3.s0
=
x3.s0
+
x3.s1
+
x3.s2
+
x3.s3
;
x3.s0
=
x3.s0
+
x3.s1
+
x3.s2
+
x3.s3
;
}
}
...
@@ -251,7 +244,7 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -251,7 +244,7 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
}
}
__local
WT
mom[10][256]
;
__local
WT
mom[10][256]
;
if
((
y_rest
>
0
)
&&
(
gidy
==
(
get_num_groups
(
1
)
-
1
)))
if
((
y_rest
>
0
)
&&
(
gidy
==
(
(
int
)
get_num_groups
(
1
)
-
1
)))
{
{
if
(
ly
<
y_rest
)
if
(
ly
<
y_rest
)
{
{
...
@@ -268,13 +261,10 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -268,13 +261,10 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
ly
<
10
)
if
(
ly
<
10
)
{
for
(
int
i
=
1
; i < y_rest; i++)
for
(
int
i
=
1
; i < y_rest; i++)
{
mom[ly][0]
=
mom[ly][i]
+
mom[ly][0]
;
mom[ly][0]
=
mom[ly][i]
+
mom[ly][0]
;
}
}
}
else
}else
{
{
mom[9][ly]
=
py
*
sy
;
mom[9][ly]
=
py
*
sy
;
mom[8][ly]
=
x1.s0
*
sy
;
mom[8][ly]
=
x1.s0
*
sy
;
...
@@ -413,11 +403,9 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -413,11 +403,9 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
if
(
binary
)
if
(
binary
)
{
{
WT
s
=
1./255
;
WT
s
=
1.
0f
/255
;
if
(
ly
<
10
)
if
(
ly
<
10
)
{
mom[ly][0]
*=
s
;
mom[ly][0]
*=
s
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
WT
xm
=
(
gidx
*
256
)
*
mom[0][0]
;
WT
xm
=
(
gidx
*
256
)
*
mom[0][0]
;
...
@@ -440,7 +428,5 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
...
@@ -440,7 +428,5 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
ly
<
10
)
if
(
ly
<
10
)
{
dst_m[10
*
gidy
*
dst_step
+
ly
*
dst_step
+
gidx]
=
mom[ly][1]
;
dst_m[10
*
gidy
*
dst_step
+
ly
*
dst_step
+
gidx]
=
mom[ly][1]
;
}
}
}
modules/ocl/src/opencl/objdetect_hog.cl
View file @
a9a39a3a
...
@@ -200,7 +200,7 @@ __kernel void normalize_hists_36_kernel(__global float* block_hists,
...
@@ -200,7 +200,7 @@ __kernel void normalize_hists_36_kernel(__global float* block_hists,
//-------------------------------------------------------------
//-------------------------------------------------------------
//
Normalization
of
histograms
via
L2Hys_norm
//
Normalization
of
histograms
via
L2Hys_norm
//
//
float
reduce_smem
(
volatile
__local
float*
smem,
int
size
)
static
float
reduce_smem
(
volatile
__local
float*
smem,
int
size
)
{
{
unsigned
int
tid
=
get_local_id
(
0
)
;
unsigned
int
tid
=
get_local_id
(
0
)
;
float
sum
=
smem[tid]
;
float
sum
=
smem[tid]
;
...
@@ -564,7 +564,6 @@ __kernel void compute_gradients_8UC4_kernel(
...
@@ -564,7 +564,6 @@ __kernel void compute_gradients_8UC4_kernel(
const
int
x
=
get_global_id
(
0
)
;
const
int
x
=
get_global_id
(
0
)
;
const
int
tid
=
get_local_id
(
0
)
;
const
int
tid
=
get_local_id
(
0
)
;
const
int
gSizeX
=
get_local_size
(
0
)
;
const
int
gSizeX
=
get_local_size
(
0
)
;
const
int
gidX
=
get_group_id
(
0
)
;
const
int
gidY
=
get_group_id
(
1
)
;
const
int
gidY
=
get_group_id
(
1
)
;
__global
const
uchar4*
row
=
img
+
gidY
*
img_step
;
__global
const
uchar4*
row
=
img
+
gidY
*
img_step
;
...
@@ -667,7 +666,6 @@ __kernel void compute_gradients_8UC1_kernel(
...
@@ -667,7 +666,6 @@ __kernel void compute_gradients_8UC1_kernel(
const
int
x
=
get_global_id
(
0
)
;
const
int
x
=
get_global_id
(
0
)
;
const
int
tid
=
get_local_id
(
0
)
;
const
int
tid
=
get_local_id
(
0
)
;
const
int
gSizeX
=
get_local_size
(
0
)
;
const
int
gSizeX
=
get_local_size
(
0
)
;
const
int
gidX
=
get_group_id
(
0
)
;
const
int
gidY
=
get_group_id
(
1
)
;
const
int
gidY
=
get_group_id
(
1
)
;
__global
const
uchar*
row
=
img
+
gidY
*
img_step
;
__global
const
uchar*
row
=
img
+
gidY
*
img_step
;
...
...
modules/ocl/src/opencl/optical_flow_farneback.cl
View file @
a9a39a3a
...
@@ -44,10 +44,10 @@
...
@@ -44,10 +44,10 @@
//M*/
//M*/
#
define
tx
get_local_id
(
0
)
#
define
tx
(
int
)
get_local_id
(
0
)
#
define
ty
get_local_id
(
1
)
#
define
ty
get_local_id
(
1
)
#
define
bx
get_group_id
(
0
)
#
define
bx
get_group_id
(
0
)
#
define
bdx
get_local_size
(
0
)
#
define
bdx
(
int
)
get_local_size
(
0
)
#
define
BORDER_SIZE
5
#
define
BORDER_SIZE
5
#
define
MAX_KSIZE_HALF
100
#
define
MAX_KSIZE_HALF
100
...
...
modules/ocl/src/opencl/pyr_down.cl
View file @
a9a39a3a
...
@@ -43,32 +43,32 @@
...
@@ -43,32 +43,32 @@
//
//
//M*/
//M*/
int
idx_row_low
(
int
y,
int
last_row
)
in
line
in
t
idx_row_low
(
int
y,
int
last_row
)
{
{
return
abs
(
y
)
%
(
last_row
+
1
)
;
return
abs
(
y
)
%
(
last_row
+
1
)
;
}
}
int
idx_row_high
(
int
y,
int
last_row
)
in
line
in
t
idx_row_high
(
int
y,
int
last_row
)
{
{
return
abs
(
last_row
-
(
int
)
abs
(
last_row
-
y
))
%
(
last_row
+
1
)
;
return
abs
(
last_row
-
(
int
)
abs
(
last_row
-
y
))
%
(
last_row
+
1
)
;
}
}
int
idx_row
(
int
y,
int
last_row
)
in
line
in
t
idx_row
(
int
y,
int
last_row
)
{
{
return
idx_row_low
(
idx_row_high
(
y,
last_row
)
,
last_row
)
;
return
idx_row_low
(
idx_row_high
(
y,
last_row
)
,
last_row
)
;
}
}
int
idx_col_low
(
int
x,
int
last_col
)
in
line
in
t
idx_col_low
(
int
x,
int
last_col
)
{
{
return
abs
(
x
)
%
(
last_col
+
1
)
;
return
abs
(
x
)
%
(
last_col
+
1
)
;
}
}
int
idx_col_high
(
int
x,
int
last_col
)
in
line
in
t
idx_col_high
(
int
x,
int
last_col
)
{
{
return
abs
(
last_col
-
(
int
)
abs
(
last_col
-
x
))
%
(
last_col
+
1
)
;
return
abs
(
last_col
-
(
int
)
abs
(
last_col
-
x
))
%
(
last_col
+
1
)
;
}
}
int
idx_col
(
int
x,
int
last_col
)
in
line
in
t
idx_col
(
int
x,
int
last_col
)
{
{
return
idx_col_low
(
idx_col_high
(
x,
last_col
)
,
last_col
)
;
return
idx_col_low
(
idx_col_high
(
x,
last_col
)
,
last_col
)
;
}
}
...
...
modules/ocl/src/opencl/pyrlk.cl
View file @
a9a39a3a
...
@@ -53,7 +53,8 @@
...
@@ -53,7 +53,8 @@
#
define
WAVE_SIZE
1
#
define
WAVE_SIZE
1
#
endif
#
endif
#
ifdef
CPU
#
ifdef
CPU
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
float*
smem1,
__local
float*
smem2,
__local
float*
smem3,
int
tid
)
static
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
float*
smem1,
__local
float*
smem2,
__local
float*
smem3,
int
tid
)
{
{
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
smem2[tid]
=
val2
;
...
@@ -72,7 +73,7 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local
...
@@ -72,7 +73,7 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local
}
}
}
}
void
reduce2
(
float
val1,
float
val2,
volatile
__local
float*
smem1,
volatile
__local
float*
smem2,
int
tid
)
static
void
reduce2
(
float
val1,
float
val2,
volatile
__local
float*
smem1,
volatile
__local
float*
smem2,
int
tid
)
{
{
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
smem2[tid]
=
val2
;
...
@@ -89,7 +90,7 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
...
@@ -89,7 +90,7 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
}
}
}
}
void
reduce1
(
float
val1,
volatile
__local
float*
smem1,
int
tid
)
static
void
reduce1
(
float
val1,
volatile
__local
float*
smem1,
int
tid
)
{
{
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -104,7 +105,7 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
...
@@ -104,7 +105,7 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
}
}
}
}
#
else
#
else
void
reduce3
(
float
val1,
float
val2,
float
val3,
static
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
__local
volatile
float*
smem3,
int
tid
)
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
__local
volatile
float*
smem3,
int
tid
)
{
{
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
...
@@ -151,7 +152,7 @@ void reduce3(float val1, float val2, float val3,
...
@@ -151,7 +152,7 @@ void reduce3(float val1, float val2, float val3,
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
void
reduce2
(
float
val1,
float
val2,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
int
tid
)
static
void
reduce2
(
float
val1,
float
val2,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
int
tid
)
{
{
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
smem2[tid]
=
val2
;
...
@@ -190,7 +191,7 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
...
@@ -190,7 +191,7 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
void
reduce1
(
float
val1,
__local
volatile
float*
smem1,
int
tid
)
static
void
reduce1
(
float
val1,
__local
volatile
float*
smem1,
int
tid
)
{
{
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -226,7 +227,7 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
...
@@ -226,7 +227,7 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
//
Image
read
mode
//
Image
read
mode
__constant
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_LINEAR
;
__constant
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_LINEAR
;
void
SetPatch
(
image2d_t
I,
float
x,
float
y,
static
void
SetPatch
(
image2d_t
I,
float
x,
float
y,
float*
Pch,
float*
Dx,
float*
Dy,
float*
Pch,
float*
Dx,
float*
Dy,
float*
A11,
float*
A12,
float*
A22
)
float*
A11,
float*
A12,
float*
A22
)
{
{
...
@@ -247,7 +248,7 @@ void SetPatch(image2d_t I, float x, float y,
...
@@ -247,7 +248,7 @@ void SetPatch(image2d_t I, float x, float y,
*A22
+=
dIdy
*
dIdy
;
*A22
+=
dIdy
*
dIdy
;
}
}
void
GetPatch
(
image2d_t
J,
float
x,
float
y,
inline
void
GetPatch
(
image2d_t
J,
float
x,
float
y,
float*
Pch,
float*
Dx,
float*
Dy,
float*
Pch,
float*
Dx,
float*
Dy,
float*
b1,
float*
b2
)
float*
b1,
float*
b2
)
{
{
...
@@ -257,13 +258,13 @@ void GetPatch(image2d_t J, float x, float y,
...
@@ -257,13 +258,13 @@ void GetPatch(image2d_t J, float x, float y,
*b2
+=
diff**Dy
;
*b2
+=
diff**Dy
;
}
}
void
GetError
(
image2d_t
J,
const
float
x,
const
float
y,
const
float*
Pch,
float*
errval
)
inline
void
GetError
(
image2d_t
J,
const
float
x,
const
float
y,
const
float*
Pch,
float*
errval
)
{
{
float
diff
=
read_imagef
(
J,
sampler,
(
float2
)(
x,y
))
.
x-*Pch
;
float
diff
=
read_imagef
(
J,
sampler,
(
float2
)(
x,y
))
.
x-*Pch
;
*errval
+=
fabs
(
diff
)
;
*errval
+=
fabs
(
diff
)
;
}
}
void
SetPatch4
(
image2d_t
I,
const
float
x,
const
float
y,
static
void
SetPatch4
(
image2d_t
I,
const
float
x,
const
float
y,
float4*
Pch,
float4*
Dx,
float4*
Dy,
float4*
Pch,
float4*
Dx,
float4*
Dy,
float*
A11,
float*
A12,
float*
A22
)
float*
A11,
float*
A12,
float*
A22
)
{
{
...
@@ -286,7 +287,7 @@ void SetPatch4(image2d_t I, const float x, const float y,
...
@@ -286,7 +287,7 @@ void SetPatch4(image2d_t I, const float x, const float y,
*A22
+=
sqIdx.x
+
sqIdx.y
+
sqIdx.z
;
*A22
+=
sqIdx.x
+
sqIdx.y
+
sqIdx.z
;
}
}
void
GetPatch4
(
image2d_t
J,
const
float
x,
const
float
y,
static
void
GetPatch4
(
image2d_t
J,
const
float
x,
const
float
y,
const
float4*
Pch,
const
float4*
Dx,
const
float4*
Dy,
const
float4*
Pch,
const
float4*
Dx,
const
float4*
Dy,
float*
b1,
float*
b2
)
float*
b1,
float*
b2
)
{
{
...
@@ -298,7 +299,7 @@ void GetPatch4(image2d_t J, const float x, const float y,
...
@@ -298,7 +299,7 @@ void GetPatch4(image2d_t J, const float x, const float y,
*b2
+=
xdiff.x
+
xdiff.y
+
xdiff.z
;
*b2
+=
xdiff.x
+
xdiff.y
+
xdiff.z
;
}
}
void
GetError4
(
image2d_t
J,
const
float
x,
const
float
y,
const
float4*
Pch,
float*
errval
)
static
void
GetError4
(
image2d_t
J,
const
float
x,
const
float
y,
const
float4*
Pch,
float*
errval
)
{
{
float4
diff
=
read_imagef
(
J,
sampler,
(
float2
)(
x,y
))
-*Pch
;
float4
diff
=
read_imagef
(
J,
sampler,
(
float2
)(
x,y
))
-*Pch
;
*errval
+=
fabs
(
diff.x
)
+
fabs
(
diff.y
)
+
fabs
(
diff.z
)
;
*errval
+=
fabs
(
diff.x
)
+
fabs
(
diff.y
)
+
fabs
(
diff.z
)
;
...
@@ -318,7 +319,7 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
...
@@ -318,7 +319,7 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
unsigned
int
gid=get_group_id
(
0
)
;
unsigned
int
gid=get_group_id
(
0
)
;
unsigned
int
xsize=get_local_size
(
0
)
;
unsigned
int
xsize=get_local_size
(
0
)
;
unsigned
int
ysize=get_local_size
(
1
)
;
unsigned
int
ysize=get_local_size
(
1
)
;
int
xBase,
yBase,
i,
j,
k
;
int
xBase,
yBase,
k
;
float2
c_halfWin
=
(
float2
)((
c_winSize_x
-
1
)
>>1,
(
c_winSize_y
-
1
)
>>1
)
;
float2
c_halfWin
=
(
float2
)((
c_winSize_x
-
1
)
>>1,
(
c_winSize_y
-
1
)
>>1
)
;
...
@@ -597,7 +598,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
...
@@ -597,7 +598,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
unsigned int gid=get_group_id(0);
unsigned int gid=get_group_id(0);
unsigned int xsize=get_local_size(0);
unsigned int xsize=get_local_size(0);
unsigned int ysize=get_local_size(1);
unsigned int ysize=get_local_size(1);
int xBase, yBase,
i, j,
k;
int xBase, yBase, k;
float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
...
...
modules/ocl/src/opencl/split_mat.cl
View file @
a9a39a3a
...
@@ -183,7 +183,7 @@ __kernel void split_vector(
...
@@ -183,7 +183,7 @@ __kernel void split_vector(
int
dst
##
xOffsetLimitBytes
=
dst
##
Offset.x
+
size.x
*
sizeof
(
TYPE
)
; \
int
dst
##
xOffsetLimitBytes
=
dst
##
Offset.x
+
size.x
*
sizeof
(
TYPE
)
; \
int
dst
##
xOffsetBytes
=
dst
##
Offset.x
+
x
*
sizeof
(
TYPE
)
; \
int
dst
##
xOffsetBytes
=
dst
##
Offset.x
+
x
*
sizeof
(
TYPE
)
; \
int
dst
##
yOffsetBytes
=
(
dst
##
Offset.y
+
y
)
*
dst
##
StepBytes
; \
int
dst
##
yOffsetBytes
=
(
dst
##
Offset.y
+
y
)
*
dst
##
StepBytes
; \
if
(
!BYPASS_VSTORE
&&
dst
##
xOffsetBytes
+
sizeof
(
DST_VEC_TYPE
)
<=
dst
##
xOffsetLimitBytes
)
\
if
(
!BYPASS_VSTORE
&&
dst
##
xOffsetBytes
+
(
int
)
sizeof
(
DST_VEC_TYPE
)
<=
dst
##
xOffsetLimitBytes
)
\
{
\
{
\
VSTORE_
##
dst
(((
__global
char*
)
dst
+
dst
##
yOffsetBytes
+
dst
##
xOffsetBytes
)
,
vecValue
)
; \
VSTORE_
##
dst
(((
__global
char*
)
dst
+
dst
##
yOffsetBytes
+
dst
##
xOffsetBytes
)
,
vecValue
)
; \
}
\
}
\
...
@@ -192,7 +192,7 @@ __kernel void split_vector(
...
@@ -192,7 +192,7 @@ __kernel void split_vector(
VEC_TO_ARRAY
(
vecValue,
vecValue##Array
)
; \
VEC_TO_ARRAY
(
vecValue,
vecValue##Array
)
; \
for
(
int
i
=
0
; i < VEC_SIZE; i++, dst ## xOffsetBytes += sizeof(TYPE)) \
for
(
int
i
=
0
; i < VEC_SIZE; i++, dst ## xOffsetBytes += sizeof(TYPE)) \
{
\
{
\
if
(
dst
##
xOffsetBytes
+
sizeof
(
TYPE
)
<=
dst
##
xOffsetLimitBytes
)
\
if
(
dst
##
xOffsetBytes
+
(
int
)
sizeof
(
TYPE
)
<=
dst
##
xOffsetLimitBytes
)
\
*
(
__global
TYPE*
)((
__global
char*
)
dst
+
dst
##
yOffsetBytes
+
dst
##
xOffsetBytes
)
=
vecValue##Array[i]
; \
*
(
__global
TYPE*
)((
__global
char*
)
dst
+
dst
##
yOffsetBytes
+
dst
##
xOffsetBytes
)
=
vecValue##Array[i]
; \
else
\
else
\
break
; \
break
; \
...
...
modules/ocl/src/opencl/stereobm.cl
View file @
a9a39a3a
...
@@ -56,7 +56,7 @@
...
@@ -56,7 +56,7 @@
#
define
radius
64
#
define
radius
64
#
endif
#
endif
unsigned
int
CalcSSD
(
__local
unsigned
int
*col_ssd
)
static
unsigned
int
CalcSSD
(
__local
unsigned
int
*col_ssd
)
{
{
unsigned
int
cache
=
col_ssd[0]
;
unsigned
int
cache
=
col_ssd[0]
;
...
@@ -67,7 +67,7 @@ unsigned int CalcSSD(__local unsigned int *col_ssd)
...
@@ -67,7 +67,7 @@ unsigned int CalcSSD(__local unsigned int *col_ssd)
return
cache
;
return
cache
;
}
}
uint2
MinSSD
(
__local
unsigned
int
*col_ssd
)
static
uint2
MinSSD
(
__local
unsigned
int
*col_ssd
)
{
{
unsigned
int
ssd[N_DISPARITIES]
;
unsigned
int
ssd[N_DISPARITIES]
;
const
int
win_size
=
(
radius
<<
1
)
;
const
int
win_size
=
(
radius
<<
1
)
;
...
@@ -95,7 +95,7 @@ uint2 MinSSD(__local unsigned int *col_ssd)
...
@@ -95,7 +95,7 @@ uint2 MinSSD(__local unsigned int *col_ssd)
return
(
uint2
)(
mssd,
bestIdx
)
;
return
(
uint2
)(
mssd,
bestIdx
)
;
}
}
void
StepDown
(
int
idx1,
int
idx2,
__global
unsigned
char*
imageL,
static
void
StepDown
(
int
idx1,
int
idx2,
__global
unsigned
char*
imageL,
__global
unsigned
char*
imageR,
int
d,
__local
unsigned
int
*col_ssd
)
__global
unsigned
char*
imageR,
int
d,
__local
unsigned
int
*col_ssd
)
{
{
uint8
imgR1
=
convert_uint8
(
vload8
(
0
,
imageR
+
(
idx1
-
d
-
7
)))
;
uint8
imgR1
=
convert_uint8
(
vload8
(
0
,
imageR
+
(
idx1
-
d
-
7
)))
;
...
@@ -114,7 +114,7 @@ void StepDown(int idx1, int idx2, __global unsigned char* imageL,
...
@@ -114,7 +114,7 @@ void StepDown(int idx1, int idx2, __global unsigned char* imageL,
col_ssd[7
*
(
BLOCK_W
+
win_size
)
]
+=
res.s0
;
col_ssd[7
*
(
BLOCK_W
+
win_size
)
]
+=
res.s0
;
}
}
void
InitColSSD
(
int
x_tex,
int
y_tex,
int
im_pitch,
__global
unsigned
char*
imageL,
static
void
InitColSSD
(
int
x_tex,
int
y_tex,
int
im_pitch,
__global
unsigned
char*
imageL,
__global
unsigned
char*
imageR,
int
d,
__global
unsigned
char*
imageR,
int
d,
__local
unsigned
int
*col_ssd
)
__local
unsigned
int
*col_ssd
)
{
{
...
@@ -153,7 +153,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
...
@@ -153,7 +153,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
int
X
=
get_group_id
(
0
)
*
BLOCK_W
+
get_local_id
(
0
)
+
maxdisp
+
radius
;
int
X
=
get_group_id
(
0
)
*
BLOCK_W
+
get_local_id
(
0
)
+
maxdisp
+
radius
;
#
define
Y
(
get_group_id
(
1
)
*
ROWSperTHREAD
+
radius
)
#
define
Y
(
int
)(
get_group_id
(
1
)
*
ROWSperTHREAD
+
radius
)
__global
unsigned
int*
minSSDImage
=
cminSSDImage
+
X
+
Y
*
cminSSD_step
;
__global
unsigned
int*
minSSDImage
=
cminSSDImage
+
X
+
Y
*
cminSSD_step
;
__global
unsigned
char*
disparImage
=
disp
+
X
+
Y
*
disp_step
;
__global
unsigned
char*
disparImage
=
disp
+
X
+
Y
*
disp_step
;
...
@@ -241,7 +241,7 @@ __kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned
...
@@ -241,7 +241,7 @@ __kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned
///////////////////////////////////
Textureness
filtering
////////////////////////////////////////
///////////////////////////////////
Textureness
filtering
////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
float
sobel
(
__global
unsigned
char
*input,
int
x,
int
y,
int
rows,
int
cols
)
static
float
sobel
(
__global
unsigned
char
*input,
int
x,
int
y,
int
rows,
int
cols
)
{
{
float
conv
=
0
;
float
conv
=
0
;
int
y1
=
y==0?
0
:
y-1
;
int
y1
=
y==0?
0
:
y-1
;
...
@@ -256,7 +256,7 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
...
@@ -256,7 +256,7 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
return
fabs
(
conv
)
;
return
fabs
(
conv
)
;
}
}
float
CalcSums
(
__local
float
*cols,
__local
float
*cols_cache,
int
winsz
)
static
float
CalcSums
(
__local
float
*cols,
__local
float
*cols_cache,
int
winsz
)
{
{
unsigned
int
cache
=
cols[0]
;
unsigned
int
cache
=
cols[0]
;
...
...
modules/ocl/src/opencl/stereobp.cl
View file @
a9a39a3a
...
@@ -65,7 +65,7 @@
...
@@ -65,7 +65,7 @@
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
/////////////////common///////////////////////////////////////
/////////////////common///////////////////////////////////////
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
T
saturate_cast
(
float
v
)
{
inline
T
saturate_cast
(
float
v
)
{
#
ifdef
T_SHORT
#
ifdef
T_SHORT
return
convert_short_sat_rte
(
v
)
;
return
convert_short_sat_rte
(
v
)
;
#
else
#
else
...
@@ -73,7 +73,7 @@ T saturate_cast(float v){
...
@@ -73,7 +73,7 @@ T saturate_cast(float v){
#
endif
#
endif
}
}
T4
saturate_cast4
(
float4
v
)
{
inline
T4
saturate_cast4
(
float4
v
)
{
#
ifdef
T_SHORT
#
ifdef
T_SHORT
return
convert_short4_sat_rte
(
v
)
;
return
convert_short4_sat_rte
(
v
)
;
#
else
#
else
...
@@ -99,7 +99,7 @@ inline float pix_diff_1(const uchar4 l, __global const uchar *rs)
...
@@ -99,7 +99,7 @@ inline float pix_diff_1(const uchar4 l, __global const uchar *rs)
return
abs
((
int
)(
l.x
)
-
*rs
)
;
return
abs
((
int
)(
l.x
)
-
*rs
)
;
}
}
float
pix_diff_4
(
const
uchar4
l,
__global
const
uchar
*rs
)
static
float
pix_diff_4
(
const
uchar4
l,
__global
const
uchar
*rs
)
{
{
uchar4
r
;
uchar4
r
;
r
=
*
((
__global
uchar4
*
)
rs
)
;
r
=
*
((
__global
uchar4
*
)
rs
)
;
...
@@ -235,7 +235,7 @@ __kernel void level_up_message(__global T *src, int src_rows, int src_step,
...
@@ -235,7 +235,7 @@ __kernel void level_up_message(__global T *src, int src_rows, int src_step,
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
////////////////////
calc
all
iterations
/////////////////////
////////////////////
calc
all
iterations
/////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
void
message
(
__global
T
*us_,
__global
T
*ds_,
__global
T
*ls_,
__global
T
*rs_,
static
void
message
(
__global
T
*us_,
__global
T
*ds_,
__global
T
*ls_,
__global
T
*rs_,
const
__global
T
*dt,
const
__global
T
*dt,
int
u_step,
int
msg_disp_step,
int
data_disp_step,
int
u_step,
int
msg_disp_step,
int
data_disp_step,
float4
cmax_disc_term,
float4
cdisc_single_jump
)
float4
cmax_disc_term,
float4
cdisc_single_jump
)
...
...
modules/ocl/src/opencl/stereocsbp.cl
View file @
a9a39a3a
...
@@ -248,7 +248,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
...
@@ -248,7 +248,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
inline
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
float
cdata_weight,
float
cmax_data_term
)
{
{
float
tb
=
0.114f
*
abs
((
int
)
left[0]
-
right[0]
)
;
float
tb
=
0.114f
*
abs
((
int
)
left[0]
-
right[0]
)
;
...
@@ -257,17 +257,21 @@ float compute_3(__global uchar* left, __global uchar* right,
...
@@ -257,17 +257,21 @@ float compute_3(__global uchar* left, __global uchar* right,
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
}
}
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
inline
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
float
cdata_weight,
float
cmax_data_term
)
{
{
return
fmin
(
cdata_weight
*
abs
((
int
)
*left
-
(
int
)
*right
)
,
cdata_weight
*
cmax_data_term
)
;
return
fmin
(
cdata_weight
*
abs
((
int
)
*left
-
(
int
)
*right
)
,
cdata_weight
*
cmax_data_term
)
;
}
}
short
round_short
(
float
v
)
{
inline
short
round_short
(
float
v
)
{
return
convert_short_sat_rte
(
v
)
;
return
convert_short_sat_rte
(
v
)
;
}
}
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////init_data_cost///////////////////////////////////////////////
///////////////////////////////////init_data_cost///////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
init_data_cost_0
(
__global
short
*ctemp,
__global
uchar
*cleft,
__global
uchar
*cright,
__kernel
void
init_data_cost_0
(
__global
short
*ctemp,
__global
uchar
*cleft,
__global
uchar
*cright,
int
h,
int
w,
int
level,
int
channels,
int
h,
int
w,
int
level,
int
channels,
int
cmsg_step1,
float
cdata_weight,
float
cmax_data_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdata_weight,
float
cmax_data_term,
int
cdisp_step1,
...
@@ -993,7 +997,8 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
...
@@ -993,7 +997,8 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
////////////////////////
init
message
/////////////////////////
////////////////////////
init
message
/////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
void
get_first_k_element_increase_0
(
__global
short*
u_new,
__global
short
*d_new,
__global
short
*l_new,
static
void
get_first_k_element_increase_0
(
__global
short*
u_new,
__global
short
*d_new,
__global
short
*l_new,
__global
short
*r_new,
__global
const
short
*u_cur,
__global
const
short
*d_cur,
__global
short
*r_new,
__global
const
short
*u_cur,
__global
const
short
*d_cur,
__global
const
short
*l_cur,
__global
const
short
*r_cur,
__global
const
short
*l_cur,
__global
const
short
*r_cur,
__global
short
*data_cost_selected,
__global
short
*disparity_selected_new,
__global
short
*data_cost_selected,
__global
short
*disparity_selected_new,
...
@@ -1027,7 +1032,8 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
...
@@ -1027,7 +1032,8 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
data_cost_new[id
*
cdisp_step1]
=
SHRT_MAX
;
data_cost_new[id
*
cdisp_step1]
=
SHRT_MAX
;
}
}
}
}
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
static
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
...
@@ -1190,7 +1196,8 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
...
@@ -1190,7 +1196,8 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
////////////////////
calc
all
iterations
/////////////////////
////////////////////
calc
all
iterations
/////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
void
message_per_pixel_0
(
__global
const
short
*data,
__global
short
*msg_dst,
__global
const
short
*msg1,
static
void
message_per_pixel_0
(
__global
const
short
*data,
__global
short
*msg_dst,
__global
const
short
*msg1,
__global
const
short
*msg2,
__global
const
short
*msg3,
__global
const
short
*msg2,
__global
const
short
*msg3,
__global
const
short
*dst_disp,
__global
const
short
*src_disp,
__global
const
short
*dst_disp,
__global
const
short
*src_disp,
int
nr_plane,
__global
short
*temp,
int
nr_plane,
__global
short
*temp,
...
@@ -1226,7 +1233,8 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
...
@@ -1226,7 +1233,8 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
for
(
int
d
=
0
; d < nr_plane; d++)
for
(
int
d
=
0
; d < nr_plane; d++)
msg_dst[d
*
cdisp_step1]
=
convert_short_sat_rte
(
temp[d
*
cdisp_step1]
-
sum
)
;
msg_dst[d
*
cdisp_step1]
=
convert_short_sat_rte
(
temp[d
*
cdisp_step1]
-
sum
)
;
}
}
void
message_per_pixel_1
(
__global
const
float
*data,
__global
float
*msg_dst,
__global
const
float
*msg1,
static
void
message_per_pixel_1
(
__global
const
float
*data,
__global
float
*msg_dst,
__global
const
float
*msg1,
__global
const
float
*msg2,
__global
const
float
*msg3,
__global
const
float
*msg2,
__global
const
float
*msg3,
__global
const
float
*dst_disp,
__global
const
float
*src_disp,
__global
const
float
*dst_disp,
__global
const
float
*src_disp,
int
nr_plane,
__global
float
*temp,
int
nr_plane,
__global
float
*temp,
...
@@ -1262,6 +1270,7 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
...
@@ -1262,6 +1270,7 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
for
(
int
d
=
0
; d < nr_plane; d++)
for
(
int
d
=
0
; d < nr_plane; d++)
msg_dst[d
*
cdisp_step1]
=
temp[d
*
cdisp_step1]
-
sum
;
msg_dst[d
*
cdisp_step1]
=
temp[d
*
cdisp_step1]
-
sum
;
}
}
__kernel
void
compute_message_0
(
__global
short
*u_,
__global
short
*d_,
__global
short
*l_,
__global
short
*r_,
__kernel
void
compute_message_0
(
__global
short
*u_,
__global
short
*d_,
__global
short
*l_,
__global
short
*r_,
__global
const
short
*data_cost_selected,
__global
const
short
*selected_disp_pyr_cur,
__global
const
short
*data_cost_selected,
__global
const
short
*selected_disp_pyr_cur,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
...
@@ -1293,6 +1302,7 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
...
@@ -1293,6 +1302,7 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
}
}
}
}
__kernel
void
compute_message_1
(
__global
float
*u_,
__global
float
*d_,
__global
float
*l_,
__global
float
*r_,
__kernel
void
compute_message_1
(
__global
float
*u_,
__global
float
*d_,
__global
float
*l_,
__global
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*selected_disp_pyr_cur,
__global
const
float
*data_cost_selected,
__global
const
float
*selected_disp_pyr_cur,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
...
@@ -1327,6 +1337,7 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
...
@@ -1327,6 +1337,7 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////
output
////////////////////////////
///////////////////////////
output
////////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
__kernel
void
compute_disp_0
(
__global
const
short
*u_,
__global
const
short
*d_,
__global
const
short
*l_,
__kernel
void
compute_disp_0
(
__global
const
short
*u_,
__global
const
short
*d_,
__global
const
short
*l_,
__global
const
short
*r_,
__global
const
short
*
data_cost_selected,
__global
const
short
*r_,
__global
const
short
*
data_cost_selected,
__global
const
short
*disp_selected_pyr,
__global
const
short
*disp_selected_pyr,
...
@@ -1364,6 +1375,7 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
...
@@ -1364,6 +1375,7 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
disp[res_step
*
y
+
x]
=
best
;
disp[res_step
*
y
+
x]
=
best
;
}
}
}
}
__kernel
void
compute_disp_1
(
__global
const
float
*u_,
__global
const
float
*d_,
__global
const
float
*l_,
__kernel
void
compute_disp_1
(
__global
const
float
*u_,
__global
const
float
*d_,
__global
const
float
*l_,
__global
const
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*disp_selected_pyr,
__global
const
float
*disp_selected_pyr,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment