Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
21ad8e92
Commit
21ad8e92
authored
Sep 29, 2014
by
vbystricky
Committed by
VBystricky
Oct 23, 2014
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimize OpenCL version of StereoBM function
Fix problems on NVidia devices.
parent
e40567ea
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
168 additions
and
133 deletions
+168
-133
stereobm.cl
modules/calib3d/src/opencl/stereobm.cl
+152
-116
stereobm.cpp
modules/calib3d/src/stereobm.cpp
+16
-17
No files found.
modules/calib3d/src/opencl/stereobm.cl
View file @
21ad8e92
...
...
@@ -44,217 +44,251 @@
//////////////////////////////////////////
stereoBM
//////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
csize
#
define
MAX_VAL
32767
void
calcDisp
(
__local
short
*
cost,
__global
short
*
disp,
int
uniquenessRatio,
int
mindisp,
int
ndisp,
int
w,
__local
int
*
bestDisp,
__local
int
*
bestCost,
int
d,
int
x,
int
y,
int
cols,
int
rows,
int
wsz2
)
#
ifndef
WSZ
#
define
WSZ
2
#
endif
#
define
WSZ2
(
WSZ
/
2
)
#
ifdef
DEFINE_KERNEL_STEREOBM
#
define
DISPARITY_SHIFT
4
#
define
FILTERED
((
MIN_DISP
-
1
)
<<
DISPARITY_SHIFT
)
void
calcDisp
(
__local
short
*
cost,
__global
short
*
disp,
int
uniquenessRatio,
__local
int
*
bestDisp,
__local
int
*
bestCost,
int
d,
int
x,
int
y,
int
cols,
int
rows
)
{
short
FILTERED
=
(
mindisp
-
1
)
<<4
;
int
best_disp
=
*bestDisp,
best_cost
=
*bestCost,
best_disp_back
=
ndisp
-
best_disp
-
1
;
int
best_disp
=
*bestDisp,
best_cost
=
*bestCost
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
short
c
=
cost[0]
;
int
thresh
=
best_cost
+
(
best_cost
*
uniquenessRatio
/
100
)
;
bool
notUniq
=
(
(
c
<=
thresh
)
&&
(
d
<
(
best_disp
-
1
)
||
d
>
(
best_disp
+
1
)
)
)
;
int
thresh
=
best_cost
+
(
best_cost
*
uniquenessRatio/100
)
;
bool
notUniq
=
(
(
c
<=
thresh
)
&&
(
d
<
(
best_disp_back
-
1
)
||
d
>
(
best_disp_back
+
1
)
)
)
;
if
(
notUniq
)
if
(
notUniq
)
*bestCost
=
FILTERED
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
*bestCost
!=
FILTERED
&&
x
<
cols
-wsz2-mindisp
&&
y
<
rows-wsz2
&&
d
==
best_disp_back
)
if
(
*bestCost
!=
FILTERED
&&
x
<
cols
-
WSZ2
-
MIN_DISP
&&
y
<
rows
-
WSZ2
&&
d
==
best_disp
)
{
int
y3
=
(
best_disp_back
>
0
)
?
cost[-w]
:
cost[w],
y2
=
c,
y1
=
(
best_disp_back
<
ndisp-1
)
?
cost[w]
:
cost[-w]
;
int
d_aprox
=
y3+y1-2*y2
+
abs
(
y3-y1
)
;
disp[0]
=
(
short
)(((
best_disp_back
+
mindisp
)
*256
+
(
d_aprox
!=
0
?
(
y3-y1
)
*256/d_aprox
:
0
)
+
15
)
>>
4
)
;
int
d_aprox
=
0
;
int
yp
=0,
yn
=
0
;
if
((
0
<
best_disp
)
&&
(
best_disp
<
NUM_DISP
-
1
))
{
yp
=
cost[-2
*
BLOCK_SIZE_Y]
;
yn
=
cost[2
*
BLOCK_SIZE_Y]
;
d_aprox
=
yp
+
yn
-
2
*
c
+
abs
(
yp
-
yn
)
;
}
disp[0]
=
(
short
)(((
best_disp
+
MIN_DISP
)
*256
+
(
d_aprox
!=
0
?
(
yp
-
yn
)
*
256
/
d_aprox
:
0
)
+
15
)
>>
4
)
;
}
}
int
calcLocalIdx
(
int
x,
int
y,
int
d,
int
w
)
{
return
d*2*w
+
(
w
-
1
-
y
+
x
)
;
}
void
calcNewCoordinates
(
int
*
x,
int
*
y,
int
nthread
)
{
int
oldX
=
*x
-
(
1-nthread
)
,
oldY
=
*y
;
*x
=
(
oldX
==
oldY
)
?
(
0*nthread
+
(
oldX
+
2
)
*
(
1-nthread
)
)
:
(
oldX+1
)
*
(
1-nthread
)
+
(
oldX+1
)
*nthread
;
*y
=
(
oldX
==
oldY
)
?
(
0*
(
1-nthread
)
+
(
oldY
+
1
)
*nthread
)
:
oldY
+
1*
(
1-nthread
)
;
}
short
calcCostBorder
(
__global
const
uchar
*
leftptr,
__global
const
uchar
*
rightptr,
int
x,
int
y,
int
nthread,
int
wsz2,
short
*
costbuf,
int
*
h,
int
cols,
int
d,
short
cost,
int
winsize
)
short
*
costbuf,
int
*h,
int
cols,
int
d,
short
cost
)
{
int
head
=
(
*h
)
%wsz
;
int
head
=
(
*h
)
%
WSZ
;
__global
const
uchar
*
left,
*
right
;
int
idx
=
mad24
(
y
+wsz2*
(
2*nthread-1
)
,
cols,
x+wsz2*
(
1-2*
nthread
))
;
int
idx
=
mad24
(
y
+
WSZ2
*
(
2
*
nthread
-
1
)
,
cols,
x
+
WSZ2
*
(
1
-
2
*
nthread
))
;
left
=
leftptr
+
idx
;
right
=
rightptr
+
(
idx
-
d
)
;
int
shift
=
1*nthread
+
cols*
(
1-nthread
)
;
short
costdiff
=
0
;
for
(
int
i
=
0
; i < winsize; i++)
if
(
0
==
nthread
)
{
#
pragma
unroll
for
(
int
i
=
0
; i < WSZ; i++)
{
costdiff
+=
abs
(
left[0]
-
right[0]
)
;
left
+=
cols
;
right
+=
cols
;
}
}
else
//
(
1
==
nthread
)
{
costdiff
+=
abs
(
left[0]
-
right[0]
)
;
left
+=
shift
;
right
+=
shift
;
#
pragma
unroll
for
(
int
i
=
0
; i < WSZ; i++)
{
costdiff
+=
abs
(
left[i]
-
right[i]
)
;
}
}
cost
+=
costdiff
-
costbuf[head]
;
costbuf[head]
=
costdiff
;
(
*h
)
=
(
*h
)
%wsz
+
1
;
*h
=
head
+
1
;
return
cost
;
}
short
calcCostInside
(
__global
const
uchar
*
leftptr,
__global
const
uchar
*
rightptr,
int
x,
int
y,
int
wsz2,
int
cols,
int
d,
short
cost_up_left,
short
cost_up,
short
cost_left,
int
winsize
)
int
cols,
int
d,
short
cost_up_left,
short
cost_up,
short
cost_left
)
{
__global
const
uchar
*
left,
*
right
;
int
idx
=
mad24
(
y
-wsz2-1,
cols,
x-wsz2-
1
)
;
int
idx
=
mad24
(
y
-
WSZ2
-
1
,
cols,
x
-
WSZ2
-
1
)
;
left
=
leftptr
+
idx
;
right
=
rightptr
+
(
idx
-
d
)
;
int
idx2
=
winsize
*cols
;
int
idx2
=
WSZ
*cols
;
uchar
corrner1
=
abs
(
left[0]
-
right[0]
)
,
corrner2
=
abs
(
left[
winsize]
-
right[winsize
]
)
,
corrner2
=
abs
(
left[
WSZ]
-
right[WSZ
]
)
,
corrner3
=
abs
(
left[idx2]
-
right[idx2]
)
,
corrner4
=
abs
(
left[idx2
+
winsize]
-
right[idx2
+
winsize
]
)
;
corrner4
=
abs
(
left[idx2
+
WSZ]
-
right[idx2
+
WSZ
]
)
;
return
cost_up
+
cost_left
-
cost_up_left
+
corrner1
-
corrner2
-
corrner3
+
corrner4
;
}
__kernel
void
stereoBM
(
__global
const
uchar
*
leftptr,
__global
const
uchar
*
rightptr,
__global
uchar
*
dispptr,
int
disp_step,
int
disp_offset,
int
rows,
int
cols,
int
mindisp,
int
ndisp,
int
preFilterCap,
int
textureTreshold,
int
uniquenessRatio,
int
sizeX,
int
sizeY,
int
winsize
)
__kernel
void
stereoBM
(
__global
const
uchar
*
leftptr,
__global
const
uchar
*
rightptr,
__global
uchar
*
dispptr,
int
disp_step,
int
disp_offset,
int
rows,
int
cols,
//
rows,
cols
of
left
and
right
images,
not
disp
int
textureTreshold,
int
uniquenessRatio
)
{
int
gx
=
get_global_id
(
0
)
*sizeX
;
int
g
y
=
get_global_id
(
1
)
*sizeY
;
int
lz
=
get_local_id
(
2
)
;
int
lz
=
get_local_id
(
0
)
;
int
g
x
=
get_global_id
(
1
)
*
BLOCK_SIZE_X
;
int
gy
=
get_global_id
(
2
)
*
BLOCK_SIZE_Y
;
int
nthread
=
lz/ndisp
;
int
d
=
lz%ndisp
;
int
wsz2
=
wsz/2
;
int
nthread
=
lz
/
NUM_DISP
;
int
disp_idx
=
lz
%
NUM_DISP
;
__global
short
*
disp
;
__global
const
uchar
*
left,
*
right
;
__local
short
costFunc[csize]
;
__local
short
costFunc[2
*
BLOCK_SIZE_Y
*
NUM_DISP]
;
__local
short
*
cost
;
__local
int
best_disp[2]
;
__local
int
best_cost[2]
;
best_cost[nthread]
=
MAX_VAL
;
best_disp[nthread]
=
MAX_VAL
;
best_disp[nthread]
=
-1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
short
costbuf[
wsz
]
;
short
costbuf[
WSZ
]
;
int
head
=
0
;
int
shiftX
=
wsz2
+
ndisp
+
mindisp
-
1
;
int
shiftY
=
wsz
2
;
int
shiftX
=
WSZ2
+
NUM_DISP
+
MIN_DISP
-
1
;
int
shiftY
=
WSZ
2
;
int
x
=
gx
+
shiftX,
y
=
gy
+
shiftY,
lx
=
0
,
ly
=
0
;
int
costIdx
=
calcLocalIdx
(
lx,
ly,
d,
sizeY
)
;
int
costIdx
=
disp_idx
*
2
*
BLOCK_SIZE_Y
+
(
BLOCK_SIZE_Y
-
1
)
;
cost
=
costFunc
+
costIdx
;
int
tempcost
=
0
;
if
(
x
<
cols-wsz2-mindisp
&&
y
<
rows-wsz
2
)
if
(
x
<
cols
-
WSZ2
-
MIN_DISP
&&
y
<
rows
-
WSZ
2
)
{
int
shift
=
1*nthread
+
cols*
(
1-nthread
)
;
for
(
int
i
=
0
; i < winsize; i++)
if
(
0
==
nthread
)
{
int
idx
=
mad24
(
y-wsz2+i*nthread,
cols,
x-wsz2+i*
(
1-nthread
))
;
left
=
leftptr
+
idx
;
right
=
rightptr
+
(
idx
-
d
)
;
short
costdiff
=
0
;
for
(
int
j
=
0
; j < winsize; j++)
#
pragma
unroll
for
(
int
i
=
0
; i < WSZ; i++)
{
costdiff
+=
abs
(
left[0]
-
right[0]
)
;
left
+=
shift
;
right
+=
shift
;
int
idx
=
mad24
(
y
-
WSZ2,
cols,
x
-
WSZ2
+
i
)
;
left
=
leftptr
+
idx
;
right
=
rightptr
+
(
idx
-
disp_idx
)
;
short
costdiff
=
0
;
for
(
int
j
=
0
; j < WSZ; j++)
{
costdiff
+=
abs
(
left[0]
-
right[0]
)
;
left
+=
cols
;
right
+=
cols
;
}
costbuf[i]
=
costdiff
;
}
if
(
nthread==1
)
}
else
//
(
1
==
nthread
)
{
#
pragma
unroll
for
(
int
i
=
0
; i < WSZ; i++)
{
int
idx
=
mad24
(
y
-
WSZ2
+
i,
cols,
x
-
WSZ2
)
;
left
=
leftptr
+
idx
;
right
=
rightptr
+
(
idx
-
disp_idx
)
;
short
costdiff
=
0
;
for
(
int
j
=
0
; j < WSZ; j++)
{
costdiff
+=
abs
(
left[j]
-
right[j]
)
;
}
tempcost
+=
costdiff
;
costbuf[i]
=
costdiff
;
}
costbuf[head]
=
costdiff
;
head++
;
}
}
if
(
nthread==
1
)
if
(
nthread
==
1
)
{
cost[0]
=
tempcost
;
atomic_min
(
best_cost
+nthread
,
tempcost
)
;
atomic_min
(
best_cost
+
1
,
tempcost
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
best_cost[1]
==
tempcost
)
atomic_min
(
best_disp
+
1
,
ndisp
-
d
-
1
)
;
if
(
best_cost[1]
==
tempcost
)
atomic_max
(
best_disp
+
1
,
disp_idx
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
dispIdx
=
mad24
(
gy,
disp_step,
disp_offset
+
gx*
(
int
)
sizeof
(
shor
t
))
;
int
dispIdx
=
mad24
(
gy,
disp_step,
mad24
((
int
)
sizeof
(
short
)
,
gx,
disp_offse
t
))
;
disp
=
(
__global
short
*
)(
dispptr
+
dispIdx
)
;
calcDisp
(
cost,
disp,
uniquenessRatio,
mindisp,
ndisp,
2*sizeY,
best_disp
+
1
,
best_cost+1,
d,
x,
y,
cols,
rows,
wsz2
)
;
calcDisp
(
cost,
disp,
uniquenessRatio,
best_disp
+
1
,
best_cost
+
1
,
disp_idx,
x,
y,
cols,
rows
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
lx
=
1
-
nthread
;
ly
=
nthread
;
for
(
int
i
=
0
; i < sizeY*sizeX/
2; i++)
for
(
int
i
=
0
; i < BLOCK_SIZE_Y * BLOCK_SIZE_X /
2; i++)
{
x
=
(
lx
<
size
X
)
?
gx
+
shiftX
+
lx
:
cols
;
y
=
(
ly
<
size
Y
)
?
gy
+
shiftY
+
ly
:
rows
;
x
=
(
lx
<
BLOCK_SIZE_
X
)
?
gx
+
shiftX
+
lx
:
cols
;
y
=
(
ly
<
BLOCK_SIZE_
Y
)
?
gy
+
shiftY
+
ly
:
rows
;
best_cost[nthread]
=
MAX_VAL
;
best_disp[nthread]
=
MAX_VAL
;
best_disp[nthread]
=
-1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
costIdx
=
calcLocalIdx
(
lx,
ly,
d,
sizeY
)
;
costIdx
=
mad24
(
2
*
BLOCK_SIZE_Y,
disp_idx,
(
BLOCK_SIZE_Y
-
1
-
ly
+
lx
))
;
if
(
0
>
costIdx
)
costIdx
=
BLOCK_SIZE_Y
-
1
;
cost
=
costFunc
+
costIdx
;
if
(
x
<
cols-wsz2-mindisp
&&
y
<
rows-wsz2
)
if
(
x
<
cols
-
WSZ2
-
MIN_DISP
&&
y
<
rows
-
WSZ2
)
{
tempcost
=
(
ly*
(
1-nthread
)
+
lx*nthread
==
0
)
?
calcCostBorder
(
leftptr,
rightptr,
x,
y,
nthread,
wsz2,
costbuf,
&head,
cols,
d,
cost[2*nthread-1],
winsize
)
:
calcCostInside
(
leftptr,
rightptr,
x,
y,
wsz2,
cols,
d,
cost[0],
cost[1],
cost[-1],
winsize
)
;
tempcost
=
(
ly
*
(
1
-
nthread
)
+
lx
*
nthread
==
0
)
?
calcCostBorder
(
leftptr,
rightptr,
x,
y,
nthread,
costbuf,
&head,
cols,
disp_idx,
cost[2*nthread-1]
)
:
calcCostInside
(
leftptr,
rightptr,
x,
y,
cols,
disp_idx,
cost[0],
cost[1],
cost[-1]
)
;
}
cost[0]
=
tempcost
;
atomic_min
(
best_cost
+
nthread,
tempcost
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
best_cost[nthread]
==
tempcost
)
atomic_m
in
(
best_disp
+
nthread,
ndisp
-
d
-
1
)
;
if
(
best_cost[nthread]
==
tempcost
)
atomic_m
ax
(
best_disp
+
nthread,
disp_idx
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
dispIdx
=
mad24
(
gy+ly,
disp_step,
disp_offset
+
(
gx+lx
)
*
(
int
)
sizeof
(
shor
t
))
;
dispIdx
=
mad24
(
gy
+
ly,
disp_step,
mad24
((
int
)
sizeof
(
short
)
,
(
gx
+
lx
)
,
disp_offse
t
))
;
disp
=
(
__global
short
*
)(
dispptr
+
dispIdx
)
;
calcDisp
(
cost,
disp,
uniquenessRatio,
mindisp,
ndisp,
2*sizeY,
best_disp
+
nthread,
best_cost
+
nthread,
d,
x,
y,
cols,
rows,
wsz2
)
;
calcDisp
(
cost,
disp,
uniquenessRatio,
best_disp
+
nthread,
best_cost
+
nthread,
disp_idx,
x,
y,
cols,
rows
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
calcNewCoordinates
(
&lx,
&ly,
nthread
)
;
if
(
lx
+
nthread
-
1
==
ly
)
{
lx
=
(
lx
+
nthread
+
1
)
*
(
1
-
nthread
)
;
ly
=
(
ly
+
1
)
*
nthread
;
}
else
{
lx
+=
nthread
;
ly
=
ly
-
nthread
+
1
;
}
}
}
#
endif
#
endif
//DEFINE_KERNEL_STEREOBM
//////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////
Norm
Prefiler
////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
prefilter_norm
(
__global
unsigned
char
*input,
__global
unsigned
char
*output,
int
rows,
int
cols,
int
prefilterCap,
int
winsize,
int
scale_g,
int
scale_s
)
int
rows,
int
cols,
int
prefilterCap,
int
scale_g,
int
scale_s
)
{
//
prefilterCap
in
range
1..63,
checked
in
StereoBMImpl::compute
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
wsz2
=
winsize/2
;
if
(
x
<
cols
&&
y
<
rows
)
{
...
...
@@ -262,13 +296,13 @@ __kernel void prefilter_norm(__global unsigned char *input, __global unsigned ch
input[y
*
cols
+
max
(
x-1,0
)
]
*
1
+
input[
y
*
cols
+
x]
*
4
+
input[y
*
cols
+
min
(
x+1,
cols-1
)
]
*
1
+
input[min
(
y+1,
rows-1
)
*
cols
+
x]
*
1
;
int
cov2
=
0
;
for
(
int
i
=
-
wsz2
; i < wsz
2+1; i++)
for
(
int
j
=
-
wsz2
; j < wsz
2+1; j++)
for
(
int
i
=
-
WSZ2
; i < WSZ
2+1; i++)
for
(
int
j
=
-
WSZ2
; j < WSZ
2+1; j++)
cov2
+=
input[clamp
(
y+i,
0
,
rows-1
)
*
cols
+
clamp
(
x+j,
0
,
cols-1
)
]
;
int
res
=
(
cov1*scale_g
-
cov2*scale_s
)
>>10
;
res
=
min
(
clamp
(
res,
-prefilterCap,
prefilterCap
)
+
prefilterCap,
255
)
;
output[y
*
cols
+
x]
=
res
&
0xFF
;
res
=
clamp
(
res,
-prefilterCap,
prefilterCap
)
+
prefilterCap
;
output[y
*
cols
+
x]
=
res
;
}
}
...
...
@@ -280,20 +314,21 @@ __kernel void prefilter_norm(__global unsigned char *input, __global unsigned ch
__kernel
void
prefilter_xsobel
(
__global
unsigned
char
*input,
__global
unsigned
char
*output,
int
rows,
int
cols,
int
prefilterCap
)
{
//
prefilterCap
in
range
1..63,
checked
in
StereoBMImpl::compute
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
output[y
*
cols
+
x]
=
min
(
prefilterCap,
255
)
&
0xFF
;
}
if
(
x
<
cols
&&
y
<
rows
&&
x
>
0
&&
!
((
y
==
rows-1
)
&
(
rows%2==1
)
)
)
{
int
cov
=
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
+
input[
(
y
)
*
cols
+
(
x-1
)
]
*
(
-2
)
+
input[
(
y
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
2
)
+
input[
((
y<rows-1
)
?
(
y+1
)
:
(
y-1
))
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y<rows-1
)
?
(
y+1
)
:
(
y-1
))
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
;
if
(
0
<
x
&&
!
((
y
==
rows-1
)
&
(
rows%2==1
)
)
)
{
int
cov
=
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
+
input[
(
y
)
*
cols
+
(
x-1
)
]
*
(
-2
)
+
input[
(
y
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
2
)
+
input[
((
y<rows-1
)
?
(
y+1
)
:
(
y-1
))
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y<rows-1
)
?
(
y+1
)
:
(
y-1
))
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
;
cov
=
min
(
clamp
(
cov,
-prefilterCap,
prefilterCap
)
+
prefilterCap,
255
)
;
output[y
*
cols
+
x]
=
cov
&
0xFF
;
cov
=
clamp
(
cov,
-prefilterCap,
prefilterCap
)
+
prefilterCap
;
output[y
*
cols
+
x]
=
cov
;
}
else
output[y
*
cols
+
x]
=
prefilterCap
;
}
}
}
\ No newline at end of file
modules/calib3d/src/stereobm.cpp
View file @
21ad8e92
...
...
@@ -88,7 +88,7 @@ struct StereoBMParams
static
bool
ocl_prefilter_norm
(
InputArray
_input
,
OutputArray
_output
,
int
winsize
,
int
prefilterCap
)
{
ocl
::
Kernel
k
(
"prefilter_norm"
,
ocl
::
calib3d
::
stereobm_oclsrc
);
ocl
::
Kernel
k
(
"prefilter_norm"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D WSZ=%d"
,
winsize
)
);
if
(
k
.
empty
())
return
false
;
...
...
@@ -102,7 +102,7 @@ static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsi
size_t
globalThreads
[
3
]
=
{
input
.
cols
,
input
.
rows
,
1
};
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
input
),
ocl
::
KernelArg
::
PtrWriteOnly
(
output
),
input
.
rows
,
input
.
cols
,
prefilterCap
,
winsize
,
scale_g
,
scale_s
);
prefilterCap
,
scale_g
,
scale_s
);
return
k
.
run
(
2
,
globalThreads
,
NULL
,
false
);
}
...
...
@@ -743,9 +743,16 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,
int
wsz
=
state
->
SADWindowSize
;
int
wsz2
=
wsz
/
2
;
int
sizeX
=
std
::
max
(
11
,
27
-
ocl
::
Device
::
getDefault
().
maxComputeUnits
()
),
sizeY
=
sizeX
-
1
,
N
=
ndisp
*
2
;
ocl
::
Device
devDef
=
ocl
::
Device
::
getDefault
();
int
sizeX
=
devDef
.
isIntel
()
?
32
:
std
::
max
(
11
,
27
-
devDef
.
maxComputeUnits
()),
sizeY
=
sizeX
-
1
,
N
=
ndisp
*
2
;
ocl
::
Kernel
k
(
"stereoBM"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D csize=%d -D wsz=%d"
,
(
2
*
sizeY
)
*
ndisp
,
wsz
)
);
cv
::
String
opt
=
cv
::
format
(
"-D DEFINE_KERNEL_STEREOBM -D MIN_DISP=%d -D NUM_DISP=%d"
" -D BLOCK_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D WSZ=%d"
,
mindisp
,
ndisp
,
sizeX
,
sizeY
,
wsz
);
ocl
::
Kernel
k
(
"stereoBM"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
opt
);
if
(
k
.
empty
())
return
false
;
...
...
@@ -753,15 +760,14 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,
int
cols
=
left
.
cols
,
rows
=
left
.
rows
;
_disp
.
create
(
_left
.
size
(),
CV_16S
);
_disp
.
setTo
((
mindisp
-
1
)
<<
4
);
_disp
.
setTo
((
mindisp
-
1
)
<<
4
);
Rect
roi
=
Rect
(
Point
(
wsz2
+
mindisp
+
ndisp
-
1
,
wsz2
),
Point
(
cols
-
wsz2
-
mindisp
,
rows
-
wsz2
)
);
UMat
disp
=
(
_disp
.
getUMat
())(
roi
);
int
globalX
=
disp
.
cols
/
sizeX
,
globalY
=
disp
.
rows
/
sizeY
;
globalX
+=
(
disp
.
cols
%
sizeX
)
>
0
?
1
:
0
;
globalY
+=
(
disp
.
rows
%
sizeY
)
>
0
?
1
:
0
;
size_t
globalThreads
[
3
]
=
{
globalX
,
globalY
,
N
};
size_t
localThreads
[
3
]
=
{
1
,
1
,
N
};
int
globalX
=
(
disp
.
cols
+
sizeX
-
1
)
/
sizeX
,
globalY
=
(
disp
.
rows
+
sizeY
-
1
)
/
sizeY
;
size_t
globalThreads
[
3
]
=
{
N
,
globalX
,
globalY
};
size_t
localThreads
[
3
]
=
{
N
,
1
,
1
};
int
idx
=
0
;
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
left
));
...
...
@@ -769,15 +775,8 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
WriteOnlyNoSize
(
disp
));
idx
=
k
.
set
(
idx
,
rows
);
idx
=
k
.
set
(
idx
,
cols
);
idx
=
k
.
set
(
idx
,
mindisp
);
idx
=
k
.
set
(
idx
,
ndisp
);
idx
=
k
.
set
(
idx
,
state
->
preFilterCap
);
idx
=
k
.
set
(
idx
,
state
->
textureThreshold
);
idx
=
k
.
set
(
idx
,
state
->
uniquenessRatio
);
idx
=
k
.
set
(
idx
,
sizeX
);
idx
=
k
.
set
(
idx
,
sizeY
);
idx
=
k
.
set
(
idx
,
wsz
);
return
k
.
run
(
3
,
globalThreads
,
localThreads
,
false
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment