Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0904f10a
Commit
0904f10a
authored
Feb 18, 2014
by
Konstantin Matskevich
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimizations
parent
bfc843a5
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
146 additions
and
103 deletions
+146
-103
perf_stereobm.cpp
modules/calib3d/perf/opencl/perf_stereobm.cpp
+3
-3
stereobm.cl
modules/calib3d/src/opencl/stereobm.cl
+112
-86
stereobm.cpp
modules/calib3d/src/stereobm.cpp
+13
-12
test_stereobm.cpp
modules/calib3d/test/opencl/test_stereobm.cpp
+18
-2
No files found.
modules/calib3d/perf/opencl/perf_stereobm.cpp
View file @
0904f10a
...
@@ -51,7 +51,7 @@ namespace ocl {
...
@@ -51,7 +51,7 @@ namespace ocl {
typedef
std
::
tr1
::
tuple
<
int
,
int
>
StereoBMFixture_t
;
typedef
std
::
tr1
::
tuple
<
int
,
int
>
StereoBMFixture_t
;
typedef
TestBaseWithParam
<
StereoBMFixture_t
>
StereoBMFixture
;
typedef
TestBaseWithParam
<
StereoBMFixture_t
>
StereoBMFixture
;
OCL_PERF_TEST_P
(
StereoBMFixture
,
StereoBM
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
32
,
64
,
128
),
OCL_PERF_ENUM
(
11
,
21
)
)
)
OCL_PERF_TEST_P
(
StereoBMFixture
,
StereoBM
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
32
,
64
),
OCL_PERF_ENUM
(
11
,
21
)
)
)
{
{
const
int
n_disp
=
get
<
0
>
(
GetParam
()),
winSize
=
get
<
1
>
(
GetParam
());
const
int
n_disp
=
get
<
0
>
(
GetParam
()),
winSize
=
get
<
1
>
(
GetParam
());
UMat
left
,
right
,
disp
;
UMat
left
,
right
,
disp
;
...
@@ -64,11 +64,11 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32,
...
@@ -64,11 +64,11 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32,
declare
.
in
(
left
,
right
);
declare
.
in
(
left
,
right
);
Ptr
<
StereoBM
>
bm
=
createStereoBM
(
n_disp
,
winSize
);
Ptr
<
StereoBM
>
bm
=
createStereoBM
(
n_disp
,
winSize
);
bm
->
setPreFilterType
(
bm
->
PREFILTER_
NORMALIZED_RESPONSE
);
bm
->
setPreFilterType
(
bm
->
PREFILTER_
XSOBEL
);
OCL_TEST_CYCLE
()
bm
->
compute
(
left
,
right
,
disp
);
OCL_TEST_CYCLE
()
bm
->
compute
(
left
,
right
,
disp
);
SANITY_CHECK
(
disp
,
0.05
,
ERROR_RELATIVE
);
SANITY_CHECK
(
disp
,
1e-3
,
ERROR_RELATIVE
);
}
}
}
//ocl
}
//ocl
...
...
modules/calib3d/src/opencl/stereobm.cl
View file @
0904f10a
...
@@ -47,90 +47,119 @@
...
@@ -47,90 +47,119 @@
#
ifdef
csize
#
ifdef
csize
__kernel
void
stereoBM_opt
(
__global
const
uchar
*
left,
__global
const
uchar
*
right,
__global
uchar
*
dispptr,
#
define
MAX_VAL
32767
__kernel
void
stereoBM_opt
(
__global
const
uchar
*
leftptr,
__global
const
uchar
*
rightptr,
__global
uchar
*
dispptr,
int
disp_step,
int
disp_offset,
int
rows,
int
cols,
int
mindisp,
int
ndisp,
int
disp_step,
int
disp_offset,
int
rows,
int
cols,
int
mindisp,
int
ndisp,
int
preFilterCap,
int
winsize
,
int
textureTreshold,
int
uniquenessRatio
)
int
preFilterCap,
int
nthreads
,
int
textureTreshold,
int
uniquenessRatio
)
{
{
int
total_x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
gx
=
get_group_id
(
0
)
,
x
=
gx*ndisp
;
int
total_y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
int
z
=
get_local_id
(
2
)
;
int
d
=
get_local_id
(
0
)
+
mindisp
;
int
d
=
get_local_id
(
1
)
;
int
wsz2
=
winsize/2
;
int
gy
=
get_group_id
(
1
)
,
y
=
gy*ndisp
+
z*ndisp/nthreads
;
int
wsz2
=
wsz/2
;
short
FILTERED
=
(
mindisp
-
1
)
<<4
;
short
FILTERED
=
(
mindisp
-
1
)
<<4
;
__local
int
cost[csize]
;
__local
short
costFunc[csize]
;
int
textsum[tsize]
;
short
textsum[tsize]
;
if
(
total_x<cols
&&
y<rows
&&
d<ndisp
)
__local
short
*
cost
=
&costFunc[0]
+
d
+
ndisp*ndisp/nthreads*z
;
__global
uchar
*
left,
*
right
;
int
dispIdx
=
mad24
(
total_y,
disp_step,
disp_offset
+
x*
(
int
)
sizeof
(
short
)
)
;
__global
short
*
disp
=
(
__global
short*
)(
dispptr
+
dispIdx
)
;
if
(
x
<
cols
&&
total_y
<
rows
)
{
{
int
dispIdx
=
mad24
(
y,
disp_step,
disp_offset
+
total_x*
(
int
)
sizeof
(
short
)
)
;
__global
short
*
disp
=
(
__global
short*
)(
dispptr
+
dispIdx
)
;
disp[0]
=
FILTERED
;
disp[0]
=
FILTERED
;
}
short
costbuf[wsz]
;
short
textbuf[wsz]
;
int
head
=
0
;
if
(
(
total_x
>
ndisp-1
)
&&
(
y
>
wsz2-1
)
&&
(
total_x
<
cols
+
ndisp
-
cols%ndisp
)
&&
(
y
<
rows
-
wsz2
))
if
(
(
x
>
ndisp+mindisp+wsz2-2
)
&&
(
x
<
cols
-
wsz2
-
mindisp
)
)
{
cost
+=
(
y
<
wsz2
)
?
ndisp*wsz2
:
0
;
y
=
(
y<wsz2
)
?
wsz2
:
y
;
cost[0]
=
0
;
textsum[y-
(
gy*ndisp
)
]
=
0
;
for
(
int
i
=
-wsz2
; (i < wsz2+1) && (y < rows-wsz2); i++)
{
{
for
(
; (x <= ndisp+mindisp+wsz2-2); x++)
left
=
leftptr
+
mad24
(
y+i,
cols,
x-wsz2
)
;
{
right
=
rightptr
+
mad24
(
y+i,
cols,
x-wsz2-d-mindisp
)
;
cost[
(
d-mindisp
)
+ndisp*
(
x%
(
gx*ndisp
))
]
=
INT_MAX
;
textsum[x%
(
gx*ndisp
)
]
=
INT_MAX
;
}
cost[
(
d-mindisp
)
+ndisp*
(
x%
(
gx*ndisp
))
]
=
0
;
textsum[x%
(
gx*ndisp
)
]
=
0
;
for
(
int
i
=
-wsz2
; i < wsz2+1; i++)
for
(
int
j
=
-wsz2
; j < wsz2+1; j++)
{
cost[
(
d-mindisp
)
+ndisp*
(
x%
(
gx*ndisp
))
]
+=
abs
(
left[min
(
y+i,
rows-1
)
*
cols
+
min
(
x+j,
cols-1
)
]
-
right[min
(
y+i,
rows-1
)
*
cols
+
min
(
x+j-d,
cols-1
)
]
)
;
textsum[x%
(
gx*ndisp
)
]
+=
abs
(
left[min
(
y+i,
rows-1
)
*
cols
+
min
(
x+j,
cols-1
)
]
-
preFilterCap
)
;
}
x++
;
for
(
; (x < gx*ndisp + ndisp) && (x < cols-wsz2-mindisp); x++)
{
cost[
(
d-mindisp
)
+ndisp*
(
x%
(
gx*ndisp
))
]
=
cost[
(
d-mindisp
)
+ndisp*
((
x-1
)
%
(
gx*ndisp
))
]
;
textsum[x%
(
gx*ndisp
)
]
=
textsum[
(
x-1
)
%
(
gx*ndisp
)
]
;
for
(
int
i
=
-wsz2
; i < wsz2+1; i++)
{
cost[
(
d-mindisp
)
+ndisp*
(
x%
(
gx*ndisp
))
]
+=
-abs
(
left[min
(
y+i,
rows-1
)
*
cols
+
min
(
x-wsz2-1,
cols-1
)
]
-
right[min
(
y+i,
rows-1
)
*
cols
+
min
(
x-wsz2-1-d,
cols-1
)
]
)
+
abs
(
left[min
(
y+i,
rows-1
)
*
cols
+
min
(
x+wsz2,
cols-1
)
]
-
right[min
(
y+i,
rows-1
)
*
cols
+
min
(
x+wsz2-d,
cols-1
)
]
)
;
textsum[x%
(
gx*ndisp
)
]
+=
-abs
(
left[min
(
y+i,
rows-1
)
*
cols
+
min
(
x-wsz2-1,
cols-1
)
]
-
preFilterCap
)
+
abs
(
left[min
(
y+i,
rows-1
)
*
cols
+
min
(
x+wsz2,
cols-1
)
]
-
preFilterCap
)
;
}
}
for
(
; (x > cols - (cols-1)%ndisp - 1) && (x < cols + ndisp - (cols-1)%ndisp - 1); x++)
int
costdiff
=
0
,
textdiff
=
0
;
#
pragma
unroll
for
(
int
j
=
0
; j < wsz; j++)
{
{
cost[
(
d-mindisp
)
+ndisp*
(
x%
(
gx*ndisp
))
]
=
INT_MAX
;
costdiff
+=
abs
(
left[0]
-
right[0]
)
;
textsum[x%
(
gx*ndisp
)
]
=
INT_MAX
;
textdiff
+=
abs
(
left[0]
-
preFilterCap
)
;
left++
; right++;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
cost[0]
+=
costdiff
;
textsum[y-
(
gy*ndisp
)
]
+=
textdiff
;
costbuf[head]
=
costdiff
;
textbuf[head]
=
textdiff
;
head++
;
}
y++
;
for
(
; (y < gy*ndisp + ndisp/nthreads + z*ndisp/nthreads) && (y < rows-wsz2); y++)
{
head
=
head%wsz
;
cost
+=
ndisp
;
cost[0]
=
cost[-ndisp]
;
textsum[y-
(
gy*ndisp
)
]
=
textsum[
(
y-1
)
-
(
gy*ndisp
)
]
;
left
=
leftptr
+
mad24
(
y-wsz2-1,
cols,
x
-
wsz2
)
;
right
=
rightptr
+
mad24
(
y-wsz2-1,
cols,
x
-
wsz2
-
d
-
mindisp
)
;
int
best_disp
=
FILTERED,
best_cost
=
INT_MAX-1
;
int
costdiff
=
0
,
textdiff
=
0
;
for
(
int
i
=
0
; (i < ndisp); i++)
#
pragma
unroll
for
(
int
i
=
0
; i < wsz; i++)
{
{
best_cost
=
(
cost[i
+
ndisp*
(
d-mindisp
)
]
<
best_cost
)
?
cost[i
+
ndisp*
(
d-mindisp
)
]
:
best_cost
;
costdiff
+=
best_disp
=
(
best_cost
==
cost[i
+
ndisp*
(
d-mindisp
)
]
)
?
i+mindisp
:
best_disp
;
abs
(
left[wsz*cols]
-
right[wsz*cols]
)
;
textdiff
+=
abs
(
left[wsz*cols]
-
preFilterCap
)
;
left++
; right++;
}
}
cost[0]
+=
costdiff
-
costbuf[head]
;
textsum[y-
(
gy*ndisp
)
]
+=
textdiff
-
textbuf[head]
;
costbuf[head]
=
costdiff
;
textbuf[head]
=
textdiff
;
head++
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
thresh
=
best_cost
+
(
best_cost
*
uniquenessRatio/100
)
;
cost
=
&costFunc[0]
+
d*ndisp
;
for
(
int
i
=
0
; (i < ndisp) && (uniquenessRatio > 0); i++)
short
best_disp
=
FILTERED,
best_cost
=
MAX_VAL-1
;
{
#
pragma
unroll
best_disp
=
(
(
cost[i
+
ndisp*
(
d-mindisp
)
]
<=
thresh
)
&&
(
i
<
best_disp
-
mindisp
-
1
|
| i > best_disp - mindisp + 1) ) ?
for
(
int
i
=
0
; i < tsize; i++)
FILTERED : best_disp;
{
}
short
c
=
cost[0]
;
best_cost
=
(
c
<
best_cost
)
?
c
:
best_cost
;
best_disp
=
(
best_cost
==
c
)
?
ndisp
-
i
-
1
:
best_disp
;
cost++
;
}
disp[0] = textsum[d-mindisp] < textureTreshold ? (FILTERED) : (best_disp == FILTERED) ? (short)(best_disp) : (short)(best_disp);
cost
=
&costFunc[0]
+
d*ndisp
;
int
thresh
=
best_cost
+
(
best_cost
*
uniquenessRatio/100
)
;
#
pragma
unroll
for
(
int
i
=
0
; (i < tsize) && (uniquenessRatio > 0); i++)
{
best_disp
=
(
(
cost[0]
<=
thresh
)
&&
(
i
<
(
ndisp
-
best_disp
-
2
)
|
| i > (ndisp - best_disp) ) ) ?
FILTERED : best_disp;
cost++;
}
if( best_disp != FILTERED )
best_disp = (total_y >= rows-wsz2) || (total_y < wsz2) || (textsum[d] < textureTreshold) ? FILTERED : best_disp;
if( best_disp != FILTERED )
{
cost = &costFunc[0] + (ndisp - best_disp - 1) + ndisp*d;
int y3 = ((ndisp - best_disp - 1) > 0) ? cost[-1] : cost[1],
y2 = cost[0],
y1 = ((ndisp - best_disp - 1) < ndisp-1) ? cost[1] : cost[-1];
d = y3+y1-2*y2 + abs(y3-y1);
if( x < cols && total_y < rows)
{
{
int y1 = (best_disp > mindisp) ? cost[(best_disp-mindisp-1) + ndisp*(d-mindisp)] :
disp[0] = (short)(((ndisp - best_disp - 1 + mindisp)*256 + (d != 0 ? (y3-y1)*256/d : 0) + 15) >> 4);
cost[(best_disp-mindisp+1) + ndisp*(d-mindisp)],
y2 = cost[(best_disp-mindisp) + ndisp*(d-mindisp)],
y3 = (best_disp < mindisp+ndisp-1) ? cost[(best_disp-mindisp+1) + ndisp*(d-mindisp)] :
cost[(best_disp-mindisp-1) + ndisp*(d-mindisp)];
float a = (y3 - ((best_disp+1)*(y2-y1) + best_disp*y1 - (best_disp-1)*y2)/(best_disp - (best_disp-1)) )/
((best_disp+1)*((best_disp+1) - (best_disp-1) - best_disp) + (best_disp-1)*best_disp);
float b = (y2 - y1)/(best_disp - (best_disp-1)) - a*((best_disp-1)+best_disp);
disp[0] = (y1 == y2 || y3 == y2) ? (short)(best_disp*16) :(short)(-b/(2*a)*16);
}
}
}
}
}
}
...
@@ -148,7 +177,7 @@ __kernel void stereoBM_BF(__global const uchar * left, __global const uchar * ri
...
@@ -148,7 +177,7 @@ __kernel void stereoBM_BF(__global const uchar * left, __global const uchar * ri
int y = get_global_id(1);
int y = get_global_id(1);
int wsz2 = winsize/2;
int wsz2 = winsize/2;
short FILTERED = (mindisp - 1)<<4;
short FILTERED = (mindisp - 1)<<4;
if(x < cols && y < rows )
if(x < cols && y < rows )
{
{
int dispIdx = mad24(y, disp_step, disp_offset + x*(int)sizeof(short) );
int dispIdx = mad24(y, disp_step, disp_offset + x*(int)sizeof(short) );
...
@@ -161,21 +190,20 @@ __kernel void stereoBM_BF(__global const uchar * left, __global const uchar * ri
...
@@ -161,21 +190,20 @@ __kernel void stereoBM_BF(__global const uchar * left, __global const uchar * ri
for(int d = mindisp; d < ndisp+mindisp; d++)
for(int d = mindisp; d < ndisp+mindisp; d++)
{
{
cost[
d-mindisp
] = 0;
cost[
(ndisp-1) - (d - mindisp)
] = 0;
for(int i = -wsz2; i < wsz2+1; i++)
for(int i = -wsz2; i < wsz2+1; i++)
for(int j = -wsz2; j < wsz2+1; j++)
for(int j = -wsz2; j < wsz2+1; j++)
{
{
textsum += abs( left[min( y+i, rows-1 ) * cols + min( x+j, cols-1 )] - preFilterCap );
textsum += (d == mindisp) ? abs( left[ (y+i) * cols + x + j] - preFilterCap ) : 0;
cost[d-mindisp] += abs( left[min( y+i, rows-1 ) * cols + min( x+j, cols-1 )]
cost[(ndisp-1) - (d - mindisp)] += abs(left[(y+i) * cols + x+j] - right[(y+i) * cols + x+j-d] );
- right[min( y+i, rows-1 ) * cols + min( x+j-d, cols-1 )] );
}
}
}
}
int best_disp =
mindisp, best_cost = cost[0]
;
int best_disp =
-1, best_cost = INT_MAX
;
for(int d =
mindisp; d < ndisp+mindisp; d++
)
for(int d =
ndisp + mindisp - 1; d > mindisp-1; d--
)
{
{
best_cost = (cost[d-mindisp] < best_cost) ? cost[d-mindisp] : best_cost;
best_cost = (cost[d-mindisp] < best_cost) ? cost[d-mindisp] : best_cost;
best_disp = (best_cost == cost[d-mindisp]) ?
d
: best_disp;
best_disp = (best_cost == cost[d-mindisp]) ?
(d)
: best_disp;
}
}
int thresh = best_cost + (best_cost * uniquenessRatio/100);
int thresh = best_cost + (best_cost * uniquenessRatio/100);
...
@@ -191,10 +219,8 @@ __kernel void stereoBM_BF(__global const uchar * left, __global const uchar * ri
...
@@ -191,10 +219,8 @@ __kernel void stereoBM_BF(__global const uchar * left, __global const uchar * ri
int
y1
=
(
best_disp
>
mindisp
)
?
cost[best_disp-mindisp-1]
:
cost[best_disp-mindisp+1],
int
y1
=
(
best_disp
>
mindisp
)
?
cost[best_disp-mindisp-1]
:
cost[best_disp-mindisp+1],
y2
=
cost[best_disp-mindisp],
y2
=
cost[best_disp-mindisp],
y3
=
(
best_disp
<
mindisp+ndisp-1
)
?
cost[best_disp-mindisp+1]
:
cost[best_disp-mindisp-1]
;
y3
=
(
best_disp
<
mindisp+ndisp-1
)
?
cost[best_disp-mindisp+1]
:
cost[best_disp-mindisp-1]
;
float a = (y3 - ((best_disp+1)*(y2-y1) + best_disp*y1 - (best_disp-1)*y2)/(best_disp - (best_disp-1)) )/
int
_d
=
y3+y1-2*y2
+
abs
(
y3-y1
)
;
((best_disp+1)*((best_disp+1) - (best_disp-1) - best_disp) + (best_disp-1)*best_disp);
disp[0]
=
(
short
)(((
ndisp
-
(
best_disp-mindisp
)
-
1
+
mindisp
)
*256
+
(
_d
!=
0
?
(
y3-y1
)
*256/_d
:
0
)
+
15
)
>>
4
)
;
float b = (y2 - y1)/(best_disp - (best_disp-1)) - a*((best_disp-1)+best_disp);
disp[0] = (y1 == y2 |
|
y2
==
y3
)
?
(
short
)(
best_disp*16
)
:
(
short
)(
-b/
(
2*a
)
*16
)
;
}
}
}
}
}
}
...
@@ -221,10 +247,10 @@ __kernel void prefilter_norm(__global unsigned char *input, __global unsigned ch
...
@@ -221,10 +247,10 @@ __kernel void prefilter_norm(__global unsigned char *input, __global unsigned ch
int
cov2
=
0
;
int
cov2
=
0
;
for
(
int
i
=
-wsz2
; i < wsz2+1; i++)
for
(
int
i
=
-wsz2
; i < wsz2+1; i++)
for
(
int
j
=
-wsz2
; j < wsz2+1; j++)
for
(
int
j
=
-wsz2
; j < wsz2+1; j++)
cov2
+=
input[
min
(
max
(
(
y+i
)
,
0
)
,
rows-1
)
*
cols
+
min
(
max
(
(
x+j
)
,
0
)
,
cols-1
)
]
;
cov2
+=
input[
clamp
(
y+i,
0
,
rows-1
)
*
cols
+
clamp
(
x+j,
0
,
cols-1
)
]
;
int
res
=
(
cov1*scale_g
-
cov2*scale_s
)
>>10
;
int
res
=
(
cov1*scale_g
-
cov2*scale_s
)
>>10
;
res
=
min
(
min
(
max
(
-prefilterCap,
res
)
,
prefilterCap
)
+
prefilterCap,
255
)
;
res
=
min
(
clamp
(
res,
-prefilterCap
,
prefilterCap
)
+
prefilterCap,
255
)
;
output[y
*
cols
+
x]
=
res
&
0xFF
;
output[y
*
cols
+
x]
=
res
&
0xFF
;
}
}
}
}
...
@@ -240,13 +266,13 @@ __kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned
...
@@ -240,13 +266,13 @@ __kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
output[y
*
cols
+
x]
=
min
(
prefilterCap,
255
)
&
0xFF
;
output[y
*
cols
+
x]
=
min
(
prefilterCap,
255
)
&
0xFF
;
if
(
x
<
cols
&&
y
<
rows
-1
&&
x
>
0
)
if
(
x
<
cols
&&
y
<
rows
&&
x
>
0
&&
!
((
y
==
rows-1
)
&
(
rows%2==1
)
)
)
{
{
int
cov
=
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
+
int
cov
=
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
+
input[
(
y
)
*
cols
+
(
x-1
)
]
*
(
-2
)
+
input[
(
y
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
2
)
+
input[
(
y
)
*
cols
+
(
x-1
)
]
*
(
-2
)
+
input[
(
y
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
2
)
+
input[
(
y+1
)
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
(
y+1
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
;
input[
((
y<rows-1
)
?
(
y+1
)
:
(
y-1
))
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y<rows-1
)
?
(
y+1
)
:
(
y-1
))
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
;
cov
=
min
(
min
(
max
(
-prefilterCap,
cov
)
,
prefilterCap
)
+
prefilterCap,
255
)
;
cov
=
min
(
clamp
(
cov,
-prefilterCap
,
prefilterCap
)
+
prefilterCap,
255
)
;
output[y
*
cols
+
x]
=
cov
&
0xFF
;
output[y
*
cols
+
x]
=
cov
&
0xFF
;
}
}
}
}
modules/calib3d/src/stereobm.cpp
View file @
0904f10a
...
@@ -180,13 +180,11 @@ static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int pre
...
@@ -180,13 +180,11 @@ static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int pre
_output
.
create
(
input
.
size
(),
input
.
type
());
_output
.
create
(
input
.
size
(),
input
.
type
());
output
=
_output
.
getUMat
();
output
=
_output
.
getUMat
();
size_t
blockSize
=
1
;
size_t
globalThreads
[
3
]
=
{
input
.
cols
,
input
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
input
.
cols
,
input
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
blockSize
,
blockSize
,
1
};
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
input
),
ocl
::
KernelArg
::
PtrWriteOnly
(
output
),
input
.
rows
,
input
.
cols
,
prefilterCap
);
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
input
),
ocl
::
KernelArg
::
PtrWriteOnly
(
output
),
input
.
rows
,
input
.
cols
,
prefilterCap
);
return
k
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
return
k
.
run
(
2
,
globalThreads
,
NULL
,
false
);
}
}
static
void
static
void
...
@@ -655,6 +653,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
...
@@ -655,6 +653,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
mind
=
d
;
mind
=
d
;
}
}
}
}
tsum
+=
htext
[
y
+
wsz2
]
-
htext
[
y
-
wsz2
-
1
];
tsum
+=
htext
[
y
+
wsz2
]
-
htext
[
y
-
wsz2
-
1
];
if
(
tsum
<
textureThreshold
)
if
(
tsum
<
textureThreshold
)
{
{
...
@@ -738,9 +737,9 @@ struct PrefilterInvoker : public ParallelLoopBody
...
@@ -738,9 +737,9 @@ struct PrefilterInvoker : public ParallelLoopBody
static
bool
ocl_stereobm_opt
(
InputArray
_left
,
InputArray
_right
,
static
bool
ocl_stereobm_opt
(
InputArray
_left
,
InputArray
_right
,
OutputArray
_disp
,
StereoBMParams
*
state
)
OutputArray
_disp
,
StereoBMParams
*
state
)
{
{
//printf("opt\n");
int
ndisp
=
state
->
numDisparities
;
int
ndisp
=
state
->
numDisparities
;
ocl
::
Kernel
k
(
"stereoBM_opt"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D csize=%d -D tsize=%d
"
,
ndisp
*
ndisp
,
ndisp
)
);
ocl
::
Kernel
k
(
"stereoBM_opt"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D csize=%d -D tsize=%d
-D wsz=%d"
,
ndisp
*
ndisp
,
ndisp
,
state
->
SADWindowSize
)
);
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
...
@@ -748,8 +747,9 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
...
@@ -748,8 +747,9 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
_disp
.
create
(
_left
.
size
(),
CV_16S
);
_disp
.
create
(
_left
.
size
(),
CV_16S
);
UMat
disp
=
_disp
.
getUMat
();
UMat
disp
=
_disp
.
getUMat
();
size_t
globalThreads
[
3
]
=
{
left
.
cols
,
left
.
rows
,
1
};
int
nthreads
=
(
ndisp
<=
64
)
?
2
:
4
;
size_t
localThreads
[
3
]
=
{
ndisp
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
left
.
cols
,
(
left
.
rows
-
left
.
rows
%
ndisp
+
ndisp
),
nthreads
};
size_t
localThreads
[
3
]
=
{
1
,
ndisp
,
nthreads
};
int
idx
=
0
;
int
idx
=
0
;
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
left
));
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
left
));
...
@@ -758,11 +758,11 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
...
@@ -758,11 +758,11 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
idx
=
k
.
set
(
idx
,
state
->
minDisparity
);
idx
=
k
.
set
(
idx
,
state
->
minDisparity
);
idx
=
k
.
set
(
idx
,
ndisp
);
idx
=
k
.
set
(
idx
,
ndisp
);
idx
=
k
.
set
(
idx
,
state
->
preFilterCap
);
idx
=
k
.
set
(
idx
,
state
->
preFilterCap
);
idx
=
k
.
set
(
idx
,
state
->
SADWindowSize
);
idx
=
k
.
set
(
idx
,
nthreads
);
idx
=
k
.
set
(
idx
,
state
->
textureThreshold
);
idx
=
k
.
set
(
idx
,
state
->
textureThreshold
);
idx
=
k
.
set
(
idx
,
state
->
uniquenessRatio
);
idx
=
k
.
set
(
idx
,
state
->
uniquenessRatio
);
return
k
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
return
k
.
run
(
3
,
globalThreads
,
localThreads
,
false
);
}
}
static
bool
ocl_stereobm_bf
(
InputArray
_left
,
InputArray
_right
,
static
bool
ocl_stereobm_bf
(
InputArray
_left
,
InputArray
_right
,
...
@@ -790,15 +790,16 @@ static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
...
@@ -790,15 +790,16 @@ static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
idx
=
k
.
set
(
idx
,
state
->
uniquenessRatio
);
idx
=
k
.
set
(
idx
,
state
->
uniquenessRatio
);
return
k
.
run
(
2
,
globalThreads
,
NULL
,
false
);
return
k
.
run
(
2
,
globalThreads
,
NULL
,
false
);
return
false
;
}
}
static
bool
ocl_stereo
(
InputArray
_left
,
InputArray
_right
,
static
bool
ocl_stereo
(
InputArray
_left
,
InputArray
_right
,
OutputArray
_disp
,
StereoBMParams
*
state
)
OutputArray
_disp
,
StereoBMParams
*
state
)
{
{
if
(
ocl
::
Device
::
getDefault
().
localMemSize
()
>
state
->
numDisparities
*
state
->
numDisparities
*
sizeof
(
in
t
)
)
if
(
ocl
::
Device
::
getDefault
().
localMemSize
()
>
state
->
numDisparities
*
state
->
numDisparities
*
sizeof
(
shor
t
)
)
return
ocl_stereobm_opt
(
_left
,
_right
,
_disp
,
state
);
return
ocl_stereobm_opt
(
_left
,
_right
,
_disp
,
state
);
else
else
return
ocl_stereobm_bf
(
_left
,
_right
,
_disp
,
state
);
return
false
;
//
ocl_stereobm_bf(_left, _right, _disp, state);
}
}
struct
FindStereoCorrespInvoker
:
public
ParallelLoopBody
struct
FindStereoCorrespInvoker
:
public
ParallelLoopBody
...
@@ -992,7 +993,7 @@ public:
...
@@ -992,7 +993,7 @@ public:
bufSize2
=
width
*
height
*
(
sizeof
(
Point_
<
short
>
)
+
sizeof
(
int
)
+
sizeof
(
uchar
));
bufSize2
=
width
*
height
*
(
sizeof
(
Point_
<
short
>
)
+
sizeof
(
int
)
+
sizeof
(
uchar
));
#if CV_SSE2
#if CV_SSE2
bool
useShorts
=
false
;
//
params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2);
bool
useShorts
=
params
.
preFilterCap
<=
31
&&
params
.
SADWindowSize
<=
21
&&
checkHardwareSupport
(
CV_CPU_SSE2
);
#else
#else
const
bool
useShorts
=
false
;
const
bool
useShorts
=
false
;
#endif
#endif
...
...
modules/calib3d/test/opencl/test_stereobm.cpp
View file @
0904f10a
...
@@ -81,11 +81,27 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
...
@@ -81,11 +81,27 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
{
{
Ptr
<
StereoBM
>
bm
=
createStereoBM
(
n_disp
,
winSize
);
Ptr
<
StereoBM
>
bm
=
createStereoBM
(
n_disp
,
winSize
);
bm
->
setPreFilterType
(
bm
->
PREFILTER_XSOBEL
);
bm
->
setPreFilterType
(
bm
->
PREFILTER_XSOBEL
);
// bm->setMinDisparity(15);
long
t1
=
clock
();
OCL_OFF
(
bm
->
compute
(
left
,
right
,
disp
));
OCL_OFF
(
bm
->
compute
(
left
,
right
,
disp
));
long
t2
=
clock
();
OCL_ON
(
bm
->
compute
(
uleft
,
uright
,
udisp
));
OCL_ON
(
bm
->
compute
(
uleft
,
uright
,
udisp
));
cv
::
ocl
::
finish
();
Near
(
0.05
);
long
t3
=
clock
();
std
::
cout
<<
(
double
)(
t2
-
t1
)
/
CLOCKS_PER_SEC
<<
" "
<<
(
double
)(
t3
-
t2
)
/
CLOCKS_PER_SEC
<<
std
::
endl
;
/*
Mat t; absdiff(disp, udisp, t);
/* for(int i = 0; i<t.rows; i++)
for(int j = 0; j< t.cols; j++)
// if(t.at<short>(i,j) > 0)
if(i>=5 && i <=16 && j == 36+15)
printf("%d %d cv: %d ocl: %d\n", i, j, disp.at<short>(i,j), udisp.getMat(ACCESS_READ).at<short>(i,j) );*/
/* imshow("diff.png", t*100);
imshow("cv.png", disp*100);
imshow("ocl.png", udisp.getMat(ACCESS_READ)*100);
waitKey(0);*/
Near
(
1e-3
);
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
StereoMatcher
,
StereoBMFixture
,
testing
::
Combine
(
testing
::
Values
(
32
,
64
,
128
),
OCL_INSTANTIATE_TEST_CASE_P
(
StereoMatcher
,
StereoBMFixture
,
testing
::
Combine
(
testing
::
Values
(
32
,
64
,
128
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment