Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ddc23517
Commit
ddc23517
authored
Mar 06, 2014
by
Konstantin Matskevich
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new approach
parent
a5d989f3
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
189 additions
and
89 deletions
+189
-89
perf_stereobm.cpp
modules/calib3d/perf/opencl/perf_stereobm.cpp
+1
-1
stereobm.cl
modules/calib3d/src/opencl/stereobm.cl
+157
-72
stereobm.cpp
modules/calib3d/src/stereobm.cpp
+27
-12
test_stereobm.cpp
modules/calib3d/test/opencl/test_stereobm.cpp
+4
-4
No files found.
modules/calib3d/perf/opencl/perf_stereobm.cpp
View file @
ddc23517
...
...
@@ -68,7 +68,7 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32,
OCL_TEST_CYCLE
()
bm
->
compute
(
left
,
right
,
disp
);
SANITY_CHECK
(
disp
,
1e-3
,
ERROR_RELATIVE
);
SANITY_CHECK
_NOTHING
();
//
(disp, 1e-3, ERROR_RELATIVE);
}
}
//ocl
...
...
modules/calib3d/src/opencl/stereobm.cl
View file @
ddc23517
...
...
@@ -40,6 +40,7 @@
//
//M*/
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
//////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////
stereoBM
//////////////////////////////////////////////
...
...
@@ -49,117 +50,196 @@
#
define
MAX_VAL
32767
void
calcDisp
(
__local
short
*
costFunc,
__global
short
*
disp,
int
uniquenessRatio,
int
textureTreshold,
short
textsum,
int
mindisp,
int
ndisp
)
void
calcDisp
(
__local
short
*
costFunc,
__global
short
*
disp,
int
uniquenessRatio/*,
int
textureTreshold,
short
textsum*/,
int
mindisp,
int
ndisp,
int
w,
__local
short
*
dispbuf,
int
d
)
{
short
FILTERED
=
(
mindisp
-
1
)
<<4
;
short
best_disp
=
FILTERED,
best_cost
=
MAX_VAL-1
;
__local
short
*
cost
;
cost
=
&costFunc[0]
;
#
pragma
unroll
for
(
int
i
=
0
; i < tsize; i++)
dispbuf[d]
=
d
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
tsize/2
>>
1
; lsize > 0; lsize >>= 1)
{
short
c
=
cost[0]
;
best_cost
=
(
c
<
best_cost
)
?
c
:
best_cost
;
best_disp
=
(
best_cost
==
c
)
?
ndisp
-
i
-
1
:
best_disp
;
cost++
;
short
lid1
=
dispbuf[d],
lid2
=
dispbuf[d+lsize],
cost1
=
cost[lid1*w],
cost2
=
cost[lid2*w]
;
if
(
d
<
lsize
)
{
dispbuf[d]
=
(
cost1
<
cost2
)
?
lid1
:
(
cost1==cost2
)
?
max
(
lid1,
lid2
)
:
lid2
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
best_disp
=
ndisp
-
dispbuf[0]
-
1
;
best_cost
=
costFunc[
(
ndisp-best_disp-1
)
*w]
;
cost
=
&costFunc[0]
;
int
thresh
=
best_cost
+
(
best_cost
*
uniquenessRatio/100
)
;
#
pragma
unroll
for
(
int
i
=
0
; (i < tsize) && (uniquenessRatio > 0); i++)
dispbuf[d]
=
(
(
cost[d*w]
<=
thresh
)
&&
(
d
<
(
ndisp
-
best_disp
-
2
)
|
| d > (ndisp - best_disp) ) ) ? FILTERED : best_disp;
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = tsize/2 >> 1; lsize > 0; lsize >>= 1)
{
best_disp
=
(
(
cost[0]
<=
thresh
)
&&
(
i
<
(
ndisp
-
best_disp
-
2
)
|
| i > (ndisp - best_disp) ) ) ?
FILTERED : best_disp;
cost++;
short val1 = dispbuf[d], val2 = dispbuf[d+lsize];
if (d < lsize)
{
dispbuf[d] = min(val1, val2);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
// best_disp = (textsum < textureTreshold) ? FILTERED : best_disp;
if(
best_disp
!= FILTERED )
if(
dispbuf[0]
!= FILTERED )
{
cost = &costFunc[0] + (ndisp - best_disp - 1);
int y3 = ((ndisp - best_disp - 1) > 0) ? cost[-
1] : cost[1
],
cost = &costFunc[0] + (ndisp - best_disp - 1)
*w
;
int y3 = ((ndisp - best_disp - 1) > 0) ? cost[-
w] : cost[w
],
y2 = cost[0],
y1 = ((ndisp - best_disp - 1) < ndisp-1) ? cost[
1] : cost[-1
];
y1 = ((ndisp - best_disp - 1) < ndisp-1) ? cost[
w] : cost[-w
];
int d = y3+y1-2*y2 + abs(y3-y1);
disp[0] = (short)
best_disp;//
(((ndisp - best_disp - 1 + mindisp)*256 + (d != 0 ? (y3-y1)*256/d : 0) + 15) >> 4);
disp[0] = (short)(((ndisp - best_disp - 1 + mindisp)*256 + (d != 0 ? (y3-y1)*256/d : 0) + 15) >> 4);
}
}
int calcLocalIdx(int x, int y, int d, int w)
{
return d*2*w + (w - 1 - y + x);
}
void calcNewCoordinates(int * x, int * y, int nthread)
{
int oldX = *x - (1-nthread), oldY = *y;
*x = (oldX == oldY) ? (0*nthread + (oldX + 2)*(1-nthread) ) : (oldX+1)*(1-nthread) + (oldX+1)*nthread;
*y = (oldX == oldY) ? (0*(1-nthread) + (oldY + 1)*nthread) : oldY + 1*(1-nthread);
}
short calcCostBorder(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y, int nthread,
int wsz2, short * costbuf, int * h, int cols, int d, short cost)
{
int head = (*h)%wsz;
__global const uchar * left, * right;
int idx = mad24(y+wsz2*(2*nthread-1), cols, x+wsz2*(1-2*nthread));
left = leftptr + idx;
right = rightptr + (idx - d);
short costdiff = 0;
for(int i = 0; i < wsz; i++)
{
costdiff += abs( left[0] - right[0] );
left += 1*nthread + cols*(1-nthread);
right += 1*nthread + cols*(1-nthread);// maybe use ? operator
}
cost += costdiff - costbuf[head];
costbuf[head] = costdiff;
(*h) = (*h)%wsz + 1;
return cost;
}
short calcCostInside(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y,
int wsz2, int cols, int d, short cost_up_left, short cost_up, short cost_left)
{
__global const uchar * left, * right;
int idx = mad24(y-wsz2-1, cols, x-wsz2-1);
left = leftptr + idx;
right = rightptr + (idx - d);
return cost_up + cost_left - cost_up_left + abs(left[0] - right[0]) -
abs(left[wsz] - right[wsz]) - abs(left[(wsz)*cols] - right[(wsz)*cols]) +
abs(left[(wsz)*cols + wsz] - right[(wsz)*cols + wsz]);
}
__kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar * rightptr, __global uchar * dispptr,
int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
int preFilterCap, int textureTreshold, int uniquenessRatio)
int preFilterCap, int textureTreshold, int uniquenessRatio
, int sizeX, int sizeY
)
{
int x = get_global_id(0);
int total_y = get_global_id(1);
int d = get_local_id(2);
int ly = get_local_id(1);
int gy = get_group_id(1), y = gy*wsz;
int gx = get_global_id(0)*sizeX;
int gy = get_global_id(1)*sizeY;
int lz = get_local_id(2);
int nthread = lz/32;// only 0 or 1
int d = lz%32;// 1 .. 32
int wsz2 = wsz/2;
short FILTERED = (mindisp - 1)<<4;
__local short costFunc[csize];
short textsum;
__local short * cost = costFunc + d;
__global short * disp;
__global const uchar * left, * right;
int dispIdx = mad24(total_y, disp_step, disp_offset + x*(int)sizeof(short) );
__global short * disp = (__global short*)(dispptr + dispIdx);
short best_cost = MAX_VAL-1, best_disp = FILTERED;
__local short dispbuf[tsize];
__local short costFunc[csize];
__local short * cost;
short costbuf[wsz];
int head = 0;
if( x < cols && total_y < rows)
{
disp[0] = FILTERED;
}
int shiftX = wsz2 + ndisp + mindisp - 1;
int shiftY = wsz2;
if( (x > ndisp+mindisp+wsz2-2) && (x < cols - wsz2 - mindisp) )
{
cost[ly*ndisp] = 0;
cost += (y < wsz2) ? ndisp*wsz2 : 0;
y = (y<wsz2) ? wsz2 : y;
int x = gx + shiftX, y = gy + shiftY, lx = 0, ly = 0;
int costIdx = calcLocalIdx(lx, ly, d, sizeY);
cost = costFunc + costIdx;
short tempcost = 0;
for(int i = 0; i < wsz; i++)
{
int idx = mad24(y-wsz2+i*nthread, cols, x-wsz2+i*(1-nthread));
left = leftptr + idx;
right = rightptr + (idx - d);
short costdiff = 0;
for(int i = 0; (i < wsz) && (y < rows-wsz2); i++)
for(int j = 0; j < wsz; j++)
{
left = leftptr + mad24(y-wsz2+i, cols, x-wsz2+ly);
right = rightptr + mad24(y-wsz2+i, cols, x-wsz2-d-mindisp+ly);
costbuf[i] = abs(left[0] - right[0]);
costdiff += costbuf[i];
costdiff += abs( left[0] - right[0] );
left += 1*nthread + cols*(1-nthread);
right += 1*nthread + cols*(1-nthread);// maybe use ? operator
}
for( int i = 0; i < wsz; i++
)
if(nthread==1
)
{
if(ly == i)
cost[0] += costdiff;
tempcost += costdiff;
}
costbuf[head] = costdiff;
head++;
}
barrier(CLK_LOCAL_MEM_FENCE);
cost[0] = tempcost;
if(x < cols-wsz2-mindisp && y < rows-wsz2 && nthread == 1)
{
int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short));
disp = (__global short *)(dispptr + dispIdx);
calcDisp(&costFunc[sizeY - 1 + lx - ly], disp, uniquenessRatio, /*textureTreshold, textsum,*/
mindisp, ndisp, 2*sizeY, &dispbuf[nthread*tsize/2], d);
}
barrier(CLK_LOCAL_MEM_FENCE);
lx = 1 - nthread;
ly = nthread;
while(lx < sizeX && ly < sizeY )
{
x = gx + shiftX + lx;
y = gy + shiftY + ly;
costIdx = calcLocalIdx(lx, ly, d, sizeY);
cost = costFunc + costIdx;
cost[0] = ( ly*(1-nthread) + lx*nthread == 0 ) ?
calcCostBorder(leftptr, rightptr, x, y, nthread, wsz2, costbuf, &head, cols, d,
costFunc[calcLocalIdx(lx-1*(1-nthread), ly-1*nthread, d, sizeY)]) :
calcCostInside(leftptr, rightptr, x, y, wsz2, cols, d,
costFunc[calcLocalIdx(lx-1, ly-1, d, sizeY)],
costFunc[calcLocalIdx(lx, ly-1, d, sizeY)],
costFunc[calcLocalIdx(lx-1, ly, d, sizeY)]);
barrier(CLK_LOCAL_MEM_FENCE);
y++;
for(; (y < gy*wsz + wsz) && (y < rows-wsz2); y++
)
if(x < cols-mindisp-wsz2 && y < rows-wsz2
)
{
cost += ndisp;
left += cols;
right += cols;
costdiff += abs(left[0] - right[0]) - abs(left[(-wsz2-1)*cols] - right[(-wsz2-1)*cols]);//costbuf[(y-1)%wsz];
for( int i = 0; i < wsz; i++)
{
if(ly == i)
cost[0] += costdiff;
}
barrier(CLK_LOCAL_MEM_FENCE);
int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short));
disp = (__global short *)(dispptr + dispIdx);
calcDisp(&costFunc[sizeY - 1 - ly + lx], disp, uniquenessRatio, //textureTreshold, textsum,
mindisp, ndisp, 2*sizeY, &dispbuf[nthread*tsize/2], d);
}
barrier(CLK_LOCAL_MEM_FENCE);
/*
if(total_y >= wsz2 && total_y < rows - wsz2 && d == 0)
{
cost = costFunc + ly*ndisp;
disp[0] = cost[wsz-1];
}*/
if(total_y >= wsz2 && total_y < rows - wsz2 && d == 0)
{
calcDisp(&(costFunc + ly*ndisp)[0], disp, uniquenessRatio, textureTreshold, textsum, mindisp, ndisp);
}
calcNewCoordinates(&lx, &ly, nthread);
}
}
...
...
@@ -175,8 +255,9 @@ __kernel void stereoBM_BF(__global const uchar * left, __global const uchar * ri
int y = get_global_id(1);
int wsz2 = winsize/2;
short FILTERED = (mindisp - 1)<<4;
if(x < cols && y < rows )
{
int dispIdx = mad24(y, disp_step, disp_offset + x*(int)sizeof(short) );
__global short * disp = (__global short*)(dispptr + dispIdx);
...
...
@@ -263,7 +344,11 @@ __kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
output[y
*
cols
+
x]
=
min
(
prefilterCap,
255
)
&
0xFF
;
if
(
x
<
cols
&&
y
<
rows
)
{
output[y
*
cols
+
x]
=
min
(
prefilterCap,
255
)
&
0xFF
;
}
if
(
x
<
cols
&&
y
<
rows
&&
x
>
0
&&
!
((
y
==
rows-1
)
&
(
rows%2==1
)
)
)
{
int
cov
=
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
(
x-1
)
]
*
(
-1
)
+
input[
((
y
>
0
)
?
y-1
:
y+1
)
*
cols
+
((
x<cols-1
)
?
x+1
:
x-1
)
]
*
(
1
)
+
...
...
modules/calib3d/src/stereobm.cpp
View file @
ddc23517
...
...
@@ -681,7 +681,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
sad
[
ndisp
]
=
sad
[
ndisp
-
2
];
int
p
=
sad
[
mind
+
1
],
n
=
sad
[
mind
-
1
];
d
=
p
+
n
-
2
*
sad
[
mind
]
+
std
::
abs
(
p
-
n
);
dptr
[
y
*
dstep
]
=
(
short
)
mind
;
//
(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15) >> 4);
dptr
[
y
*
dstep
]
=
(
short
)(((
ndisp
-
mind
-
1
+
mindisp
)
*
256
+
(
d
!=
0
?
(
p
-
n
)
*
256
/
d
:
0
)
+
15
)
>>
4
);
costptr
[
y
*
coststep
]
=
sad
[
mind
];
}
}
...
...
@@ -739,27 +739,43 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
OutputArray
_disp
,
StereoBMParams
*
state
)
{
//printf("opt\n");
int
ndisp
=
state
->
numDisparities
;
int
mindisp
=
state
->
minDisparity
;
int
wsz
=
state
->
SADWindowSize
;
ocl
::
Kernel
k
(
"stereoBM_opt"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D csize=%d -D tsize=%d -D wsz=%d"
,
wsz
*
ndisp
,
ndisp
,
wsz
)
);
int
wsz2
=
wsz
/
2
;
int
sizeX
=
9
,
sizeY
=
sizeX
-
1
,
N
=
ndisp
*
2
;
ocl
::
Kernel
k
(
"stereoBM_opt"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D csize=%d -D tsize=%d -D wsz=%d"
,
(
2
*
sizeY
)
*
ndisp
,
2
*
ndisp
,
wsz
)
);
if
(
k
.
empty
())
return
false
;
UMat
left
=
_left
.
getUMat
(),
right
=
_right
.
getUMat
();
int
cols
=
left
.
cols
,
rows
=
left
.
rows
;
_disp
.
create
(
_left
.
size
(),
CV_16S
);
UMat
disp
=
_disp
.
getUMat
();
_disp
.
setTo
((
mindisp
-
1
)
<<
4
);
Rect
roi
=
Rect
(
Point
(
wsz2
+
mindisp
+
ndisp
-
1
,
wsz2
),
Point
(
cols
-
1
-
wsz2
-
mindisp
,
rows
-
1
-
wsz2
)
);
UMat
disp
=
(
_disp
.
getUMat
())(
roi
);
size_t
globalThreads
[
3
]
=
{
left
.
cols
,
(
left
.
rows
-
left
.
rows
%
wsz
+
wsz
),
ndisp
};
size_t
localThreads
[
3
]
=
{
1
,
wsz
,
ndisp
};
int
globalX
=
disp
.
cols
/
sizeX
,
globalY
=
disp
.
rows
/
sizeY
;
globalX
+=
(
disp
.
cols
%
sizeX
)
>
0
?
1
:
0
;
globalY
+=
(
disp
.
rows
%
sizeY
)
>
0
?
1
:
0
;
size_t
globalThreads
[
3
]
=
{
globalX
,
globalY
,
N
};
size_t
localThreads
[
3
]
=
{
1
,
1
,
N
};
int
idx
=
0
;
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
left
));
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
right
));
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
WriteOnly
(
disp
));
idx
=
k
.
set
(
idx
,
state
->
minDisparity
);
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
WriteOnlyNoSize
(
disp
));
idx
=
k
.
set
(
idx
,
rows
);
idx
=
k
.
set
(
idx
,
cols
);
idx
=
k
.
set
(
idx
,
mindisp
);
idx
=
k
.
set
(
idx
,
ndisp
);
idx
=
k
.
set
(
idx
,
state
->
preFilterCap
);
idx
=
k
.
set
(
idx
,
state
->
textureThreshold
);
idx
=
k
.
set
(
idx
,
state
->
uniquenessRatio
);
idx
=
k
.
set
(
idx
,
sizeX
);
idx
=
k
.
set
(
idx
,
sizeY
);
return
k
.
run
(
3
,
globalThreads
,
localThreads
,
false
);
}
...
...
@@ -789,16 +805,15 @@ static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
idx
=
k
.
set
(
idx
,
state
->
uniquenessRatio
);
return
k
.
run
(
2
,
globalThreads
,
NULL
,
false
);
return
false
;
}
static
bool
ocl_stereo
(
InputArray
_left
,
InputArray
_right
,
OutputArray
_disp
,
StereoBMParams
*
state
)
{
if
(
ocl
::
Device
::
getDefault
().
localMemSize
()
>
state
->
numDisparities
*
state
->
numDisparities
*
sizeof
(
short
)
)
//
if(ocl::Device::getDefault().localMemSize() > state->numDisparities * state->numDisparities * sizeof(short) )
return
ocl_stereobm_opt
(
_left
,
_right
,
_disp
,
state
);
else
return
ocl_stereobm_bf
(
_left
,
_right
,
_disp
,
state
);
//
else
//
return ocl_stereobm_bf(_left, _right, _disp, state);
}
struct
FindStereoCorrespInvoker
:
public
ParallelLoopBody
...
...
@@ -992,7 +1007,7 @@ public:
bufSize2
=
width
*
height
*
(
sizeof
(
Point_
<
short
>
)
+
sizeof
(
int
)
+
sizeof
(
uchar
));
#if CV_SSE2
bool
useShorts
=
false
;
//
params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2);
bool
useShorts
=
params
.
preFilterCap
<=
31
&&
params
.
SADWindowSize
<=
21
&&
checkHardwareSupport
(
CV_CPU_SSE2
);
#else
const
bool
useShorts
=
false
;
#endif
...
...
modules/calib3d/test/opencl/test_stereobm.cpp
View file @
ddc23517
...
...
@@ -90,18 +90,18 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
cv
::
ocl
::
finish
();
long
t3
=
clock
();
std
::
cout
<<
(
double
)(
t2
-
t1
)
/
CLOCKS_PER_SEC
<<
" "
<<
(
double
)(
t3
-
t2
)
/
CLOCKS_PER_SEC
<<
std
::
endl
;
/*
Mat t; absdiff(disp, udisp, t);
for(int i = 0; i<t.rows; i++)
for(int j = 0; j< t.cols; j++)
//
if(t.at<short>(i,j) > 0)
if
(
i
==
5
&&
j
==
38
)
if(t.at<short>(i,j) > 0)
// if(i== 255 && j == 375
)
printf("%d %d cv: %d ocl: %d\n", i, j, disp.at<short>(i,j), udisp.getMat(ACCESS_READ).at<short>(i,j) );
/* imshow("diff.png", t*100);
imshow("cv.png", disp*100);
imshow("ocl.png", udisp.getMat(ACCESS_READ)*100);
waitKey(0);*/
Near
(
1e-3
);
//
Near(1e-3);
}
OCL_INSTANTIATE_TEST_CASE_P
(
StereoMatcher
,
StereoBMFixture
,
testing
::
Combine
(
testing
::
Values
(
32
,
64
,
128
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment