Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a5d989f3
Commit
a5d989f3
authored
Mar 04, 2014
by
Konstantin Matskevich
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new attempt
parent
799d7e7a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
54 additions
and
84 deletions
+54
-84
stereobm.cl
modules/calib3d/src/opencl/stereobm.cl
+45
-76
stereobm.cpp
modules/calib3d/src/stereobm.cpp
+5
-4
test_stereobm.cpp
modules/calib3d/test/opencl/test_stereobm.cpp
+4
-4
No files found.
modules/calib3d/src/opencl/stereobm.cl
View file @
a5d989f3
...
@@ -56,7 +56,7 @@ void calcDisp(__local short * costFunc, __global short * disp, int uniquenessRat
...
@@ -56,7 +56,7 @@ void calcDisp(__local short * costFunc, __global short * disp, int uniquenessRat
__local
short
*
cost
;
__local
short
*
cost
;
cost
=
&costFunc[0]
;
cost
=
&costFunc[0]
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i
=
0
; i < tsize
/2
; i++)
for
(
int
i
=
0
; i < tsize; i++)
{
{
short
c
=
cost[0]
;
short
c
=
cost[0]
;
best_cost
=
(
c
<
best_cost
)
?
c
:
best_cost
;
best_cost
=
(
c
<
best_cost
)
?
c
:
best_cost
;
...
@@ -67,14 +67,14 @@ void calcDisp(__local short * costFunc, __global short * disp, int uniquenessRat
...
@@ -67,14 +67,14 @@ void calcDisp(__local short * costFunc, __global short * disp, int uniquenessRat
cost
=
&costFunc[0]
;
cost
=
&costFunc[0]
;
int
thresh
=
best_cost
+
(
best_cost
*
uniquenessRatio/100
)
;
int
thresh
=
best_cost
+
(
best_cost
*
uniquenessRatio/100
)
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i
=
0
; (i < tsize
/2
) && (uniquenessRatio > 0); i++)
for
(
int
i
=
0
; (i < tsize) && (uniquenessRatio > 0); i++)
{
{
best_disp
=
(
(
cost[0]
<=
thresh
)
&&
(
i
<
(
ndisp
-
best_disp
-
2
)
|
| i > (ndisp - best_disp) ) ) ?
best_disp
=
(
(
cost[0]
<=
thresh
)
&&
(
i
<
(
ndisp
-
best_disp
-
2
)
|
| i > (ndisp - best_disp) ) ) ?
FILTERED : best_disp;
FILTERED : best_disp;
cost++;
cost++;
}
}
best_disp = (textsum < textureTreshold) ? FILTERED : best_disp;
//
best_disp = (textsum < textureTreshold) ? FILTERED : best_disp;
if( best_disp != FILTERED )
if( best_disp != FILTERED )
{
{
...
@@ -92,104 +92,73 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
...
@@ -92,104 +92,73 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
int preFilterCap, int textureTreshold, int uniquenessRatio)
int preFilterCap, int textureTreshold, int uniquenessRatio)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int ly = get_local_id(1);
int total_y = get_global_id(1);
int y = get_global_id(1)*32;
int d = get_local_id(2);
int d = get_local_id(2);
int ly = get_local_id(1);
int gy = get_group_id(1), y = gy*wsz;
int wsz2 = wsz/2;
int wsz2 = wsz/2;
short FILTERED = (mindisp - 1)<<4;
short FILTERED = (mindisp - 1)<<4;
__local short costFunc[tsize];
__local short costFunc[csize];
__local short bestdisp[tsize];
short textsum;
short textsum;
__local short * cost = &costFunc[0] + d +ly*ndisp;
__local short * cost = costFunc + d;
__global uchar * left, * right;
int dispIdx = mad24(y, disp_step, disp_offset + x*(int)sizeof(short) );
__global const uchar * left, * right;
int dispIdx = mad24(total_y, disp_step, disp_offset + x*(int)sizeof(short) );
__global short * disp = (__global short*)(dispptr + dispIdx);
__global short * disp = (__global short*)(dispptr + dispIdx);
short best_cost = MAX_VAL-1, best_disp = FILTERED;
short best_cost = MAX_VAL-1, best_disp = FILTERED;
short costbuf[wsz];
short costbuf[wsz];
short textbuf[wsz];
int head = 0;
int endy = y+32;
cost[0] = 0;
if( x < cols && total_y < rows)
bestdisp[d + ly*ndisp] = d;
textsum = 0;
for(; y < wsz2; y++)
{
disp[0] = FILTERED;
disp += cols;
}
if( x < cols && y < rows)
{
{
disp[0] = FILTERED;
disp[0] = FILTERED;
}
}
if( (x > ndisp+mindisp+wsz2-2) && (x < cols - wsz2 - mindisp) )
if( (x > ndisp+mindisp+wsz2-2) && (x < cols - wsz2 - mindisp) )
{
{
for(int i = -wsz2; (i < wsz2+1) && (y < rows-wsz2); i++)
cost[ly*ndisp] = 0;
{
cost += (y < wsz2) ? ndisp*wsz2 : 0;
left = leftptr + mad24(y+i, cols, x-wsz2);
y = (y<wsz2) ? wsz2 : y;
right = rightptr + mad24(y+i, cols, x-wsz2-d-mindisp);
int costdiff = 0, textdiff = 0;
short costdiff = 0;
#pragma unroll
for(int i = 0; (i < wsz) && (y < rows-wsz2); i++)
for(int j = 0; j < wsz; j++)
{
{
left = leftptr + mad24(y-wsz2+i, cols, x-wsz2+ly);
costdiff += abs( left[0] - right[0] );
right = rightptr + mad24(y-wsz2+i, cols, x-wsz2-d-mindisp+ly);
textdiff += abs( left[0] - preFilterCap );
costbuf[i] = abs(left[0] - right[0]);
left++; right++;
costdiff += costbuf[i];
}
cost[0] += costdiff;
textsum += textdiff;
costbuf[head] = costdiff;
textbuf[head] = textdiff;
head++;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
for( int i = 0; i < wsz; i++)
if( (y < rows-wsz2) && (y >= wsz2) )
{
{
calcDisp(&costFunc[ly*ndisp], &disp[0], uniquenessRatio, textureTreshold, textsum, mindisp, ndisp);
if(ly == i)
cost[0] += costdiff;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
y++;
for(; (y < gy*wsz + wsz) && (y < rows-wsz2); y++)
y++;
cost = &costFunc[0] + d+ly*ndisp;
for(; (y < endy) && (y<rows); y++)
{
disp+=cols;
disp[0] = FILTERED;
if(y < rows - wsz2 && y > wsz2 && (x > ndisp+mindisp+wsz2-2) && (x < cols - wsz2 - mindisp) )
{
{
head = head%wsz;
cost += ndisp;
left = leftptr + mad24(y-wsz2-1, cols, x - wsz2);
left += cols;
right = rightptr + mad24(y-wsz2-1, cols, x - wsz2 - d - mindisp);
right += cols;
costdiff += abs(left[0] - right[0]) - abs(left[(-wsz2-1)*cols] - right[(-wsz2-1)*cols]);//costbuf[(y-1)%wsz];
int costdiff = 0, textdiff = 0;
for( int i = 0; i < wsz; i++)
#pragma unroll
for(int i = 0; i < wsz; i++)
{
{
costdiff +=
if(ly == i)
abs( left[wsz*cols] - right[wsz*cols] );
cost[0] += costdiff;
textdiff += abs( left[wsz*cols] - preFilterCap );
left++; right++;
}
}
cost[0] += costdiff - costbuf[head];
textsum += textdiff - textbuf[head];
costbuf[head] = costdiff;
textbuf[head] = textdiff;
head++;
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
/*
if(total_y >= wsz2 && total_y < rows - wsz2 && d == 0)
{
cost = costFunc + ly*ndisp;
disp[0] = cost[wsz-1];
}*/
if(d == 0)
if(total_y >= wsz2 && total_y < rows - wsz2 && d == 0)
{
{
calcDisp(&costFunc[ly*ndisp], &disp[0], uniquenessRatio, textureTreshold, textsum, mindisp, ndisp);
calcDisp(&(costFunc + ly*ndisp)[0], disp, uniquenessRatio, textureTreshold, textsum, mindisp, ndisp);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
}
}
}
}
...
...
modules/calib3d/src/stereobm.cpp
View file @
a5d989f3
...
@@ -739,7 +739,8 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
...
@@ -739,7 +739,8 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
OutputArray
_disp
,
StereoBMParams
*
state
)
OutputArray
_disp
,
StereoBMParams
*
state
)
{
//printf("opt\n");
{
//printf("opt\n");
int
ndisp
=
state
->
numDisparities
;
int
ndisp
=
state
->
numDisparities
;
ocl
::
Kernel
k
(
"stereoBM_opt"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D csize=%d -D tsize=%d -D wsz=%d"
,
ndisp
*
ndisp
,
2
*
ndisp
,
state
->
SADWindowSize
)
);
int
wsz
=
state
->
SADWindowSize
;
ocl
::
Kernel
k
(
"stereoBM_opt"
,
ocl
::
calib3d
::
stereobm_oclsrc
,
cv
::
format
(
"-D csize=%d -D tsize=%d -D wsz=%d"
,
wsz
*
ndisp
,
ndisp
,
wsz
)
);
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
...
@@ -747,8 +748,8 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
...
@@ -747,8 +748,8 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
_disp
.
create
(
_left
.
size
(),
CV_16S
);
_disp
.
create
(
_left
.
size
(),
CV_16S
);
UMat
disp
=
_disp
.
getUMat
();
UMat
disp
=
_disp
.
getUMat
();
size_t
globalThreads
[
3
]
=
{
left
.
cols
,
(
left
.
rows
-
left
.
rows
%
32
+
32
)
/
32
,
ndisp
};
size_t
globalThreads
[
3
]
=
{
left
.
cols
,
(
left
.
rows
-
left
.
rows
%
wsz
+
wsz
)
,
ndisp
};
size_t
localThreads
[
3
]
=
{
1
,
2
,
ndisp
};
size_t
localThreads
[
3
]
=
{
1
,
wsz
,
ndisp
};
int
idx
=
0
;
int
idx
=
0
;
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
left
));
idx
=
k
.
set
(
idx
,
ocl
::
KernelArg
::
PtrReadOnly
(
left
));
...
@@ -797,7 +798,7 @@ static bool ocl_stereo(InputArray _left, InputArray _right,
...
@@ -797,7 +798,7 @@ static bool ocl_stereo(InputArray _left, InputArray _right,
if
(
ocl
::
Device
::
getDefault
().
localMemSize
()
>
state
->
numDisparities
*
state
->
numDisparities
*
sizeof
(
short
)
)
if
(
ocl
::
Device
::
getDefault
().
localMemSize
()
>
state
->
numDisparities
*
state
->
numDisparities
*
sizeof
(
short
)
)
return
ocl_stereobm_opt
(
_left
,
_right
,
_disp
,
state
);
return
ocl_stereobm_opt
(
_left
,
_right
,
_disp
,
state
);
else
else
return
false
;
//
ocl_stereobm_bf(_left, _right, _disp, state);
return
ocl_stereobm_bf
(
_left
,
_right
,
_disp
,
state
);
}
}
struct
FindStereoCorrespInvoker
:
public
ParallelLoopBody
struct
FindStereoCorrespInvoker
:
public
ParallelLoopBody
...
...
modules/calib3d/test/opencl/test_stereobm.cpp
View file @
a5d989f3
...
@@ -92,11 +92,11 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
...
@@ -92,11 +92,11 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
std
::
cout
<<
(
double
)(
t2
-
t1
)
/
CLOCKS_PER_SEC
<<
" "
<<
(
double
)(
t3
-
t2
)
/
CLOCKS_PER_SEC
<<
std
::
endl
;
std
::
cout
<<
(
double
)(
t2
-
t1
)
/
CLOCKS_PER_SEC
<<
" "
<<
(
double
)(
t3
-
t2
)
/
CLOCKS_PER_SEC
<<
std
::
endl
;
Mat
t
;
absdiff
(
disp
,
udisp
,
t
);
Mat
t
;
absdiff
(
disp
,
udisp
,
t
);
/*
for(int i = 0; i<t.rows; i++)
for
(
int
i
=
0
;
i
<
t
.
rows
;
i
++
)
for
(
int
j
=
0
;
j
<
t
.
cols
;
j
++
)
for
(
int
j
=
0
;
j
<
t
.
cols
;
j
++
)
if(t.at<short>(i,j) > 0)
//
if(t.at<short>(i,j) > 0)
// if(i == 125 && j == 174
)
if
(
i
==
5
&&
j
==
38
)
printf("%d %d cv: %d ocl: %d\n", i, j, disp.at<short>(i,j), udisp.getMat(ACCESS_READ).at<short>(i,j) );
*/
printf
(
"%d %d cv: %d ocl: %d
\n
"
,
i
,
j
,
disp
.
at
<
short
>
(
i
,
j
),
udisp
.
getMat
(
ACCESS_READ
).
at
<
short
>
(
i
,
j
)
);
/* imshow("diff.png", t*100);
/* imshow("diff.png", t*100);
imshow("cv.png", disp*100);
imshow("cv.png", disp*100);
imshow("ocl.png", udisp.getMat(ACCESS_READ)*100);
imshow("ocl.png", udisp.getMat(ACCESS_READ)*100);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment