Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
9cfa24e5
Commit
9cfa24e5
authored
Apr 23, 2013
by
peng xiao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix thread sync for csbp.
parent
c701d542
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
535 additions
and
268 deletions
+535
-268
stereocsbp.cl
modules/ocl/src/opencl/stereocsbp.cl
+535
-268
No files found.
modules/ocl/src/opencl/stereocsbp.cl
View file @
9cfa24e5
...
@@ -53,13 +53,13 @@
...
@@ -53,13 +53,13 @@
#
define
SHRT_MAX
CL_SHORT_MAX
#
define
SHRT_MAX
CL_SHORT_MAX
#
endif
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_global_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__kernel
void
get_first_k_initial_global_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
...
@@ -74,7 +74,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
...
@@ -74,7 +74,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
{
{
short
minimum
=
SHRT_MAX
;
short
minimum
=
SHRT_MAX
;
int
id
=
0
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
for
(
int
d
=
0
; d < cndisp; d++)
{
{
short
cur
=
data_cost[d
*
cdisp_step1]
;
short
cur
=
data_cost[d
*
cdisp_step1]
;
...
@@ -84,7 +84,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
...
@@ -84,7 +84,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
id
=
d
;
id
=
d
;
}
}
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost
[id
*
cdisp_step1]
=
SHRT_MAX
;
data_cost
[id
*
cdisp_step1]
=
SHRT_MAX
;
...
@@ -92,8 +92,8 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
...
@@ -92,8 +92,8 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
}
}
}
}
__kernel
void
get_first_k_initial_global_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__kernel
void
get_first_k_initial_global_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
...
@@ -108,7 +108,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
...
@@ -108,7 +108,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
{
{
float
minimum
=
FLT_MAX
;
float
minimum
=
FLT_MAX
;
int
id
=
0
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
for
(
int
d
=
0
; d < cndisp; d++)
{
{
float
cur
=
data_cost[d
*
cdisp_step1]
;
float
cur
=
data_cost[d
*
cdisp_step1]
;
...
@@ -118,7 +118,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
...
@@ -118,7 +118,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
id
=
d
;
id
=
d
;
}
}
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost
[id
*
cdisp_step1]
=
FLT_MAX
;
data_cost
[id
*
cdisp_step1]
=
FLT_MAX
;
...
@@ -130,8 +130,8 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
...
@@ -130,8 +130,8 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_,
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_local_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__kernel
void
get_first_k_initial_local_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,int
h,
int
w,
int
nr_plane,
__global
short
*ctemp,int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
...
@@ -147,10 +147,10 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
...
@@ -147,10 +147,10 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
short
prev
=
data_cost[0
*
cdisp_step1]
;
short
prev
=
data_cost[0
*
cdisp_step1]
;
short
cur
=
data_cost[1
*
cdisp_step1]
;
short
cur
=
data_cost[1
*
cdisp_step1]
;
short
next
=
data_cost[2
*
cdisp_step1]
;
short
next
=
data_cost[2
*
cdisp_step1]
;
for
(
int
d
=
1
; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
for
(
int
d
=
1
; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
{
{
if
(
cur
<
prev
&&
cur
<
next
)
if
(
cur
<
prev
&&
cur
<
next
)
{
{
data_cost_selected[nr_local_minimum
*
cdisp_step1]
=
cur
;
data_cost_selected[nr_local_minimum
*
cdisp_step1]
=
cur
;
...
@@ -159,7 +159,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
...
@@ -159,7 +159,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
nr_local_minimum++
;
nr_local_minimum++
;
}
}
prev
=
cur
;
prev
=
cur
;
cur
=
next
;
cur
=
next
;
next
=
data_cost[
(
d
+
1
)
*
cdisp_step1]
;
next
=
data_cost[
(
d
+
1
)
*
cdisp_step1]
;
...
@@ -169,7 +169,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
...
@@ -169,7 +169,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
{
{
short
minimum
=
SHRT_MAX
;
short
minimum
=
SHRT_MAX
;
int
id
=
0
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
for
(
int
d
=
0
; d < cndisp; d++)
{
{
cur
=
data_cost[d
*
cdisp_step1]
;
cur
=
data_cost[d
*
cdisp_step1]
;
...
@@ -179,7 +179,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
...
@@ -179,7 +179,7 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
id
=
d
;
id
=
d
;
}
}
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost[id
*
cdisp_step1]
=
SHRT_MAX
;
data_cost[id
*
cdisp_step1]
=
SHRT_MAX
;
...
@@ -188,8 +188,8 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
...
@@ -188,8 +188,8 @@ __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_,
}
}
__kernel
void
get_first_k_initial_local_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__kernel
void
get_first_k_initial_local_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__global
float
*ctemp,int
h,
int
w,
int
nr_plane,
__global
float
*ctemp,int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
...
@@ -205,7 +205,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
...
@@ -205,7 +205,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
float
prev
=
data_cost[0
*
cdisp_step1]
;
float
prev
=
data_cost[0
*
cdisp_step1]
;
float
cur
=
data_cost[1
*
cdisp_step1]
;
float
cur
=
data_cost[1
*
cdisp_step1]
;
float
next
=
data_cost[2
*
cdisp_step1]
;
float
next
=
data_cost[2
*
cdisp_step1]
;
for
(
int
d
=
1
; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
for
(
int
d
=
1
; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
{
{
if
(
cur
<
prev
&&
cur
<
next
)
if
(
cur
<
prev
&&
cur
<
next
)
...
@@ -216,16 +216,18 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
...
@@ -216,16 +216,18 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
nr_local_minimum++
;
nr_local_minimum++
;
}
}
prev
=
cur
;
prev
=
cur
;
cur
=
next
;
cur
=
next
;
next
=
data_cost[
(
d
+
1
)
*
cdisp_step1]
;
next
=
data_cost[
(
d
+
1
)
*
cdisp_step1]
;
}
}
for
(
int
i
=
nr_local_minimum
; i < nr_plane; i++)
for
(
int
i
=
nr_local_minimum
; i < nr_plane; i++)
{
{
float
minimum
=
FLT_MAX
;
float
minimum
=
FLT_MAX
;
int
id
=
0
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
for
(
int
d
=
0
; d < cndisp; d++)
{
{
cur
=
data_cost[d
*
cdisp_step1]
;
cur
=
data_cost[d
*
cdisp_step1]
;
...
@@ -235,7 +237,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
...
@@ -235,7 +237,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
id
=
d
;
id
=
d
;
}
}
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost[id
*
cdisp_step1]
=
FLT_MAX
;
data_cost[id
*
cdisp_step1]
=
FLT_MAX
;
...
@@ -247,16 +249,16 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
...
@@ -247,16 +249,16 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
///////////////////////
init
data
cost
////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
float
cdata_weight,
float
cmax_data_term
)
{
{
float
tb
=
0.114f
*
abs
((
int
)
left[0]
-
right[0]
)
;
float
tb
=
0.114f
*
abs
((
int
)
left[0]
-
right[0]
)
;
float
tg
=
0.587f
*
abs
((
int
)
left[1]
-
right[1]
)
;
float
tg
=
0.587f
*
abs
((
int
)
left[1]
-
right[1]
)
;
float
tr
=
0.299f
*
abs
((
int
)
left[2]
-
right[2]
)
;
float
tr
=
0.299f
*
abs
((
int
)
left[2]
-
right[2]
)
;
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
}
}
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
float
cdata_weight,
float
cmax_data_term
)
{
{
return
fmin
(
cdata_weight
*
abs
((
int
)
*left
-
(
int
)
*right
)
,
cdata_weight
*
cmax_data_term
)
;
return
fmin
(
cdata_weight
*
abs
((
int
)
*left
-
(
int
)
*right
)
,
cdata_weight
*
cmax_data_term
)
;
}
}
...
@@ -267,23 +269,23 @@ short round_short(float v){
...
@@ -267,23 +269,23 @@ short round_short(float v){
///////////////////////////////////init_data_cost///////////////////////////////////////////////
///////////////////////////////////init_data_cost///////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
init_data_cost_0
(
__global
short
*ctemp,
__global
uchar
*cleft,
__global
uchar
*cright,
__kernel
void
init_data_cost_0
(
__global
short
*ctemp,
__global
uchar
*cleft,
__global
uchar
*cright,
int
h,
int
w,
int
level,
int
channels,
int
h,
int
w,
int
level,
int
channels,
int
cmsg_step1,
float
cdata_weight,
float
cmax_data_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdata_weight,
float
cmax_data_term,
int
cdisp_step1,
int
cth,
int
cimg_step,
int
cndisp
)
int
cth,
int
cimg_step,
int
cndisp
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
if
(
y
<
h
&&
x
<
w
)
{
{
int
y0
=
y
<<
level
;
int
y0
=
y
<<
level
;
int
yt
=
(
y
+
1
)
<<
level
;
int
yt
=
(
y
+
1
)
<<
level
;
int
x0
=
x
<<
level
;
int
x0
=
x
<<
level
;
int
xt
=
(
x
+
1
)
<<
level
;
int
xt
=
(
x
+
1
)
<<
level
;
__global
short
*data_cost
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
short
*data_cost
=
ctemp
+
y
*
cmsg_step1
+
x
;
for
(
int
d
=
0
; d < cndisp; ++d)
for
(
int
d
=
0
; d < cndisp; ++d)
{
{
float
val
=
0.0f
;
float
val
=
0.0f
;
...
@@ -311,23 +313,23 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
...
@@ -311,23 +313,23 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
}
}
}
}
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cth, int cimg_step, int cndisp)
int cth, int cimg_step, int cndisp)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1);
if (y < h && x < w)
if (y < h && x < w)
{
{
int y0 = y << level;
int y0 = y << level;
int yt = (y + 1) << level;
int yt = (y + 1) << level;
int x0 = x << level;
int x0 = x << level;
int xt = (x + 1) << level;
int xt = (x + 1) << level;
__global float *data_cost = ctemp + y * cmsg_step1 + x;
__global float *data_cost = ctemp + y * cmsg_step1 + x;
for(int d = 0; d < cndisp; ++d)
for(int d = 0; d < cndisp; ++d)
{
{
float val = 0.0f;
float val = 0.0f;
...
@@ -358,9 +360,9 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
...
@@ -358,9 +360,9 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cdisp_step1, int cmsg_step1)
int cdisp_step1, int cmsg_step1)
{
{
int x_out = get_group_id(0);
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int y_out = get_group_id(1) % h;
...
@@ -373,9 +375,9 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
...
@@ -373,9 +375,9 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
{
{
int x0 = x_out << level;
int x0 = x_out << level;
int y0 = y_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
float val = 0.0f;
if (x0 + tid < cols)
if (x0 + tid < cols)
{
{
...
@@ -385,7 +387,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
...
@@ -385,7 +387,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
{
{
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
for(int y = 0; y < len; ++y)
for(int y = 0; y < len; ++y)
{
{
if(channels == 1)
if(channels == 1)
...
@@ -398,36 +400,103 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
...
@@ -398,36 +400,103 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
}
}
}
}
}
}
__local float* dline = smem + winsz * get_local_id(2);
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < cndisp)
if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
{
if (tid < 128)
dline[tid] += dline[tid + 128];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
__local volatile float* vdline = smem + winsz * get_local_id(2);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
{
if (tid < 64)
dline[tid] += dline[tid + 64];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
if(d < cndisp)
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
{
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
if (winsz >= 64)
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
if (tid < 32)
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
__global short* data_cost = ctemp + y_out * cmsg_step1 + x_out;
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
__global short* data_cost = ctemp + y_out * cmsg_step1 + x_out;
if (tid == 0)
if (tid == 0)
data_cost[cdisp_step1 * d] = convert_short_sat_rte(dline[0]);
data_cost[cdisp_step1 * d] = convert_short_sat_rte(dline[0]);
}
}
}
}
__kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
__kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cdisp_step1, int cmsg_step1)
int cdisp_step1, int cmsg_step1)
{
{
int x_out = get_group_id(0);
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int y_out = get_group_id(1) % h;
...
@@ -439,9 +508,9 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
...
@@ -439,9 +508,9 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
{
{
int x0 = x_out << level;
int x0 = x_out << level;
int y0 = y_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
float val = 0.0f;
//float val = 528.0f;
//float val = 528.0f;
...
@@ -453,7 +522,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
...
@@ -453,7 +522,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
{
{
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
for(int y = 0; y < len; ++y)
for(int y = 0; y < len; ++y)
{
{
if(channels == 1)
if(channels == 1)
...
@@ -466,27 +535,89 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
...
@@ -466,27 +535,89 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
}
}
}
}
}
}
__local float* dline = smem + winsz * get_local_id(2);
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
if (tid < 128)
dline[tid] += dline[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < cndisp)
if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
if (tid < 64)
dline[tid] += dline[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE);
__local volatile float* vdline = smem + winsz * get_local_id(2);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
if(d < cndisp)
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
{
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
if (winsz >= 32)
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
if (tid < 16)
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
__global float *data_cost = ctemp + y_out * cmsg_step1 + x_out;
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__global float *data_cost = ctemp + y_out * cmsg_step1 + x_out;
__local float* dline = smem + winsz * get_local_id(2);
if (tid == 0)
if (tid == 0)
data_cost[cdisp_step1 * d] = dline[0];
data_cost[cdisp_step1 * d] = dline[0];
}
}
...
@@ -496,10 +627,10 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
...
@@ -496,10 +627,10 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
////////////////////// compute data cost //////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int h, int w, int level, int nr_plane, int channels,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
float cmax_data_term, int cimg_step, int cth)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
...
@@ -509,7 +640,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
...
@@ -509,7 +640,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
{
{
int y0 = y << level;
int y0 = y << level;
int yt = (y + 1) << level;
int yt = (y + 1) << level;
int x0 = x << level;
int x0 = x << level;
int xt = (x + 1) << level;
int xt = (x + 1) << level;
...
@@ -525,10 +656,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
...
@@ -525,10 +656,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
{
{
int sel_disp = selected_disparity[d * cdisp_step2];
int sel_disp = selected_disparity[d * cdisp_step2];
int xr = xi - sel_disp;
int xr = xi - sel_disp;
if (xr < 0 || sel_disp < cth)
if (xr < 0 || sel_disp < cth)
val += cdata_weight * cmax_data_term;
val += cdata_weight * cmax_data_term;
else
else
{
{
__global uchar* left_x = cleft + yi * cimg_step + xi * channels;
__global uchar* left_x = cleft + yi * cimg_step + xi * channels;
...
@@ -546,10 +677,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
...
@@ -546,10 +677,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
}
}
}
}
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int h, int w, int level, int nr_plane, int channels,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
float cmax_data_term, int cimg_step, int cth)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
...
@@ -559,7 +690,7 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
...
@@ -559,7 +690,7 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
{
{
int y0 = y << level;
int y0 = y << level;
int yt = (y + 1) << level;
int yt = (y + 1) << level;
int x0 = x << level;
int x0 = x << level;
int xt = (x + 1) << level;
int xt = (x + 1) << level;
...
@@ -575,7 +706,7 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
...
@@ -575,7 +706,7 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
{
{
int sel_disp = selected_disparity[d * cdisp_step2];
int sel_disp = selected_disparity[d * cdisp_step2];
int xr = xi - sel_disp;
int xr = xi - sel_disp;
if (xr < 0 || sel_disp < cth)
if (xr < 0 || sel_disp < cth)
val += cdata_weight * cmax_data_term;
val += cdata_weight * cmax_data_term;
else
else
...
@@ -598,31 +729,31 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
...
@@ -598,31 +729,31 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem,
__global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane,
int level, int rows, int cols, int h, int nr_plane,
int channels, int winsz,
int channels, int winsz,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
float cdata_weight, float cmax_data_term, int cimg_step,int cth)
float cdata_weight, float cmax_data_term, int cimg_step,int cth)
{
{
int x_out = get_group_id(0);
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int y_out = get_group_id(1) % h;
int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
int tid = get_local_id(0);
__global const short* selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
__global const short* selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
__global short* data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
__global short* data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
if (d < nr_plane)
if (d < nr_plane)
{
{
int sel_disp = selected_disparity[d * cdisp_step2];
int sel_disp = selected_disparity[d * cdisp_step2];
int x0 = x_out << level;
int x0 = x_out << level;
int y0 = y_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
float val = 0.0f;
if (x0 + tid < cols)
if (x0 + tid < cols)
{
{
...
@@ -632,7 +763,7 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
...
@@ -632,7 +763,7 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
{
{
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - sel_disp);
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - sel_disp);
for(int y = 0; y < len; ++y)
for(int y = 0; y < len; ++y)
{
{
if(channels == 1)
if(channels == 1)
...
@@ -644,60 +775,116 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
...
@@ -644,60 +775,116 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
lri += cimg_step;
lri += cimg_step;
}
}
}
}
}
}
__local float* dline = smem + winsz * get_local_id(2);
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
// if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
//if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
{
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
dline[tid] = val;
if(d < nr_plane)
}
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
{
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
if(d < nr_plane)
{
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
{
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
// if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < nr_plane)
//if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
{
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
__local volatile float* vdline = smem + winsz * get_local_id(2);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
{
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
if(d < nr_plane)
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
{
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
if (winsz >= 2)
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
{
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
if(d < nr_plane)
data_cost[cdisp_step1 * d] = convert_short_sat_rte(vdline[0]);
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (tid == 0)
data_cost[cdisp_step1 * d] = convert_short_sat_rte(vdline[0]);
}
}
}
}
__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright, __local float *smem,
__global uchar *cleft, __global uchar *cright, __local float *smem,
int level, int rows, int cols, int h, int nr_plane,
int level, int rows, int cols, int h, int nr_plane,
int channels, int winsz,
int channels, int winsz,
int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
float cmax_data_term, int cimg_step, int cth)
{
{
int x_out = get_group_id(0);
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int y_out = get_group_id(1) % h;
int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
int tid = get_local_id(0);
__global const float *selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
__global const float *selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
__global float *data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
__global float *data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
if (d < nr_plane)
if (d < nr_plane)
{
{
int sel_disp = selected_disparity[d * cdisp_step2];
int sel_disp = selected_disparity[d * cdisp_step2];
int x0 = x_out << level;
int x0 = x_out << level;
int y0 = y_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
float val = 0.0f;
if (x0 + tid < cols)
if (x0 + tid < cols)
{
{
...
@@ -707,7 +894,7 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
...
@@ -707,7 +894,7 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
{
{
__global
uchar*
lle
=
cleft
+
y0
*
cimg_step
+
channels
*
(
x0
+
tid
)
;
__global
uchar*
lle
=
cleft
+
y0
*
cimg_step
+
channels
*
(
x0
+
tid
)
;
__global
uchar*
lri
=
cright
+
y0
*
cimg_step
+
channels
*
(
x0
+
tid
-
sel_disp
)
;
__global
uchar*
lri
=
cright
+
y0
*
cimg_step
+
channels
*
(
x0
+
tid
-
sel_disp
)
;
for
(
int
y
=
0
; y < len; ++y)
for
(
int
y
=
0
; y < len; ++y)
{
{
if
(
channels
==
1
)
if
(
channels
==
1
)
...
@@ -719,31 +906,87 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
...
@@ -719,31 +906,87 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
lri
+=
cimg_step
;
lri
+=
cimg_step
;
}
}
}
}
}
}
__local
float*
dline
=
smem
+
winsz
*
get_local_id
(
2
)
;
__local
float*
dline
=
smem
+
winsz
*
get_local_id
(
2
)
;
dline[tid]
=
val
;
dline[tid]
=
val
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
if
(
d
<
nr_plane
)
{
//if
(
winsz
>=
256
)
{
if
(
tid
<
128
)
{
dline[tid]
+=
dline[tid
+
128]
; } barrier(CLK_LOCAL_MEM_FENCE); }
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
//if
(
winsz
>=
128
)
{
if
(
tid
<
64
)
{
dline[tid]
+=
dline[tid
+
64]
; } barrier(CLK_LOCAL_MEM_FENCE); }
if
(
winsz
>=
64
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
tid
<
32
)
vdline[tid]
+=
vdline[tid
+
32]
;
if
(
winsz
>=
64
)
if
(
tid
<
32
)
vdline[tid]
+=
vdline[tid
+
32]
;
}
if
(
winsz
>=
32
)
if
(
tid
<
16
)
vdline[tid]
+=
vdline[tid
+
16]
;
}
if
(
winsz
>=
16
)
if
(
tid
<
8
)
vdline[tid]
+=
vdline[tid
+
8]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
winsz
>=
8
)
if
(
tid
<
4
)
vdline[tid]
+=
vdline[tid
+
4]
;
if
(
winsz
>=
4
)
if
(
tid
<
2
)
vdline[tid]
+=
vdline[tid
+
2]
;
if
(
winsz
>=
2
)
if
(
tid
<
1
)
vdline[tid]
+=
vdline[tid
+
1]
;
if
(
d
<
nr_plane
)
{
if
(
tid
==
0
)
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
data_cost[cdisp_step1
*
d]
=
vdline[0]
;
if
(
winsz
>=
32
)
{
if
(
tid
<
16
)
vdline[tid]
+=
vdline[tid
+
16]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
16
)
{
if
(
tid
<
8
)
vdline[tid]
+=
vdline[tid
+
8]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
8
)
{
if
(
tid
<
4
)
vdline[tid]
+=
vdline[tid
+
4]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
4
)
{
if
(
tid
<
2
)
vdline[tid]
+=
vdline[tid
+
2]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
2
)
{
if
(
tid
<
1
)
vdline[tid]
+=
vdline[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
tid
==
0
)
data_cost[cdisp_step1
*
d]
=
vdline[0]
;
}
}
}
}
...
@@ -751,13 +994,13 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
...
@@ -751,13 +994,13 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
////////////////////////
init
message
/////////////////////////
////////////////////////
init
message
/////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
void
get_first_k_element_increase_0
(
__global
short*
u_new,
__global
short
*d_new,
__global
short
*l_new,
void
get_first_k_element_increase_0
(
__global
short*
u_new,
__global
short
*d_new,
__global
short
*l_new,
__global
short
*r_new,
__global
const
short
*u_cur,
__global
const
short
*d_cur,
__global
short
*r_new,
__global
const
short
*u_cur,
__global
const
short
*d_cur,
__global
const
short
*l_cur,
__global
const
short
*r_cur,
__global
const
short
*l_cur,
__global
const
short
*r_cur,
__global
short
*data_cost_selected,
__global
short
*disparity_selected_new,
__global
short
*data_cost_selected,
__global
short
*disparity_selected_new,
__global
short
*data_cost_new,
__global
const
short*
data_cost_cur,
__global
short
*data_cost_new,
__global
const
short*
data_cost_cur,
__global
const
short
*disparity_selected_cur,
__global
const
short
*disparity_selected_cur,
int
nr_plane,
int
nr_plane2,
int
nr_plane,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2
)
int
cdisp_step1,
int
cdisp_step2
)
{
{
for
(
int
i
=
0
; i < nr_plane; i++)
for
(
int
i
=
0
; i < nr_plane; i++)
{
{
...
@@ -780,18 +1023,18 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
...
@@ -780,18 +1023,18 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
SHRT_MAX
;
data_cost_new[id
*
cdisp_step1]
=
SHRT_MAX
;
}
}
}
}
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
__global
float
*data_cost_new,
__global
const
float
*data_cost_cur,
__global
float
*data_cost_new,
__global
const
float
*data_cost_cur,
__global
const
float
*disparity_selected_cur,
__global
const
float
*disparity_selected_cur,
int
nr_plane,
int
nr_plane2,
int
nr_plane,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2
)
int
cdisp_step1,
int
cdisp_step2
)
{
{
for
(
int
i
=
0
; i < nr_plane; i++)
for
(
int
i
=
0
; i < nr_plane; i++)
{
{
...
@@ -820,16 +1063,16 @@ void get_first_k_element_increase_1(__global float *u_new, __global float *d_new
...
@@ -820,16 +1063,16 @@ void get_first_k_element_increase_1(__global float *u_new, __global float *d_new
}
}
}
}
__kernel
void
init_message_0
(
__global
short
*u_new_,
__global
short
*d_new_,
__global
short
*l_new_,
__kernel
void
init_message_0
(
__global
short
*u_new_,
__global
short
*d_new_,
__global
short
*l_new_,
__global
short
*r_new_,
__global
short
*u_cur_,
__global
const
short
*d_cur_,
__global
short
*r_new_,
__global
short
*u_cur_,
__global
const
short
*d_cur_,
__global
const
short
*l_cur_,
__global
const
short
*r_cur_,
__global
short
*ctemp,
__global
const
short
*l_cur_,
__global
const
short
*r_cur_,
__global
short
*ctemp,
__global
short
*selected_disp_pyr_new,
__global
const
short
*selected_disp_pyr_cur,
__global
short
*selected_disp_pyr_new,
__global
const
short
*selected_disp_pyr_cur,
__global
short
*data_cost_selected_,
__global
const
short
*data_cost_,
__global
short
*data_cost_selected_,
__global
const
short
*data_cost_,
int
h,
int
w,
int
nr_plane,
int
h2,
int
w2,
int
nr_plane2,
int
h,
int
w,
int
nr_plane,
int
h2,
int
w2,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2,
int
cmsg_step1,
int
cmsg_step2
)
int
cdisp_step1,
int
cdisp_step2,
int
cmsg_step1,
int
cmsg_step2
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
if
(
y
<
h
&&
x
<
w
)
{
{
__global
const
short
*u_cur
=
u_cur_
+
min
(
h2-1,
y/2
+
1
)
*
cmsg_step2
+
x/2
;
__global
const
short
*u_cur
=
u_cur_
+
min
(
h2-1,
y/2
+
1
)
*
cmsg_step2
+
x/2
;
...
@@ -838,7 +1081,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
...
@@ -838,7 +1081,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
__global
const
short
*r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
max
(
0
,
x/2
-
1
)
;
__global
const
short
*r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
max
(
0
,
x/2
-
1
)
;
__global
short
*data_cost_new
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
short
*data_cost_new
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
const
short
*disparity_selected_cur
=
selected_disp_pyr_cur
+
y/2
*
cmsg_step2
+
x/2
;
__global
const
short
*disparity_selected_cur
=
selected_disp_pyr_cur
+
y/2
*
cmsg_step2
+
x/2
;
__global
const
short
*data_cost
=
data_cost_
+
y
*
cmsg_step1
+
x
;
__global
const
short
*data_cost
=
data_cost_
+
y
*
cmsg_step1
+
x
;
...
@@ -864,34 +1107,35 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
...
@@ -864,34 +1107,35 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
x/2
;
r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
x/2
;
get_first_k_element_increase_0
(
u_new,
d_new,
l_new,
r_new,
u_cur,
d_cur,
l_cur,
r_cur,
get_first_k_element_increase_0
(
u_new,
d_new,
l_new,
r_new,
u_cur,
d_cur,
l_cur,
r_cur,
data_cost_selected,
disparity_selected_new,
data_cost_new,
data_cost_selected,
disparity_selected_new,
data_cost_new,
data_cost,
disparity_selected_cur,
nr_plane,
nr_plane2,
data_cost,
disparity_selected_cur,
nr_plane,
nr_plane2,
cdisp_step1,
cdisp_step2
)
;
cdisp_step1,
cdisp_step2
)
;
}
}
}
}
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__global
float
*r_new_,
__global
float
*u_cur_,
__global
const
float
*d_cur_,
__global
float
*r_new_,
__global
const
float
*u_cur_,
__global
const
float
*d_cur_,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
__global
float
*selected_disp_pyr_new,
__global
const
float
*selected_disp_pyr_cur,
__global
float
*selected_disp_pyr_new,
__global
const
float
*selected_disp_pyr_cur,
__global
float
*data_cost_selected_,
__global
const
float
*data_cost_,
__global
float
*data_cost_selected_,
__global
const
float
*data_cost_,
int
h,
int
w,
int
nr_plane,
int
h2,
int
w2,
int
nr_plane2,
int
h,
int
w,
int
nr_plane,
int
h2,
int
w2,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2,
int
cmsg_step1,
int
cmsg_step2
)
int
cdisp_step1,
int
cdisp_step2,
int
cmsg_step1,
int
cmsg_step2
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
__global
const
float
*u_cur
=
u_cur_
+
min
(
h2-1,
y/2
+
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*d_cur
=
d_cur_
+
max
(
0
,
y/2
-
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
min
(
w2-1,
x/2
+
1
)
;
__global
const
float
*r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
max
(
0
,
x/2
-
1
)
;
__global
float
*data_cost_new
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
const
float
*disparity_selected_cur
=
selected_disp_pyr_cur
+
y/2
*
cmsg_step2
+
x/2
;
__global
const
float
*data_cost
=
data_cost_
+
y
*
cmsg_step1
+
x
;
__global
const
float
*u_cur
=
u_cur_
+
min
(
h2-1,
y/2
+
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*d_cur
=
d_cur_
+
max
(
0
,
y/2
-
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
min
(
w2-1,
x/2
+
1
)
;
__global
const
float
*r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
max
(
0
,
x/2
-
1
)
;
__global
float
*data_cost_new
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
const
float
*disparity_selected_cur
=
selected_disp_pyr_cur
+
y/2
*
cmsg_step2
+
x/2
;
__global
const
float
*data_cost
=
data_cost_
+
y
*
cmsg_step1
+
x
;
if
(
y
<
h
&&
x
<
w
)
{
for
(
int
d
=
0
; d < nr_plane2; d++)
for
(
int
d
=
0
; d < nr_plane2; d++)
{
{
int
idx2
=
d
*
cdisp_step2
;
int
idx2
=
d
*
cdisp_step2
;
...
@@ -899,56 +1143,80 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
...
@@ -899,56 +1143,80 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
float
val
=
data_cost[d
*
cdisp_step1]
+
u_cur[idx2]
+
d_cur[idx2]
+
l_cur[idx2]
+
r_cur[idx2]
;
float
val
=
data_cost[d
*
cdisp_step1]
+
u_cur[idx2]
+
d_cur[idx2]
+
l_cur[idx2]
+
r_cur[idx2]
;
data_cost_new[d
*
cdisp_step1]
=
val
;
data_cost_new[d
*
cdisp_step1]
=
val
;
}
}
}
__global
float
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
float
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
float
*disparity_selected_new
=
selected_disp_pyr_new
+
y
*
cmsg_step1
+
x
;
__global
float
*disparity_selected_new
=
selected_disp_pyr_new
+
y
*
cmsg_step1
+
x
;
__global
float
*u_new
=
u_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*d_new
=
d_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*l_new
=
l_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*r_new
=
r_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*u_new
=
u_new_
+
y
*
cmsg_step1
+
x
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
__global
float
*d_new
=
d_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*l_new
=
l_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*r_new
=
r_new_
+
y
*
cmsg_step1
+
x
;
if
(
y
<
h
&&
x
<
w
)
{
u_cur
=
u_cur_
+
y/2
*
cmsg_step2
+
x/2
;
u_cur
=
u_cur_
+
y/2
*
cmsg_step2
+
x/2
;
d_cur
=
d_cur_
+
y/2
*
cmsg_step2
+
x/2
;
d_cur
=
d_cur_
+
y/2
*
cmsg_step2
+
x/2
;
l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
x/2
;
l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
x/2
;
r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
x/2
;
r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
x/2
;
get_first_k_element_increase_1
(
u_new,
d_new,
l_new,
r_new,
u_cur,
d_cur,
l_cur,
r_cur,
for
(
int
i
=
0
; i < nr_plane; i++)
data_cost_selected,
disparity_selected_new,
data_cost_new,
{
data_cost,
disparity_selected_cur,
nr_plane,
nr_plane2,
float
minimum
=
FLT_MAX
;
cdisp_step1,
cdisp_step2
)
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
float
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
}
}
}
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
////////////////////
calc
all
iterations
/////////////////////
////////////////////
calc
all
iterations
/////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
void
message_per_pixel_0
(
__global
const
short
*data,
__global
short
*msg_dst,
__global
const
short
*msg1,
void
message_per_pixel_0
(
__global
const
short
*data,
__global
short
*msg_dst,
__global
const
short
*msg1,
__global
const
short
*msg2,
__global
const
short
*msg3,
__global
const
short
*msg2,
__global
const
short
*msg3,
__global
const
short
*dst_disp,
__global
const
short
*src_disp,
__global
const
short
*dst_disp,
__global
const
short
*src_disp,
int
nr_plane,
__global
short
*temp,
int
nr_plane,
__global
short
*temp,
float
cmax_disc_term,
int
cdisp_step1,
float
cdisc_single_jump
)
float
cmax_disc_term,
int
cdisp_step1,
float
cdisc_single_jump
)
{
{
short
minimum
=
SHRT_MAX
;
short
minimum
=
SHRT_MAX
;
for
(
int
d
=
0
; d < nr_plane; d++)
for
(
int
d
=
0
; d < nr_plane; d++)
{
{
int
idx
=
d
*
cdisp_step1
;
int
idx
=
d
*
cdisp_step1
;
short
val
=
data[idx]
+
msg1[idx]
+
msg2[idx]
+
msg3[idx]
;
short
val
=
data[idx]
+
msg1[idx]
+
msg2[idx]
+
msg3[idx]
;
if
(
val
<
minimum
)
if
(
val
<
minimum
)
minimum
=
val
;
minimum
=
val
;
msg_dst[idx]
=
val
;
msg_dst[idx]
=
val
;
}
}
float
sum
=
0
;
float
sum
=
0
;
for
(
int
d
=
0
; d < nr_plane; d++)
for
(
int
d
=
0
; d < nr_plane; d++)
{
{
float
cost_min
=
minimum
+
cmax_disc_term
;
float
cost_min
=
minimum
+
cmax_disc_term
;
short
src_disp_reg
=
src_disp[d
*
cdisp_step1]
;
short
src_disp_reg
=
src_disp[d
*
cdisp_step1]
;
for
(
int
d2
=
0
; d2 < nr_plane; d2++)
for
(
int
d2
=
0
; d2 < nr_plane; d2++)
cost_min
=
fmin
(
cost_min,
(
msg_dst[d2
*
cdisp_step1]
+
cost_min
=
fmin
(
cost_min,
(
msg_dst[d2
*
cdisp_step1]
+
cdisc_single_jump
*
abs
(
dst_disp[d2
*
cdisp_step1]
-
src_disp_reg
)))
;
cdisc_single_jump
*
abs
(
dst_disp[d2
*
cdisp_step1]
-
src_disp_reg
)))
;
temp[d
*
cdisp_step1]
=
convert_short_sat_rte
(
cost_min
)
;
temp[d
*
cdisp_step1]
=
convert_short_sat_rte
(
cost_min
)
;
sum
+=
cost_min
;
sum
+=
cost_min
;
...
@@ -959,32 +1227,32 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
...
@@ -959,32 +1227,32 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
msg_dst[d
*
cdisp_step1]
=
convert_short_sat_rte
(
temp[d
*
cdisp_step1]
-
sum
)
;
msg_dst[d
*
cdisp_step1]
=
convert_short_sat_rte
(
temp[d
*
cdisp_step1]
-
sum
)
;
}
}
void
message_per_pixel_1
(
__global
const
float
*data,
__global
float
*msg_dst,
__global
const
float
*msg1,
void
message_per_pixel_1
(
__global
const
float
*data,
__global
float
*msg_dst,
__global
const
float
*msg1,
__global
const
float
*msg2,
__global
const
float
*msg3,
__global
const
float
*msg2,
__global
const
float
*msg3,
__global
const
float
*dst_disp,
__global
const
float
*src_disp,
__global
const
float
*dst_disp,
__global
const
float
*src_disp,
int
nr_plane,
__global
float
*temp,
int
nr_plane,
__global
float
*temp,
float
cmax_disc_term,
int
cdisp_step1,
float
cdisc_single_jump
)
float
cmax_disc_term,
int
cdisp_step1,
float
cdisc_single_jump
)
{
{
float
minimum
=
FLT_MAX
;
float
minimum
=
FLT_MAX
;
for
(
int
d
=
0
; d < nr_plane; d++)
for
(
int
d
=
0
; d < nr_plane; d++)
{
{
int
idx
=
d
*
cdisp_step1
;
int
idx
=
d
*
cdisp_step1
;
float
val
=
data[idx]
+
msg1[idx]
+
msg2[idx]
+
msg3[idx]
;
float
val
=
data[idx]
+
msg1[idx]
+
msg2[idx]
+
msg3[idx]
;
if
(
val
<
minimum
)
if
(
val
<
minimum
)
minimum
=
val
;
minimum
=
val
;
msg_dst[idx]
=
val
;
msg_dst[idx]
=
val
;
}
}
float
sum
=
0
;
float
sum
=
0
;
for
(
int
d
=
0
; d < nr_plane; d++)
for
(
int
d
=
0
; d < nr_plane; d++)
{
{
float
cost_min
=
minimum
+
cmax_disc_term
;
float
cost_min
=
minimum
+
cmax_disc_term
;
float
src_disp_reg
=
src_disp[d
*
cdisp_step1]
;
float
src_disp_reg
=
src_disp[d
*
cdisp_step1]
;
for
(
int
d2
=
0
; d2 < nr_plane; d2++)
for
(
int
d2
=
0
; d2 < nr_plane; d2++)
cost_min
=
fmin
(
cost_min,
(
msg_dst[d2
*
cdisp_step1]
+
cost_min
=
fmin
(
cost_min,
(
msg_dst[d2
*
cdisp_step1]
+
cdisc_single_jump
*
fabs
(
dst_disp[d2
*
cdisp_step1]
-
src_disp_reg
)))
;
cdisc_single_jump
*
fabs
(
dst_disp[d2
*
cdisp_step1]
-
src_disp_reg
)))
;
temp[d
*
cdisp_step1]
=
cost_min
;
temp[d
*
cdisp_step1]
=
cost_min
;
sum
+=
cost_min
;
sum
+=
cost_min
;
...
@@ -995,64 +1263,64 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
...
@@ -995,64 +1263,64 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
msg_dst[d
*
cdisp_step1]
=
temp[d
*
cdisp_step1]
-
sum
;
msg_dst[d
*
cdisp_step1]
=
temp[d
*
cdisp_step1]
-
sum
;
}
}
__kernel
void
compute_message_0
(
__global
short
*u_,
__global
short
*d_,
__global
short
*l_,
__global
short
*r_,
__kernel
void
compute_message_0
(
__global
short
*u_,
__global
short
*d_,
__global
short
*l_,
__global
short
*r_,
__global
const
short
*data_cost_selected,
__global
const
short
*selected_disp_pyr_cur,
__global
const
short
*data_cost_selected,
__global
const
short
*selected_disp_pyr_cur,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
float
cmax_disc_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdisc_single_jump
)
float
cmax_disc_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdisc_single_jump
)
{
{
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
int
x
=
((
get_global_id
(
0
))
<<
1
)
+
((
y
+
i
)
&
1
)
;
int
x
=
((
get_global_id
(
0
))
<<
1
)
+
((
y
+
i
)
&
1
)
;
if
(
y
>
0
&&
y
<
h
-
1
&&
x
>
0
&&
x
<
w
-
1
)
if
(
y
>
0
&&
y
<
h
-
1
&&
x
>
0
&&
x
<
w
-
1
)
{
{
__global
const
short
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
const
short
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
short
*u
=
u_
+
y
*
cmsg_step1
+
x
;
__global
short
*u
=
u_
+
y
*
cmsg_step1
+
x
;
__global
short
*d
=
d_
+
y
*
cmsg_step1
+
x
;
__global
short
*d
=
d_
+
y
*
cmsg_step1
+
x
;
__global
short
*l
=
l_
+
y
*
cmsg_step1
+
x
;
__global
short
*l
=
l_
+
y
*
cmsg_step1
+
x
;
__global
short
*r
=
r_
+
y
*
cmsg_step1
+
x
;
__global
short
*r
=
r_
+
y
*
cmsg_step1
+
x
;
__global
const
short
*disp
=
selected_disp_pyr_cur
+
y
*
cmsg_step1
+
x
;
__global
const
short
*disp
=
selected_disp_pyr_cur
+
y
*
cmsg_step1
+
x
;
__global
short
*temp
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
short
*temp
=
ctemp
+
y
*
cmsg_step1
+
x
;
message_per_pixel_0
(
data,
u,
r
-
1
,
u
+
cmsg_step1,
l
+
1
,
disp,
disp
-
cmsg_step1,
nr_plane,
temp,
message_per_pixel_0
(
data,
u,
r
-
1
,
u
+
cmsg_step1,
l
+
1
,
disp,
disp
-
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_0
(
data,
d,
d
-
cmsg_step1,
r
-
1
,
l
+
1
,
disp,
disp
+
cmsg_step1,
nr_plane,
temp,
message_per_pixel_0
(
data,
d,
d
-
cmsg_step1,
r
-
1
,
l
+
1
,
disp,
disp
+
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_0
(
data,
l,
u
+
cmsg_step1,
d
-
cmsg_step1,
l
+
1
,
disp,
disp
-
1
,
nr_plane,
temp,
message_per_pixel_0
(
data,
l,
u
+
cmsg_step1,
d
-
cmsg_step1,
l
+
1
,
disp,
disp
-
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_0
(
data,
r,
u
+
cmsg_step1,
d
-
cmsg_step1,
r
-
1
,
disp,
disp
+
1
,
nr_plane,
temp,
message_per_pixel_0
(
data,
r,
u
+
cmsg_step1,
d
-
cmsg_step1,
r
-
1
,
disp,
disp
+
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
}
}
}
}
__kernel
void
compute_message_1
(
__global
float
*u_,
__global
float
*d_,
__global
float
*l_,
__global
float
*r_,
__kernel
void
compute_message_1
(
__global
float
*u_,
__global
float
*d_,
__global
float
*l_,
__global
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*selected_disp_pyr_cur,
__global
const
float
*data_cost_selected,
__global
const
float
*selected_disp_pyr_cur,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
float
cmax_disc_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdisc_single_jump
)
float
cmax_disc_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdisc_single_jump
)
{
{
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
int
x
=
((
get_global_id
(
0
))
<<
1
)
+
((
y
+
i
)
&
1
)
;
int
x
=
((
get_global_id
(
0
))
<<
1
)
+
((
y
+
i
)
&
1
)
;
if
(
y
>
0
&&
y
<
h
-
1
&&
x
>
0
&&
x
<
w
-
1
)
if
(
y
>
0
&&
y
<
h
-
1
&&
x
>
0
&&
x
<
w
-
1
)
{
{
__global
const
float
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
const
float
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
float
*u
=
u_
+
y
*
cmsg_step1
+
x
;
__global
float
*u
=
u_
+
y
*
cmsg_step1
+
x
;
__global
float
*d
=
d_
+
y
*
cmsg_step1
+
x
;
__global
float
*d
=
d_
+
y
*
cmsg_step1
+
x
;
__global
float
*l
=
l_
+
y
*
cmsg_step1
+
x
;
__global
float
*l
=
l_
+
y
*
cmsg_step1
+
x
;
__global
float
*r
=
r_
+
y
*
cmsg_step1
+
x
;
__global
float
*r
=
r_
+
y
*
cmsg_step1
+
x
;
__global
const
float
*disp
=
selected_disp_pyr_cur
+
y
*
cmsg_step1
+
x
;
__global
const
float
*disp
=
selected_disp_pyr_cur
+
y
*
cmsg_step1
+
x
;
__global
float
*temp
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
float
*temp
=
ctemp
+
y
*
cmsg_step1
+
x
;
message_per_pixel_1
(
data,
u,
r
-
1
,
u
+
cmsg_step1,
l
+
1
,
disp,
disp
-
cmsg_step1,
nr_plane,
temp,
message_per_pixel_1
(
data,
u,
r
-
1
,
u
+
cmsg_step1,
l
+
1
,
disp,
disp
-
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_1
(
data,
d,
d
-
cmsg_step1,
r
-
1
,
l
+
1
,
disp,
disp
+
cmsg_step1,
nr_plane,
temp,
message_per_pixel_1
(
data,
d,
d
-
cmsg_step1,
r
-
1
,
l
+
1
,
disp,
disp
+
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_1
(
data,
l,
u
+
cmsg_step1,
d
-
cmsg_step1,
l
+
1
,
disp,
disp
-
1
,
nr_plane,
temp,
message_per_pixel_1
(
data,
l,
u
+
cmsg_step1,
d
-
cmsg_step1,
l
+
1
,
disp,
disp
-
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_1
(
data,
r,
u
+
cmsg_step1,
d
-
cmsg_step1,
r
-
1
,
disp,
disp
+
1
,
nr_plane,
temp,
message_per_pixel_1
(
data,
r,
u
+
cmsg_step1,
d
-
cmsg_step1,
r
-
1
,
disp,
disp
+
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
}
}
}
}
...
@@ -1060,15 +1328,15 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
...
@@ -1060,15 +1328,15 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
///////////////////////////
output
////////////////////////////
///////////////////////////
output
////////////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
__kernel
void
compute_disp_0
(
__global
const
short
*u_,
__global
const
short
*d_,
__global
const
short
*l_,
__kernel
void
compute_disp_0
(
__global
const
short
*u_,
__global
const
short
*d_,
__global
const
short
*l_,
__global
const
short
*r_,
__global
const
short
*
data_cost_selected,
__global
const
short
*r_,
__global
const
short
*
data_cost_selected,
__global
const
short
*disp_selected_pyr,
__global
const
short
*disp_selected_pyr,
__global
short*
disp,
__global
short*
disp,
int
res_step,
int
cols,
int
rows,
int
nr_plane,
int
res_step,
int
cols,
int
rows,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1
)
int
cmsg_step1,
int
cdisp_step1
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
>
0
&&
y
<
rows
-
1
&&
x
>
0
&&
x
<
cols
-
1
)
if
(
y
>
0
&&
y
<
rows
-
1
&&
x
>
0
&&
x
<
cols
-
1
)
{
{
__global
const
short
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
const
short
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
...
@@ -1078,15 +1346,15 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
...
@@ -1078,15 +1346,15 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
__global
const
short
*d
=
d_
+
(
y-1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
short
*d
=
d_
+
(
y-1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
short
*l
=
l_
+
(
y+0
)
*
cmsg_step1
+
(
x+1
)
;
__global
const
short
*l
=
l_
+
(
y+0
)
*
cmsg_step1
+
(
x+1
)
;
__global
const
short
*r
=
r_
+
(
y+0
)
*
cmsg_step1
+
(
x-1
)
;
__global
const
short
*r
=
r_
+
(
y+0
)
*
cmsg_step1
+
(
x-1
)
;
short
best
=
0
;
short
best
=
0
;
short
best_val
=
SHRT_MAX
;
short
best_val
=
SHRT_MAX
;
for
(
int
i
=
0
; i < nr_plane; ++i)
for
(
int
i
=
0
; i < nr_plane; ++i)
{
{
int
idx
=
i
*
cdisp_step1
;
int
idx
=
i
*
cdisp_step1
;
short
val
=
data[idx]+
u[idx]
+
d[idx]
+
l[idx]
+
r[idx]
;
short
val
=
data[idx]+
u[idx]
+
d[idx]
+
l[idx]
+
r[idx]
;
if
(
val
<
best_val
)
if
(
val
<
best_val
)
{
{
best_val
=
val
;
best_val
=
val
;
...
@@ -1097,15 +1365,15 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
...
@@ -1097,15 +1365,15 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
}
}
}
}
__kernel
void
compute_disp_1
(
__global
const
float
*u_,
__global
const
float
*d_,
__global
const
float
*l_,
__kernel
void
compute_disp_1
(
__global
const
float
*u_,
__global
const
float
*d_,
__global
const
float
*l_,
__global
const
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*disp_selected_pyr,
__global
const
float
*disp_selected_pyr,
__global
short
*disp,
__global
short
*disp,
int
res_step,
int
cols,
int
rows,
int
nr_plane,
int
res_step,
int
cols,
int
rows,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1
)
int
cmsg_step1,
int
cdisp_step1
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
>
0
&&
y
<
rows
-
1
&&
x
>
0
&&
x
<
cols
-
1
)
if
(
y
>
0
&&
y
<
rows
-
1
&&
x
>
0
&&
x
<
cols
-
1
)
{
{
__global
const
float
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
const
float
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
...
@@ -1115,14 +1383,14 @@ __kernel void compute_disp_1(__global const float *u_, __global const float *d_,
...
@@ -1115,14 +1383,14 @@ __kernel void compute_disp_1(__global const float *u_, __global const float *d_,
__global
const
float
*d
=
d_
+
(
y-1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
float
*d
=
d_
+
(
y-1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
float
*l
=
l_
+
(
y+0
)
*
cmsg_step1
+
(
x+1
)
;
__global
const
float
*l
=
l_
+
(
y+0
)
*
cmsg_step1
+
(
x+1
)
;
__global
const
float
*r
=
r_
+
(
y+0
)
*
cmsg_step1
+
(
x-1
)
;
__global
const
float
*r
=
r_
+
(
y+0
)
*
cmsg_step1
+
(
x-1
)
;
short
best
=
0
;
short
best
=
0
;
short
best_val
=
SHRT_MAX
;
short
best_val
=
SHRT_MAX
;
for
(
int
i
=
0
; i < nr_plane; ++i)
for
(
int
i
=
0
; i < nr_plane; ++i)
{
{
int
idx
=
i
*
cdisp_step1
;
int
idx
=
i
*
cdisp_step1
;
float
val
=
data[idx]+
u[idx]
+
d[idx]
+
l[idx]
+
r[idx]
;
float
val
=
data[idx]+
u[idx]
+
d[idx]
+
l[idx]
+
r[idx]
;
if
(
val
<
best_val
)
if
(
val
<
best_val
)
{
{
best_val
=
val
;
best_val
=
val
;
...
@@ -1132,4 +1400,3 @@ __kernel void compute_disp_1(__global const float *u_, __global const float *d_,
...
@@ -1132,4 +1400,3 @@ __kernel void compute_disp_1(__global const float *u_, __global const float *d_,
disp[res_step
*
y
+
x]
=
best
;
disp[res_step
*
y
+
x]
=
best
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment