Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
9cfa24e5
Commit
9cfa24e5
authored
Apr 23, 2013
by
peng xiao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix thread sync for csbp.
parent
c701d542
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
309 additions
and
42 deletions
+309
-42
stereocsbp.cl
modules/ocl/src/opencl/stereocsbp.cl
+309
-42
No files found.
modules/ocl/src/opencl/stereocsbp.cl
View file @
9cfa24e5
...
...
@@ -221,6 +221,8 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
cur
=
next
;
next
=
data_cost[
(
d
+
1
)
*
cdisp_step1]
;
}
for
(
int
i
=
nr_local_minimum
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
...
...
@@ -402,23 +404,90 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
{
if (tid < 128)
dline[tid] += dline[tid + 128];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
{
if (tid < 64)
dline[tid] += dline[tid + 64];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
__global short* data_cost = ctemp + y_out * cmsg_step1 + x_out;
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
__global short* data_cost = ctemp + y_out * cmsg_step1 + x_out;
if (tid == 0)
data_cost[cdisp_step1 * d] = convert_short_sat_rte(dline[0]);
}
...
...
@@ -470,23 +539,85 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
if (tid < 128)
dline[tid] += dline[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
if (tid < 64)
dline[tid] += dline[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
__global float *data_cost = ctemp + y_out * cmsg_step1 + x_out;
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__global float *data_cost = ctemp + y_out * cmsg_step1 + x_out;
__local float* dline = smem + winsz * get_local_id(2);
if (tid == 0)
data_cost[cdisp_step1 * d] = dline[0];
}
...
...
@@ -652,21 +783,77 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
}
barrier(CLK_LOCAL_MEM_FENCE);
// if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
//if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
{
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
// if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
//if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
{
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
{
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (winsz >= 64) if (tid < 32) vdline[tid] += vdline[tid + 32];
if (winsz >= 32) if (tid < 16) vdline[tid] += vdline[tid + 16];
if (winsz >= 16) if (tid < 8) vdline[tid] += vdline[tid + 8];
if (winsz >= 8) if (tid < 4) vdline[tid] += vdline[tid + 4];
if (winsz >= 4) if (tid < 2) vdline[tid] += vdline[tid + 2];
if (winsz >= 2) if (tid < 1) vdline[tid] += vdline[tid + 1];
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
{
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
{
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
{
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (tid == 0)
data_cost[cdisp_step1 * d] = convert_short_sat_rte(vdline[0]);
}
...
...
@@ -727,21 +914,77 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
64
)
{
if
(
tid
<
32
)
vdline[tid]
+=
vdline[tid
+
32]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
//if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
32
)
{
if
(
tid
<
16
)
vdline[tid]
+=
vdline[tid
+
16]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
winsz
>=
64
)
if
(
tid
<
32
)
vdline[tid]
+=
vdline[tid
+
32]
;
if
(
winsz
>=
32
)
if
(
tid
<
16
)
vdline[tid]
+=
vdline[tid
+
16]
;
if
(
winsz
>=
16
)
if
(
tid
<
8
)
vdline[tid]
+=
vdline[tid
+
8]
;
if
(
winsz
>=
8
)
if
(
tid
<
4
)
vdline[tid]
+=
vdline[tid
+
4]
;
if
(
winsz
>=
4
)
if
(
tid
<
2
)
vdline[tid]
+=
vdline[tid
+
2]
;
if
(
winsz
>=
2
)
if
(
tid
<
1
)
vdline[tid]
+=
vdline[tid
+
1]
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
16
)
{
if
(
tid
<
8
)
vdline[tid]
+=
vdline[tid
+
8]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
8
)
{
if
(
tid
<
4
)
vdline[tid]
+=
vdline[tid
+
4]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
4
)
{
if
(
tid
<
2
)
vdline[tid]
+=
vdline[tid
+
2]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
2
)
{
if
(
tid
<
1
)
vdline[tid]
+=
vdline[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
tid
==
0
)
data_cost[cdisp_step1
*
d]
=
vdline[0]
;
}
...
...
@@ -870,7 +1113,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
}
}
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__global
float
*r_new_,
__global
float
*u_cur_,
__global
const
float
*d_cur_,
__global
float
*r_new_,
__global
const
float
*u_cur_,
__global
const
float
*d_cur_,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
__global
float
*selected_disp_pyr_new,
__global
const
float
*selected_disp_pyr_cur,
__global
float
*data_cost_selected_,
__global
const
float
*data_cost_,
...
...
@@ -880,8 +1123,7 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
__global
const
float
*u_cur
=
u_cur_
+
min
(
h2-1,
y/2
+
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*d_cur
=
d_cur_
+
max
(
0
,
y/2
-
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
min
(
w2-1,
x/2
+
1
)
;
...
...
@@ -892,6 +1134,8 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
__global
const
float
*disparity_selected_cur
=
selected_disp_pyr_cur
+
y/2
*
cmsg_step2
+
x/2
;
__global
const
float
*data_cost
=
data_cost_
+
y
*
cmsg_step1
+
x
;
if
(
y
<
h
&&
x
<
w
)
{
for
(
int
d
=
0
; d < nr_plane2; d++)
{
int
idx2
=
d
*
cdisp_step2
;
...
...
@@ -899,6 +1143,7 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
float
val
=
data_cost[d
*
cdisp_step1]
+
u_cur[idx2]
+
d_cur[idx2]
+
l_cur[idx2]
+
r_cur[idx2]
;
data_cost_new[d
*
cdisp_step1]
=
val
;
}
}
__global
float
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
float
*disparity_selected_new
=
selected_disp_pyr_new
+
y
*
cmsg_step1
+
x
;
...
...
@@ -908,17 +1153,40 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
__global
float
*l_new
=
l_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*r_new
=
r_new_
+
y
*
cmsg_step1
+
x
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
y
<
h
&&
x
<
w
)
{
u_cur
=
u_cur_
+
y/2
*
cmsg_step2
+
x/2
;
d_cur
=
d_cur_
+
y/2
*
cmsg_step2
+
x/2
;
l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
x/2
;
r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
x/2
;
get_first_k_element_increase_1
(
u_new,
d_new,
l_new,
r_new,
u_cur,
d_cur,
l_cur,
r_cur,
data_cost_selected,
disparity_selected_new,
data_cost_new,
data_cost,
disparity_selected_cur,
nr_plane,
nr_plane2,
cdisp_step1,
cdisp_step2
)
;
for
(
int
i
=
0
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
float
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
}
///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////
...
...
@@ -1132,4 +1400,3 @@ __kernel void compute_disp_1(__global const float *u_, __global const float *d_,
disp[res_step
*
y
+
x]
=
best
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment