Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f1060ac5
Commit
f1060ac5
authored
Jul 17, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
a little simplify to pyrlk kernel
parent
cdd9234f
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
48 additions
and
169 deletions
+48
-169
perf_opticalflow.cpp
modules/ocl/perf/perf_opticalflow.cpp
+2
-2
pyrlk.cl
modules/ocl/src/opencl/pyrlk.cl
+46
-167
No files found.
modules/ocl/perf/perf_opticalflow.cpp
View file @
f1060ac5
...
...
@@ -48,8 +48,8 @@
///////////// PyrLKOpticalFlow ////////////////////////
PERFTEST
(
PyrLKOpticalFlow
)
{
std
::
string
images1
[]
=
{
"rubberwhale1.png"
,
"
basketball1.pn
g"
};
std
::
string
images2
[]
=
{
"rubberwhale2.png"
,
"
basketball2.pn
g"
};
std
::
string
images1
[]
=
{
"rubberwhale1.png"
,
"
aloeL.jp
g"
};
std
::
string
images2
[]
=
{
"rubberwhale2.png"
,
"
aloeR.jp
g"
};
for
(
size_t
i
=
0
;
i
<
sizeof
(
images1
)
/
sizeof
(
std
::
string
);
i
++
)
{
...
...
modules/ocl/src/opencl/pyrlk.cl
View file @
f1060ac5
...
...
@@ -17,6 +17,7 @@
//
@Authors
//
Dachuan
Zhao,
dachuan@multicorewareinc.com
//
Yao
Wang,
bitwangyaoyao@gmail.com
//
Xiaopeng
Fu,
fuxiaopeng2222@163.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
...
...
@@ -47,6 +48,7 @@
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
define
BUFFER
64
#
define
BUFFER2
BUFFER>>1
#
ifndef
WAVE_SIZE
#
define
WAVE_SIZE
1
#
endif
...
...
@@ -58,53 +60,16 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local
smem3[tid]
=
val3
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
{
smem1[tid]
+=
smem1[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
smem3[tid]
+=
smem3[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
smem3[tid]
+=
smem3[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
smem3[tid]
+=
smem3[tid
+
8]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
for
(
int
i
=
BUFFER2
; i > 0; i >>= 1)
{
smem1[tid]
+=
smem1[tid
+
4]
;
smem2[tid]
+=
smem2[tid
+
4]
;
smem3[tid]
+=
smem3[tid
+
4]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
if
(
tid
<
i
)
{
smem1[tid]
+=
smem1[tid
+
2
]
;
smem2[tid]
+=
smem2[tid
+
2
]
;
smem3[tid]
+=
smem3[tid
+
2
]
;
smem1[tid]
+=
smem1[tid
+
i
]
;
smem2[tid]
+=
smem2[tid
+
i
]
;
smem3[tid]
+=
smem3[tid
+
i
]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
smem1[tid]
+
smem1[tid
+
1]
;
smem2[BUFFER]
=
smem2[tid]
+
smem2[tid
+
1]
;
smem3[BUFFER]
=
smem3[tid]
+
smem3[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
void
reduce2
(
float
val1,
float
val2,
volatile
__local
float*
smem1,
volatile
__local
float*
smem2,
int
tid
)
...
...
@@ -113,47 +78,15 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
smem2[tid]
=
val2
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
{
smem1[tid]
+=
smem1[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
for
(
int
i
=
BUFFER2
; i > 0; i >>= 1)
{
smem1[tid]
+=
smem1[tid
+
4]
;
smem2[tid]
+=
smem2[tid
+
4]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
if
(
tid
<
i
)
{
smem1[tid]
+=
smem1[tid
+
2
]
;
smem2[tid]
+=
smem2[tid
+
2
]
;
smem1[tid]
+=
smem1[tid
+
i
]
;
smem2[tid]
+=
smem2[tid
+
i
]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
smem1[tid]
+
smem1[tid
+
1]
;
smem2[BUFFER]
=
smem2[tid]
+
smem2[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
void
reduce1
(
float
val1,
volatile
__local
float*
smem1,
int
tid
)
...
...
@@ -161,45 +94,18 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
smem1[tid]
=
val1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
{
smem1[tid]
+=
smem1[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
+=
smem1[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
for
(
int
i
=
BUFFER2
; i > 0; i >>= 1)
{
smem1[tid]
+=
smem1[tid
+
8]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
if
(
tid
<
i
)
{
smem1[tid]
+=
smem1[tid
+
4
]
;
smem1[tid]
+=
smem1[tid
+
i
]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
smem1[tid]
+=
smem1[tid
+
2]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
smem1[tid]
+
smem1[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
#
else
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
__local
volatile
float*
smem3,
int
tid
)
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
__local
volatile
float*
smem3,
int
tid
)
{
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
...
...
@@ -212,15 +118,19 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
smem2[tid]
+=
smem2[tid
+
32]
;
smem3[tid]
+=
smem3[tid
+
32]
;
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
smem3[tid]
+=
smem3[tid
+
16]
;
#
if
WAVE_SIZE
<16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
...
...
@@ -238,6 +148,7 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
smem2[tid]
+=
smem2[tid
+
1]
;
smem3[tid]
+=
smem3[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
void
reduce2
(
float
val1,
float
val2,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
int
tid
)
...
...
@@ -251,14 +162,18 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
smem1[tid]
+=
smem1[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
#
if
WAVE_SIZE
<16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
...
...
@@ -272,6 +187,7 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
smem1[tid]
+=
smem1[tid
+
1]
;
smem2[tid]
+=
smem2[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
void
reduce1
(
float
val1,
__local
volatile
float*
smem1,
int
tid
)
...
...
@@ -283,19 +199,24 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
{
smem1[tid]
+=
smem1[tid
+
32]
;
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
smem1[tid]
+=
smem1[tid
+
16]
;
#
if
WAVE_SIZE
<16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
smem1[tid]
+=
smem1[tid
+
8]
;
smem1[tid]
+=
smem1[tid
+
4]
;
smem1[tid]
+=
smem1[tid
+
2]
;
smem1[tid]
+=
smem1[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
#
endif
...
...
@@ -388,15 +309,9 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
__global
const
float2*
prevPts,
int
prevPtsStep,
__global
float2*
nextPts,
int
nextPtsStep,
__global
uchar*
status,
__global
float*
err,
const
int
level,
const
int
rows,
const
int
cols,
int
PATCH_X,
int
PATCH_Y,
int
cn,
int
c_winSize_x,
int
c_winSize_y,
int
c_iters,
char
calcErr
)
{
#
ifdef
CPU
__local
float
smem1[BUFFER+1]
;
__local
float
smem2[BUFFER+1]
;
__local
float
smem3[BUFFER+1]
;
#
else
__local
float
smem1[BUFFER]
;
__local
float
smem2[BUFFER]
;
__local
float
smem3[BUFFER]
;
#
endif
unsigned
int
xid=get_local_id
(
0
)
;
unsigned
int
yid=get_local_id
(
1
)
;
...
...
@@ -492,17 +407,11 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
A11 = smem1[BUFFER];
A12 = smem2[BUFFER];
A22 = smem3[BUFFER];
#else
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float D = A11 * A22 - A12 * A12;
...
...
@@ -592,15 +501,10 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
}
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
b1 = smem1[BUFFER];
b2 = smem2[BUFFER];
#else
b1 = smem1[0];
b2 = smem2[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float2 delta;
delta.x = A12 * b2 - A22 * b1;
...
...
@@ -675,11 +579,7 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
nextPts[gid] = prevPt;
if (calcErr)
#ifdef CPU
err[gid] = smem1[BUFFER] / (float)(c_winSize_x * c_winSize_y);
#else
err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
#endif
}
}
...
...
@@ -688,15 +588,9 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
#ifdef CPU
__local float smem1[BUFFER+1];
__local float smem2[BUFFER+1];
__local float smem3[BUFFER+1];
#else
__local float smem1[BUFFER];
__local float smem2[BUFFER];
__local float smem3[BUFFER];
#endif
unsigned int xid=get_local_id(0);
unsigned int yid=get_local_id(1);
...
...
@@ -795,17 +689,11 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
A11 = smem1[BUFFER];
A12 = smem2[BUFFER];
A22 = smem3[BUFFER];
#else
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float D = A11 * A22 - A12 * A12;
...
...
@@ -895,15 +783,10 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
}
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
b1 = smem1[BUFFER];
b2 = smem2[BUFFER];
#else
b1 = smem1[0];
b2 = smem2[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float2 delta;
delta.x = A12 * b2 - A22 * b1;
...
...
@@ -977,11 +860,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
nextPts[gid] = nextPt;
if (calcErr)
#ifdef CPU
err[gid] = smem1[BUFFER] / (float)(3 * c_winSize_x * c_winSize_y);
#else
err[gid] = smem1[0] / (float)(3 * c_winSize_x * c_winSize_y);
#endif
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment