Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fd4a6f0a
Commit
fd4a6f0a
authored
Apr 03, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
make the sparse method give correct results on CPU ocl
Add CL_CPU to supportsFeature check simplify the logic of pyrlk
parent
656594ad
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
300 additions
and
494 deletions
+300
-494
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+1
-1
initialization.cpp
modules/ocl/src/initialization.cpp
+6
-0
matrix_operations.cpp
modules/ocl/src/matrix_operations.cpp
+1
-1
pyrlk.cl
modules/ocl/src/opencl/pyrlk.cl
+259
-13
pyrlk.cpp
modules/ocl/src/pyrlk.cpp
+33
-479
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
fd4a6f0a
...
...
@@ -155,7 +155,7 @@ namespace cv
static
Context
*
getContext
();
static
void
setContext
(
Info
&
oclinfo
);
enum
{
CL_DOUBLE
,
CL_UNIFIED_MEM
};
enum
{
CL_DOUBLE
,
CL_UNIFIED_MEM
,
CL_CPU
};
bool
supportsFeature
(
int
ftype
);
size_t
computeUnits
();
void
*
oclContext
();
...
...
modules/ocl/src/initialization.cpp
View file @
fd4a6f0a
...
...
@@ -979,6 +979,12 @@ namespace cv
return
impl
->
double_support
==
1
;
case
CL_UNIFIED_MEM
:
return
impl
->
unified_memory
==
1
;
case
CL_CPU
:
cl_device_type
devicetype
;
clGetDeviceInfo
(
impl
->
devices
[
impl
->
devnum
],
CL_DEVICE_TYPE
,
sizeof
(
cl_device_type
),
&
devicetype
,
NULL
);
return
devicetype
==
CVCL_DEVICE_TYPE_CPU
;
default
:
return
false
;
}
...
...
modules/ocl/src/matrix_operations.cpp
View file @
fd4a6f0a
...
...
@@ -394,7 +394,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be
if
(
rtype
<
0
)
rtype
=
type
();
else
rtype
=
CV_MAKETYPE
(
CV_MAT_DEPTH
(
rtype
),
channels
());
rtype
=
CV_MAKETYPE
(
CV_MAT_DEPTH
(
rtype
),
ocl
channels
());
//int scn = channels();
int
sdepth
=
depth
(),
ddepth
=
CV_MAT_DEPTH
(
rtype
);
...
...
modules/ocl/src/opencl/pyrlk.cl
View file @
fd4a6f0a
...
...
@@ -184,6 +184,209 @@ float linearFilter_float(__global const float* src, int srcStep, int cn, float2
}
#
define
BUFFER
64
#
ifdef
CPU
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
float*
smem1,
__local
float*
smem2,
__local
float*
smem3,
int
tid
)
{
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
smem3[tid]
=
val3
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
if
BUFFER
>
128
if
(
tid
<
128
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
128]
;
smem2[tid]
=
val2
+=
smem2[tid
+
128]
;
smem3[tid]
=
val3
+=
smem3[tid
+
128]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
#
if
BUFFER
>
64
if
(
tid
<
64
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
64]
;
smem2[tid]
=
val2
+=
smem2[tid
+
64]
;
smem3[tid]
=
val3
+=
smem3[tid
+
64]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
tid
<
32
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
32]
;
smem2[tid]
=
val2
+=
smem2[tid
+
32]
;
smem3[tid]
=
val3
+=
smem3[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
16]
;
smem2[tid]
=
val2
+=
smem2[tid
+
16]
;
smem3[tid]
=
val3
+=
smem3[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
8]
;
smem2[tid]
=
val2
+=
smem2[tid
+
8]
;
smem3[tid]
=
val3
+=
smem3[tid
+
8]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
4]
;
smem2[tid]
=
val2
+=
smem2[tid
+
4]
;
smem3[tid]
=
val3
+=
smem3[tid
+
4]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
smem1[tid]
=
val1
+=
smem1[tid
+
2]
;
smem2[tid]
=
val2
+=
smem2[tid
+
2]
;
smem3[tid]
=
val3
+=
smem3[tid
+
2]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
val1
+=
smem1[tid
+
1]
;
smem2[BUFFER]
=
val2
+=
smem2[tid
+
1]
;
smem3[BUFFER]
=
val3
+=
smem3[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
void
reduce2
(
float
val1,
float
val2,
volatile
__local
float*
smem1,
volatile
__local
float*
smem2,
int
tid
)
{
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
if
BUFFER
>
128
if
(
tid
<
128
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
128]
)
;
smem2[tid]
=
(
val2
+=
smem2[tid
+
128]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
#
if
BUFFER
>
64
if
(
tid
<
64
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
64]
)
;
smem2[tid]
=
(
val2
+=
smem2[tid
+
64]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
tid
<
32
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
32]
)
;
smem2[tid]
=
(
val2
+=
smem2[tid
+
32]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
16]
)
;
smem2[tid]
=
(
val2
+=
smem2[tid
+
16]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
8]
)
;
smem2[tid]
=
(
val2
+=
smem2[tid
+
8]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
4]
)
;
smem2[tid]
=
(
val2
+=
smem2[tid
+
4]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
2]
)
;
smem2[tid]
=
(
val2
+=
smem2[tid
+
2]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
(
val1
+=
smem1[tid
+
1]
)
;
smem2[BUFFER]
=
(
val2
+=
smem2[tid
+
1]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
void
reduce1
(
float
val1,
volatile
__local
float*
smem1,
int
tid
)
{
smem1[tid]
=
val1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
if
BUFFER
>
128
if
(
tid
<
128
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
128]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
#
if
BUFFER
>
64
if
(
tid
<
64
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
64]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
tid
<
32
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
32]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
16]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
8]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
4]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
smem1[tid]
=
(
val1
+=
smem1[tid
+
2]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
(
val1
+=
smem1[tid
+
1]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
#
else
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
float*
smem1,
__local
float*
smem2,
__local
float*
smem3,
int
tid
)
{
smem1[tid]
=
val1
;
...
...
@@ -325,6 +528,7 @@ void reduce1(float val1, __local float* smem1, int tid)
vmem1[tid]
=
val1
+=
vmem1[tid
+
1]
;
}
}
#
endif
#
define
SCALE
(
1.0f
/
(
1
<<
20
))
#
define
THRESHOLD
0.01f
...
...
@@ -411,14 +615,20 @@ void GetError4(image2d_t J, const float x, const float y, const float4* Pch, flo
*errval
+=
fabs
(
diff.x
)
+
fabs
(
diff.y
)
+
fabs
(
diff.z
)
;
}
#
define
GRIDSIZE
3
__kernel
void
lkSparse_C1_D5
(
image2d_t
I,
image2d_t
J,
__global
const
float2*
prevPts,
int
prevPtsStep,
__global
float2*
nextPts,
int
nextPtsStep,
__global
uchar*
status,
__global
float*
err,
const
int
level,
const
int
rows,
const
int
cols,
int
PATCH_X,
int
PATCH_Y,
int
cn,
int
c_winSize_x,
int
c_winSize_y,
int
c_iters,
char
calcErr
)
{
#
ifdef
CPU
__local
float
smem1[BUFFER+1]
;
__local
float
smem2[BUFFER+1]
;
__local
float
smem3[BUFFER+1]
;
#
else
__local
float
smem1[BUFFER]
;
__local
float
smem2[BUFFER]
;
__local
float
smem3[BUFFER]
;
#
endif
unsigned
int
xid=get_local_id
(
0
)
;
unsigned
int
yid=get_local_id
(
1
)
;
...
...
@@ -431,7 +641,7 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
const
int
tid
=
mad24
(
yid,
xsize,
xid
)
;
float2
prevPt
=
prevPts[gid]
/
(
1
<<
level
)
;
float2
prevPt
=
prevPts[gid]
/
(
float2
)(
1
<<
level
)
;
if
(
prevPt.x
<
0
|
| prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
...
...
@@ -450,9 +660,9 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
float A12 = 0;
float A22 = 0;
float I_patch[
3][3
];
float dIdx_patch[
3][3
];
float dIdy_patch[
3][3
];
float I_patch[
GRIDSIZE][GRIDSIZE
];
float dIdx_patch[
GRIDSIZE][GRIDSIZE
];
float dIdy_patch[
GRIDSIZE][GRIDSIZE
];
yBase=yid;
{
...
...
@@ -512,12 +722,19 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
&A11, &A12, &A22);
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
A11 = smem1[BUFFER];
A12 = smem2[BUFFER];
A22 = smem3[BUFFER];
#else
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
#endif
float D = A11 * A22 - A12 * A12;
...
...
@@ -609,8 +826,13 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
b1 = smem1[BUFFER];
b2 = smem2[BUFFER];
#else
b1 = smem1[0];
b2 = smem2[0];
#endif
float2 delta;
delta.x = A12 * b2 - A22 * b1;
...
...
@@ -685,18 +907,28 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
nextPts[gid] = prevPt;
if (calcErr)
err[gid] = smem1[0] / (c_winSize_x * c_winSize_y);
#ifdef CPU
err[gid] = smem1[BUFFER] / (float)(c_winSize_x * c_winSize_y);
#else
err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
#endif
}
}
__kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
#ifdef CPU
__local float smem1[BUFFER+1];
__local float smem2[BUFFER+1];
__local float smem3[BUFFER+1];
#else
__local float smem1[BUFFER];
__local float smem2[BUFFER];
__local float smem3[BUFFER];
#endif
unsigned int xid=get_local_id(0);
unsigned int yid=get_local_id(1);
...
...
@@ -709,7 +941,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
const int tid = mad24(yid, xsize, xid);
float2 nextPt = prevPts[gid]/(1<<level);
float2 nextPt = prevPts[gid]/(
float2)(
1<<level);
if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
{
...
...
@@ -725,9 +957,9 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
// extract the patch from the first image, compute covariation matrix of derivatives
float A11 = 0;
float A12 = 0;
float A22 = 0;
float A11 = 0
.0f
;
float A12 = 0
.0f
;
float A22 = 0
.0f
;
float4 I_patch[8];
float4 dIdx_patch[8];
...
...
@@ -797,9 +1029,15 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
A11 = smem1[BUFFER];
A12 = smem2[BUFFER];
A22 = smem3[BUFFER];
#else
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
#endif
float D = A11 * A22 - A12 * A12;
...
...
@@ -888,12 +1126,16 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
&b1, &b2);
}
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
b1 = smem1[BUFFER];
b2 = smem2[BUFFER];
#else
b1 = smem1[0];
b2 = smem2[0];
#endif
float2 delta;
delta.x = A12 * b2 - A22 * b1;
...
...
@@ -967,7 +1209,11 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
nextPts[gid] = nextPt;
if (calcErr)
err[gid] = smem1[0] / (3 * c_winSize_x * c_winSize_y);
#ifdef CPU
err[gid] = smem1[BUFFER] / (float)(3 * c_winSize_x * c_winSize_y);
#else
err[gid] = smem1[0] / (float)(3 * c_winSize_x * c_winSize_y);
#endif
}
}
...
...
modules/ocl/src/pyrlk.cpp
View file @
fd4a6f0a
...
...
@@ -16,7 +16,7 @@
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang,
yao@multicorewareinc
.com
// Yao Wang,
bitwangyaoyao@gmail
.com
// Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
...
...
@@ -47,6 +47,7 @@
#include "precomp.hpp"
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
::
ocl
;
...
...
@@ -58,11 +59,7 @@ namespace ocl
///////////////////////////OpenCL kernel strings///////////////////////////
extern
const
char
*
pyrlk
;
extern
const
char
*
pyrlk_no_image
;
extern
const
char
*
operator_setTo
;
extern
const
char
*
operator_convertTo
;
extern
const
char
*
operator_copyToM
;
extern
const
char
*
arithm_mul
;
extern
const
char
*
pyr_down
;
}
}
...
...
@@ -105,364 +102,7 @@ void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDe
}
}
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static
void
convert_run_cus
(
const
oclMat
&
src
,
oclMat
&
dst
,
double
alpha
,
double
beta
)
{
string
kernelName
=
"convert_to_S"
;
stringstream
idxStr
;
idxStr
<<
src
.
depth
();
kernelName
+=
idxStr
.
str
();
float
alpha_f
=
(
float
)
alpha
,
beta_f
=
(
float
)
beta
;
CV_DbgAssert
(
src
.
rows
==
dst
.
rows
&&
src
.
cols
==
dst
.
cols
);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
1
]
=
(
dst
.
rows
+
localThreads
[
1
]
-
1
)
/
localThreads
[
1
]
*
localThreads
[
1
];
globalThreads
[
2
]
=
1
;
int
dststep_in_pixel
=
dst
.
step
/
dst
.
elemSize
(),
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
int
srcstep_in_pixel
=
src
.
step
/
src
.
elemSize
(),
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
if
(
dst
.
type
()
==
CV_8UC1
)
{
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
])
/
localThreads
[
0
]
*
localThreads
[
0
];
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
srcstep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dststep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
alpha_f
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
beta_f
));
openCLExecuteKernel2
(
dst
.
clCxt
,
&
operator_convertTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
dst
.
oclchannels
(),
dst
.
depth
(),
CLFLUSH
);
}
void
convertTo
(
const
oclMat
&
src
,
oclMat
&
m
,
int
rtype
,
double
alpha
=
1
,
double
beta
=
0
);
void
convertTo
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
rtype
,
double
alpha
,
double
beta
)
{
//cout << "cv::ocl::oclMat::convertTo()" << endl;
bool
noScale
=
fabs
(
alpha
-
1
)
<
std
::
numeric_limits
<
double
>::
epsilon
()
&&
fabs
(
beta
)
<
std
::
numeric_limits
<
double
>::
epsilon
();
if
(
rtype
<
0
)
rtype
=
src
.
type
();
else
rtype
=
CV_MAKETYPE
(
CV_MAT_DEPTH
(
rtype
),
src
.
oclchannels
());
int
sdepth
=
src
.
depth
(),
ddepth
=
CV_MAT_DEPTH
(
rtype
);
if
(
sdepth
==
ddepth
&&
noScale
)
{
src
.
copyTo
(
dst
);
return
;
}
oclMat
temp
;
const
oclMat
*
psrc
=
&
src
;
if
(
sdepth
!=
ddepth
&&
psrc
==
&
dst
)
psrc
=
&
(
temp
=
src
);
dst
.
create
(
src
.
size
(),
rtype
);
convert_run_cus
(
*
psrc
,
dst
,
alpha
,
beta
);
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// setTo ////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
//oclMat &operator = (const Scalar &s)
//{
// //cout << "cv::ocl::oclMat::=" << endl;
// setTo(s);
// return *this;
//}
static
void
set_to_withoutmask_run_cus
(
const
oclMat
&
dst
,
const
Scalar
&
scalar
,
string
kernelName
)
{
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
1
]
=
(
dst
.
rows
+
localThreads
[
1
]
-
1
)
/
localThreads
[
1
]
*
localThreads
[
1
];
globalThreads
[
2
]
=
1
;
int
step_in_pixel
=
dst
.
step
/
dst
.
elemSize
(),
offset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
if
(
dst
.
type
()
==
CV_8UC1
)
{
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
}
char
compile_option
[
32
];
union
sc
{
cl_uchar4
uval
;
cl_char4
cval
;
cl_ushort4
usval
;
cl_short4
shval
;
cl_int4
ival
;
cl_float4
fval
;
cl_double4
dval
;
}
val
;
switch
(
dst
.
depth
())
{
case
0
:
val
.
uval
.
s
[
0
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
0
]);
val
.
uval
.
s
[
1
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
1
]);
val
.
uval
.
s
[
2
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
2
]);
val
.
uval
.
s
[
3
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
oclchannels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar
)
,
(
void
*
)
&
val
.
uval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar4
)
,
(
void
*
)
&
val
.
uval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
1
:
val
.
cval
.
s
[
0
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
0
]);
val
.
cval
.
s
[
1
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
1
]);
val
.
cval
.
s
[
2
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
2
]);
val
.
cval
.
s
[
3
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
oclchannels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=char"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
)
,
(
void
*
)
&
val
.
cval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=char4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char4
)
,
(
void
*
)
&
val
.
cval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
2
:
val
.
usval
.
s
[
0
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
0
]);
val
.
usval
.
s
[
1
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
1
]);
val
.
usval
.
s
[
2
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
2
]);
val
.
usval
.
s
[
3
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
oclchannels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort
)
,
(
void
*
)
&
val
.
usval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort4
)
,
(
void
*
)
&
val
.
usval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
3
:
val
.
shval
.
s
[
0
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
0
]);
val
.
shval
.
s
[
1
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
1
]);
val
.
shval
.
s
[
2
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
2
]);
val
.
shval
.
s
[
3
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
oclchannels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=short"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short
)
,
(
void
*
)
&
val
.
shval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=short4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short4
)
,
(
void
*
)
&
val
.
shval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
4
:
val
.
ival
.
s
[
0
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
0
]);
val
.
ival
.
s
[
1
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
1
]);
val
.
ival
.
s
[
2
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
2
]);
val
.
ival
.
s
[
3
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
oclchannels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=int"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
val
.
ival
.
s
[
0
]
));
break
;
case
2
:
sprintf
(
compile_option
,
"-D GENTYPE=int2"
);
cl_int2
i2val
;
i2val
.
s
[
0
]
=
val
.
ival
.
s
[
0
];
i2val
.
s
[
1
]
=
val
.
ival
.
s
[
1
];
args
.
push_back
(
make_pair
(
sizeof
(
cl_int2
)
,
(
void
*
)
&
i2val
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=int4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
val
.
ival
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
5
:
val
.
fval
.
s
[
0
]
=
(
float
)
scalar
.
val
[
0
];
val
.
fval
.
s
[
1
]
=
(
float
)
scalar
.
val
[
1
];
val
.
fval
.
s
[
2
]
=
(
float
)
scalar
.
val
[
2
];
val
.
fval
.
s
[
3
]
=
(
float
)
scalar
.
val
[
3
];
switch
(
dst
.
oclchannels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=float"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
val
.
fval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=float4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float4
)
,
(
void
*
)
&
val
.
fval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
6
:
val
.
dval
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
dval
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
dval
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
dval
.
s
[
3
]
=
scalar
.
val
[
3
];
switch
(
dst
.
oclchannels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=double"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
)
,
(
void
*
)
&
val
.
dval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=double4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double4
)
,
(
void
*
)
&
val
.
dval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unknown depth"
);
}
#ifdef CL_VERSION_1_2
if
(
dst
.
offset
==
0
&&
dst
.
cols
==
dst
.
wholecols
)
{
clEnqueueFillBuffer
((
cl_command_queue
)
dst
.
clCxt
->
oclCommandQueue
(),
(
cl_mem
)
dst
.
data
,
args
[
0
].
second
,
args
[
0
].
first
,
0
,
dst
.
step
*
dst
.
rows
,
0
,
NULL
,
NULL
);
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset_in_pixel
));
openCLExecuteKernel2
(
dst
.
clCxt
,
&
operator_setTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
,
CLFLUSH
);
}
#else
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset_in_pixel
));
openCLExecuteKernel2
(
dst
.
clCxt
,
&
operator_setTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
,
CLFLUSH
);
#endif
}
static
oclMat
&
setTo
(
oclMat
&
src
,
const
Scalar
&
scalar
)
{
CV_Assert
(
src
.
depth
()
>=
0
&&
src
.
depth
()
<=
6
);
CV_DbgAssert
(
!
src
.
empty
());
if
(
src
.
type
()
==
CV_8UC1
)
{
set_to_withoutmask_run_cus
(
src
,
scalar
,
"set_to_without_mask_C1_D0"
);
}
else
{
set_to_withoutmask_run_cus
(
src
,
scalar
,
"set_to_without_mask"
);
}
return
src
;
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// static void copy_to_with_mask_cus(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
// {
// CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
// src.rows == dst.rows && src.cols == dst.cols
// && mask.type() == CV_8UC1);
// vector<pair<size_t , const void *> > args;
// std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
// {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
// {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
// {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
// };
// char compile_option[32];
// sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
// size_t localThreads[3] = {16, 16, 1};
// size_t globalThreads[3];
// globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
// globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
// globalThreads[2] = 1;
// int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
// int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
// args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
// args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
// args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
// args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
// openCLExecuteKernel2(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
// localThreads, args, -1, -1, compile_option, CLFLUSH);
// }
static
void
copyTo
(
const
oclMat
&
src
,
oclMat
&
m
)
{
CV_DbgAssert
(
!
src
.
empty
());
m
.
create
(
src
.
size
(),
src
.
type
());
openCLCopyBuffer2D
(
src
.
clCxt
,
m
.
data
,
m
.
step
,
m
.
offset
,
src
.
data
,
src
.
step
,
src
.
cols
*
src
.
elemSize
(),
src
.
rows
,
src
.
offset
);
}
// static void copyTo(const oclMat &src, oclMat &mat, const oclMat &mask)
// {
// if (mask.empty())
// {
// copyTo(src, mat);
// }
// else
// {
// mat.create(src.size(), src.type());
// copy_to_with_mask_cus(src, mat, mask, "copy_to_with_mask");
// }
// }
static
void
arithmetic_run
(
const
oclMat
&
src1
,
oclMat
&
dst
,
string
kernelName
,
const
char
**
kernelString
,
void
*
_scalar
)
static
void
multiply_cus
(
const
oclMat
&
src1
,
oclMat
&
dst
,
float
scalar
)
{
if
(
!
src1
.
clCxt
->
supportsFeature
(
Context
::
CL_DOUBLE
)
&&
src1
.
type
()
==
CV_64F
)
{
...
...
@@ -470,9 +110,6 @@ static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, c
return
;
}
//dst.create(src1.size(), src1.type());
//CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
// src1.rows == src2.rows && src2.rows == dst.rows);
CV_Assert
(
src1
.
cols
==
dst
.
cols
&&
src1
.
rows
==
dst
.
rows
);
...
...
@@ -480,24 +117,8 @@ static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, c
CV_Assert
(
src1
.
depth
()
!=
CV_8S
);
Context
*
clCxt
=
src1
.
clCxt
;
//int channels = dst.channels();
//int depth = dst.depth();
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1}
//};
//size_t vector_length = vector_lengths[channels-1][depth];
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
//int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
//size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
// divUp(dst.rows, localThreads[1]) * localThreads[1],
// 1
// };
size_t
globalThreads
[
3
]
=
{
src1
.
cols
,
src1
.
rows
,
1
...
...
@@ -508,67 +129,20 @@ static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, c
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
//args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
args
.
push_back
(
make_pair
(
sizeof
(
float
),
(
float
*
)
&
scalar
));
//if(_scalar != NULL)
//{
float
scalar1
=
*
((
float
*
)
_scalar
);
args
.
push_back
(
make_pair
(
sizeof
(
float
),
(
float
*
)
&
scalar1
));
//}
openCLExecuteKernel2
(
clCxt
,
kernelString
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
src1
.
depth
(),
CLFLUSH
);
}
static
void
multiply_cus
(
const
oclMat
&
src1
,
oclMat
&
dst
,
float
scalar
)
{
arithmetic_run
(
src1
,
dst
,
"arithm_muls"
,
&
arithm_mul
,
(
void
*
)(
&
scalar
));
}
static
void
pyrdown_run_cus
(
const
oclMat
&
src
,
const
oclMat
&
dst
)
{
CV_Assert
(
src
.
type
()
==
dst
.
type
());
CV_Assert
(
src
.
depth
()
!=
CV_8S
);
Context
*
clCxt
=
src
.
clCxt
;
string
kernelName
=
"pyrDown"
;
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
dst
.
rows
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
openCLExecuteKernel2
(
clCxt
,
&
pyr_down
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
oclchannels
(),
src
.
depth
(),
CLFLUSH
);
}
static
void
pyrDown_cus
(
const
oclMat
&
src
,
oclMat
&
dst
)
{
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
dst
.
create
((
src
.
rows
+
1
)
/
2
,
(
src
.
cols
+
1
)
/
2
,
src
.
type
());
pyrdown_run_cus
(
src
,
dst
);
openCLExecuteKernel
(
clCxt
,
&
arithm_mul
,
"arithm_muls"
,
globalThreads
,
localThreads
,
args
,
-
1
,
src1
.
depth
());
}
static
void
lkSparse_run
(
oclMat
&
I
,
oclMat
&
J
,
const
oclMat
&
prevPts
,
oclMat
&
nextPts
,
oclMat
&
status
,
oclMat
&
err
,
bool
/*GET_MIN_EIGENVALS*/
,
int
ptcount
,
int
level
,
/*dim3 block, */
dim3
patch
,
Size
winSize
,
int
iters
)
int
level
,
dim3
patch
,
Size
winSize
,
int
iters
)
{
Context
*
clCxt
=
I
.
clCxt
;
int
elemCntPerRow
=
I
.
step
/
I
.
elemSize
();
...
...
@@ -613,15 +187,24 @@ static void lkSparse_run(oclMat &I, oclMat &J,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
iters
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
calcErr
));
if
(
clCxt
->
supportsFeature
(
Context
::
CL_CPU
))
{
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
(
char
*
)
" -D CPU"
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
else
{
if
(
isImageSupported
)
{
openCLExecuteKernel2
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
CLFLUSH
);
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
()
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
else
{
openCLExecuteKernel2
(
clCxt
,
&
pyrlk_no_image
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
CLFLUSH
);
openCLExecuteKernel
(
clCxt
,
&
pyrlk_no_image
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
());
}
}
}
...
...
@@ -631,7 +214,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
{
nextPts
.
release
();
status
.
release
();
//
if (err) err->release();
if
(
err
)
err
->
release
();
return
;
}
...
...
@@ -657,13 +240,11 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
oclMat
temp1
=
(
useInitialFlow
?
nextPts
:
prevPts
).
reshape
(
1
);
oclMat
temp2
=
nextPts
.
reshape
(
1
);
//oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
multiply_cus
(
temp1
,
temp2
,
1.0
f
/
(
1
<<
maxLevel
)
/
2.0
f
);
//::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_8UC1
,
status
);
//status.setTo(Scalar::all(1));
setTo
(
status
,
Scalar
::
all
(
1
));
status
.
setTo
(
Scalar
::
all
(
1
));
bool
errMat
=
false
;
if
(
!
err
)
...
...
@@ -673,7 +254,6 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
}
else
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_32FC1
,
*
err
);
//ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err);
// build the image pyramids.
...
...
@@ -682,25 +262,14 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
if
(
cn
==
1
||
cn
==
4
)
{
//prevImg.convertTo(prevPyr_[0], CV_32F);
//nextImg.convertTo(nextPyr_[0], CV_32F);
convertTo
(
prevImg
,
prevPyr_
[
0
],
CV_32F
);
convertTo
(
nextImg
,
nextPyr_
[
0
],
CV_32F
);
}
else
{
//oclMat buf_;
// cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
// buf_.convertTo(prevPyr_[0], CV_32F);
// cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
// buf_.convertTo(nextPyr_[0], CV_32F);
prevImg
.
convertTo
(
prevPyr_
[
0
],
CV_32F
);
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
}
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
{
pyrDown
_cus
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
_cus
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
}
// dI/dx ~ Ix, dI/dy ~ Iy
...
...
@@ -709,11 +278,9 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
{
lkSparse_run
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
,
nextPts
,
status
,
*
err
,
getMinEigenVals
,
prevPts
.
cols
,
level
,
/*block, */
patch
,
winSize
,
iters
);
level
,
patch
,
winSize
,
iters
);
}
clFinish
((
cl_command_queue
)
prevImg
.
clCxt
->
oclCommandQueue
());
if
(
errMat
)
delete
err
;
}
...
...
@@ -754,11 +321,6 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
JTex
=
(
cl_mem
)
J
.
data
;
}
//int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
//const int patchWidth = 16 + 2 * halfWin.x;
//const int patchHeight = 16 + 2 * halfWin.y;
//size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
ITex
));
...
...
@@ -787,15 +349,14 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
if
(
isImageSupported
)
{
openCLExecuteKernel
2
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
CLFLUSH
);
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
()
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
else
{
//printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
openCLExecuteKernel2
(
clCxt
,
&
pyrlk_no_image
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
CLFLUSH
);
openCLExecuteKernel
(
clCxt
,
&
pyrlk_no_image
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
());
}
}
...
...
@@ -813,23 +374,20 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI
nextPyr_
.
resize
(
maxLevel
+
1
);
prevPyr_
[
0
]
=
prevImg
;
//nextImg.convertTo(nextPyr_[0], CV_32F);
convertTo
(
nextImg
,
nextPyr_
[
0
],
CV_32F
);
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
{
pyrDown
_cus
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
_cus
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
}
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
uPyr_
[
0
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
vPyr_
[
0
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
uPyr_
[
1
]);
ensureSizeIsEnough
(
prevImg
.
size
(),
CV_32FC1
,
vPyr_
[
1
]);
//uPyr_[1].setTo(Scalar::all(0));
//vPyr_[1].setTo(Scalar::all(0));
setTo
(
uPyr_
[
1
],
Scalar
::
all
(
0
));
setTo
(
vPyr_
[
1
],
Scalar
::
all
(
0
));
uPyr_
[
1
].
setTo
(
Scalar
::
all
(
0
));
vPyr_
[
1
].
setTo
(
Scalar
::
all
(
0
));
Size
winSize2i
(
winSize
.
width
,
winSize
.
height
);
...
...
@@ -846,10 +404,6 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI
idx
=
idx2
;
}
//uPyr_[idx].copyTo(u);
//vPyr_[idx].copyTo(v);
copyTo
(
uPyr_
[
idx
],
u
);
copyTo
(
vPyr_
[
idx
],
v
);
clFinish
((
cl_command_queue
)
prevImg
.
clCxt
->
oclCommandQueue
());
uPyr_
[
idx
].
copyTo
(
u
);
vPyr_
[
idx
].
copyTo
(
v
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment