Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
09359982
Commit
09359982
authored
Sep 24, 2012
by
bitwangyaoyao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
some optimizations to ocl::pyrDown, PyrLK and Canny
parent
494ae156
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
1116 additions
and
484 deletions
+1116
-484
canny.cpp
modules/ocl/src/canny.cpp
+8
-7
hog.cpp
modules/ocl/src/hog.cpp
+9
-9
pyr_down.cl
modules/ocl/src/kernels/pyr_down.cl
+351
-221
pyrlk.cl
modules/ocl/src/kernels/pyrlk.cl
+19
-0
mcwutil.cpp
modules/ocl/src/mcwutil.cpp
+130
-0
mcwutil.hpp
modules/ocl/src/mcwutil.hpp
+74
-0
pyrdown.cpp
modules/ocl/src/pyrdown.cpp
+1
-42
pyrlk.cpp
modules/ocl/src/pyrlk.cpp
+521
-202
test_pyrlk.cpp
modules/ocl/test/test_pyrlk.cpp
+3
-3
No files found.
modules/ocl/src/canny.cpp
View file @
09359982
...
...
@@ -45,6 +45,7 @@
#include <iomanip>
#include "precomp.hpp"
#include "mcwutil.hpp"
using
namespace
cv
;
using
namespace
cv
::
ocl
;
...
...
@@ -237,7 +238,7 @@ void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_b
size_t
globalThreads
[
3
]
=
{
cols
,
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
canny
::
calcMagnitude_gpu
(
const
oclMat
&
dx_buf
,
const
oclMat
&
dy_buf
,
oclMat
&
dx
,
oclMat
&
dy
,
oclMat
&
mag
,
int
rows
,
int
cols
,
bool
L2Grad
)
...
...
@@ -272,7 +273,7 @@ void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat
{
strcat
(
build_options
,
"-D L2GRAD"
);
}
openCLExecuteKernel
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
openCLExecuteKernel
2
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
}
void
canny
::
calcMagnitude_gpu
(
const
oclMat
&
dx
,
const
oclMat
&
dy
,
oclMat
&
mag
,
int
rows
,
int
cols
,
bool
L2Grad
)
{
...
...
@@ -300,7 +301,7 @@ void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, i
{
strcat
(
build_options
,
"-D L2GRAD"
);
}
openCLExecuteKernel
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
openCLExecuteKernel
2
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
build_options
);
}
void
canny
::
calcMap_gpu
(
oclMat
&
dx
,
oclMat
&
dy
,
oclMat
&
mag
,
oclMat
&
map
,
int
rows
,
int
cols
,
float
low_thresh
,
float
high_thresh
)
...
...
@@ -331,7 +332,7 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro
string
kernelName
=
"calcMap"
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
canny
::
edgesHysteresisLocal_gpu
(
oclMat
&
map
,
oclMat
&
st1
,
void
*
counter
,
int
rows
,
int
cols
)
...
...
@@ -351,7 +352,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, i
size_t
globalThreads
[
3
]
=
{
cols
,
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
canny
::
edgesHysteresisGlobal_gpu
(
oclMat
&
map
,
oclMat
&
st1
,
oclMat
&
st2
,
void
*
counter
,
int
rows
,
int
cols
)
...
...
@@ -381,7 +382,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, voi
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
map
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
map
.
offset
));
openCLExecuteKernel
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
DISABLE
);
openCLSafeCall
(
clEnqueueReadBuffer
(
Context
::
getContext
()
->
impl
->
clCmdQueue
,
(
cl_mem
)
counter
,
1
,
0
,
sizeof
(
int
),
&
count
,
0
,
NULL
,
NULL
));
std
::
swap
(
st1
,
st2
);
}
...
...
@@ -406,7 +407,7 @@ void canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols)
size_t
globalThreads
[
3
]
=
{
cols
,
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
imgproc_canny
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
#endif // HAVE_OPENCL
modules/ocl/src/hog.cpp
View file @
09359982
...
...
@@ -44,7 +44,7 @@
//M*/
#include "precomp.hpp"
#include "mcwutil.hpp"
using
namespace
cv
;
using
namespace
cv
::
ocl
;
using
namespace
std
;
...
...
@@ -1613,7 +1613,7 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
block_hists
.
data
));
args
.
push_back
(
make_pair
(
smem
,
(
void
*
)
NULL
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
cv
::
ocl
::
device
::
hog
::
normalize_hists
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
...
...
@@ -1641,7 +1641,7 @@ void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int bl
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
threshold
));
args
.
push_back
(
make_pair
(
nthreads
*
sizeof
(
float
),
(
void
*
)
NULL
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
cv
::
ocl
::
device
::
hog
::
classify_hists
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
...
...
@@ -1675,7 +1675,7 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
threshold
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
labels
.
data
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
cv
::
ocl
::
device
::
hog
::
extract_descrs_by_rows
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
int
block_stride_x
,
...
...
@@ -1706,7 +1706,7 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
block_hists
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
cv
::
ocl
::
device
::
hog
::
extract_descrs_by_cols
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
int
block_stride_x
,
...
...
@@ -1738,7 +1738,7 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
block_hists
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
static
inline
int
divUp
(
int
total
,
int
grain
)
...
...
@@ -1772,7 +1772,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
correctGamma
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cnbins
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
cv
::
ocl
::
device
::
hog
::
compute_gradients_8UC4
(
int
height
,
int
width
,
const
cv
::
ocl
::
oclMat
&
img
,
...
...
@@ -1802,7 +1802,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
correctGamma
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cnbins
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
cv
::
ocl
::
device
::
hog
::
resize
(
const
oclMat
&
src
,
oclMat
&
dst
,
const
Size
sz
)
...
...
@@ -1834,7 +1834,7 @@ void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
2
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
#endif
modules/ocl/src/kernels/pyr_down.cl
View file @
09359982
...
...
@@ -43,14 +43,9 @@
//
//M*/
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
//
#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
uchar
round_uchar_uchar
(
uchar
v
)
{
return
v
;
}
uchar
round_uchar_int
(
int
v
)
{
return
(
uchar
)((
uint
)
v
<=
255
?
v
:
v
>
0
?
255
:
0
)
;
...
...
@@ -58,13 +53,7 @@ uchar round_uchar_int(int v)
uchar
round_uchar_float
(
float
v
)
{
int
iv
=
convert_int_sat_rte
(
v
)
;
return
round_uchar_int
(
iv
)
;
}
uchar4
round_uchar4_uchar4
(
uchar4
v
)
{
return
v
;
return
round_uchar_int
(
convert_int_sat_rte
(
v
))
;
}
uchar4
round_uchar4_int4
(
int4
v
)
...
...
@@ -79,52 +68,45 @@ uchar4 round_uchar4_int4(int4 v)
uchar4
round_uchar4_float4
(
float4
v
)
{
int4
iv
=
convert_int4_sat_rte
(
v
)
;
return
round_uchar4_int4
(
iv
)
;
return
round_uchar4_int4
(
convert_int4_sat_rte
(
v
))
;
}
int
idx_row_low
(
int
y,
int
last_row
)
{
return
abs
(
y
)
%
(
last_row
+
1
)
;
}
int
idx_row_high
(
int
y,
int
last_row
)
{
int
i=abs_diff
(
y,last_row
)
;
int
j=abs_diff
(
i,last_row
)
;
return
j
%
(
last_row
+
1
)
;
}
int
idx_row
(
int
y,
int
last_row
)
{
return
idx_row_low
(
idx_row_high
(
y,
last_row
)
,
last_row
)
;
}
int
idx_col_low
(
int
x,
int
last_col
)
{
return
abs
(
x
)
%
(
last_col
+
1
)
;
}
int
idx_col_high
(
int
x,
int
last_col
)
{
int
i=abs_diff
(
x,last_col
)
;
int
j=abs_diff
(
i,last_col
)
;
return
j
%
(
last_col
+
1
)
;
}
int
idx_col
(
int
x,
int
last_col
)
{
return
idx_col_low
(
idx_col_high
(
x,
last_col
)
,
last_col
)
;
}
__kernel
void
pyrDown_C1_D0
(
__global
uchar
*
srcData,
int
srcStep,
int
srcOffset,
int
srcRows,
int
srcCols,
__global
uchar
*dst,
int
dstStep,
int
dstOffset,
int
dstCols
)
int
idx_row_low
(
int
y,
int
last_row
)
{
return
abs
(
y
)
%
(
last_row
+
1
)
;
}
int
idx_row_high
(
int
y,
int
last_row
)
{
return
abs
(
last_row
-
(
int
)
abs
(
last_row
-
y
))
%
(
last_row
+
1
)
;
}
int
idx_row
(
int
y,
int
last_row
)
{
return
idx_row_low
(
idx_row_high
(
y,
last_row
)
,
last_row
)
;
}
int
idx_col_low
(
int
x,
int
last_col
)
{
return
abs
(
x
)
%
(
last_col
+
1
)
;
}
int
idx_col_high
(
int
x,
int
last_col
)
{
return
abs
(
last_col
-
(
int
)
abs
(
last_col
-
x
))
%
(
last_col
+
1
)
;
}
int
idx_col
(
int
x,
int
last_col
)
{
return
idx_col_low
(
idx_col_high
(
x,
last_col
)
,
last_col
)
;
}
__kernel
void
pyrDown_C1_D0
(
__global
uchar
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
uchar
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_g
roup_id
(
0
)
*
get_local_size
(
0
)
+
get_loc
al_id
(
0
)
;
const
int
x
=
get_g
lob
al_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float
smem[256
+
4]
;
...
...
@@ -135,44 +117,83 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
sum
=
0.0625f
*
(((
srcData
+
(
src_y
-
2
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
-
1
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
(
src_y
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
+
1
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
(
src_y
+
2
)
*
srcStep
))
[x]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0.0625f
*
(((
srcData
+
(
src_y
-
2
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
-
1
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
(
src_y
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
+
1
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
(
src_y
+
2
)
*
srcStep
))
[left_x]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
0.0625f
*
(((
srcData
+
(
src_y
-
2
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
-
1
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
(
src_y
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
+
1
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
(
src_y
+
2
)
*
srcStep
))
[right_x]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
if
(
get_local_id
(
0
)
>
253
)
else
{
const
int
right_x
=
x
+
2
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -181,9 +202,7 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
0.375f
*
smem[2
+
tid2
]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
+
1]
;
...
...
@@ -196,9 +215,9 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
}
}
__kernel
void
pyrDown_C4_D0
(
__global
uchar4
*
srcData,
int
srcStep,
int
src
Offset,
int
srcRows,
int
srcCols,
__global
uchar4
*dst,
int
dstStep,
int
dstOffset
,
int
dstCols
)
__kernel
void
pyrDown_C4_D0
(
__global
uchar4
*
srcData,
int
srcStep,
int
src
Rows,
int
srcCols,
__global
uchar4
*dst,
int
dstStep
,
int
dstCols
)
{
const
int
x
=
get_g
roup_id
(
0
)
*
get_local_size
(
0
)
+
get_loc
al_id
(
0
)
;
const
int
x
=
get_g
lob
al_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float4
smem[256
+
4]
;
...
...
@@ -209,48 +228,87 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
float4
co1
=
(
float4
)(
0.375f,
0.375f,
0.375f,
0.375f
)
;
float4
co2
=
(
float4
)(
0.25f,
0.25f,
0.25f,
0.25f
)
;
float4
co3
=
(
float4
)(
0.0625f,
0.0625f,
0.0625f,
0.0625f
)
;
sum
=
0
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
))
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
float4
co1
=
0.375f
;//(float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4
co2
=
0.25f
;//(float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4
co3
=
0.0625f
;//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
))
;
smem[get_local_id
(
0
)
]
=
sum
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
convert_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
))
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
co3
*
convert_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
))
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
co3
*
convert_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
))
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
if
(
get_local_id
(
0
)
>
253
)
else
{
const
int
right_x
=
x
+
2
;
sum
=
0
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
))
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -259,9 +317,7 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0
;
sum
=
sum
+
co3
*
smem[2
+
tid2
-
2]
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
...
...
@@ -274,9 +330,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
}
}
__kernel
void
pyrDown_C1_D5
(
__global
float
*
srcData,
int
srcStep,
int
src
Offset,
int
srcRows,
int
srcCols,
__global
float
*dst,
int
dstStep,
int
dstOffset
,
int
dstCols
)
__kernel
void
pyrDown_C1_D5
(
__global
float
*
srcData,
int
srcStep,
int
src
Rows,
int
srcCols,
__global
float
*dst,
int
dstStep
,
int
dstCols
)
{
const
int
x
=
get_g
roup_id
(
0
)
*
get_local_size
(
0
)
+
get_loc
al_id
(
0
)
;
const
int
x
=
get_g
lob
al_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float
smem[256
+
4]
;
...
...
@@ -287,44 +343,83 @@ __kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[idx_col
(
x,
last_col
)
]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[idx_col
(
left_x,
last_col
)
]
;
smem[get_local_id
(
0
)
]
=
sum
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[x]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[left_x]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[right_x]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
if
(
get_local_id
(
0
)
>
253
)
else
{
const
int
right_x
=
x
+
2
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[idx_col
(
right_x,
last_col
)
]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -333,9 +428,7 @@ __kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0
;
sum
=
sum
+
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
0.375f
*
smem[2
+
tid2
]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
+
1]
;
...
...
@@ -348,9 +441,9 @@ __kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset
}
}
__kernel
void
pyrDown_C4_D5
(
__global
float4
*
srcData,
int
srcStep,
int
src
Offset,
int
srcRows,
int
srcCols,
__global
float4
*dst,
int
dstStep,
int
dstOffset
,
int
dstCols
)
__kernel
void
pyrDown_C4_D5
(
__global
float4
*
srcData,
int
srcStep,
int
src
Rows,
int
srcCols,
__global
float4
*dst,
int
dstStep
,
int
dstCols
)
{
const
int
x
=
get_g
roup_id
(
0
)
*
get_local_size
(
0
)
+
get_loc
al_id
(
0
)
;
const
int
x
=
get_g
lob
al_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float4
smem[256
+
4]
;
...
...
@@ -361,48 +454,87 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcOffse
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
float4
co1
=
(
float4
)(
0.375f,
0.375f,
0.375f,
0.375f
)
;
float4
co2
=
(
float4
)(
0.25f,
0.25f,
0.25f,
0.25f
)
;
float4
co3
=
(
float4
)(
0.0625f,
0.0625f,
0.0625f,
0.0625f
)
;
sum
=
0
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
x,
last_col
)
]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
float4
co1
=
0.375f
;//(float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4
co2
=
0.25f
;//(float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4
co3
=
0.0625f
;//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
left_x,
last_col
)
]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
0
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[idx_col
(
right_x,
last_col
)
]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -411,9 +543,7 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcOffse
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0
;
sum
=
sum
+
co3
*
smem[2
+
tid2
-
2]
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
...
...
modules/ocl/src/kernels/pyrlk.cl
View file @
09359982
...
...
@@ -45,6 +45,25 @@
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
__kernel
void
arithm_muls_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
float
scalar
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
(
x
<<
2
)
+
dst_offset
)
;
float
data1
=
*
((
__global
float
*
)((
__global
char
*
)
src1
+
src1_index
))
;
float
tmp
=
data1
*
scalar
;
*
((
__global
float
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
__kernel
void
calcSharrDeriv_vertical_C1_D0
(
__global
const
uchar*
src,
int
srcStep,
int
rows,
int
cols,
int
cn,
__global
short*
dx_buf,
int
dx_bufStep,
__global
short*
dy_buf,
int
dy_bufStep
)
{
...
...
modules/ocl/src/mcwutil.cpp
0 → 100644
View file @
09359982
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "mcwutil.hpp"
#if defined (HAVE_OPENCL)
using
namespace
std
;
namespace
cv
{
namespace
ocl
{
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
// provide additional methods for the user to interact with the command queue after a task is fired
void
openCLExecuteKernel_2
(
Context
*
clCxt
,
const
char
**
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
char
*
build_options
,
FLUSH_MODE
finish_mode
)
{
//construct kernel name
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
//for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
stringstream
idxStr
;
if
(
channels
!=
-
1
)
idxStr
<<
"_C"
<<
channels
;
if
(
depth
!=
-
1
)
idxStr
<<
"_D"
<<
depth
;
kernelName
+=
idxStr
.
str
();
cl_kernel
kernel
;
kernel
=
openCLGetKernelFromSource
(
clCxt
,
source
,
kernelName
,
build_options
);
if
(
localThreads
!=
NULL
)
{
globalThreads
[
0
]
=
divUp
(
globalThreads
[
0
],
localThreads
[
0
])
*
localThreads
[
0
];
globalThreads
[
1
]
=
divUp
(
globalThreads
[
1
],
localThreads
[
1
])
*
localThreads
[
1
];
globalThreads
[
2
]
=
divUp
(
globalThreads
[
2
],
localThreads
[
2
])
*
localThreads
[
2
];
size_t
blockSize
=
localThreads
[
0
]
*
localThreads
[
1
]
*
localThreads
[
2
];
cv
::
ocl
::
openCLVerifyKernel
(
clCxt
,
kernel
,
&
blockSize
,
globalThreads
,
localThreads
);
}
for
(
int
i
=
0
;
i
<
args
.
size
();
i
++
)
openCLSafeCall
(
clSetKernelArg
(
kernel
,
i
,
args
[
i
].
first
,
args
[
i
].
second
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
clCxt
->
impl
->
clCmdQueue
,
kernel
,
3
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
switch
(
finish_mode
)
{
case
CLFINISH
:
clFinish
(
clCxt
->
impl
->
clCmdQueue
);
case
CLFLUSH
:
clFlush
(
clCxt
->
impl
->
clCmdQueue
);
break
;
case
DISABLE
:
default
:
break
;
}
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
void
openCLExecuteKernel2
(
Context
*
clCxt
,
const
char
**
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
FLUSH_MODE
finish_mode
)
{
openCLExecuteKernel2
(
clCxt
,
source
,
kernelName
,
globalThreads
,
localThreads
,
args
,
channels
,
depth
,
NULL
,
finish_mode
);
}
void
openCLExecuteKernel2
(
Context
*
clCxt
,
const
char
**
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
char
*
build_options
,
FLUSH_MODE
finish_mode
)
{
openCLExecuteKernel_2
(
clCxt
,
source
,
kernelName
,
globalThreads
,
localThreads
,
args
,
channels
,
depth
,
build_options
,
finish_mode
);
}
}
//namespace ocl
}
//namespace cv
#endif
\ No newline at end of file
modules/ocl/src/mcwutil.hpp
0 → 100644
View file @
09359982
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _OPENCV_MCWUTIL_
#define _OPENCV_MCWUTIL_
#include "precomp.hpp"
#if defined (HAVE_OPENCL)
using
namespace
std
;
namespace
cv
{
namespace
ocl
{
enum
FLUSH_MODE
{
CLFINISH
=
0
,
CLFLUSH
,
DISABLE
};
void
openCLExecuteKernel2
(
Context
*
clCxt
,
const
char
**
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
FLUSH_MODE
finish_mode
=
DISABLE
);
void
openCLExecuteKernel2
(
Context
*
clCxt
,
const
char
**
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
char
*
build_options
,
FLUSH_MODE
finish_mode
=
DISABLE
);
}
//namespace ocl
}
//namespace cv
#endif // HAVE_OPENCL
#endif //_OPENCV_MCWUTIL_
modules/ocl/src/pyrdown.cpp
View file @
09359982
...
...
@@ -66,7 +66,6 @@ namespace cv
//////////////////////////////////////////////////////////////////////////////
/////////////////////// add subtract multiply divide /////////////////////////
//////////////////////////////////////////////////////////////////////////////
template
<
typename
T
>
void
pyrdown_run
(
const
oclMat
&
src
,
const
oclMat
&
dst
)
{
...
...
@@ -95,52 +94,14 @@ void pyrdown_run(const oclMat &src, const oclMat &dst)
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
openCLExecuteKernel
(
clCxt
,
&
pyr_down
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
channels
(),
src
.
depth
());
}
void
pyrdown_run
(
const
oclMat
&
src
,
const
oclMat
&
dst
)
{
switch
(
src
.
depth
())
{
case
0
:
pyrdown_run
<
unsigned
char
>
(
src
,
dst
);
break
;
case
1
:
pyrdown_run
<
char
>
(
src
,
dst
);
break
;
case
2
:
pyrdown_run
<
unsigned
short
>
(
src
,
dst
);
break
;
case
3
:
pyrdown_run
<
short
>
(
src
,
dst
);
break
;
case
4
:
pyrdown_run
<
int
>
(
src
,
dst
);
break
;
case
5
:
pyrdown_run
<
float
>
(
src
,
dst
);
break
;
case
6
:
pyrdown_run
<
double
>
(
src
,
dst
);
break
;
default
:
break
;
}
}
//////////////////////////////////////////////////////////////////////////////
// pyrDown
...
...
@@ -148,11 +109,9 @@ void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
{
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
//src.step = src.rows;
dst
.
create
((
src
.
rows
+
1
)
/
2
,
(
src
.
cols
+
1
)
/
2
,
src
.
type
());
dst
.
download_channels
=
src
.
download_channels
;
dst
.
download_channels
=
src
.
download_channels
;
pyrdown_run
(
src
,
dst
);
}
...
...
modules/ocl/src/pyrlk.cpp
View file @
09359982
...
...
@@ -41,7 +41,7 @@
//M*/
#include "precomp.hpp"
#include "mcwutil.hpp"
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
::
ocl
;
...
...
@@ -59,7 +59,10 @@ namespace cv
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern
const
char
*
pyrlk
;
extern
const
char
*
operator_setTo
;
extern
const
char
*
operator_convertTo
;
extern
const
char
*
arithm_mul
;
extern
const
char
*
pyr_down
;
}
}
...
...
@@ -78,103 +81,6 @@ struct int2
int
x
,
y
;
};
void
calcSharrDeriv_run
(
const
oclMat
&
src
,
oclMat
&
dx_buf
,
oclMat
&
dy_buf
,
oclMat
&
dIdx
,
oclMat
&
dIdy
,
int
cn
)
{
Context
*
clCxt
=
src
.
clCxt
;
string
kernelName
=
"calcSharrDeriv_vertical"
;
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
src
.
rows
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cn
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dx_buf
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dx_buf
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dy_buf
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dy_buf
.
step
));
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
channels
(),
src
.
depth
());
kernelName
=
"calcSharrDeriv_horizontal"
;
vector
<
pair
<
size_t
,
const
void
*>
>
args2
;
args2
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cn
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dx_buf
.
data
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dx_buf
.
step
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dy_buf
.
data
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dy_buf
.
step
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dIdx
.
data
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dIdx
.
step
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dIdy
.
data
));
args2
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dIdy
.
step
));
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args2
,
src
.
channels
(),
src
.
depth
());
}
void
cv
::
ocl
::
PyrLKOpticalFlow
::
calcSharrDeriv
(
const
oclMat
&
src
,
oclMat
&
dIdx
,
oclMat
&
dIdy
)
{
CV_Assert
(
src
.
rows
>
1
&&
src
.
cols
>
1
);
CV_Assert
(
src
.
depth
()
==
CV_8U
);
const
int
cn
=
src
.
channels
();
ensureSizeIsEnough
(
src
.
size
(),
CV_MAKETYPE
(
CV_16S
,
cn
),
dx_calcBuf_
);
ensureSizeIsEnough
(
src
.
size
(),
CV_MAKETYPE
(
CV_16S
,
cn
),
dy_calcBuf_
);
calcSharrDeriv_run
(
src
,
dx_calcBuf_
,
dy_calcBuf_
,
dIdx
,
dIdy
,
cn
);
}
void
cv
::
ocl
::
PyrLKOpticalFlow
::
buildImagePyramid
(
const
oclMat
&
img0
,
vector
<
oclMat
>&
pyr
,
bool
withBorder
)
{
pyr
.
resize
(
maxLevel
+
1
);
Size
sz
=
img0
.
size
();
Mat
img0Temp
;
img0
.
download
(
img0Temp
);
Mat
pyrTemp
;
oclMat
o
;
for
(
int
level
=
0
;
level
<=
maxLevel
;
++
level
)
{
oclMat
temp
;
if
(
withBorder
)
{
temp
.
create
(
sz
.
height
+
winSize
.
height
*
2
,
sz
.
width
+
winSize
.
width
*
2
,
img0
.
type
());
}
else
{
ensureSizeIsEnough
(
sz
,
img0
.
type
(),
pyr
[
level
]);
}
if
(
level
==
0
)
pyr
[
level
]
=
img0Temp
;
else
pyrDown
(
pyr
[
level
-
1
],
pyr
[
level
]);
if
(
withBorder
)
copyMakeBorder
(
pyr
[
level
],
temp
,
winSize
.
height
,
winSize
.
height
,
winSize
.
width
,
winSize
.
width
,
BORDER_REFLECT_101
);
sz
=
Size
((
sz
.
width
+
1
)
/
2
,
(
sz
.
height
+
1
)
/
2
);
if
(
sz
.
width
<=
winSize
.
width
||
sz
.
height
<=
winSize
.
height
)
{
maxLevel
=
level
;
break
;
}
}
}
namespace
{
void
calcPatchSize
(
cv
::
Size
winSize
,
int
cn
,
dim3
&
block
,
dim3
&
patch
,
bool
isDeviceArch11
)
...
...
@@ -199,110 +105,507 @@ namespace
}
}
struct
MultiplyScalar
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
void
convert_run_cus
(
const
oclMat
&
src
,
oclMat
&
dst
,
double
alpha
,
double
beta
)
{
MultiplyScalar
(
double
val_
,
double
scale_
)
:
val
(
val_
),
scale
(
scale_
)
{}
double
operator
()(
double
a
)
const
string
kernelName
=
"convert_to_S"
;
stringstream
idxStr
;
idxStr
<<
src
.
depth
();
kernelName
+=
idxStr
.
str
();
float
alpha_f
=
(
float
)
alpha
,
beta_f
=
(
float
)
beta
;
CV_DbgAssert
(
src
.
rows
==
dst
.
rows
&&
src
.
cols
==
dst
.
cols
);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
1
]
=
(
dst
.
rows
+
localThreads
[
1
]
-
1
)
/
localThreads
[
1
]
*
localThreads
[
1
];
globalThreads
[
2
]
=
1
;
int
dststep_in_pixel
=
dst
.
step
/
dst
.
elemSize
(),
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
int
srcstep_in_pixel
=
src
.
step
/
src
.
elemSize
(),
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
if
(
dst
.
type
()
==
CV_8UC1
)
{
return
(
scale
*
a
*
val
)
;
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
])
/
localThreads
[
0
]
*
localThreads
[
0
]
;
}
const
double
val
;
const
double
scale
;
};
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
srcstep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dststep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
alpha_f
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
beta_f
));
openCLExecuteKernel2
(
dst
.
clCxt
,
&
operator_convertTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
dst
.
channels
(),
dst
.
depth
(),
CLFLUSH
);
}
void
convertTo
(
const
oclMat
&
src
,
oclMat
&
m
,
int
rtype
,
double
alpha
=
1
,
double
beta
=
0
);
void
convertTo
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
rtype
,
double
alpha
,
double
beta
)
{
//cout << "cv::ocl::oclMat::convertTo()" << endl;
void
callF
(
const
oclMat
&
src
,
oclMat
&
dst
,
MultiplyScalar
op
,
int
mask
)
bool
noScale
=
fabs
(
alpha
-
1
)
<
std
::
numeric_limits
<
double
>::
epsilon
()
&&
fabs
(
beta
)
<
std
::
numeric_limits
<
double
>::
epsilon
();
if
(
rtype
<
0
)
rtype
=
src
.
type
();
else
rtype
=
CV_MAKETYPE
(
CV_MAT_DEPTH
(
rtype
),
src
.
channels
());
int
sdepth
=
src
.
depth
(),
ddepth
=
CV_MAT_DEPTH
(
rtype
);
if
(
sdepth
==
ddepth
&&
noScale
)
{
src
.
copyTo
(
dst
);
return
;
}
oclMat
temp
;
const
oclMat
*
psrc
=
&
src
;
if
(
sdepth
!=
ddepth
&&
psrc
==
&
dst
)
psrc
=
&
(
temp
=
src
);
dst
.
create
(
src
.
size
(),
rtype
);
convert_run_cus
(
*
psrc
,
dst
,
alpha
,
beta
);
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// setTo ////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
//oclMat &operator = (const Scalar &s)
//{
// //cout << "cv::ocl::oclMat::=" << endl;
// setTo(s);
// return *this;
//}
void
set_to_withoutmask_run_cus
(
const
oclMat
&
dst
,
const
Scalar
&
scalar
,
string
kernelName
)
{
Mat
srcTemp
;
Mat
dstTemp
;
src
.
download
(
srcTemp
);
dst
.
download
(
dstTemp
);
int
i
;
int
j
;
int
k
;
for
(
i
=
0
;
i
<
srcTemp
.
rows
;
i
++
)
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
];
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
1
]
=
(
dst
.
rows
+
localThreads
[
1
]
-
1
)
/
localThreads
[
1
]
*
localThreads
[
1
];
globalThreads
[
2
]
=
1
;
int
step_in_pixel
=
dst
.
step
/
dst
.
elemSize
(),
offset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
if
(
dst
.
type
()
==
CV_8UC1
)
{
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
}
char
compile_option
[
32
];
union
sc
{
for
(
j
=
0
;
j
<
srcTemp
.
cols
;
j
++
)
cl_uchar4
uval
;
cl_char4
cval
;
cl_ushort4
usval
;
cl_short4
shval
;
cl_int4
ival
;
cl_float4
fval
;
cl_double4
dval
;
}
val
;
switch
(
dst
.
depth
())
{
case
0
:
val
.
uval
.
s
[
0
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
0
]);
val
.
uval
.
s
[
1
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
1
]);
val
.
uval
.
s
[
2
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
2
]);
val
.
uval
.
s
[
3
]
=
saturate_cast
<
uchar
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar
)
,
(
void
*
)
&
val
.
uval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=uchar4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_uchar4
)
,
(
void
*
)
&
val
.
uval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
1
:
val
.
cval
.
s
[
0
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
0
]);
val
.
cval
.
s
[
1
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
1
]);
val
.
cval
.
s
[
2
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
2
]);
val
.
cval
.
s
[
3
]
=
saturate_cast
<
char
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=char"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
)
,
(
void
*
)
&
val
.
cval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=char4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_char4
)
,
(
void
*
)
&
val
.
cval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
2
:
val
.
usval
.
s
[
0
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
0
]);
val
.
usval
.
s
[
1
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
1
]);
val
.
usval
.
s
[
2
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
2
]);
val
.
usval
.
s
[
3
]
=
saturate_cast
<
ushort
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort
)
,
(
void
*
)
&
val
.
usval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=ushort4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_ushort4
)
,
(
void
*
)
&
val
.
usval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
3
:
val
.
shval
.
s
[
0
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
0
]);
val
.
shval
.
s
[
1
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
1
]);
val
.
shval
.
s
[
2
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
2
]);
val
.
shval
.
s
[
3
]
=
saturate_cast
<
short
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
for
(
k
=
0
;
k
<
srcTemp
.
channels
();
k
++
)
{
((
float
*
)
dstTemp
.
data
)[
srcTemp
.
channels
()
*
(
i
*
srcTemp
.
rows
+
j
)
+
k
]
=
(
float
)
op
(((
float
*
)
srcTemp
.
data
)[
srcTemp
.
channels
()
*
(
i
*
srcTemp
.
rows
+
j
)
+
k
]);
}
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=short"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short
)
,
(
void
*
)
&
val
.
shval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=short4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_short4
)
,
(
void
*
)
&
val
.
shval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
4
:
val
.
ival
.
s
[
0
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
0
]);
val
.
ival
.
s
[
1
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
1
]);
val
.
ival
.
s
[
2
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
2
]);
val
.
ival
.
s
[
3
]
=
saturate_cast
<
int
>
(
scalar
.
val
[
3
]);
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=int"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
val
.
ival
.
s
[
0
]
));
break
;
case
2
:
sprintf
(
compile_option
,
"-D GENTYPE=int2"
);
cl_int2
i2val
;
i2val
.
s
[
0
]
=
val
.
ival
.
s
[
0
];
i2val
.
s
[
1
]
=
val
.
ival
.
s
[
1
];
args
.
push_back
(
make_pair
(
sizeof
(
cl_int2
)
,
(
void
*
)
&
i2val
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=int4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
val
.
ival
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
5
:
val
.
fval
.
s
[
0
]
=
(
float
)
scalar
.
val
[
0
];
val
.
fval
.
s
[
1
]
=
(
float
)
scalar
.
val
[
1
];
val
.
fval
.
s
[
2
]
=
(
float
)
scalar
.
val
[
2
];
val
.
fval
.
s
[
3
]
=
(
float
)
scalar
.
val
[
3
];
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=float"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
val
.
fval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=float4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float4
)
,
(
void
*
)
&
val
.
fval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
case
6
:
val
.
dval
.
s
[
0
]
=
scalar
.
val
[
0
];
val
.
dval
.
s
[
1
]
=
scalar
.
val
[
1
];
val
.
dval
.
s
[
2
]
=
scalar
.
val
[
2
];
val
.
dval
.
s
[
3
]
=
scalar
.
val
[
3
];
switch
(
dst
.
channels
())
{
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE=double"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
)
,
(
void
*
)
&
val
.
dval
.
s
[
0
]
));
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE=double4"
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_double4
)
,
(
void
*
)
&
val
.
dval
));
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unsupported channels"
);
}
break
;
default:
CV_Error
(
CV_StsUnsupportedFormat
,
"unknown depth"
);
}
#if CL_VERSION_1_2
if
(
dst
.
offset
==
0
&&
dst
.
cols
==
dst
.
wholecols
)
{
clEnqueueFillBuffer
(
dst
.
clCxt
->
impl
->
clCmdQueue
,(
cl_mem
)
dst
.
data
,
args
[
0
].
second
,
args
[
0
].
first
,
0
,
dst
.
step
*
dst
.
rows
,
0
,
NULL
,
NULL
);
}
dst
=
dstTemp
;
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset_in_pixel
));
openCLExecuteKernel2
(
dst
.
clCxt
,
&
operator_setTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
,
CLFLUSH
);
}
#else
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset_in_pixel
));
openCLExecuteKernel2
(
dst
.
clCxt
,
&
operator_setTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
,
CLFLUSH
);
#endif
}
static
inline
bool
isAligned
(
const
unsigned
char
*
ptr
,
size_t
size
)
oclMat
&
setTo
(
oclMat
&
src
,
const
Scalar
&
scalar
)
{
return
reinterpret_cast
<
size_t
>
(
ptr
)
%
size
==
0
;
}
CV_Assert
(
src
.
depth
()
>=
0
&&
src
.
depth
()
<=
6
)
;
CV_DbgAssert
(
!
src
.
empty
());
static
inline
bool
isAligned
(
size_t
step
,
size_t
size
)
{
return
step
%
size
==
0
;
if
(
src
.
type
()
==
CV_8UC1
)
{
set_to_withoutmask_run_cus
(
src
,
scalar
,
"set_to_without_mask_C1_D0"
);
}
else
{
set_to_withoutmask_run_cus
(
src
,
scalar
,
"set_to_without_mask"
);
}
return
src
;
}
void
callT
(
const
oclMat
&
src
,
oclMat
&
dst
,
MultiplyScalar
op
,
int
mask
)
void
arithmetic_run
(
const
oclMat
&
src1
,
oclMat
&
dst
,
string
kernelName
,
const
char
**
kernelString
,
void
*
_scalar
)
{
if
(
!
isAligned
(
src
.
data
,
4
*
sizeof
(
double
))
||
!
isAligned
(
src
.
step
,
4
*
sizeof
(
double
))
||
!
isAligned
(
dst
.
data
,
4
*
sizeof
(
double
))
||
!
isAligned
(
dst
.
step
,
4
*
sizeof
(
double
)))
if
(
src1
.
clCxt
->
impl
->
double_support
==
0
&&
src1
.
type
()
==
CV_64F
)
{
callF
(
src
,
dst
,
op
,
mask
);
CV_Error
(
CV_GpuNotSupported
,
"Selected device don't support double
\r\n
"
);
return
;
}
Mat
srcTemp
;
Mat
dstTemp
;
src
.
download
(
srcTemp
);
dst
.
download
(
dstTemp
);
//dst.create(src1.size(), src1.type());
//CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
// src1.rows == src2.rows && src2.rows == dst.rows);
CV_Assert
(
src1
.
cols
==
dst
.
cols
&&
src1
.
rows
==
dst
.
rows
);
int
x_shifted
;
CV_Assert
(
src1
.
type
()
==
dst
.
type
());
CV_Assert
(
src1
.
depth
()
!=
CV_8S
);
int
i
;
int
j
;
for
(
i
=
0
;
i
<
srcTemp
.
rows
;
i
++
)
{
const
double
*
srcRow
=
(
const
double
*
)
srcTemp
.
data
+
i
*
srcTemp
.
rows
;
double
*
dstRow
=
(
double
*
)
dstTemp
.
data
+
i
*
dstTemp
.
rows
;;
Context
*
clCxt
=
src1
.
clCxt
;
//int channels = dst.channels();
//int depth = dst.depth();
for
(
j
=
0
;
j
<
srcTemp
.
cols
;
j
++
)
{
x_shifted
=
j
*
4
;
if
(
x_shifted
+
4
-
1
<
srcTemp
.
cols
)
{
dstRow
[
x_shifted
]
=
op
(
srcRow
[
x_shifted
]);
dstRow
[
x_shifted
+
1
]
=
op
(
srcRow
[
x_shifted
+
1
]);
dstRow
[
x_shifted
+
2
]
=
op
(
srcRow
[
x_shifted
+
2
]);
dstRow
[
x_shifted
+
3
]
=
op
(
srcRow
[
x_shifted
+
3
]);
}
else
{
for
(
int
real_x
=
x_shifted
;
real_x
<
srcTemp
.
cols
;
++
real_x
)
{
((
float
*
)
dstTemp
.
data
)[
i
*
srcTemp
.
rows
+
real_x
]
=
op
(((
float
*
)
srcTemp
.
data
)[
i
*
srcTemp
.
rows
+
real_x
]);
}
}
}
}
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1}
//};
//size_t vector_length = vector_lengths[channels-1][depth];
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
//int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
//size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
// divUp(dst.rows, localThreads[1]) * localThreads[1],
// 1
// };
size_t
globalThreads
[
3
]
=
{
src1
.
cols
,
src1
.
rows
,
1
};
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
//args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
//if(_scalar != NULL)
//{
float
scalar1
=
*
((
float
*
)
_scalar
);
args
.
push_back
(
make_pair
(
sizeof
(
float
),
(
float
*
)
&
scalar1
));
//}
openCLExecuteKernel2
(
clCxt
,
kernelString
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
src1
.
depth
(),
CLFLUSH
);
}
void
multiply_cus
(
const
oclMat
&
src1
,
oclMat
&
dst
,
float
scalar
)
{
arithmetic_run
(
src1
,
dst
,
"arithm_muls"
,
&
pyrlk
,
(
void
*
)(
&
scalar
));
}
void
pyrdown_run_cus
(
const
oclMat
&
src
,
const
oclMat
&
dst
)
{
CV_Assert
(
src
.
type
()
==
dst
.
type
());
CV_Assert
(
src
.
depth
()
!=
CV_8S
);
Context
*
clCxt
=
src
.
clCxt
;
string
kernelName
=
"pyrDown"
;
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
dst
.
rows
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
openCLExecuteKernel2
(
clCxt
,
&
pyr_down
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
channels
(),
src
.
depth
(),
CLFLUSH
);
}
void
multiply
(
const
oclMat
&
src1
,
double
val
,
oclMat
&
dst
,
double
scale
=
1.0
f
);
void
multiply
(
const
oclMat
&
src1
,
double
val
,
oclMat
&
dst
,
double
scale
)
void
pyrDown_cus
(
const
oclMat
&
src
,
oclMat
&
dst
)
{
MultiplyScalar
op
(
val
,
scale
);
//if(src1.channels() == 1 && dst.channels() == 1)
//{
// callT(src1, dst, op, 0);
//}
//else
//{
callF
(
src1
,
dst
,
op
,
0
);
//}
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
dst
.
create
((
src
.
rows
+
1
)
/
2
,
(
src
.
cols
+
1
)
/
2
,
src
.
type
());
pyrdown_run_cus
(
src
,
dst
);
}
//struct MultiplyScalar
//{
// MultiplyScalar(double val_, double scale_) : val(val_), scale(scale_) {}
// double operator ()(double a) const
// {
// return (scale * a * val);
// }
// const double val;
// const double scale;
//};
//
//void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
//{
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
//
// int i;
// int j;
// int k;
// for(i = 0; i < srcTemp.rows; i++)
// {
// for(j = 0; j < srcTemp.cols; j++)
// {
// for(k = 0; k < srcTemp.channels(); k++)
// {
// ((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]);
// }
// }
// }
//
// dst = dstTemp;
//}
//
//static inline bool isAligned(const unsigned char* ptr, size_t size)
//{
// return reinterpret_cast<size_t>(ptr) % size == 0;
//}
//
//static inline bool isAligned(size_t step, size_t size)
//{
// return step % size == 0;
//}
//
//void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
//{
// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
// !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
// {
// callF(src, dst, op, mask);
// return;
// }
//
// Mat srcTemp;
// Mat dstTemp;
// src.download(srcTemp);
// dst.download(dstTemp);
//
// int x_shifted;
//
// int i;
// int j;
// for(i = 0; i < srcTemp.rows; i++)
// {
// const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows;
// double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;;
//
// for(j = 0; j < srcTemp.cols; j++)
// {
// x_shifted = j * 4;
//
// if(x_shifted + 4 - 1 < srcTemp.cols)
// {
// dstRow[x_shifted ] = op(srcRow[x_shifted ]);
// dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]);
// dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]);
// dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]);
// }
// else
// {
// for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x)
// {
// ((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]);
// }
// }
// }
// }
//}
//
//void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f);
//void multiply(const oclMat& src1, double val, oclMat& dst, double scale)
//{
// MultiplyScalar op(val, scale);
// //if(src1.channels() == 1 && dst.channels() == 1)
// //{
// // callT(src1, dst, op, 0);
// //}
// //else
// //{
// callF(src1, dst, op, 0);
// //}
//}
cl_mem
bindTexture
(
const
oclMat
&
mat
,
int
depth
,
int
channels
)
{
cl_mem
texture
;
...
...
@@ -331,7 +634,7 @@ cl_mem bindTexture(const oclMat& mat, int depth, int channels)
#if CL_VERSION_1_2
cl_image_desc
desc
;
desc
.
image_type
=
CL_MEM_OBJECT_IMAGE2D
;
desc
.
image_width
=
mat
.
cols
;
desc
.
image_width
=
mat
.
step
/
mat
.
elemSize
()
;
desc
.
image_height
=
mat
.
rows
;
desc
.
image_depth
=
NULL
;
desc
.
image_array_size
=
1
;
...
...
@@ -346,30 +649,35 @@ cl_mem bindTexture(const oclMat& mat, int depth, int channels)
mat
.
clCxt
->
impl
->
clContext
,
CL_MEM_READ_WRITE
,
&
format
,
mat
.
cols
,
mat
.
step
/
mat
.
elemSize
()
,
mat
.
rows
,
0
,
NULL
,
&
err
);
#endif
size_t
origin
[]
=
{
0
,
0
,
0
};
size_t
region
[]
=
{
mat
.
cols
,
mat
.
rows
,
1
};
size_t
region
[]
=
{
mat
.
step
/
mat
.
elemSize
()
,
mat
.
rows
,
1
};
clEnqueueCopyBufferToImage
(
mat
.
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
mat
.
data
,
texture
,
0
,
origin
,
region
,
0
,
NULL
,
0
);
openCLSafeCall
(
err
);
return
texture
;
}
void
releaseTexture
(
cl_mem
texture
)
{
openCLFree
(
texture
);
}
void
lkSparse_run
(
oclMat
&
I
,
oclMat
&
J
,
const
oclMat
&
prevPts
,
oclMat
&
nextPts
,
oclMat
&
status
,
oclMat
*
err
,
bool
GET_MIN_EIGENVALS
,
int
ptcount
,
int
level
,
dim3
block
,
dim3
patch
,
Size
winSize
,
int
iters
)
int
level
,
/*dim3 block, */
dim3
patch
,
Size
winSize
,
int
iters
)
{
Context
*
clCxt
=
I
.
clCxt
;
string
kernelName
=
"lkSparse"
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
16
*
ptcount
,
16
,
1
};
size_t
localThreads
[
3
]
=
{
8
,
32
,
1
};
size_t
globalThreads
[
3
]
=
{
8
*
ptcount
,
32
,
1
};
int
cn
=
I
.
channels
();
...
...
@@ -410,7 +718,10 @@ void lkSparse_run(oclMat& I, oclMat& J,
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
calcErr
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
GET_MIN_EIGENVALS
));
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
channels
(),
I
.
depth
());
openCLExecuteKernel2
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
channels
(),
I
.
depth
(),
CLFLUSH
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
void
cv
::
ocl
::
PyrLKOpticalFlow
::
sparse
(
const
oclMat
&
prevImg
,
const
oclMat
&
nextImg
,
const
oclMat
&
prevPts
,
oclMat
&
nextPts
,
oclMat
&
status
,
oclMat
*
err
)
...
...
@@ -446,14 +757,15 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
oclMat
temp1
=
(
useInitialFlow
?
nextPts
:
prevPts
).
reshape
(
1
);
oclMat
temp2
=
nextPts
.
reshape
(
1
);
//oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
//ocl::multiply(temp1, scalar, temp2
);
::
multiply
(
temp1
,
1.0
f
/
(
1
<<
maxLevel
)
/
2.0
f
,
temp2
);
multiply_cus
(
temp1
,
temp2
,
1.0
f
/
(
1
<<
maxLevel
)
/
2.0
f
);
//
::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_8UC1
,
status
);
status
.
setTo
(
Scalar
::
all
(
1
));
//status.setTo(Scalar::all(1));
setTo
(
status
,
Scalar
::
all
(
1
));
if
(
err
)
ensureSizeIsEnough
(
1
,
prevPts
.
cols
,
CV_32FC1
,
*
err
);
//
if (err)
//
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
// build the image pyramids.
...
...
@@ -462,23 +774,25 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
if
(
cn
==
1
||
cn
==
4
)
{
prevImg
.
convertTo
(
prevPyr_
[
0
],
CV_32F
);
nextImg
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
//prevImg.convertTo(prevPyr_[0], CV_32F);
//nextImg.convertTo(nextPyr_[0], CV_32F);
convertTo
(
prevImg
,
prevPyr_
[
0
],
CV_32F
);
convertTo
(
nextImg
,
nextPyr_
[
0
],
CV_32F
);
}
else
{
oclMat
buf_
;
cvtColor
(
prevImg
,
buf_
,
COLOR_BGR2BGRA
);
buf_
.
convertTo
(
prevPyr_
[
0
],
CV_32F
);
//
oclMat buf_;
//
cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
//
buf_.convertTo(prevPyr_[0], CV_32F);
cvtColor
(
nextImg
,
buf_
,
COLOR_BGR2BGRA
);
buf_
.
convertTo
(
nextPyr_
[
0
],
CV_32F
);
//
cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
//
buf_.convertTo(nextPyr_[0], CV_32F);
}
for
(
int
level
=
1
;
level
<=
maxLevel
;
++
level
)
{
pyrDown
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
pyrDown
_cus
(
prevPyr_
[
level
-
1
],
prevPyr_
[
level
]);
pyrDown
_cus
(
nextPyr_
[
level
-
1
],
nextPyr_
[
level
]);
}
// dI/dx ~ Ix, dI/dy ~ Iy
...
...
@@ -487,8 +801,10 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next
{
lkSparse_run
(
prevPyr_
[
level
],
nextPyr_
[
level
],
prevPts
,
nextPts
,
status
,
level
==
0
&&
err
?
err
:
0
,
getMinEigenVals
,
prevPts
.
cols
,
level
,
block
,
patch
,
winSize
,
iters
);
level
,
/*block, */
patch
,
winSize
,
iters
);
}
clFinish
(
prevImg
.
clCxt
->
impl
->
clCmdQueue
);
}
void
lkDense_run
(
oclMat
&
I
,
oclMat
&
J
,
oclMat
&
u
,
oclMat
&
v
,
...
...
@@ -516,10 +832,10 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
cl_mem
ITex
=
bindTexture
(
I
,
I
.
depth
(),
cn
);
cl_mem
JTex
=
bindTexture
(
J
,
J
.
depth
(),
cn
);
int2
halfWin
=
{(
winSize
.
width
-
1
)
/
2
,
(
winSize
.
height
-
1
)
/
2
};
const
int
patchWidth
=
16
+
2
*
halfWin
.
x
;
const
int
patchHeight
=
16
+
2
*
halfWin
.
y
;
size_t
smem_size
=
3
*
patchWidth
*
patchHeight
*
sizeof
(
int
);
//
int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
//
const int patchWidth = 16 + 2 * halfWin.x;
//
const int patchHeight = 16 + 2 * halfWin.y;
//
size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
...
@@ -543,7 +859,10 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
iters
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
calcErr
));
openCLExecuteKernel
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
channels
(),
I
.
depth
());
openCLExecuteKernel2
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
channels
(),
I
.
depth
(),
CLFLUSH
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
}
void
cv
::
ocl
::
PyrLKOpticalFlow
::
dense
(
const
oclMat
&
prevImg
,
const
oclMat
&
nextImg
,
oclMat
&
u
,
oclMat
&
v
,
oclMat
*
err
)
...
...
modules/ocl/test/test_pyrlk.cpp
View file @
09359982
...
...
@@ -118,9 +118,9 @@ TEST_P(Sparse, Mat)
cv
::
Mat
status_mat
(
1
,
d_status
.
cols
,
CV_8UC1
,
(
void
*
)
&
status
[
0
]);
d_status
.
download
(
status_mat
);
std
::
vector
<
float
>
err
(
d_err
.
cols
);
cv
::
Mat
err_mat
(
1
,
d_err
.
cols
,
CV_32FC1
,
(
void
*
)
&
err
[
0
]);
d_err
.
download
(
err_mat
);
//
std::vector<float> err(d_err.cols);
//
cv::Mat err_mat(1, d_err.cols, CV_32FC1, (void*)&err[0]);
//
d_err.download(err_mat);
std
::
vector
<
cv
::
Point2f
>
nextPts_gold
;
std
::
vector
<
unsigned
char
>
status_gold
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment