Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c9d8025f
Commit
c9d8025f
authored
Mar 04, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Mar 04, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2391 from ilya-lavrenov:tapi_match_template
parents
c00f0faf
653b99c9
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
378 additions
and
303 deletions
+378
-303
match_template.cl
modules/imgproc/src/opencl/match_template.cl
+276
-212
templmatch.cpp
modules/imgproc/src/templmatch.cpp
+101
-89
test_match_template.cpp
modules/imgproc/test/ocl/test_match_template.cpp
+1
-2
No files found.
modules/imgproc/src/opencl/match_template.cl
View file @
c9d8025f
...
...
@@ -32,259 +32,315 @@
#
define
DATA_SIZE
((
int
)
sizeof
(
type
))
#
define
ELEM_TYPE
elem_type
#
define
ELEM_SIZE
((
int
)
sizeof
(
elem_type
))
#
define
CN
cn
#
define
SQSUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sqsums_step,
gidx*CN
+
img_sqsums_offset
+
ox*CN
)
#
define
SUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sums_step,
gidx*CN
+
img_sums_offset
+
ox*CN
)
#
define
SQSUMS_PTR
(
ox,
oy
)
mad24
(
y
+
oy,
src_sqsums_step,
mad24
(
x
+
ox,
cn,
src_sqsums_offset
)
)
#
define
SUMS_PTR
(
ox,
oy
)
mad24
(
y
+
oy,
src_sums_step,
mad24
(
x
+
ox,
cn,
src_sums_offset
)
)
inline
float
normAcc
(
float
num,
float
denum
)
{
if
(
fabs
(
num
)
<
denum
)
{
if
(
fabs
(
num
)
<
denum
)
return
num
/
denum
;
}
if
(
fabs
(
num
)
<
denum
*
1.125f
)
{
if
(
fabs
(
num
)
<
denum
*
1.125f
)
return
num
>
0
?
1
:
-1
;
}
return
0
;
}
inline
float
normAcc_SQDIFF
(
float
num,
float
denum
)
{
if
(
fabs
(
num
)
<
denum
)
{
if
(
fabs
(
num
)
<
denum
)
return
num
/
denum
;
}
if
(
fabs
(
num
)
<
denum
*
1.125f
)
{
if
(
fabs
(
num
)
<
denum
*
1.125f
)
return
num
>
0
?
1
:
-1
;
}
return
1
;
}
//////////////////////////////////////////CCORR/////////////////////////////////////////////////////////////////////////
#
define
noconvert
#
if
cn
==
1
#
define
convertToDT
(
value
)
(
float
)(
value
)
#
elif
cn
==
2
#
define
convertToDT
(
value
)
(
float
)(
value.x
+
value.y
)
#
elif
cn
==
4
#
define
convertToDT
(
value
)
(
float
)(
value.x
+
value.y
+
value.z
+
value.w
)
#
else
#
error
"cn should be 1, 2 or 4"
#
endif
__kernel
void
matchTemplate_Naive_CCORR
(
__global
const
uchar
*
img,int
img_step,int
img_offset,
__global
const
uchar
*
tpl,int
tpl_step,int
tpl_offset,int
tpl_rows,
int
tpl_cols,
__global
uchar
*
res,int
res_step,int
res_offset,int
res_rows,int
res_cols
)
#
ifdef
CALC_SUM
__kernel
void
calcSum
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
cols,
int
total,
__global
float
*
dst
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
int
i,j
;
float
sum
=
0
;
int
lid
=
get_local_id
(
0
)
,
id
=
get_global_id
(
0
)
;
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
__local
WT
localmem[WGS2_ALIGNED]
;
WT
accumulator
=
(
WT
)(
0
)
,
tmp
;
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
for
(
; id < total; id += WGS
)
{
for
(
i
=
0
; i < tpl_rows; i ++)
{
__global
const
ELEM_TYPE
*
img_ptr
=
(
__global
const
ELEM_TYPE
*
)(
img
+
mad24
(
gidy
+
i,
img_step,
gidx*DATA_SIZE
+
img_offset
))
;
__global
const
ELEM_TYPE
*
tpl_ptr
=
(
__global
const
ELEM_TYPE
*
)(
tpl
+
mad24
(
i,
tpl_step,
tpl_offset
))
;
for
(
j
=
0
; j < tpl_cols; j ++)
int
src_index
=
mad24
(
id
/
cols,
src_step,
mad24
(
id
%
cols,
(
int
)
sizeof
(
T
)
,
src_offset
))
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
;
tmp
=
convertToWT
(
src[0]
)
;
#
if
wdepth
==
4
accumulator
=
mad24
(
tmp,
tmp,
accumulator
)
;
#
else
accumulator
=
mad
(
tmp,
tmp,
accumulator
)
;
#
endif
}
#
pragma
unroll
for
(
int
c
=
0
; c < CN; c++)
if
(
lid
<
WGS2_ALIGNED
)
localmem[lid]
=
accumulator
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
+=
(
float
)(
img_ptr[j*CN+c]
*
tpl_ptr[j*CN+c]
)
;
if
(
lid
>=
WGS2_ALIGNED
&&
total
>=
WGS2_ALIGNED
)
localmem[lid
-
WGS2_ALIGNED]
+=
accumulator
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
WGS2_ALIGNED
>>
1
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localmem[lid]
+=
localmem[lid2]
;
}
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*result
=
sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
dst[0]
=
convertToDT
(
localmem[0]
)
;
}
__kernel
void
matchTemplate_CCORR_NORMED
(
__global
const
uchar
*
img_sqsums,
int
img_sqsums_step,
int
img_sqsums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res_cols,
int
tpl_rows,
int
tpl_cols,
float
tpl_sqsum
)
#
elif
defined
CCORR
__kernel
void
matchTemplate_Naive_CCORR
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
const
uchar
*
templateptr,
int
template_step,
int
template_offset,
int
template_rows,
int
template_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
int
gid
x
=
get_global_id
(
0
)
;
int
gid
y
=
get_global_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
img_sqsums_step
/=
sizeof
(
float
)
;
img_sqsums_offset
/=
sizeof
(
float
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
WT
sum
=
(
WT
)(
0
)
;
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)))
;
__global
const
T
*
template
=
(
__global
const
T
*
)(
templateptr
+
template_offset
)
;
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
__global
float
*
sqsum
=
(
__global
float*
)(
img_sqsums
)
;
float
image_sqsum_
=
(
float
)(
(
sqsum[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
sqsum[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
]
))
;
for
(
int
i
=
0
; i < template_rows; ++i)
{
for
(
int
j
=
0
; j < template_cols; ++j)
#
if
wdepth
==
4
sum
=
mad24
(
convertToWT
(
src[j]
)
,
convertToWT
(
template[j]
)
,
sum
)
;
#
else
sum
=
mad
(
convertToWT
(
src[j]
)
,
convertToWT
(
template[j]
)
,
sum
)
;
#
endif
src
=
(
__global
const
T
*
)((
__global
const
uchar
*
)
src
+
src_step
)
;
template
=
(
__global
const
T
*
)((
__global
const
uchar
*
)
template
+
template_step
)
;
}
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*
result
=
normAcc
(
*result,
sqrt
(
image_sqsum_
*
tpl_sqsum
)
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
)
)
;
*
(
__global
float
*
)(
dst
+
dst_idx
)
=
convertToDT
(
sum
)
;
}
}
////////////////////////////////////////////SQDIFF////////////////////////////////////////////////////////////////////////
#
elif
defined
CCORR_NORMED
__kernel
void
matchTemplate_
Naive_SQDIFF
(
__global
const
uchar
*
img,int
img_step,int
img
_offset,
__global
const
uchar
*
tpl,int
tpl_step,int
tpl_offset,int
tpl_rows,
int
tpl
_cols,
__global
uchar
*
res,int
res_step,int
res_offset,int
res_rows,int
res_cols
)
__kernel
void
matchTemplate_
CCORR_NORMED
(
__global
const
uchar
*
src_sqsums,
int
src_sqsums_step,
int
src_sqsums
_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst
_cols,
int
template_rows,
int
template_cols,
__global
const
float
*
template_sqsum
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
int
i,j
;
float
delta
;
float
sum
=
0
;
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
gidx
<
res_cols
&&
gidy
<
res
_rows
)
if
(
x
<
dst_cols
&&
y
<
dst
_rows
)
{
for
(
i
=
0
; i < tpl_rows; i ++)
{
__global
const
ELEM_TYPE
*
img_ptr
=
(
__global
const
ELEM_TYPE
*
)(
img
+
mad24
(
gidy
+
i,
img_step,
gidx*DATA_SIZE
+
img_offset
))
;
__global
const
ELEM_TYPE
*
tpl_ptr
=
(
__global
const
ELEM_TYPE
*
)(
tpl
+
mad24
(
i,
tpl_step,
tpl_offset
))
;
__global
const
float
*
sqsum
=
(
__global
const
float
*
)(
src_sqsums
)
;
for
(
j
=
0
; j < tpl_cols; j ++)
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
float
image_sqsum_
=
(
float
)(
sqsum[SQSUMS_PTR
(
template_cols,
template_rows
)
]
-
sqsum[SQSUMS_PTR
(
template_cols,
0
)
]
-
sqsum[SQSUMS_PTR
(
0
,
template_rows
)
]
+
sqsum[SQSUMS_PTR
(
0
,
0
)
]
)
;
#
pragma
unroll
for
(
int
c
=
0
; c < CN; c++)
{
delta
=
(
float
)(
img_ptr[j*CN+c]
-
tpl_ptr[j*CN+c]
)
;
sum
+=
delta*delta
;
}
}
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*result
=
sum
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst
+
dst_idx
)
;
*dstult
=
normAcc
(
*dstult,
sqrt
(
image_sqsum_
*
template_sqsum[0]
))
;
}
}
__kernel
void
matchTemplate_SQDIFF_NORMED
(
__global
const
uchar
*
img_sqsums,
int
img_sqsums_step,
int
img_sqsums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res_cols,
int
tpl_rows,
int
tpl_cols,
float
tpl_sqsum
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
img_sqsums_step
/=
sizeof
(
float
)
;
img_sqsums_offset
/=
sizeof
(
float
)
;
#
elif
defined
SQDIFF
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
__kernel
void
matchTemplate_Naive_SQDIFF
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
const
uchar
*
templateptr,
int
template_step,
int
template_offset,
int
template_rows,
int
template_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
gidx
<
res_cols
&&
gidy
<
res
_rows
)
if
(
x
<
dst_cols
&&
y
<
dst
_rows
)
{
__global
float
*
sqsum
=
(
__global
float*
)(
img_sqsums
)
;
float
image_sqsum_
=
(
float
)(
(
sqsum[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
sqsum[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
]
))
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)))
;
__global
const
T
*
template
=
(
__global
const
T
*
)(
templateptr
+
template_offset
)
;
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
WT
sum
=
(
WT
)(
0
)
,
value
;
*result
=
normAcc_SQDIFF
(
image_sqsum_
-
2.f
*
result[0]
+
tpl_sqsum,
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
for
(
int
i
=
0
; i < template_rows; ++i)
{
for
(
int
j
=
0
; j < template_cols; ++j)
{
value
=
convertToWT
(
src[j]
)
-
convertToWT
(
template[j]
)
;
#
if
wdepth
==
4
sum
=
mad24
(
value,
value,
sum
)
;
#
else
sum
=
mad
(
value,
value,
sum
)
;
#
endif
}
src
=
(
__global
const
T
*
)((
__global
const
uchar
*
)
src
+
src_step
)
;
template
=
(
__global
const
T
*
)((
__global
const
uchar
*
)
template
+
template_step
)
;
}
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
*
(
__global
float
*
)(
dst
+
dst_idx
)
=
convertToDT
(
sum
)
;
}
}
////////////////////////////////////////////CCOEFF/////////////////////////////////////////////////////////////////
#
elif
defined
SQDIFF_NORMED
__kernel
void
matchTemplate_
Prepared_CCOEFF_C1
(
__global
const
uchar
*
img_sums,
int
img_sums_step,
int
img_
sums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res
_cols,
int
tpl_rows,
int
tpl_cols,
float
tpl_
sum
)
__kernel
void
matchTemplate_
SQDIFF_NORMED
(
__global
const
uchar
*
src_sqsums,
int
src_sqsums_step,
int
src_sq
sums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst
_cols,
int
template_rows,
int
template_cols,
__global
const
float
*
template_sq
sum
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
img_sums_step
/=
ELEM_SIZE
;
img_sums_offset
/=
ELEM_SIZE
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
float
image_sum_
=
0
;
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
img_sums
)
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
image_sum_
+=
(
float
)((
sum[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
sum[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
tpl_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
*
tpl_sum
;
__global
const
float
*
sqsum
=
(
__global
const
float
*
)(
src_sqsums
)
;
float
image_sqsum_
=
(
float
)(
(
sqsum[SQSUMS_PTR
(
template_cols,
template_rows
)
]
-
sqsum[SQSUMS_PTR
(
template_cols,
0
)
]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
template_rows
)
]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
template_sqsum_value
=
template_sqsum[0]
;
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*result
-=
image_sum_
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst
+
dst_idx
)
;
*dstult
=
normAcc_SQDIFF
(
image_sqsum_
-
2.0f
*
dstult[0]
+
template_sqsum_value,
sqrt
(
image_sqsum_
*
template_sqsum_value
))
;
}
}
__kernel
void
matchTemplate_Prepared_CCOEFF_C2
(
__global
const
uchar
*
img_sums,
int
img_sums_step,
int
img_sums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res_cols,
int
tpl_rows,
int
tpl_cols,
float
tpl_sum_0,float
tpl_sum_1
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
#
elif
defined
CCOEFF
img_sums_step
/=
ELEM_SIZE
;
img_sums_offset
/=
ELEM_SIZE
;
#
if
cn
==
1
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
float
image_sum_
=
0
;
__kernel
void
matchTemplate_Prepared_CCOEFF
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
template_rows,
int
template_cols,
float
template_sum
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
gidx
<
res_cols
&&
gidy
<
res
_rows
)
if
(
x
<
dst_cols
&&
y
<
dst
_rows
)
{
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
img
_sums
)
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src
_sums
)
;
image_sum_
+=
tpl_sum_0
*
(
float
)((
sum[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
sum[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
tpl_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
;
image_sum_
+=
tpl_sum_1
*
(
float
)((
sum[SUMS_PTR
(
tpl_cols,
tpl_rows
)
+1]
-
sum[SUMS_PTR
(
tpl_cols,
0
)
+1]
)
-
(
sum[SUMS_PTR
(
0
,
tpl_rows
)
+1]
-
sum[SUMS_PTR
(
0
,
0
)
+1]
))
;
src_sums_step
/=
ELEM_SIZE
;
src_sums_offset
/=
ELEM_SIZE
;
float
image_sum_
=
(
float
)((
sum[SUMS_PTR
(
template_cols,
template_rows
)
]
-
sum[SUMS_PTR
(
template_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
template_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
*
template_sum
;
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*
res
ult
-=
image_sum_
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
)
)
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst
+
dst_idx
)
;
*
dst
ult
-=
image_sum_
;
}
}
__kernel
void
matchTemplate_Prepared_CCOEFF_C4
(
__global
const
uchar
*
img_sums,
int
img_sums_step,
int
img_sums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res_cols,
int
tpl_rows,
int
tpl_cols,
float
tpl_sum_0,float
tpl_sum_1,float
tpl_sum_2,float
tpl_sum_3
)
#
elif
cn
==
2
__kernel
void
matchTemplate_Prepared_CCOEFF
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
template_rows,
int
template_cols,
float
template_sum_0,
float
template_sum_1
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
src_sums_step
/=
ELEM_SIZE
;
src_sums_offset
/=
ELEM_SIZE
;
img_sums_step
/=
ELEM_SIZE
;
img_sums_offset
/=
ELEM_SIZE
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
float
image_sum_
=
0
;
float
image_sum_
=
template_sum_0
*
(
float
)((
sum[SUMS_PTR
(
template_cols,
template_rows
)
]
-
sum[SUMS_PTR
(
template_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
template_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
;
image_sum_
+=
template_sum_1
*
(
float
)((
sum[SUMS_PTR
(
template_cols,
template_rows
)
+1]
-
sum[SUMS_PTR
(
template_cols,
0
)
+1]
)
-
(
sum[SUMS_PTR
(
0
,
template_rows
)
+1]
-
sum[SUMS_PTR
(
0
,
0
)
+1]
))
;
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
-=
image_sum_
;
}
}
#
elif
cn
==
4
__kernel
void
matchTemplate_Prepared_CCOEFF
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
template_rows,
int
template_cols,
float
template_sum_0,
float
template_sum_1,
float
template_sum_2,
float
template_sum_3
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
img_sums
)
;
src_sums_step
/=
ELEM_SIZE
;
src_sums_offset
/=
ELEM_SIZE
;
int
c_r
=
SUMS_PTR
(
tpl_cols,
tpl_rows
)
;
int
c_o
=
SUMS_PTR
(
tpl_cols,
0
)
;
int
o_r
=
SUMS_PTR
(
0
,
tpl_rows
)
;
int
oo
=
SUMS_PTR
(
0
,
0
)
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
i
mage_sum_
+=
tpl_sum_0
*
(
float
)((
sum[c_r]
-
sum[c_o]
)
-
(
sum[o_r]
-
sum[oo]
)
)
;
i
mage_sum_
+=
tpl_sum_1
*
(
float
)((
sum[c_r+1]
-
sum[c_o+1]
)
-
(
sum[o_r+1]
-
sum[oo+1]
)
)
;
i
mage_sum_
+=
tpl_sum_2
*
(
float
)((
sum[c_r+2]
-
sum[c_o+2]
)
-
(
sum[o_r+2]
-
sum[oo+2]
)
)
;
i
mage_sum_
+=
tpl_sum_3
*
(
float
)((
sum[c_r+3]
-
sum[c_o+3]
)
-
(
sum[o_r+3]
-
sum[oo+3]
)
)
;
i
nt
c_r
=
SUMS_PTR
(
template_cols,
template_rows
)
;
i
nt
c_o
=
SUMS_PTR
(
template_cols,
0
)
;
i
nt
o_r
=
SUMS_PTR
(
0
,
template_rows
)
;
i
nt
oo
=
SUMS_PTR
(
0
,
0
)
;
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
float
image_sum_
=
template_sum_0
*
(
float
)((
sum[c_r]
-
sum[c_o]
)
-
(
sum[o_r]
-
sum[oo]
))
;
image_sum_
+=
template_sum_1
*
(
float
)((
sum[c_r+1]
-
sum[c_o+1]
)
-
(
sum[o_r+1]
-
sum[oo+1]
))
;
image_sum_
+=
template_sum_2
*
(
float
)((
sum[c_r+2]
-
sum[c_o+2]
)
-
(
sum[o_r+2]
-
sum[oo+2]
))
;
image_sum_
+=
template_sum_3
*
(
float
)((
sum[c_r+3]
-
sum[c_o+3]
)
-
(
sum[o_r+3]
-
sum[oo+3]
))
;
*result
-=
image_sum_
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
-=
image_sum_
;
}
}
__kernel
void
matchTemplate_CCOEFF_NORMED_C1
(
__global
const
uchar
*
img_sums,
int
img_sums_step,
int
img_sums_offset,
__global
const
uchar
*
img_sqsums,
int
img_sqsums_step,
int
img_sqsums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res_cols,
int
t_rows,
int
t_cols,
float
weight,
float
tpl_sum,
float
tpl_sqsum
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
#
else
#
error
"cn should be 1, 2 or 4"
#
endif
img_sums_offset
/=
ELEM_SIZE
;
img_sums_step
/=
ELEM_SIZE
;
img_sqsums_step
/=
sizeof
(
float
)
;
img_sqsums_offset
/=
sizeof
(
float
)
;
#
elif
defined
CCOEFF_NORMED
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
#
if
cn
==
1
__kernel
void
matchTemplate_CCOEFF_NORMED
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
const
uchar
*
src_sqsums,
int
src_sqsums_step,
int
src_sqsums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
t_rows,
int
t_cols,
float
weight,
float
template_sum,
float
template_sqsum
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
gidx
<
res_cols
&&
gidy
<
res
_rows
)
if
(
x
<
dst_cols
&&
y
<
dst
_rows
)
{
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
img_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
img_sqsums
)
;
src_sums_offset
/=
ELEM_SIZE
;
src_sums_step
/=
ELEM_SIZE
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
src_sqsums
)
;
float
image_sum_
=
(
float
)((
sum[SUMS_PTR
(
t_cols,
t_rows
)
]
-
sum[SUMS_PTR
(
t_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
t_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
;
...
...
@@ -292,35 +348,35 @@ __kernel void matchTemplate_CCOEFF_NORMED_C1 (__global const uchar * img_sums, i
float
image_sqsum_
=
(
float
)((
sqsum[SQSUMS_PTR
(
t_cols,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
t_cols,
0
)
]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
]
))
;
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*
result
=
normAcc
((
*result
)
-
image_sum_
*
tpl
_sum,
sqrt
(
t
pl
_sqsum
*
(
image_sqsum_
-
weight
*
image_sum_
*
image_sum_
)))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
)
)
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*
dstult
=
normAcc
((
*dstult
)
-
image_sum_
*
template
_sum,
sqrt
(
t
emplate
_sqsum
*
(
image_sqsum_
-
weight
*
image_sum_
*
image_sum_
)))
;
}
}
__kernel
void
matchTemplate_CCOEFF_NORMED_C2
(
__global
const
uchar
*
img_sums,
int
img_sums_step,
int
img_sums_offset,
__global
const
uchar
*
img_sqsums,
int
img_sqsums_step,
int
img_sqsums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res_cols,
int
t_rows,
int
t_cols,
float
weight,
float
tpl_sum_0,
float
tpl_sum_1,
float
tpl_sqsum
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
img_sums_offset
/=
ELEM_SIZE
;
img_sums_step
/=
ELEM_SIZE
;
img_sqsums_step
/=
sizeof
(
float
)
;
img_sqsums_offset
/=
sizeof
(
float
)
;
#
elif
cn
==
2
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
__kernel
void
matchTemplate_CCOEFF_NORMED
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
const
uchar
*
src_sqsums,
int
src_sqsums_step,
int
src_sqsums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
t_rows,
int
t_cols,
float
weight,
float
template_sum_0,
float
template_sum_1,
float
template_sqsum
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
float
sum_[2]
;
float
sqsum_[2]
;
if
(
gidx
<
res_cols
&&
gidy
<
res
_rows
)
if
(
x
<
dst_cols
&&
y
<
dst
_rows
)
{
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
img_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
img_sqsums
)
;
src_sums_offset
/=
ELEM_SIZE
;
src_sums_step
/=
ELEM_SIZE
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
src_sqsums
)
;
sum_[0]
=
(
float
)((
sum[SUMS_PTR
(
t_cols,
t_rows
)
]
-
sum[SUMS_PTR
(
t_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
t_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
;
sum_[1]
=
(
float
)((
sum[SUMS_PTR
(
t_cols,
t_rows
)
+1]
-
sum[SUMS_PTR
(
t_cols,
0
)
+1]
)
-
(
sum[SUMS_PTR
(
0
,
t_rows
)
+1]
-
sum[SUMS_PTR
(
0
,
0
)
+1]
))
;
...
...
@@ -328,40 +384,41 @@ __kernel void matchTemplate_CCOEFF_NORMED_C2 (__global const uchar * img_sums, i
sqsum_[0]
=
(
float
)((
sqsum[SQSUMS_PTR
(
t_cols,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
t_cols,
0
)
]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
]
))
;
sqsum_[1]
=
(
float
)((
sqsum[SQSUMS_PTR
(
t_cols,
t_rows
)
+1]
-
sqsum[SQSUMS_PTR
(
t_cols,
0
)
+1]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
t_rows
)
+1]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
+1]
))
;
float
num
=
sum_[0]*t
pl_sum_0
+
sum_[1]*tpl
_sum_1
;
float
num
=
sum_[0]*t
emplate_sum_0
+
sum_[1]*template
_sum_1
;
float
denum
=
sqrt
(
t
pl
_sqsum
*
(
sqsum_[0]
-
weight
*
sum_[0]*
sum_[0]
+
float
denum
=
sqrt
(
t
emplate
_sqsum
*
(
sqsum_[0]
-
weight
*
sum_[0]*
sum_[0]
+
sqsum_[1]
-
weight
*
sum_[1]*
sum_[1]
))
;
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*result
=
normAcc
((
*result
)
-
num,
denum
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
=
normAcc
((
*dstult
)
-
num,
denum
)
;
}
}
__kernel
void
matchTemplate_CCOEFF_NORMED_C4
(
__global
const
uchar
*
img_sums,
int
img_sums_step,
int
img_sums_offset,
__global
const
uchar
*
img_sqsums,
int
img_sqsums_step,
int
img_sqsums_offset,
__global
uchar
*
res,
int
res_step,
int
res_offset,
int
res_rows,
int
res_cols,
int
t_rows,
int
t_cols,
float
weight,
float
tpl_sum_0,float
tpl_sum_1,float
tpl_sum_2,float
tpl_sum_3,
float
tpl_sqsum
)
{
int
gidx
=
get_global_id
(
0
)
;
int
gidy
=
get_global_id
(
1
)
;
img_sums_offset
/=
ELEM_SIZE
;
img_sums_step
/=
ELEM_SIZE
;
img_sqsums_step
/=
sizeof
(
float
)
;
img_sqsums_offset
/=
sizeof
(
float
)
;
#
elif
cn
==
4
int
res_idx
=
mad24
(
gidy,
res_step,
res_offset
+
gidx
*
(
int
)
sizeof
(
float
))
;
__kernel
void
matchTemplate_CCOEFF_NORMED
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
const
uchar
*
src_sqsums,
int
src_sqsums_step,
int
src_sqsums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
t_rows,
int
t_cols,
float
weight,
float
template_sum_0,
float
template_sum_1,
float
template_sum_2,
float
template_sum_3,
float
template_sqsum
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
float
sum_[4]
;
float
sqsum_[4]
;
if
(
gidx
<
res_cols
&&
gidy
<
res
_rows
)
if
(
x
<
dst_cols
&&
y
<
dst
_rows
)
{
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
img_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
img_sqsums
)
;
src_sums_offset
/=
ELEM_SIZE
;
src_sums_step
/=
ELEM_SIZE
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
src_sqsums
)
;
int
c_r
=
SUMS_PTR
(
t_cols,
t_rows
)
;
int
c_o
=
SUMS_PTR
(
t_cols,
0
)
;
...
...
@@ -383,15 +440,22 @@ __kernel void matchTemplate_CCOEFF_NORMED_C4 (__global const uchar * img_sums, i
sqsum_[2]
=
(
float
)((
sqsum[c_r+2]
-
sqsum[c_o+2]
)
-
(
sqsum[o_r+2]
-
sqsum[o_o+2]
))
;
sqsum_[3]
=
(
float
)((
sqsum[c_r+3]
-
sqsum[c_o+3]
)
-
(
sqsum[o_r+3]
-
sqsum[o_o+3]
))
;
float
num
=
sum_[0]*t
pl_sum_0
+
sum_[1]*tpl_sum_1
+
sum_[2]*tpl_sum_2
+
sum_[3]*tpl
_sum_3
;
float
num
=
sum_[0]*t
emplate_sum_0
+
sum_[1]*template_sum_1
+
sum_[2]*template_sum_2
+
sum_[3]*template
_sum_3
;
float
denum
=
sqrt
(
t
pl
_sqsum
*
(
float
denum
=
sqrt
(
t
emplate
_sqsum
*
(
sqsum_[0]
-
weight
*
sum_[0]*
sum_[0]
+
sqsum_[1]
-
weight
*
sum_[1]*
sum_[1]
+
sqsum_[2]
-
weight
*
sum_[2]*
sum_[2]
+
sqsum_[3]
-
weight
*
sum_[3]*
sum_[3]
))
;
__global
float
*
result
=
(
__global
float
*
)(
res+res_idx
)
;
*result
=
normAcc
((
*result
)
-
num,
denum
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
=
normAcc
((
*dstult
)
-
num,
denum
)
;
}
}
#
else
#
error
"cn should be 1, 2 or 4"
#
endif
#
endif
modules/imgproc/src/templmatch.cpp
View file @
c9d8025f
...
...
@@ -49,80 +49,95 @@ namespace cv
#ifdef HAVE_OPENCL
static
bool
useNaive
(
int
method
,
int
depth
,
const
Size
&
size
)
/////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////
enum
{
#ifdef HAVE_CLAMDFFT
if
(
method
==
TM_SQDIFF
&&
depth
==
CV_32F
)
return
true
;
else
if
(
method
==
TM_CCORR
||
(
method
==
TM_SQDIFF
&&
depth
==
CV_8U
))
return
size
.
height
<
18
&&
size
.
width
<
18
;
else
SUM_1
=
0
,
SUM_2
=
1
};
static
bool
sumTemplate
(
InputArray
_src
,
UMat
&
result
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
wdepth
=
std
::
max
(
CV_32S
,
depth
),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
);
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
char
cvt
[
40
];
ocl
::
Kernel
k
(
"calcSum"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D CALC_SUM -D T=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d -D wdepth=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
wtype
),
cn
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
),
(
int
)
wgs
,
wgs2_aligned
,
wdepth
));
if
(
k
.
empty
())
return
false
;
#else
(
void
)(
method
);
(
void
)(
depth
);
(
void
)(
size
);
return
true
;
#endif
}
/////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////
UMat
src
=
_src
.
getUMat
();
result
.
create
(
1
,
1
,
CV_32FC1
);
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
resarg
=
ocl
::
KernelArg
::
PtrWriteOnly
(
result
);
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
resarg
);
size_t
globalsize
=
wgs
;
return
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
);
}
static
bool
matchTemplateNaive_CCORR
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
int
type
=
_image
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
wdepth
=
std
::
max
(
depth
,
CV_32S
),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
);
char
cvt
[
40
];
ocl
::
Kernel
k
(
"matchTemplate_Naive_CCORR"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D type=%s -D elem_type=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
format
(
"-D CCORR -D T=%s -D WT=%s -D convertToWT=%s -D cn=%d -D wdepth=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
wtype
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
),
cn
,
wdepth
));
if
(
k
.
empty
())
return
false
;
UMat
image
=
_image
.
getUMat
(),
templ
=
_templ
.
getUMat
();
_result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
_result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
C1
);
UMat
result
=
_result
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image
),
ocl
::
KernelArg
::
ReadOnly
(
templ
),
ocl
::
KernelArg
::
WriteOnly
(
result
));
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image
),
ocl
::
KernelArg
::
ReadOnly
(
templ
),
ocl
::
KernelArg
::
WriteOnly
(
result
)).
run
(
2
,
globalsize
,
NULL
,
false
);
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
static
bool
matchTemplate_CCORR_NORMED
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
matchTemplate
(
_image
,
_templ
,
_result
,
CV_TM_CCORR
);
int
type
=
_image
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
type
=
_image
.
type
(),
cn
=
CV_MAT_CN
(
type
);
ocl
::
Kernel
k
(
"matchTemplate_CCORR_NORMED"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D type=%s -D elem_type=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
format
(
"-D CCORR_NORMED -D T=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
cn
));
if
(
k
.
empty
())
return
false
;
UMat
image
=
_image
.
getUMat
(),
templ
=
_templ
.
getUMat
();
_result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
_result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
C1
);
UMat
result
=
_result
.
getUMat
();
UMat
image_sums
,
image_sqsums
;
integral
(
image
.
reshape
(
1
),
image_sums
,
image_sqsums
,
CV_32F
,
CV_32F
);
UMat
temp
;
multiply
(
templ
,
templ
,
temp
,
1
,
CV_32F
);
Scalar
s
=
sum
(
temp
);
float
templ_sqsum
=
0
;
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
templ_sqsum
+=
static_cast
<
float
>
(
s
[
i
]);
UMat
templ_sqsum
;
if
(
!
sumTemplate
(
templ
,
templ_sqsum
))
return
false
;
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sqsum
).
run
(
2
,
globalsize
,
NULL
,
false
);
}
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
ocl
::
KernelArg
::
PtrReadOnly
(
templ_sqsum
));
static
bool
matchTemplate_CCORR
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
if
(
useNaive
(
TM_CCORR
,
_image
.
depth
(),
_templ
.
size
())
)
return
matchTemplateNaive_CCORR
(
_image
,
_templ
,
_result
);
else
return
false
;
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
////////////////////////////////////// SQDIFF //////////////////////////////////////////////////////////////
...
...
@@ -130,10 +145,12 @@ static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArra
static
bool
matchTemplateNaive_SQDIFF
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
int
type
=
_image
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
wdepth
=
std
::
max
(
depth
,
CV_32S
),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
);
char
cvt
[
40
];
ocl
::
Kernel
k
(
"matchTemplate_Naive_SQDIFF"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D
type=%s -D elem_type=%s -D cn
=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
format
(
"-D
SQDIFF -D T=%s -D WT=%s -D convertToWT=%s -D cn=%d -D wdepth
=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
wtype
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
),
cn
,
wdepth
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -141,20 +158,21 @@ static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, Outp
_result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
UMat
result
=
_result
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image
),
ocl
::
KernelArg
::
ReadOnly
(
templ
),
ocl
::
KernelArg
::
WriteOnly
(
result
));
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image
),
ocl
::
KernelArg
::
ReadOnly
(
templ
),
ocl
::
KernelArg
::
WriteOnly
(
result
)).
run
(
2
,
globalsize
,
NULL
,
false
);
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
static
bool
matchTemplate_SQDIFF_NORMED
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
matchTemplate
(
_image
,
_templ
,
_result
,
CV_TM_CCORR
);
int
type
=
_image
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
type
=
_image
.
type
(),
cn
=
CV_MAT_CN
(
type
);
ocl
::
Kernel
k
(
"matchTemplate_SQDIFF_NORMED"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D type=%s -D elem_type=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
format
(
"-D SQDIFF_NORMED -D T=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
cn
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -165,24 +183,15 @@ static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, Ou
UMat
image_sums
,
image_sqsums
;
integral
(
image
.
reshape
(
1
),
image_sums
,
image_sqsums
,
CV_32F
,
CV_32F
);
UMat
temp
;
multiply
(
templ
,
templ
,
temp
,
1
,
CV_32F
);
Scalar
s
=
sum
(
temp
);
float
templ_sqsum
=
0
;
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
templ_sqsum
+=
(
float
)
s
[
i
];
UMat
templ_sqsum
;
if
(
!
sumTemplate
(
_templ
,
templ_sqsum
))
return
false
;
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sqsum
).
run
(
2
,
globalsize
,
NULL
,
false
);
}
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
ocl
::
KernelArg
::
PtrReadOnly
(
templ_sqsum
));
static
bool
matchTemplate_SQDIFF
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
if
(
useNaive
(
TM_SQDIFF
,
_image
.
depth
(),
_templ
.
size
()))
return
matchTemplateNaive_SQDIFF
(
_image
,
_templ
,
_result
);
else
return
false
;
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
///////////////////////////////////// CCOEFF /////////////////////////////////////////////////////////////////
...
...
@@ -194,15 +203,15 @@ static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArr
UMat
image_sums
,
temp
;
integral
(
_image
,
temp
);
if
(
temp
.
depth
()
==
CV_64F
)
if
(
temp
.
depth
()
==
CV_64F
)
temp
.
convertTo
(
image_sums
,
CV_32F
);
else
image_sums
=
temp
;
int
type
=
image_sums
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
ocl
::
Kernel
k
(
cv
::
format
(
"matchTemplate_Prepared_CCOEFF_C%d"
,
cn
).
c_str
()
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D
type
=%s -D elem_type=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
ocl
::
Kernel
k
(
"matchTemplate_Prepared_CCOEFF"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D
CCOEFF -D T
=%s -D elem_type=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -211,25 +220,28 @@ static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArr
_result
.
create
(
size
.
height
-
templ
.
rows
+
1
,
size
.
width
-
templ
.
cols
+
1
,
CV_32F
);
UMat
result
=
_result
.
getUMat
();
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
if
(
cn
==
1
)
{
float
templ_sum
=
static_cast
<
float
>
(
sum
(
_templ
)[
0
])
/
tsize
.
area
();
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
).
run
(
2
,
globalsize
,
NULL
,
false
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
);
}
else
{
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
templ_sum
=
sum
(
templ
)
/
tsize
.
area
();
if
(
cn
==
2
)
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
[
0
],
templ_sum
[
1
]).
run
(
2
,
globalsize
,
NULL
,
false
);
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
],
templ_sum
[
3
]).
run
(
2
,
globalsize
,
NULL
,
false
);
if
(
cn
==
2
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
[
0
],
templ_sum
[
1
]);
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
],
templ_sum
[
3
]);
}
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
static
bool
matchTemplate_CCOEFF_NORMED
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
...
...
@@ -241,8 +253,8 @@ static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, Ou
int
type
=
image_sums
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
ocl
::
Kernel
k
(
format
(
"matchTemplate_CCOEFF_NORMED_C%d"
,
cn
).
c_str
()
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D type=%s -D elem_type=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
ocl
::
Kernel
k
(
"matchTemplate_CCOEFF_NORMED"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D
CCOEFF_NORMED -D
type=%s -D elem_type=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -251,7 +263,6 @@ static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, Ou
_result
.
create
(
size
.
height
-
templ
.
rows
+
1
,
size
.
width
-
templ
.
cols
+
1
,
CV_32F
);
UMat
result
=
_result
.
getUMat
();
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
float
scale
=
1.
f
/
tsize
.
area
();
if
(
cn
==
1
)
...
...
@@ -270,9 +281,8 @@ static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, Ou
return
true
;
}
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
,
templ_sqsum
)
.
run
(
2
,
globalsize
,
NULL
,
false
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
,
templ_sqsum
);
}
else
{
...
...
@@ -295,15 +305,17 @@ static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, Ou
}
if
(
cn
==
2
)
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sqsum_sum
).
run
(
2
,
globalsize
,
NULL
,
false
);
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
],
templ_sum
[
3
],
templ_sqsum_sum
).
run
(
2
,
globalsize
,
NULL
,
false
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sqsum_sum
);
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
],
templ_sum
[
3
],
templ_sqsum_sum
);
}
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -319,7 +331,7 @@ static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _
static
const
Caller
callers
[]
=
{
matchTemplate
_SQDIFF
,
matchTemplate_SQDIFF_NORMED
,
matchTemplat
e_CCORR
,
matchTemplate
Naive_SQDIFF
,
matchTemplate_SQDIFF_NORMED
,
matchTemplateNaiv
e_CCORR
,
matchTemplate_CCORR_NORMED
,
matchTemplate_CCOEFF
,
matchTemplate_CCOEFF_NORMED
};
const
Caller
caller
=
callers
[
method
];
...
...
modules/imgproc/test/ocl/test_match_template.cpp
View file @
c9d8025f
...
...
@@ -53,8 +53,7 @@ namespace ocl {
///////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////
CV_ENUM
(
MatchTemplType
,
CV_TM_SQDIFF
,
CV_TM_SQDIFF_NORMED
,
CV_TM_CCORR
,
CV_TM_CCORR_NORMED
,
CV_TM_CCOEFF
,
CV_TM_CCOEFF_NORMED
)
CV_ENUM
(
MatchTemplType
,
CV_TM_CCORR
,
CV_TM_CCORR_NORMED
,
CV_TM_SQDIFF
,
CV_TM_SQDIFF_NORMED
,
CV_TM_CCOEFF
,
CV_TM_CCOEFF_NORMED
)
PARAM_TEST_CASE
(
MatchTemplate
,
MatDepth
,
Channels
,
MatchTemplType
,
bool
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment