Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a7036d96
Commit
a7036d96
authored
May 21, 2014
by
Elena Gvozdeva
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
changed support for 3-channels, changed CCOEFF
parent
04628d77
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
86 additions
and
208 deletions
+86
-208
match_template.cl
modules/imgproc/src/opencl/match_template.cl
+76
-172
templmatch.cpp
modules/imgproc/src/templmatch.cpp
+10
-36
No files found.
modules/imgproc/src/opencl/match_template.cl
View file @
a7036d96
...
...
@@ -330,42 +330,18 @@ __kernel void matchTemplate_Prepared_CCOEFF(__global const uchar * src_sums, int
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
const
T*
sum
=
(
__global
const
T*
)(
src_sums
+
mad24
(
y,
src_sums_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_sums_offset
))
)
;
src_sums_step
/=
ELEM_SIZE
;
src_sums_offset
/=
ELEM_SIZE
;
float
image_sum_
=
(
float
)((
sum[SUMS_PTR
(
template_cols,
template_rows
)
]
-
sum[SUMS_PTR
(
template_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
template_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
*
template_sum
;
int
step
=
src_sums_step/
(
int
)
sizeof
(
T
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst
+
dst_idx
)
;
*dstult
-=
image_sum_
;
}
}
#
elif
cn
==
2
__kernel
void
matchTemplate_Prepared_CCOEFF
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
template_rows,
int
template_cols,
float
template_sum_0,
float
template_sum_1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
src_sums_step
/=
ELEM_SIZE
;
src_sums_offset
/=
ELEM_SIZE
;
T
image_sum
=
(
T
)(
0
)
,
value
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
float
image_sum_
=
template_sum_0
*
(
float
)((
sum[SUMS_PTR
(
template_cols,
template_rows
)
]
-
sum[SUMS_PTR
(
template_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
template_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
;
image_sum_
+=
template_sum_1
*
(
float
)((
sum[SUMS_PTR
(
template_cols,
template_rows
)
+1]
-
sum[SUMS_PTR
(
template_cols,
0
)
+1]
)
-
(
sum[SUMS_PTR
(
0
,
template_rows
)
+1]
-
sum[SUMS_PTR
(
0
,
0
)
+1]
))
;
value
=
(
T
)(
sum[mad24
(
template_rows,
step,
template_cols
)
]
-
sum[mad24
(
template_rows,
step,
0
)
]
-
sum[template_cols]
+
sum[0]
)
;
image_sum
=
mad
(
value,
template_sum
,
image_sum
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
-=
image_sum_
;
*
(
__global
float
*
)(
dst
+
dst_idx
)
-=
convertToDT
(
image_sum
)
;
}
}
...
...
@@ -373,62 +349,61 @@ __kernel void matchTemplate_Prepared_CCOEFF(__global const uchar * src_sums, int
__kernel
void
matchTemplate_Prepared_CCOEFF
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
template_rows,
int
template_cols,
float
template_sum_0,
float
template_sum_1,
float
template_sum_2
)
int
template_rows,
int
template_cols,
float
4
template_sum
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
src_sums_step
/=
ELEM_SIZE
;
src_sums_offset
/=
ELEM_SIZE
;
T
image_sum
=
(
T
)(
0
)
,
value,
temp_sum
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
temp_sum.x
=
template_sum.x
;
temp_sum.y
=
template_sum.y
;
temp_sum.z
=
template_sum.z
;
int
c_r
=
SUMS_PTR
(
template_cols,
template_rows
)
;
int
c_o
=
SUMS_PTR
(
template_cols,
0
)
;
int
o_r
=
SUMS_PTR
(
0
,
template_rows
)
;
int
oo
=
SUMS_PTR
(
0
,
0
)
;
value
=
vload3
(
0
,
(
__global
const
T1
*
)(
src_sums
+
SUMS
(
template_cols,
template_rows
))
)
;
value
-=
vload3
(
0
,
(
__global
const
T1
*
)(
src_sums
+
SUMS
(
0
,
template_rows
))
)
;
value
-=
vload3
(
0
,
(
__global
const
T1
*
)(
src_sums
+
SUMS
(
template_cols,
0
))
)
;
value
+=
vload3
(
0
,
(
__global
const
T1
*
)(
src_sums
+
SUMS
(
0
,
0
))
)
;
float
image_sum_
=
template_sum_0
*
(
float
)((
sum[c_r]
-
sum[c_o]
)
-
(
sum[o_r]
-
sum[oo]
))
;
image_sum_
+=
template_sum_1
*
(
float
)((
sum[c_r+1]
-
sum[c_o+1]
)
-
(
sum[o_r+1]
-
sum[oo+1]
))
;
image_sum_
+=
template_sum_2
*
(
float
)((
sum[c_r+2]
-
sum[c_o+2]
)
-
(
sum[o_r+2]
-
sum[oo+2]
))
;
image_sum
=
mad
(
value,
temp_sum
,
0
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
-=
image_sum_
;
*
(
__global
float
*
)(
dst
+
dst_idx
)
-=
convertToDT
(
image_sum
)
;
}
}
#
elif
cn
==
4
#
elif
(
cn==2
|
| cn==4)
__kernel void matchTemplate_Prepared_CCOEFF(__global const uchar * src_sums, int src_sums_step, int src_sums_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
int
template_rows,
int
template_cols,
float
template_sum_0,
float
template_sum_1,
float
template_sum_2,
float
template_sum_3
)
int template_rows, int template_cols, float
4 template_sum
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < dst_cols && y < dst_rows)
{
src_sums_step
/=
ELEM_SIZE
;
src_sums_offset
/=
ELEM_SIZE
;
__global const T* sum = (__global const T*)(src_sums + mad24(y, src_sums_step, mad24(x, (int)sizeof(T), src_sums_offset)));
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
int step = src_sums_step/(int)sizeof(T);
T image_sum = (T)(0), value, temp_sum;
#if cn==2
temp_sum.x = template_sum.x;
temp_sum.y = template_sum.y;
#else
temp_sum = template_sum;
#endif
int
c_r
=
SUMS_PTR
(
template_cols,
template_rows
)
;
int
c_o
=
SUMS_PTR
(
template_cols,
0
)
;
int
o_r
=
SUMS_PTR
(
0
,
template_rows
)
;
int
oo
=
SUMS_PTR
(
0
,
0
)
;
value = (sum[mad24(template_rows, step, template_cols)] - sum[mad24(template_rows, step, 0)] - sum[template_cols] + sum[0]);
float
image_sum_
=
template_sum_0
*
(
float
)((
sum[c_r]
-
sum[c_o]
)
-
(
sum[o_r]
-
sum[oo]
))
;
image_sum_
+=
template_sum_1
*
(
float
)((
sum[c_r+1]
-
sum[c_o+1]
)
-
(
sum[o_r+1]
-
sum[oo+1]
))
;
image_sum_
+=
template_sum_2
*
(
float
)((
sum[c_r+2]
-
sum[c_o+2]
)
-
(
sum[o_r+2]
-
sum[oo+2]
))
;
image_sum_
+=
template_sum_3
*
(
float
)((
sum[c_r+3]
-
sum[c_o+3]
)
-
(
sum[o_r+3]
-
sum[oo+3]
))
;
image_sum = mad(value, temp_sum , image_sum);
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
-=
image_sum_
;
*(__global float *)(dst + dst_idx) -= convertToDT(image_sum);
}
}
...
...
@@ -448,62 +423,24 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
int x = get_global_id(0);
int y = get_global_id(1);
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
src_sums_offset
/=
ELEM_SIZE
;
src_sums_step
/=
ELEM_SIZE
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
src_sqsums
)
;
float
image_sum_
=
(
float
)((
sum[SUMS_PTR
(
t_cols,
t_rows
)
]
-
sum[SUMS_PTR
(
t_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
t_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_
=
(
float
)((
sqsum[SQSUMS_PTR
(
t_cols,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
t_cols,
0
)
]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
]
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
*dstult
=
normAcc
((
*dstult
)
-
image_sum_
*
template_sum,
sqrt
(
template_sqsum
*
(
image_sqsum_
-
weight
*
image_sum_
*
image_sum_
)))
;
}
}
#
elif
cn
==
2
__kernel
void
matchTemplate_CCOEFF_NORMED
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
const
uchar
*
src_sqsums,
int
src_sqsums_step,
int
src_sqsums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
t_rows,
int
t_cols,
float
weight,
float
template_sum_0,
float
template_sum_1,
float
template_sqsum
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
float sum_[2];
float sqsum_[2];
if (x < dst_cols && y < dst_rows)
{
src_sums_offset
/=
ELEM_SIZE
;
src_sums_step
/=
ELEM_SIZE
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
int step = src_sums_step/(int)sizeof(T);
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
src_sqsums
)
;
__global
const T* sum = (__global const T*)(src_sums + mad24(y, src_sums_step, mad24(x, (int)sizeof(T), src_sums_offset))
);
__global
const T* sqsum = (__global const T*)(src_sqsums + mad24(y, src_sqsums_step, mad24(x, (int)sizeof(T), src_sqsums_offset))
);
sum_[0]
=
(
float
)((
sum[SUMS_PTR
(
t_cols,
t_rows
)
]
-
sum[SUMS_PTR
(
t_cols,
0
)
]
)
-
(
sum[SUMS_PTR
(
0
,
t_rows
)
]
-
sum[SUMS_PTR
(
0
,
0
)
]
))
;
sum_[1]
=
(
float
)((
sum[SUMS_PTR
(
t_cols,
t_rows
)
+1]
-
sum[SUMS_PTR
(
t_cols,
0
)
+1]
)
-
(
sum[SUMS_PTR
(
0
,
t_rows
)
+1]
-
sum[SUMS_PTR
(
0
,
0
)
+1]
))
;
T value_sum = sum[mad24(t_rows, step, t_cols)] - sum[mad24(t_rows, step, 0)] - sum[t_cols] + sum[0]
;
T value_sqsum = sqsum[mad24(t_rows, step, t_cols)] - sqsum[mad24(t_rows, step, 0)] - sqsum[t_cols] + sqsum[0]
;
sqsum_[0]
=
(
float
)((
sqsum[SQSUMS_PTR
(
t_cols,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
t_cols,
0
)
]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
t_rows
)
]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
]
))
;
sqsum_[1]
=
(
float
)((
sqsum[SQSUMS_PTR
(
t_cols,
t_rows
)
+1]
-
sqsum[SQSUMS_PTR
(
t_cols,
0
)
+1]
)
-
(
sqsum[SQSUMS_PTR
(
0
,
t_rows
)
+1]
-
sqsum[SQSUMS_PTR
(
0
,
0
)
+1]
))
;
float num = convertToDT(mad(value_sum, template_sum, 0));
float
num
=
sum_[0]*template_sum_0
+
sum_[1]*template_sum_1
;
float
denum
=
sqrt
(
template_sqsum
*
(
sqsum_[0]
-
weight
*
sum_[0]*
sum_[0]
+
sqsum_[1]
-
weight
*
sum_[1]*
sum_[1]
))
;
value_sqsum -= weight * value_sum * value_sum;
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), 0));
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global float * dstult = (__global float *)(dst+dst_idx);
...
...
@@ -516,49 +453,35 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
__kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int src_sums_step, int src_sums_offset,
__global const uchar * src_sqsums, int src_sqsums_step, int src_sqsums_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
int
t_rows,
int
t_cols,
float
weight,
float
template_sum_0,
float
template_sum_1,
float
template_sum_2,
float
template_sqsum
)
int t_rows, int t_cols, float weight, float4 template_sum, float template_sqsum)
{
int x = get_global_id(0);
int y = get_global_id(1);
float
sum_[3]
;
float
sqsum_[3]
;
if (x < dst_cols && y < dst_rows)
{
src_sums_offset
/=
ELEM_SIZE
;
src_sums_step
/=
ELEM_SIZE
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
int step = src_sums_step/(int)sizeof(T);
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
src_sqsums
)
;
T temp_sum, value_sum, value_sqsum;
int
c_r
=
SUMS_PTR
(
t_cols,
t_rows
)
;
int
c_o
=
SUMS_PTR
(
t_cols,
0
)
;
int
o_r
=
SUMS_PTR
(
0
,
t_rows
)
;
int
o_o
=
SUMS_PTR
(
0
,
0
)
;
temp_sum.x = template_sum.x;
temp_sum.y = template_sum.y;
temp_sum.z = template_sum.z;
sum_[0]
=
(
float
)((
sum[c_r]
-
sum[c_o]
)
-
(
sum[o_r]
-
sum[o_o
]
))
;
sum_[1]
=
(
float
)((
sum[c_r+1]
-
sum[c_o+1]
)
-
(
sum[o_r+1]
-
sum[o_o
+1]
))
;
sum_[2]
=
(
float
)((
sum[c_r+2]
-
sum[c_o+2]
)
-
(
sum[o_r+2]
-
sum[o_o
+2]
))
;
value_sum = vload3(0, (__global const T1 *)(src_sums + SUMS(t_cols, t_rows)));
value_sum -= vload3(0, (__global const T1 *)(src_sums + SUMS(0, t_rows)));
value_sum -= vload3(0, (__global const T1 *)(src_sums + SUMS(t_cols, 0)));
value_sum += vload3(0, (__global const T1 *)(src_sums + SUMS(0, 0)));
c_r
=
SQSUMS_PTR
(
t_cols,
t_rows
)
;
c_o
=
SQSUMS_PTR
(
t_cols,
0
)
;
o_r
=
SQSUMS_PTR
(
0
,
t_rows
)
;
o_o
=
SQSUMS_PTR
(
0
,
0
)
;
value_sqsum = vload3(0, (__global const T1 *)(src_sqsums + SQ_SUMS(t_cols, t_rows))
);
value_sqsum -= vload3(0, (__global const T1 *)(src_sqsums + SQ_SUMS(0, t_rows))
);
value_sqsum -= vload3(0, (__global const T1 *)(src_sqsums + SQ_SUMS(t_cols, 0))
);
value_sqsum += vload3(0, (__global const T1 *)(src_sqsums + SQ_SUMS(0, 0))
);
sqsum_[0]
=
(
float
)((
sqsum[c_r]
-
sqsum[c_o]
)
-
(
sqsum[o_r]
-
sqsum[o_o]
))
;
sqsum_[1]
=
(
float
)((
sqsum[c_r+1]
-
sqsum[c_o+1]
)
-
(
sqsum[o_r+1]
-
sqsum[o_o+1]
))
;
sqsum_[2]
=
(
float
)((
sqsum[c_r+2]
-
sqsum[c_o+2]
)
-
(
sqsum[o_r+2]
-
sqsum[o_o+2]
))
;
float num = convertToDT(mad(value_sum, temp_sum, 0));
float
num
=
sum_[0]*template_sum_0
+
sum_[1]*template_sum_1
+
sum_[2]*template_sum_2
;
float
denum
=
sqrt
(
template_sqsum
*
(
sqsum_[0]
-
weight
*
sum_[0]*
sum_[0]
+
sqsum_[1]
-
weight
*
sum_[1]*
sum_[1]
+
sqsum_[2]
-
weight
*
sum_[2]*
sum_[2]
))
;
value_sqsum -= weight * value_sum * value_sum;
float denum = sqrt(mad(template_sqsum, convertToDT(value_sqsum), 0));
int dst_idx = mad24(y, dst_step, mad24(x, (int)sizeof(float), dst_offset));
__global float * dstult = (__global float *)(dst+dst_idx);
...
...
@@ -566,58 +489,39 @@ __kernel void matchTemplate_CCOEFF_NORMED(__global const uchar * src_sums, int s
}
}
#
elif
cn
==
4
#elif
(cn==2 |
|
cn==4
)
__kernel
void
matchTemplate_CCOEFF_NORMED
(
__global
const
uchar
*
src_sums,
int
src_sums_step,
int
src_sums_offset,
__global
const
uchar
*
src_sqsums,
int
src_sqsums_step,
int
src_sqsums_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
int
t_rows,
int
t_cols,
float
weight,
float
template_sum_0,
float
template_sum_1,
float
template_sum_2,
float
template_sum_3,
float
template_sqsum
)
int
t_rows,
int
t_cols,
float
weight,
float4
template_sum,
float
template_sqsum
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
float
sum_[4]
;
float
sqsum_[4]
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
src_sums_offset
/=
ELEM_SIZE
;
src_sums_step
/=
ELEM_SIZE
;
src_sqsums_step
/=
sizeof
(
float
)
;
src_sqsums_offset
/=
sizeof
(
float
)
;
int
step
=
src_sums_step/
(
int
)
sizeof
(
T
)
;
__global
ELEM_TYPE*
sum
=
(
__global
ELEM_TYPE*
)(
src_sums
)
;
__global
float
*
sqsum
=
(
__global
float*
)(
src_sqsums
)
;
T
temp_sum
;
int
c_r
=
SUMS_PTR
(
t_cols,
t_rows
)
;
int
c_o
=
SUMS_PTR
(
t_cols,
0
)
;
int
o_r
=
SUMS_PTR
(
0
,
t_rows
)
;
int
o_o
=
SUMS_PTR
(
0
,
0
)
;
__global
const
T*
sum
=
(
__global
const
T*
)(
src_sums
+
mad24
(
y,
src_sums_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_sums_offset
)))
;
__global
const
T*
sqsum
=
(
__global
const
T*
)(
src_sqsums
+
mad24
(
y,
src_sqsums_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_sqsums_offset
)))
;
sum_[0]
=
(
float
)((
sum[c_r]
-
sum[c_o]
)
-
(
sum[o_r]
-
sum[o_o
]
))
;
sum_[1]
=
(
float
)((
sum[c_r+1]
-
sum[c_o+1]
)
-
(
sum[o_r+1]
-
sum[o_o
+1]
))
;
sum_[2]
=
(
float
)((
sum[c_r+2]
-
sum[c_o+2]
)
-
(
sum[o_r+2]
-
sum[o_o
+2]
))
;
sum_[3]
=
(
float
)((
sum[c_r+3]
-
sum[c_o+3]
)
-
(
sum[o_r+3]
-
sum[o_o
+3]
))
;
T
value_sum
=
sum[mad24
(
t_rows,
step,
t_cols
)
]
-
sum[mad24
(
t_rows,
step,
0
)
]
-
sum[t_cols]
+
sum[0]
;
T
value_sqsum
=
sqsum[mad24
(
t_rows,
step,
t_cols
)
]
-
sqsum[mad24
(
t_rows,
step,
0
)
]
-
sqsum[t_cols]
+
sqsum[0]
;
c_r
=
SQSUMS_PTR
(
t_cols,
t_rows
)
;
c_o
=
SQSUMS_PTR
(
t_cols,
0
)
;
o_r
=
SQSUMS_PTR
(
0
,
t_rows
)
;
o_o
=
SQSUMS_PTR
(
0
,
0
)
;
sqsum_[0]
=
(
float
)((
sqsum[c_r]
-
sqsum[c_o]
)
-
(
sqsum[o_r]
-
sqsum[o_o]
))
;
sqsum_[1]
=
(
float
)((
sqsum[c_r+1]
-
sqsum[c_o+1]
)
-
(
sqsum[o_r+1]
-
sqsum[o_o+1]
))
;
sqsum_[2]
=
(
float
)((
sqsum[c_r+2]
-
sqsum[c_o+2]
)
-
(
sqsum[o_r+2]
-
sqsum[o_o+2]
))
;
sqsum_[3]
=
(
float
)((
sqsum[c_r+3]
-
sqsum[c_o+3]
)
-
(
sqsum[o_r+3]
-
sqsum[o_o+3]
))
;
#
if
cn==2
temp_sum.x
=
template_sum.x
;
temp_sum.y
=
template_sum.y
;
#
else
temp_sum
=
template_sum
;
#
endif
float
num
=
sum_[0]*template_sum_0
+
sum_[1]*template_sum_1
+
sum_[2]*template_sum_2
+
sum_[3]*template_sum_3
;
float
num
=
convertToDT
(
mad
(
value_sum,
temp_sum,
0
))
;
float
denum
=
sqrt
(
template_sqsum
*
(
sqsum_[0]
-
weight
*
sum_[0]*
sum_[0]
+
sqsum_[1]
-
weight
*
sum_[1]*
sum_[1]
+
sqsum_[2]
-
weight
*
sum_[2]*
sum_[2]
+
sqsum_[3]
-
weight
*
sum_[3]*
sum_[3]
))
;
value_sqsum
-=
weight
*
value_sum
*
value_sum
;
float
denum
=
sqrt
(
mad
(
template_sqsum,
convertToDT
(
value_sqsum
)
,
0
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
__global
float
*
dstult
=
(
__global
float
*
)(
dst+dst_idx
)
;
...
...
modules/imgproc/src/templmatch.cpp
View file @
a7036d96
...
...
@@ -202,47 +202,31 @@ static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArr
matchTemplate
(
_image
,
_templ
,
_result
,
CV_TM_CCORR
);
UMat
image_sums
,
temp
;
integral
(
_image
,
temp
);
if
(
temp
.
depth
()
==
CV_64F
)
temp
.
convertTo
(
image_sums
,
CV_32F
);
else
image_sums
=
temp
;
integral
(
_image
,
image_sums
,
CV_32F
);
int
type
=
image_sums
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
ocl
::
Kernel
k
(
"matchTemplate_Prepared_CCOEFF"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D CCOEFF -D T=%s -D
elem_type
=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
format
(
"-D CCOEFF -D T=%s -D
T1
=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
if
(
k
.
empty
())
return
false
;
UMat
templ
=
_templ
.
getUMat
();
Size
size
=
_image
.
size
(),
tsize
=
templ
.
size
();
_result
.
create
(
size
.
height
-
templ
.
rows
+
1
,
size
.
width
-
templ
.
cols
+
1
,
CV_32F
);
UMat
templ
=
_templ
.
getUMat
();
UMat
result
=
_result
.
getUMat
();
Size
tsize
=
templ
.
size
();
if
(
cn
==
1
)
if
(
cn
==
1
)
{
float
templ_sum
=
static_cast
<
float
>
(
sum
(
_templ
)[
0
])
/
tsize
.
area
();
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
);
}
else
{
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
templ_sum
=
sum
(
templ
)
/
tsize
.
area
();
if
(
cn
==
2
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
[
0
],
templ_sum
[
1
]);
else
if
(
cn
==
3
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
]);
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
],
templ_sum
[
3
]);
}
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
templ_sum
);
}
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
...
...
@@ -258,7 +242,7 @@ static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, Ou
int
type
=
image_sums
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
ocl
::
Kernel
k
(
"matchTemplate_CCOEFF_NORMED"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D CCOEFF_NORMED -D
type=%s -D elem_type
=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
format
(
"-D CCOEFF_NORMED -D
T=%s -D T1
=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -308,19 +292,9 @@ static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, Ou
return
true
;
}
if
(
cn
==
2
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sqsum_sum
);
else
if
(
cn
==
3
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
],
templ_sqsum_sum
);
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sums
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image_sqsums
),
ocl
::
KernelArg
::
ReadWrite
(
result
),
templ
.
rows
,
templ
.
cols
,
scale
,
templ_sum
[
0
],
templ_sum
[
1
],
templ_sum
[
2
],
templ_sum
[
3
],
templ_sqsum_sum
);
}
templ_sum
,
templ_sqsum_sum
);
}
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment