Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f36db3a0
Commit
f36db3a0
authored
Mar 26, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
more fix of mismatch
parent
ad6aae45
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
196 additions
and
109 deletions
+196
-109
match_template.cpp
modules/ocl/src/match_template.cpp
+97
-45
match_template.cl
modules/ocl/src/opencl/match_template.cl
+94
-59
test_match_template.cpp
modules/ocl/test/test_match_template.cpp
+5
-5
No files found.
modules/ocl/src/match_template.cpp
View file @
f36db3a0
...
...
@@ -71,6 +71,9 @@ namespace cv
void
matchTemplate_SQDIFF_NORMED
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
void
convolve_32F
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
void
matchTemplate_CCORR
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
...
...
@@ -90,41 +93,65 @@ namespace cv
void
matchTemplateNaive_CCORR
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
int
cn
);
void
extractFirstChannel_32F
(
const
oclMat
&
image
,
oclMat
&
result
);
// Evaluates optimal template's area threshold. If
// template's area is less than the threshold, we use naive match
// template version, otherwise FFT-based (if available)
static
int
getTemplateThreshold
(
int
method
,
int
depth
)
static
bool
useNaive
(
int
,
int
,
Size
)
{
switch
(
method
)
{
case
CV_TM_CCORR
:
if
(
depth
==
CV_32F
)
return
250
;
if
(
depth
==
CV_8U
)
return
300
;
break
;
case
CV_TM_SQDIFF
:
if
(
depth
==
CV_32F
)
return
0x7fffffff
;
// do naive SQDIFF for CV_32F
if
(
depth
==
CV_8U
)
return
300
;
break
;
}
CV_Error
(
CV_StsBadArg
,
"getTemplateThreshold: unsupported match template mode"
);
return
0
;
// FIXME!
// always use naive until convolve is imported
return
true
;
}
//////////////////////////////////////////////////////////////////////
// SQDIFF
void
matchTemplate_SQDIFF
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
)
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
{
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
if
(
templ
.
size
().
area
()
<
getTemplateThreshold
(
CV_TM_SQDIFF
,
image
.
depth
()))
if
(
useNaive
(
CV_TM_SQDIFF
,
image
.
depth
(),
templ
.
size
()))
{
matchTemplateNaive_SQDIFF
(
image
,
templ
,
result
,
image
.
oclchannels
());
return
;
}
else
{
// TODO
CV_Error
(
CV_StsBadArg
,
"Not supported yet for this size template"
);
buf
.
image_sqsums
.
resize
(
1
);
// TODO, add double support for ocl::integral
// use CPU integral temporarily
Mat
sums
,
sqsums
;
cv
::
integral
(
Mat
(
image
.
reshape
(
1
)),
sums
,
sqsums
);
buf
.
image_sqsums
[
0
]
=
sqsums
;
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
//port CUDA's matchTemplatePrepared_SQDIFF_8U
Context
*
clCxt
=
image
.
clCxt
;
string
kernelName
=
"matchTemplate_Prepared_SQDIFF"
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_ulong
),
(
void
*
)
&
templ_sqsum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
templ
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
templ
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
const
char
*
build_opt
=
image
.
oclchannels
()
==
4
?
"-D CN4"
:
""
;
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
,
build_opt
);
}
}
...
...
@@ -134,7 +161,6 @@ namespace cv
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
buf
.
image_sums
.
resize
(
1
);
integral
(
image
.
reshape
(
1
),
buf
.
image_sums
[
0
]);
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
...
...
@@ -156,7 +182,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
);
}
...
...
@@ -191,33 +217,39 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
}
//////////////////////////////////////////////////////////////////////
// CCORR
// Stub for the convolve-based matching path.
//
// None of the four arguments is used (they are intentionally left unnamed).
// The function only reports an error through CV_Error, using error code -1
// and a message saying that convolve is not implemented yet.
void convolve_32F(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &)
{
    CV_Error(-1, "convolve is not fully implemented yet");
}
void
matchTemplate_CCORR
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
{
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
if
(
templ
.
size
().
area
()
<
getTemplateThreshold
(
CV_TM_SQDIFF
,
image
.
depth
()))
if
(
useNaive
(
CV_TM_CCORR
,
image
.
depth
(),
templ
.
size
()))
{
matchTemplateNaive_CCORR
(
image
,
templ
,
result
,
image
.
oclchannels
());
return
;
}
else
{
CV_Error
(
CV_StsBadArg
,
"Not supported yet for this size template"
);
if
(
image
.
depth
()
==
CV_8U
&&
templ
.
depth
()
==
CV_8U
)
{
image
.
convertTo
(
buf
.
imagef
,
CV_32F
);
templ
.
convertTo
(
buf
.
templf
,
CV_32F
);
convolve_32F
(
buf
.
imagef
,
buf
.
templf
,
result
,
buf
);
}
else
{
convolve_32F
(
image
,
templ
,
result
,
buf
);
}
CV_Assert
(
image
.
oclchannels
()
==
1
);
oclMat
o_result
(
image
.
size
(),
CV_MAKETYPE
(
CV_32F
,
image
.
oclchannels
()));
filter2D
(
buf
.
imagef
,
o_result
,
CV_32F
,
buf
.
templf
,
Point
(
0
,
0
));
result
=
o_result
(
Rect
(
0
,
0
,
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
));
}
}
...
...
@@ -249,7 +281,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
);
}
...
...
@@ -284,7 +316,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
}
//////////////////////////////////////////////////////////////////////
...
...
@@ -301,7 +333,7 @@ namespace cv
kernelName
=
"matchTemplate_Prepared_CCOFF"
;
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
...
...
@@ -313,22 +345,22 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
cols
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
// to be continued in the following section
if
(
image
.
oclchannels
()
==
1
)
{
buf
.
image_sums
.
resize
(
1
);
integral
(
image
,
buf
.
image_sums
[
0
]);
float
templ_sum
=
0
;
templ_sum
=
(
float
)
sum
(
templ
)[
0
]
/
templ
.
size
().
area
();
templ_sum
[
0
]
=
(
float
)
sum
(
templ
)[
0
]
/
templ
.
size
().
area
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
offset
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
step
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
[
0
]
)
);
}
else
{
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
split
(
image
,
buf
.
images
);
templ_sum
=
sum
(
templ
)
/
templ
.
size
().
area
();
buf
.
image_sums
.
resize
(
buf
.
images
.
size
());
...
...
@@ -374,7 +406,7 @@ namespace cv
kernelName
=
"matchTemplate_Prepared_CCOFF_NORMED"
;
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
...
...
@@ -387,20 +419,22 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
scale
)
);
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
Vec4f
templ_sqsum
=
Vec4f
::
all
(
0
);
// to be continued in the following section
if
(
image
.
oclchannels
()
==
1
)
{
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
integral
(
image
,
buf
.
image_sums
[
0
],
buf
.
image_sqsums
[
0
]);
float
templ_sum
=
0
;
float
templ_sqsum
=
0
;
templ_sum
=
(
float
)
sum
(
templ
)[
0
];
templ_s
qsum
=
sqrS
um
(
templ
)[
0
];
templ_s
um
[
0
]
=
(
float
)
s
um
(
templ
)[
0
];
templ_sqsum
-=
scale
*
templ_sum
*
templ_sum
;
templ_sum
*=
scale
;
templ_sqsum
[
0
]
=
sqrSum
(
templ
)[
0
];
templ_sqsum
[
0
]
-=
scale
*
templ_sum
[
0
]
*
templ_sum
[
0
];
templ_sum
[
0
]
*=
scale
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
offset
)
);
...
...
@@ -408,13 +442,11 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
offset
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
step
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sqsum
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
[
0
]
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sqsum
[
0
]
)
);
}
else
{
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
Vec4f
templ_sqsum
=
Vec4f
::
all
(
0
);
split
(
image
,
buf
.
images
);
templ_sum
=
sum
(
templ
);
...
...
@@ -465,7 +497,27 @@ namespace cv
}
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
}
void
extractFirstChannel_32F
(
const
oclMat
&
image
,
oclMat
&
result
)
{
Context
*
clCxt
=
image
.
clCxt
;
string
kernelName
;
kernelName
=
"extractFirstChannel"
;
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
image
.
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
rows
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
cols
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
image
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
image
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
}
/*ocl*/
}
/*cv*/
...
...
modules/ocl/src/opencl/match_template.cl
View file @
f36db3a0
...
...
@@ -45,22 +45,28 @@
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
if
defined
(
__ATI__
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
if
defined
(
DOUBLE_SUPPORT
)
#
elif
defined
(
__NVIDIA__
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
if
!defined
(
USE_SQR_INTEGRAL
)
&&
(
defined
(
__ATI__
)
||
defined
(
__NVIDIA__
))
#
define
TYPE_IMAGE_SQSUM
double
#
else
#
define
TYPE_IMAGE_SQSUM
ulong
#
define
TYPE_IMAGE_SQSUM
float
#
endif
#
ifndef
CN4
#
define
CN4
1
#
else
#
define
CN4
4
#
endif
//////////////////////////////////////////////////
//
utilities
#
define
SQSUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sqsums_step,
gidx
+
img_sqsums_offset
+
ox
)
#
define
SQSUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sqsums_step,
(
gidx
+
img_sqsums_offset
+
ox
)
*
CN4
)
#
define
SUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sums_step,
gidx
+
img_sums_offset
+
ox
)
//
normAcc*
are
accurate
normalization
routines
which
make
GPU
matchTemplate
//
consistent
with
CPU
one
...
...
@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum)
__kernel
void
normalizeKernel_C1_D0
(
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums,
__global
const
float
*
img_sqsums,
__global
float
*
res,
ulong
tpl_sqsum,
int
res_rows,
...
...
@@ -119,8 +125,8 @@ void normalizeKernel_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
normAcc
(
res[res_idx],
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
}
}
...
...
@@ -152,8 +158,8 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
image_sqsum_
-
2.f
*
res[res_idx]
+
tpl_sqsum
;
}
}
...
...
@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
__kernel
void
matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
(
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums,
__global
const
float
*
img_sqsums,
__global
float
*
res,
ulong
tpl_sqsum,
int
res_rows,
...
...
@@ -185,10 +191,10 @@ void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
normAcc_SQDIFF
(
image_sqsum_
-
2.f
*
res[res_idx]
+
tpl_sqsum,
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
}
}
...
...
@@ -628,8 +634,8 @@ void matchTemplate_Prepared_CCOFF_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
float
sum
=
(
float
)(
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
-=
sum
*
tpl_sum
;
}
}
...
...
@@ -671,17 +677,17 @@ void matchTemplate_Prepared_CCOFF_C4_D0
{
float
ccorr
=
res[res_idx]
;
ccorr
-=
tpl_sum_c0*
(
float
)(
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
ccorr
-=
tpl_sum_c1*
(
float
)(
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
ccorr
-=
tpl_sum_c2*
(
float
)(
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
ccorr
-=
tpl_sum_c3*
(
float
)(
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
ccorr
;
}
}
...
...
@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
__global
const
uint
*
img_sums,
int
img_sums_offset,
int
img_sums_step,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums,
__global
const
float
*
img_sqsums,
int
img_sqsums_offset,
int
img_sqsums_step,
float
tpl_sum,
...
...
@@ -725,12 +731,12 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
float
image_sum_
=
(
float
)(
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
normAcc
(
res[res_idx]
-
image_sum_
*
tpl_sum,
sqrt
(
tpl_sqsum
*
(
image_sqsum_
-
weight
*
image_sum_
*
image_sum_
)))
;
}
...
...
@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
__global
const
uint
*
img_sums_c3,
int
img_sums_offset,
int
img_sums_step,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c0,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c1,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c2,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c3,
__global
const
float
*
img_sqsums_c0,
__global
const
float
*
img_sqsums_c1,
__global
const
float
*
img_sqsums_c2,
__global
const
float
*
img_sqsums_c3,
int
img_sqsums_offset,
int
img_sqsums_step,
float
tpl_sum_c0,
...
...
@@ -782,42 +788,71 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
float
image_sum_c0
=
(
float
)(
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sum_c1
=
(
float
)(
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sum_c2
=
(
float
)(
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sum_c3
=
(
float
)(
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c0
=
(
float
)(
(
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c0[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c0[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c1
=
(
float
)(
(
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c1[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c1[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c2
=
(
float
)(
(
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c2[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c2[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c3
=
(
float
)(
(
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c3[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c3[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
num
=
res[res_idx]
-
image_sum_c0
*
tpl_sum_c0
-
image_sum_c1
*
tpl_sum_c1
-
image_sum_c2
*
tpl_sum_c2
-
image_sum_c3
*
tpl_sum_c3
;
image_sum_c0
*
tpl_sum_c0
-
image_sum_c1
*
tpl_sum_c1
-
image_sum_c2
*
tpl_sum_c2
-
image_sum_c3
*
tpl_sum_c3
;
float
denum
=
sqrt
(
tpl_sqsum
*
(
image_sqsum_c0
-
weight
*
image_sum_c0
*
image_sum_c0
+
image_sqsum_c1
-
weight
*
image_sum_c1
*
image_sum_c1
+
image_sqsum_c2
-
weight
*
image_sum_c2
*
image_sum_c2
+
image_sqsum_c3
-
weight
*
image_sum_c0
*
image_sum_c3
)
)
;
image_sqsum_c0
-
weight
*
image_sum_c0
*
image_sum_c0
+
image_sqsum_c1
-
weight
*
image_sum_c1
*
image_sum_c1
+
image_sqsum_c2
-
weight
*
image_sum_c2
*
image_sum_c2
+
image_sqsum_c3
-
weight
*
image_sum_c0
*
image_sum_c3
)
)
;
res[res_idx]
=
normAcc
(
num,
denum
)
;
}
}
//////////////////////////////////////////////////////////////////////
//
extractFirstChannel
// Writes the .x component of each float4 element of `img` into the matching
// float element of `res`.
//
//   rows, cols             - bounds checked against the work-item's y / x id
//   img_offset, res_offset - start offsets; divided by the element size
//                            (float4 / float) before being applied
//   img_step, res_step     - row steps; divided by the element size
//                            (float4 / float) before being used as strides
//
// Work-items whose global id falls outside cols x rows do nothing.
__kernel void extractFirstChannel(
    const __global float4* img,
    __global float* res,
    int rows,
    int cols,
    int img_offset,
    int res_offset,
    int img_step,
    int res_step)
{
    const int gidx = get_global_id(0);
    const int gidy = get_global_id(1);

    // Convert the steps and offsets to element units.
    const int img_stride = img_step / sizeof(float4);
    const int res_stride = res_step / sizeof(float);
    const int img_first  = img_offset / sizeof(float4);
    const int res_first  = res_offset / sizeof(float);

    const __global float4* src = img + img_first;
    __global float* dst = res + res_first;

    if (gidx >= cols || gidy >= rows)
    {
        return;
    }

    dst[gidx + gidy * res_stride] = src[gidx + gidy * img_stride].x;
}
modules/ocl/test/test_match_template.cpp
View file @
f36db3a0
...
...
@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
}
};
TEST_P
(
MatchTemplate8U
,
DISABLED_
Accuracy
)
TEST_P
(
MatchTemplate8U
,
Accuracy
)
{
std
::
cout
<<
"Method: "
<<
TEMPLATE_METHOD_NAMES
[
method
]
<<
std
::
endl
;
...
...
@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy)
EXPECT_MAT_NEAR
(
dst_gold
,
mat_dst
,
templ_size
.
area
()
*
1e-1
,
sss
);
}
INSTANTIATE_TEST_CASE_P
(
GPU
_ImgProc
,
MatchTemplate8U
,
INSTANTIATE_TEST_CASE_P
(
OCL
_ImgProc
,
MatchTemplate8U
,
testing
::
Combine
(
MTEMP_SIZES
,
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
/*, TemplateSize(cv::Size(30, 30))*/
),
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
,
TemplateSize
(
cv
::
Size
(
30
,
30
))
),
testing
::
Values
(
Channels
(
1
),
Channels
(
3
),
Channels
(
4
)),
ALL_TEMPLATE_METHODS
)
);
INSTANTIATE_TEST_CASE_P
(
GPU
_ImgProc
,
MatchTemplate32F
,
testing
::
Combine
(
INSTANTIATE_TEST_CASE_P
(
OCL
_ImgProc
,
MatchTemplate32F
,
testing
::
Combine
(
MTEMP_SIZES
,
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
/*, TemplateSize(cv::Size(30, 30))*/
),
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
,
TemplateSize
(
cv
::
Size
(
30
,
30
))
),
testing
::
Values
(
Channels
(
1
),
Channels
(
3
),
Channels
(
4
)),
testing
::
Values
(
TemplateMethod
(
cv
::
TM_SQDIFF
),
TemplateMethod
(
cv
::
TM_CCORR
))));
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment