Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f36db3a0
Commit
f36db3a0
authored
Mar 26, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
more fix of mismatch
parent
ad6aae45
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
196 additions
and
109 deletions
+196
-109
match_template.cpp
modules/ocl/src/match_template.cpp
+97
-45
match_template.cl
modules/ocl/src/opencl/match_template.cl
+94
-59
test_match_template.cpp
modules/ocl/test/test_match_template.cpp
+5
-5
No files found.
modules/ocl/src/match_template.cpp
View file @
f36db3a0
...
@@ -71,6 +71,9 @@ namespace cv
...
@@ -71,6 +71,9 @@ namespace cv
void
matchTemplate_SQDIFF_NORMED
(
void
matchTemplate_SQDIFF_NORMED
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
void
convolve_32F
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
void
matchTemplate_CCORR
(
void
matchTemplate_CCORR
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
);
...
@@ -90,41 +93,65 @@ namespace cv
...
@@ -90,41 +93,65 @@ namespace cv
void
matchTemplateNaive_CCORR
(
void
matchTemplateNaive_CCORR
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
int
cn
);
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
int
cn
);
void
extractFirstChannel_32F
(
const
oclMat
&
image
,
oclMat
&
result
);
// Evaluates optimal template's area threshold. If
// Evaluates optimal template's area threshold. If
// template's area is less than the threshold, we use naive match
// template's area is less than the threshold, we use naive match
// template version, otherwise FFT-based (if available)
// template version, otherwise FFT-based (if available)
static
int
getTemplateThreshold
(
int
method
,
int
depth
)
static
bool
useNaive
(
int
,
int
,
Size
)
{
{
switch
(
method
)
// FIXME!
{
// always use naive until convolve is imported
case
CV_TM_CCORR
:
return
true
;
if
(
depth
==
CV_32F
)
return
250
;
if
(
depth
==
CV_8U
)
return
300
;
break
;
case
CV_TM_SQDIFF
:
if
(
depth
==
CV_32F
)
return
0x7fffffff
;
// do naive SQDIFF for CV_32F
if
(
depth
==
CV_8U
)
return
300
;
break
;
}
CV_Error
(
CV_StsBadArg
,
"getTemplateThreshold: unsupported match template mode"
);
return
0
;
}
}
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
// SQDIFF
// SQDIFF
void
matchTemplate_SQDIFF
(
void
matchTemplate_SQDIFF
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
)
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
{
{
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
if
(
templ
.
size
().
area
()
<
getTemplateThreshold
(
CV_TM_SQDIFF
,
image
.
depth
()))
if
(
useNaive
(
CV_TM_SQDIFF
,
image
.
depth
(),
templ
.
size
()))
{
{
matchTemplateNaive_SQDIFF
(
image
,
templ
,
result
,
image
.
oclchannels
());
matchTemplateNaive_SQDIFF
(
image
,
templ
,
result
,
image
.
oclchannels
());
return
;
return
;
}
}
else
else
{
{
// TODO
buf
.
image_sqsums
.
resize
(
1
);
CV_Error
(
CV_StsBadArg
,
"Not supported yet for this size template"
);
// TODO, add double support for ocl::integral
// use CPU integral temporarily
Mat
sums
,
sqsums
;
cv
::
integral
(
Mat
(
image
.
reshape
(
1
)),
sums
,
sqsums
);
buf
.
image_sqsums
[
0
]
=
sqsums
;
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
//port CUDA's matchTemplatePrepared_SQDIFF_8U
Context
*
clCxt
=
image
.
clCxt
;
string
kernelName
=
"matchTemplate_Prepared_SQDIFF"
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_ulong
),
(
void
*
)
&
templ_sqsum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
templ
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
templ
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
const
char
*
build_opt
=
image
.
oclchannels
()
==
4
?
"-D CN4"
:
""
;
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
,
build_opt
);
}
}
}
}
...
@@ -134,7 +161,6 @@ namespace cv
...
@@ -134,7 +161,6 @@ namespace cv
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sums
.
resize
(
1
);
integral
(
image
.
reshape
(
1
),
buf
.
image_sums
[
0
]);
integral
(
image
.
reshape
(
1
),
buf
.
image_sums
[
0
]);
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
...
@@ -156,7 +182,7 @@ namespace cv
...
@@ -156,7 +182,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
);
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
);
}
}
...
@@ -191,33 +217,39 @@ namespace cv
...
@@ -191,33 +217,39 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
}
}
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
// CCORR
// CCORR
void
convolve_32F
(
const
oclMat
&
,
const
oclMat
&
,
oclMat
&
,
MatchTemplateBuf
&
)
{
CV_Error
(
-
1
,
"convolve is not fully implemented yet"
);
}
void
matchTemplate_CCORR
(
void
matchTemplate_CCORR
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
{
{
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
result
.
create
(
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
,
CV_32F
);
if
(
templ
.
size
().
area
()
<
getTemplateThreshold
(
CV_TM_SQDIFF
,
image
.
depth
()))
if
(
useNaive
(
CV_TM_CCORR
,
image
.
depth
(),
templ
.
size
()))
{
{
matchTemplateNaive_CCORR
(
image
,
templ
,
result
,
image
.
oclchannels
());
matchTemplateNaive_CCORR
(
image
,
templ
,
result
,
image
.
oclchannels
());
return
;
return
;
}
}
else
else
{
{
CV_Error
(
CV_StsBadArg
,
"Not supported yet for this size template"
);
if
(
image
.
depth
()
==
CV_8U
&&
templ
.
depth
()
==
CV_8U
)
if
(
image
.
depth
()
==
CV_8U
&&
templ
.
depth
()
==
CV_8U
)
{
{
image
.
convertTo
(
buf
.
imagef
,
CV_32F
);
image
.
convertTo
(
buf
.
imagef
,
CV_32F
);
templ
.
convertTo
(
buf
.
templf
,
CV_32F
);
templ
.
convertTo
(
buf
.
templf
,
CV_32F
);
convolve_32F
(
buf
.
imagef
,
buf
.
templf
,
result
,
buf
);
}
else
{
convolve_32F
(
image
,
templ
,
result
,
buf
);
}
}
CV_Assert
(
image
.
oclchannels
()
==
1
);
oclMat
o_result
(
image
.
size
(),
CV_MAKETYPE
(
CV_32F
,
image
.
oclchannels
()));
filter2D
(
buf
.
imagef
,
o_result
,
CV_32F
,
buf
.
templf
,
Point
(
0
,
0
));
result
=
o_result
(
Rect
(
0
,
0
,
image
.
rows
-
templ
.
rows
+
1
,
image
.
cols
-
templ
.
cols
+
1
));
}
}
}
}
...
@@ -249,7 +281,7 @@ namespace cv
...
@@ -249,7 +281,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
);
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
1
,
CV_8U
);
}
}
...
@@ -284,7 +316,7 @@ namespace cv
...
@@ -284,7 +316,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
}
}
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
...
@@ -301,7 +333,7 @@ namespace cv
...
@@ -301,7 +333,7 @@ namespace cv
kernelName
=
"matchTemplate_Prepared_CCOFF"
;
kernelName
=
"matchTemplate_Prepared_CCOFF"
;
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
...
@@ -313,22 +345,22 @@ namespace cv
...
@@ -313,22 +345,22 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
cols
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
cols
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
// to be continued in the following section
// to be continued in the following section
if
(
image
.
oclchannels
()
==
1
)
if
(
image
.
oclchannels
()
==
1
)
{
{
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sums
.
resize
(
1
);
integral
(
image
,
buf
.
image_sums
[
0
]);
integral
(
image
,
buf
.
image_sums
[
0
]);
float
templ_sum
=
0
;
templ_sum
[
0
]
=
(
float
)
sum
(
templ
)[
0
]
/
templ
.
size
().
area
();
templ_sum
=
(
float
)
sum
(
templ
)[
0
]
/
templ
.
size
().
area
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
offset
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
offset
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
step
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
step
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
[
0
]
)
);
}
}
else
else
{
{
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
split
(
image
,
buf
.
images
);
split
(
image
,
buf
.
images
);
templ_sum
=
sum
(
templ
)
/
templ
.
size
().
area
();
templ_sum
=
sum
(
templ
)
/
templ
.
size
().
area
();
buf
.
image_sums
.
resize
(
buf
.
images
.
size
());
buf
.
image_sums
.
resize
(
buf
.
images
.
size
());
...
@@ -374,7 +406,7 @@ namespace cv
...
@@ -374,7 +406,7 @@ namespace cv
kernelName
=
"matchTemplate_Prepared_CCOFF_NORMED"
;
kernelName
=
"matchTemplate_Prepared_CCOFF_NORMED"
;
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
...
@@ -387,20 +419,22 @@ namespace cv
...
@@ -387,20 +419,22 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
scale
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
scale
)
);
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
Vec4f
templ_sqsum
=
Vec4f
::
all
(
0
);
// to be continued in the following section
// to be continued in the following section
if
(
image
.
oclchannels
()
==
1
)
if
(
image
.
oclchannels
()
==
1
)
{
{
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
integral
(
image
,
buf
.
image_sums
[
0
],
buf
.
image_sqsums
[
0
]);
integral
(
image
,
buf
.
image_sums
[
0
],
buf
.
image_sqsums
[
0
]);
float
templ_sum
=
0
;
float
templ_sqsum
=
0
;
templ_sum
=
(
float
)
sum
(
templ
)[
0
];
templ_s
qsum
=
sqrS
um
(
templ
)[
0
];
templ_s
um
[
0
]
=
(
float
)
s
um
(
templ
)[
0
];
templ_sqsum
-=
scale
*
templ_sum
*
templ_sum
;
templ_sqsum
[
0
]
=
sqrSum
(
templ
)[
0
];
templ_sum
*=
scale
;
templ_sqsum
[
0
]
-=
scale
*
templ_sum
[
0
]
*
templ_sum
[
0
];
templ_sum
[
0
]
*=
scale
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
offset
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sums
[
0
].
offset
)
);
...
@@ -408,13 +442,11 @@ namespace cv
...
@@ -408,13 +442,11 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
offset
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
offset
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
step
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
buf
.
image_sqsums
[
0
].
step
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sum
[
0
]
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sqsum
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
templ_sqsum
[
0
]
)
);
}
}
else
else
{
{
Vec4f
templ_sum
=
Vec4f
::
all
(
0
);
Vec4f
templ_sqsum
=
Vec4f
::
all
(
0
);
split
(
image
,
buf
.
images
);
split
(
image
,
buf
.
images
);
templ_sum
=
sum
(
templ
);
templ_sum
=
sum
(
templ
);
...
@@ -465,7 +497,27 @@ namespace cv
...
@@ -465,7 +497,27 @@ namespace cv
}
}
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
image
.
oclchannels
(),
image
.
depth
());
}
}
void
extractFirstChannel_32F
(
const
oclMat
&
image
,
oclMat
&
result
)
{
Context
*
clCxt
=
image
.
clCxt
;
string
kernelName
;
kernelName
=
"extractFirstChannel"
;
size_t
globalThreads
[
3
]
=
{
result
.
cols
,
result
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
image
.
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
result
.
data
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
rows
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
cols
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
image
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
image
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
result
.
step
));
openCLExecuteKernel
(
clCxt
,
&
match_template
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
}
/*ocl*/
}
/*ocl*/
}
/*cv*/
}
/*cv*/
...
...
modules/ocl/src/opencl/match_template.cl
View file @
f36db3a0
...
@@ -45,22 +45,28 @@
...
@@ -45,22 +45,28 @@
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
if
defined
(
__ATI__
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
#
if
!defined
(
USE_SQR_INTEGRAL
)
&&
(
defined
(
__ATI__
)
||
defined
(
__NVIDIA__
))
#
define
TYPE_IMAGE_SQSUM
double
#
define
TYPE_IMAGE_SQSUM
double
#
else
#
else
#
define
TYPE_IMAGE_SQSUM
ulong
#
define
TYPE_IMAGE_SQSUM
float
#
endif
#
ifndef
CN4
#
define
CN4
1
#
else
#
define
CN4
4
#
endif
#
endif
//////////////////////////////////////////////////
//////////////////////////////////////////////////
//
utilities
//
utilities
#
define
SQSUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sqsums_step,
gidx
+
img_sqsums_offset
+
ox
)
#
define
SQSUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sqsums_step,
(
gidx
+
img_sqsums_offset
+
ox
)
*
CN4
)
#
define
SUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sums_step,
gidx
+
img_sums_offset
+
ox
)
#
define
SUMS_PTR
(
ox,
oy
)
mad24
(
gidy
+
oy,
img_sums_step,
gidx
+
img_sums_offset
+
ox
)
//
normAcc*
are
accurate
normalization
routines
which
make
GPU
matchTemplate
//
normAcc*
are
accurate
normalization
routines
which
make
GPU
matchTemplate
//
consistent
with
CPU
one
//
consistent
with
CPU
one
...
@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum)
...
@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum)
__kernel
__kernel
void
normalizeKernel_C1_D0
void
normalizeKernel_C1_D0
(
(
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums,
__global
const
float
*
img_sqsums,
__global
float
*
res,
__global
float
*
res,
ulong
tpl_sqsum,
ulong
tpl_sqsum,
int
res_rows,
int
res_rows,
...
@@ -119,8 +125,8 @@ void normalizeKernel_C1_D0
...
@@ -119,8 +125,8 @@ void normalizeKernel_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
{
float
image_sqsum_
=
(
float
)(
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
normAcc
(
res[res_idx],
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
res[res_idx]
=
normAcc
(
res[res_idx],
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
}
}
}
}
...
@@ -152,8 +158,8 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
...
@@ -152,8 +158,8 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
{
float
image_sqsum_
=
(
float
)(
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
image_sqsum_
-
2.f
*
res[res_idx]
+
tpl_sqsum
;
res[res_idx]
=
image_sqsum_
-
2.f
*
res[res_idx]
+
tpl_sqsum
;
}
}
}
}
...
@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
...
@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
__kernel
__kernel
void
matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
void
matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
(
(
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums,
__global
const
float
*
img_sqsums,
__global
float
*
res,
__global
float
*
res,
ulong
tpl_sqsum,
ulong
tpl_sqsum,
int
res_rows,
int
res_rows,
...
@@ -185,10 +191,10 @@ void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
...
@@ -185,10 +191,10 @@ void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
{
float
image_sqsum_
=
(
float
)(
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
normAcc_SQDIFF
(
image_sqsum_
-
2.f
*
res[res_idx]
+
tpl_sqsum,
res[res_idx]
=
normAcc_SQDIFF
(
image_sqsum_
-
2.f
*
res[res_idx]
+
tpl_sqsum,
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
sqrt
(
image_sqsum_
*
tpl_sqsum
))
;
}
}
}
}
...
@@ -628,8 +634,8 @@ void matchTemplate_Prepared_CCOFF_C1_D0
...
@@ -628,8 +634,8 @@ void matchTemplate_Prepared_CCOFF_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
{
float
sum
=
(
float
)(
float
sum
=
(
float
)(
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
-=
sum
*
tpl_sum
;
res[res_idx]
-=
sum
*
tpl_sum
;
}
}
}
}
...
@@ -671,17 +677,17 @@ void matchTemplate_Prepared_CCOFF_C4_D0
...
@@ -671,17 +677,17 @@ void matchTemplate_Prepared_CCOFF_C4_D0
{
{
float
ccorr
=
res[res_idx]
;
float
ccorr
=
res[res_idx]
;
ccorr
-=
tpl_sum_c0*
(
float
)(
ccorr
-=
tpl_sum_c0*
(
float
)(
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
ccorr
-=
tpl_sum_c1*
(
float
)(
ccorr
-=
tpl_sum_c1*
(
float
)(
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
ccorr
-=
tpl_sum_c2*
(
float
)(
ccorr
-=
tpl_sum_c2*
(
float
)(
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
ccorr
-=
tpl_sum_c3*
(
float
)(
ccorr
-=
tpl_sum_c3*
(
float
)(
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
ccorr
;
res[res_idx]
=
ccorr
;
}
}
}
}
...
@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
...
@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
__global
const
uint
*
img_sums,
__global
const
uint
*
img_sums,
int
img_sums_offset,
int
img_sums_offset,
int
img_sums_step,
int
img_sums_step,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums,
__global
const
float
*
img_sqsums,
int
img_sqsums_offset,
int
img_sqsums_offset,
int
img_sqsums_step,
int
img_sqsums_step,
float
tpl_sum,
float
tpl_sum,
...
@@ -725,12 +731,12 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
...
@@ -725,12 +731,12 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
{
float
image_sum_
=
(
float
)(
float
image_sum_
=
(
float
)(
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_
=
(
float
)(
float
image_sqsum_
=
(
float
)(
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums[SQSUMS_PTR
(
0
,
0
)
]
))
;
res[res_idx]
=
normAcc
(
res[res_idx]
-
image_sum_
*
tpl_sum,
res[res_idx]
=
normAcc
(
res[res_idx]
-
image_sum_
*
tpl_sum,
sqrt
(
tpl_sqsum
*
(
image_sqsum_
-
weight
*
image_sum_
*
image_sum_
)))
;
sqrt
(
tpl_sqsum
*
(
image_sqsum_
-
weight
*
image_sum_
*
image_sum_
)))
;
}
}
...
@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
...
@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
__global
const
uint
*
img_sums_c3,
__global
const
uint
*
img_sums_c3,
int
img_sums_offset,
int
img_sums_offset,
int
img_sums_step,
int
img_sums_step,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c0,
__global
const
float
*
img_sqsums_c0,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c1,
__global
const
float
*
img_sqsums_c1,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c2,
__global
const
float
*
img_sqsums_c2,
__global
const
TYPE_IMAGE_SQSUM
*
img_sqsums_c3,
__global
const
float
*
img_sqsums_c3,
int
img_sqsums_offset,
int
img_sqsums_offset,
int
img_sqsums_step,
int
img_sqsums_step,
float
tpl_sum_c0,
float
tpl_sum_c0,
...
@@ -782,42 +788,71 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
...
@@ -782,42 +788,71 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
if
(
gidx
<
res_cols
&&
gidy
<
res_rows
)
{
{
float
image_sum_c0
=
(
float
)(
float
image_sum_c0
=
(
float
)(
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c0[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c0[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c0[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sum_c1
=
(
float
)(
float
image_sum_c1
=
(
float
)(
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c1[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c1[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c1[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sum_c2
=
(
float
)(
float
image_sum_c2
=
(
float
)(
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c2[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c2[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c2[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sum_c3
=
(
float
)(
float
image_sum_c3
=
(
float
)(
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
(
img_sums_c3[SUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
-
(
img_sums_c3[SUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sums_c3[SUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c0
=
(
float
)(
float
image_sqsum_c0
=
(
float
)(
(
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c0[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c0[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c0[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c1
=
(
float
)(
float
image_sqsum_c1
=
(
float
)(
(
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c1[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c1[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c1[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c2
=
(
float
)(
float
image_sqsum_c2
=
(
float
)(
(
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c2[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c2[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c2[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
image_sqsum_c3
=
(
float
)(
float
image_sqsum_c3
=
(
float
)(
(
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
tpl_cols,
0
)
]
)
-
(
img_sqsums_c3[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
0
,
0
)
]
))
;
(
img_sqsums_c3[SQSUMS_PTR
(
0
,
tpl_rows
)
]
-
img_sqsums_c3[SQSUMS_PTR
(
0
,
0
)
]
))
;
float
num
=
res[res_idx]
-
float
num
=
res[res_idx]
-
image_sum_c0
*
tpl_sum_c0
-
image_sum_c0
*
tpl_sum_c0
-
image_sum_c1
*
tpl_sum_c1
-
image_sum_c1
*
tpl_sum_c1
-
image_sum_c2
*
tpl_sum_c2
-
image_sum_c2
*
tpl_sum_c2
-
image_sum_c3
*
tpl_sum_c3
;
image_sum_c3
*
tpl_sum_c3
;
float
denum
=
sqrt
(
tpl_sqsum
*
(
float
denum
=
sqrt
(
tpl_sqsum
*
(
image_sqsum_c0
-
weight
*
image_sum_c0
*
image_sum_c0
+
image_sqsum_c0
-
weight
*
image_sum_c0
*
image_sum_c0
+
image_sqsum_c1
-
weight
*
image_sum_c1
*
image_sum_c1
+
image_sqsum_c1
-
weight
*
image_sum_c1
*
image_sum_c1
+
image_sqsum_c2
-
weight
*
image_sum_c2
*
image_sum_c2
+
image_sqsum_c2
-
weight
*
image_sum_c2
*
image_sum_c2
+
image_sqsum_c3
-
weight
*
image_sum_c0
*
image_sum_c3
)
image_sqsum_c3
-
weight
*
image_sum_c0
*
image_sum_c3
)
)
;
)
;
res[res_idx]
=
normAcc
(
num,
denum
)
;
res[res_idx]
=
normAcc
(
num,
denum
)
;
}
}
}
}
//////////////////////////////////////////////////////////////////////
//
extractFirstChannel
// Writes the .x component of every float4 element of `img` into the
// single-channel float buffer `res`.
//
//   rows, cols             bounds compared against the work-item ids
//                          (dimension 1 and dimension 0 respectively)
//   img_offset, res_offset start offsets; each is divided by its element size
//                          (sizeof(float4) / sizeof(float)) before use, so
//                          callers are expected to pass byte quantities
//   img_step, res_step     row pitches, converted to element counts the same way
__kernel void extractFirstChannel(
    const __global float4* img,
    __global float* res,
    int rows,
    int cols,
    int img_offset,
    int res_offset,
    int img_step,
    int res_step)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Work-items that fall outside the cols x rows area have nothing to do.
    if (x >= cols || y >= rows)
    {
        return;
    }

    // Turn the incoming pitches and offsets into element counts.
    const int src_pitch  = img_step   / sizeof(float4);
    const int dst_pitch  = res_step   / sizeof(float);
    const int src_origin = img_offset / sizeof(float4);
    const int dst_origin = res_offset / sizeof(float);

    // Rebase both buffers onto their first element of interest.
    const __global float4* src = img + src_origin;
    __global float* dst = res + dst_origin;

    // Keep only the first channel of the source element.
    const float4 pixel = src[x + y * src_pitch];
    dst[x + y * dst_pitch] = pixel.x;
}
modules/ocl/test/test_match_template.cpp
View file @
f36db3a0
...
@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
...
@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
}
}
};
};
TEST_P
(
MatchTemplate8U
,
DISABLED_
Accuracy
)
TEST_P
(
MatchTemplate8U
,
Accuracy
)
{
{
std
::
cout
<<
"Method: "
<<
TEMPLATE_METHOD_NAMES
[
method
]
<<
std
::
endl
;
std
::
cout
<<
"Method: "
<<
TEMPLATE_METHOD_NAMES
[
method
]
<<
std
::
endl
;
...
@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy)
...
@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy)
EXPECT_MAT_NEAR
(
dst_gold
,
mat_dst
,
templ_size
.
area
()
*
1e-1
,
sss
);
EXPECT_MAT_NEAR
(
dst_gold
,
mat_dst
,
templ_size
.
area
()
*
1e-1
,
sss
);
}
}
INSTANTIATE_TEST_CASE_P
(
GPU
_ImgProc
,
MatchTemplate8U
,
INSTANTIATE_TEST_CASE_P
(
OCL
_ImgProc
,
MatchTemplate8U
,
testing
::
Combine
(
testing
::
Combine
(
MTEMP_SIZES
,
MTEMP_SIZES
,
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
/*, TemplateSize(cv::Size(30, 30))*/
),
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
,
TemplateSize
(
cv
::
Size
(
30
,
30
))
),
testing
::
Values
(
Channels
(
1
),
Channels
(
3
),
Channels
(
4
)),
testing
::
Values
(
Channels
(
1
),
Channels
(
3
),
Channels
(
4
)),
ALL_TEMPLATE_METHODS
ALL_TEMPLATE_METHODS
)
)
);
);
INSTANTIATE_TEST_CASE_P
(
GPU
_ImgProc
,
MatchTemplate32F
,
testing
::
Combine
(
INSTANTIATE_TEST_CASE_P
(
OCL
_ImgProc
,
MatchTemplate32F
,
testing
::
Combine
(
MTEMP_SIZES
,
MTEMP_SIZES
,
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
/*, TemplateSize(cv::Size(30, 30))*/
),
testing
::
Values
(
TemplateSize
(
cv
::
Size
(
5
,
5
)),
TemplateSize
(
cv
::
Size
(
16
,
16
))
,
TemplateSize
(
cv
::
Size
(
30
,
30
))
),
testing
::
Values
(
Channels
(
1
),
Channels
(
3
),
Channels
(
4
)),
testing
::
Values
(
Channels
(
1
),
Channels
(
3
),
Channels
(
4
)),
testing
::
Values
(
TemplateMethod
(
cv
::
TM_SQDIFF
),
TemplateMethod
(
cv
::
TM_CCORR
))));
testing
::
Values
(
TemplateMethod
(
cv
::
TM_SQDIFF
),
TemplateMethod
(
cv
::
TM_CCORR
))));
#endif
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment