Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
03e2a52e
Commit
03e2a52e
authored
Apr 12, 2013
by
Vadim Pisarevsky
Committed by
OpenCV Buildbot
Apr 12, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #807 from pengx17:2.4_ocl_bfm_opt
parents
3d39087a
2338a895
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
250 additions
and
253 deletions
+250
-253
brute_force_matcher.cpp
modules/ocl/src/brute_force_matcher.cpp
+71
-57
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+173
-168
test_brute_force_matcher.cpp
modules/ocl/test/test_brute_force_matcher.cpp
+6
-28
No files found.
modules/ocl/src/brute_force_matcher.cpp
View file @
03e2a52e
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
//
//
// @Authors
// @Authors
// Nathan, liujun@multicorewareinc.com
// Nathan, liujun@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// are permitted provided that the following conditions are met:
...
@@ -61,6 +62,8 @@ namespace cv
...
@@ -61,6 +62,8 @@ namespace cv
}
}
}
}
static
const
int
OPT_SIZE
=
100
;
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
...
@@ -74,6 +77,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
...
@@ -74,6 +77,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
int
m_size
=
MAX_DESC_LEN
;
int
m_size
=
MAX_DESC_LEN
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
distType
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -82,18 +88,15 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
...
@@ -82,18 +88,15 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
m_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_UnrollMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_UnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -115,6 +118,9 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
...
@@ -115,6 +118,9 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
int
block_size
=
BLOCK_SIZE
;
int
block_size
=
BLOCK_SIZE
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
distType
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -123,17 +129,15 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
...
@@ -123,17 +129,15 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_Match"
;
std
::
string
kernelName
=
"BruteForceMatch_Match"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -157,6 +161,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
...
@@ -157,6 +161,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
int
m_size
=
MAX_DESC_LEN
;
int
m_size
=
MAX_DESC_LEN
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
distType
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -167,8 +174,6 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
...
@@ -167,8 +174,6 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
nMatches
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
nMatches
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
m_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
@@ -176,11 +181,10 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
...
@@ -176,11 +181,10 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_RadiusUnrollMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_RadiusUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -197,6 +201,9 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
...
@@ -197,6 +201,9 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
int
block_size
=
BLOCK_SIZE
;
int
block_size
=
BLOCK_SIZE
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
distType
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -207,7 +214,6 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
...
@@ -207,7 +214,6 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
nMatches
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
nMatches
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
@@ -215,11 +221,10 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
...
@@ -215,11 +221,10 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trainIdx
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_RadiusMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_RadiusMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -294,6 +299,9 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
...
@@ -294,6 +299,9 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
int
m_size
=
MAX_DESC_LEN
;
int
m_size
=
MAX_DESC_LEN
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
distType
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -302,18 +310,15 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
...
@@ -302,18 +310,15 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
m_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_knnUnrollMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_knnUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -328,6 +333,9 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
...
@@ -328,6 +333,9 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
int
block_size
=
BLOCK_SIZE
;
int
block_size
=
BLOCK_SIZE
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
distType
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -336,17 +344,15 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
...
@@ -336,17 +344,15 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_knnMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_knnMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -361,6 +367,8 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
...
@@ -361,6 +367,8 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
int
m_size
=
MAX_DESC_LEN
;
int
m_size
=
MAX_DESC_LEN
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d"
,
distType
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -375,11 +383,10 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
...
@@ -375,11 +383,10 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_calcDistanceUnrolled"
;
std
::
string
kernelName
=
"BruteForceMatch_calcDistanceUnrolled"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -393,6 +400,8 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
...
@@ -393,6 +400,8 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
int
block_size
=
BLOCK_SIZE
;
int
block_size
=
BLOCK_SIZE
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d"
,
distType
);
if
(
globalSize
[
0
]
!=
0
)
if
(
globalSize
[
0
]
!=
0
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
@@ -406,11 +415,10 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
...
@@ -406,11 +415,10 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
distType
));
std
::
string
kernelName
=
"BruteForceMatch_calcDistance"
;
std
::
string
kernelName
=
"BruteForceMatch_calcDistance"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
}
}
...
@@ -534,24 +542,23 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const
...
@@ -534,24 +542,23 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
int
callType
=
query
.
depth
();
char
cvFuncName
[]
=
"singleMatch"
;
if
(
callType
!=
5
)
if
(
callType
!=
5
)
CV_E
RROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
||
callType
!=
2
||
callType
!=
4
)))
{
{
CV_E
RROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
}
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
cols
==
query
.
cols
&&
train
.
type
()
==
query
.
type
());
CV_Assert
(
train
.
cols
==
query
.
cols
&&
train
.
type
()
==
query
.
type
());
trainIdx
.
create
(
1
,
query
.
rows
,
CV_32S
);
ensureSizeIsEnough
(
1
,
query
.
rows
,
CV_32S
,
trainIdx
);
distance
.
create
(
1
,
query
.
rows
,
CV_32F
);
ensureSizeIsEnough
(
1
,
query
.
rows
,
CV_32F
,
distance
);
matchDispatcher
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
matchDispatcher
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
exit
:
return
;
return
;
}
}
...
@@ -656,24 +663,26 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
...
@@ -656,24 +663,26 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
int
callType
=
query
.
depth
();
char
cvFuncName
[]
=
"matchCollection"
;
if
(
callType
!=
5
)
if
(
callType
!=
5
)
CV_E
RROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
||
callType
!=
2
||
callType
!=
4
)))
{
{
CV_E
RROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
}
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
trainIdx
.
create
(
1
,
query
.
rows
,
CV_32S
);
const
int
nQuery
=
query
.
rows
;
imgIdx
.
create
(
1
,
query
.
rows
,
CV_32S
);
distance
.
create
(
1
,
query
.
rows
,
CV_32F
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32S
,
trainIdx
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32S
,
imgIdx
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32F
,
distance
);
matchDispatcher
(
query
,
(
const
oclMat
*
)
trainCollection
.
ptr
(),
trainCollection
.
cols
,
masks
,
trainIdx
,
imgIdx
,
distance
,
distType
);
matchDispatcher
(
query
,
(
const
oclMat
*
)
trainCollection
.
ptr
(),
trainCollection
.
cols
,
masks
,
trainIdx
,
imgIdx
,
distance
,
distType
);
exit
:
return
;
return
;
}
}
...
@@ -746,35 +755,37 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, co
...
@@ -746,35 +755,37 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, co
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
int
callType
=
query
.
depth
();
char
cvFuncName
[]
=
"knnMatchSingle"
;
if
(
callType
!=
5
)
if
(
callType
!=
5
)
CV_E
RROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
||
callType
!=
2
||
callType
!=
4
)))
{
{
CV_E
RROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
}
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
const
int
nQuery
=
query
.
rows
;
const
int
nTrain
=
train
.
rows
;
if
(
k
==
2
)
if
(
k
==
2
)
{
{
trainIdx
.
create
(
1
,
query
.
rows
,
CV_32SC2
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC2
,
trainIdx
);
distance
.
create
(
1
,
query
.
rows
,
CV_32FC2
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32FC2
,
distance
);
}
}
else
else
{
{
trainIdx
.
create
(
query
.
rows
,
k
,
CV_32S
);
ensureSizeIsEnough
(
nQuery
,
k
,
CV_32S
,
trainIdx
);
distance
.
create
(
query
.
rows
,
k
,
CV_32F
);
ensureSizeIsEnough
(
nQuery
,
k
,
CV_32F
,
distance
);
allDist
.
create
(
query
.
rows
,
train
.
rows
,
CV_32FC1
);
ensureSizeIsEnough
(
nQuery
,
nTrain
,
CV_32FC1
,
allDist
);
}
}
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
kmatchDispatcher
(
query
,
train
,
k
,
mask
,
trainIdx
,
distance
,
allDist
,
distType
);
kmatchDispatcher
(
query
,
train
,
k
,
mask
,
trainIdx
,
distance
,
allDist
,
distType
);
exit
:
return
;
return
;
}
}
...
@@ -873,9 +884,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &quer
...
@@ -873,9 +884,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &quer
const
int
nQuery
=
query
.
rows
;
const
int
nQuery
=
query
.
rows
;
trainIdx
.
create
(
1
,
nQuery
,
CV_32SC2
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC2
,
trainIdx
);
imgIdx
.
create
(
1
,
nQuery
,
CV_32SC2
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC2
,
imgIdx
);
distance
.
create
(
1
,
nQuery
,
CV_32SC2
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32FC2
,
distance
);
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
...
@@ -1021,31 +1032,34 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query,
...
@@ -1021,31 +1032,34 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query,
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
int
callType
=
query
.
depth
();
char
cvFuncName
[]
=
"radiusMatchSingle"
;
if
(
callType
!=
5
)
if
(
callType
!=
5
)
CV_E
RROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
||
callType
!=
2
||
callType
!=
4
)))
{
{
CV_E
RROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
CV_E
rror
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
}
const
int
nQuery
=
query
.
rows
;
const
int
nTrain
=
train
.
rows
;
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
CV_Assert
(
trainIdx
.
empty
()
||
(
trainIdx
.
rows
==
query
.
rows
&&
trainIdx
.
size
()
==
distance
.
size
()));
CV_Assert
(
trainIdx
.
empty
()
||
(
trainIdx
.
rows
==
query
.
rows
&&
trainIdx
.
size
()
==
distance
.
size
()));
nMatches
.
create
(
1
,
query
.
rows
,
CV_32SC1
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC1
,
nMatches
);
if
(
trainIdx
.
empty
())
if
(
trainIdx
.
empty
())
{
{
trainIdx
.
create
(
query
.
rows
,
std
::
max
((
train
.
rows
/
100
),
10
),
CV_32SC1
);
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32SC1
,
trainIdx
);
distance
.
create
(
query
.
rows
,
std
::
max
((
train
.
rows
/
100
),
10
),
CV_32FC1
);
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32FC1
,
distance
);
}
}
nMatches
.
setTo
(
Scalar
::
all
(
0
));
nMatches
.
setTo
(
Scalar
::
all
(
0
));
matchDispatcher
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
matchDispatcher
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
exit
:
return
;
return
;
}
}
...
...
modules/ocl/src/opencl/brute_force_match.cl
View file @
03e2a52e
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Nathan,
liujun@multicorewareinc.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
oclMaterials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
"as is"
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
define
MAX_FLOAT
1e7f
#
define
MAX_FLOAT
3.40282e+038f
#
ifndef
BLOCK_SIZE
#
define
BLOCK_SIZE
16
#
endif
#
ifndef
MAX_DESC_LEN
#
define
MAX_DESC_LEN
64
#
endif
int
bit1Count
(
float
x
)
int
bit1Count
(
float
x
)
{
{
...
@@ -13,83 +66,52 @@ int bit1Count(float x)
...
@@ -13,83 +66,52 @@ int bit1Count(float x)
return
(
float
)
c
;
return
(
float
)
c
;
}
}
#
ifndef
DIST_TYPE
#
define
DIST_TYPE
0
#
endif
#
if
(
DIST_TYPE
==
0
)
#
define
DIST
(
x,
y
)
fabs
((
x
)
-
(
y
))
#
elif
(
DIST_TYPE
==
1
)
#
define
DIST
(
x,
y
)
(((
x
)
-
(
y
))
*
((
x
)
-
(
y
)))
#
elif
(
DIST_TYPE
==
2
)
#
define
DIST
(
x,
y
)
bit1Count
((
uint
)(
x
)
^
(
uint
)(
y
))
#
endif
float
reduce_block
(
__local
float
*s_query,
float
reduce_block
(
__local
float
*s_query,
__local
float
*s_train,
__local
float
*s_train,
int
block_size,
int
lidx,
int
lidx,
int
lidy,
int
lidy
int
distType
)
)
{
{
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
float
result
=
0
;
float
result
=
0
;
switch
(
distType
)
#
pragma
unroll
{
for
(
int
j
=
0
; j < BLOCK_SIZE ; j++)
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
{
result
+=
fabs
(
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
result
+=
DIST
(
s_query[lidy
*
BLOCK_SIZE
+
j],
s_train[j
*
BLOCK_SIZE
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
bit1Count
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[
(
uint
)
j
*
block_size
+
lidx]
)
;
}
break
;
}
}
return
result
;
return
result
;
}
}
float
reduce_multi_block
(
__local
float
*s_query,
float
reduce_multi_block
(
__local
float
*s_query,
__local
float
*s_train,
__local
float
*s_train,
int
max_desc_len,
int
block_size,
int
block_index,
int
block_index,
int
lidx,
int
lidx,
int
lidy,
int
lidy
int
distType
)
)
{
{
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
float
result
=
0
;
float
result
=
0
;
switch
(
distType
)
#
pragma
unroll
{
for
(
int
j
=
0
; j < BLOCK_SIZE ; j++)
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
{
result
+=
fabs
(
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
result
+=
DIST
(
s_query[lidy
*
MAX_DESC_LEN
+
block_index
*
BLOCK_SIZE
+
j],
s_train[j
*
BLOCK_SIZE
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
//result
+=
popcount
((
uint
)
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
result
+=
bit1Count
((
uint
)
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
}
}
return
result
;
return
result
;
}
}
/*
2dim
launch,
global
size:
dim0
is
(
query
rows
+
block_size
-
1
)
/
block_size
*
block_size,
dim1
is
block_size
/*
2dim
launch,
global
size:
dim0
is
(
query
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE,
dim1
is
BLOCK_SIZE
local
size:
dim0
is
block_size,
dim1
is
block_size
.
local
size:
dim0
is
BLOCK_SIZE,
dim1
is
BLOCK_SIZE
.
*/
*/
__kernel
void
BruteForceMatch_UnrollMatch_D5
(
__kernel
void
BruteForceMatch_UnrollMatch_D5
(
__global
float
*query,
__global
float
*query,
...
@@ -98,29 +120,28 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
...
@@ -98,29 +120,28 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
__global
int
*bestTrainIdx,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
__global
float
*bestDistance,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
step,
int
step
int
distType
)
)
{
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
block_size
*
max_desc_len
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
MAX_DESC_LEN
;
int
queryIdx
=
groupidx
*
block_size
+
lidy
;
int
queryIdx
=
groupidx
*
BLOCK_SIZE
+
lidy
;
//
load
the
query
into
local
memory.
//
load
the
query
into
local
memory.
for
(
int
i
=
0
; i < max_desc_len / block_size; i ++)
#
pragma
unroll
for
(
int
i
=
0
; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{
{
int
loadx
=
lidx
+
i
*
block_size
;
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
s_query[lidy
*
max_desc_len
+
loadx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_query[lidy
*
MAX_DESC_LEN
+
loadx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
}
}
float
myBestDistance
=
MAX_FLOAT
;
float
myBestDistance
=
MAX_FLOAT
;
...
@@ -128,24 +149,25 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
...
@@ -128,24 +149,25 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
//
loopUnrolledCached
to
find
the
best
trainIdx
and
best
distance.
//
loopUnrolledCached
to
find
the
best
trainIdx
and
best
distance.
volatile
int
imgIdx
=
0
;
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size
; t++)
for
(
int
t
=
0
,
endt
=
(
train_rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
; t < endt
; t++)
{
{
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < max_desc_len / block_size ; i++)
#
pragma
unroll
for
(
int
i
=
0
; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
{
//load
a
block_size
*
block_size
block
into
local
train.
//load
a
BLOCK_SIZE
*
BLOCK_SIZE
block
into
local
train.
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
s_train[lidx
*
block_size
+
lidy]
=
loadx
<
train_cols
?
train[min
(
t
*
block_size
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
loadx
<
train_cols
?
train[min
(
t
*
BLOCK_SIZE
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
max_desc_len,
block_size,
i,
lidx,
lidy,
distType
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
i,
lidx,
lidy
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
int
trainIdx
=
t
*
block_size
+
lidx
;
int
trainIdx
=
t
*
BLOCK_SIZE
+
lidx
;
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
...
@@ -157,18 +179,19 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
...
@@ -157,18 +179,19 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
__local
float
*s_distance
=
(
__local
float*
)(
sharebuffer
)
;
__local
float
*s_distance
=
(
__local
float*
)(
sharebuffer
)
;
__local
int*
s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
block_size
*
block_size
)
;
__local
int*
s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
)
;
//find
BestMatch
//find
BestMatch
s_distance
+=
lidy
*
block_size
;
s_distance
+=
lidy
*
BLOCK_SIZE
;
s_trainIdx
+=
lidy
*
block_size
;
s_trainIdx
+=
lidy
*
BLOCK_SIZE
;
s_distance[lidx]
=
myBestDistance
;
s_distance[lidx]
=
myBestDistance
;
s_trainIdx[lidx]
=
myBestTrainIdx
;
s_trainIdx[lidx]
=
myBestTrainIdx
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//reduce
--
now
all
reduce
implement
in
each
threads.
//reduce
--
now
all
reduce
implement
in
each
threads.
for
(
int
k
=
0
; k < block_size; k++)
#
pragma
unroll
for
(
int
k
=
0
; k < BLOCK_SIZE; k++)
{
{
if
(
myBestDistance
>
s_distance[k]
)
if
(
myBestDistance
>
s_distance[k]
)
{
{
...
@@ -191,53 +214,51 @@ __kernel void BruteForceMatch_Match_D5(
...
@@ -191,53 +214,51 @@ __kernel void BruteForceMatch_Match_D5(
__global
int
*bestTrainIdx,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
__global
float
*bestDistance,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
step,
int
step
int
distType
)
)
{
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
queryIdx
=
groupidx
*
block_size
+
lidy
;
const
int
queryIdx
=
groupidx
*
BLOCK_SIZE
+
lidy
;
float
myBestDistance
=
MAX_FLOAT
;
float
myBestDistance
=
MAX_FLOAT
;
int
myBestTrainIdx
=
-1
;
int
myBestTrainIdx
=
-1
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
//
loop
//
loop
for
(
int
t
=
0
; t < (train_rows +
block_size - 1) / block_size
; t++)
for
(
int
t
=
0
; t < (train_rows +
BLOCK_SIZE - 1) / BLOCK_SIZE
; t++)
{
{
//Dist
dist
;
//Dist
dist
;
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < (query_cols +
block_size - 1) / block_size
; i++)
for
(
int
i
=
0
; i < (query_cols +
BLOCK_SIZE - 1) / BLOCK_SIZE
; i++)
{
{
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
//load
query
and
train
into
local
memory
//load
query
and
train
into
local
memory
s_query[lidy
*
block_size
+
lidx]
=
0
;
s_query[lidy
*
BLOCK_SIZE
+
lidx]
=
0
;
s_train[lidx
*
block_size
+
lidy]
=
0
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
0
;
if
(
loadx
<
query_cols
)
if
(
loadx
<
query_cols
)
{
{
s_query[lidy
*
block_size
+
lidx]
=
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
s_query[lidy
*
BLOCK_SIZE
+
lidx]
=
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
s_train[lidx
*
block_size
+
lidy]
=
train[min
(
t
*
block_size
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
train[min
(
t
*
BLOCK_SIZE
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
const
int
trainIdx
=
t
*
block_size
+
lidx
;
const
int
trainIdx
=
t
*
BLOCK_SIZE
+
lidx
;
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance
/*&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
myBestDistance
/*&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
...
@@ -250,18 +271,18 @@ __kernel void BruteForceMatch_Match_D5(
...
@@ -250,18 +271,18 @@ __kernel void BruteForceMatch_Match_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
__local
float
*s_distance
=
(
__local
float
*
)
sharebuffer
;
__local
float
*s_distance
=
(
__local
float
*
)
sharebuffer
;
__local
int
*s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
block_size
*
block_size
)
;
__local
int
*s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
)
;
//findBestMatch
//findBestMatch
s_distance
+=
lidy
*
block_size
;
s_distance
+=
lidy
*
BLOCK_SIZE
;
s_trainIdx
+=
lidy
*
block_size
;
s_trainIdx
+=
lidy
*
BLOCK_SIZE
;
s_distance[lidx]
=
myBestDistance
;
s_distance[lidx]
=
myBestDistance
;
s_trainIdx[lidx]
=
myBestTrainIdx
;
s_trainIdx[lidx]
=
myBestTrainIdx
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//reduce
--
now
all
reduce
implement
in
each
threads.
//reduce
--
now
all
reduce
implement
in
each
threads.
for
(
int
k
=
0
; k <
block_size
; k++)
for
(
int
k
=
0
; k <
BLOCK_SIZE
; k++)
{
{
if
(
myBestDistance
>
s_distance[k]
)
if
(
myBestDistance
>
s_distance[k]
)
{
{
...
@@ -287,16 +308,13 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
...
@@ -287,16 +308,13 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
__global
float
*bestDistance,
__global
float
*bestDistance,
__global
int
*nMatches,
__global
int
*nMatches,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
bestTrainIdx_cols,
int
bestTrainIdx_cols,
int
step,
int
step,
int
ostep,
int
ostep
int
distType
)
)
{
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidx
=
get_local_id
(
0
)
;
...
@@ -304,25 +322,25 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
...
@@ -304,25 +322,25 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidy
=
get_group_id
(
1
)
;
const
int
groupidy
=
get_group_id
(
1
)
;
const
int
queryIdx
=
groupidy
*
block_size
+
lidy
;
const
int
queryIdx
=
groupidy
*
BLOCK_SIZE
+
lidy
;
const
int
trainIdx
=
groupidx
*
block_size
+
lidx
;
const
int
trainIdx
=
groupidx
*
BLOCK_SIZE
+
lidx
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i <
max_desc_len / block_size
; ++i)
for
(
int
i
=
0
; i <
MAX_DESC_LEN / BLOCK_SIZE
; ++i)
{
{
//load
a
block_size
*
block_size
block
into
local
train.
//load
a
BLOCK_SIZE
*
BLOCK_SIZE
block
into
local
train.
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
s_query[lidy
*
block_size
+
lidx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_query[lidy
*
BLOCK_SIZE
+
lidx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_train[lidx
*
block_size
+
lidy]
=
loadx
<
query_cols
?
train[min
(
groupidx
*
block_size
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
loadx
<
query_cols
?
train[min
(
groupidx
*
BLOCK_SIZE
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -350,15 +368,13 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
...
@@ -350,15 +368,13 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
__global
float
*bestDistance,
__global
float
*bestDistance,
__global
int
*nMatches,
__global
int
*nMatches,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
bestTrainIdx_cols,
int
bestTrainIdx_cols,
int
step,
int
step,
int
ostep,
int
ostep
int
distType
)
)
{
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidx
=
get_local_id
(
0
)
;
...
@@ -366,25 +382,25 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
...
@@ -366,25 +382,25 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidy
=
get_group_id
(
1
)
;
const
int
groupidy
=
get_group_id
(
1
)
;
const
int
queryIdx
=
groupidy
*
block_size
+
lidy
;
const
int
queryIdx
=
groupidy
*
BLOCK_SIZE
+
lidy
;
const
int
trainIdx
=
groupidx
*
block_size
+
lidx
;
const
int
trainIdx
=
groupidx
*
BLOCK_SIZE
+
lidx
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < (query_cols +
block_size - 1) / block_size
; ++i)
for
(
int
i
=
0
; i < (query_cols +
BLOCK_SIZE - 1) / BLOCK_SIZE
; ++i)
{
{
//load
a
block_size
*
block_size
block
into
local
train.
//load
a
BLOCK_SIZE
*
BLOCK_SIZE
block
into
local
train.
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
s_query[lidy
*
block_size
+
lidx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_query[lidy
*
BLOCK_SIZE
+
lidx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_train[lidx
*
block_size
+
lidy]
=
loadx
<
query_cols
?
train[min
(
groupidx
*
block_size
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
loadx
<
query_cols
?
train[min
(
groupidx
*
BLOCK_SIZE
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -410,29 +426,26 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
...
@@ -410,29 +426,26 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
__global
int2
*bestTrainIdx,
__global
int2
*bestTrainIdx,
__global
float2
*bestDistance,
__global
float2
*bestDistance,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
step,
int
step
int
distType
)
)
{
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
queryIdx
=
groupidx
*
block_size
+
lidy
;
const
int
queryIdx
=
groupidx
*
BLOCK_SIZE
+
lidy
;
local
float
*s_query
=
sharebuffer
;
local
float
*s_query
=
sharebuffer
;
local
float
*s_train
=
sharebuffer
+
block_size
*
max_desc_len
;
local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
MAX_DESC_LEN
;
//
load
the
query
into
local
memory.
//
load
the
query
into
local
memory.
for
(
int
i
=
0
; i <
max_desc_len / block_size
; i ++)
for
(
int
i
=
0
; i <
MAX_DESC_LEN / BLOCK_SIZE
; i ++)
{
{
int
loadx
=
lidx
+
i
*
block_size
;
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
s_query[lidy
*
max_desc_len
+
loadx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_query[lidy
*
MAX_DESC_LEN
+
loadx]
=
loadx
<
query_cols
?
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
}
}
float
myBestDistance1
=
MAX_FLOAT
;
float
myBestDistance1
=
MAX_FLOAT
;
...
@@ -442,25 +455,25 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
...
@@ -442,25 +455,25 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
//loopUnrolledCached
//loopUnrolledCached
volatile
int
imgIdx
=
0
;
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
; t < (train_rows +
block_size - 1) / block_size
; t++)
for
(
int
t
=
0
; t < (train_rows +
BLOCK_SIZE - 1) / BLOCK_SIZE
; t++)
{
{
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i <
max_desc_len / block_size
; i++)
for
(
int
i
=
0
; i <
MAX_DESC_LEN / BLOCK_SIZE
; i++)
{
{
const
int
loadX
=
lidx
+
i
*
block_size
;
const
int
loadX
=
lidx
+
i
*
BLOCK_SIZE
;
//load
a
block_size
*
block_size
block
into
local
train.
//load
a
BLOCK_SIZE
*
BLOCK_SIZE
block
into
local
train.
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
s_train[lidx
*
block_size
+
lidy]
=
loadx
<
train_cols
?
train[min
(
t
*
block_size
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
loadx
<
train_cols
?
train[min
(
t
*
BLOCK_SIZE
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
max_desc_len,
block_size,
i,
lidx,
lidy,
distType
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
i,
lidx,
lidy
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
const
int
trainIdx
=
t
*
block_size
+
lidx
;
const
int
trainIdx
=
t
*
BLOCK_SIZE
+
lidx
;
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
)
{
{
...
@@ -482,11 +495,11 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
...
@@ -482,11 +495,11 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
local
float
*s_distance
=
(
local
float
*
)
sharebuffer
;
local
float
*s_distance
=
(
local
float
*
)
sharebuffer
;
local
int
*s_trainIdx
=
(
local
int
*
)(
sharebuffer
+
block_size
*
block_size
)
;
local
int
*s_trainIdx
=
(
local
int
*
)(
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
)
;
//
find
BestMatch
//
find
BestMatch
s_distance
+=
lidy
*
block_size
;
s_distance
+=
lidy
*
BLOCK_SIZE
;
s_trainIdx
+=
lidy
*
block_size
;
s_trainIdx
+=
lidy
*
BLOCK_SIZE
;
s_distance[lidx]
=
myBestDistance1
;
s_distance[lidx]
=
myBestDistance1
;
s_trainIdx[lidx]
=
myBestTrainIdx1
;
s_trainIdx[lidx]
=
myBestTrainIdx1
;
...
@@ -499,7 +512,7 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
...
@@ -499,7 +512,7 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
if
(
lidx
==
0
)
if
(
lidx
==
0
)
{
{
for
(
int
i
=
0
; i <
block_size
; i++)
for
(
int
i
=
0
; i <
BLOCK_SIZE
; i++)
{
{
float
val
=
s_distance[i]
;
float
val
=
s_distance[i]
;
if
(
val
<
bestDistance1
)
if
(
val
<
bestDistance1
)
...
@@ -527,7 +540,7 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
...
@@ -527,7 +540,7 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
if
(
lidx
==
0
)
if
(
lidx
==
0
)
{
{
for
(
int
i
=
0
; i <
block_size
; i++)
for
(
int
i
=
0
; i <
BLOCK_SIZE
; i++)
{
{
float
val
=
s_distance[i]
;
float
val
=
s_distance[i]
;
...
@@ -559,22 +572,20 @@ __kernel void BruteForceMatch_knnMatch_D5(
...
@@ -559,22 +572,20 @@ __kernel void BruteForceMatch_knnMatch_D5(
__global
int2
*bestTrainIdx,
__global
int2
*bestTrainIdx,
__global
float2
*bestDistance,
__global
float2
*bestDistance,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
step,
int
step
int
distType
)
)
{
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
queryIdx
=
groupidx
*
block_size
+
lidy
;
const
int
queryIdx
=
groupidx
*
BLOCK_SIZE
+
lidy
;
local
float
*s_query
=
sharebuffer
;
local
float
*s_query
=
sharebuffer
;
local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
float
myBestDistance1
=
MAX_FLOAT
;
float
myBestDistance1
=
MAX_FLOAT
;
float
myBestDistance2
=
MAX_FLOAT
;
float
myBestDistance2
=
MAX_FLOAT
;
...
@@ -582,30 +593,30 @@ __kernel void BruteForceMatch_knnMatch_D5(
...
@@ -582,30 +593,30 @@ __kernel void BruteForceMatch_knnMatch_D5(
int
myBestTrainIdx2
=
-1
;
int
myBestTrainIdx2
=
-1
;
//loop
//loop
for
(
int
t
=
0
; t < (train_rows +
block_size - 1) / block_size
; t++)
for
(
int
t
=
0
; t < (train_rows +
BLOCK_SIZE - 1) / BLOCK_SIZE
; t++)
{
{
float
result
=
0.0f
;
float
result
=
0.0f
;
for
(
int
i
=
0
; i < (query_cols +
block_size -1) / block_size
; i++)
for
(
int
i
=
0
; i < (query_cols +
BLOCK_SIZE -1) / BLOCK_SIZE
; i++)
{
{
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
//load
query
and
train
into
local
memory
//load
query
and
train
into
local
memory
s_query[lidy
*
block_size
+
lidx]
=
0
;
s_query[lidy
*
BLOCK_SIZE
+
lidx]
=
0
;
s_train[lidx
*
block_size
+
lidy]
=
0
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
0
;
if
(
loadx
<
query_cols
)
if
(
loadx
<
query_cols
)
{
{
s_query[lidy
*
block_size
+
lidx]
=
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
s_query[lidy
*
BLOCK_SIZE
+
lidx]
=
query[min
(
queryIdx,
query_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
s_train[lidx
*
block_size
+
lidy]
=
train[min
(
t
*
block_size
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
train[min
(
t
*
BLOCK_SIZE
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
const
int
trainIdx
=
t
*
block_size
+
lidx
;
const
int
trainIdx
=
t
*
BLOCK_SIZE
+
lidx
;
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
/*&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
/*&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
...
@@ -627,11 +638,11 @@ __kernel void BruteForceMatch_knnMatch_D5(
...
@@ -627,11 +638,11 @@ __kernel void BruteForceMatch_knnMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
__local
float
*s_distance
=
(
__local
float
*
)
sharebuffer
;
__local
float
*s_distance
=
(
__local
float
*
)
sharebuffer
;
__local
int
*s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
block_size
*
block_size
)
;
__local
int
*s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
)
;
//findBestMatch
//findBestMatch
s_distance
+=
lidy
*
block_size
;
s_distance
+=
lidy
*
BLOCK_SIZE
;
s_trainIdx
+=
lidy
*
block_size
;
s_trainIdx
+=
lidy
*
BLOCK_SIZE
;
s_distance[lidx]
=
myBestDistance1
;
s_distance[lidx]
=
myBestDistance1
;
s_trainIdx[lidx]
=
myBestTrainIdx1
;
s_trainIdx[lidx]
=
myBestTrainIdx1
;
...
@@ -644,7 +655,7 @@ __kernel void BruteForceMatch_knnMatch_D5(
...
@@ -644,7 +655,7 @@ __kernel void BruteForceMatch_knnMatch_D5(
if
(
lidx
==
0
)
if
(
lidx
==
0
)
{
{
for
(
int
i
=
0
; i <
block_size
; i++)
for
(
int
i
=
0
; i <
BLOCK_SIZE
; i++)
{
{
float
val
=
s_distance[i]
;
float
val
=
s_distance[i]
;
if
(
val
<
bestDistance1
)
if
(
val
<
bestDistance1
)
...
@@ -672,7 +683,7 @@ __kernel void BruteForceMatch_knnMatch_D5(
...
@@ -672,7 +683,7 @@ __kernel void BruteForceMatch_knnMatch_D5(
if
(
lidx
==
0
)
if
(
lidx
==
0
)
{
{
for
(
int
i
=
0
; i <
block_size
; i++)
for
(
int
i
=
0
; i <
BLOCK_SIZE
; i++)
{
{
float
val
=
s_distance[i]
;
float
val
=
s_distance[i]
;
...
@@ -703,14 +714,11 @@ kernel void BruteForceMatch_calcDistanceUnrolled_D5(
...
@@ -703,14 +714,11 @@ kernel void BruteForceMatch_calcDistanceUnrolled_D5(
//__global
float
*mask,
//__global
float
*mask,
__global
float
*allDist,
__global
float
*allDist,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
step,
int
step
)
int
distType
)
{
{
/*
Todo
*/
/*
Todo
*/
}
}
...
@@ -721,13 +729,11 @@ kernel void BruteForceMatch_calcDistance_D5(
...
@@ -721,13 +729,11 @@ kernel void BruteForceMatch_calcDistance_D5(
//__global
float
*mask,
//__global
float
*mask,
__global
float
*allDist,
__global
float
*allDist,
__local
float
*sharebuffer,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_rows,
int
query_cols,
int
query_cols,
int
train_rows,
int
train_rows,
int
train_cols,
int
train_cols,
int
step,
int
step
)
int
distType
)
{
{
/*
Todo
*/
/*
Todo
*/
}
}
...
@@ -736,8 +742,7 @@ kernel void BruteForceMatch_findBestMatch_D5(
...
@@ -736,8 +742,7 @@ kernel void BruteForceMatch_findBestMatch_D5(
__global
float
*allDist,
__global
float
*allDist,
__global
int
*bestTrainIdx,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
__global
float
*bestDistance,
int
k,
int
k
int
block_size
)
)
{
{
/*
Todo
*/
/*
Todo
*/
...
...
modules/ocl/test/test_brute_force_matcher.cpp
View file @
03e2a52e
...
@@ -43,16 +43,14 @@
...
@@ -43,16 +43,14 @@
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
namespace
namespace
{
{
/////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
// BruteForceMatcher
// BruteForceMatcher
CV_ENUM
(
DistType
,
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L1Dist
,
\
CV_ENUM
(
DistType
,
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L1Dist
,
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L2Dist
,
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
HammingDist
)
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L2Dist
,
\
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
HammingDist
)
IMPLEMENT_PARAM_CLASS
(
DescriptorSize
,
int
)
IMPLEMENT_PARAM_CLASS
(
DescriptorSize
,
int
)
PARAM_TEST_CASE
(
BruteForceMatcher
,
DistType
,
DescriptorSize
)
PARAM_TEST_CASE
(
BruteForceMatcher
/*, NormCode*/
,
DistType
,
DescriptorSize
)
{
{
//std::vector<cv::ocl::Info> oclinfo;
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
DistType
distType
;
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
DistType
distType
;
int
normCode
;
int
normCode
;
int
dim
;
int
dim
;
...
@@ -64,13 +62,9 @@ namespace
...
@@ -64,13 +62,9 @@ namespace
virtual
void
SetUp
()
virtual
void
SetUp
()
{
{
//normCode = GET_PARAM(0);
distType
=
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
DistType
)(
int
)
GET_PARAM
(
0
);
distType
=
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
DistType
)(
int
)
GET_PARAM
(
0
);
dim
=
GET_PARAM
(
1
);
dim
=
GET_PARAM
(
1
);
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
//CV_Assert(devnums > 0);
queryDescCount
=
300
;
// must be even number because we split train data in some cases in two
queryDescCount
=
300
;
// must be even number because we split train data in some cases in two
countFactor
=
4
;
// do not change it
countFactor
=
4
;
// do not change it
...
@@ -172,21 +166,6 @@ namespace
...
@@ -172,21 +166,6 @@ namespace
cv
::
ocl
::
BruteForceMatcher_OCL_base
matcher
(
distType
);
cv
::
ocl
::
BruteForceMatcher_OCL_base
matcher
(
distType
);
// assume support atomic.
//if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
//{
// try
// {
// std::vector< std::vector<cv::DMatch> > matches;
// matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
// }
// catch (const cv::Exception& e)
// {
// ASSERT_EQ(CV_StsNotImplemented, e.code);
// }
//}
//else
{
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
std
::
vector
<
std
::
vector
<
cv
::
DMatch
>
>
matches
;
matcher
.
radiusMatch
(
cv
::
ocl
::
oclMat
(
query
),
cv
::
ocl
::
oclMat
(
train
),
matches
,
radius
);
matcher
.
radiusMatch
(
cv
::
ocl
::
oclMat
(
query
),
cv
::
ocl
::
oclMat
(
train
),
matches
,
radius
);
...
@@ -209,10 +188,9 @@ namespace
...
@@ -209,10 +188,9 @@ namespace
ASSERT_EQ
(
0
,
badCount
);
ASSERT_EQ
(
0
,
badCount
);
}
}
}
INSTANTIATE_TEST_CASE_P
(
GPU_Features2D
,
BruteForceMatcher
,
testing
::
Combine
(
INSTANTIATE_TEST_CASE_P
(
OCL_Features2D
,
BruteForceMatcher
,
//ALL_DEVICES,
testing
::
Combine
(
testing
::
Values
(
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L1Dist
),
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L2Dist
)),
testing
::
Values
(
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L1Dist
),
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L2Dist
)),
testing
::
Values
(
DescriptorSize
(
57
),
DescriptorSize
(
64
),
DescriptorSize
(
83
),
DescriptorSize
(
128
),
DescriptorSize
(
179
),
DescriptorSize
(
256
),
DescriptorSize
(
304
))));
testing
::
Values
(
DescriptorSize
(
57
),
DescriptorSize
(
64
),
DescriptorSize
(
83
),
DescriptorSize
(
128
),
DescriptorSize
(
179
),
DescriptorSize
(
256
),
DescriptorSize
(
304
))));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment