Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0df6dc16
Commit
0df6dc16
authored
Apr 15, 2013
by
Andrey Kamaev
Committed by
OpenCV Buildbot
Apr 15, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #811 from pengx17:2.4_ocl_bfmatcher_newtype
parents
abe2ea59
6dd60135
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
172 additions
and
189 deletions
+172
-189
brute_force_matcher.cpp
modules/ocl/src/brute_force_matcher.cpp
+49
-93
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+107
-88
test_brute_force_matcher.cpp
modules/ocl/test/test_brute_force_matcher.cpp
+16
-8
No files found.
modules/ocl/src/brute_force_matcher.cpp
View file @
0df6dc16
...
...
@@ -64,11 +64,19 @@ namespace cv
static
const
int
OPT_SIZE
=
100
;
static
const
char
*
T_ARR
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float -D T_FLOAT"
,
"double"
};
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
...
@@ -78,7 +86,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
distType
,
block_size
,
m_size
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
{
...
...
@@ -96,7 +106,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
std
::
string
kernelName
=
"BruteForceMatch_UnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -110,7 +120,6 @@ template < int BLOCK_SIZE/*, typename Mask*/ >
void
match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
...
@@ -119,8 +128,9 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
distType
,
block_size
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -137,7 +147,7 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
std
::
string
kernelName
=
"BruteForceMatch_Match"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -152,7 +162,6 @@ template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
train
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
...
@@ -162,7 +171,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
distType
,
block_size
,
m_size
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
{
...
...
@@ -184,7 +195,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
std
::
string
kernelName
=
"BruteForceMatch_RadiusUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -193,7 +204,6 @@ template < int BLOCK_SIZE/*, typename Mask*/ >
void
radius_match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
train
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
...
@@ -202,7 +212,9 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
distType
,
block_size
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
{
...
...
@@ -224,7 +236,7 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
std
::
string
kernelName
=
"BruteForceMatch_RadiusMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -300,7 +312,9 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
distType
,
block_size
,
m_size
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
{
...
...
@@ -318,7 +332,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
std
::
string
kernelName
=
"BruteForceMatch_knnUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -334,7 +348,9 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
distType
,
block_size
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
{
...
...
@@ -352,7 +368,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
std
::
string
kernelName
=
"BruteForceMatch_knnMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -368,7 +384,10 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d"
,
distType
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -386,7 +405,7 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
std
::
string
kernelName
=
"BruteForceMatch_calcDistanceUnrolled"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -401,7 +420,10 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D DIST_TYPE=%d"
,
distType
);
sprintf
(
opt
,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d"
,
T_ARR
[
query
.
depth
()],
distType
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -418,7 +440,7 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
std
::
string
kernelName
=
"BruteForceMatch_calcDistance"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
,
opt
);
}
}
...
...
@@ -480,7 +502,7 @@ void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const o
//args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
trainIdx
.
depth
()
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
}
}
...
...
@@ -540,17 +562,6 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const
if
(
query
.
empty
()
||
train
.
empty
())
return
;
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
if
(
callType
!=
5
)
CV_Error
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
{
CV_Error
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
cols
==
query
.
cols
&&
train
.
type
()
==
query
.
type
());
...
...
@@ -605,7 +616,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, cons
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
vector
<
DMatch
>
&
matches
,
const
oclMat
&
mask
)
{
assert
(
mask
.
empty
());
// mask is not supported at the moment
assert
(
mask
.
empty
());
// mask is not supported at the moment
oclMat
trainIdx
,
distance
;
matchSingle
(
query
,
train
,
trainIdx
,
distance
,
mask
);
matchDownload
(
trainIdx
,
distance
,
matches
);
...
...
@@ -661,26 +672,14 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
if
(
query
.
empty
()
||
trainCollection
.
empty
())
return
;
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
if
(
callType
!=
5
)
CV_Error
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
{
CV_Error
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
const
int
nQuery
=
query
.
rows
;
const
int
nQuery
=
query
.
rows
;
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32S
,
trainIdx
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32S
,
imgIdx
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32F
,
distance
);
matchDispatcher
(
query
,
(
const
oclMat
*
)
trainCollection
.
ptr
(),
trainCollection
.
cols
,
masks
,
trainIdx
,
imgIdx
,
distance
,
distType
);
return
;
...
...
@@ -752,18 +751,6 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, co
if
(
query
.
empty
()
||
train
.
empty
())
return
;
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
if
(
callType
!=
5
)
CV_Error
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
{
CV_Error
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
...
...
@@ -860,26 +847,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &quer
typedef
void
(
*
caller_t
)(
const
oclMat
&
query
,
const
oclMat
&
trains
,
const
oclMat
&
masks
,
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
);
#if 0
static const caller_t callers[3][6] =
{
{
ocl_match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
ocl_match2L1_gpu<unsigned short>, ocl_match2L1_gpu<short>,
ocl_match2L1_gpu<int>, ocl_match2L1_gpu<float>
},
{
0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
0/*match2L2_gpu<int>*/, ocl_match2L2_gpu<float>
},
{
ocl_match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
ocl_match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
ocl_match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
}
};
#endif
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
const
int
nQuery
=
query
.
rows
;
...
...
@@ -1025,23 +993,11 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector<
// radiusMatchSingle
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
radiusMatchSingle
(
const
oclMat
&
query
,
const
oclMat
&
train
,
oclMat
&
trainIdx
,
oclMat
&
distance
,
oclMat
&
nMatches
,
float
maxDistance
,
const
oclMat
&
mask
)
oclMat
&
trainIdx
,
oclMat
&
distance
,
oclMat
&
nMatches
,
float
maxDistance
,
const
oclMat
&
mask
)
{
if
(
query
.
empty
()
||
train
.
empty
())
return
;
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
if
(
callType
!=
5
)
CV_Error
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
{
CV_Error
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
}
const
int
nQuery
=
query
.
rows
;
const
int
nTrain
=
train
.
rows
;
...
...
modules/ocl/src/opencl/brute_force_match.cl
View file @
0df6dc16
...
...
@@ -47,6 +47,10 @@
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
define
MAX_FLOAT
3.40282e+038f
#
ifndef
T
#
define
T
float
#
endif
#
ifndef
BLOCK_SIZE
#
define
BLOCK_SIZE
16
#
endif
...
...
@@ -54,68 +58,85 @@
#
define
MAX_DESC_LEN
64
#
endif
int
bit1Count
(
float
x
)
{
int
c
=
0
;
int
ix
=
(
int
)
x
;
for
(
int
i
=
0
; i < 32 ; i++)
{
c
+=
ix
&
0x1
;
ix
>>=
1
;
}
return
(
float
)
c
;
}
#
ifndef
DIST_TYPE
#
define
DIST_TYPE
0
#
endif
#
if
(
DIST_TYPE
==
0
)
#
define
DIST
(
x,
y
)
fabs
((
x
)
-
(
y
)
)
#
elif
(
DIST_TYPE
==
1
)
#
define
DIST
(
x,
y
)
(((
x
)
-
(
y
))
*
((
x
)
-
(
y
)))
#
elif
(
DIST_TYPE
==
2
)
#
define
DIST
(
x,
y
)
bit1Count
((
uint
)(
x
)
^
(
uint
)(
y
))
#
endif
//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
int
bit1Count
(
int
v
)
{
v
=
v
-
((
v
>>
1
)
&
0x55555555
)
; // reuse input as temporary
v
=
(
v
&
0x33333333
)
+
((
v
>>
2
)
&
0x33333333
)
; // temp
return
((
v
+
(
v
>>
4
)
&
0xF0F0F0F
)
*
0x1010101
)
>>
24
; // count
}
//
dirty
fix
for
non-template
support
#
if
(
DIST_TYPE
==
0
)
//
L1Dist
#
ifdef
T_FLOAT
#
define
DIST
(
x,
y
)
fabs
((
x
)
-
(
y
))
typedef
float
value_type
;
typedef
float
result_type
;
#
else
#
define
DIST
(
x,
y
)
abs
((
x
)
-
(
y
))
typedef
int
value_type
;
typedef
int
result_type
;
#
endif
#
define
DIST_RES
(
x
)
(
x
)
#
elif
(
DIST_TYPE
==
1
)
//
L2Dist
#
define
DIST
(
x,
y
)
(((
x
)
-
(
y
))
*
((
x
)
-
(
y
)))
typedef
float
value_type
;
typedef
float
result_type
;
#
define
DIST_RES
(
x
)
sqrt
(
x
)
#
elif
(
DIST_TYPE
==
2
)
//
Hamming
#
define
DIST
(
x,
y
)
bit1Count
(
(
x
)
^
(
y
)
)
typedef
int
value_type
;
typedef
int
result_type
;
#
define
DIST_RES
(
x
)
(
x
)
#
endif
float
reduce_block
(
__local
float
*s_query,
__local
float
*s_train,
int
lidx,
int
lidy
)
result_type
reduce_block
(
__local
value_type
*s_query,
__local
value_type
*s_train,
int
lidx,
int
lidy
)
{
float
result
=
0
;
result_type
result
=
0
;
#
pragma
unroll
for
(
int
j
=
0
; j < BLOCK_SIZE ; j++)
{
result
+=
DIST
(
s_query[lidy
*
BLOCK_SIZE
+
j],
s_train[j
*
BLOCK_SIZE
+
lidx]
)
;
result
+=
DIST
(
s_query[lidy
*
BLOCK_SIZE
+
j],
s_train[j
*
BLOCK_SIZE
+
lidx]
)
;
}
return
result
;
return
DIST_RES
(
result
)
;
}
float
reduce_multi_block
(
__local
float
*s_query,
__local
float
*s_train,
int
block_index,
int
lidx,
int
lidy
)
result_type
reduce_multi_block
(
__local
value_type
*s_query,
__local
value_type
*s_train,
int
block_index,
int
lidx,
int
lidy
)
{
float
result
=
0
;
result_type
result
=
0
;
#
pragma
unroll
for
(
int
j
=
0
; j < BLOCK_SIZE ; j++)
{
result
+=
DIST
(
s_query[lidy
*
MAX_DESC_LEN
+
block_index
*
BLOCK_SIZE
+
j],
s_train[j
*
BLOCK_SIZE
+
lidx]
)
;
result
+=
DIST
(
s_query[lidy
*
MAX_DESC_LEN
+
block_index
*
BLOCK_SIZE
+
j],
s_train[j
*
BLOCK_SIZE
+
lidx]
)
;
}
return
result
;
return
DIST_RES
(
result
)
;
}
/*
2dim
launch,
global
size:
dim0
is
(
query
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE,
dim1
is
BLOCK_SIZE
local
size:
dim0
is
BLOCK_SIZE,
dim1
is
BLOCK_SIZE.
*/
__kernel
void
BruteForceMatch_UnrollMatch
_D5
(
__global
float
*query,
__global
float
*train,
__kernel
void
BruteForceMatch_UnrollMatch
(
__global
T
*query,
__global
T
*train,
//__global
float
*mask,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
...
...
@@ -127,13 +148,12 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
int
step
)
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
MAX_DESC_LEN
;
__local
value_type
*s_query
=
(
__local
value_type
*
)
sharebuffer
;
__local
value_type
*s_train
=
(
__local
value_type
*
)
sharebuffer
+
BLOCK_SIZE
*
MAX_DESC_LEN
;
int
queryIdx
=
groupidx
*
BLOCK_SIZE
+
lidy
;
//
load
the
query
into
local
memory.
...
...
@@ -151,7 +171,7 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
,
endt
=
(
train_rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
; t < endt; t++)
{
float
result
=
0
;
result_type
result
=
0
;
#
pragma
unroll
for
(
int
i
=
0
; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
...
...
@@ -207,9 +227,9 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
}
}
__kernel
void
BruteForceMatch_Match
_D5
(
__global
float
*query,
__global
float
*train,
__kernel
void
BruteForceMatch_Match
(
__global
T
*query,
__global
T
*train,
//__global
float
*mask,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
...
...
@@ -230,14 +250,13 @@ __kernel void BruteForceMatch_Match_D5(
float
myBestDistance
=
MAX_FLOAT
;
int
myBestTrainIdx
=
-1
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
__local
value_type
*s_query
=
(
__local
value_type
*
)
sharebuffer
;
__local
value_type
*s_train
=
(
__local
value_type
*
)
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
//
loop
for
(
int
t
=
0
; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
//Dist
dist
;
float
result
=
0
;
result_type
result
=
0
;
for
(
int
i
=
0
; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; i++)
{
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
...
...
@@ -299,9 +318,9 @@ __kernel void BruteForceMatch_Match_D5(
}
//radius_unrollmatch
__kernel
void
BruteForceMatch_RadiusUnrollMatch
_D5
(
__global
float
*query,
__global
float
*train,
__kernel
void
BruteForceMatch_RadiusUnrollMatch
(
__global
T
*query,
__global
T
*train,
float
maxDistance,
//__global
float
*mask,
__global
int
*bestTrainIdx,
...
...
@@ -325,10 +344,10 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
const
int
queryIdx
=
groupidy
*
BLOCK_SIZE
+
lidy
;
const
int
trainIdx
=
groupidx
*
BLOCK_SIZE
+
lidx
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
__local
value_type
*s_query
=
(
__local
value_type
*
)
sharebuffer
;
__local
value_type
*s_train
=
(
__local
value_type
*
)
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
float
result
=
0
;
result_type
result
=
0
;
for
(
int
i
=
0
; i < MAX_DESC_LEN / BLOCK_SIZE ; ++i)
{
//load
a
BLOCK_SIZE
*
BLOCK_SIZE
block
into
local
train.
...
...
@@ -345,7 +364,8 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
convert_float
(
result
)
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx/*,
(
unsigned
int
)
-1*/
)
;
...
...
@@ -359,9 +379,9 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
}
//radius_match
__kernel
void
BruteForceMatch_RadiusMatch
_D5
(
__global
float
*query,
__global
float
*train,
__kernel
void
BruteForceMatch_RadiusMatch
(
__global
T
*query,
__global
T
*train,
float
maxDistance,
//__global
float
*mask,
__global
int
*bestTrainIdx,
...
...
@@ -385,10 +405,10 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
const
int
queryIdx
=
groupidy
*
BLOCK_SIZE
+
lidy
;
const
int
trainIdx
=
groupidx
*
BLOCK_SIZE
+
lidx
;
__local
float
*s_query
=
sharebuffer
;
__local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
__local
value_type
*s_query
=
(
__local
value_type
*
)
sharebuffer
;
__local
value_type
*s_train
=
(
__local
value_type
*
)
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
float
result
=
0
;
result_type
result
=
0
;
for
(
int
i
=
0
; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)
{
//load
a
BLOCK_SIZE
*
BLOCK_SIZE
block
into
local
train.
...
...
@@ -405,7 +425,8 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
convert_float
(
result
)
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx
)
;
...
...
@@ -419,9 +440,9 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
}
__kernel
void
BruteForceMatch_knnUnrollMatch
_D5
(
__global
float
*query,
__global
float
*train,
__kernel
void
BruteForceMatch_knnUnrollMatch
(
__global
T
*query,
__global
T
*train,
//__global
float
*mask,
__global
int2
*bestTrainIdx,
__global
float2
*bestDistance,
...
...
@@ -438,8 +459,8 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
queryIdx
=
groupidx
*
BLOCK_SIZE
+
lidy
;
local
float
*s_query
=
sharebuffer
;
local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
MAX_DESC_LEN
;
__local
value_type
*s_query
=
(
__local
value_type
*
)
sharebuffer
;
__local
value_type
*s_train
=
(
__local
value_type
*
)
sharebuffer
+
BLOCK_SIZE
*
MAX_DESC_LEN
;
//
load
the
query
into
local
memory.
for
(
int
i
=
0
; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
...
...
@@ -457,10 +478,9 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
float
result
=
0
;
result_type
result
=
0
;
for
(
int
i
=
0
; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
const
int
loadX
=
lidx
+
i
*
BLOCK_SIZE
;
//load
a
BLOCK_SIZE
*
BLOCK_SIZE
block
into
local
train.
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
s_train[lidx
*
BLOCK_SIZE
+
lidy]
=
loadx
<
train_cols
?
train[min
(
t
*
BLOCK_SIZE
+
lidy,
train_rows
-
1
)
*
(
step
/
sizeof
(
float
))
+
loadx]
:
0
;
...
...
@@ -494,8 +514,8 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
local
float
*s_distance
=
(
local
float
*
)
sharebuffer
;
local
int
*s_trainIdx
=
(
local
int
*
)(
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
)
;
__
local
float
*s_distance
=
(
local
float
*
)
sharebuffer
;
__
local
int
*s_trainIdx
=
(
local
int
*
)(
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
)
;
//
find
BestMatch
s_distance
+=
lidy
*
BLOCK_SIZE
;
...
...
@@ -565,9 +585,9 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
}
}
__kernel
void
BruteForceMatch_knnMatch
_D5
(
__global
float
*query,
__global
float
*train,
__kernel
void
BruteForceMatch_knnMatch
(
__global
T
*query,
__global
T
*train,
//__global
float
*mask,
__global
int2
*bestTrainIdx,
__global
float2
*bestDistance,
...
...
@@ -584,8 +604,8 @@ __kernel void BruteForceMatch_knnMatch_D5(
const
int
groupidx
=
get_group_id
(
0
)
;
const
int
queryIdx
=
groupidx
*
BLOCK_SIZE
+
lidy
;
local
float
*s_query
=
sharebuffer
;
local
float
*s_train
=
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
__local
value_type
*s_query
=
(
__local
value_type
*
)
sharebuffer
;
__local
value_type
*s_train
=
(
__local
value_type
*
)
sharebuffer
+
BLOCK_SIZE
*
BLOCK_SIZE
;
float
myBestDistance1
=
MAX_FLOAT
;
float
myBestDistance2
=
MAX_FLOAT
;
...
...
@@ -595,7 +615,7 @@ __kernel void BruteForceMatch_knnMatch_D5(
//loop
for
(
int
t
=
0
; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
float
result
=
0.0f
;
result_type
result
=
0.0f
;
for
(
int
i
=
0
; i < (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE ; i++)
{
const
int
loadx
=
lidx
+
i
*
BLOCK_SIZE
;
...
...
@@ -708,9 +728,9 @@ __kernel void BruteForceMatch_knnMatch_D5(
}
}
kernel
void
BruteForceMatch_calcDistanceUnrolled
_D5
(
__global
float
*query,
__global
float
*train,
kernel
void
BruteForceMatch_calcDistanceUnrolled
(
__global
T
*query,
__global
T
*train,
//__global
float
*mask,
__global
float
*allDist,
__local
float
*sharebuffer,
...
...
@@ -723,9 +743,9 @@ kernel void BruteForceMatch_calcDistanceUnrolled_D5(
/*
Todo
*/
}
kernel
void
BruteForceMatch_calcDistance
_D5
(
__global
float
*query,
__global
float
*train,
kernel
void
BruteForceMatch_calcDistance
(
__global
T
*query,
__global
T
*train,
//__global
float
*mask,
__global
float
*allDist,
__local
float
*sharebuffer,
...
...
@@ -738,7 +758,7 @@ kernel void BruteForceMatch_calcDistance_D5(
/*
Todo
*/
}
kernel
void
BruteForceMatch_findBestMatch
_D5
(
kernel
void
BruteForceMatch_findBestMatch
(
__global
float
*allDist,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
...
...
@@ -746,4 +766,4 @@ kernel void BruteForceMatch_findBestMatch_D5(
)
{
/*
Todo
*/
}
\ No newline at end of file
}
modules/ocl/test/test_brute_force_matcher.cpp
View file @
0df6dc16
...
...
@@ -158,11 +158,7 @@ namespace
TEST_P
(
BruteForceMatcher
,
RadiusMatch_Single
)
{
float
radius
;
if
(
distType
==
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L2Dist
)
radius
=
1.
f
/
countFactor
/
countFactor
;
else
radius
=
1.
f
/
countFactor
;
float
radius
=
1.
f
/
countFactor
;
cv
::
ocl
::
BruteForceMatcher_OCL_base
matcher
(
distType
);
...
...
@@ -191,8 +187,20 @@ namespace
INSTANTIATE_TEST_CASE_P
(
OCL_Features2D
,
BruteForceMatcher
,
testing
::
Combine
(
testing
::
Values
(
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L1Dist
),
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L2Dist
)),
testing
::
Values
(
DescriptorSize
(
57
),
DescriptorSize
(
64
),
DescriptorSize
(
83
),
DescriptorSize
(
128
),
DescriptorSize
(
179
),
DescriptorSize
(
256
),
DescriptorSize
(
304
))));
testing
::
Values
(
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L1Dist
),
DistType
(
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
L2Dist
)
/*,
DistType(cv::ocl::BruteForceMatcher_OCL_base::HammingDist)*/
),
testing
::
Values
(
DescriptorSize
(
57
),
DescriptorSize
(
64
),
DescriptorSize
(
83
),
DescriptorSize
(
128
),
DescriptorSize
(
179
),
DescriptorSize
(
256
),
DescriptorSize
(
304
))
)
);
}
// namespace
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment