Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ad6aae45
Commit
ad6aae45
authored
Mar 26, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
more fix of mismatch functions on CPU OCL
parent
2c06e59a
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
231 additions
and
798 deletions
+231
-798
brute_force_matcher.cpp
modules/ocl/src/brute_force_matcher.cpp
+119
-573
haar.cpp
modules/ocl/src/haar.cpp
+2
-2
moments.cpp
modules/ocl/src/moments.cpp
+3
-3
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+97
-218
haarobjectdetect.cl
modules/ocl/src/opencl/haarobjectdetect.cl
+8
-0
test_brute_force_matcher.cpp
modules/ocl/test/test_brute_force_matcher.cpp
+2
-2
No files found.
modules/ocl/src/brute_force_matcher.cpp
View file @
ad6aae45
...
@@ -44,6 +44,7 @@
...
@@ -44,6 +44,7 @@
//M*/
//M*/
#include "precomp.hpp"
#include "precomp.hpp"
#include <functional>
#include <functional>
#include <iterator>
#include <iterator>
#include <vector>
#include <vector>
...
@@ -60,10 +61,11 @@ namespace cv
...
@@ -60,10 +61,11 @@ namespace cv
}
}
}
}
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
globalSize
[]
=
{(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
@@ -91,20 +93,21 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
...
@@ -91,20 +93,21 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
std
::
string
kernelName
=
"BruteForceMatch_UnrollMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_UnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
);
}
}
}
}
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
void
matchUnrolledCached
(
const
oclMat
/*query*/
,
const
oclMat
*
/*trains*/
,
int
/*n*/
,
const
oclMat
/*mask*/
,
void
matchUnrolledCached
(
const
oclMat
/*query*/
,
const
oclMat
*
/*trains*/
,
int
/*n*/
,
const
oclMat
/*mask*/
,
const
oclMat
&
/*bestTrainIdx*/
,
const
oclMat
&
/*bestImgIdx*/
,
const
oclMat
&
/*bestDistance*/
,
int
/*distType*/
)
const
oclMat
&
/*bestTrainIdx*/
,
const
oclMat
&
/*bestImgIdx*/
,
const
oclMat
&
/*bestDistance*/
,
int
/*distType*/
)
{
{
}
}
template
<
int
BLOCK_SIZE
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
/*, typename Mask*/
>
void
match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
void
match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
globalSize
[]
=
{(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
@@ -130,21 +133,22 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
...
@@ -130,21 +133,22 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
std
::
string
kernelName
=
"BruteForceMatch_Match"
;
std
::
string
kernelName
=
"BruteForceMatch_Match"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
);
}
}
}
}
template
<
int
BLOCK_SIZE
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
/*, typename Mask*/
>
void
match
(
const
oclMat
/*query*/
,
const
oclMat
*
/*trains*/
,
int
/*n*/
,
const
oclMat
/*mask*/
,
void
match
(
const
oclMat
/*query*/
,
const
oclMat
*
/*trains*/
,
int
/*n*/
,
const
oclMat
/*mask*/
,
const
oclMat
&
/*bestTrainIdx*/
,
const
oclMat
&
/*bestImgIdx*/
,
const
oclMat
&
/*bestDistance*/
,
int
/*distType*/
)
const
oclMat
&
/*bestTrainIdx*/
,
const
oclMat
&
/*bestImgIdx*/
,
const
oclMat
&
/*bestDistance*/
,
int
/*distType*/
)
{
{
}
}
//radius_matchUnrolledCached
//radius_matchUnrolledCached
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
/*mask*/
,
void
matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
train
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
1
};
size_t
globalSize
[]
=
{(
train
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
@@ -176,15 +180,16 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
...
@@ -176,15 +180,16 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
std
::
string
kernelName
=
"BruteForceMatch_RadiusUnrollMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_RadiusUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
);
}
}
}
}
//radius_match
//radius_match
template
<
int
BLOCK_SIZE
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
/*, typename Mask*/
>
void
radius_match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
/*mask*/
,
void
radius_match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
{
assert
(
query
.
type
()
==
CV_32F
);
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
size_t
globalSize
[]
=
{(
train
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
1
};
size_t
globalSize
[]
=
{(
train
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
(
query
.
rows
+
BLOCK_SIZE
-
1
)
/
BLOCK_SIZE
*
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
size_t
localSize
[]
=
{
BLOCK_SIZE
,
BLOCK_SIZE
,
1
};
...
@@ -214,263 +219,70 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
...
@@ -214,263 +219,70 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
std
::
string
kernelName
=
"BruteForceMatch_RadiusMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_RadiusMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
//float *dis = (float *)clEnqueueMapBuffer(ctx->impl->clCmdQueue, (cl_mem)distance.data, CL_TRUE, CL_MAP_READ, 0, 8, 0, NULL, NULL, NULL);
//printf("%f, %f\n", dis[0], dis[1]);
}
}
}
}
// with mask
static
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
template
<
typename
T
/*, typename Mask*/
>
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
{
const
oclMat
zeroMask
;
const
oclMat
&
tempMask
=
mask
.
data
?
mask
:
zeroMask
;
if
(
query
.
cols
<=
64
)
if
(
query
.
cols
<=
64
)
{
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
matchUnrolledCached
<
16
,
64
>
(
query
,
train
,
tempMask
,
trainIdx
,
distance
,
distType
);
}
else
if
(
query
.
cols
<=
128
)
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, train, mask, trainIdx, distance, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, train, mask, trainIdx, distance, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, train, mask, trainIdx, distance, stream);
}*/
else
{
match
<
16
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
}
// without mask
template
<
typename
T
/*, typename Mask*/
>
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
oclMat
mask
;
if
(
query
.
cols
<=
64
)
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
}
else
if
(
query
.
cols
<=
128
)
else
if
(
query
.
cols
<=
128
)
{
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
matchUnrolledCached
<
16
,
128
>
(
query
,
train
,
tempMask
,
trainIdx
,
distance
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance);
}
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance);
}*/
else
else
{
{
match
<
16
,
T
>
(
query
,
train
,
m
ask
,
trainIdx
,
distance
,
distType
);
match
<
16
>
(
query
,
train
,
tempM
ask
,
trainIdx
,
distance
,
distType
);
}
}
}
}
template
<
typename
T
/*, typename Mask*/
>
static
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
*
trains
,
int
n
,
const
oclMat
&
mask
,
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
*
trains
,
int
n
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
,
int
distType
)
{
{
const
oclMat
zeroMask
;
const
oclMat
&
tempMask
=
mask
.
data
?
mask
:
zeroMask
;
if
(
query
.
cols
<=
64
)
if
(
query
.
cols
<=
64
)
{
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
trains
,
n
,
m
ask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
matchUnrolledCached
<
16
,
64
>
(
query
,
trains
,
n
,
tempM
ask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
}
else
if
(
query
.
cols
<=
128
)
else
if
(
query
.
cols
<=
128
)
{
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
trains
,
n
,
mask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
matchUnrolledCached
<
16
,
128
>
(
query
,
trains
,
n
,
tempMask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}*/
else
else
{
{
match
<
16
,
T
>
(
query
,
trains
,
n
,
mask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
match
<
16
>
(
query
,
trains
,
n
,
tempMask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
}
template
<
typename
T
/*, typename Mask*/
>
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
*
trains
,
int
n
,
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
,
int
distType
)
{
oclMat
mask
;
if
(
query
.
cols
<=
64
)
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
trains
,
n
,
mask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
else
if
(
query
.
cols
<=
128
)
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
trains
,
n
,
mask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}*/
else
{
match
<
16
,
T
>
(
query
,
trains
,
n
,
mask
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
}
}
}
//radius matchDispatcher
//radius matchDispatcher
// with mask
static
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
mask
,
template
<
typename
T
/*, typename Mask*/
>
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
{
const
oclMat
zeroMask
;
const
oclMat
&
tempMask
=
mask
.
data
?
mask
:
zeroMask
;
if
(
query
.
cols
<=
64
)
if
(
query
.
cols
<=
64
)
{
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
train
,
maxDistance
,
m
ask
,
trainIdx
,
distance
,
nMatches
,
distType
);
matchUnrolledCached
<
16
,
64
>
(
query
,
train
,
maxDistance
,
tempM
ask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
}
else
if
(
query
.
cols
<=
128
)
else
if
(
query
.
cols
<=
128
)
{
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
matchUnrolledCached
<
16
,
128
>
(
query
,
train
,
maxDistance
,
tempMask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}*/
else
{
radius_match
<
16
,
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
}
// without mask
template
<
typename
T
/*, typename Mask*/
>
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
oclMat
mask
;
if
(
query
.
cols
<=
64
)
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
else
if
(
query
.
cols
<=
128
)
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}*/
else
else
{
{
radius_match
<
16
,
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
radius_match
<
16
>
(
query
,
train
,
maxDistance
,
tempMask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
}
template
<
typename
T
/*, typename Mask*/
>
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
n
,
float
maxDistance
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
if
(
query
.
cols
<=
64
)
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
train
,
n
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
else
if
(
query
.
cols
<=
128
)
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
train
,
n
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}*/
else
{
match
<
16
,
T
>
(
query
,
train
,
n
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
}
// without mask
template
<
typename
T
/*, typename Mask*/
>
void
matchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
n
,
float
maxDistance
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
int
distType
)
{
oclMat
mask
;
if
(
query
.
cols
<=
64
)
{
matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
train
,
n
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
else
if
(
query
.
cols
<=
128
)
{
matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
train
,
n
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}*/
else
{
match
<
16
,
T
>
(
query
,
train
,
n
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
}
}
}
//knn match Dispatcher
//knn match Dispatcher
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
void
knn_matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
void
knn_matchUnrolledCached
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
{
...
@@ -501,11 +313,11 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
...
@@ -501,11 +313,11 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
std
::
string
kernelName
=
"BruteForceMatch_knnUnrollMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_knnUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
);
}
}
}
}
template
<
int
BLOCK_SIZE
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
/*, typename Mask*/
>
void
knn_match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
void
knn_match
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
{
...
@@ -534,11 +346,11 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
...
@@ -534,11 +346,11 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
std
::
string
kernelName
=
"BruteForceMatch_knnMatch"
;
std
::
string
kernelName
=
"BruteForceMatch_knnMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
);
}
}
}
}
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
,
int
MAX_DESC_LEN
/*, typename Mask*/
>
void
calcDistanceUnrolled
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
allDist
,
int
distType
)
void
calcDistanceUnrolled
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
allDist
,
int
distType
)
{
{
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
...
@@ -567,11 +379,11 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
...
@@ -567,11 +379,11 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
std
::
string
kernelName
=
"BruteForceMatch_calcDistanceUnrolled"
;
std
::
string
kernelName
=
"BruteForceMatch_calcDistanceUnrolled"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
);
}
}
}
}
template
<
int
BLOCK_SIZE
,
typename
T
/*, typename Mask*/
>
template
<
int
BLOCK_SIZE
/*, typename Mask*/
>
void
calcDistance
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
allDist
,
int
distType
)
void
calcDistance
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
/*mask*/
,
const
oclMat
&
allDist
,
int
distType
)
{
{
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
cv
::
ocl
::
Context
*
ctx
=
query
.
clCxt
;
...
@@ -598,69 +410,43 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
...
@@ -598,69 +410,43 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
std
::
string
kernelName
=
"BruteForceMatch_calcDistance"
;
std
::
string
kernelName
=
"BruteForceMatch_calcDistance"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
);
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Calc Distance dispatcher
// Calc Distance dispatcher
template
<
typename
T
/*, typename Mask*/
>
static
void
calcDistanceDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
void
calcDistanceDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
const
oclMat
&
allDist
,
int
distType
)
const
oclMat
&
allDist
,
int
distType
)
{
{
if
(
query
.
cols
<=
64
)
if
(
query
.
cols
<=
64
)
{
{
calcDistanceUnrolled
<
16
,
64
,
T
>
(
query
,
train
,
mask
,
allDist
,
distType
);
calcDistanceUnrolled
<
16
,
64
>
(
query
,
train
,
mask
,
allDist
,
distType
);
}
}
else
if
(
query
.
cols
<=
128
)
else
if
(
query
.
cols
<=
128
)
{
{
calcDistanceUnrolled
<
16
,
128
,
T
>
(
query
,
train
,
mask
,
allDist
,
distType
);
calcDistanceUnrolled
<
16
,
128
>
(
query
,
train
,
mask
,
allDist
,
distType
);
}
}
/*else if (query.cols <= 256)
{
calcDistanceUnrolled<16, 256, Dist>(query, train, mask, allDist, stream);
}
else if (query.cols <= 512)
{
calcDistanceUnrolled<16, 512, Dist>(query, train, mask, allDist, stream);
}
else if (query.cols <= 1024)
{
calcDistanceUnrolled<16, 1024, Dist>(query, train, mask, allDist, stream);
}*/
else
else
{
{
calcDistance
<
16
,
T
>
(
query
,
train
,
mask
,
allDist
,
distType
);
calcDistance
<
16
>
(
query
,
train
,
mask
,
allDist
,
distType
);
}
}
}
}
template
<
typename
T
/*, typename Mask*/
>
static
void
match2Dispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
void
match2Dispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
int
distType
)
{
{
if
(
query
.
cols
<=
64
)
if
(
query
.
cols
<=
64
)
{
{
knn_matchUnrolledCached
<
16
,
64
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
knn_matchUnrolledCached
<
16
,
64
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
}
else
if
(
query
.
cols
<=
128
)
else
if
(
query
.
cols
<=
128
)
{
{
knn_matchUnrolledCached
<
16
,
128
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
knn_matchUnrolledCached
<
16
,
128
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}*/
else
else
{
{
knn_match
<
16
,
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
knn_match
<
16
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
}
}
}
...
@@ -686,7 +472,7 @@ void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const o
...
@@ -686,7 +472,7 @@ void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const o
//args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
trainIdx
.
depth
()
,
-
1
);
}
}
}
}
...
@@ -695,206 +481,22 @@ static void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &
...
@@ -695,206 +481,22 @@ static void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &
findKnnMatch
<
256
>
(
k
,
trainIdx
,
distance
,
allDist
,
distType
);
findKnnMatch
<
256
>
(
k
,
trainIdx
,
distance
,
allDist
,
distType
);
}
}
//with mask
static
void
kmatchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
k
,
const
oclMat
&
mask
,
template
<
typename
T
/*, typename Mask*/
>
void
kmatchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
k
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
allDist
,
int
distType
)
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
allDist
,
int
distType
)
{
{
const
oclMat
zeroMask
;
const
oclMat
&
tempMask
=
mask
.
data
?
mask
:
zeroMask
;
if
(
k
==
2
)
if
(
k
==
2
)
{
{
match2Dispatcher
<
T
>
(
query
,
train
,
m
ask
,
trainIdx
,
distance
,
distType
);
match2Dispatcher
(
query
,
train
,
tempM
ask
,
trainIdx
,
distance
,
distType
);
}
}
else
else
{
{
calcDistanceDispatcher
<
T
>
(
query
,
train
,
m
ask
,
allDist
,
distType
);
calcDistanceDispatcher
(
query
,
train
,
tempM
ask
,
allDist
,
distType
);
findKnnMatchDispatcher
(
k
,
trainIdx
,
distance
,
allDist
,
distType
);
findKnnMatchDispatcher
(
k
,
trainIdx
,
distance
,
allDist
,
distType
);
}
}
}
}
//without mask
template
<
typename
T
/*, typename Mask*/
>
void
kmatchDispatcher
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
k
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
allDist
,
int
distType
)
{
oclMat
mask
;
if
(
k
==
2
)
{
match2Dispatcher
<
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
else
{
calcDistanceDispatcher
<
T
>
(
query
,
train
,
mask
,
allDist
,
distType
);
findKnnMatchDispatcher
(
k
,
trainIdx
,
distance
,
allDist
,
distType
);
}
}
template
<
typename
T
>
void
ocl_matchL1_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
)
{
int
distType
=
0
;
if
(
mask
.
data
)
{
matchDispatcher
<
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
else
{
matchDispatcher
<
T
>
(
query
,
train
,
trainIdx
,
distance
,
distType
);
}
}
template
<
typename
T
>
void
ocl_matchL1_gpu
(
const
oclMat
&
query
,
const
oclMat
&
trains
,
const
oclMat
&
masks
,
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
)
{
int
distType
=
0
;
if
(
masks
.
data
)
{
matchDispatcher
<
T
>
(
query
,
(
const
oclMat
*
)
trains
.
ptr
(),
trains
.
cols
,
masks
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
else
{
matchDispatcher
<
T
>
(
query
,
(
const
oclMat
*
)
trains
.
ptr
(),
trains
.
cols
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
}
template
<
typename
T
>
void
ocl_matchL2_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
)
{
int
distType
=
1
;
if
(
mask
.
data
)
{
matchDispatcher
<
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
else
{
matchDispatcher
<
T
>
(
query
,
train
,
trainIdx
,
distance
,
distType
);
}
}
template
<
typename
T
>
void
ocl_matchL2_gpu
(
const
oclMat
&
query
,
const
oclMat
&
trains
,
const
oclMat
&
masks
,
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
)
{
int
distType
=
1
;
if
(
masks
.
data
)
{
matchDispatcher
<
T
>
(
query
,
(
const
oclMat
*
)
trains
.
ptr
(),
trains
.
cols
,
masks
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
else
{
matchDispatcher
<
T
>
(
query
,
(
const
oclMat
*
)
trains
.
ptr
(),
trains
.
cols
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
}
template
<
typename
T
>
void
ocl_matchHamming_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
)
{
int
distType
=
2
;
if
(
mask
.
data
)
{
matchDispatcher
<
T
>
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
}
else
{
matchDispatcher
<
T
>
(
query
,
train
,
trainIdx
,
distance
,
distType
);
}
}
template
<
typename
T
>
void
ocl_matchHamming_gpu
(
const
oclMat
&
query
,
const
oclMat
&
trains
,
const
oclMat
&
masks
,
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
)
{
int
distType
=
2
;
if
(
masks
.
data
)
{
matchDispatcher
<
T
>
(
query
,
(
const
oclMat
*
)
trains
.
ptr
(),
trains
.
cols
,
masks
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
else
{
matchDispatcher
<
T
>
(
query
,
(
const
oclMat
*
)
trains
.
ptr
(),
trains
.
cols
,
trainIdx
,
imgIdx
,
distance
,
distType
);
}
}
// knn caller
template
<
typename
T
>
void
ocl_matchL1_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
k
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
allDist
)
{
int
distType
=
0
;
if
(
mask
.
data
)
kmatchDispatcher
<
T
>
(
query
,
train
,
k
,
mask
,
trainIdx
,
distance
,
allDist
,
distType
);
else
kmatchDispatcher
<
T
>
(
query
,
train
,
k
,
trainIdx
,
distance
,
allDist
,
distType
);
}
template
<
typename
T
>
void
ocl_matchL2_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
k
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
allDist
)
{
int
distType
=
1
;
if
(
mask
.
data
)
kmatchDispatcher
<
T
>
(
query
,
train
,
k
,
mask
,
trainIdx
,
distance
,
allDist
,
distType
);
else
kmatchDispatcher
<
T
>
(
query
,
train
,
k
,
trainIdx
,
distance
,
allDist
,
distType
);
}
template
<
typename
T
>
void
ocl_matchHamming_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
k
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
allDist
)
{
int
distType
=
2
;
if
(
mask
.
data
)
kmatchDispatcher
<
T
>
(
query
,
train
,
k
,
mask
,
trainIdx
,
distance
,
allDist
,
distType
);
else
kmatchDispatcher
<
T
>
(
query
,
train
,
k
,
trainIdx
,
distance
,
allDist
,
distType
);
}
//radius caller
template
<
typename
T
>
void
ocl_matchL1_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
)
{
int
distType
=
0
;
if
(
mask
.
data
)
matchDispatcher
<
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
else
matchDispatcher
<
T
>
(
query
,
train
,
maxDistance
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
template
<
typename
T
>
void
ocl_matchL2_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
)
{
int
distType
=
1
;
if
(
mask
.
data
)
matchDispatcher
<
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
else
matchDispatcher
<
T
>
(
query
,
train
,
maxDistance
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
template
<
typename
T
>
void
ocl_matchHamming_gpu
(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
mask
,
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
)
{
int
distType
=
2
;
if
(
mask
.
data
)
matchDispatcher
<
T
>
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
else
matchDispatcher
<
T
>
(
query
,
train
,
maxDistance
,
trainIdx
,
distance
,
nMatches
,
distType
);
}
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
BruteForceMatcher_OCL_base
(
DistType
distType_
)
:
distType
(
distType_
)
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
BruteForceMatcher_OCL_base
(
DistType
distType_
)
:
distType
(
distType_
)
{
{
}
}
...
@@ -929,38 +531,28 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const
...
@@ -929,38 +531,28 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const
{
{
if
(
query
.
empty
()
||
train
.
empty
())
if
(
query
.
empty
()
||
train
.
empty
())
return
;
return
;
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
int
callType
=
query
.
depth
();
char
cvFuncName
[]
=
"singleMatch"
;
if
(
callType
!=
5
)
CV_ERROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
typedef
void
(
*
caller_t
)(
const
oclMat
&
query
,
const
oclMat
&
train
,
const
oclMat
&
mask
,
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
);
||
callType
!=
2
||
callType
!=
4
)))
static
const
caller_t
callers
[
3
][
6
]
=
{
{
{
CV_ERROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
ocl_matchL1_gpu
<
unsigned
char
>
,
0
/*ocl_matchL1_gpu<signed char>*/
,
}
ocl_matchL1_gpu
<
unsigned
short
>
,
ocl_matchL1_gpu
<
short
>
,
ocl_matchL1_gpu
<
int
>
,
ocl_matchL1_gpu
<
float
>
},
{
0
/*ocl_matchL2_gpu<unsigned char>*/
,
0
/*ocl_matchL2_gpu<signed char>*/
,
0
/*ocl_matchL2_gpu<unsigned short>*/
,
0
/*ocl_matchL2_gpu<short>*/
,
0
/*ocl_matchL2_gpu<int>*/
,
ocl_matchL2_gpu
<
float
>
},
{
ocl_matchHamming_gpu
<
unsigned
char
>
,
0
/*ocl_matchHamming_gpu<signed char>*/
,
ocl_matchHamming_gpu
<
unsigned
short
>
,
0
/*ocl_matchHamming_gpu<short>*/
,
ocl_matchHamming_gpu
<
int
>
,
0
/*ocl_matchHamming_gpu<float>*/
}
};
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
cols
==
query
.
cols
&&
train
.
type
()
==
query
.
type
());
CV_Assert
(
train
.
cols
==
query
.
cols
&&
train
.
type
()
==
query
.
type
());
const
int
nQuery
=
query
.
rows
;
trainIdx
.
create
(
1
,
query
.
rows
,
CV_32S
);
trainIdx
.
create
(
1
,
nQuery
,
CV_32S
);
distance
.
create
(
1
,
query
.
rows
,
CV_32F
);
distance
.
create
(
1
,
nQuery
,
CV_32F
);
caller_t
func
=
callers
[
distType
][
query
.
depth
()];
matchDispatcher
(
query
,
train
,
mask
,
trainIdx
,
distance
,
distType
);
func
(
query
,
train
,
mask
,
trainIdx
,
distance
);
exit
:
return
;
}
}
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
matchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
vector
<
DMatch
>
&
matches
)
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
matchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
vector
<
DMatch
>
&
matches
)
...
@@ -1062,40 +654,27 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
...
@@ -1062,40 +654,27 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
if
(
query
.
empty
()
||
trainCollection
.
empty
())
if
(
query
.
empty
()
||
trainCollection
.
empty
())
return
;
return
;
typedef
void
(
*
caller_t
)(
const
oclMat
&
query
,
const
oclMat
&
trains
,
const
oclMat
&
masks
,
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
);
int
callType
=
query
.
depth
();
char
cvFuncName
[]
=
"matchCollection"
;
if
(
callType
!=
5
)
CV_ERROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
static
const
caller_t
callers
[
3
][
6
]
=
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
{
{
{
CV_ERROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
ocl_matchL1_gpu
<
unsigned
char
>
,
0
/*matchL1_gpu<signed char>*/
,
}
ocl_matchL1_gpu
<
unsigned
short
>
,
ocl_matchL1_gpu
<
short
>
,
ocl_matchL1_gpu
<
int
>
,
ocl_matchL1_gpu
<
float
>
},
{
0
/*matchL2_gpu<unsigned char>*/
,
0
/*matchL2_gpu<signed char>*/
,
0
/*matchL2_gpu<unsigned short>*/
,
0
/*matchL2_gpu<short>*/
,
0
/*matchL2_gpu<int>*/
,
ocl_matchL2_gpu
<
float
>
},
{
ocl_matchHamming_gpu
<
unsigned
char
>
,
0
/*matchHamming_gpu<signed char>*/
,
ocl_matchHamming_gpu
<
unsigned
short
>
,
0
/*matchHamming_gpu<short>*/
,
ocl_matchHamming_gpu
<
int
>
,
0
/*matchHamming_gpu<float>*/
}
};
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
const
int
nQuery
=
query
.
rows
;
trainIdx
.
create
(
1
,
query
.
rows
,
CV_32S
);
imgIdx
.
create
(
1
,
query
.
rows
,
CV_32S
);
trainIdx
.
create
(
1
,
nQuery
,
CV_32S
);
distance
.
create
(
1
,
query
.
rows
,
CV_32F
);
imgIdx
.
create
(
1
,
nQuery
,
CV_32S
);
distance
.
create
(
1
,
nQuery
,
CV_32F
);
caller_t
func
=
callers
[
distType
][
query
.
depth
()];
CV_Assert
(
func
!=
0
);
func
(
query
,
trainCollection
,
masks
,
trainIdx
,
imgIdx
,
distance
);
matchDispatcher
(
query
,
(
const
oclMat
*
)
trainCollection
.
ptr
(),
trainCollection
.
cols
,
masks
,
trainIdx
,
imgIdx
,
distance
,
distType
);
exit
:
return
;
}
}
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
matchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
,
vector
<
DMatch
>
&
matches
)
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
matchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
imgIdx
,
const
oclMat
&
distance
,
vector
<
DMatch
>
&
matches
)
...
@@ -1164,52 +743,39 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, co
...
@@ -1164,52 +743,39 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, co
if
(
query
.
empty
()
||
train
.
empty
())
if
(
query
.
empty
()
||
train
.
empty
())
return
;
return
;
typedef
void
(
*
caller_t
)(
const
oclMat
&
query
,
const
oclMat
&
train
,
int
k
,
const
oclMat
&
mask
,
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
allDist
);
int
callType
=
query
.
depth
(
);
static
const
caller_t
callers
[
3
][
6
]
=
char
cvFuncName
[]
=
"knnMatchSingle"
;
if
(
callType
!=
5
)
CV_ERROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
||
callType
!=
2
||
callType
!=
4
)))
{
{
{
CV_ERROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
ocl_matchL1_gpu
<
unsigned
char
>
,
0
/*ocl_matchL1_gpu<signed char>*/
,
}
ocl_matchL1_gpu
<
unsigned
short
>
,
ocl_matchL1_gpu
<
short
>
,
ocl_matchL1_gpu
<
int
>
,
ocl_matchL1_gpu
<
float
>
},
{
0
/*ocl_matchL2_gpu<unsigned char>*/
,
0
/*ocl_matchL2_gpu<signed char>*/
,
0
/*ocl_matchL2_gpu<unsigned short>*/
,
0
/*ocl_matchL2_gpu<short>*/
,
0
/*ocl_matchL2_gpu<int>*/
,
ocl_matchL2_gpu
<
float
>
},
{
ocl_matchHamming_gpu
<
unsigned
char
>
,
0
/*ocl_matchHamming_gpu<signed char>*/
,
ocl_matchHamming_gpu
<
unsigned
short
>
,
0
/*ocl_matchHamming_gpu<short>*/
,
ocl_matchHamming_gpu
<
int
>
,
0
/*ocl_matchHamming_gpu<float>*/
}
};
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
const
int
nQuery
=
query
.
rows
;
const
int
nTrain
=
train
.
rows
;
if
(
k
==
2
)
if
(
k
==
2
)
{
{
trainIdx
.
create
(
1
,
nQuery
,
CV_32SC2
);
trainIdx
.
create
(
1
,
query
.
rows
,
CV_32SC2
);
distance
.
create
(
1
,
nQuery
,
CV_32FC2
);
distance
.
create
(
1
,
query
.
rows
,
CV_32FC2
);
}
}
else
else
{
{
trainIdx
.
create
(
nQuery
,
k
,
CV_32S
);
trainIdx
.
create
(
query
.
rows
,
k
,
CV_32S
);
distance
.
create
(
nQuery
,
k
,
CV_32F
);
distance
.
create
(
query
.
rows
,
k
,
CV_32F
);
allDist
.
create
(
nQuery
,
nTrain
,
CV_32FC1
);
allDist
.
create
(
query
.
rows
,
train
.
rows
,
CV_32FC1
);
}
}
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
caller_t
func
=
callers
[
distType
][
query
.
depth
()];
kmatchDispatcher
(
query
,
train
,
k
,
mask
,
trainIdx
,
distance
,
allDist
,
distType
);
CV_Assert
(
func
!=
0
);
exit
:
return
;
func
(
query
,
train
,
k
,
mask
,
trainIdx
,
distance
,
allDist
);
}
}
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
knnMatchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
vector
<
vector
<
DMatch
>
>
&
matches
,
bool
compactResult
)
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
knnMatchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
vector
<
vector
<
DMatch
>
>
&
matches
,
bool
compactResult
)
...
@@ -1394,8 +960,6 @@ namespace
...
@@ -1394,8 +960,6 @@ namespace
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
knnMatch
(
const
oclMat
&
query
,
vector
<
vector
<
DMatch
>
>
&
matches
,
int
k
,
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
knnMatch
(
const
oclMat
&
query
,
vector
<
vector
<
DMatch
>
>
&
matches
,
int
k
,
const
vector
<
oclMat
>
&
masks
,
bool
compactResult
)
const
vector
<
oclMat
>
&
masks
,
bool
compactResult
)
{
{
if
(
k
==
2
)
if
(
k
==
2
)
{
{
oclMat
trainCollection
;
oclMat
trainCollection
;
...
@@ -1455,50 +1019,34 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query,
...
@@ -1455,50 +1019,34 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query,
if
(
query
.
empty
()
||
train
.
empty
())
if
(
query
.
empty
()
||
train
.
empty
())
return
;
return
;
typedef
void
(
*
caller_t
)(
const
oclMat
&
query
,
const
oclMat
&
train
,
float
maxDistance
,
const
oclMat
&
mask
,
// match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
);
int
callType
=
query
.
depth
();
char
cvFuncName
[]
=
"radiusMatchSingle"
;
if
(
callType
!=
5
)
CV_ERROR
(
CV_UNSUPPORTED_FORMAT_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
//#if
0
if
((
distType
==
0
&&
callType
==
1
)
||
(
distType
==
1
&&
callType
!=
5
)
||
(
distType
==
2
&&
(
callType
!=
0
static
const
caller_t
callers
[
3
][
6
]
=
||
callType
!=
2
||
callType
!=
4
)))
{
{
{
CV_ERROR
(
CV_UNSUPPORTED_DEPTH_ERR
,
"BruteForceMatch OpenCL only support float type query!
\n
"
);
ocl_matchL1_gpu
<
unsigned
char
>
,
0
/*ocl_matchL1_gpu<signed char>*/
,
}
ocl_matchL1_gpu
<
unsigned
short
>
,
ocl_matchL1_gpu
<
short
>
,
ocl_matchL1_gpu
<
int
>
,
ocl_matchL1_gpu
<
float
>
},
{
0
/*ocl_matchL2_gpu<unsigned char>*/
,
0
/*ocl_matchL2_gpu<signed char>*/
,
0
/*ocl_matchL2_gpu<unsigned short>*/
,
0
/*ocl_matchL2_gpu<short>*/
,
0
/*ocl_matchL2_gpu<int>*/
,
ocl_matchL2_gpu
<
float
>
},
{
ocl_matchHamming_gpu
<
unsigned
char
>
,
0
/*ocl_matchHamming_gpu<signed char>*/
,
ocl_matchHamming_gpu
<
unsigned
short
>
,
0
/*ocl_matchHamming_gpu<short>*/
,
ocl_matchHamming_gpu
<
int
>
,
0
/*ocl_matchHamming_gpu<float>*/
}
};
//#endif
const
int
nQuery
=
query
.
rows
;
const
int
nTrain
=
train
.
rows
;
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
query
.
channels
()
==
1
&&
query
.
depth
()
<
CV_64F
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
CV_Assert
(
train
.
type
()
==
query
.
type
()
&&
train
.
cols
==
query
.
cols
);
CV_Assert
(
trainIdx
.
empty
()
||
(
trainIdx
.
rows
==
nQuery
&&
trainIdx
.
size
()
==
distance
.
size
()));
CV_Assert
(
trainIdx
.
empty
()
||
(
trainIdx
.
rows
==
query
.
rows
&&
trainIdx
.
size
()
==
distance
.
size
()));
nMatches
.
create
(
1
,
nQuery
,
CV_32SC1
);
nMatches
.
create
(
1
,
query
.
rows
,
CV_32SC1
);
if
(
trainIdx
.
empty
())
if
(
trainIdx
.
empty
())
{
{
trainIdx
.
create
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32SC1
);
trainIdx
.
create
(
query
.
rows
,
std
::
max
((
train
.
rows
/
100
),
10
),
CV_32SC1
);
distance
.
create
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32FC1
);
distance
.
create
(
query
.
rows
,
std
::
max
((
train
.
rows
/
100
),
10
),
CV_32FC1
);
}
}
nMatches
.
setTo
(
Scalar
::
all
(
0
));
nMatches
.
setTo
(
Scalar
::
all
(
0
));
caller_t
func
=
callers
[
distType
][
query
.
depth
()];
matchDispatcher
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
,
distType
);
//CV_Assert(func != 0);
exit
:
//func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
return
;
func
(
query
,
train
,
maxDistance
,
mask
,
trainIdx
,
distance
,
nMatches
);
}
}
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
radiusMatchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
void
cv
::
ocl
::
BruteForceMatcher_OCL_base
::
radiusMatchDownload
(
const
oclMat
&
trainIdx
,
const
oclMat
&
distance
,
const
oclMat
&
nMatches
,
...
@@ -1697,5 +1245,3 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vecto
...
@@ -1697,5 +1245,3 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vecto
radiusMatchCollection
(
query
,
trainIdx
,
imgIdx
,
distance
,
nMatches
,
maxDistance
,
masks
);
radiusMatchCollection
(
query
,
trainIdx
,
imgIdx
,
distance
,
nMatches
,
maxDistance
,
masks
);
radiusMatchDownload
(
trainIdx
,
imgIdx
,
distance
,
nMatches
,
matches
,
compactResult
);
radiusMatchDownload
(
trainIdx
,
imgIdx
,
distance
,
nMatches
,
matches
,
compactResult
);
}
}
modules/ocl/src/haar.cpp
View file @
ad6aae45
...
@@ -953,8 +953,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -953,8 +953,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
//int flag = 0;
//int flag = 0;
oclMat
gimg1
(
gimg
.
rows
,
gimg
.
cols
,
CV_8UC1
);
oclMat
gimg1
(
gimg
.
rows
,
gimg
.
cols
,
CV_8UC1
);
oclMat
gsum
(
totalheight
,
gimg
.
cols
+
1
,
CV_32SC1
);
oclMat
gsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32SC1
);
oclMat
gsqsum
(
totalheight
,
gimg
.
cols
+
1
,
CV_32FC1
);
oclMat
gsqsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32FC1
);
//cl_mem cascadebuffer;
//cl_mem cascadebuffer;
cl_mem
stagebuffer
;
cl_mem
stagebuffer
;
...
...
modules/ocl/src/moments.cpp
View file @
ad6aae45
...
@@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
...
@@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
bool
is_float
=
CV_SEQ_ELTYPE
(
contour
)
==
CV_32FC2
;
bool
is_float
=
CV_SEQ_ELTYPE
(
contour
)
==
CV_32FC2
;
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
impl
->
double_support
&&
is_float
)
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
supportsFeature
(
Context
::
CL_DOUBLE
)
&&
is_float
)
{
{
CV_Error
(
CV_StsUnsupportedFormat
,
"Moments - double is not supported by your GPU!"
);
CV_Error
(
CV_StsUnsupportedFormat
,
"Moments - double is not supported by your GPU!"
);
}
}
...
@@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
...
@@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
cv
::
Mat
dst
(
dst_a
);
cv
::
Mat
dst
(
dst_a
);
a00
=
a10
=
a01
=
a20
=
a11
=
a02
=
a30
=
a21
=
a12
=
a03
=
0.0
;
a00
=
a10
=
a01
=
a20
=
a11
=
a02
=
a30
=
a21
=
a12
=
a03
=
0.0
;
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
impl
->
double_support
)
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
supportsFeature
(
Context
::
CL_DOUBLE
)
)
{
{
for
(
int
i
=
0
;
i
<
contour
->
total
;
++
i
)
for
(
int
i
=
0
;
i
<
contour
->
total
;
++
i
)
{
{
...
@@ -161,7 +161,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
...
@@ -161,7 +161,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
a12
+=
dst
.
at
<
cl_long
>
(
8
,
i
);
a12
+=
dst
.
at
<
cl_long
>
(
8
,
i
);
a03
+=
dst
.
at
<
cl_long
>
(
9
,
i
);
a03
+=
dst
.
at
<
cl_long
>
(
9
,
i
);
}
}
}
}
else
else
{
{
a00
=
cv
::
sum
(
dst
.
row
(
0
))[
0
];
a00
=
cv
::
sum
(
dst
.
row
(
0
))[
0
];
...
...
modules/ocl/src/opencl/brute_force_match.cl
View file @
ad6aae45
...
@@ -5,19 +5,93 @@ int bit1Count(float x)
...
@@ -5,19 +5,93 @@ int bit1Count(float x)
{
{
int
c
=
0
;
int
c
=
0
;
int
ix
=
(
int
)
x
;
int
ix
=
(
int
)
x
;
for
(
int
i
=
0
; i < 32 ; i++)
for
(
int
i
=
0
; i < 32 ; i++)
{
{
c
+=
ix
&
0x1
;
c
+=
ix
&
0x1
;
ix
>>=
1
;
ix
>>=
1
;
}
}
return
(
float
)
c
;
return
(
float
)
c
;
}
}
float
reduce_block
(
__local
float
*s_query,
__local
float
*s_train,
int
block_size,
int
lidx,
int
lidy,
int
distType
)
{
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
float
result
=
0
;
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
fabs
(
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
bit1Count
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[
(
uint
)
j
*
block_size
+
lidx]
)
;
}
break
;
}
return
result
;
}
float
reduce_multi_block
(
__local
float
*s_query,
__local
float
*s_train,
int
max_desc_len,
int
block_size,
int
block_index,
int
lidx,
int
lidy,
int
distType
)
{
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
float
result
=
0
;
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
fabs
(
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
//result
+=
popcount
((
uint
)
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
result
+=
bit1Count
((
uint
)
s_query[lidy
*
max_desc_len
+
block_index
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
}
return
result
;
}
/*
2dim
launch,
global
size:
dim0
is
(
query
rows
+
block_size
-
1
)
/
block_size
*
block_size,
dim1
is
block_size
/*
2dim
launch,
global
size:
dim0
is
(
query
rows
+
block_size
-
1
)
/
block_size
*
block_size,
dim1
is
block_size
local
size:
dim0
is
block_size,
dim1
is
block_size.
local
size:
dim0
is
block_size,
dim1
is
block_size.
*/
*/
__kernel
void
BruteForceMatch_UnrollMatch
(
__kernel
void
BruteForceMatch_UnrollMatch
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
//__global
float
*mask,
//__global
float
*mask,
...
@@ -42,7 +116,6 @@ __kernel void BruteForceMatch_UnrollMatch(
...
@@ -42,7 +116,6 @@ __kernel void BruteForceMatch_UnrollMatch(
__local
float
*s_train
=
sharebuffer
+
block_size
*
max_desc_len
;
__local
float
*s_train
=
sharebuffer
+
block_size
*
max_desc_len
;
int
queryIdx
=
groupidx
*
block_size
+
lidy
;
int
queryIdx
=
groupidx
*
block_size
+
lidy
;
//
load
the
query
into
local
memory.
//
load
the
query
into
local
memory.
for
(
int
i
=
0
; i < max_desc_len / block_size; i ++)
for
(
int
i
=
0
; i < max_desc_len / block_size; i ++)
{
{
...
@@ -55,11 +128,9 @@ __kernel void BruteForceMatch_UnrollMatch(
...
@@ -55,11 +128,9 @@ __kernel void BruteForceMatch_UnrollMatch(
//
loopUnrolledCached
to
find
the
best
trainIdx
and
best
distance.
//
loopUnrolledCached
to
find
the
best
trainIdx
and
best
distance.
volatile
int
imgIdx
=
0
;
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size ; t++)
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size ; t++)
{
{
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < max_desc_len / block_size ; i++)
for
(
int
i
=
0
; i < max_desc_len / block_size ; i++)
{
{
//load
a
block_size
*
block_size
block
into
local
train.
//load
a
block_size
*
block_size
block
into
local
train.
...
@@ -69,38 +140,7 @@ __kernel void BruteForceMatch_UnrollMatch(
...
@@ -69,38 +140,7 @@ __kernel void BruteForceMatch_UnrollMatch(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
result
+=
reduce_multi_block
(
s_query,
s_train,
max_desc_len,
block_size,
i,
lidx,
lidy,
distType
)
;
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
fabs
(
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
//result
+=
popcount
((
uint
)
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
result
+=
bit1Count
((
uint
)
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -116,8 +156,8 @@ __kernel void BruteForceMatch_UnrollMatch(
...
@@ -116,8 +156,8 @@ __kernel void BruteForceMatch_UnrollMatch(
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
__local
float
*s_distance
=
(
__local
float
*
)(
sharebuffer
)
;
__local
float
*s_distance
=
(
__local
float*
)(
sharebuffer
)
;
__local
int
*
s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
block_size
*
block_size
)
;
__local
int
*
s_trainIdx
=
(
__local
int
*
)(
sharebuffer
+
block_size
*
block_size
)
;
//find
BestMatch
//find
BestMatch
s_distance
+=
lidy
*
block_size
;
s_distance
+=
lidy
*
block_size
;
...
@@ -144,7 +184,7 @@ __kernel void BruteForceMatch_UnrollMatch(
...
@@ -144,7 +184,7 @@ __kernel void BruteForceMatch_UnrollMatch(
}
}
}
}
__kernel
void
BruteForceMatch_Match
(
__kernel
void
BruteForceMatch_Match
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
//__global
float
*mask,
//__global
float
*mask,
...
@@ -177,7 +217,6 @@ __kernel void BruteForceMatch_Match(
...
@@ -177,7 +217,6 @@ __kernel void BruteForceMatch_Match(
{
{
//Dist
dist
;
//Dist
dist
;
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < (query_cols + block_size - 1) / block_size ; i++)
for
(
int
i
=
0
; i < (query_cols + block_size - 1) / block_size ; i++)
{
{
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
block_size
;
...
@@ -193,38 +232,7 @@ __kernel void BruteForceMatch_Match(
...
@@ -193,38 +232,7 @@ __kernel void BruteForceMatch_Match(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
fabs
(
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
//result
+=
popcount
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
result
+=
bit1Count
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[
(
uint
)
j
*
block_size
+
lidx]
)
;
}
break
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -270,7 +278,7 @@ __kernel void BruteForceMatch_Match(
...
@@ -270,7 +278,7 @@ __kernel void BruteForceMatch_Match(
}
}
//radius_unrollmatch
//radius_unrollmatch
__kernel
void
BruteForceMatch_RadiusUnrollMatch
(
__kernel
void
BruteForceMatch_RadiusUnrollMatch
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
float
maxDistance,
float
maxDistance,
...
@@ -303,7 +311,6 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
...
@@ -303,7 +311,6 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
__local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
__local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < max_desc_len / block_size ; ++i)
for
(
int
i
=
0
; i < max_desc_len / block_size ; ++i)
{
{
//load
a
block_size
*
block_size
block
into
local
train.
//load
a
block_size
*
block_size
block
into
local
train.
...
@@ -315,37 +322,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
...
@@ -315,37 +322,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
there
are
three
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; ++j)
{
result
+=
fabs
(
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; ++j)
{
float
qr
=
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; ++j)
{
result
+=
bit1Count
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -354,7 +331,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
...
@@ -354,7 +331,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
{
{
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx/*,
(
unsigned
int
)
-1*/
)
;
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx/*,
(
unsigned
int
)
-1*/
)
;
if
(
ind
<
bestTrainIdx_cols
)
if
(
ind
<
bestTrainIdx_cols
)
{
{
//bestImgIdx
=
imgIdx
;
//bestImgIdx
=
imgIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
...
@@ -364,7 +341,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
...
@@ -364,7 +341,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
}
}
//radius_match
//radius_match
__kernel
void
BruteForceMatch_RadiusMatch
(
__kernel
void
BruteForceMatch_RadiusMatch
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
float
maxDistance,
float
maxDistance,
...
@@ -396,7 +373,6 @@ __kernel void BruteForceMatch_RadiusMatch(
...
@@ -396,7 +373,6 @@ __kernel void BruteForceMatch_RadiusMatch(
__local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
__local
float
*s_train
=
sharebuffer
+
block_size
*
block_size
;
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < (query_cols + block_size - 1) / block_size ; ++i)
for
(
int
i
=
0
; i < (query_cols + block_size - 1) / block_size ; ++i)
{
{
//load
a
block_size
*
block_size
block
into
local
train.
//load
a
block_size
*
block_size
block
into
local
train.
...
@@ -408,46 +384,16 @@ __kernel void BruteForceMatch_RadiusMatch(
...
@@ -408,46 +384,16 @@ __kernel void BruteForceMatch_RadiusMatch(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
there
are
three
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; ++j)
{
result
+=
fabs
(
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; ++j)
{
float
qr
=
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; ++j)
{
result
+=
bit1Count
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
if
(
queryIdx
<
query_rows
&&
trainIdx
<
train_rows
&&
result
<
maxDistance/*
&&
mask
(
queryIdx,
trainIdx
)
*/
)
{
{
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx
/*,
(
unsigned
int
)
-1*/
)
;
unsigned
int
ind
=
atom_inc
(
nMatches
+
queryIdx
)
;
if
(
ind
<
bestTrainIdx_cols
)
if
(
ind
<
bestTrainIdx_cols
)
{
{
//bestImgIdx
=
imgIdx
;
//bestImgIdx
=
imgIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
bestTrainIdx[queryIdx
*
(
ostep
/
sizeof
(
int
))
+
ind]
=
trainIdx
;
...
@@ -457,7 +403,7 @@ __kernel void BruteForceMatch_RadiusMatch(
...
@@ -457,7 +403,7 @@ __kernel void BruteForceMatch_RadiusMatch(
}
}
__kernel
void
BruteForceMatch_knnUnrollMatch
(
__kernel
void
BruteForceMatch_knnUnrollMatch
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
//__global
float
*mask,
//__global
float
*mask,
...
@@ -496,11 +442,9 @@ __kernel void BruteForceMatch_knnUnrollMatch(
...
@@ -496,11 +442,9 @@ __kernel void BruteForceMatch_knnUnrollMatch(
//loopUnrolledCached
//loopUnrolledCached
volatile
int
imgIdx
=
0
;
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size ; t++)
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size ; t++)
{
{
float
result
=
0
;
float
result
=
0
;
for
(
int
i
=
0
; i < max_desc_len / block_size ; i++)
for
(
int
i
=
0
; i < max_desc_len / block_size ; i++)
{
{
const
int
loadX
=
lidx
+
i
*
block_size
;
const
int
loadX
=
lidx
+
i
*
block_size
;
...
@@ -511,38 +455,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
...
@@ -511,38 +455,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
result
+=
reduce_multi_block
(
s_query,
s_train,
max_desc_len,
block_size,
i,
lidx,
lidy,
distType
)
;
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
fabs
(
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
//result
+=
popcount
((
uint
)
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
result
+=
bit1Count
((
uint
)
s_query[lidy
*
max_desc_len
+
i
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -589,7 +502,6 @@ __kernel void BruteForceMatch_knnUnrollMatch(
...
@@ -589,7 +502,6 @@ __kernel void BruteForceMatch_knnUnrollMatch(
for
(
int
i
=
0
; i < block_size ; i++)
for
(
int
i
=
0
; i < block_size ; i++)
{
{
float
val
=
s_distance[i]
;
float
val
=
s_distance[i]
;
if
(
val
<
bestDistance1
)
if
(
val
<
bestDistance1
)
{
{
bestDistance2
=
bestDistance1
;
bestDistance2
=
bestDistance1
;
...
@@ -640,7 +552,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
...
@@ -640,7 +552,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
}
}
}
}
__kernel
void
BruteForceMatch_knnMatch
(
__kernel
void
BruteForceMatch_knnMatch
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
//__global
float
*mask,
//__global
float
*mask,
...
@@ -673,8 +585,7 @@ __kernel void BruteForceMatch_knnMatch(
...
@@ -673,8 +585,7 @@ __kernel void BruteForceMatch_knnMatch(
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size ; t++)
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size ; t++)
{
{
float
result
=
0.0f
;
float
result
=
0.0f
;
for
(
int
i
=
0
; i < (query_cols + block_size -1) / block_size ; i++)
for
(
int
i
=
0
; i < (query_cols + block_size - 1) / block_size ; i++)
{
{
const
int
loadx
=
lidx
+
i
*
block_size
;
const
int
loadx
=
lidx
+
i
*
block_size
;
//load
query
and
train
into
local
memory
//load
query
and
train
into
local
memory
...
@@ -689,38 +600,7 @@ __kernel void BruteForceMatch_knnMatch(
...
@@ -689,38 +600,7 @@ __kernel void BruteForceMatch_knnMatch(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
/*
there
are
threee
types
in
the
reducer.
the
first
is
L1Dist,
which
to
sum
the
abs
(
v1,
v2
)
,
the
second
is
L2Dist,
which
to
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
sum
the
(
v1
-
v2
)
*
(
v1
-
v2
)
,
the
third
is
humming,
which
to
popc
(
v1
^
v2
)
,
popc
is
to
count
the
bits
are
set
to
1*/
switch
(
distType
)
{
case
0:
for
(
int
j
=
0
; j < block_size ; j++)
{
result
+=
fabs
(
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
)
;
}
break
;
case
1:
for
(
int
j
=
0
; j < block_size ; j++)
{
float
qr
=
s_query[lidy
*
block_size
+
j]
-
s_train[j
*
block_size
+
lidx]
;
result
+=
qr
*
qr
;
}
break
;
case
2:
for
(
int
j
=
0
; j < block_size ; j++)
{
//result
+=
popcount
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[j
*
block_size
+
lidx]
)
;
result
+=
bit1Count
((
uint
)
s_query[lidy
*
block_size
+
j]
^
(
uint
)
s_train[
(
uint
)
j
*
block_size
+
lidx]
)
;
}
break
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -767,7 +647,6 @@ __kernel void BruteForceMatch_knnMatch(
...
@@ -767,7 +647,6 @@ __kernel void BruteForceMatch_knnMatch(
for
(
int
i
=
0
; i < block_size ; i++)
for
(
int
i
=
0
; i < block_size ; i++)
{
{
float
val
=
s_distance[i]
;
float
val
=
s_distance[i]
;
if
(
val
<
bestDistance1
)
if
(
val
<
bestDistance1
)
{
{
bestDistance2
=
bestDistance1
;
bestDistance2
=
bestDistance1
;
...
@@ -818,7 +697,7 @@ __kernel void BruteForceMatch_knnMatch(
...
@@ -818,7 +697,7 @@ __kernel void BruteForceMatch_knnMatch(
}
}
}
}
kernel
void
BruteForceMatch_calcDistanceUnrolled
(
kernel
void
BruteForceMatch_calcDistanceUnrolled
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
//__global
float
*mask,
//__global
float
*mask,
...
@@ -836,7 +715,7 @@ kernel void BruteForceMatch_calcDistanceUnrolled(
...
@@ -836,7 +715,7 @@ kernel void BruteForceMatch_calcDistanceUnrolled(
/*
Todo
*/
/*
Todo
*/
}
}
kernel
void
BruteForceMatch_calcDistance
(
kernel
void
BruteForceMatch_calcDistance
_D5
(
__global
float
*query,
__global
float
*query,
__global
float
*train,
__global
float
*train,
//__global
float
*mask,
//__global
float
*mask,
...
@@ -853,7 +732,7 @@ kernel void BruteForceMatch_calcDistance(
...
@@ -853,7 +732,7 @@ kernel void BruteForceMatch_calcDistance(
/*
Todo
*/
/*
Todo
*/
}
}
kernel
void
BruteForceMatch_findBestMatch
(
kernel
void
BruteForceMatch_findBestMatch
_D5
(
__global
float
*allDist,
__global
float
*allDist,
__global
int
*bestTrainIdx,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
__global
float
*bestDistance,
...
...
modules/ocl/src/opencl/haarobjectdetect.cl
View file @
ad6aae45
...
@@ -211,10 +211,14 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
...
@@ -211,10 +211,14 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int4
data
=
*
(
__global
int4*
)
&sum[glb_off]
;
int4
data
=
*
(
__global
int4*
)
&sum[glb_off]
;
int
lcl_off
=
mad24
(
lcl_y,
readwidth,
lcl_x<<2
)
;
int
lcl_off
=
mad24
(
lcl_y,
readwidth,
lcl_x<<2
)
;
#
if
OFF
lcldata[lcl_off]
=
data.x
;
lcldata[lcl_off]
=
data.x
;
lcldata[lcl_off+1]
=
data.y
;
lcldata[lcl_off+1]
=
data.y
;
lcldata[lcl_off+2]
=
data.z
;
lcldata[lcl_off+2]
=
data.z
;
lcldata[lcl_off+3]
=
data.w
;
lcldata[lcl_off+3]
=
data.w
;
#
else
vstore4
(
data,
0
,
&lcldata[lcl_off]
)
;
#
endif
}
}
lcloutindex[lcl_id]
=
0
;
lcloutindex[lcl_id]
=
0
;
...
@@ -559,3 +563,7 @@ if(result)
...
@@ -559,3 +563,7 @@ if(result)
}
}
}
}
*/
*/
modules/ocl/test/test_brute_force_matcher.cpp
View file @
ad6aae45
...
@@ -110,7 +110,7 @@ namespace
...
@@ -110,7 +110,7 @@ namespace
}
}
};
};
TEST_P
(
BruteForceMatcher
,
DISABLED_
Match_Single
)
TEST_P
(
BruteForceMatcher
,
Match_Single
)
{
{
cv
::
ocl
::
BruteForceMatcher_OCL_base
matcher
(
distType
);
cv
::
ocl
::
BruteForceMatcher_OCL_base
matcher
(
distType
);
...
@@ -130,7 +130,7 @@ namespace
...
@@ -130,7 +130,7 @@ namespace
ASSERT_EQ
(
0
,
badCount
);
ASSERT_EQ
(
0
,
badCount
);
}
}
TEST_P
(
BruteForceMatcher
,
DISABLED_
KnnMatch_2_Single
)
TEST_P
(
BruteForceMatcher
,
KnnMatch_2_Single
)
{
{
const
int
knn
=
2
;
const
int
knn
=
2
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment