Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
113b7584
Commit
113b7584
authored
Apr 12, 2013
by
peng xiao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimize bfmatcher by passing macros.
parent
1e49c00f
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
90 additions
and
40 deletions
+90
-40
brute_force_matcher.cpp
modules/ocl/src/brute_force_matcher.cpp
+24
-15
brute_force_match.cl
modules/ocl/src/opencl/brute_force_match.cl
+66
-25
No files found.
modules/ocl/src/brute_force_matcher.cpp
View file @
113b7584
...
...
@@ -74,6 +74,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
int
m_size
=
MAX_DESC_LEN
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
static
const
int
OPT_SIZE
=
40
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D block_size=%d -D max_desc_len=%d"
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -82,8 +85,6 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
m_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
...
@@ -93,7 +94,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
std
::
string
kernelName
=
"BruteForceMatch_UnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
...
...
@@ -115,6 +116,9 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
int
block_size
=
BLOCK_SIZE
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
static
const
int
OPT_SIZE
=
40
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D block_size=%d"
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -123,7 +127,6 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
...
@@ -133,7 +136,7 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
std
::
string
kernelName
=
"BruteForceMatch_Match"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
...
...
@@ -157,6 +160,9 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
int
m_size
=
MAX_DESC_LEN
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
static
const
int
OPT_SIZE
=
40
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D block_size=%d -D max_desc_len=%d"
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -167,8 +173,6 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
nMatches
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
m_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
...
@@ -180,7 +184,7 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
std
::
string
kernelName
=
"BruteForceMatch_RadiusUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
...
...
@@ -197,6 +201,9 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
int
block_size
=
BLOCK_SIZE
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
static
const
int
OPT_SIZE
=
40
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D block_size=%d"
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -207,7 +214,6 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
nMatches
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
...
@@ -219,7 +225,7 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
std
::
string
kernelName
=
"BruteForceMatch_RadiusMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
...
...
@@ -294,6 +300,9 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
int
m_size
=
MAX_DESC_LEN
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
static
const
int
OPT_SIZE
=
40
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D block_size=%d -D max_desc_len=%d"
,
block_size
,
m_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -302,8 +311,6 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
m_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
...
@@ -313,7 +320,7 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
std
::
string
kernelName
=
"BruteForceMatch_knnUnrollMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
...
...
@@ -328,6 +335,9 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
int
block_size
=
BLOCK_SIZE
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
static
const
int
OPT_SIZE
=
40
;
char
opt
[
OPT_SIZE
]
=
""
;
sprintf
(
opt
,
"-D block_size=%d"
,
block_size
);
if
(
globalSize
[
0
]
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
query
.
data
));
...
...
@@ -336,7 +346,6 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trainIdx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
distance
.
data
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
block_size
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
query
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
train
.
rows
));
...
...
@@ -346,7 +355,7 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
std
::
string
kernelName
=
"BruteForceMatch_knnMatch"
;
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
());
openCLExecuteKernel
(
ctx
,
&
brute_force_match
,
kernelName
,
globalSize
,
localSize
,
args
,
-
1
,
query
.
depth
()
,
opt
);
}
}
...
...
modules/ocl/src/opencl/brute_force_match.cl
View file @
113b7584
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Nathan,
liujun@multicorewareinc.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
oclMaterials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
"as is"
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
define
MAX_FLOAT
1e7f
#
define
MAX_FLOAT
3.40282e+038f
#
ifndef
block_size
#
define
block_size
16
#
endif
#
ifndef
max_desc_len
#
define
max_desc_len
64
#
endif
int
bit1Count
(
float
x
)
{
...
...
@@ -15,7 +68,6 @@ int bit1Count(float x)
float
reduce_block
(
__local
float
*s_query,
__local
float
*s_train,
int
block_size,
int
lidx,
int
lidy,
int
distType
...
...
@@ -51,8 +103,6 @@ float reduce_block(__local float *s_query,
float
reduce_multi_block
(
__local
float
*s_query,
__local
float
*s_train,
int
max_desc_len,
int
block_size,
int
block_index,
int
lidx,
int
lidy,
...
...
@@ -98,8 +148,6 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -108,6 +156,7 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
int
distType
)
{
const
int
lidx
=
get_local_id
(
0
)
;
const
int
lidy
=
get_local_id
(
1
)
;
const
int
groupidx
=
get_group_id
(
0
)
;
...
...
@@ -117,6 +166,7 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
int
queryIdx
=
groupidx
*
block_size
+
lidy
;
//
load
the
query
into
local
memory.
#
pragma
unroll
for
(
int
i
=
0
; i < max_desc_len / block_size; i ++)
{
int
loadx
=
lidx
+
i
*
block_size
;
...
...
@@ -128,9 +178,10 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
//
loopUnrolledCached
to
find
the
best
trainIdx
and
best
distance.
volatile
int
imgIdx
=
0
;
for
(
int
t
=
0
; t < (train_rows + block_size - 1) / block_size
; t++)
for
(
int
t
=
0
,
endt
=
(
train_rows
+
block_size
-
1
)
/
block_size
; t < endt
; t++)
{
float
result
=
0
;
#
pragma
unroll
for
(
int
i
=
0
; i < max_desc_len / block_size ; i++)
{
//load
a
block_size
*
block_size
block
into
local
train.
...
...
@@ -140,7 +191,7 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
max_desc_len,
block_size,
i,
lidx,
lidy,
distType
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
i,
lidx,
lidy,
distType
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
...
...
@@ -168,6 +219,7 @@ __kernel void BruteForceMatch_UnrollMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//reduce
--
now
all
reduce
implement
in
each
threads.
#
pragma
unroll
for
(
int
k
=
0
; k < block_size; k++)
{
if
(
myBestDistance
>
s_distance[k]
)
...
...
@@ -191,7 +243,6 @@ __kernel void BruteForceMatch_Match_D5(
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -232,7 +283,7 @@ __kernel void BruteForceMatch_Match_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy,
distType
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
...
...
@@ -287,8 +338,6 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
__global
float
*bestDistance,
__global
int
*nMatches,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -322,7 +371,7 @@ __kernel void BruteForceMatch_RadiusUnrollMatch_D5(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy,
distType
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
...
...
@@ -350,7 +399,6 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
__global
float
*bestDistance,
__global
int
*nMatches,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -384,7 +432,7 @@ __kernel void BruteForceMatch_RadiusMatch_D5(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy,
distType
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
...
...
@@ -410,8 +458,6 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
__global
int2
*bestTrainIdx,
__global
float2
*bestDistance,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -455,7 +501,7 @@ __kernel void BruteForceMatch_knnUnrollMatch_D5(
//synchronize
to
make
sure
each
elem
for
reduceIteration
in
share
memory
is
written
already.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
max_desc_len,
block_size,
i,
lidx,
lidy,
distType
)
;
result
+=
reduce_multi_block
(
s_query,
s_train,
i,
lidx,
lidy,
distType
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
...
...
@@ -559,7 +605,6 @@ __kernel void BruteForceMatch_knnMatch_D5(
__global
int2
*bestTrainIdx,
__global
float2
*bestDistance,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -600,7 +645,7 @@ __kernel void BruteForceMatch_knnMatch_D5(
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
result
+=
reduce_block
(
s_query,
s_train,
block_size,
lidx,
lidy,
distType
)
;
result
+=
reduce_block
(
s_query,
s_train,
lidx,
lidy,
distType
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
...
...
@@ -703,8 +748,6 @@ kernel void BruteForceMatch_calcDistanceUnrolled_D5(
//__global
float
*mask,
__global
float
*allDist,
__local
float
*sharebuffer,
int
block_size,
int
max_desc_len,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -721,7 +764,6 @@ kernel void BruteForceMatch_calcDistance_D5(
//__global
float
*mask,
__global
float
*allDist,
__local
float
*sharebuffer,
int
block_size,
int
query_rows,
int
query_cols,
int
train_rows,
...
...
@@ -736,8 +778,7 @@ kernel void BruteForceMatch_findBestMatch_D5(
__global
float
*allDist,
__global
int
*bestTrainIdx,
__global
float
*bestDistance,
int
k,
int
block_size
int
k
)
{
/*
Todo
*/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment