Commit 8891acb6, authored Dec 06, 2010 by Vladislav Vinogradov

    added BruteForceMatcher_GPU

parent 77027f60

Showing 6 changed files with 2136 additions and 3 deletions (+2136 −3)
modules/gpu/CMakeLists.txt                   +1    −1
modules/gpu/include/opencv2/gpu/gpu.hpp      +147  −1
modules/gpu/src/brute_force_matcher.cpp      +605  −0
modules/gpu/src/cuda/brute_force_matcher.cu  +1205 −0
tests/gpu/src/brute_force_matcher.cpp        +176  −0
tests/gpu/src/gputest.hpp                    +2    −1
modules/gpu/CMakeLists.txt

 set(name "gpu")

-set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect")
+set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann")

 set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
...
modules/gpu/include/opencv2/gpu/gpu.hpp
...
@@ -48,6 +48,7 @@
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/gpu/devmem2d.hpp"
+#include "opencv2/features2d/features2d.hpp"

 namespace cv
 {
...
@@ -1118,7 +1119,152 @@ namespace cv
        // Gradients computation results
        GpuMat grad, qangle;
    };
};
    ////////////////////////////////// BruteForceMatcher //////////////////////////////////

    class CV_EXPORTS BruteForceMatcher_GPU_base
    {
    public:
        enum DistType { L1Dist = 0, L2Dist };

        explicit BruteForceMatcher_GPU_base(DistType distType = L2Dist);

        // Add descriptors to the train descriptor collection.
        void add(const std::vector<GpuMat>& descCollection);

        // Get the train descriptor collection.
        const std::vector<GpuMat>& getTrainDescriptors() const;

        // Clear the train descriptor collection.
        void clear();

        // Return true if there are no train descriptors in the collection.
        bool empty() const;

        // Return true if the matcher supports masks in the match methods.
        bool isMaskSupported() const;

        // Find the single best match for each query descriptor.
        // trainIdx.at<int>(0, queryIdx) will contain the best train index for queryIdx.
        // distance.at<float>(0, queryIdx) will contain the corresponding distance.
        void matchSingle(const GpuMat& queryDescs, const GpuMat& trainDescs,
            GpuMat& trainIdx, GpuMat& distance, const GpuMat& mask = GpuMat());

        // Download trainIdx and distance to a CPU vector of DMatch.
        static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance,
            std::vector<DMatch>& matches);

        // Find the single best match for each query descriptor.
        void match(const GpuMat& queryDescs, const GpuMat& trainDescs,
            std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());

        // Build a GPU collection of train descriptors and masks in the format
        // expected by the matchCollection function.
        void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
            const vector<GpuMat>& masks = std::vector<GpuMat>());

        // Find the single best match from the train collection for each query descriptor.
        // trainIdx.at<int>(0, queryIdx) will contain the best train index for queryIdx.
        // imgIdx.at<int>(0, queryIdx) will contain the best image index for queryIdx.
        // distance.at<float>(0, queryIdx) will contain the corresponding distance.
        void matchCollection(const GpuMat& queryDescs, const GpuMat& trainCollection,
            GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& maskCollection);

        // Download trainIdx, imgIdx and distance to a CPU vector of DMatch.
        static void matchDownload(const GpuMat& trainIdx, GpuMat& imgIdx, const GpuMat& distance,
            std::vector<DMatch>& matches);

        // Find the single best match from the train collection for each query descriptor.
        void match(const GpuMat& queryDescs, std::vector<DMatch>& matches,
            const std::vector<GpuMat>& masks = std::vector<GpuMat>());

        // Find the k best matches for each query descriptor (in increasing order of distance).
        // trainIdx.at<int>(queryIdx, i) will contain the index of the i-th best train (i < k).
        // distance.at<float>(queryIdx, i) will contain the corresponding distance.
        // allDist is a buffer that stores all distances between query and train descriptors;
        // it has size (nQuery, nTrain) and type CV_32F.
        // allDist.at<float>(queryIdx, trainIdx) will contain FLT_MAX if trainIdx is one of the
        // k best, otherwise it will contain the distance between the queryIdx and trainIdx descriptors.
        void knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
            GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask = GpuMat());

        // Download trainIdx and distance to a CPU vector of DMatch.
        // compactResult is used when the mask is not empty. If compactResult is false, the matches
        // vector has the same size as queryDescriptors has rows. If compactResult is true, the
        // matches vector does not contain entries for fully masked-out query descriptors.
        static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
            std::vector< std::vector<DMatch> >& matches, bool compactResult = false);

        // Find the k best matches for each query descriptor (in increasing order of distance).
        // compactResult behaves as described above.
        void knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
            std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
            bool compactResult = false);

        // Find the k best matches from the train collection for each query descriptor
        // (in increasing order of distance). compactResult behaves as described above.
        void knnMatch(const GpuMat& queryDescs, std::vector< std::vector<DMatch> >& matches, int knn,
            const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);

        // Find the best matches for each query descriptor with distance less than maxDistance.
        // nMatches.at<unsigned int>(0, queryIdx) will contain the match count for queryIdx.
        // Note that nMatches can be greater than trainIdx.cols; this means the matcher could not
        // record all matches because it did not have enough memory.
        // trainIdx.at<int>(queryIdx, i) will contain the i-th train index
        //   (i < min(nMatches.at<unsigned int>(0, queryIdx), trainIdx.cols)).
        // distance.at<float>(queryIdx, i) will contain the i-th distance, with the same bound on i.
        // If trainIdx is empty, trainIdx and distance are created with size nQuery x nTrain;
        // otherwise the caller can pass preallocated trainIdx and distance of size nQuery x nMaxMatches.
        // Matches are not sorted.
        void radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
            GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance,
            const GpuMat& mask = GpuMat());

        // Download trainIdx, nMatches and distance to a CPU vector of DMatch.
        // matches will be sorted in increasing order of distance.
        // compactResult behaves as described above.
        static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& nMatches,
            const GpuMat& distance, std::vector< std::vector<DMatch> >& matches,
            bool compactResult = false);

        // Find the best matches for each query descriptor with distance less than maxDistance
        // (in increasing order of distance).
        void radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
            std::vector< std::vector<DMatch> >& matches, float maxDistance,
            const GpuMat& mask = GpuMat(), bool compactResult = false);

        // Find the best matches from the train collection for each query descriptor with
        // distance less than maxDistance (in increasing order of distance).
        void radiusMatch(const GpuMat& queryDescs, std::vector< std::vector<DMatch> >& matches,
            float maxDistance, const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
            bool compactResult = false);

    private:
        DistType distType;

        std::vector<GpuMat> trainDescCollection;
    };

    template <class Distance>
    class CV_EXPORTS BruteForceMatcher_GPU;

    template <typename T>
    class CV_EXPORTS BruteForceMatcher_GPU< L1<T> > : public BruteForceMatcher_GPU_base
    {
    public:
        explicit BruteForceMatcher_GPU(L1<T> d = L1<T>()) : BruteForceMatcher_GPU_base(L1Dist) {}
    };
    template <typename T>
    class CV_EXPORTS BruteForceMatcher_GPU< L2<T> > : public BruteForceMatcher_GPU_base
    {
    public:
        explicit BruteForceMatcher_GPU(L2<T> d = L2<T>()) : BruteForceMatcher_GPU_base(L2Dist) {}
    };
}
...
...
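The new matcher mirrors the CPU-side BruteForceMatcher: specialize on a distance functor, upload descriptors, match, and the download helpers hand back ordinary std::vector<DMatch>. A minimal usage sketch against synthetic data (the descriptor dimensions and the 0.8 ratio threshold are illustrative choices, not part of this API):

#include <opencv2/gpu/gpu.hpp>
#include <vector>
#include <cstdio>

using namespace cv;

int main()
{
    // One row per descriptor, e.g. 64-dimensional float descriptors.
    Mat queryCPU(100, 64, CV_32F), trainCPU(500, 64, CV_32F);
    randu(queryCPU, 0, 1);
    randu(trainCPU, 0, 1);

    gpu::GpuMat queryGPU(queryCPU), trainGPU(trainCPU);

    gpu::BruteForceMatcher_GPU< L2<float> > matcher;

    // Single best match per query descriptor.
    std::vector<DMatch> matches;
    matcher.match(queryGPU, trainGPU, matches);
    std::printf("best matches: %d\n", (int)matches.size());

    // 2-NN plus a Lowe-style ratio test on the downloaded results.
    std::vector< std::vector<DMatch> > knn;
    matcher.knnMatch(queryGPU, trainGPU, knn, 2);

    int good = 0;
    for (size_t i = 0; i < knn.size(); ++i)
        if (knn[i].size() == 2 && knn[i][0].distance < 0.8f * knn[i][1].distance)
            ++good;
    std::printf("ratio-test survivors: %d\n", good);
    return 0;
}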
modules/gpu/src/brute_force_matcher.cpp (new file, mode 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using
namespace
cv
;
using
namespace
cv
::
gpu
;
using
namespace
std
;
#if !defined (HAVE_CUDA)
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
BruteForceMatcher_GPU_base
(
DistType
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
add
(
const
vector
<
GpuMat
>&
)
{
throw_nogpu
();
}
const
vector
<
GpuMat
>&
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
getTrainDescriptors
()
const
{
throw_nogpu
();
return
trainDescCollection
;
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
clear
()
{
throw_nogpu
();
}
bool
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
empty
()
const
{
throw_nogpu
();
return
true
;
}
bool
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
isMaskSupported
()
const
{
throw_nogpu
();
return
true
;
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
matchSingle
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
matchDownload
(
const
GpuMat
&
,
const
GpuMat
&
,
vector
<
DMatch
>&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
match
(
const
GpuMat
&
,
const
GpuMat
&
,
vector
<
DMatch
>&
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
makeGpuCollection
(
GpuMat
&
,
GpuMat
&
,
const
vector
<
GpuMat
>&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
matchCollection
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
matchDownload
(
const
GpuMat
&
,
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
DMatch
>&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
match
(
const
GpuMat
&
,
std
::
vector
<
DMatch
>&
,
const
std
::
vector
<
GpuMat
>&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatch
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
int
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatchDownload
(
const
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatch
(
const
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
int
,
const
GpuMat
&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
knnMatch
(
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
int
,
const
std
::
vector
<
GpuMat
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
float
,
const
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatchDownload
(
const
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
,
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
float
,
const
GpuMat
&
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
BruteForceMatcher_GPU_base
::
radiusMatch
(
const
GpuMat
&
,
std
::
vector
<
std
::
vector
<
DMatch
>
>&
,
float
,
const
std
::
vector
<
GpuMat
>&
,
bool
)
{
throw_nogpu
();
}
#else /* !defined (HAVE_CUDA) */

namespace cv { namespace gpu { namespace bfmatcher
{
    template <typename T>
    void matchSingleL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
    template <typename T>
    void matchSingleL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);

    template <typename T>
    void matchCollectionL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
        const DevMem2Df& distance);
    template <typename T>
    void matchCollectionL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
        const DevMem2Df& distance);

    template <typename T>
    void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
    template <typename T>
    void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);

    template <typename T>
    void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
    template <typename T>
    void radiusMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
        const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
}}}
cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
{
}

////////////////////////////////////////////////////////////////////
// Train collection

void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>& descCollection)
{
    trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
}

const vector<GpuMat>& cv::gpu::BruteForceMatcher_GPU_base::getTrainDescriptors() const
{
    return trainDescCollection;
}

void cv::gpu::BruteForceMatcher_GPU_base::clear()
{
    trainDescCollection.clear();
}

bool cv::gpu::BruteForceMatcher_GPU_base::empty() const
{
    return trainDescCollection.empty();
}

bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const
{
    return true;
}

////////////////////////////////////////////////////////////////////
// Match

void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& queryDescs, const GpuMat& trainDescs,
    GpuMat& trainIdx, GpuMat& distance, const GpuMat& mask)
{
    using namespace cv::gpu::bfmatcher;

    typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);

    static const match_caller_t match_callers[2][8] =
    {
        {
            matchSingleL1_gpu<unsigned char>, matchSingleL1_gpu<char>, matchSingleL1_gpu<unsigned short>,
            matchSingleL1_gpu<short>, matchSingleL1_gpu<int>, matchSingleL1_gpu<float>, 0, 0
        },
        {
            matchSingleL2_gpu<unsigned char>, matchSingleL2_gpu<char>, matchSingleL2_gpu<unsigned short>,
            matchSingleL2_gpu<short>, matchSingleL2_gpu<int>, matchSingleL2_gpu<float>, 0, 0
        }
    };

    CV_Assert(queryDescs.channels() == 1);
    CV_Assert(trainDescs.cols == queryDescs.cols && trainDescs.type() == queryDescs.type());

    const int nQuery = queryDescs.rows;

    trainIdx.create(1, nQuery, CV_32S);
    distance.create(1, nQuery, CV_32F);

    match_caller_t func = match_callers[distType][queryDescs.depth()];
    CV_Assert(func != 0);

    // For a single train image there is no need to save imgIdx, so we just pass trainIdx in its place.
    // trainIdx is stored after imgIdx, so its value is not lost.
    func(queryDescs, trainDescs, mask, trainIdx, trainIdx, distance);
}
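The [2][8] table above is a small function-pointer dispatch keyed by distance type and the OpenCV depth code; unsupported depths hold null slots that the assert rejects. A standalone sketch of the same pattern (all names here are illustrative, not part of OpenCV):

#include <cassert>
#include <cstdio>

// Dispatch on (distance type, element depth) through a static table of
// function pointers; empty slots are 0 and rejected with an assert.
typedef void (*kernel_t)(const void* query, const void* train);

template <typename T> void matchL1(const void*, const void*) { std::printf("L1\n"); }
template <typename T> void matchL2(const void*, const void*) { std::printf("L2\n"); }

enum { DEPTH_8U = 0, DEPTH_8S, DEPTH_16U, DEPTH_16S, DEPTH_32S, DEPTH_32F };

static const kernel_t callers[2][8] =
{
    { matchL1<unsigned char>, matchL1<char>, matchL1<unsigned short>,
      matchL1<short>, matchL1<int>, matchL1<float>, 0, 0 },
    { matchL2<unsigned char>, matchL2<char>, matchL2<unsigned short>,
      matchL2<short>, matchL2<int>, matchL2<float>, 0, 0 }
};

int main()
{
    kernel_t func = callers[1][DEPTH_32F]; // L2 distance, float descriptors
    assert(func != 0);                     // the last two depths are unsupported
    func(0, 0);
    return 0;
}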
void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, const GpuMat& distance,
    vector<DMatch>& matches)
{
    const int nQuery = trainIdx.cols;

    Mat trainIdxCPU = trainIdx;
    Mat distanceCPU = distance;

    matches.clear();
    matches.reserve(nQuery);

    const int* trainIdx_ptr = trainIdxCPU.ptr<int>();
    const float* distance_ptr = distanceCPU.ptr<float>();
    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
    {
        int trainIdx = *trainIdx_ptr;
        if (trainIdx == -1)
            continue;

        float distance = *distance_ptr;

        DMatch m(queryIdx, trainIdx, 0, distance);

        matches.push_back(m);
    }
}

void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& queryDescs, const GpuMat& trainDescs,
    vector<DMatch>& matches, const GpuMat& mask)
{
    GpuMat trainIdx, distance;
    matchSingle(queryDescs, trainDescs, trainIdx, distance, mask);
    matchDownload(trainIdx, distance, matches);
}

void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
    const vector<GpuMat>& masks)
{
    if (masks.empty())
    {
        Mat trainCollectionCPU(1, trainDescCollection.size(), CV_8UC(sizeof(DevMem2D)));

        for (size_t i = 0; i < trainDescCollection.size(); ++i)
        {
            const GpuMat& trainDescs = trainDescCollection[i];

            trainCollectionCPU.ptr<DevMem2D>(0)[i] = trainDescs;
        }

        trainCollection.upload(trainCollectionCPU);
    }
    else
    {
        CV_Assert(masks.size() == trainDescCollection.size());

        Mat trainCollectionCPU(1, trainDescCollection.size(), CV_8UC(sizeof(DevMem2D)));
        Mat maskCollectionCPU(1, trainDescCollection.size(), CV_8UC(sizeof(PtrStep)));

        for (size_t i = 0; i < trainDescCollection.size(); ++i)
        {
            const GpuMat& trainDescs = trainDescCollection[i];
            const GpuMat& mask = masks[i];

            CV_Assert(mask.empty() || (mask.type() == CV_8UC1));

            trainCollectionCPU.ptr<DevMem2D>(0)[i] = trainDescs;

            maskCollectionCPU.ptr<PtrStep>(0)[i] = static_cast<PtrStep>(mask);
        }

        trainCollection.upload(trainCollectionCPU);
        maskCollection.upload(maskCollectionCPU);
    }
}
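makeGpuCollection packs the per-image GpuMat headers (DevMem2D / PtrStep structs) into a single device row so that one kernel launch can walk every train image. From the caller's side, this path is driven through add() and the one-argument match(); a minimal sketch with synthetic data (sizes are illustrative):

#include <opencv2/gpu/gpu.hpp>
#include <vector>
#include <cstdio>

using namespace cv;

int main()
{
    // Two synthetic "train images", each with its own descriptor matrix.
    Mat train1(300, 64, CV_32F), train2(200, 64, CV_32F), query(50, 64, CV_32F);
    randu(train1, 0, 1); randu(train2, 0, 1); randu(query, 0, 1);

    std::vector<gpu::GpuMat> trainCollection;
    trainCollection.push_back(gpu::GpuMat(train1));
    trainCollection.push_back(gpu::GpuMat(train2));

    gpu::BruteForceMatcher_GPU< L2<float> > matcher;
    matcher.add(trainCollection);

    // Best match per query descriptor across all added images;
    // DMatch::imgIdx tells which train image won.
    std::vector<DMatch> matches;
    matcher.match(gpu::GpuMat(query), matches);

    for (size_t i = 0; i < matches.size() && i < 5; ++i)
        std::printf("query %d -> img %d, train %d (dist %f)\n",
            matches[i].queryIdx, matches[i].imgIdx, matches[i].trainIdx, matches[i].distance);
    return 0;
}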
void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& queryDescs, const GpuMat& trainCollection,
    GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& maskCollection)
{
    using namespace cv::gpu::bfmatcher;

    typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
        const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx,
        const DevMem2Df& distance);

    static const match_caller_t match_callers[2][8] =
    {
        {
            matchCollectionL1_gpu<unsigned char>, matchCollectionL1_gpu<char>,
            matchCollectionL1_gpu<unsigned short>, matchCollectionL1_gpu<short>,
            matchCollectionL1_gpu<int>, matchCollectionL1_gpu<float>, 0, 0
        },
        {
            matchCollectionL2_gpu<unsigned char>, matchCollectionL2_gpu<char>,
            matchCollectionL2_gpu<unsigned short>, matchCollectionL2_gpu<short>,
            matchCollectionL2_gpu<int>, matchCollectionL2_gpu<float>, 0, 0
        }
    };

    CV_Assert(queryDescs.channels() == 1);

    const int nQuery = queryDescs.rows;

    trainIdx.create(1, nQuery, CV_32S);
    imgIdx.create(1, nQuery, CV_32S);
    distance.create(1, nQuery, CV_32F);

    match_caller_t func = match_callers[distType][queryDescs.depth()];
    CV_Assert(func != 0);

    func(queryDescs, trainCollection, maskCollection, trainIdx, imgIdx, distance);
}

void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, GpuMat& imgIdx,
    const GpuMat& distance, vector<DMatch>& matches)
{
    const int nQuery = trainIdx.cols;

    Mat trainIdxCPU = trainIdx;
    Mat imgIdxCPU = imgIdx;
    Mat distanceCPU = distance;

    matches.clear();
    matches.reserve(nQuery);

    const int* trainIdx_ptr = trainIdxCPU.ptr<int>();
    const int* imgIdx_ptr = imgIdxCPU.ptr<int>();
    const float* distance_ptr = distanceCPU.ptr<float>();
    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
    {
        int trainIdx = *trainIdx_ptr;
        if (trainIdx == -1)
            continue;

        int imgIdx = *imgIdx_ptr;
        float distance = *distance_ptr;

        DMatch m(queryIdx, trainIdx, imgIdx, distance);

        matches.push_back(m);
    }
}

void cv::gpu::BruteForceMatcher_GPU_base::match(const GpuMat& queryDescs, vector<DMatch>& matches,
    const vector<GpuMat>& masks)
{
    GpuMat trainCollection;
    GpuMat maskCollection;

    makeGpuCollection(trainCollection, maskCollection, masks);

    GpuMat trainIdx, imgIdx, distance;

    matchCollection(queryDescs, trainCollection, trainIdx, imgIdx, distance, maskCollection);

    matchDownload(trainIdx, imgIdx, distance, matches);
}
////////////////////////////////////////////////////////////////////
// KnnMatch

void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
    GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask)
{
    using namespace cv::gpu::bfmatcher;

    typedef void (*match_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
        const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);

    static const match_caller_t match_callers[2][8] =
    {
        {
            knnMatchL1_gpu<unsigned char>, knnMatchL1_gpu<char>, knnMatchL1_gpu<unsigned short>,
            knnMatchL1_gpu<short>, knnMatchL1_gpu<int>, knnMatchL1_gpu<float>, 0, 0
        },
        {
            knnMatchL2_gpu<unsigned char>, knnMatchL2_gpu<char>, knnMatchL2_gpu<unsigned short>,
            knnMatchL2_gpu<short>, knnMatchL2_gpu<int>, knnMatchL2_gpu<float>, 0, 0
        }
    };

    CV_Assert(queryDescs.channels() == 1);

    const int nQuery = queryDescs.rows;
    const int nTrain = trainDescs.rows;

    trainIdx.create(nQuery, k, CV_32S);
    trainIdx.setTo(Scalar::all(-1));
    distance.create(nQuery, k, CV_32F);

    allDist.create(nQuery, nTrain, CV_32F);

    match_caller_t func = match_callers[distType][queryDescs.depth()];
    CV_Assert(func != 0);

    func(queryDescs, trainDescs, k, mask, trainIdx, distance, allDist);
}

void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
    vector< vector<DMatch> >& matches, bool compactResult)
{
    const int nQuery = distance.rows;
    const int k = trainIdx.cols;

    Mat trainIdxCPU = trainIdx;
    Mat distanceCPU = distance;

    matches.clear();
    matches.reserve(nQuery);

    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
        matches.push_back(vector<DMatch>());
        vector<DMatch>& curMatches = matches.back();
        curMatches.reserve(k);

        int* trainIdx_ptr = trainIdxCPU.ptr<int>(queryIdx);
        float* distance_ptr = distanceCPU.ptr<float>(queryIdx);
        for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
        {
            int trainIdx = *trainIdx_ptr;
            if (trainIdx != -1)
            {
                float distance = *distance_ptr;

                DMatch m(queryIdx, trainIdx, 0, distance);

                curMatches.push_back(m);
            }
        }

        if (compactResult && curMatches.empty())
            matches.pop_back();
    }
}

void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
    vector< vector<DMatch> >& matches, int k, const GpuMat& mask, bool compactResult)
{
    GpuMat trainIdx, distance, allDist;
    knnMatch(queryDescs, trainDescs, trainIdx, distance, allDist, k, mask);
    knnMatchDownload(trainIdx, distance, matches, compactResult);
}
namespace
{
    class ImgIdxSetter
    {
    public:
        ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
        void operator()(DMatch& m) const { m.imgIdx = imgIdx; }
    private:
        int imgIdx;
    };
}

void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& queryDescs,
    vector< vector<DMatch> >& matches, int knn, const vector<GpuMat>& masks, bool compactResult)
{
    vector< vector<DMatch> > curMatches;
    vector<DMatch> temp;
    temp.reserve(2 * knn);

    matches.resize(queryDescs.rows);
    for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&vector<DMatch>::reserve), knn));

    for (size_t imgIdx = 0; imgIdx < trainDescCollection.size(); ++imgIdx)
    {
        knnMatch(queryDescs, trainDescCollection[imgIdx], curMatches, knn,
            masks.empty() ? GpuMat() : masks[imgIdx]);

        for (int queryIdx = 0; queryIdx < queryDescs.rows; ++queryIdx)
        {
            vector<DMatch>& localMatch = curMatches[queryIdx];
            vector<DMatch>& globalMatch = matches[queryIdx];

            for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(imgIdx));

            temp.clear();
            merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(),
                back_inserter(temp));

            globalMatch.clear();
            const size_t count = std::min((size_t)knn, temp.size());
            copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
        }
    }

    if (compactResult)
    {
        vector< vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(),
            mem_fun_ref(&vector<DMatch>::empty));
        matches.erase(new_end, matches.end());
    }
}
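The per-image loop above keeps each query's global list sorted by merging it with the newly arrived (already sorted) per-image list, then truncating to the k best. The same idea in isolation, with plain ints standing in for DMatch distances:

#include <algorithm>
#include <iterator>
#include <vector>
#include <cstdio>

int main()
{
    // Two sorted "match lists" for one query descriptor; keep the k best overall.
    const size_t k = 3;
    std::vector<int> global, local, temp;
    int g[] = { 2, 5, 9 };  // distances accumulated from earlier images
    int l[] = { 1, 6 };     // distances from the next train image
    global.assign(g, g + 3);
    local.assign(l, l + 2);

    // Merge the two sorted ranges, then truncate to the k smallest.
    std::merge(global.begin(), global.end(), local.begin(), local.end(),
               std::back_inserter(temp));
    global.assign(temp.begin(), temp.begin() + std::min(k, temp.size()));

    for (size_t i = 0; i < global.size(); ++i)
        std::printf("%d ", global[i]);  // prints: 1 2 5
    return 0;
}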
////////////////////////////////////////////////////////////////////
// RadiusMatch

void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
    GpuMat& trainIdx, GpuMat& nMatches, GpuMat& distance, float maxDistance, const GpuMat& mask)
{
    using namespace cv::gpu::bfmatcher;

    typedef void (*radiusMatch_caller_t)(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
        float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches,
        const DevMem2Df& distance);

    static const radiusMatch_caller_t radiusMatch_callers[2][8] =
    {
        {
            radiusMatchL1_gpu<unsigned char>, radiusMatchL1_gpu<char>, radiusMatchL1_gpu<unsigned short>,
            radiusMatchL1_gpu<short>, radiusMatchL1_gpu<int>, radiusMatchL1_gpu<float>, 0, 0
        },
        {
            radiusMatchL2_gpu<unsigned char>, radiusMatchL2_gpu<char>, radiusMatchL2_gpu<unsigned short>,
            radiusMatchL2_gpu<short>, radiusMatchL2_gpu<int>, radiusMatchL2_gpu<float>, 0, 0
        }
    };

    const int nQuery = queryDescs.rows;
    const int nTrain = trainDescs.rows;

    CV_Assert(queryDescs.channels() == 1);
    CV_Assert(trainDescs.type() == queryDescs.type() && trainDescs.cols == queryDescs.cols);
    CV_Assert(trainIdx.empty() || trainIdx.rows == nQuery);

    nMatches.create(1, nQuery, CV_32SC1);
    nMatches.setTo(Scalar::all(0));

    if (trainIdx.empty())
    {
        trainIdx.create(nQuery, nTrain, CV_32SC1);
        distance.create(nQuery, nTrain, CV_32FC1);
    }

    radiusMatch_caller_t func = radiusMatch_callers[distType][queryDescs.depth()];
    CV_Assert(func != 0);

    func(queryDescs, trainDescs, maxDistance, mask, trainIdx, nMatches.ptr<unsigned int>(), distance);
}

void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& nMatches,
    const GpuMat& distance, std::vector< std::vector<DMatch> >& matches, bool compactResult)
{
    const int nQuery = trainIdx.rows;

    Mat trainIdxCPU = trainIdx;
    Mat nMatchesCPU = nMatches;
    Mat distanceCPU = distance;

    matches.clear();
    matches.reserve(nQuery);

    const unsigned int* nMatches_ptr = nMatchesCPU.ptr<unsigned int>();
    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
    {
        const int* trainIdx_ptr = trainIdxCPU.ptr<int>(queryIdx);
        const float* distance_ptr = distanceCPU.ptr<float>(queryIdx);

        const int nMatches = std::min(static_cast<int>(nMatches_ptr[queryIdx]), trainIdx.cols);

        if (nMatches == 0)
        {
            if (!compactResult)
                matches.push_back(vector<DMatch>());
            continue;
        }

        matches.push_back(vector<DMatch>());
        vector<DMatch>& curMatches = matches.back();
        curMatches.reserve(nMatches);

        for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)
        {
            int trainIdx = *trainIdx_ptr;

            float distance = *distance_ptr;

            DMatch m(queryIdx, trainIdx, 0, distance);

            curMatches.push_back(m);
        }

        sort(curMatches.begin(), curMatches.end());
    }
}

void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs, const GpuMat& trainDescs,
    vector< vector<DMatch> >& matches, float maxDistance, const GpuMat& mask, bool compactResult)
{
    GpuMat trainIdx, nMatches, distance;
    radiusMatch(queryDescs, trainDescs, trainIdx, nMatches, distance, maxDistance, mask);
    radiusMatchDownload(trainIdx, nMatches, distance, matches, compactResult);
}
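radiusMatch returns every neighbour inside a distance ball rather than a fixed count, so the natural use is geometric filtering. A small sketch (the 0.5f radius is an arbitrary illustrative threshold):

#include <opencv2/gpu/gpu.hpp>
#include <vector>
#include <cstdio>

using namespace cv;

int main()
{
    Mat queryCPU(50, 64, CV_32F), trainCPU(500, 64, CV_32F);
    randu(queryCPU, 0, 1);
    randu(trainCPU, 0, 1);

    gpu::BruteForceMatcher_GPU< L2<float> > matcher;

    // All train descriptors within L2 distance 0.5 of each query descriptor;
    // with compactResult left false, every query keeps a slot, possibly empty.
    std::vector< std::vector<DMatch> > matches;
    matcher.radiusMatch(gpu::GpuMat(queryCPU), gpu::GpuMat(trainCPU), matches, 0.5f);

    for (size_t i = 0; i < matches.size(); ++i)
        if (!matches[i].empty())
            std::printf("query %d has %d neighbours within radius\n",
                (int)i, (int)matches[i].size());
    return 0;
}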
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& queryDescs,
    vector< vector<DMatch> >& matches, float maxDistance, const vector<GpuMat>& masks, bool compactResult)
{
    matches.resize(queryDescs.rows);

    vector< vector<DMatch> > curMatches;

    for (size_t imgIdx = 0; imgIdx < trainDescCollection.size(); ++imgIdx)
    {
        radiusMatch(queryDescs, trainDescCollection[imgIdx], curMatches, maxDistance,
            masks.empty() ? GpuMat() : masks[imgIdx]);

        for (int queryIdx = 0; queryIdx < queryDescs.rows; ++queryIdx)
        {
            vector<DMatch>& localMatch = curMatches[queryIdx];
            vector<DMatch>& globalMatch = matches[queryIdx];

            for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(imgIdx));

            const size_t oldSize = globalMatch.size();

            copy(localMatch.begin(), localMatch.end(), back_inserter(globalMatch));
            inplace_merge(globalMatch.begin(), globalMatch.begin() + oldSize, globalMatch.end());
        }
    }

    if (compactResult)
    {
        vector< vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(),
            mem_fun_ref(&vector<DMatch>::empty));
        matches.erase(new_end, matches.end());
    }
}

#endif /* !defined (HAVE_CUDA) */
modules/gpu/src/cuda/brute_force_matcher.cu (new file, mode 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "cuda_shared.hpp"
#include "limits_gpu.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace bfmatcher
{
///////////////////////////////////////////////////////////////////////////////////
////////////////////////////////// General funcs //////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
template <bool expr> struct StaticAssert;
template <> struct StaticAssert<true> {static __host__ __device__ void check(){}};
///////////////////////////////////////////////////////////////////////////////
// Mask strategy
class SingleMask
{
public:
explicit SingleMask(const PtrStep& mask_) : mask(mask_) {}
__device__ bool operator()(int queryIdx, int trainIdx) const
{
return mask.ptr(queryIdx)[trainIdx] != 0;
}
private:
PtrStep mask;
};
class MaskCollection
{
public:
explicit MaskCollection(PtrStep* maskCollection_) : maskCollection(maskCollection_) {}
__device__ void nextMask()
{
curMask = *maskCollection++;
}
__device__ bool operator()(int queryIdx, int trainIdx) const
{
return curMask.data == 0 || curMask.ptr(queryIdx)[trainIdx] != 0;
}
private:
PtrStep* maskCollection;
PtrStep curMask;
};
class WithOutMask
{
public:
__device__ void nextMask()
{
}
__device__ bool operator()(int queryIdx, int trainIdx) const
{
return true;
}
};
///////////////////////////////////////////////////////////////////////////////
// Reduce Sum
template <int BLOCK_DIM_X>
__device__ void reduceSum(float* sdiff, float mySum, int tid)
{
sdiff[tid] = mySum;
__syncthreads();
if (BLOCK_DIM_X == 512)
{
if (tid < 256)
{
sdiff[tid] = mySum += sdiff[tid + 256]; __syncthreads();
sdiff[tid] = mySum += sdiff[tid + 128]; __syncthreads();
sdiff[tid] = mySum += sdiff[tid + 64]; __syncthreads();
}
volatile float* smem = sdiff;
smem[tid] = mySum += smem[tid + 32];
smem[tid] = mySum += smem[tid + 16];
smem[tid] = mySum += smem[tid + 8];
smem[tid] = mySum += smem[tid + 4];
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
if (BLOCK_DIM_X == 256)
{
if (tid < 128)
{
sdiff[tid] = mySum += sdiff[tid + 128]; __syncthreads();
sdiff[tid] = mySum += sdiff[tid + 64]; __syncthreads();
}
volatile float* smem = sdiff;
smem[tid] = mySum += smem[tid + 32];
smem[tid] = mySum += smem[tid + 16];
smem[tid] = mySum += smem[tid + 8];
smem[tid] = mySum += smem[tid + 4];
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
if (BLOCK_DIM_X == 128)
{
if (tid < 64)
{
sdiff[tid] = mySum += sdiff[tid + 64]; __syncthreads();
}
volatile float* smem = sdiff;
smem[tid] = mySum += smem[tid + 32];
smem[tid] = mySum += smem[tid + 16];
smem[tid] = mySum += smem[tid + 8];
smem[tid] = mySum += smem[tid + 4];
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
volatile float* smem = sdiff;
if (BLOCK_DIM_X == 64)
{
if (tid < 32)
{
smem[tid] = mySum += smem[tid + 32];
smem[tid] = mySum += smem[tid + 16];
smem[tid] = mySum += smem[tid + 8];
smem[tid] = mySum += smem[tid + 4];
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
}
if (BLOCK_DIM_X == 32)
{
if (tid < 16)
{
smem[tid] = mySum += smem[tid + 16];
smem[tid] = mySum += smem[tid + 8];
smem[tid] = mySum += smem[tid + 4];
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
}
if (BLOCK_DIM_X == 16)
{
if (tid < 8)
{
smem[tid] = mySum += smem[tid + 8];
smem[tid] = mySum += smem[tid + 4];
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
}
if (BLOCK_DIM_X == 8)
{
if (tid < 4)
{
smem[tid] = mySum += smem[tid + 4];
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
}
if (BLOCK_DIM_X == 4)
{
if (tid < 2)
{
smem[tid] = mySum += smem[tid + 2];
smem[tid] = mySum += smem[tid + 1];
}
}
if (BLOCK_DIM_X == 2)
{
if (tid < 1)
{
smem[tid] = mySum += smem[tid + 1];
}
}
}
///////////////////////////////////////////////////////////////////////////////
// loadDescsVals
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int MAX_DESCRIPTORS_LEN, typename T>
__device__ void loadDescsVals(const T* descs, int desc_len, float* smem, float* queryVals)
{
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
if (tid < desc_len)
{
smem[tid] = (float)descs[tid];
}
__syncthreads();
#pragma unroll
for (int i = threadIdx.x; i < MAX_DESCRIPTORS_LEN; i += BLOCK_DIM_X)
{
*queryVals = smem[i];
++queryVals;
}
}
///////////////////////////////////////////////////////////////////////////////
// Distance
template <int BLOCK_DIM_X>
class L1Dist
{
public:
__device__ L1Dist() : mySum(0) {}
__device__ void reduceIter(float val1, float val2)
{
mySum += fabs(val1 - val2);
}
__device__ void reduceAll(float* sdiff, int tid)
{
reduceSum<BLOCK_DIM_X>(sdiff, mySum, tid);
}
static __device__ float finalResult(float res)
{
return res;
}
private:
float mySum;
};
template <int BLOCK_DIM_X>
class L2Dist
{
public:
__device__ L2Dist() : mySum(0) {}
__device__ void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}
__device__ void reduceAll(float* sdiff, int tid)
{
reduceSum<BLOCK_DIM_X>(sdiff, mySum, tid);
}
static __device__ float finalResult(float res)
{
return sqrtf(res);
}
private:
float mySum;
};
///////////////////////////////////////////////////////////////////////////////
// reduceDescDiff
template <int BLOCK_DIM_X, typename Dist, typename T>
__device__ void reduceDescDiff(const T* queryDescs, const T* trainDescs, int desc_len, float* sdiff)
{
const int tid = threadIdx.x;
Dist dist;
for (int i = tid; i < desc_len; i += BLOCK_DIM_X)
dist.reduceIter(queryDescs[i], trainDescs[i]);
dist.reduceAll(sdiff, tid);
}
///////////////////////////////////////////////////////////////////////////////
// reduceDescDiff_smem
template <int N> struct UnrollDescDiff
{
template <typename Dist, typename T>
static __device__ void calcCheck(Dist& dist, const float* queryVals, const T* trainDescs,
int ind, int desc_len)
{
if (ind < desc_len)
dist.reduceIter(*queryVals, trainDescs[ind]);
++queryVals;
UnrollDescDiff<N - 1>::calcCheck(dist, queryVals, trainDescs, ind + blockDim.x, desc_len);
}
template <typename Dist, typename T>
static __device__ void calcWithoutCheck(Dist& dist, const float* queryVals, const T* trainDescs)
{
dist.reduceIter(*queryVals, *trainDescs);
++queryVals;
trainDescs += blockDim.x;
UnrollDescDiff<N - 1>::calcWithoutCheck(dist, queryVals, trainDescs);
}
};
template <> struct UnrollDescDiff<0>
{
template <typename Dist, typename T>
static __device__ void calcCheck(Dist& dist, const float* queryVals, const T* trainDescs,
int ind, int desc_len)
{
}
template <typename Dist, typename T>
static __device__ void calcWithoutCheck(Dist& dist, const float* queryVals, const T* trainDescs)
{
}
};
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool WITH_OUT_CHECK> struct DescDiffCalculator;
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN>
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, false>
{
template <typename Dist, typename T>
static __device__ void calc(Dist& dist, const float* queryVals, const T* trainDescs, int desc_len)
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcCheck(dist, queryVals, trainDescs,
threadIdx.x, desc_len);
}
};
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN>
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, true>
{
template <typename Dist, typename T>
static __device__ void calc(Dist& dist, const float* queryVals, const T* trainDescs, int desc_len)
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcWithoutCheck(dist, queryVals,
trainDescs + threadIdx.x);
}
};
template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T>
__device__ void reduceDescDiff_smem(const float* queryVals, const T* trainDescs, int desc_len, float* sdiff)
{
const int tid = threadIdx.x;
Dist dist;
DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>::calc(dist, queryVals,
trainDescs, desc_len);
dist.reduceAll(sdiff, tid);
}
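UnrollDescDiff is compile-time recursion: each template level peels off one accumulate step until the <0> specialization terminates the chain, so the per-thread strided loop over descriptor elements is fully unrolled. The same device-independent trick in plain C++ (names here are illustrative, not from this file):

#include <cstdio>

// Recursive template: each level adds one element, stepping by STRIDE,
// and the <0> specialization ends the recursion at compile time.
template <int N> struct UnrolledSum
{
    template <int STRIDE>
    static float accum(const float* v, int ind, int len)
    {
        float x = (ind < len) ? v[ind] : 0.0f;
        return x + UnrolledSum<N - 1>::template accum<STRIDE>(v, ind + STRIDE, len);
    }
};
template <> struct UnrolledSum<0>
{
    template <int STRIDE>
    static float accum(const float*, int, int) { return 0.0f; }
};

int main()
{
    float v[] = { 1, 2, 3, 4, 5, 6, 7 };
    // 4 unrolled steps, stride 2, starting at index 0: v[0]+v[2]+v[4]+v[6] = 16
    std::printf("%g\n", UnrolledSum<4>::accum<2>(v, 0, 7));
    return 0;
}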
///////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////// Match //////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// warpReduceMin
template <int BLOCK_DIM_Y>
__device__ void warpReduceMin(int tid, volatile float* sdata, volatile int* strainIdx, volatile int* simgIdx)
{
float minSum = sdata[tid];
if (BLOCK_DIM_Y >= 64)
{
float reg = sdata[tid + 32];
if (reg < minSum)
{
sdata[tid] = minSum = reg;
strainIdx[tid] = strainIdx[tid + 32];
simgIdx[tid] = simgIdx[tid + 32];
}
}
if (BLOCK_DIM_Y >= 32)
{
float reg = sdata[tid + 16];
if (reg < minSum)
{
sdata[tid] = minSum = reg;
strainIdx[tid] = strainIdx[tid + 16];
simgIdx[tid] = simgIdx[tid + 16];
}
}
if (BLOCK_DIM_Y >= 16)
{
float reg = sdata[tid + 8];
if (reg < minSum)
{
sdata[tid] = minSum = reg;
strainIdx[tid] = strainIdx[tid + 8];
simgIdx[tid] = simgIdx[tid + 8];
}
}
if (BLOCK_DIM_Y >= 8)
{
float reg = sdata[tid + 4];
if (reg < minSum)
{
sdata[tid] = minSum = reg;
strainIdx[tid] = strainIdx[tid + 4];
simgIdx[tid] = simgIdx[tid + 4];
}
}
if (BLOCK_DIM_Y >= 4)
{
float reg = sdata[tid + 2];
if (reg < minSum)
{
sdata[tid] = minSum = reg;
strainIdx[tid] = strainIdx[tid + 2];
simgIdx[tid] = simgIdx[tid + 2];
}
}
if (BLOCK_DIM_Y >= 2)
{
float reg = sdata[tid + 1];
if (reg < minSum)
{
sdata[tid] = minSum = reg;
strainIdx[tid] = strainIdx[tid + 1];
simgIdx[tid] = simgIdx[tid + 1];
}
}
}
///////////////////////////////////////////////////////////////////////////////
// findBestMatch
template <int BLOCK_DIM_Y, typename Dist>
__device__ void findBestMatch(int queryIdx, float myMin, int myBestTrainIdx, int myBestImgIdx,
float* smin, int* strainIdx, int* simgIdx, int* trainIdx, int* imgIdx, float* distance)
{
if (threadIdx.x == 0)
{
smin[threadIdx.y] = myMin;
strainIdx[threadIdx.y] = myBestTrainIdx;
simgIdx[threadIdx.y] = myBestImgIdx;
}
__syncthreads();
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
if (tid < 32)
warpReduceMin<BLOCK_DIM_Y>(tid, smin, strainIdx, simgIdx);
if (threadIdx.x == 0 && threadIdx.y == 0)
{
float minSum = smin[0];
int bestTrainIdx = strainIdx[0];
int bestImgIdx = simgIdx[0];
imgIdx[queryIdx] = bestImgIdx;
trainIdx[queryIdx] = bestTrainIdx;
distance[queryIdx] = Dist::finalResult(minSum);
}
}
///////////////////////////////////////////////////////////////////////////////
// ReduceDescCalculator
template <int BLOCK_DIM_X, typename Dist, typename T>
class ReduceDescCalculatorSimple
{
public:
__device__ void prepare(const T* queryDescs_, int, float*)
{
queryDescs = queryDescs_;
}
__device__ void calc(const T* trainDescs, int desc_len, float* sdiff_row) const
{
reduceDescDiff<BLOCK_DIM_X, Dist>(queryDescs, trainDescs, desc_len, sdiff_row);
}
private:
const T* queryDescs;
};
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN,
typename Dist, typename T>
class ReduceDescCalculatorSmem
{
public:
__device__ void prepare(const T* queryDescs, int desc_len, float* smem)
{
loadDescsVals<BLOCK_DIM_X, BLOCK_DIM_Y, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, smem, queryVals);
}
__device__ void calc(const T* trainDescs, int desc_len, float* sdiff_row) const
{
reduceDescDiff_smem<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN, Dist>(queryVals, trainDescs,
desc_len, sdiff_row);
}
private:
float queryVals[MAX_DESCRIPTORS_LEN / BLOCK_DIM_X];
};
///////////////////////////////////////////////////////////////////////////////
// matchDescs loop
template <typename ReduceDescCalculator, typename T, typename Mask>
__device__ void matchDescs(int queryIdx, const int imgIdx, const DevMem2D_<T>& trainDescs_,
const Mask& m, const ReduceDescCalculator& reduceDescCalc,
float* sdiff_row, float& myMin, int& myBestTrainIdx, int& myBestImgIdx)
{
const T* trainDescs = trainDescs_.ptr(threadIdx.y);
const int trainDescsStep = blockDim.y * trainDescs_.step / sizeof(T);
for (int trainIdx = threadIdx.y; trainIdx < trainDescs_.rows;
trainIdx += blockDim.y, trainDescs += trainDescsStep)
{
if (m(queryIdx, trainIdx))
{
reduceDescCalc.calc(trainDescs, trainDescs_.cols, sdiff_row);
if (threadIdx.x == 0)
{
float reg = sdiff_row[0];
if (reg < myMin)
{
myMin = reg;
myBestTrainIdx = trainIdx;
myBestImgIdx = imgIdx;
}
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
// Train collection loop strategy
template <typename T>
class SingleTrain
{
public:
explicit SingleTrain(const DevMem2D_<T>& trainDescs_) : trainDescs(trainDescs_)
{
}
template <typename ReduceDescCalculator, typename Mask>
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
float* sdiff_row, float& myMin, int& myBestTrainIdx, int& myBestImgIdx) const
{
matchDescs(queryIdx, 0, trainDescs, m, reduceDescCalc,
sdiff_row, myMin, myBestTrainIdx, myBestImgIdx);
}
__device__ int desc_len() const
{
return trainDescs.cols;
}
private:
DevMem2D_<T> trainDescs;
};
template <typename T>
class TrainCollection
{
public:
TrainCollection(const DevMem2D_<T>* trainCollection_, int nImg_, int desclen_) :
trainCollection(trainCollection_), nImg(nImg_), desclen(desclen_)
{
}
template <typename ReduceDescCalculator, typename Mask>
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
float* sdiff_row, float& myMin, int& myBestTrainIdx, int& myBestImgIdx) const
{
for (int imgIdx = 0; imgIdx < nImg; ++imgIdx)
{
DevMem2D_<T> trainDescs = trainCollection[imgIdx];
m.nextMask();
matchDescs(queryIdx, imgIdx, trainDescs, m, reduceDescCalc,
sdiff_row, myMin, myBestTrainIdx, myBestImgIdx);
}
}
__device__ int desc_len() const
{
return desclen;
}
private:
const DevMem2D_<T>* trainCollection;
int nImg;
int desclen;
};
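SingleTrain and TrainCollection are compile-time strategy objects: the match kernel is instantiated once per Train type, so the single-image and multi-image loops pay no runtime dispatch cost. A host-side sketch of the same policy-based design (illustrative, not from the file):

#include <cstdio>

// Two loop policies with the same interface; the algorithm below is
// instantiated separately for each, so the call is resolved statically.
struct SingleTrain
{
    void loop() const { std::printf("match against one image\n"); }
};
struct TrainCollection
{
    int nImg;
    void loop() const
    {
        for (int i = 0; i < nImg; ++i)
            std::printf("match against image %d\n", i);
    }
};

template <typename Train>
void match(const Train& train)
{
    train.loop();  // no virtual call; bound at compile time
}

int main()
{
    SingleTrain s;
    TrainCollection c = { 2 };
    match(s);
    match(c);
    return 0;
}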
///////////////////////////////////////////////////////////////////////////////
// Match kernel
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename ReduceDescCalculator, typename Dist, typename T,
typename Train, typename Mask>
__global__ void match(PtrStep_<T> queryDescs_, Train train, Mask mask, int* trainIdx, int* imgIdx, float* distance)
{
__shared__ float sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
__shared__ float smin[64];
__shared__ int strainIdx[64];
__shared__ int simgIdx[64];
const int queryIdx = blockIdx.x;
int myBestTrainIdx = -1;
int myBestImgIdx = -1;
float myMin = numeric_limits_gpu<float>::max();
{
float* sdiff_row = sdiff + BLOCK_DIM_X * threadIdx.y;
Mask m = mask;
ReduceDescCalculator reduceDescCalc;
reduceDescCalc.prepare(queryDescs_.ptr(queryIdx), train.desc_len(), sdiff);
train.loop(queryIdx, m, reduceDescCalc, sdiff_row, myMin, myBestTrainIdx, myBestImgIdx);
}
findBestMatch<BLOCK_DIM_Y, Dist>(queryIdx, myMin, myBestTrainIdx, myBestImgIdx,
smin, strainIdx, simgIdx, trainIdx, imgIdx, distance);
}
///////////////////////////////////////////////////////////////////////////////
// Match kernel callers
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, template <int> class Dist, typename T,
typename Train, typename Mask>
void match_caller(const DevMem2D_<T>& queryDescs, const Train& train,
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
StaticAssert<BLOCK_DIM_Y <= 64>::check(); // BLOCK_DIM_Y values must be reducible within a single warp
dim3 grid(queryDescs.rows, 1, 1);
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
match<BLOCK_DIM_X, BLOCK_DIM_Y, ReduceDescCalculatorSimple<BLOCK_DIM_X, Dist<BLOCK_DIM_X>, T>,
Dist<BLOCK_DIM_X>, T><<<grid, threads>>>(queryDescs, train, mask, trainIdx.data,
imgIdx.data, distance.data);
cudaSafeCall( cudaThreadSynchronize() );
}
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN,
template <int> class Dist, typename T, typename Train, typename Mask>
void match_smem_caller(const DevMem2D_<T>& queryDescs, const Train& train,
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
StaticAssert<BLOCK_DIM_Y <= 64>::check(); // BLOCK_DIM_Y values must be reducible within a single warp
StaticAssert<BLOCK_DIM_X * BLOCK_DIM_Y >= MAX_DESCRIPTORS_LEN>::check(); // block size must be greater than the descriptor length
StaticAssert<MAX_DESCRIPTORS_LEN % BLOCK_DIM_X == 0>::check(); // maximum descriptor length must be divisible by BLOCK_DIM_X
dim3 grid(queryDescs.rows, 1, 1);
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
match<BLOCK_DIM_X, BLOCK_DIM_Y, ReduceDescCalculatorSmem<BLOCK_DIM_X, BLOCK_DIM_Y,
MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN, Dist<BLOCK_DIM_X>, T>,
Dist<BLOCK_DIM_X>, T><<<grid, threads>>>(queryDescs, train, mask, trainIdx.data,
imgIdx.data, distance.data);
cudaSafeCall( cudaThreadSynchronize() );
}
///////////////////////////////////////////////////////////////////////////////
// Match kernel chooser
template <template <int> class Dist, typename T, typename Train, typename Mask>
void match_chooser(const DevMem2D_<T>& queryDescs, const Train& train,
const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
if (queryDescs.cols < 64)
match_smem_caller<16, 16, 64, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
else if (queryDescs.cols == 64)
match_smem_caller<16, 16, 64, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
else if (queryDescs.cols < 128)
match_smem_caller<16, 16, 128, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
else if (queryDescs.cols == 128)
match_smem_caller<16, 16, 128, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
else if (queryDescs.cols < 256)
match_smem_caller<16, 16, 256, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
else if (queryDescs.cols == 256)
match_smem_caller<16, 16, 256, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
else
match_caller<16, 16, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance);
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T>
void matchSingleL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
SingleTrain<T> train((DevMem2D_<T>)trainDescs);
if (mask.data)
{
SingleMask m(mask);
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance);
}
else
{
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance);
}
}
template void matchSingleL1_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL1_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL1_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template <typename T>
void matchSingleL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
SingleTrain<T> train((DevMem2D_<T>)trainDescs);
if (mask.data)
{
SingleMask m(mask);
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance);
}
else
{
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance);
}
}
template void matchSingleL2_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL2_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL2_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchSingleL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template <typename T>
void matchCollectionL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, queryDescs.cols);
if (maskCollection.data)
{
MaskCollection mask(maskCollection.data);
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance);
}
else
{
match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance);
}
}
template void matchCollectionL1_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL1_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL1_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template <typename T>
void matchCollectionL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainCollection,
const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance)
{
TrainCollection<T> train((DevMem2D_<T>*)trainCollection.ptr(), trainCollection.cols, queryDescs.cols);
if (maskCollection.data)
{
MaskCollection mask(maskCollection.data);
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance);
}
else
{
match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance);
}
}
template void matchCollectionL2_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL2_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL2_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
template void matchCollectionL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainCollection, const DevMem2D_<PtrStep>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance);
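        // Illustrative host-side sketch (not part of this file): how these entry points are
        // reached through the public cv::gpu::BruteForceMatcher_GPU_base API declared in
        // gpu.hpp. The descriptor contents are placeholders; only the call sequence is shown.
        //
        //     void exampleMatch(const cv::gpu::GpuMat& queryDescs, const cv::gpu::GpuMat& trainDescs)
        //     {
        //         cv::gpu::BruteForceMatcher_GPU_base matcher(cv::gpu::BruteForceMatcher_GPU_base::L2Dist);
        //         cv::gpu::GpuMat trainIdx, distance;
        //         matcher.matchSingle(queryDescs, trainDescs, trainIdx, distance); // no mask
        //
        //         std::vector<cv::DMatch> matches;
        //         cv::gpu::BruteForceMatcher_GPU_base::matchDownload(trainIdx, distance, matches);
        //     }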
///////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////// Knn Match ////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Calc distance kernel
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
__global__ void calcDistance(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_, Mask mask, PtrStepf distance)
{
__shared__ float sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
float* sdiff_row = sdiff + BLOCK_DIM_X * threadIdx.y;
const int queryIdx = blockIdx.x;
const T* queryDescs = queryDescs_.ptr(queryIdx);
const int trainIdx = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (trainIdx < trainDescs_.rows)
{
const T* trainDescs = trainDescs_.ptr(trainIdx);
float dist = numeric_limits_gpu<float>::max();
if (mask(queryIdx, trainIdx))
{
reduceDescDiff<BLOCK_DIM_X, Dist>(queryDescs, trainDescs, trainDescs_.cols, sdiff_row);
if (threadIdx.x == 0)
{
dist = Dist::finalResult(sdiff_row[0]);
}
}
if (threadIdx.x == 0)
distance.ptr(queryIdx)[trainIdx] = dist;
}
}
///////////////////////////////////////////////////////////////////////////////
// Calc distance kernel caller
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, template <int> class Dist, typename T, typename Mask>
void calcDistance_caller(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
const Mask& mask, const DevMem2Df& distance)
{
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
dim3 grid(queryDescs.rows, divUp(trainDescs.rows, BLOCK_DIM_Y), 1);
calcDistance<BLOCK_DIM_X, BLOCK_DIM_Y, Dist<BLOCK_DIM_X>, T><<<grid, threads>>>(
queryDescs, trainDescs, mask, distance);
cudaSafeCall( cudaThreadSynchronize() );
}
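        // Launch geometry note: grid.x enumerates query descriptors (one block column per query),
        // grid.y covers the train set in BLOCK_DIM_Y-row slabs, and each BLOCK_DIM_X-thread row
        // of a block reduces one query/train pair into its sdiff_row slice of shared memory.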
///////////////////////////////////////////////////////////////////////////////
// reduceMin
template <int BLOCK_SIZE>
__device__ void warpReduceMinIdx(volatile float* sdist, volatile int* strainIdx, float& myMin, int tid)
{
if (tid < 32)
{
if (BLOCK_SIZE >= 64)
{
float reg = sdist[tid + 32];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 32];
}
}
if (BLOCK_SIZE >= 32)
{
float reg = sdist[tid + 16];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 16];
}
}
if (BLOCK_SIZE >= 16)
{
float reg = sdist[tid + 8];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 8];
}
}
if (BLOCK_SIZE >= 8)
{
float reg = sdist[tid + 4];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 4];
}
}
if (BLOCK_SIZE >= 4)
{
float reg = sdist[tid + 2];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 2];
}
}
if (BLOCK_SIZE >= 2)
{
float reg = sdist[tid + 1];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 1];
}
}
}
}
template <int BLOCK_SIZE>
__device__ void reduceMinIdx(const float* dist, int n, float* sdist, int* strainIdx)
{
const int tid = threadIdx.x;
float myMin = numeric_limits_gpu<float>::max();
int myMinIdx = -1;
for (int i = tid; i < n; i += BLOCK_SIZE)
{
float reg = dist[i];
if (reg < myMin)
{
myMin = reg;
myMinIdx = i;
}
}
sdist[tid] = myMin;
strainIdx[tid] = myMinIdx;
__syncthreads();
if (BLOCK_SIZE >= 512 && tid < 256)
{
float reg = sdist[tid + 256];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 256];
}
__syncthreads();
}
if (BLOCK_SIZE >= 256 && tid < 128)
{
float reg = sdist[tid + 128];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 128];
}
__syncthreads();
}
if (BLOCK_SIZE >= 128 && tid < 64)
{
float reg = sdist[tid + 64];
if (reg < myMin)
{
sdist[tid] = myMin = reg;
strainIdx[tid] = strainIdx[tid + 64];
}
__syncthreads();
}
warpReduceMinIdx<BLOCK_SIZE>(sdist, strainIdx, myMin, tid);
}
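        // reduceMinIdx is a standard block-wide tree reduction: the strided loop gathers each
        // thread's local minimum, halving steps separated by __syncthreads() merge them down to
        // 64 elements, and warpReduceMinIdx finishes warp-synchronously without barriers,
        // relying on the 32-thread warp executing in lockstep on this hardware generation.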
///////////////////////////////////////////////////////////////////////////////
// find knn match kernel
template <int BLOCK_SIZE>
__global__ void findBestMatch(DevMem2Df allDist_, int i, PtrStepi trainIdx_, PtrStepf distance_)
{
const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
__shared__ float sdist[SMEM_SIZE];
__shared__ int strainIdx[SMEM_SIZE];
const int queryIdx = blockIdx.x;
float* allDist = allDist_.ptr(queryIdx);
int* trainIdx = trainIdx_.ptr(queryIdx);
float* distance = distance_.ptr(queryIdx);
reduceMinIdx<BLOCK_SIZE>(allDist, allDist_.cols, sdist, strainIdx);
if (threadIdx.x == 0)
{
float dist = sdist[0];
if (dist < numeric_limits_gpu<float>::max())
{
int bestIdx = strainIdx[0];
allDist[bestIdx] = numeric_limits_gpu<float>::max();
trainIdx[i] = bestIdx;
distance[i] = dist;
}
}
}
///////////////////////////////////////////////////////////////////////////////
// find knn match kernel caller
template <int BLOCK_SIZE>
void findKnnMatch_caller(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
{
dim3 threads(BLOCK_SIZE, 1, 1);
dim3 grid(trainIdx.rows, 1, 1);
for (int i = 0; i < knn; ++i)
findBestMatch<BLOCK_SIZE><<<grid, threads>>>(allDist, i, trainIdx, distance);
cudaSafeCall( cudaThreadSynchronize() );
}
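        // findKnnMatch_caller runs findBestMatch knn times: pass i extracts the current
        // per-query minimum from allDist, records it in column i of trainIdx/distance, and
        // overwrites the winning cell with numeric_limits_gpu<float>::max() so that pass i+1
        // yields the next-best match.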
///////////////////////////////////////////////////////////////////////////////
// knn match caller
template <typename T>
void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
{
if (mask.data)
{
calcDistance_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
SingleMask(mask), allDist);
}
else
{
calcDistance_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
WithOutMask(), allDist);
}
findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
}
template void knnMatchL1_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL1_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL1_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template <typename T>
void knnMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
{
if (mask.data)
{
calcDistance_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
SingleMask(mask), allDist);
}
else
{
calcDistance_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
WithOutMask(), allDist);
}
findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
}
template void knnMatchL2_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
template void knnMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
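        // Illustrative host-side sketch (assumption: mirrors the knnMatch overload exercised by
        // the test file below, with masks left at their defaults). Two kernels run per call:
        // calcDistance fills the full query-by-train distance matrix, then findBestMatch is
        // repeated k times to peel off the k nearest neighbours.
        //
        //     void exampleKnnMatch(const cv::gpu::GpuMat& query, const std::vector<cv::gpu::GpuMat>& trains, int k)
        //     {
        //         cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;
        //         matcher.add(trains);
        //         std::vector< std::vector<cv::DMatch> > matches; // matches[q]: up to k best per query row
        //         matcher.knnMatch(query, matches, k);
        //     }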
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////// Radius Match //////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Radius Match kernel
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
__global__ void radiusMatch(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_,
float maxDistance, Mask mask, DevMem2Di trainIdx_, unsigned int* nMatches, PtrStepf distance)
{
__shared__ float sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
float* sdiff_row = sdiff + BLOCK_DIM_X * threadIdx.y;
const int queryIdx = blockIdx.x;
const T* queryDescs = queryDescs_.ptr(queryIdx);
const int trainIdx = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (trainIdx < trainDescs_.rows)
{
const T* trainDescs = trainDescs_.ptr(trainIdx);
if (mask(queryIdx, trainIdx))
{
reduceDescDiff<BLOCK_DIM_X, Dist>(queryDescs, trainDescs, trainDescs_.cols, sdiff_row);
if (threadIdx.x == 0)
{
float dist = Dist::finalResult(sdiff_row[0]);
if (dist < maxDistance)
{
unsigned int i = atomicInc(nMatches + queryIdx, (unsigned int) -1);
if (i < trainIdx_.cols)
{
distance.ptr(queryIdx)[i] = dist;
trainIdx_.ptr(queryIdx)[i] = trainIdx;
}
}
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
// Radius Match kernel caller
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, template <int> class Dist, typename T, typename Mask>
void radiusMatch_caller(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
float maxDistance, const Mask& mask, const DevMem2Di& trainIdx, unsigned int* nMatches,
const DevMem2Df& distance)
{
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
dim3 grid(queryDescs.rows, divUp(trainDescs.rows, BLOCK_DIM_Y), 1);
radiusMatch<BLOCK_DIM_X, BLOCK_DIM_Y, Dist<BLOCK_DIM_X>, T><<<grid, threads>>>(
queryDescs, trainDescs, maxDistance, mask, trainIdx, nMatches, distance);
cudaSafeCall( cudaThreadSynchronize() );
}
///////////////////////////////////////////////////////////////////////////////
// Radius Match kernel chooser
template <typename T>
void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance)
{
if (mask.data)
{
radiusMatch_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
}
else
{
radiusMatch_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, WithOutMask(), trainIdx, nMatches, distance);
}
}
template void radiusMatchL1_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL1_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL1_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL1_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL1_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL1_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template <typename T>
void radiusMatchL2_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance)
{
if (mask.data)
{
radiusMatch_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
}
else
{
radiusMatch_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
maxDistance, WithOutMask(), trainIdx, nMatches, distance);
}
}
template void radiusMatchL2_gpu<unsigned char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<char >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<unsigned short>(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<short >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<int >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
template void radiusMatchL2_gpu<float >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance, const DevMem2D& mask, const DevMem2Di& trainIdx, unsigned int* nMatches, const DevMem2Df& distance);
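        // Capacity note: nMatches[queryIdx] is bumped by atomicInc for every pair within
        // maxDistance, but results are stored only while i < trainIdx_.cols, so the final count
        // can exceed the preallocated row width; the host side must clamp it when downloading.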
}}}
tests/gpu/src/brute_force_matcher.cpp
0 → 100644
View file @
8891acb6
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "gputest.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;

class CV_GpuBruteForceMatcherTest : public CvTest
{
public:
    CV_GpuBruteForceMatcherTest() : CvTest("GPU-BruteForceMatcher", "BruteForceMatcher") {}

protected:
    void run(int)
    {
        try
        {
            BruteForceMatcher< L2<float> > matcherCPU;
            BruteForceMatcher_GPU< L2<float> > matcherGPU;

            vector<DMatch> matchesCPU, matchesGPU;
            vector< vector<DMatch> > knnMatchesCPU, knnMatchesGPU;
            vector< vector<DMatch> > radiusMatchesCPU, radiusMatchesGPU;

            RNG rng(*ts->get_rng());

            const int desc_len = rng.uniform(40, 300);

            Mat queryCPU(rng.uniform(100, 300), desc_len, CV_32F);
            rng.fill(queryCPU, cv::RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar::all(1.0));
            GpuMat queryGPU(queryCPU);

            const int nTrains = rng.uniform(1, 5);

            vector<Mat> trainsCPU(nTrains);
            vector<GpuMat> trainsGPU(nTrains);

            vector<Mat> masksCPU(nTrains);
            vector<GpuMat> masksGPU(nTrains);

            for (int i = 0; i < nTrains; ++i)
            {
                Mat train(rng.uniform(100, 300), desc_len, CV_32F);
                rng.fill(train, cv::RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar::all(1.0));

                trainsCPU[i] = train;
                trainsGPU[i].upload(train);

                bool with_mask = rng.uniform(0, 10) < 5;
                if (with_mask)
                {
                    Mat mask(queryCPU.rows, train.rows, CV_8U, Scalar::all(1));
                    rng.fill(mask, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(200));

                    masksCPU[i] = mask;
                    masksGPU[i].upload(mask);
                }
            }

            matcherCPU.add(trainsCPU);
            matcherGPU.add(trainsGPU);

            matcherCPU.match(queryCPU, matchesCPU, masksCPU);
            matcherGPU.match(queryGPU, matchesGPU, masksGPU);

            if (!compareMatches(matchesCPU, matchesGPU))
            {
                ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
                return;
            }

            const int knn = rng.uniform(3, 10);

            matcherCPU.knnMatch(queryCPU, knnMatchesCPU, knn, masksCPU);
            matcherGPU.knnMatch(queryGPU, knnMatchesGPU, knn, masksGPU);

            if (!compareMatches(knnMatchesCPU, knnMatchesGPU))
            {
                ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
                return;
            }

            const float maxDistance = rng.uniform(0.01f, 0.3f);

            matcherCPU.radiusMatch(queryCPU, radiusMatchesCPU, maxDistance, masksCPU);
            matcherGPU.radiusMatch(queryGPU, radiusMatchesGPU, maxDistance, masksGPU);

            if (!compareMatches(radiusMatchesCPU, radiusMatchesGPU))
            {
                ts->set_failed_test_info(CvTS::FAIL_MISMATCH);
                return;
            }
        }
        catch (const cv::Exception& e)
        {
            if (!check_and_treat_gpu_exception(e, ts))
                throw;
            return;
        }

        ts->set_failed_test_info(CvTS::OK);
    }

private:
    static void convertMatches(const vector< vector<DMatch> >& knnMatches, vector<DMatch>& matches)
    {
        matches.clear();
        for (size_t i = 0; i < knnMatches.size(); ++i)
            copy(knnMatches[i].begin(), knnMatches[i].end(), back_inserter(matches));
    }

    static bool compareMatches(const vector<DMatch>& matches1, const vector<DMatch>& matches2)
    {
        if (matches1.size() != matches2.size())
            return false;

        struct DMatchEqual : public binary_function<DMatch, DMatch, bool>
        {
            bool operator()(const DMatch& m1, const DMatch& m2)
            {
                return m1.imgIdx == m2.imgIdx && m1.queryIdx == m2.queryIdx && m1.trainIdx == m2.trainIdx;
            }
        };

        return equal(matches1.begin(), matches1.end(), matches2.begin(), DMatchEqual());
    }

    static bool compareMatches(const vector< vector<DMatch> >& matches1, const vector< vector<DMatch> >& matches2)
    {
        vector<DMatch> m1, m2;
        convertMatches(matches1, m1);
        convertMatches(matches2, m2);
        return compareMatches(m1, m2);
    }
} brute_force_matcher_test;
\ No newline at end of file
tests/gpu/src/gputest.hpp
View file @
8891acb6
...
...
@@ -50,7 +50,8 @@
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/features2d/features2d.hpp>
#include "cxts.h"
/****************************************************************************************/
...
...