Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
8d5e9522
Commit
8d5e9522
authored
Feb 03, 2014
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
very preliminary port of SURF to T-API (compiles but certainly does not work)
parent
652a0bd5
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
464 additions
and
653 deletions
+464
-653
features2d.hpp
modules/features2d/include/opencv2/features2d.hpp
+0
-0
features2d.hpp
modules/nonfree/include/opencv2/nonfree/features2d.hpp
+0
-1
ocl.hpp
modules/nonfree/include/opencv2/nonfree/ocl.hpp
+0
-126
surf.cl
modules/nonfree/src/opencl/surf.cl
+35
-31
precomp.hpp
modules/nonfree/src/precomp.hpp
+0
-5
surf.cpp
modules/nonfree/src/surf.cpp
+35
-3
surf.hpp
modules/nonfree/src/surf.hpp
+123
-0
surf.ocl.cpp
modules/nonfree/src/surf.ocl.cpp
+271
-487
No files found.
modules/features2d/include/opencv2/features2d.hpp
View file @
8d5e9522
modules/nonfree/include/opencv2/nonfree/features2d.hpp
View file @
8d5e9522
...
...
@@ -142,7 +142,6 @@ public:
CV_PROP_RW
bool
upright
;
protected
:
void
detectImpl
(
InputArray
image
,
std
::
vector
<
KeyPoint
>&
keypoints
,
InputArray
mask
=
noArray
()
)
const
;
void
computeImpl
(
const
Mat
&
image
,
std
::
vector
<
KeyPoint
>&
keypoints
,
Mat
&
descriptors
)
const
;
};
...
...
modules/nonfree/include/opencv2/nonfree/ocl.hpp
deleted
100644 → 0
View file @
652a0bd5
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_NONFREE_OCL_HPP__
#define __OPENCV_NONFREE_OCL_HPP__
#include "opencv2/ocl.hpp"
namespace
cv
{
namespace
ocl
{
//! Speeded up robust features, port from CUDA module.
////////////////////////////////// SURF //////////////////////////////////////////
class
CV_EXPORTS
SURF_OCL
{
public
:
enum
KeypointLayout
{
X_ROW
=
0
,
Y_ROW
,
LAPLACIAN_ROW
,
OCTAVE_ROW
,
SIZE_ROW
,
ANGLE_ROW
,
HESSIAN_ROW
,
ROWS_COUNT
};
//! the default constructor
SURF_OCL
();
//! the full constructor taking all the necessary parameters
explicit
SURF_OCL
(
double
_hessianThreshold
,
int
_nOctaves
=
4
,
int
_nOctaveLayers
=
2
,
bool
_extended
=
false
,
float
_keypointsRatio
=
0.01
f
,
bool
_upright
=
false
);
//! returns the descriptor size in float's (64 or 128)
int
descriptorSize
()
const
;
//! returns the default norm type
int
defaultNorm
()
const
;
//! upload host keypoints to device memory
void
uploadKeypoints
(
const
std
::
vector
<
cv
::
KeyPoint
>
&
keypoints
,
oclMat
&
keypointsocl
);
//! download keypoints from device to host memory
void
downloadKeypoints
(
const
oclMat
&
keypointsocl
,
std
::
vector
<
KeyPoint
>
&
keypoints
);
//! download descriptors from device to host memory
void
downloadDescriptors
(
const
oclMat
&
descriptorsocl
,
std
::
vector
<
float
>
&
descriptors
);
//! finds the keypoints using fast hessian detector used in SURF
//! supports CV_8UC1 images
//! keypoints will have nFeature cols and 7 rows (ROWS_COUNT)
//! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
//! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
//! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
//! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
//! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
//! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
//! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
void
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
oclMat
&
keypoints
);
//! finds the keypoints and computes their descriptors.
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
void
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
oclMat
&
keypoints
,
oclMat
&
descriptors
,
bool
useProvidedKeypoints
=
false
);
void
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
std
::
vector
<
KeyPoint
>
&
keypoints
);
void
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
std
::
vector
<
KeyPoint
>
&
keypoints
,
oclMat
&
descriptors
,
bool
useProvidedKeypoints
=
false
);
void
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
std
::
vector
<
KeyPoint
>
&
keypoints
,
std
::
vector
<
float
>
&
descriptors
,
bool
useProvidedKeypoints
=
false
);
void
releaseMemory
();
// SURF parameters
float
hessianThreshold
;
int
nOctaves
;
int
nOctaveLayers
;
bool
extended
;
bool
upright
;
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
float
keypointsRatio
;
oclMat
sum
,
mask1
,
maskSum
,
intBuffer
;
oclMat
det
,
trace
;
oclMat
maxPosBuffer
;
};
}
}
#endif //__OPENCV_NONFREE_OCL_HPP__
modules/nonfree/src/opencl/surf.cl
View file @
8d5e9522
...
...
@@ -45,6 +45,12 @@
//
//M*/
//
The
number
of
degrees
between
orientation
samples
in
calcOrientation
#
define
ORI_SEARCH_INC
5
//
The
local
size
of
the
calcOrientation
kernel
#
define
ORI_LOCAL_SIZE
(
360
/
ORI_SEARCH_INC
)
//
specialized
for
non-image2d_t
supported
platform,
intel
HD4000,
for
example
#
ifdef
DISABLE_IMAGE2D
#
define
IMAGE_INT32
__global
uint
*
...
...
@@ -175,7 +181,7 @@ F calcAxisAlignedDerivative(
}
//calculate
targeted
layer
per-pixel
determinant
and
trace
with
an
integral
image
__kernel
void
icvC
alcLayerDetAndTrace
(
__kernel
void
SURF_c
alcLayerDetAndTrace
(
IMAGE_INT32
sumTex,
//
input
integral
image
__global
float
*
det,
//
output
Determinant
__global
float
*
trace,
//
output
trace
...
...
@@ -338,7 +344,7 @@ bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int ro
//
Non-maximal
suppression
to
further
filtering
the
candidates
from
previous
step
__kernel
void
icvFindMaximaInLayer_withm
ask
(
void
SURF_findMaximaInLayerWithM
ask
(
__global
const
float
*
det,
__global
const
float
*
trace,
__global
int4
*
maxPosBuffer,
...
...
@@ -466,7 +472,7 @@ void icvFindMaximaInLayer_withmask(
}
__kernel
void
icvF
indMaximaInLayer
(
void
SURF_f
indMaximaInLayer
(
__global
float
*
det,
__global
float
*
trace,
__global
int4
*
maxPosBuffer,
...
...
@@ -624,7 +630,7 @@ inline bool solve3x3_float(const float4 *A, const float *b, float *x)
////////////////////////////////////////////////////////////////////////
//
INTERPOLATION
__kernel
void
icvI
nterpolateKeypoint
(
void
SURF_i
nterpolateKeypoint
(
__global
const
float
*
det,
__global
const
int4
*
maxPosBuffer,
__global
float
*
keypoints,
...
...
@@ -829,7 +835,7 @@ void reduce_32_sum(volatile __local float * data, volatile float* partial_reduc
}
__kernel
void
icvC
alcOrientation
(
void
SURF_c
alcOrientation
(
IMAGE_INT32
sumTex,
__global
float
*
keypoints,
int
keypoints_step,
...
...
@@ -995,18 +1001,17 @@ void icvCalcOrientation(
}
__kernel
void
icvS
etUpright(
void
SURF_s
etUpright(
__global float * keypoints,
int keypoints_step,
int nFeatures
)
int keypoints_step, int keypoints_offset,
int rows, int cols )
{
int i = get_global_id(0);
keypoints_step /= sizeof(*keypoints);
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
if(
get_global_id(0) <= nFeature
s)
if(
i < col
s)
{
featureDir[get_global_id(0)] = 270.0
f;
keypoints[mad24(keypoints_step, ANGLE_ROW, i)] = 270.
f;
}
}
...
...
@@ -1162,6 +1167,7 @@ void calc_dx_dy(
s_dy_bin[tid] = vy;
}
}
void reduce_sum25(
volatile __local float* sdata1,
volatile __local float* sdata2,
...
...
@@ -1225,16 +1231,14 @@ void reduce_sum25(
}
__kernel
void
compute_d
escriptors64(
void
SURF_computeD
escriptors64(
IMAGE_INT8 imgTex,
int img_step, int img_offset,
int rows, int cols,
__global const float* keypoints,
int keypoints_step, int keypoints_offset,
__global float * descriptors,
__global const float * keypoints,
int descriptors_step,
int keypoints_step,
int rows,
int cols,
int img_step
)
int descriptors_step, int descriptors_offset)
{
descriptors_step /= sizeof(float);
keypoints_step /= sizeof(float);
...
...
@@ -1279,17 +1283,16 @@ void compute_descriptors64(
}
}
}
__kernel
void
compute_d
escriptors128(
void
SURF_computeD
escriptors128(
IMAGE_INT8 imgTex,
__global float * descriptors,
__global float * keypoints,
int descriptors_step,
int keypoints_step,
int rows,
int cols,
int img_step
)
int img_step, int img_offset,
int rows, int cols,
__global const float* keypoints,
int keypoints_step, int keypoints_offset,
__global float* descriptors,
int descriptors_step, int descriptors_offset)
{
descriptors_step /= sizeof(*descriptors);
keypoints_step /= sizeof(*keypoints);
...
...
@@ -1483,7 +1486,7 @@ void reduce_sum64(volatile __local float* smem, int tid)
}
__kernel
void
normalize_d
escriptors128
(
__global
float
*
descriptors,
int
descriptors_step
)
void
SURF_normalizeD
escriptors128
(
__global
float
*
descriptors,
int
descriptors_step
)
{
descriptors_step
/=
sizeof
(
*descriptors
)
;
//
no
need
for
thread
ID
...
...
@@ -1509,8 +1512,9 @@ void normalize_descriptors128(__global float * descriptors, int descriptors_step
//
normalize
and
store
in
output
descriptor_base[get_local_id
(
0
)
]
=
lookup
/
len
;
}
__kernel
void
normalize_d
escriptors64
(
__global
float
*
descriptors,
int
descriptors_step
)
void
SURF_normalizeD
escriptors64
(
__global
float
*
descriptors,
int
descriptors_step
)
{
descriptors_step
/=
sizeof
(
*descriptors
)
;
//
no
need
for
thread
ID
...
...
modules/nonfree/src/precomp.hpp
View file @
8d5e9522
...
...
@@ -60,11 +60,6 @@
# include "opencv2/cudaarithm.hpp"
#endif
#ifdef HAVE_OPENCV_OCL
# include "opencv2/nonfree/ocl.hpp"
# include "opencv2/ocl/private/util.hpp"
#endif
#include "opencv2/core/private.hpp"
#endif
modules/nonfree/src/surf.cpp
View file @
8d5e9522
...
...
@@ -108,6 +108,7 @@ Modifications by Ian Mahon
*/
#include "precomp.hpp"
#include "surf.hpp"
namespace
cv
{
...
...
@@ -897,11 +898,42 @@ void SURF::operator()(InputArray _img, InputArray _mask,
OutputArray
_descriptors
,
bool
useProvidedKeypoints
)
const
{
Mat
img
=
_img
.
getMat
(),
mask
=
_mask
.
getMat
(),
mask1
,
sum
,
msum
;
int
imgtype
=
_img
.
type
(),
imgcn
=
CV_MAT_CN
(
imgtype
)
;
bool
doDescriptors
=
_descriptors
.
needed
();
CV_Assert
(
!
img
.
empty
()
&&
img
.
depth
()
==
CV_8U
);
if
(
img
.
channels
()
>
1
)
CV_Assert
(
!
_img
.
empty
()
&&
CV_MAT_DEPTH
(
imgtype
)
==
CV_8U
&&
(
imgcn
==
1
||
imgcn
==
3
||
imgcn
==
4
));
CV_Assert
(
_descriptors
.
needed
()
&&
!
useProvidedKeypoints
);
if
(
ocl
::
useOpenCL
()
)
{
SURF_OCL
ocl_surf
;
UMat
gpu_kpt
;
bool
ok
=
ocl_surf
.
init
(
this
);
if
(
ok
)
{
if
(
!
_descriptors
.
needed
()
)
{
ok
=
ocl_surf
.
detect
(
_img
,
_mask
,
gpu_kpt
);
}
else
{
if
(
useProvidedKeypoints
)
ocl_surf
.
uploadKeypoints
(
keypoints
,
gpu_kpt
);
ok
=
ocl_surf
.
detectAndCompute
(
_img
,
_mask
,
gpu_kpt
,
_descriptors
,
useProvidedKeypoints
);
}
}
if
(
ok
)
{
if
(
!
useProvidedKeypoints
)
ocl_surf
.
downloadKeypoints
(
gpu_kpt
,
keypoints
);
return
;
}
}
Mat
img
=
_img
.
getMat
(),
mask
=
_mask
.
getMat
(),
mask1
,
sum
,
msum
;
if
(
imgcn
>
1
)
cvtColor
(
img
,
img
,
COLOR_BGR2GRAY
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8U
&&
mask
.
size
()
==
img
.
size
()));
...
...
modules/nonfree/src/surf.hpp
0 → 100644
View file @
8d5e9522
///////////// see LICENSE.txt in the OpenCV root directory //////////////
#ifndef __OPENCV_NONFREE_SURF_HPP__
#define __OPENCV_NONFREE_SURF_HPP__
namespace
cv
{
//! Speeded up robust features, port from CUDA module.
////////////////////////////////// SURF //////////////////////////////////////////
//! OpenCL (T-API) implementation helper for SURF.
//! The algorithm parameters are not stored here; they are read through the
//! SURF object passed to init(), which must be called before any other method.
class SURF_OCL
{
public:
    //! Row indices of the keypoints matrix (one column per feature).
    //! ROWS_COUNT is the total number of rows (7).
    enum KeypointLayout
    {
        X_ROW = 0,
        Y_ROW,
        LAPLACIAN_ROW,
        OCTAVE_ROW,
        SIZE_ROW,
        ANGLE_ROW,
        HESSIAN_ROW,
        ROWS_COUNT
    };

    //! the default constructor; takes no parameters.
    //! The SURF parameters are supplied later via init().
    SURF_OCL();

    //! stores the parameter object and, on the first call, creates the OpenCL kernels.
    //! Returns true only if all kernels were created successfully.
    bool init(const SURF* params);

    //! returns the descriptor size in float's (64 or 128)
    //! Dereferences params, so init() must have been called first.
    int descriptorSize() const { return params->extended ? 128 : 64; }

    //! upload host keypoints to device memory
    void uploadKeypoints(const std::vector<KeyPoint> &keypoints, UMat &keypointsGPU);
    //! download keypoints from device to host memory
    void downloadKeypoints(const UMat &keypointsGPU, std::vector<KeyPoint> &keypoints);

    //! finds the keypoints using fast hessian detector used in SURF
    //! supports CV_8UC1 images
    //! keypoints will have nFeature cols and ROWS_COUNT (7) rows
    //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
    //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
    //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
    //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
    //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
    //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
    //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
    //! Returns false if the OpenCL path could not be run.
    bool detect(InputArray img, InputArray mask, UMat& keypoints);

    //! finds the keypoints and computes their descriptors.
    //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
    bool detectAndCompute(InputArray img, InputArray mask, UMat& keypoints,
                          OutputArray descriptors, bool useProvidedKeypoints = false);

protected:
    //! validates the input image/mask and prepares the per-image buffers;
    //! returns false when init() has not succeeded.
    bool setImage(InputArray img, InputArray mask);

    // kernel callers declarations
    // Each of these returns false when the corresponding step fails.
    bool calcLayerDetAndTrace(UMat &det, UMat &trace, int octave, int layer_rows);

    bool findMaximaInLayer(const UMat &det, const UMat &trace, UMat &maxPosBuffer, UMat &maxCounter,
                           int counterOffset, int octave, int layer_rows, int layer_cols);

    bool interpolateKeypoint(const UMat &det, const UMat &maxPosBuffer, int maxCounter, UMat &keypoints,
                             UMat &counters, int octave, int layer_rows, int maxFeatures);

    bool calcOrientation(UMat &keypoints);

    bool setUpRight(UMat &keypoints);

    bool computeDescriptors(const UMat &keypoints, OutputArray descriptors);

    bool detectKeypoints(UMat &keypoints);

    //! SURF parameters (not owned); assigned by init()
    const SURF* params;
    // NOTE(review): refcount is not referenced in the code visible in this change - confirm it is needed
    int refcount;

    // integral image and work buffers
    UMat sum, mask1, maskSum, intBuffer;
    // per-layer determinant and trace buffers
    UMat det, trace;
    // buffer of candidate maxima positions (1 x maxCandidates, CV_32SC4)
    UMat maxPosBuffer;

    // size of the image passed to setImage()
    int img_cols, img_rows;

    //! max candidates = min(1.5 * maxFeatures, 65535)
    int maxCandidates;
    //! max keypoints = min(0.01 * img_cols * img_rows, 65535)
    int maxFeatures;

    // the source image and the per-octave counters (1 x (nOctaves + 1), CV_32SC1)
    UMat img, counters;

    // texture buffers
    ocl::Image2D imgTex, sumTex, maskSumTex;
    //! whether the default OpenCL device reports image support (set in init())
    bool haveImageSupport;

    //! initialization state: -1 - init() has not run yet, 0 - OpenCL path unavailable, 1 - kernels are ready
    int status;
    ocl::Kernel kerCalcDetTrace, kerFindMaxima, kerFindMaximaMask, kerInterp;
    ocl::Kernel kerUpRight, kerOri, kerCalcDesc64, kerCalcDesc128, kerNormDesc64, kerNormDesc128;
};
/*
template<typename _Tp> void copyVectorToUMat(const std::vector<_Tp>& v, UMat& um)
{
if(v.empty())
um.release();
else
Mat(1, (int)(v.size()*sizeof(v[0])), CV_8U, (void*)&v[0]).copyTo(um);
}
template<typename _Tp> void copyUMatToVector(const UMat& um, std::vector<_Tp>& v)
{
if(um.empty())
v.clear();
else
{
size_t sz = um.total()*um.elemSize();
CV_Assert(um.isContinuous() && (sz % sizeof(_Tp) == 0));
v.resize(sz/sizeof(_Tp));
Mat m(um.size(), um.type(), &v[0]);
um.copyTo(m);
}
}*/
}
#endif
modules/nonfree/src/surf.ocl.cpp
View file @
8d5e9522
...
...
@@ -43,29 +43,20 @@
//
//M*/
#include "precomp.hpp"
#include "surf.hpp"
#ifdef HAVE_OPENCV_OCL
#include <cstdio>
#include <sstream>
#include "opencl_kernels.hpp"
using
namespace
cv
;
using
namespace
cv
::
ocl
;
static
ProgramEntry
surfprog
=
cv
::
ocl
::
nonfree
::
surf
;
namespace
cv
{
namespace
ocl
{
// The number of degrees between orientation samples in calcOrientation
const
static
int
ORI_SEARCH_INC
=
5
;
// The local size of the calcOrientation kernel
const
static
int
ORI_LOCAL_SIZE
=
(
360
/
ORI_SEARCH_INC
);
static
void
openCLExecuteKernelSURF
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
String
kernelName
,
size_t
globalThreads
[
3
],
enum
{
ORI_SEARCH_INC
=
5
,
ORI_LOCAL_SIZE
=
(
360
/
ORI_SEARCH_INC
)
};
/*static void openCLExecuteKernelSURF(Context2 *clCxt, const ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
{
std::stringstream optsStr;
optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " ";
optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
...
...
@@ -75,10 +66,7 @@ namespace cv
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
optsStr << "-D WAVE_SIZE=" << wave_size;
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
}
}
}
}*/
static
inline
int
calcSize
(
int
octave
,
int
layer
)
{
...
...
@@ -96,223 +84,220 @@ static inline int calcSize(int octave, int layer)
}
class
SURF_OCL_Invoker
SURF_OCL
::
SURF_OCL
()
{
public
:
// facilities
void
bindImgTex
(
const
oclMat
&
img
,
cl_mem
&
texture
);
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
// kernel callers declarations
void
icvCalcLayerDetAndTrace_gpu
(
oclMat
&
det
,
oclMat
&
trace
,
int
octave
,
int
nOctaveLayers
,
int
layer_rows
);
void
icvFindMaximaInLayer_gpu
(
const
oclMat
&
det
,
const
oclMat
&
trace
,
oclMat
&
maxPosBuffer
,
oclMat
&
maxCounter
,
int
counterOffset
,
int
octave
,
bool
use_mask
,
int
nLayers
,
int
layer_rows
,
int
layer_cols
);
void
icvInterpolateKeypoint_gpu
(
const
oclMat
&
det
,
const
oclMat
&
maxPosBuffer
,
int
maxCounter
,
oclMat
&
keypoints
,
oclMat
&
counters
,
int
octave
,
int
layer_rows
,
int
maxFeatures
);
void
icvCalcOrientation_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
);
void
icvSetUpright_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
);
void
compute_descriptors_gpu
(
const
oclMat
&
descriptors
,
const
oclMat
&
keypoints
,
int
nFeatures
);
// end of kernel callers declarations
img_cols
=
img_rows
=
maxCandidates
=
maxFeatures
=
0
;
haveImageSupport
=
false
;
status
=
-
1
;
}
SURF_OCL_Invoker
(
SURF_OCL
&
surf
,
const
oclMat
&
img
,
const
oclMat
&
mask
)
:
surf_
(
surf
),
img_cols
(
img
.
cols
),
img_rows
(
img
.
rows
),
use_mask
(
!
mask
.
empty
()),
counters
(
oclMat
()),
imgTex
(
NULL
),
sumTex
(
NULL
),
maskSumTex
(
NULL
),
_img
(
img
)
{
CV_Assert
(
!
img
.
empty
()
&&
img
.
type
()
==
CV_8UC1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
size
()
==
img
.
size
()
&&
mask
.
type
()
==
CV_8UC1
));
CV_Assert
(
surf_
.
nOctaves
>
0
&&
surf_
.
nOctaveLayers
>
0
);
//! Binds the SURF parameter object and lazily builds the OpenCL kernels.
//!
//! @param p  SURF object holding the algorithm parameters; the pointer is stored, not copied.
//! @return   true if the OpenCL kernels are available, false otherwise
//!           (no OpenCL, CPU-type device, or a kernel failed to build).
//!
//! Kernel creation is attempted only once: status goes from -1 (not tried)
//! to 0 (unavailable) or 1 (ready), and later calls just report the cached result.
bool SURF_OCL::init(const SURF* p)
{
    params = p;
    if( status < 0 )
    {
        // pessimistic default, so that every early exit below leaves "unavailable"
        status = 0;
        if( ocl::haveOpenCL() )
        {
            const ocl::Device& dev = ocl::Device::getDefault();
            if( dev.type() == ocl::Device::TYPE_CPU )
                return false;
            haveImageSupport = dev.imageSupport();
            // DISABLE_IMAGE2D selects the plain-buffer code path in surf.cl, so it
            // must be defined only when the device has NO image support
            String opts = haveImageSupport ? "" : "-D DISABLE_IMAGE2D";

            // kernel names must match the __kernel function names in surf.cl exactly
            // (they are case-sensitive)
            if( kerCalcDetTrace.create("SURF_calcLayerDetAndTrace", ocl::nonfree::surf_oclsrc, opts) &&
                kerFindMaxima.create("SURF_findMaximaInLayer", ocl::nonfree::surf_oclsrc, opts) &&
                kerFindMaximaMask.create("SURF_findMaximaInLayerWithMask", ocl::nonfree::surf_oclsrc, opts) &&
                kerInterp.create("SURF_interpolateKeypoint", ocl::nonfree::surf_oclsrc, opts) &&
                kerUpRight.create("SURF_setUpright", ocl::nonfree::surf_oclsrc, opts) &&
                kerOri.create("SURF_calcOrientation", ocl::nonfree::surf_oclsrc, opts) &&
                kerCalcDesc64.create("SURF_computeDescriptors64", ocl::nonfree::surf_oclsrc, opts) &&
                kerCalcDesc128.create("SURF_computeDescriptors128", ocl::nonfree::surf_oclsrc, opts) &&
                kerNormDesc64.create("SURF_normalizeDescriptors64", ocl::nonfree::surf_oclsrc, opts) &&
                kerNormDesc128.create("SURF_normalizeDescriptors128", ocl::nonfree::surf_oclsrc, opts) )
                status = 1;
        }
    }
    return status > 0;
}
const
int
min_size
=
calcSize
(
surf_
.
nOctaves
-
1
,
0
);
CV_Assert
(
img_rows
-
min_size
>=
0
);
CV_Assert
(
img_cols
-
min_size
>=
0
);
const
int
layer_rows
=
img_rows
>>
(
surf_
.
nOctaves
-
1
);
const
int
layer_cols
=
img_cols
>>
(
surf_
.
nOctaves
-
1
);
const
int
min_margin
=
((
calcSize
((
surf_
.
nOctaves
-
1
),
2
)
>>
1
)
>>
(
surf_
.
nOctaves
-
1
))
+
1
;
bool
SURF_OCL
::
setImage
(
InputArray
_img
,
InputArray
_mask
)
{
if
(
status
<=
0
)
return
false
;
CV_Assert
(
!
_img
.
empty
()
&&
_img
.
type
()
==
CV_8UC1
);
CV_Assert
(
_mask
.
empty
()
||
(
_mask
.
size
()
==
_img
.
size
()
&&
_mask
.
type
()
==
CV_8UC1
));
CV_Assert
(
params
&&
params
->
nOctaves
>
0
&&
params
->
nOctaveLayers
>
0
);
int
min_size
=
calcSize
(
params
->
nOctaves
-
1
,
0
);
Size
sz
=
_img
.
size
();
img_cols
=
sz
.
width
;
img_rows
=
sz
.
height
;
CV_Assert
(
img_rows
>=
min_size
&&
img_cols
>=
min_size
);
const
int
layer_rows
=
img_rows
>>
(
params
->
nOctaves
-
1
);
const
int
layer_cols
=
img_cols
>>
(
params
->
nOctaves
-
1
);
const
int
min_margin
=
((
calcSize
((
params
->
nOctaves
-
1
),
2
)
>>
1
)
>>
(
params
->
nOctaves
-
1
))
+
1
;
CV_Assert
(
layer_rows
-
2
*
min_margin
>
0
);
CV_Assert
(
layer_cols
-
2
*
min_margin
>
0
);
maxFeatures
=
std
::
min
(
static_cast
<
int
>
(
img
.
size
().
area
()
*
surf
.
keypointsRatio
),
65535
);
maxFeatures
=
std
::
min
(
static_cast
<
int
>
(
img_cols
*
img_rows
*
0.01
f
),
65535
);
maxCandidates
=
std
::
min
(
static_cast
<
int
>
(
1.5
*
maxFeatures
),
65535
);
CV_Assert
(
maxFeatures
>
0
);
counters
.
create
(
1
,
surf_
.
nOctaves
+
1
,
CV_32SC1
);
counters
.
create
(
1
,
params
->
nOctaves
+
1
,
CV_32SC1
);
counters
.
setTo
(
Scalar
::
all
(
0
));
integral
(
img
,
surf_
.
sum
);
img
.
release
();
if
(
_img
.
isUMat
())
img
=
_img
.
getUMat
();
else
_img
.
copyTo
(
img
);
bindImgTex
(
img
,
imgTex
);
bindImgTex
(
surf_
.
sum
,
sumTex
);
finish
();
integral
(
img
,
sum
);
maskSumTex
=
0
;
if
(
haveImageSupport
)
{
imgTex
=
ocl
::
Image2D
(
img
);
sumTex
=
ocl
::
Image2D
(
sum
);
}
if
(
use_mask
)
maskSumTex
=
ocl
::
Image2D
();
if
(
!
_mask
.
empty
())
{
CV_Error
(
Error
::
StsBadFunc
,
"Masked SURF detector is not implemented yet"
);
//!FIXME
// temp fix for missing min overload
//oclMat temp(mask.size(), mask.type());
//temp.setTo(Scalar::all(1.0));
////cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
//integral(surf_.mask1, surf_.maskSum);
//bindImgTex(surf_.maskSum, maskSumTex);
}
}
return
true
;
}
void
detectKeypoints
(
oclMat
&
keypoints
)
{
bool
SURF_OCL
::
detectKeypoints
(
UMat
&
keypoints
)
{
// create image pyramid buffers
// different layers have same sized buffers, but they are sampled from Gaussian kernel.
ensureSizeIsEnough
(
img_rows
*
(
surf_
.
nOctaveLayers
+
2
),
img_cols
,
CV_32FC1
,
surf_
.
det
);
ensureSizeIsEnough
(
img_rows
*
(
surf_
.
nOctaveLayers
+
2
),
img_cols
,
CV_32FC1
,
surf_
.
trace
);
det
.
create
(
img_rows
*
(
params
->
nOctaveLayers
+
2
),
img_cols
,
CV_32F
);
trace
.
create
(
img_rows
*
(
params
->
nOctaveLayers
+
2
),
img_cols
,
CV_32FC1
);
ensureSizeIsEnough
(
1
,
maxCandidates
,
CV_32SC4
,
surf_
.
maxPosBuffer
);
ensureSizeIsEnough
(
SURF_OCL
::
ROWS_COUNT
,
maxFeatures
,
CV_32FC1
,
keypoints
);
maxPosBuffer
.
create
(
1
,
maxCandidates
,
CV_32SC4
);
keypoints
.
create
(
SURF_OCL
::
ROWS_COUNT
,
maxFeatures
,
CV_32F
);
keypoints
.
setTo
(
Scalar
::
all
(
0
));
Mat
cpuCounters
;
for
(
int
octave
=
0
;
octave
<
surf_
.
nOctaves
;
++
octave
)
for
(
int
octave
=
0
;
octave
<
params
->
nOctaves
;
++
octave
)
{
const
int
layer_rows
=
img_rows
>>
octave
;
const
int
layer_cols
=
img_cols
>>
octave
;
//loadOctaveConstants(octave, layer_rows, layer_cols);
if
(
!
calcLayerDetAndTrace
(
det
,
trace
,
octave
,
layer_rows
))
return
false
;
icvCalcLayerDetAndTrace_gpu
(
surf_
.
det
,
surf_
.
trace
,
octave
,
surf_
.
nOctaveLayers
,
layer_rows
);
if
(
!
findMaximaInLayer
(
det
,
trace
,
maxPosBuffer
,
counters
,
1
+
octave
,
octave
,
layer_rows
,
layer_cols
))
return
false
;
icvFindMaximaInLayer_gpu
(
surf_
.
det
,
surf_
.
trace
,
surf_
.
maxPosBuffer
,
counters
,
1
+
octave
,
octave
,
use_mask
,
surf_
.
nOctaveLayers
,
layer_rows
,
layer_cols
);
int
maxCounter
=
((
Mat
)
counters
).
at
<
int
>
(
1
+
octave
);
maxCounter
=
std
::
min
(
maxCounter
,
static_cast
<
int
>
(
maxCandidates
));
cpuCounters
=
counters
.
getMat
(
ACCESS_READ
);
int
maxCounter
=
cpuCounters
.
at
<
int
>
(
1
+
octave
);
maxCounter
=
std
::
min
(
maxCounter
,
maxCandidates
);
cpuCounters
.
release
();
if
(
maxCounter
>
0
)
{
icvInterpolateKeypoint_gpu
(
surf_
.
det
,
surf_
.
maxPosBuffer
,
maxCounter
,
keypoints
,
counters
,
octave
,
layer_rows
,
maxFeatures
);
if
(
!
interpolateKeypoint
(
det
,
maxPosBuffer
,
maxCounter
,
keypoints
,
counters
,
octave
,
layer_rows
,
maxFeatures
))
return
false
;
}
}
int
featureCounter
=
Mat
(
counters
).
at
<
int
>
(
0
);
featureCounter
=
std
::
min
(
featureCounter
,
static_cast
<
int
>
(
maxFeatures
));
keypoints
.
cols
=
featureCounter
;
cpuCounters
=
counters
.
getMat
(
ACCESS_READ
);
int
featureCounter
=
cpuCounters
.
at
<
int
>
(
0
);
featureCounter
=
std
::
min
(
featureCounter
,
maxFeatures
);
cpuCounters
.
release
();
if
(
surf_
.
upright
)
{
//keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
setUpright
(
keypoints
);
}
keypoints
=
UMat
(
keypoints
,
Rect
(
0
,
0
,
featureCounter
,
1
));
if
(
params
->
upright
)
return
setUpRight
(
keypoints
);
else
{
findOrientation
(
keypoints
);
}
}
return
calcOrientation
(
keypoints
);
}
void
setUpright
(
oclMat
&
keypoints
)
{
const
int
nFeatures
=
keypoints
.
cols
;
if
(
nFeatures
>
0
)
{
icvSetUpright_gpu
(
keypoints
,
keypoints
.
cols
);
}
}
void
findOrientation
(
oclMat
&
keypoints
)
{
const
int
nFeatures
=
keypoints
.
cols
;
if
(
nFeatures
>
0
)
//! Runs the "upright" kernel over the keypoints matrix, which assigns a fixed
//! orientation to every feature instead of computing it.
//!
//! @param keypoints  keypoints matrix (one column per feature); modified in place.
//! @return           true if there is nothing to do or the kernel ran successfully.
bool SURF_OCL::setUpRight(UMat &keypoints)
{
    const int nFeatures = keypoints.cols;
    if( nFeatures == 0 )
        return true;

    // one work-item per feature; the explicit cast avoids a narrowing
    // int -> size_t conversion inside the braced initializer
    size_t globalThreads[3] = { static_cast<size_t>(nFeatures), 1 };
    return kerUpRight.args(ocl::KernelArg::ReadWrite(keypoints)).run(2, globalThreads, 0, false);
}
bool
SURF_OCL
::
computeDescriptors
(
const
UMat
&
keypoints
,
OutputArray
_descriptors
)
{
int
descriptorSize
=
params
->
descriptorSize
();
int
nFeatures
=
keypoints
.
cols
;
if
(
nFeatures
==
0
)
{
icvCalcOrientation_gpu
(
keypoints
,
nFeatures
);
}
_descriptors
.
release
(
);
return
true
;
}
_descriptors
.
create
(
nFeatures
,
descriptorSize
,
CV_32F
);
UMat
descriptors
;
if
(
_descriptors
.
isUMat
()
)
descriptors
=
_descriptors
.
getUMat
();
else
descriptors
.
create
(
nFeatures
,
descriptorSize
,
CV_32F
);
void
computeDescriptors
(
const
oclMat
&
keypoints
,
oclMat
&
descriptors
,
int
descriptorSize
)
{
const
int
nFeatures
=
keypoints
.
cols
;
if
(
nFeatures
>
0
)
ocl
::
Kernel
kerCalcDesc
,
kerNormDesc
;
if
(
descriptorSize
==
64
)
{
ensureSizeIsEnough
(
nFeatures
,
descriptorSize
,
CV_32F
,
descriptors
);
compute_descriptors_gpu
(
descriptors
,
keypoints
,
nFeatures
);
}
kerCalcDesc
=
kerCalcDesc64
;
kerNormDesc
=
kerNormDesc64
;
}
~
SURF_OCL_Invoker
()
else
{
if
(
imgTex
)
openCLFree
(
imgTex
);
if
(
sumTex
)
openCLFree
(
sumTex
);
if
(
maskSumTex
)
openCLFree
(
maskSumTex
);
CV_Assert
(
descriptorSize
==
128
);
kerCalcDesc
=
kerCalcDesc128
;
kerNormDesc
=
kerNormDesc128
;
}
private
:
SURF_OCL
&
surf_
;
int
img_cols
,
img_rows
;
bool
use_mask
;
int
maxCandidates
;
int
maxFeatures
;
oclMat
counters
;
// texture buffers
cl_mem
imgTex
;
cl_mem
sumTex
;
cl_mem
maskSumTex
;
size_t
localThreads
[]
=
{
6
,
6
};
size_t
globalThreads
[]
=
{
nFeatures
*
localThreads
[
0
],
localThreads
[
1
]};
const
oclMat
_img
;
// make a copy for non-image2d_t supported platform
SURF_OCL_Invoker
&
operator
=
(
const
SURF_OCL_Invoker
&
right
)
if
(
haveImageSupport
)
{
kerCalcDesc
.
args
(
imgTex
,
ocl
::
KernelArg
::
ReadOnlyNoSize
(
keypoints
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
descriptors
));
}
else
{
(
*
this
)
=
right
;
return
*
this
;
}
// remove warning C4512
};
kerCalcDesc
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
img
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
keypoints
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
descriptors
));
}
cv
::
ocl
::
SURF_OCL
::
SURF_OCL
()
{
hessianThreshold
=
100.0
f
;
extended
=
true
;
nOctaves
=
4
;
nOctaveLayers
=
2
;
keypointsRatio
=
0.01
f
;
upright
=
false
;
}
if
(
!
kerCalcDesc
.
run
(
2
,
globalThreads
,
localThreads
,
false
))
return
false
;
cv
::
ocl
::
SURF_OCL
::
SURF_OCL
(
double
_threshold
,
int
_nOctaves
,
int
_nOctaveLayers
,
bool
_extended
,
float
_keypointsRatio
,
bool
_upright
)
{
hessianThreshold
=
saturate_cast
<
float
>
(
_threshold
);
extended
=
_extended
;
nOctaves
=
_nOctaves
;
nOctaveLayers
=
_nOctaveLayers
;
keypointsRatio
=
_keypointsRatio
;
upright
=
_upright
;
}
size_t
localThreads_n
[]
=
{
descriptorSize
,
1
};
size_t
globalThreads_n
[]
=
{
nFeatures
*
localThreads_n
[
0
],
localThreads_n
[
1
]};
int
cv
::
ocl
::
SURF_OCL
::
descriptorSize
()
const
{
return
extended
?
128
:
64
;
globalThreads
[
0
]
=
nFeatures
*
localThreads
[
0
];
globalThreads
[
1
]
=
localThreads
[
1
];
bool
ok
=
kerNormDesc
.
args
(
ocl
::
KernelArg
::
ReadWriteNoSize
(
descriptors
)).
run
(
2
,
globalThreads_n
,
localThreads_n
,
false
);
if
(
ok
&&
!
_descriptors
.
isUMat
())
descriptors
.
copyTo
(
_descriptors
);
return
ok
;
}
int
cv
::
ocl
::
SURF_OCL
::
defaultNorm
()
const
{
return
NORM_L2
;
}
void
cv
::
ocl
::
SURF_OCL
::
uploadKeypoints
(
const
std
::
vector
<
KeyPoint
>
&
keypoints
,
ocl
Mat
&
keypointsGPU
)
void
SURF_OCL
::
uploadKeypoints
(
const
std
::
vector
<
KeyPoint
>
&
keypoints
,
U
Mat
&
keypointsGPU
)
{
if
(
keypoints
.
empty
())
keypointsGPU
.
release
();
...
...
@@ -340,11 +325,11 @@ void cv::ocl::SURF_OCL::uploadKeypoints(const std::vector<KeyPoint> &keypoints,
kp_laplacian
[
i
]
=
1
;
}
keypoints
GPU
.
upload
(
keypointsC
PU
);
keypoints
CPU
.
copyTo
(
keypointsG
PU
);
}
}
void
cv
::
ocl
::
SURF_OCL
::
downloadKeypoints
(
const
ocl
Mat
&
keypointsGPU
,
std
::
vector
<
KeyPoint
>
&
keypoints
)
void
SURF_OCL
::
downloadKeypoints
(
const
U
Mat
&
keypointsGPU
,
std
::
vector
<
KeyPoint
>
&
keypoints
)
{
const
int
nFeatures
=
keypointsGPU
.
cols
;
...
...
@@ -354,8 +339,7 @@ void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &keypointsGPU, std::vecto
{
CV_Assert
(
keypointsGPU
.
type
()
==
CV_32FC1
&&
keypointsGPU
.
rows
==
ROWS_COUNT
);
Mat
keypointsCPU
(
keypointsGPU
);
Mat
keypointsCPU
=
keypointsGPU
.
getMat
(
ACCESS_READ
);
keypoints
.
resize
(
nFeatures
);
float
*
kp_x
=
keypointsCPU
.
ptr
<
float
>
(
SURF_OCL
::
X_ROW
);
...
...
@@ -380,354 +364,154 @@ void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &keypointsGPU, std::vecto
}
}
void
cv
::
ocl
::
SURF_OCL
::
downloadDescriptors
(
const
oclMat
&
descriptorsGPU
,
std
::
vector
<
float
>
&
descriptors
)
{
if
(
descriptorsGPU
.
empty
())
descriptors
.
clear
();
else
{
CV_Assert
(
descriptorsGPU
.
type
()
==
CV_32F
);
descriptors
.
resize
(
descriptorsGPU
.
rows
*
descriptorsGPU
.
cols
);
Mat
descriptorsCPU
(
descriptorsGPU
.
size
(),
CV_32F
,
&
descriptors
[
0
]);
descriptorsGPU
.
download
(
descriptorsCPU
);
}
}
void
cv
::
ocl
::
SURF_OCL
::
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
oclMat
&
keypoints
)
bool
SURF_OCL
::
detect
(
InputArray
img
,
InputArray
mask
,
UMat
&
keypoints
)
{
if
(
!
img
.
empty
())
{
SURF_OCL_Invoker
surf
(
*
this
,
img
,
mask
);
if
(
!
setImage
(
img
,
mask
)
)
return
false
;
surf
.
detectKeypoints
(
keypoints
);
}
return
detectKeypoints
(
keypoints
);
}
void
cv
::
ocl
::
SURF_OCL
::
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
oclMat
&
keypoints
,
oclMat
&
descriptors
,
bool
useProvidedKeypoints
)
{
if
(
!
img
.
empty
())
{
SURF_OCL_Invoker
surf
(
*
this
,
img
,
mask
);
if
(
!
useProvidedKeypoints
)
surf
.
detectKeypoints
(
keypoints
);
else
if
(
!
upright
)
{
surf
.
findOrientation
(
keypoints
);
}
surf
.
computeDescriptors
(
keypoints
,
descriptors
,
descriptorSize
());
}
}
void
cv
::
ocl
::
SURF_OCL
::
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
std
::
vector
<
KeyPoint
>
&
keypoints
)
bool
SURF_OCL
::
detectAndCompute
(
InputArray
img
,
InputArray
mask
,
UMat
&
keypoints
,
OutputArray
_descriptors
,
bool
useProvidedKeypoints
)
{
oclMat
keypointsGPU
;
if
(
!
setImage
(
img
,
mask
)
)
return
false
;
(
*
this
)(
img
,
mask
,
keypointsGPU
);
if
(
!
useProvidedKeypoints
&&
!
detectKeypoints
(
keypoints
)
)
return
false
;
downloadKeypoints
(
keypointsGPU
,
keypoint
s
);
return
computeDescriptors
(
keypoints
,
_descriptor
s
);
}
void
cv
::
ocl
::
SURF_OCL
::
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
std
::
vector
<
KeyPoint
>
&
keypoints
,
oclMat
&
descriptors
,
bool
useProvidedKeypoints
)
{
oclMat
keypointsGPU
;
if
(
useProvidedKeypoints
)
uploadKeypoints
(
keypoints
,
keypointsGPU
);
(
*
this
)(
img
,
mask
,
keypointsGPU
,
descriptors
,
useProvidedKeypoints
);
downloadKeypoints
(
keypointsGPU
,
keypoints
);
}
void
cv
::
ocl
::
SURF_OCL
::
operator
()(
const
oclMat
&
img
,
const
oclMat
&
mask
,
std
::
vector
<
KeyPoint
>
&
keypoints
,
std
::
vector
<
float
>
&
descriptors
,
bool
useProvidedKeypoints
)
{
oclMat
descriptorsGPU
;
(
*
this
)(
img
,
mask
,
keypoints
,
descriptorsGPU
,
useProvidedKeypoints
);
downloadDescriptors
(
descriptorsGPU
,
descriptors
);
}
void
cv
::
ocl
::
SURF_OCL
::
releaseMemory
()
{
sum
.
release
();
mask1
.
release
();
maskSum
.
release
();
intBuffer
.
release
();
det
.
release
();
trace
.
release
();
maxPosBuffer
.
release
();
}
// bind source buffer to image oject.
void
SURF_OCL_Invoker
::
bindImgTex
(
const
oclMat
&
img
,
cl_mem
&
texture
)
{
if
(
texture
)
{
openCLFree
(
texture
);
}
texture
=
bindTexture
(
img
);
}
inline
int
divUp
(
int
a
,
int
b
)
{
return
(
a
+
b
-
1
)
/
b
;
}
////////////////////////////
// kernel caller definitions
void
SURF_OCL_Invoker
::
icvCalcLayerDetAndTrace_gpu
(
oclMat
&
det
,
oclMat
&
trace
,
int
octave
,
int
nOctaveLayers
,
int
c_layer_rows
)
bool
SURF_OCL
::
calcLayerDetAndTrace
(
UMat
&
det
,
UMat
&
trace
,
int
octave
,
int
c_layer_rows
)
{
int
nOctaveLayers
=
params
->
nOctaveLayers
;
const
int
min_size
=
calcSize
(
octave
,
0
);
const
int
max_samples_i
=
1
+
((
img_rows
-
min_size
)
>>
octave
);
const
int
max_samples_j
=
1
+
((
img_cols
-
min_size
)
>>
octave
);
Context
*
clCxt
=
det
.
clCxt
;
String
kernelName
=
"icvCalcLayerDetAndTrace"
;
String
kernelName
=
"SURF_calcLayerDetAndTrace"
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
if
(
sumTex
)
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
sumTex
));
}
else
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
sum
.
data
));
// if image2d is not supported
}
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
det
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trace
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
det
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trace
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
nOctaveLayers
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
octave
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
c_layer_rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
surf_
.
sum
.
step
));
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
};
size_t
globalThreads
[
3
]
=
{
divUp
(
max_samples_j
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
max_samples_i
,
localThreads
[
1
])
*
localThreads
[
1
]
*
(
nOctaveLayers
+
2
),
1
divUp
(
max_samples_i
,
localThreads
[
1
])
*
localThreads
[
1
]
*
(
nOctaveLayers
+
2
)
};
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
SURF_OCL_Invoker
::
icvFindMaximaInLayer_gpu
(
const
oclMat
&
det
,
const
oclMat
&
trace
,
oclMat
&
maxPosBuffer
,
oclMat
&
maxCounter
,
int
counterOffset
,
int
octave
,
bool
useMask
,
int
nLayers
,
int
layer_rows
,
int
layer_cols
)
{
const
int
min_margin
=
((
calcSize
(
octave
,
2
)
>>
1
)
>>
octave
)
+
1
;
Context
*
clCxt
=
det
.
clCxt
;
String
kernelName
=
use_mask
?
"icvFindMaximaInLayer_withmask"
:
"icvFindMaximaInLayer"
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
det
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trace
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
maxPosBuffer
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
maxCounter
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
counterOffset
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
det
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
trace
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
nLayers
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
octave
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
layer_rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
layer_cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
maxCandidates
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
surf_
.
hessianThreshold
));
if
(
useMask
)
{
if
(
maskSumTex
)
if
(
haveImageSupport
)
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
maskSumTex
));
kerCalcDetTrace
.
args
(
sumTex
,
img_rows
,
img_cols
,
nOctaveLayers
,
octave
,
c_layer_rows
,
ocl
::
KernelArg
::
WriteOnlyNoSize
(
det
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
trace
));
}
else
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
maskSum
.
data
));
kerCalcDetTrace
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
sum
),
img_rows
,
img_cols
,
nOctaveLayers
,
octave
,
c_layer_rows
,
ocl
::
KernelArg
::
WriteOnlyNoSize
(
det
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
trace
));
}
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
maskSum
.
step
));
}
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
layer_cols
-
2
*
min_margin
,
localThreads
[
0
]
-
2
)
*
localThreads
[
0
],
divUp
(
layer_rows
-
2
*
min_margin
,
localThreads
[
1
]
-
2
)
*
nLayers
*
localThreads
[
1
],
1
};
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
SURF_OCL_Invoker
::
icvInterpolateKeypoint_gpu
(
const
oclMat
&
det
,
const
oclMat
&
maxPosBuffer
,
int
maxCounter
,
oclMat
&
keypoints
,
oclMat
&
counters_
,
int
octave
,
int
layer_rows
,
int
max_features
)
{
Context
*
clCxt
=
det
.
clCxt
;
String
kernelName
=
"icvInterpolateKeypoint"
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
det
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
maxPosBuffer
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
counters_
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
det
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
octave
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
layer_rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
max_features
));
size_t
localThreads
[
3
]
=
{
3
,
3
,
3
};
size_t
globalThreads
[
3
]
=
{
maxCounter
*
localThreads
[
0
],
localThreads
[
1
],
1
};
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
return
kerCalcDetTrace
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
}
void
SURF_OCL_Invoker
::
icvCalcOrientation_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
)
bool
SURF_OCL
::
findMaximaInLayer
(
const
UMat
&
det
,
const
UMat
&
trace
,
UMat
&
maxPosBuffer
,
UMat
&
maxCounter
,
int
counterOffset
,
int
octave
,
int
layer_rows
,
int
layer_cols
)
{
Context
*
clCxt
=
counters
.
clCxt
;
String
kernelName
=
"icvCalcOrientation"
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
if
(
sumTex
)
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
sumTex
));
const
int
min_margin
=
((
calcSize
(
octave
,
2
)
>>
1
)
>>
octave
)
+
1
;
bool
haveMask
=
!
maskSum
.
empty
()
||
(
maskSumTex
.
ptr
()
!=
0
);
int
nOctaveLayers
=
params
->
nOctaveLayers
;
ocl
::
Kernel
ker
;
if
(
haveMask
)
{
if
(
haveImageSupport
)
ker
=
kerFindMaximaMask
.
args
(
maskSumTex
,
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
trace
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxPosBuffer
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxCounter
),
counterOffset
,
img_rows
,
img_cols
,
octave
,
nOctaveLayers
,
layer_rows
,
layer_cols
,
maxCandidates
,
(
float
)
params
->
hessianThreshold
);
else
ker
=
kerFindMaximaMask
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
maskSum
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
trace
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxPosBuffer
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxCounter
),
counterOffset
,
img_rows
,
img_cols
,
octave
,
nOctaveLayers
,
layer_rows
,
layer_cols
,
maxCandidates
,
(
float
)
params
->
hessianThreshold
);
}
else
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
sum
.
data
));
// if image2d is not supported
}
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
surf_
.
sum
.
step
));
size_t
localThreads
[
3
]
=
{
ORI_LOCAL_SIZE
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
nFeatures
*
localThreads
[
0
],
1
,
1
};
ker
=
kerFindMaxima
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
trace
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxPosBuffer
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxCounter
),
counterOffset
,
img_rows
,
img_cols
,
octave
,
nOctaveLayers
,
layer_rows
,
layer_cols
,
maxCandidates
,
(
float
)
params
->
hessianThreshold
);
}
size_t
localThreads
[
3
]
=
{
16
,
16
};
size_t
globalThreads
[
3
]
=
{
divUp
(
layer_cols
-
2
*
min_margin
,
localThreads
[
0
]
-
2
)
*
localThreads
[
0
],
divUp
(
layer_rows
-
2
*
min_margin
,
localThreads
[
1
]
-
2
)
*
nOctaveLayers
*
localThreads
[
1
]
};
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
return
ker
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
}
void
SURF_OCL_Invoker
::
icvSetUpright_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
)
bool
SURF_OCL
::
interpolateKeypoint
(
const
UMat
&
det
,
const
UMat
&
maxPosBuffer
,
int
maxCounter
,
UMat
&
keypoints
,
UMat
&
counters_
,
int
octave
,
int
layer_rows
,
int
max_features
)
{
Context
*
clCxt
=
counters
.
clCxt
;
String
kernelName
=
"icvSetUpright"
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
nFeatures
));
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
saturate_cast
<
size_t
>
(
nFeatures
),
1
,
1
};
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
size_t
localThreads
[
3
]
=
{
3
,
3
,
3
};
size_t
globalThreads
[
3
]
=
{
maxCounter
*
localThreads
[
0
],
localThreads
[
1
],
3
};
return
kerInterp
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
PtrReadOnly
(
maxPosBuffer
),
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
),
ocl
::
KernelArg
::
PtrReadWrite
(
counters_
),
img_rows
,
img_cols
,
octave
,
layer_rows
,
max_features
).
run
(
3
,
globalThreads
,
localThreads
,
false
);
}
void
SURF_OCL_Invoker
::
compute_descriptors_gpu
(
const
oclMat
&
descriptors
,
const
oclMat
&
keypoints
,
int
nFeatures
)
bool
SURF_OCL
::
calcOrientation
(
UMat
&
keypoints
)
{
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
Context
*
clCxt
=
descriptors
.
clCxt
;
String
kernelName
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
size_t
localThreads
[
3
]
=
{
1
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
1
,
1
,
1
};
if
(
descriptors
.
cols
==
64
)
{
kernelName
=
"compute_descriptors64"
;
localThreads
[
0
]
=
6
;
localThreads
[
1
]
=
6
;
globalThreads
[
0
]
=
nFeatures
*
localThreads
[
0
];
globalThreads
[
1
]
=
16
*
localThreads
[
1
];
args
.
clear
();
if
(
imgTex
)
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
imgTex
));
}
else
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
_img
.
data
));
}
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
step
));
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
kernelName
=
"normalize_descriptors64"
;
localThreads
[
0
]
=
64
;
localThreads
[
1
]
=
1
;
globalThreads
[
0
]
=
nFeatures
*
localThreads
[
0
];
globalThreads
[
1
]
=
localThreads
[
1
];
args
.
clear
();
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
kernelName
=
"compute_descriptors128"
;
localThreads
[
0
]
=
6
;
localThreads
[
1
]
=
6
;
globalThreads
[
0
]
=
nFeatures
*
localThreads
[
0
];
globalThreads
[
1
]
=
16
*
localThreads
[
1
];
args
.
clear
();
if
(
imgTex
)
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
imgTex
));
}
int
nFeatures
=
keypoints
.
cols
;
if
(
nFeatures
==
0
)
return
true
;
if
(
haveImageSupport
)
kerOri
.
args
(
sumTex
,
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
),
img_rows
,
img_cols
);
else
{
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
_img
.
data
));
}
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
step
));
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
kernelName
=
"normalize_descriptors128"
;
localThreads
[
0
]
=
128
;
localThreads
[
1
]
=
1
;
kerOri
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
sum
),
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
),
img_rows
,
img_cols
);
globalThreads
[
0
]
=
nFeatures
*
localThreads
[
0
];
globalThreads
[
1
]
=
localThreads
[
1
];
args
.
clear
();
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
openCLExecuteKernelSURF
(
clCxt
,
&
surfprog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
size_t
localThreads
[
3
]
=
{
ORI_LOCAL_SIZE
,
1
};
size_t
globalThreads
[
3
]
=
{
nFeatures
*
localThreads
[
0
],
1
};
return
kerOri
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
}
#endif //HAVE_OPENCV_OCL
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment