Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
84022779
Commit
84022779
authored
Sep 06, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added bicubic interpolation to gpu::remap
parent
415978b1
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
385 additions
and
262 deletions
+385
-262
brute_force_matcher.cu
modules/gpu/src/cuda/brute_force_matcher.cu
+1
-1
imgproc.cu
modules/gpu/src/cuda/imgproc.cu
+9
-2
surf.cu
modules/gpu/src/cuda/surf.cu
+1
-0
imgproc.cpp
modules/gpu/src/imgproc.cpp
+1
-1
filters.hpp
modules/gpu/src/opencv2/gpu/device/filters.hpp
+135
-0
utility.hpp
modules/gpu/src/opencv2/gpu/device/utility.hpp
+0
-228
vec_distance.hpp
modules/gpu/src/opencv2/gpu/device/vec_distance.hpp
+223
-0
test_imgproc.cpp
modules/gpu/test/test_imgproc.cpp
+4
-22
tests.cpp
samples/gpu/performance/tests.cpp
+11
-8
No files found.
modules/gpu/src/cuda/brute_force_matcher.cu
View file @
84022779
...
...
@@ -42,7 +42,7 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/
utility
.hpp"
#include "opencv2/gpu/device/
vec_distance
.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
...
...
modules/gpu/src/cuda/imgproc.cu
View file @
84022779
...
...
@@ -45,7 +45,7 @@
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/
utility
.hpp"
#include "opencv2/gpu/device/
filters
.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
...
...
@@ -186,7 +186,7 @@ namespace cv { namespace gpu { namespace imgproc
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream);
static const caller_t callers[
2
][5] =
static const caller_t callers[
3
][5] =
{
{
RemapDispatcher<PointFilter, BrdReflect101, T>::call,
...
...
@@ -201,6 +201,13 @@ namespace cv { namespace gpu { namespace imgproc
RemapDispatcher<LinearFilter, BrdConstant, T>::call,
RemapDispatcher<LinearFilter, BrdReflect, T>::call,
RemapDispatcher<LinearFilter, BrdWrap, T>::call
},
{
RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
RemapDispatcher<CubicFilter, BrdConstant, T>::call,
RemapDispatcher<CubicFilter, BrdReflect, T>::call,
RemapDispatcher<CubicFilter, BrdWrap, T>::call
}
};
...
...
modules/gpu/src/cuda/surf.cu
View file @
84022779
...
...
@@ -50,6 +50,7 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/filters.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
...
...
modules/gpu/src/imgproc.cpp
View file @
84022779
...
...
@@ -131,7 +131,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
CV_Assert
(
xmap
.
type
()
==
CV_32F
&&
ymap
.
type
()
==
CV_32F
&&
xmap
.
size
()
==
ymap
.
size
());
CV_Assert
(
interpolation
==
INTER_NEAREST
||
interpolation
==
INTER_LINEAR
);
CV_Assert
(
interpolation
==
INTER_NEAREST
||
interpolation
==
INTER_LINEAR
||
interpolation
==
INTER_CUBIC
);
CV_Assert
(
borderMode
==
BORDER_REFLECT101
||
borderMode
==
BORDER_REPLICATE
||
borderMode
==
BORDER_CONSTANT
||
borderMode
==
BORDER_REFLECT
||
borderMode
==
BORDER_WRAP
);
int
gpuBorderType
;
...
...
modules/gpu/src/opencv2/gpu/device/filters.hpp
0 → 100644
View file @
84022779
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_FILTERS_HPP__
#define __OPENCV_GPU_FILTERS_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
typename
Ptr2D
>
struct
PointFilter
{
typedef
typename
Ptr2D
::
elem_type
elem_type
;
typedef
float
index_type
;
explicit
__host__
__device__
__forceinline__
PointFilter
(
const
Ptr2D
&
src_
)
:
src
(
src_
)
{}
__device__
__forceinline__
elem_type
operator
()(
float
y
,
float
x
)
const
{
return
src
(
__float2int_rn
(
y
),
__float2int_rn
(
x
));
}
const
Ptr2D
src
;
};
template
<
typename
Ptr2D
>
struct
LinearFilter
{
typedef
typename
Ptr2D
::
elem_type
elem_type
;
typedef
float
index_type
;
explicit
__host__
__device__
__forceinline__
LinearFilter
(
const
Ptr2D
&
src_
)
:
src
(
src_
)
{}
__device__
__forceinline__
elem_type
operator
()(
float
y
,
float
x
)
const
{
typedef
typename
TypeVec
<
float
,
VecTraits
<
elem_type
>::
cn
>::
vec_type
work_type
;
work_type
out
=
VecTraits
<
work_type
>::
all
(
0
);
const
int
x1
=
__float2int_rd
(
x
);
const
int
y1
=
__float2int_rd
(
y
);
const
int
x2
=
x1
+
1
;
const
int
y2
=
y1
+
1
;
elem_type
src_reg
=
src
(
y1
,
x1
);
out
=
out
+
src_reg
*
((
x2
-
x
)
*
(
y2
-
y
));
src_reg
=
src
(
y1
,
x2
);
out
=
out
+
src_reg
*
((
x
-
x1
)
*
(
y2
-
y
));
src_reg
=
src
(
y2
,
x1
);
out
=
out
+
src_reg
*
((
x2
-
x
)
*
(
y
-
y1
));
src_reg
=
src
(
y2
,
x2
);
out
=
out
+
src_reg
*
((
x
-
x1
)
*
(
y
-
y1
));
return
saturate_cast
<
elem_type
>
(
out
);
}
const
Ptr2D
src
;
};
template
<
typename
Ptr2D
>
struct
CubicFilter
{
typedef
typename
Ptr2D
::
elem_type
elem_type
;
typedef
float
index_type
;
typedef
typename
TypeVec
<
float
,
VecTraits
<
elem_type
>::
cn
>::
vec_type
work_type
;
explicit
__host__
__device__
__forceinline__
CubicFilter
(
const
Ptr2D
&
src_
)
:
src
(
src_
)
{}
static
__device__
__forceinline__
work_type
cubicInterpolate
(
const
work_type
&
p0
,
const
work_type
&
p1
,
const
work_type
&
p2
,
const
work_type
&
p3
,
float
x
)
{
return
p1
+
0.5
f
*
x
*
(
p2
-
p0
+
x
*
(
2.0
f
*
p0
-
5.0
f
*
p1
+
4.0
f
*
p2
-
p3
+
x
*
(
3.0
f
*
(
p1
-
p2
)
+
p3
-
p0
)));
}
__device__
elem_type
operator
()(
float
y
,
float
x
)
const
{
const
int
xi
=
__float2int_rn
(
x
);
const
int
yi
=
__float2int_rn
(
y
);
work_type
arr
[
4
];
arr
[
0
]
=
cubicInterpolate
(
saturate_cast
<
work_type
>
(
src
(
yi
-
1
,
xi
-
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
-
1
,
xi
)),
saturate_cast
<
work_type
>
(
src
(
yi
-
1
,
xi
+
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
-
1
,
xi
+
2
)),
x
-
xi
);
arr
[
1
]
=
cubicInterpolate
(
saturate_cast
<
work_type
>
(
src
(
yi
,
xi
-
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
,
xi
)),
saturate_cast
<
work_type
>
(
src
(
yi
,
xi
+
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
,
xi
+
2
)),
x
-
xi
);
arr
[
2
]
=
cubicInterpolate
(
saturate_cast
<
work_type
>
(
src
(
yi
+
1
,
xi
-
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
+
1
,
xi
)),
saturate_cast
<
work_type
>
(
src
(
yi
+
1
,
xi
+
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
+
1
,
xi
+
2
)),
x
-
xi
);
arr
[
3
]
=
cubicInterpolate
(
saturate_cast
<
work_type
>
(
src
(
yi
+
2
,
xi
-
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
+
2
,
xi
)),
saturate_cast
<
work_type
>
(
src
(
yi
+
2
,
xi
+
1
)),
saturate_cast
<
work_type
>
(
src
(
yi
+
2
,
xi
+
2
)),
x
-
xi
);
return
saturate_cast
<
elem_type
>
(
cubicInterpolate
(
arr
[
0
],
arr
[
1
],
arr
[
2
],
arr
[
3
],
y
-
yi
));
}
const
Ptr2D
src
;
};
}}}
#endif // __OPENCV_GPU_FILTERS_HPP__
modules/gpu/src/opencv2/gpu/device/utility.hpp
View file @
84022779
...
...
@@ -135,180 +135,6 @@ namespace cv { namespace gpu { namespace device
StaticAssert
<
n
>=
8
&&
n
<=
512
>::
check
();
detail
::
PredValReductionDispatcher
<
n
<=
64
>::
reduce
<
n
>
(
myData
,
myVal
,
sdata
,
sval
,
tid
,
pred
);
}
///////////////////////////////////////////////////////////////////////////////
// Vector Distance
template
<
typename
T
>
struct
L1Dist
{
typedef
int
value_type
;
typedef
int
result_type
;
__device__
__forceinline__
L1Dist
()
:
mySum
(
0
)
{}
__device__
__forceinline__
void
reduceIter
(
int
val1
,
int
val2
)
{
mySum
=
__sad
(
val1
,
val2
,
mySum
);
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
int
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
int
>
());
}
__device__
__forceinline__
operator
int
()
const
{
return
mySum
;
}
int
mySum
;
};
template
<>
struct
L1Dist
<
float
>
{
typedef
float
value_type
;
typedef
float
result_type
;
__device__
__forceinline__
L1Dist
()
:
mySum
(
0.0
f
)
{}
__device__
__forceinline__
void
reduceIter
(
float
val1
,
float
val2
)
{
mySum
+=
::
fabs
(
val1
-
val2
);
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
float
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
float
>
());
}
__device__
__forceinline__
operator
float
()
const
{
return
mySum
;
}
float
mySum
;
};
struct
L2Dist
{
typedef
float
value_type
;
typedef
float
result_type
;
__device__
__forceinline__
L2Dist
()
:
mySum
(
0.0
f
)
{}
__device__
__forceinline__
void
reduceIter
(
float
val1
,
float
val2
)
{
float
reg
=
val1
-
val2
;
mySum
+=
reg
*
reg
;
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
float
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
float
>
());
}
__device__
__forceinline__
operator
float
()
const
{
return
sqrtf
(
mySum
);
}
float
mySum
;
};
struct
HammingDist
{
typedef
int
value_type
;
typedef
int
result_type
;
__device__
__forceinline__
HammingDist
()
:
mySum
(
0
)
{}
__device__
__forceinline__
void
reduceIter
(
int
val1
,
int
val2
)
{
mySum
+=
__popc
(
val1
^
val2
);
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
int
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
int
>
());
}
__device__
__forceinline__
operator
int
()
const
{
return
mySum
;
}
int
mySum
;
};
// calc distance between two vectors in global memory
template
<
int
THREAD_DIM
,
typename
Dist
,
typename
T1
,
typename
T2
>
__device__
void
calcVecDiffGlobal
(
const
T1
*
vec1
,
const
T2
*
vec2
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
{
for
(
int
i
=
tid
;
i
<
len
;
i
+=
THREAD_DIM
)
{
T1
val1
;
ForceGlob
<
T1
>::
Load
(
vec1
,
i
,
val1
);
T2
val2
;
ForceGlob
<
T2
>::
Load
(
vec2
,
i
,
val2
);
dist
.
reduceIter
(
val1
,
val2
);
}
dist
.
reduceAll
<
THREAD_DIM
>
(
smem
,
tid
);
}
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
template
<
int
THREAD_DIM
,
int
MAX_LEN
,
bool
LEN_EQ_MAX_LEN
,
typename
Dist
,
typename
T1
,
typename
T2
>
__device__
__forceinline__
void
calcVecDiffCached
(
const
T1
*
vecCached
,
const
T2
*
vecGlob
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
{
detail
::
VecDiffCachedCalculator
<
THREAD_DIM
,
MAX_LEN
,
LEN_EQ_MAX_LEN
>::
calc
(
vecCached
,
vecGlob
,
len
,
dist
,
tid
);
dist
.
reduceAll
<
THREAD_DIM
>
(
smem
,
tid
);
}
// calc distance between two vectors in global memory
template
<
int
THREAD_DIM
,
typename
T1
>
struct
VecDiffGlobal
{
explicit
__device__
__forceinline__
VecDiffGlobal
(
const
T1
*
vec1_
,
int
=
0
,
void
*
=
0
,
int
=
0
,
int
=
0
)
{
vec1
=
vec1_
;
}
template
<
typename
T2
,
typename
Dist
>
__device__
__forceinline__
void
calc
(
const
T2
*
vec2
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
const
{
calcVecDiffGlobal
<
THREAD_DIM
>
(
vec1
,
vec2
,
len
,
dist
,
smem
,
tid
);
}
const
T1
*
vec1
;
};
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
template
<
int
THREAD_DIM
,
int
MAX_LEN
,
bool
LEN_EQ_MAX_LEN
,
typename
U
>
struct
VecDiffCachedRegister
{
template
<
typename
T1
>
__device__
__forceinline__
VecDiffCachedRegister
(
const
T1
*
vec1
,
int
len
,
U
*
smem
,
int
glob_tid
,
int
tid
)
{
if
(
glob_tid
<
len
)
smem
[
glob_tid
]
=
vec1
[
glob_tid
];
__syncthreads
();
U
*
vec1ValsPtr
=
vec1Vals
;
#pragma unroll
for
(
int
i
=
tid
;
i
<
MAX_LEN
;
i
+=
THREAD_DIM
)
*
vec1ValsPtr
++
=
smem
[
i
];
__syncthreads
();
}
template
<
typename
T2
,
typename
Dist
>
__device__
__forceinline__
void
calc
(
const
T2
*
vec2
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
const
{
calcVecDiffCached
<
THREAD_DIM
,
MAX_LEN
,
LEN_EQ_MAX_LEN
>
(
vec1Vals
,
vec2
,
len
,
dist
,
smem
,
tid
);
}
U
vec1Vals
[
MAX_LEN
/
THREAD_DIM
];
};
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
...
...
@@ -363,60 +189,6 @@ namespace cv { namespace gpu { namespace device
return
false
;
}
///////////////////////////////////////////////////////////////////////////////
// Filters
template
<
typename
Ptr2D
>
struct
PointFilter
{
typedef
typename
Ptr2D
::
elem_type
elem_type
;
typedef
float
index_type
;
explicit
__host__
__device__
__forceinline__
PointFilter
(
const
Ptr2D
&
src_
)
:
src
(
src_
)
{}
__device__
__forceinline__
elem_type
operator
()(
float
y
,
float
x
)
const
{
return
src
(
__float2int_rn
(
y
),
__float2int_rn
(
x
));
}
const
Ptr2D
src
;
};
template
<
typename
Ptr2D
>
struct
LinearFilter
{
typedef
typename
Ptr2D
::
elem_type
elem_type
;
typedef
float
index_type
;
explicit
__host__
__device__
__forceinline__
LinearFilter
(
const
Ptr2D
&
src_
)
:
src
(
src_
)
{}
__device__
__forceinline__
elem_type
operator
()(
float
y
,
float
x
)
const
{
typedef
typename
TypeVec
<
float
,
VecTraits
<
elem_type
>::
cn
>::
vec_type
work_type
;
work_type
out
=
VecTraits
<
work_type
>::
all
(
0
);
const
int
x1
=
__float2int_rd
(
x
);
const
int
y1
=
__float2int_rd
(
y
);
const
int
x2
=
x1
+
1
;
const
int
y2
=
y1
+
1
;
elem_type
src_reg
=
src
(
y1
,
x1
);
out
=
out
+
src_reg
*
((
x2
-
x
)
*
(
y2
-
y
));
src_reg
=
src
(
y1
,
x2
);
out
=
out
+
src_reg
*
((
x
-
x1
)
*
(
y2
-
y
));
src_reg
=
src
(
y2
,
x1
);
out
=
out
+
src_reg
*
((
x2
-
x
)
*
(
y
-
y1
));
src_reg
=
src
(
y2
,
x2
);
out
=
out
+
src_reg
*
((
x
-
x1
)
*
(
y
-
y1
));
return
saturate_cast
<
elem_type
>
(
out
);
}
const
Ptr2D
src
;
};
}}}
#endif // __OPENCV_GPU_UTILITY_HPP__
modules/gpu/src/opencv2/gpu/device/vec_distance.hpp
0 → 100644
View file @
84022779
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_HPP__
#include "utility.hpp"
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
typename
T
>
struct
L1Dist
{
typedef
int
value_type
;
typedef
int
result_type
;
__device__
__forceinline__
L1Dist
()
:
mySum
(
0
)
{}
__device__
__forceinline__
void
reduceIter
(
int
val1
,
int
val2
)
{
mySum
=
__sad
(
val1
,
val2
,
mySum
);
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
int
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
int
>
());
}
__device__
__forceinline__
operator
int
()
const
{
return
mySum
;
}
int
mySum
;
};
template
<>
struct
L1Dist
<
float
>
{
typedef
float
value_type
;
typedef
float
result_type
;
__device__
__forceinline__
L1Dist
()
:
mySum
(
0.0
f
)
{}
__device__
__forceinline__
void
reduceIter
(
float
val1
,
float
val2
)
{
mySum
+=
::
fabs
(
val1
-
val2
);
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
float
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
float
>
());
}
__device__
__forceinline__
operator
float
()
const
{
return
mySum
;
}
float
mySum
;
};
struct
L2Dist
{
typedef
float
value_type
;
typedef
float
result_type
;
__device__
__forceinline__
L2Dist
()
:
mySum
(
0.0
f
)
{}
__device__
__forceinline__
void
reduceIter
(
float
val1
,
float
val2
)
{
float
reg
=
val1
-
val2
;
mySum
+=
reg
*
reg
;
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
float
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
float
>
());
}
__device__
__forceinline__
operator
float
()
const
{
return
sqrtf
(
mySum
);
}
float
mySum
;
};
struct
HammingDist
{
typedef
int
value_type
;
typedef
int
result_type
;
__device__
__forceinline__
HammingDist
()
:
mySum
(
0
)
{}
__device__
__forceinline__
void
reduceIter
(
int
val1
,
int
val2
)
{
mySum
+=
__popc
(
val1
^
val2
);
}
template
<
int
THREAD_DIM
>
__device__
__forceinline__
void
reduceAll
(
int
*
smem
,
int
tid
)
{
reduce
<
THREAD_DIM
>
(
smem
,
mySum
,
tid
,
plus
<
volatile
int
>
());
}
__device__
__forceinline__
operator
int
()
const
{
return
mySum
;
}
int
mySum
;
};
// calc distance between two vectors in global memory
template
<
int
THREAD_DIM
,
typename
Dist
,
typename
T1
,
typename
T2
>
__device__
void
calcVecDiffGlobal
(
const
T1
*
vec1
,
const
T2
*
vec2
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
{
for
(
int
i
=
tid
;
i
<
len
;
i
+=
THREAD_DIM
)
{
T1
val1
;
ForceGlob
<
T1
>::
Load
(
vec1
,
i
,
val1
);
T2
val2
;
ForceGlob
<
T2
>::
Load
(
vec2
,
i
,
val2
);
dist
.
reduceIter
(
val1
,
val2
);
}
dist
.
reduceAll
<
THREAD_DIM
>
(
smem
,
tid
);
}
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
template
<
int
THREAD_DIM
,
int
MAX_LEN
,
bool
LEN_EQ_MAX_LEN
,
typename
Dist
,
typename
T1
,
typename
T2
>
__device__
__forceinline__
void
calcVecDiffCached
(
const
T1
*
vecCached
,
const
T2
*
vecGlob
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
{
detail
::
VecDiffCachedCalculator
<
THREAD_DIM
,
MAX_LEN
,
LEN_EQ_MAX_LEN
>::
calc
(
vecCached
,
vecGlob
,
len
,
dist
,
tid
);
dist
.
reduceAll
<
THREAD_DIM
>
(
smem
,
tid
);
}
// calc distance between two vectors in global memory
template
<
int
THREAD_DIM
,
typename
T1
>
struct
VecDiffGlobal
{
explicit
__device__
__forceinline__
VecDiffGlobal
(
const
T1
*
vec1_
,
int
=
0
,
void
*
=
0
,
int
=
0
,
int
=
0
)
{
vec1
=
vec1_
;
}
template
<
typename
T2
,
typename
Dist
>
__device__
__forceinline__
void
calc
(
const
T2
*
vec2
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
const
{
calcVecDiffGlobal
<
THREAD_DIM
>
(
vec1
,
vec2
,
len
,
dist
,
smem
,
tid
);
}
const
T1
*
vec1
;
};
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
template
<
int
THREAD_DIM
,
int
MAX_LEN
,
bool
LEN_EQ_MAX_LEN
,
typename
U
>
struct
VecDiffCachedRegister
{
template
<
typename
T1
>
__device__
__forceinline__
VecDiffCachedRegister
(
const
T1
*
vec1
,
int
len
,
U
*
smem
,
int
glob_tid
,
int
tid
)
{
if
(
glob_tid
<
len
)
smem
[
glob_tid
]
=
vec1
[
glob_tid
];
__syncthreads
();
U
*
vec1ValsPtr
=
vec1Vals
;
#pragma unroll
for
(
int
i
=
tid
;
i
<
MAX_LEN
;
i
+=
THREAD_DIM
)
*
vec1ValsPtr
++
=
smem
[
i
];
__syncthreads
();
}
template
<
typename
T2
,
typename
Dist
>
__device__
__forceinline__
void
calc
(
const
T2
*
vec2
,
int
len
,
Dist
&
dist
,
typename
Dist
::
result_type
*
smem
,
int
tid
)
const
{
calcVecDiffCached
<
THREAD_DIM
,
MAX_LEN
,
LEN_EQ_MAX_LEN
>
(
vec1Vals
,
vec2
,
len
,
dist
,
smem
,
tid
);
}
U
vec1Vals
[
MAX_LEN
/
THREAD_DIM
];
};
}}}
#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
modules/gpu/test/test_imgproc.cpp
View file @
84022779
...
...
@@ -210,20 +210,8 @@ struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int,
src
=
cvtest
::
randomMat
(
rng
,
size
,
type
,
0.0
,
256.0
,
false
);
xmap
.
create
(
size
,
CV_32FC1
);
ymap
.
create
(
size
,
CV_32FC1
);
for
(
int
y
=
0
;
y
<
src
.
rows
;
++
y
)
{
float
*
xmap_row
=
xmap
.
ptr
<
float
>
(
y
);
float
*
ymap_row
=
ymap
.
ptr
<
float
>
(
y
);
for
(
int
x
=
0
;
x
<
src
.
cols
;
++
x
)
{
xmap_row
[
x
]
=
src
.
cols
-
1
-
x
+
10
;
ymap_row
[
x
]
=
src
.
rows
-
1
-
y
+
10
;
}
}
xmap
=
cvtest
::
randomMat
(
rng
,
size
,
CV_32FC1
,
-
20.0
,
src
.
cols
+
20
,
false
);
ymap
=
cvtest
::
randomMat
(
rng
,
size
,
CV_32FC1
,
-
20.0
,
src
.
rows
+
20
,
false
);
cv
::
remap
(
src
,
dst_gold
,
xmap
,
ymap
,
interpolation
,
borderType
);
}
...
...
@@ -253,13 +241,7 @@ TEST_P(Remap, Accuracy)
gpuRes
.
download
(
dst
);
);
if
(
dst_gold
.
depth
()
==
CV_32F
)
{
dst_gold
.
convertTo
(
dst_gold
,
CV_8U
);
dst
.
convertTo
(
dst
,
CV_8U
);
}
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
1e-5
);
EXPECT_MAT_SIMILAR
(
dst_gold
,
dst
,
1e-1
);
}
INSTANTIATE_TEST_CASE_P
...
...
@@ -272,7 +254,7 @@ INSTANTIATE_TEST_CASE_P
CV_8UC1
,
CV_8UC3
,
CV_8UC4
,
CV_32FC1
,
CV_32FC3
,
CV_32FC4
),
testing
::
Values
((
int
)
cv
::
INTER_NEAREST
,
(
int
)
cv
::
INTER_LINEAR
),
testing
::
Values
((
int
)
cv
::
INTER_NEAREST
,
(
int
)
cv
::
INTER_LINEAR
,
(
int
)
cv
::
INTER_CUBIC
),
testing
::
Values
((
int
)
cv
::
BORDER_REFLECT101
,
(
int
)
cv
::
BORDER_REPLICATE
,
(
int
)
cv
::
BORDER_CONSTANT
,
(
int
)
cv
::
BORDER_REFLECT
,
(
int
)
cv
::
BORDER_WRAP
)
)
);
...
...
samples/gpu/performance/tests.cpp
View file @
84022779
...
...
@@ -78,6 +78,9 @@ TEST(remap)
{
Mat
src
,
dst
,
xmap
,
ymap
;
gpu
::
GpuMat
d_src
,
d_dst
,
d_xmap
,
d_ymap
;
int
interpolation
=
INTER_LINEAR
;
int
borderMode
=
BORDER_CONSTANT
;
for
(
int
size
=
1000
;
size
<=
4000
;
size
*=
2
)
{
...
...
@@ -101,7 +104,7 @@ TEST(remap)
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
remap
(
src
,
dst
,
xmap
,
ymap
,
interpolation
,
borderMode
);
CPU_OFF
;
d_src
=
src
;
...
...
@@ -110,7 +113,7 @@ TEST(remap)
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
interpolation
,
borderMode
);
GPU_OFF
;
}
...
...
@@ -136,7 +139,7 @@ TEST(remap)
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
remap
(
src
,
dst
,
xmap
,
ymap
,
interpolation
,
borderMode
);
CPU_OFF
;
d_src
=
src
;
...
...
@@ -145,7 +148,7 @@ TEST(remap)
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
interpolation
,
borderMode
);
GPU_OFF
;
}
...
...
@@ -171,7 +174,7 @@ TEST(remap)
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
remap
(
src
,
dst
,
xmap
,
ymap
,
interpolation
,
borderMode
);
CPU_OFF
;
d_src
=
src
;
...
...
@@ -180,7 +183,7 @@ TEST(remap)
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
interpolation
,
borderMode
);
GPU_OFF
;
}
...
...
@@ -206,7 +209,7 @@ TEST(remap)
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
remap
(
src
,
dst
,
xmap
,
ymap
,
interpolation
,
borderMode
);
CPU_OFF
;
d_src
=
src
;
...
...
@@ -215,7 +218,7 @@ TEST(remap)
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
interpolation
,
borderMode
);
GPU_OFF
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment