opencv / Commit 2777ebb8
authored Jun 28, 2012 by Marina Kolpakova

merged GPU scan

parent 6cca6a45

Showing 25 changed files with 283 additions and 34 deletions
cmake/OpenCVDetectCUDA.cmake                             +0    -0
modules/gpu/perf/perf_imgproc.cpp                        +34   -0
modules/gpu/src/cuda/matrix_reductions.cu                +1    -1
modules/gpu/src/cuda/resize.cu                           +2    -1
modules/gpu/src/cuda/split_merge.cu                      +18   -18
modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu             +3    -3
modules/gpu/src/nvidia/core/NCV.cu                       +8    -8
modules/gpu/src/opencv2/gpu/device/common.hpp            +0    -0
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp     +0    -0
modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp      +0    -0
modules/gpu/src/opencv2/gpu/device/emulation.hpp         +0    -0
modules/gpu/src/opencv2/gpu/device/funcattrib.hpp        +0    -0
modules/gpu/src/opencv2/gpu/device/functional.hpp        +24   -2
modules/gpu/src/opencv2/gpu/device/limits.hpp            +0    -0
modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp     +0    -0
modules/gpu/src/opencv2/gpu/device/scan.hpp              +167  -0
modules/gpu/src/opencv2/gpu/device/static_check.hpp      +0    -0
modules/gpu/src/opencv2/gpu/device/transform.hpp         +0    -0
modules/gpu/src/opencv2/gpu/device/type_traits.hpp       +0    -0
modules/gpu/src/opencv2/gpu/device/utility.hpp           +0    -0
modules/gpu/src/opencv2/gpu/device/vec_distance.hpp      +0    -0
modules/gpu/src/opencv2/gpu/device/vec_math.hpp          +0    -0
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp        +0    -0
modules/gpu/src/opencv2/gpu/device/warp.hpp              +0    -0
modules/gpu/src/resize.cpp                               +26   -1
cmake/OpenCVDetectCUDA.cmake
modules/gpu/perf/perf_imgproc.cpp
@@ -90,6 +90,40 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
    Interpolation(cv::INTER_CUBIC), Interpolation(cv::INTER_AREA)),
    testing::Values(Scale(0.5), Scale(0.3), Scale(2.0))));

GPU_PERF_TEST(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, Scale)
{
    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
    cv::gpu::setDevice(devInfo.deviceID());

    cv::Size size = GET_PARAM(1);
    int type = GET_PARAM(2);
    int interpolation = cv::INTER_AREA;
    double f = GET_PARAM(3);

    cv::Mat src_host(size, type);
    fill(src_host, 0, 255);

    cv::gpu::GpuMat src(src_host);
    cv::gpu::GpuMat dst;

    cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation);

    declare.time(1.0);

    TEST_CYCLE()
    {
        cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, ResizeArea, testing::Combine(
    ALL_DEVICES,
    testing::Values(perf::sz1080p/*, cv::Size(4096, 2048)*/),
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(Scale(0.2), Scale(0.1), Scale(0.05))));

//////////////////////////////////////////////////////////////////////
// WarpAffine
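For context, the new ResizeArea benchmark exercises the same public API an application would use for GPU area-interpolation downscaling. A minimal host-side sketch under that assumption (OpenCV 2.4-era gpu module; the file names are placeholders, not part of the commit):

    #include <opencv2/core/core.hpp>
    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        // Placeholder input image; 8-bit or float, 1/3/4 channels.
        cv::Mat host_src = cv::imread("input.png");

        cv::gpu::GpuMat src(host_src);   // upload to the device
        cv::gpu::GpuMat dst;

        // Downscale by 10x per dimension with area interpolation,
        // the path the ResizeArea perf test above measures.
        cv::gpu::resize(src, dst, cv::Size(), 0.1, 0.1, cv::INTER_AREA);

        cv::Mat host_dst;
        dst.download(host_dst);          // copy the result back to the host
        cv::imwrite("output.png", host_dst);
        return 0;
    }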
modules/gpu/src/cuda/matrix_reductions.cu
@@ -72,7 +72,7 @@ namespace cv { namespace gpu { namespace device
     struct Mask8U
     {
-        explicit Mask8U(PtrStepb mask): mask(mask) {}
+        explicit Mask8U(PtrStepb mask_): mask(mask_) {}

         __device__ __forceinline__ bool operator()(int y, int x) const
         {
modules/gpu/src/cuda/resize.cu
@@ -46,7 +46,8 @@
 #include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/filters.hpp"
-# include <cfloat>
+#include <cfloat>
+#include <opencv2/gpu/device/scan.hpp>

 namespace cv { namespace gpu { namespace device
 {
modules/gpu/src/cuda/split_merge.cu
@@ -228,9 +228,9 @@ namespace cv { namespace gpu { namespace device
         template <typename T>
         static void mergeC2_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
         {
-            dim3 blockDim(32, 8);
-            dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
-            mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
+            dim3 block(32, 8);
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+            mergeC2_<T><<<grid, block, 0, stream>>>(
                 src[0].data, src[0].step,
                 src[1].data, src[1].step,
                 dst.rows, dst.cols, dst.data, dst.step);
@@ -244,9 +244,9 @@ namespace cv { namespace gpu { namespace device
         template <typename T>
         static void mergeC3_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
         {
-            dim3 blockDim(32, 8);
-            dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
-            mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
+            dim3 block(32, 8);
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+            mergeC3_<T><<<grid, block, 0, stream>>>(
                 src[0].data, src[0].step,
                 src[1].data, src[1].step,
                 src[2].data, src[2].step,
@@ -261,9 +261,9 @@ namespace cv { namespace gpu { namespace device
         template <typename T>
         static void mergeC4_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
         {
-            dim3 blockDim(32, 8);
-            dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
-            mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
+            dim3 block(32, 8);
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
+            mergeC4_<T><<<grid, block, 0, stream>>>(
                 src[0].data, src[0].step,
                 src[1].data, src[1].step,
                 src[2].data, src[2].step,
@@ -437,9 +437,9 @@ namespace cv { namespace gpu { namespace device
         template <typename T>
         static void splitC2_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
         {
-            dim3 blockDim(32, 8);
-            dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
-            splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
+            dim3 block(32, 8);
+            dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
+            splitC2_<T><<<grid, block, 0, stream>>>(
                 src.data, src.step, src.rows, src.cols,
                 dst[0].data, dst[0].step,
                 dst[1].data, dst[1].step);
@@ -453,9 +453,9 @@ namespace cv { namespace gpu { namespace device
         template <typename T>
         static void splitC3_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
         {
-            dim3 blockDim(32, 8);
-            dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
-            splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
+            dim3 block(32, 8);
+            dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
+            splitC3_<T><<<grid, block, 0, stream>>>(
                 src.data, src.step, src.rows, src.cols,
                 dst[0].data, dst[0].step,
                 dst[1].data, dst[1].step,
@@ -470,9 +470,9 @@ namespace cv { namespace gpu { namespace device
         template <typename T>
         static void splitC4_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
         {
-            dim3 blockDim(32, 8);
-            dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
-            splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
+            dim3 block(32, 8);
+            dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
+            splitC4_<T><<<grid, block, 0, stream>>>(
                 src.data, src.step, src.rows, src.cols,
                 dst[0].data, dst[0].step,
                 dst[1].data, dst[1].step,
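The split_merge.cu edits only rename the host-side launch-configuration locals, presumably to avoid confusion with the CUDA built-in variables blockDim and gridDim that exist in device code. A minimal, self-contained sketch of the same divUp/launch pattern (copyKernel, launchCopy, and this divUp are illustrative, not code from the commit):

    #include <cuda_runtime.h>

    // Round-up integer division, used to size the grid.
    static int divUp(int total, int grain) { return (total + grain - 1) / grain; }

    __global__ void copyKernel(const unsigned char* src, unsigned char* dst,
                               int rows, int cols, size_t step)
    {
        // Inside device code, blockDim/gridDim name the CUDA built-ins,
        // which is why the host-side locals are now called block/grid.
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < cols && y < rows)
            dst[y * step + x] = src[y * step + x];
    }

    static void launchCopy(const unsigned char* src, unsigned char* dst,
                           int rows, int cols, size_t step, cudaStream_t stream)
    {
        dim3 block(32, 8);                                      // one 32x8 tile per block
        dim3 grid(divUp(cols, block.x), divUp(rows, block.y));  // enough blocks to cover the image
        copyKernel<<<grid, block, 0, stream>>>(src, dst, rows, cols, step);
    }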
modules/gpu/src/nvidia/NCVBroxOpticalFlow.cu
@@ -1121,18 +1121,18 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
         dim3 p_blocks(iDivUp(nw, 32), iDivUp(nh, 8));
         dim3 p_threads(32, 8);

-        NcvSize32u srcSize (kLevelWidth, kLevelHeight);
+        NcvSize32u inner_srcSize (kLevelWidth, kLevelHeight);
         NcvSize32u dstSize (nw, nh);
         NcvRect32u srcROI (0, 0, kLevelWidth, kLevelHeight);
         NcvRect32u dstROI (0, 0, nw, nh);

-        ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrU->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
+        ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrU->ptr(), inner_srcSize, kLevelStride * sizeof (float), srcROI,
             ptrUNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) );

         ScaleVector(ptrUNew->ptr(), ptrUNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
         ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);

-        ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrV->ptr(), srcSize, kLevelStride * sizeof (float), srcROI,
+        ncvAssertReturnNcvStat( nppiStResize_32f_C1R (ptrV->ptr(), inner_srcSize, kLevelStride * sizeof (float), srcROI,
             ptrVNew->ptr(), dstSize, ns * sizeof (float), dstROI, 1.0f/scale_factor, 1.0f/scale_factor, nppStBicubic) );

         ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
modules/gpu/src/nvidia/core/NCV.cu
@@ -252,7 +252,7 @@ NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
 //===================================================================

-NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
+NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment_)
     :
     currentSize(0),
     _maxSize(0),
@@ -260,23 +260,23 @@ NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
     begin(NULL),
     end(NULL),
     _memType(NCVMemoryTypeNone),
-    _alignment(alignment),
+    _alignment(alignment_),
     bReusesMemory(false)
 {
-    NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;
+    NcvBool bProperAlignment = (alignment_ & (alignment_ - 1)) == 0;
     ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2");
 }

-NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr)
+NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment_, void *reusePtr)
     :
     currentSize(0),
     _maxSize(0),
     allocBegin(NULL),
     _memType(memT),
-    _alignment(alignment)
+    _alignment(alignment_)
 {
-    NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;
+    NcvBool bProperAlignment = (alignment_ & (alignment_ - 1)) == 0;
     ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");
     ncvAssertPrintCheck(memT != NCVMemoryTypeNone, "NCVMemStackAllocator ctor:: Incorrect allocator type");
@@ -425,12 +425,12 @@ size_t NCVMemStackAllocator::maxSize(void) const
 //===================================================================

-NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
+NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment_)
     :
     currentSize(0),
     _maxSize(0),
     _memType(memT),
-    _alignment(alignment)
+    _alignment(alignment_)
 {
     ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
 }
modules/gpu/src/opencv2/gpu/device/common.hpp
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp
modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp
modules/gpu/src/opencv2/gpu/device/emulation.hpp
modules/gpu/src/opencv2/gpu/device/funcattrib.hpp
modules/gpu/src/opencv2/gpu/device/functional.hpp
@@ -416,6 +416,8 @@ namespace cv { namespace gpu { namespace device
        {
            return src1 * src1 + src2 * src2;
        }
        __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function<T, T, float>(){}
        __device__ __forceinline__ hypot_sqr_func() : binary_function<T, T, float>(){}
    };

    // Saturate Cast Functor
@@ -438,6 +440,7 @@ namespace cv { namespace gpu { namespace device
        {
            return (src > thresh) * maxVal;
        }
        __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other) : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
@@ -455,6 +458,7 @@ namespace cv { namespace gpu { namespace device
        {
            return (src <= thresh) * maxVal;
        }
        __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
@@ -523,8 +527,12 @@ namespace cv { namespace gpu { namespace device
            return !pred(x);
        }
        __device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
        __device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}

        const Predicate pred;
    };

    template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
    {
        return unary_negate<Predicate>(pred);
@@ -534,13 +542,20 @@ namespace cv { namespace gpu { namespace device
        explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}

        __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
                                                   typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
        {
            return !pred(x, y);
        }
        __device__ __forceinline__ binary_negate(const binary_negate& other) : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
        __device__ __forceinline__ binary_negate() : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}

        const Predicate pred;
    };

    template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
    {
        return binary_negate<BinaryPredicate>(pred);
@@ -555,9 +570,13 @@ namespace cv { namespace gpu { namespace device
        {
            return op(arg1, a);
        }
        __device__ __forceinline__ binder1st(const binder1st& other) : unary_function<typename Op::second_argument_type, typename Op::result_type>(){}

        const Op op;
        const typename Op::first_argument_type arg1;
    };

    template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
    {
        return binder1st<Op>(op, typename Op::first_argument_type(x));
@@ -572,16 +591,19 @@ namespace cv { namespace gpu { namespace device
        {
            return op(a, arg2);
        }
        __device__ __forceinline__ binder2nd(const binder2nd& other) : unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}

        const Op op;
        const typename Op::second_argument_type arg2;
    };

    template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
    {
        return binder2nd<Op>(op, typename Op::second_argument_type(x));
    }

    // Functor Traits
    template <typename F> struct IsUnaryFunction
    {
        typedef char Yes;
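The functional.hpp hunks add explicit __device__ default and copy constructors to the functor templates (hypot_sqr_func, the threshold functors, unary_negate/binary_negate, binder1st/binder2nd) so they can be created and copied inside device code. A minimal sketch of the same pattern on a stand-alone functor; double_func and applyKernel are illustrative, not code from this commit:

    #include <cuda_runtime.h>

    // A device functor following the pattern used in functional.hpp:
    // an inline operator() plus explicit __device__ default and copy constructors.
    struct double_func
    {
        __device__ __forceinline__ int operator()(int x) const { return 2 * x; }

        __device__ __forceinline__ double_func() {}
        __device__ __forceinline__ double_func(const double_func&) {}
    };

    template <class Op>
    __global__ void applyKernel(const int* src, int* dst, int n)
    {
        Op op;                 // uses the __device__ default constructor
        Op local_copy(op);     // uses the __device__ copy constructor

        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = local_copy(src[i]);
    }

    // Example launch (host side):
    //   applyKernel<double_func><<<(n + 255) / 256, 256>>>(d_src, d_dst, n);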
modules/gpu/src/opencv2/gpu/device/limits.hpp
modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp
modules/gpu/src/opencv2/gpu/device/scan.hpp (new file, mode 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_SCAN_HPP__
#define __OPENCV_GPU_SCAN_HPP__
enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };

template <ScanKind Kind, typename T, typename F> struct WarpScan
{
    __device__ __forceinline__ WarpScan() {}
    __device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }

    __device__ __forceinline__ T operator()(volatile T* ptr, const unsigned int idx)
    {
        const unsigned int lane = idx & 31;
        F op;

        if (lane >=  1) ptr[idx] = op(ptr[idx -  1], ptr[idx]);
        if (lane >=  2) ptr[idx] = op(ptr[idx -  2], ptr[idx]);
        if (lane >=  4) ptr[idx] = op(ptr[idx -  4], ptr[idx]);
        if (lane >=  8) ptr[idx] = op(ptr[idx -  8], ptr[idx]);
        if (lane >= 16) ptr[idx] = op(ptr[idx - 16], ptr[idx]);

        if (Kind == INCLUSIVE)
            return ptr[idx];
        else
            return (lane > 0) ? ptr[idx - 1] : 0;
    }

    __device__ __forceinline__ unsigned int index(const unsigned int tid)
    {
        return tid;
    }

    __device__ __forceinline__ void init(volatile T* ptr){}

    static const int warp_offset = 0;

    typedef WarpScan<INCLUSIVE, T, F> merge;
};

template <ScanKind Kind, typename T, typename F> struct WarpScanNoComp
{
    __device__ __forceinline__ WarpScanNoComp() {}
    __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }

    __device__ __forceinline__ T operator()(volatile T* ptr, const unsigned int idx)
    {
        const unsigned int lane = threadIdx.x & 31;
        F op;

        ptr[idx] = op(ptr[idx -  1], ptr[idx]);
        ptr[idx] = op(ptr[idx -  2], ptr[idx]);
        ptr[idx] = op(ptr[idx -  4], ptr[idx]);
        ptr[idx] = op(ptr[idx -  8], ptr[idx]);
        ptr[idx] = op(ptr[idx - 16], ptr[idx]);

        if (Kind == INCLUSIVE)
            return ptr[idx];
        else
            return (lane > 0) ? ptr[idx - 1] : 0;
    }

    __device__ __forceinline__ unsigned int index(const unsigned int tid)
    {
        return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
    }

    __device__ __forceinline__ void init(volatile T* ptr)
    {
        ptr[threadIdx.x] = 0;
    }

    static const int warp_smem_stride = 32 + 16 + 1;
    static const int warp_offset      = 16;
    static const int warp_log         = 5;
    static const int warp_mask        = 31;

    typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
};

template <ScanKind Kind, typename T, typename Sc, typename F> struct BlockScan
{
    __device__ __forceinline__ BlockScan() {}
    __device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }

    __device__ __forceinline__ T operator()(volatile T* ptr)
    {
        const unsigned int tid  = threadIdx.x;
        const unsigned int lane = tid & warp_mask;
        const unsigned int warp = tid >> warp_log;

        Sc scan;
        typename Sc::merge merge_scan;
        const unsigned int idx = scan.index(tid);

        T val = scan(ptr, idx);
        __syncthreads();

        if (warp == 0)
            scan.init(ptr);
        __syncthreads();

        if (lane == 31)
            ptr[scan.warp_offset + warp] = (Kind == INCLUSIVE) ? val : ptr[idx];
        __syncthreads();

        if (warp == 0)
            merge_scan(ptr, idx);
        __syncthreads();

        if (warp > 0)
            val = ptr[scan.warp_offset + warp - 1] + val;
        __syncthreads();

        ptr[idx] = val;
        __syncthreads();

        return val;
    }

    static const int warp_log  = 5;
    static const int warp_mask = 31;
};

#endif
\ No newline at end of file
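For orientation, these primitives are meant to be driven from a kernel that stages its data in shared memory: WarpScan performs the scan within each 32-thread warp, and BlockScan combines the per-warp totals into a block-wide prefix sum (resize.cu, which now includes scan.hpp, is the in-tree consumer). A minimal kernel sketch, assuming a 256-thread block, addition as the scan operator, and the 2012-era warp-synchronous execution these primitives rely on; plus_op and blockPrefixSum are illustrative, not part of the commit:

    #include "opencv2/gpu/device/scan.hpp"

    // Illustrative addition functor with the operator()(a, b) shape WarpScan expects.
    struct plus_op
    {
        __device__ __forceinline__ int operator()(int a, int b) const { return a + b; }
    };

    __global__ void blockPrefixSum(const int* src, int* dst, int n)
    {
        // One shared-memory slot per thread; the block is assumed to be 256 threads.
        __shared__ int smem[256];

        const int tid = threadIdx.x;
        const int gid = blockIdx.x * blockDim.x + tid;

        smem[tid] = (gid < n) ? src[gid] : 0;
        __syncthreads();

        // Inclusive block-wide scan: WarpScan handles each warp,
        // BlockScan merges the per-warp totals.
        BlockScan<INCLUSIVE, int, WarpScan<INCLUSIVE, int, plus_op>, plus_op> scan;
        const int result = scan(smem);

        if (gid < n)
            dst[gid] = result;
    }

    // Example launch (host side): blockPrefixSum<<<(n + 255) / 256, 256>>>(d_src, d_dst, n);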
modules/gpu/src/opencv2/gpu/device/static_check.hpp
modules/gpu/src/opencv2/gpu/device/transform.hpp
modules/gpu/src/opencv2/gpu/device/type_traits.hpp
modules/gpu/src/opencv2/gpu/device/utility.hpp
modules/gpu/src/opencv2/gpu/device/vec_distance.hpp
modules/gpu/src/opencv2/gpu/device/vec_math.hpp
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
modules/gpu/src/opencv2/gpu/device/warp.hpp
modules/gpu/src/resize.cpp
@@ -44,7 +44,32 @@
 #ifndef HAVE_CUDA

-void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }
+void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
+{
+    (void)src;
+    (void)dst;
+    (void)dsize;
+    (void)fx;
+    (void)fy;
+    (void)interpolation;
+    (void)s;
+
+    throw_nogpu();
+}
+
+void cv::gpu::resize(const GpuMat& src, GpuMat& dst, GpuMat& buffer, Size dsize, double fx, double fy, int interpolation, Stream& s)
+{
+    (void)src;
+    (void)dst;
+    (void)dsize;
+    (void)fx;
+    (void)fy;
+    (void)interpolation;
+    (void)buffer;
+    (void)s;
+
+    throw_nogpu();
+}

 #else // HAVE_CUDA