Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
8abdb372
Commit
8abdb372
authored
Jan 24, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added gpu threshold.
parent
4c4ff882
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
742 additions
and
343 deletions
+742
-343
gpu_image_processing.tex
doc/gpu_image_processing.tex
+6
-2
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+4
-3
element_operations.cu
modules/gpu/src/cuda/element_operations.cu
+110
-1
mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+1
-1
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+24
-104
matrix_reductions.cu
modules/gpu/src/cuda/matrix_reductions.cu
+1
-1
surf.cu
modules/gpu/src/cuda/surf.cu
+0
-1
surf_key_point.h
modules/gpu/src/cuda/surf_key_point.h
+0
-54
transform.hpp
modules/gpu/src/cuda/transform.hpp
+0
-130
cudastream.cpp
modules/gpu/src/cudastream.cpp
+2
-2
element_operations.cpp
modules/gpu/src/element_operations.cpp
+71
-0
imgproc_gpu.cpp
modules/gpu/src/imgproc_gpu.cpp
+0
-20
matrix_operations.cpp
modules/gpu/src/matrix_operations.cpp
+3
-3
transform.hpp
modules/gpu/src/opencv2/gpu/device/transform.hpp
+433
-0
vecmath.hpp
modules/gpu/src/opencv2/gpu/device/vecmath.hpp
+68
-12
precomp.hpp
modules/gpu/src/precomp.hpp
+0
-1
imgproc_gpu.cpp
tests/gpu/src/imgproc_gpu.cpp
+19
-8
No files found.
doc/gpu_image_processing.tex
View file @
8abdb372
...
...
@@ -314,13 +314,17 @@ See also: \cvCppCross{cvtColor}.
Applies a fixed-level threshold to each array element.
\cvdefCpp
{
double threshold(const GpuMat
\&
src, GpuMat
\&
dst, double thresh);
double threshold(const GpuMat
\&
src, GpuMat
\&
dst, double thresh,
\par
double maxval, int type);
\newline
double threshold(const GpuMat
\&
src, GpuMat
\&
dst, double thresh,
\par
double maxval, int type, const Stream
\&
stream);
}
\begin{description}
\cvarg
{
src
}{
Source array
. Supports only
\texttt
{
CV
\_
32FC1
}
type
.
}
\cvarg
{
src
}{
Source array
(single-channel,
\texttt
{
CV
\_
64F
}
depth doesn't supported)
.
}
\cvarg
{
dst
}{
Destination array; will have the same size and the same type as
\texttt
{
src
}
.
}
\cvarg
{
thresh
}{
Threshold value.
}
\cvarg
{
maxVal
}{
Maximum value to use with
\texttt
{
THRESH
\_
BINARY
}
and
\texttt
{
THRESH
\_
BINARY
\_
INV
}
thresholding types.
}
\cvarg
{
thresholdType
}{
Thresholding type. For details see
\cvCppCross
{
threshold
}
.
\texttt
{
THRESH
\_
OTSU
}
thresholding type doesn't supported.
}
\cvarg
{
stream
}{
Stream for the asynchronous version.
}
\end{description}
See also:
\cvCppCross
{
threshold
}
.
...
...
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
8abdb372
...
...
@@ -587,9 +587,10 @@ namespace cv
//! async version
CV_EXPORTS
void
cvtColor
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
code
,
int
dcn
,
const
Stream
&
stream
);
//! applies fixed threshold to the image.
//! Now supports only THRESH_TRUNC threshold type and one channels float source.
CV_EXPORTS
double
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
);
//! applies fixed threshold to the image
CV_EXPORTS
double
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxval
,
int
type
);
//! async version
CV_EXPORTS
double
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxval
,
int
type
,
const
Stream
&
stream
);
//! resizes the image
//! Supports INTER_NEAREST, INTER_LINEAR
...
...
modules/gpu/src/cuda/element_operations.cu
View file @
8abdb372
...
...
@@ -41,7 +41,8 @@
//M*/
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
...
...
@@ -468,4 +469,112 @@ namespace cv { namespace gpu { namespace mathfunc
template void max_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
//////////////////////////////////////////////////////////////////////////
// threshold
class ThreshOp
{
public:
ThreshOp(float thresh_, float maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
protected:
float thresh;
float maxVal;
};
class ThreshBinary : public ThreshOp
{
public:
ThreshBinary(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? saturate_cast<T>(maxVal) : 0;
}
};
class ThreshBinaryInv : public ThreshOp
{
public:
ThreshBinaryInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? 0 : saturate_cast<T>(maxVal);
}
};
class ThreshTrunc : public ThreshOp
{
public:
ThreshTrunc(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return saturate_cast<T>(fmin((float)src, thresh));
}
};
class ThreshToZero : public ThreshOp
{
public:
ThreshToZero(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? src : 0;
}
};
class ThreshToZeroInv : public ThreshOp
{
public:
ThreshToZeroInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? 0 : src;
}
};
template <class Op, typename T>
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
cudaStream_t stream)
{
Op op(thresh, maxVal);
transform(src, dst, op, stream);
}
template <typename T>
void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
cudaStream_t stream);
static const caller_t callers[] =
{
threshold_caller<ThreshBinary, T>,
threshold_caller<ThreshBinaryInv, T>,
threshold_caller<ThreshTrunc, T>,
threshold_caller<ThreshToZero, T>,
threshold_caller<ThreshToZeroInv, T>
};
callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
}
template void threshold_gpu<uchar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<schar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<ushort>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<short>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
}}}
modules/gpu/src/cuda/mathfunc.cu
View file @
8abdb372
...
...
@@ -43,7 +43,7 @@
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "
opencv2/gpu/device/
transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
...
...
modules/gpu/src/cuda/matrix_operations.cu
View file @
8abdb372
...
...
@@ -42,6 +42,7 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/transform.hpp"
using namespace cv::gpu::device;
...
...
@@ -56,63 +57,6 @@ namespace cv { namespace gpu { namespace matrix_operations {
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
struct ReadWriteTraits
{
enum {shift=1};
typedef T read_type;
typedef DT write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 1, 1>
{
enum {shift=4};
typedef char4 read_type;
typedef char4 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 2, 1>
{
enum {shift=4};
typedef short4 read_type;
typedef char4 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 4, 1>
{
enum {shift=4};
typedef int4 read_type;
typedef char4 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 1, 2>
{
enum {shift=2};
typedef char2 read_type;
typedef short2 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 2, 2>
{
enum {shift=2};
typedef short2 read_type;
typedef short2 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 4, 2>
{
enum {shift=2};
typedef int2 read_type;
typedef short2 write_type;
};
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
...
...
@@ -276,60 +220,35 @@ namespace cv { namespace gpu { namespace matrix_operations {
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template <typename T, typename DT>
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
{
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
if (y < height)
{
const T* src = (const T*)(srcmat + src_step * y);
DT* dst = (DT*)(dstmat + dst_step * y);
if ((x * shift) + shift - 1 < width)
template <typename T, typename D>
class Convertor
{
read_type srcn_el = ((read_type*)src)[x];
write_type dstn_el;
const T* src1_el = (const T*) &srcn_el;
DT* dst1_el = (DT*) &dstn_el;
public:
Convertor(double alpha_, double beta_): alpha(alpha_), beta(beta_) {}
for (int i = 0; i < shift; ++i)
dst1_el[i] = saturate_cast<DT>(alpha * src1_el[i] + beta);
((write_type*)dst)[x] = dstn_el;
}
else
__device__ D operator()(const T& src)
{
for (int i = 0; i < shift - 1; ++i)
if ((x * shift) + i < width)
dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
}
}
return saturate_cast<D>(alpha * src + beta);
}
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
private:
double alpha, beta;
};
template<typename T, typename D
T
>
void cvt_(const DevMem2D& src,
DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t &
stream)
template<typename T, typename D>
void cvt_(const DevMem2D& src,
const DevMem2D& dst, double alpha, double beta, cudaStream_t
stream)
{
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
dim3 block(32, 8);
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
convert_to<T, DT><<<grid, block, 0, stream>>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
Convertor<T, D> op(alpha, beta);
transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
}
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta,
cudaStream_t stream = 0)
{
static CvtFunc tab[8][8] =
typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta,
cudaStream_t stream);
static const caller_t tab[8][8] =
{
{cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
...
...
@@ -355,9 +274,10 @@ namespace cv { namespace gpu { namespace matrix_operations {
{0,0,0,0,0,0,0,0}
};
CvtFunc func = tab[sdepth][ddepth];
if (
func == 0)
caller_t func = tab[sdepth][ddepth];
if (
!func)
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
func(src, dst, alpha, beta, stream);
}
}}}
modules/gpu/src/cuda/matrix_reductions.cu
View file @
8abdb372
...
...
@@ -43,7 +43,7 @@
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "
opencv2/gpu/device/
transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
...
...
modules/gpu/src/cuda/surf.cu
View file @
8abdb372
...
...
@@ -46,7 +46,6 @@
//M*/
#include "internal_shared.hpp"
#include "surf_key_point.h"
#include "opencv2/gpu/device/limits_gpu.hpp"
using namespace cv::gpu;
...
...
modules/gpu/src/cuda/surf_key_point.h
deleted
100644 → 0
View file @
4c4ff882
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_SURF_KEY_POINT_H__
#define __OPENCV_SURF_KEY_POINT_H__
namespace
cv
{
namespace
gpu
{
}
}
#endif // __OPENCV_SURF_KEY_POINT_H__
modules/gpu/src/cuda/transform.hpp
deleted
100644 → 0
View file @
4c4ff882
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "internal_shared.hpp"
namespace
cv
{
namespace
gpu
{
namespace
device
{
//! Mask accessor
template
<
class
T
>
struct
MaskReader_
{
PtrStep_
<
T
>
mask
;
explicit
MaskReader_
(
PtrStep_
<
T
>
mask
)
:
mask
(
mask
)
{}
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
};
//! Stub mask accessor
struct
NoMask
{
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
};
//! Transform kernels
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__global__
void
transform
(
const
DevMem2D_
<
T
>
src
,
PtrStep_
<
D
>
dst
,
const
Mask
mask
,
UnOp
op
)
{
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
if
(
x
<
src
.
cols
&&
y
<
src
.
rows
&&
mask
(
y
,
x
))
{
T
src_data
=
src
.
ptr
(
y
)[
x
];
dst
.
ptr
(
y
)[
x
]
=
op
(
src_data
);
}
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__global__
void
transform
(
const
DevMem2D_
<
T1
>
src1
,
const
PtrStep_
<
T2
>
src2
,
PtrStep_
<
D
>
dst
,
const
Mask
mask
,
BinOp
op
)
{
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
if
(
x
<
src1
.
cols
&&
y
<
src1
.
rows
&&
mask
(
y
,
x
))
{
T1
src1_data
=
src1
.
ptr
(
y
)[
x
];
T2
src2_data
=
src2
.
ptr
(
y
)[
x
];
dst
.
ptr
(
y
)[
x
]
=
op
(
src1_data
,
src2_data
);
}
}
}}}
namespace
cv
{
namespace
gpu
{
template
<
typename
T
,
typename
D
,
typename
UnOp
>
static
void
transform
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
cudaStream_t
stream
)
{
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
device
::
transform
<
T
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
device
::
NoMask
(),
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
static
void
transform
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
BinOp
op
,
cudaStream_t
stream
)
{
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
device
::
transform
<
T1
,
T2
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
device
::
NoMask
(),
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
}
#endif // __OPENCV_GPU_TRANSFORM_HPP__
modules/gpu/src/cudastream.cpp
View file @
8abdb372
...
...
@@ -80,7 +80,7 @@ namespace cv
void
set_to_without_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
set_to_with_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
convert_
to
(
const
DevMem2D
&
src
,
int
sdepth
,
DevMem2D
dst
,
int
ddepth
,
int
channels
,
double
alpha
,
double
beta
,
const
cudaStream_t
&
stream
=
0
);
void
convert_
gpu
(
const
DevMem2D
&
src
,
int
sdepth
,
const
DevMem2D
&
dst
,
int
ddepth
,
double
alpha
,
double
beta
,
cudaStream_t
stream
=
0
);
}
}
}
...
...
@@ -204,7 +204,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
psrc
=
&
(
temp
=
src
);
dst
.
create
(
src
.
size
(),
rtype
);
matrix_operations
::
convert_
to
(
*
psrc
,
sdepth
,
dst
,
ddepth
,
psrc
->
channels
()
,
alpha
,
beta
,
impl
->
stream
);
matrix_operations
::
convert_
gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
,
impl
->
stream
);
}
...
...
modules/gpu/src/element_operations.cpp
View file @
8abdb372
...
...
@@ -74,6 +74,8 @@ void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void
cv
::
gpu
::
max
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
const
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
max
(
const
GpuMat
&
,
double
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
max
(
const
GpuMat
&
,
double
,
GpuMat
&
,
const
Stream
&
)
{
throw_nogpu
();
}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
,
GpuMat
&
,
double
,
double
,
int
)
{
throw_nogpu
();
return
0.0
;}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
,
GpuMat
&
,
double
,
double
,
int
,
const
Stream
&
)
{
throw_nogpu
();
return
0.0
;}
#else
...
...
@@ -696,4 +698,72 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& st
funcs
[
src1
.
depth
()](
src1
,
src2
,
dst
,
StreamAccessor
::
getStream
(
stream
));
}
////////////////////////////////////////////////////////////////////////
// threshold
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
{
template
<
typename
T
>
void
threshold_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
float
thresh
,
float
maxVal
,
int
type
,
cudaStream_t
stream
);
}}}
namespace
{
void
threshold_caller
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxVal
,
int
type
,
cudaStream_t
stream
=
0
)
{
using
namespace
cv
::
gpu
::
mathfunc
;
typedef
void
(
*
caller_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
float
thresh
,
float
maxVal
,
int
type
,
cudaStream_t
stream
);
static
const
caller_t
callers
[]
=
{
threshold_gpu
<
unsigned
char
>
,
threshold_gpu
<
signed
char
>
,
threshold_gpu
<
unsigned
short
>
,
threshold_gpu
<
short
>
,
threshold_gpu
<
int
>
,
threshold_gpu
<
float
>
,
0
};
CV_Assert
(
src
.
channels
()
==
1
&&
src
.
depth
()
<
CV_64F
);
CV_Assert
(
type
<=
THRESH_TOZERO_INV
);
dst
.
create
(
src
.
size
(),
src
.
type
());
if
(
src
.
depth
()
!=
CV_32F
)
{
thresh
=
cvFloor
(
thresh
);
maxVal
=
cvRound
(
maxVal
);
}
callers
[
src
.
depth
()](
src
,
dst
,
static_cast
<
float
>
(
thresh
),
static_cast
<
float
>
(
maxVal
),
type
,
stream
);
}
}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxVal
,
int
type
)
{
if
(
src
.
type
()
==
CV_32FC1
&&
type
==
THRESH_TRUNC
)
{
dst
.
create
(
src
.
size
(),
src
.
type
());
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiThreshold_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
static_cast
<
Npp32f
>
(
thresh
),
NPP_CMP_GREATER
)
);
}
else
{
threshold_caller
(
src
,
dst
,
thresh
,
maxVal
,
type
);
}
return
thresh
;
}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxVal
,
int
type
,
const
Stream
&
stream
)
{
threshold_caller
(
src
,
dst
,
thresh
,
maxVal
,
type
,
StreamAccessor
::
getStream
(
stream
));
return
thresh
;
}
#endif
\ No newline at end of file
modules/gpu/src/imgproc_gpu.cpp
View file @
8abdb372
...
...
@@ -55,7 +55,6 @@ void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
void
cv
::
gpu
::
drawColorDisp
(
const
GpuMat
&
,
GpuMat
&
,
int
,
const
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
reprojectImageTo3D
(
const
GpuMat
&
,
GpuMat
&
,
const
Mat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
reprojectImageTo3D
(
const
GpuMat
&
,
GpuMat
&
,
const
Mat
&
,
const
Stream
&
)
{
throw_nogpu
();
}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
,
GpuMat
&
,
double
)
{
throw_nogpu
();
return
0.0
;
}
void
cv
::
gpu
::
resize
(
const
GpuMat
&
,
GpuMat
&
,
Size
,
double
,
double
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
copyMakeBorder
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
int
,
int
,
const
Scalar
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
warpAffine
(
const
GpuMat
&
,
GpuMat
&
,
const
Mat
&
,
Size
,
int
)
{
throw_nogpu
();
}
...
...
@@ -241,25 +240,6 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
reprojectImageTo3D_callers
[
disp
.
type
()](
disp
,
xyzw
,
Q
,
StreamAccessor
::
getStream
(
stream
));
}
////////////////////////////////////////////////////////////////////////
// threshold
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
)
{
CV_Assert
(
src
.
type
()
==
CV_32FC1
);
dst
.
create
(
src
.
size
(),
src
.
type
()
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiThreshold_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
static_cast
<
Npp32f
>
(
thresh
),
NPP_CMP_GREATER
)
);
return
thresh
;
}
////////////////////////////////////////////////////////////////////////
// resize
...
...
modules/gpu/src/matrix_operations.cpp
View file @
8abdb372
...
...
@@ -90,7 +90,7 @@ namespace cv
void
set_to_without_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
set_to_with_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
convert_
to
(
const
DevMem2D
&
src
,
int
sdepth
,
DevMem2D
dst
,
int
ddepth
,
int
channels
,
double
alpha
,
double
beta
,
const
cudaStream_t
&
stream
=
0
);
void
convert_
gpu
(
const
DevMem2D
&
src
,
int
sdepth
,
const
DevMem2D
&
dst
,
int
ddepth
,
double
alpha
,
double
beta
,
cudaStream_t
stream
=
0
);
}
}
}
...
...
@@ -193,7 +193,7 @@ namespace
void
convertToKernelCaller
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
{
matrix_operations
::
convert_
to
(
src
,
src
.
depth
(),
dst
,
dst
.
depth
(),
src
.
channels
(),
1.0
,
0.0
);
matrix_operations
::
convert_
gpu
(
src
.
reshape
(
1
),
src
.
depth
(),
dst
.
reshape
(
1
),
dst
.
depth
(),
1.0
,
0.0
);
}
}
...
...
@@ -222,7 +222,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
dst
.
create
(
size
(),
rtype
);
if
(
!
noScale
)
matrix_operations
::
convert_
to
(
*
psrc
,
sdepth
,
dst
,
ddepth
,
psrc
->
channels
()
,
alpha
,
beta
);
matrix_operations
::
convert_
gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
);
else
{
typedef
void
(
*
convert_caller_t
)(
const
GpuMat
&
src
,
GpuMat
&
dst
);
...
...
modules/gpu/src/opencv2/gpu/device/transform.hpp
0 → 100644
View file @
8abdb372
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "internal_shared.hpp"
#include "vecmath.hpp"
namespace
cv
{
namespace
gpu
{
namespace
device
{
//! Mask accessor
class
MaskReader
{
public
:
explicit
MaskReader
(
const
PtrStep
&
mask_
)
:
mask
(
mask_
)
{}
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
mask
.
ptr
(
y
)[
x
];
}
private
:
PtrStep
mask
;
};
struct
NoMask
{
__device__
bool
operator
()(
int
y
,
int
x
)
const
{
return
true
;
}
};
//! Read Write Traits
template
<
size_t
src_elem_size
,
size_t
dst_elem_size
>
struct
UnReadWriteTraits_
{
enum
{
shift
=
1
};
};
template
<
size_t
src_elem_size
>
struct
UnReadWriteTraits_
<
src_elem_size
,
1
>
{
enum
{
shift
=
4
};
};
template
<
size_t
src_elem_size
>
struct
UnReadWriteTraits_
<
src_elem_size
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T
,
typename
D
>
struct
UnReadWriteTraits
{
enum
{
shift
=
UnReadWriteTraits_
<
sizeof
(
T
),
sizeof
(
D
)
>::
shift
};
typedef
typename
TypeVec
<
T
,
shift
>::
vec_t
read_type
;
typedef
typename
TypeVec
<
D
,
shift
>::
vec_t
write_type
;
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
,
size_t
dst_elem_size
>
struct
BinReadWriteTraits_
{
enum
{
shift
=
1
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinReadWriteTraits_
<
src_elem_size1
,
src_elem_size2
,
1
>
{
enum
{
shift
=
4
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinReadWriteTraits_
<
src_elem_size1
,
src_elem_size2
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T1
,
typename
T2
,
typename
D
>
struct
BinReadWriteTraits
{
enum
{
shift
=
BinReadWriteTraits_
<
sizeof
(
T1
),
sizeof
(
T2
),
sizeof
(
D
)
>::
shift
};
typedef
typename
TypeVec
<
T1
,
shift
>::
vec_t
read_type1
;
typedef
typename
TypeVec
<
T2
,
shift
>::
vec_t
read_type2
;
typedef
typename
TypeVec
<
D
,
shift
>::
vec_t
write_type
;
};
//! Transform kernels
template
<
int
shift
>
struct
OpUnroller
;
template
<>
struct
OpUnroller
<
1
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
}
};
template
<>
struct
OpUnroller
<
2
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
y
=
op
(
src
.
y
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
y
=
op
(
src1
.
y
,
src2
.
y
);
}
};
template
<>
struct
OpUnroller
<
3
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
y
=
op
(
src
.
y
);
if
(
mask
(
y
,
x_shifted
+
2
))
dst
.
z
=
op
(
src
.
z
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
y
=
op
(
src1
.
y
,
src2
.
y
);
if
(
mask
(
y
,
x_shifted
+
2
))
dst
.
z
=
op
(
src1
.
z
,
src2
.
z
);
}
};
template
<>
struct
OpUnroller
<
4
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src
.
x
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
y
=
op
(
src
.
y
);
if
(
mask
(
y
,
x_shifted
+
2
))
dst
.
z
=
op
(
src
.
z
);
if
(
mask
(
y
,
x_shifted
+
3
))
dst
.
w
=
op
(
src
.
w
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
x
=
op
(
src1
.
x
,
src2
.
x
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
y
=
op
(
src1
.
y
,
src2
.
y
);
if
(
mask
(
y
,
x_shifted
+
2
))
dst
.
z
=
op
(
src1
.
z
,
src2
.
z
);
if
(
mask
(
y
,
x_shifted
+
3
))
dst
.
w
=
op
(
src1
.
w
,
src2
.
w
);
}
};
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
__global__
static
void
transformSmart
(
const
DevMem2D_
<
T
>
src_
,
PtrStep_
<
D
>
dst_
,
const
Mask
mask
,
UnOp
op
)
{
typedef
typename
UnReadWriteTraits
<
T
,
D
>::
read_type
read_type
;
typedef
typename
UnReadWriteTraits
<
T
,
D
>::
write_type
write_type
;
const
int
shift
=
UnReadWriteTraits
<
T
,
D
>::
shift
;
const
int
x
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
const
int
y
=
threadIdx
.
y
+
blockIdx
.
y
*
blockDim
.
y
;
const
int
x_shifted
=
x
*
shift
;
if
(
y
<
src_
.
rows
)
{
const
T
*
src
=
src_
.
ptr
(
y
);
D
*
dst
=
dst_
.
ptr
(
y
);
if
(
x_shifted
+
shift
-
1
<
src_
.
cols
)
{
read_type
src_n_el
=
((
const
read_type
*
)
src
)[
x
];
write_type
dst_n_el
;
OpUnroller
<
shift
>::
unroll
(
src_n_el
,
dst_n_el
,
mask
,
op
,
x_shifted
,
y
);
((
write_type
*
)
dst
)[
x
]
=
dst_n_el
;
}
else
{
for
(
int
real_x
=
x_shifted
;
real_x
<
src_
.
cols
;
++
real_x
)
{
if
(
mask
(
y
,
real_x
))
dst
[
real_x
]
=
op
(
src
[
real_x
]);
}
}
}
}
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__global__
void
transformSimple
(
const
DevMem2D_
<
T
>
src
,
PtrStep_
<
D
>
dst
,
const
Mask
mask
,
UnOp
op
)
{
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
if
(
x
<
src
.
cols
&&
y
<
src
.
rows
&&
mask
(
y
,
x
))
{
dst
.
ptr
(
y
)[
x
]
=
op
(
src
.
ptr
(
y
)[
x
]);
}
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
__global__
static
void
transformSmart
(
const
DevMem2D_
<
T1
>
src1_
,
const
PtrStep_
<
T2
>
src2_
,
PtrStep_
<
D
>
dst_
,
const
Mask
mask
,
BinOp
op
)
{
typedef
typename
BinReadWriteTraits
<
T1
,
T2
,
D
>::
read_type1
read_type1
;
typedef
typename
BinReadWriteTraits
<
T1
,
T2
,
D
>::
read_type2
read_type2
;
typedef
typename
BinReadWriteTraits
<
T1
,
T2
,
D
>::
write_type
write_type
;
const
int
shift
=
BinReadWriteTraits
<
T1
,
T2
,
D
>::
shift
;
const
int
x
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
const
int
y
=
threadIdx
.
y
+
blockIdx
.
y
*
blockDim
.
y
;
const
int
x_shifted
=
x
*
shift
;
if
(
y
<
src1_
.
rows
)
{
const
T1
*
src1
=
src1_
.
ptr
(
y
);
const
T2
*
src2
=
src2_
.
ptr
(
y
);
D
*
dst
=
dst_
.
ptr
(
y
);
if
(
x_shifted
+
shift
-
1
<
src1_
.
cols
)
{
read_type1
src1_n_el
=
((
const
read_type1
*
)
src1
)[
x
];
read_type2
src2_n_el
=
((
const
read_type2
*
)
src2
)[
x
];
write_type
dst_n_el
;
OpUnroller
<
shift
>::
unroll
(
src1_n_el
,
src2_n_el
,
dst_n_el
,
mask
,
op
,
x_shifted
,
y
);
((
write_type
*
)
dst
)[
x
]
=
dst_n_el
;
}
else
{
for
(
int
real_x
=
x_shifted
;
real_x
<
src1_
.
cols
;
++
real_x
)
{
if
(
mask
(
y
,
real_x
))
dst
[
real_x
]
=
op
(
src1
[
real_x
],
src2
[
real_x
]);
}
}
}
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__global__
void
transformSimple
(
const
DevMem2D_
<
T1
>
src1
,
const
PtrStep_
<
T2
>
src2
,
PtrStep_
<
D
>
dst
,
const
Mask
mask
,
BinOp
op
)
{
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
if
(
x
<
src1
.
cols
&&
y
<
src1
.
rows
&&
mask
(
y
,
x
))
{
T1
src1_data
=
src1
.
ptr
(
y
)[
x
];
T2
src2_data
=
src2
.
ptr
(
y
)[
x
];
dst
.
ptr
(
y
)[
x
]
=
op
(
src1_data
,
src2_data
);
}
}
}}}
namespace
cv
{
namespace
gpu
{
template
<
bool
UseSmart
>
struct
TransformChooser
;
template
<>
struct
TransformChooser
<
false
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
const
Mask
&
mask
,
cudaStream_t
stream
=
0
)
{
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
device
::
transformSimple
<
T
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
mask
,
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
BinOp
op
,
const
Mask
&
mask
,
cudaStream_t
stream
=
0
)
{
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
device
::
transformSimple
<
T1
,
T2
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
mask
,
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
};
template
<>
struct
TransformChooser
<
true
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
const
Mask
&
mask
,
cudaStream_t
stream
=
0
)
{
const
int
shift
=
device
::
UnReadWriteTraits
<
T
,
D
>::
shift
;
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
*
shift
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
device
::
transformSmart
<
T
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
mask
,
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
BinOp
op
,
const
Mask
&
mask
,
cudaStream_t
stream
=
0
)
{
const
int
shift
=
device
::
BinReadWriteTraits
<
T1
,
T2
,
D
>::
shift
;
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
*
shift
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
device
::
transformSmart
<
T1
,
T2
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
mask
,
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
};
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
transform_caller
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
const
Mask
&
mask
,
cudaStream_t
stream
=
0
)
{
TransformChooser
<
device
::
VecTraits
<
T
>::
cn
==
1
&&
device
::
VecTraits
<
D
>::
cn
==
1
&&
device
::
UnReadWriteTraits
<
T
,
D
>::
shift
!=
1
>::
call
(
src
,
dst
,
op
,
mask
,
stream
);
}
template
<
typename
T
,
typename
D
,
typename
UnOp
>
static
void
transform
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
cudaStream_t
stream
=
0
)
{
transform_caller
(
src
,
dst
,
op
,
device
::
NoMask
(),
stream
);
}
template
<
typename
T
,
typename
D
,
typename
UnOp
>
static
void
transform
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
PtrStep
&
mask
,
UnOp
op
,
cudaStream_t
stream
=
0
)
{
transform_caller
(
src
,
dst
,
op
,
device
::
MaskReader
(
mask
),
stream
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
transform_caller
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
BinOp
op
,
const
Mask
&
mask
,
cudaStream_t
stream
=
0
)
{
TransformChooser
<
device
::
VecTraits
<
T1
>::
cn
==
1
&&
device
::
VecTraits
<
T2
>::
cn
==
1
&&
device
::
VecTraits
<
D
>::
cn
==
1
&&
device
::
BinReadWriteTraits
<
T1
,
T2
,
D
>::
shift
!=
1
>::
call
(
src1
,
src2
,
dst
,
op
,
mask
,
stream
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
static
void
transform
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
BinOp
op
,
cudaStream_t
stream
=
0
)
{
transform_caller
(
src1
,
src2
,
dst
,
op
,
device
::
NoMask
(),
stream
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
static
void
transform
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
PtrStep
&
mask
,
BinOp
op
,
cudaStream_t
stream
=
0
)
{
transform_caller
(
src1
,
src2
,
dst
,
op
,
device
::
MaskReader
(
mask
),
stream
);
}
}
}
#endif // __OPENCV_GPU_TRANSFORM_HPP__
modules/gpu/src/opencv2/gpu/device/vecmath.hpp
View file @
8abdb372
...
...
@@ -64,12 +64,16 @@ namespace cv
template
<>
struct
TypeVec
<
uchar4
,
4
>
{
typedef
uchar4
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
1
>
{
typedef
char
vec_t
;
};
template
<>
struct
TypeVec
<
schar
,
1
>
{
typedef
char
vec_t
;
};
template
<>
struct
TypeVec
<
char1
,
1
>
{
typedef
char1
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
2
>
{
typedef
char2
vec_t
;
};
template
<>
struct
TypeVec
<
schar
,
2
>
{
typedef
char2
vec_t
;
};
template
<>
struct
TypeVec
<
char2
,
2
>
{
typedef
char2
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
3
>
{
typedef
char3
vec_t
;
};
template
<>
struct
TypeVec
<
schar
,
3
>
{
typedef
char3
vec_t
;
};
template
<>
struct
TypeVec
<
char3
,
3
>
{
typedef
char3
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
4
>
{
typedef
char4
vec_t
;
};
template
<>
struct
TypeVec
<
schar
,
4
>
{
typedef
char4
vec_t
;
};
template
<>
struct
TypeVec
<
char4
,
4
>
{
typedef
char4
vec_t
;
};
template
<>
struct
TypeVec
<
ushort
,
1
>
{
typedef
ushort
vec_t
;
};
...
...
@@ -117,6 +121,15 @@ namespace cv
template
<>
struct
TypeVec
<
float
,
4
>
{
typedef
float4
vec_t
;
};
template
<>
struct
TypeVec
<
float4
,
4
>
{
typedef
float4
vec_t
;
};
template
<>
struct
TypeVec
<
double
,
1
>
{
typedef
double
vec_t
;
};
template
<>
struct
TypeVec
<
double1
,
1
>
{
typedef
double1
vec_t
;
};
template
<>
struct
TypeVec
<
double
,
2
>
{
typedef
double2
vec_t
;
};
template
<>
struct
TypeVec
<
double2
,
2
>
{
typedef
double2
vec_t
;
};
template
<>
struct
TypeVec
<
double
,
3
>
{
typedef
double3
vec_t
;
};
template
<>
struct
TypeVec
<
double3
,
3
>
{
typedef
double3
vec_t
;
};
template
<>
struct
TypeVec
<
double
,
4
>
{
typedef
double4
vec_t
;
};
template
<>
struct
TypeVec
<
double4
,
4
>
{
typedef
double4
vec_t
;
};
template
<
typename
T
>
struct
VecTraits
;
template
<>
struct
VecTraits
<
uchar
>
...
...
@@ -162,33 +175,40 @@ namespace cv
static
__device__
__host__
char
all
(
char
v
)
{
return
v
;}
static
__device__
__host__
char
make
(
char
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
schar
>
{
typedef
schar
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
schar
all
(
schar
v
)
{
return
v
;}
static
__device__
__host__
schar
make
(
schar
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
char1
>
{
typedef
char
elem_t
;
typedef
s
char
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
char1
all
(
char
v
)
{
return
make_char1
(
v
);}
static
__device__
__host__
char1
make
(
char
x
)
{
return
make_char1
(
x
);}
static
__device__
__host__
char1
all
(
s
char
v
)
{
return
make_char1
(
v
);}
static
__device__
__host__
char1
make
(
s
char
x
)
{
return
make_char1
(
x
);}
};
template
<>
struct
VecTraits
<
char2
>
{
typedef
char
elem_t
;
typedef
s
char
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
char2
all
(
char
v
)
{
return
make_char2
(
v
,
v
);}
static
__device__
__host__
char2
make
(
char
x
,
char
y
)
{
return
make_char2
(
x
,
y
);}
static
__device__
__host__
char2
all
(
s
char
v
)
{
return
make_char2
(
v
,
v
);}
static
__device__
__host__
char2
make
(
schar
x
,
s
char
y
)
{
return
make_char2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
char3
>
{
typedef
char
elem_t
;
typedef
s
char
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
char3
all
(
char
v
)
{
return
make_char3
(
v
,
v
,
v
);}
static
__device__
__host__
char3
make
(
char
x
,
char
y
,
char
z
)
{
return
make_char3
(
x
,
y
,
z
);}
static
__device__
__host__
char3
all
(
s
char
v
)
{
return
make_char3
(
v
,
v
,
v
);}
static
__device__
__host__
char3
make
(
schar
x
,
schar
y
,
s
char
z
)
{
return
make_char3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
char4
>
{
typedef
char
elem_t
;
typedef
s
char
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
char4
all
(
char
v
)
{
return
make_char4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
char4
make
(
char
x
,
char
y
,
char
z
,
char
w
)
{
return
make_char4
(
x
,
y
,
z
,
w
);}
static
__device__
__host__
char4
all
(
s
char
v
)
{
return
make_char4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
char4
make
(
schar
x
,
schar
y
,
schar
z
,
s
char
w
)
{
return
make_char4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
ushort
>
...
...
@@ -371,6 +391,42 @@ namespace cv
static
__device__
__host__
float4
make
(
float
x
,
float
y
,
float
z
,
float
w
)
{
return
make_float4
(
x
,
y
,
z
,
w
);}
};
template
<>
struct
VecTraits
<
double
>
{
typedef
double
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
double
all
(
double
v
)
{
return
v
;}
static
__device__
__host__
double
make
(
double
x
)
{
return
x
;}
};
template
<>
struct
VecTraits
<
double1
>
{
typedef
double
elem_t
;
enum
{
cn
=
1
};
static
__device__
__host__
double1
all
(
double
v
)
{
return
make_double1
(
v
);}
static
__device__
__host__
double1
make
(
double
x
)
{
return
make_double1
(
x
);}
};
template
<>
struct
VecTraits
<
double2
>
{
typedef
double
elem_t
;
enum
{
cn
=
2
};
static
__device__
__host__
double2
all
(
double
v
)
{
return
make_double2
(
v
,
v
);}
static
__device__
__host__
double2
make
(
double
x
,
double
y
)
{
return
make_double2
(
x
,
y
);}
};
template
<>
struct
VecTraits
<
double3
>
{
typedef
double
elem_t
;
enum
{
cn
=
3
};
static
__device__
__host__
double3
all
(
double
v
)
{
return
make_double3
(
v
,
v
,
v
);}
static
__device__
__host__
double3
make
(
double
x
,
double
y
,
double
z
)
{
return
make_double3
(
x
,
y
,
z
);}
};
template
<>
struct
VecTraits
<
double4
>
{
typedef
double
elem_t
;
enum
{
cn
=
4
};
static
__device__
__host__
double4
all
(
double
v
)
{
return
make_double4
(
v
,
v
,
v
,
v
);}
static
__device__
__host__
double4
make
(
double
x
,
double
y
,
double
z
,
double
w
)
{
return
make_double4
(
x
,
y
,
z
,
w
);}
};
template
<
int
cn
,
typename
VecD
>
struct
SatCast
;
template
<
typename
VecD
>
struct
SatCast
<
1
,
VecD
>
{
...
...
modules/gpu/src/precomp.hpp
View file @
8abdb372
...
...
@@ -70,7 +70,6 @@
#include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "npp_staging.h"
#include "surf_key_point.h"
#include "nvidia/NCV.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"
...
...
tests/gpu/src/imgproc_gpu.cpp
View file @
8abdb372
...
...
@@ -180,30 +180,41 @@ void CV_GpuImageProcTest::run( int )
////////////////////////////////////////////////////////////////////////////////
// threshold
struct
CV_Gpu
Npp
ImageThresholdTest
:
public
CV_GpuImageProcTest
struct
CV_GpuImageThresholdTest
:
public
CV_GpuImageProcTest
{
public
:
CV_Gpu
NppImageThresholdTest
()
:
CV_GpuImageProcTest
(
"GPU-Npp
ImageThreshold"
,
"threshold"
)
{}
CV_Gpu
ImageThresholdTest
()
:
CV_GpuImageProcTest
(
"GPU-
ImageThreshold"
,
"threshold"
)
{}
int
test
(
const
Mat
&
img
)
{
if
(
img
.
type
()
!=
CV_32FC1
)
if
(
img
.
type
()
!=
CV_
8UC1
&&
img
.
type
()
!=
CV_
32FC1
)
{
ts
->
printf
(
CvTS
::
LOG
,
"
\n
Unsupported type
\n
"
);
return
CvTS
::
OK
;
}
const
double
maxVal
=
img
.
type
()
==
CV_8UC1
?
255
:
1.0
;
cv
::
RNG
rng
(
*
ts
->
get_rng
());
const
double
thresh
=
rng
;
int
res
=
CvTS
::
OK
;
for
(
int
type
=
THRESH_BINARY
;
type
<=
THRESH_TOZERO_INV
;
++
type
)
{
const
double
thresh
=
rng
.
uniform
(
0.0
,
maxVal
);
cv
::
Mat
cpuRes
;
cv
::
threshold
(
img
,
cpuRes
,
thresh
,
0.0
,
THRESH_TRUNC
);
cv
::
threshold
(
img
,
cpuRes
,
thresh
,
maxVal
,
type
);
GpuMat
gpu1
(
img
);
GpuMat
gpuRes
;
cv
::
gpu
::
threshold
(
gpu1
,
gpuRes
,
thresh
);
cv
::
gpu
::
threshold
(
gpu1
,
gpuRes
,
thresh
,
maxVal
,
type
);
if
(
CheckNorm
(
cpuRes
,
gpuRes
)
!=
CvTS
::
OK
)
res
=
CvTS
::
FAIL_GENERIC
;
}
return
CheckNorm
(
cpuRes
,
gpuRes
)
;
return
res
;
}
};
...
...
@@ -822,7 +833,7 @@ struct CV_GpuColumnSumTest: CvTest
// Placing all test definitions in one place
// makes us know about what tests are commented.
CV_Gpu
NppImageThresholdTest
CV_GpuNpp
ImageThreshold_test
;
CV_Gpu
ImageThresholdTest
CV_Gpu
ImageThreshold_test
;
CV_GpuNppImageResizeTest
CV_GpuNppImageResize_test
;
CV_GpuNppImageCopyMakeBorderTest
CV_GpuNppImageCopyMakeBorder_test
;
CV_GpuNppImageWarpAffineTest
CV_GpuNppImageWarpAffine_test
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment