opencv · Commit 6dfd8f18
authored Jul 22, 2013 by Vladislav Vinogradov
rewrote matrix operations with cudev module
parent 2311b0b4
Showing 5 changed files with 586 additions and 1135 deletions (+586 -1135)

modules/core/CMakeLists.txt                  +2    -2
modules/core/src/cuda/gpu_mat.cu             +486  -0
modules/core/src/cuda/matrix_operations.cu   +0    -296
modules/core/src/cuda/matrix_operations.hpp  +0    -57
modules/core/src/gpu_mat.cpp                 +98   -780
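The change swaps hand-written CUDA kernels and per-type dispatch for the grid-level primitives of the new opencv_cudev module. As a hedged illustration of that style (the functor and function names here are invented for the example, not taken from the commit), an element-wise operation written against cudev looks like this:

#include "opencv2/core/gpu.hpp"
#include "opencv2/cudev.hpp"

using namespace cv::cudev;

// Element-wise functor; saturate_cast clamps the result back into uchar range.
struct TimesTwo
{
    __device__ __forceinline__ uchar operator ()(uchar v) const
    {
        return saturate_cast<uchar>(2 * v);
    }
};

// gridTransform derives the launch configuration itself, so no explicit
// <<<grid, block>>> arithmetic is needed, unlike the deleted kernels below.
void timesTwo(const cv::gpu::GpuMat& src, const cv::gpu::GpuMat& dst, cv::gpu::Stream& stream)
{
    gridTransform(globPtr<uchar>(src), globPtr<uchar>(dst), TimesTwo(), stream);
}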
modules/core/CMakeLists.txt

 set(the_description "The Core Functionality")
-ocv_add_module(core ${ZLIB_LIBRARIES})
+ocv_add_module(core ${ZLIB_LIBRARIES} OPTIONAL opencv_cudev)
 ocv_module_include_directories(${ZLIB_INCLUDE_DIR})

 if(HAVE_CUDA)
-  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wenum-compare -Wunused-function)
 endif()

 file(GLOB lib_cuda_hdrs
      "include/opencv2/${name}/cuda/*.hpp" "include/opencv2/${name}/cuda/*.h")
...
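Making opencv_cudev an OPTIONAL dependency means consuming sources must guard on the generated HAVE_OPENCV_CUDEV define, which is exactly what the new gpu_mat.cu does next. A minimal sketch of that guard pattern:

#include "opencv2/opencv_modules.hpp"  // defines HAVE_OPENCV_CUDEV when the module is built

#ifndef HAVE_OPENCV_CUDEV
#error "opencv_cudev is required"      // fail fast rather than miscompile
#else
#include "opencv2/cudev.hpp"
// cudev-backed implementations go here
#endif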
modules/core/src/cuda/gpu_mat.cu (new file, 0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/opencv_modules.hpp"
#ifndef HAVE_OPENCV_CUDEV
#error "opencv_cudev is required"
#else
#include "opencv2/core/gpu.hpp"
#include "opencv2/cudev.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace cv::cudev;
/////////////////////////////////////////////////////
/// create
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{
CV_DbgAssert( _rows >= 0 && _cols >= 0 );
_type &= Mat::TYPE_MASK;
if (rows == _rows && cols == _cols && type() == _type && data)
return;
if (data)
release();
if (_rows > 0 && _cols > 0)
{
flags = Mat::MAGIC_VAL + _type;
rows = _rows;
cols = _cols;
size_t esz = elemSize();
void* devPtr;
if (rows > 1 && cols > 1)
{
CV_CUDEV_SAFE_CALL( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );
}
else
{
// Single row or single column must be continuous
CV_CUDEV_SAFE_CALL( cudaMalloc(&devPtr, esz * cols * rows) );
step = esz * cols;
}
if (esz * cols == step)
flags |= Mat::CONTINUOUS_FLAG;
int64 _nettosize = static_cast<int64>(step) * rows;
size_t nettosize = static_cast<size_t>(_nettosize);
datastart = data = static_cast<uchar*>(devPtr);
dataend = data + nettosize;
refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
*refcount = 1;
}
}
/////////////////////////////////////////////////////
/// release
void cv::gpu::GpuMat::release()
{
if (refcount && CV_XADD(refcount, -1) == 1)
{
cudaFree(datastart);
fastFree(refcount);
}
data = datastart = dataend = 0;
step = rows = cols = 0;
refcount = 0;
}
/////////////////////////////////////////////////////
/// upload
void cv::gpu::GpuMat::upload(InputArray arr)
{
Mat mat = arr.getMat();
CV_DbgAssert( !mat.empty() );
create(mat.size(), mat.type());
CV_CUDEV_SAFE_CALL( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
}
void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
{
Mat mat = arr.getMat();
CV_DbgAssert( !mat.empty() );
create(mat.size(), mat.type());
cudaStream_t stream = StreamAccessor::getStream(_stream);
CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice, stream) );
}
/////////////////////////////////////////////////////
/// download
void cv::gpu::GpuMat::download(OutputArray _dst) const
{
CV_DbgAssert( !empty() );
_dst.create(size(), type());
Mat dst = _dst.getMat();
CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
}
void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
{
CV_DbgAssert( !empty() );
_dst.create(size(), type());
Mat dst = _dst.getMat();
cudaStream_t stream = StreamAccessor::getStream(_stream);
CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost, stream) );
}
/////////////////////////////////////////////////////
/// copyTo
void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
{
CV_DbgAssert( !empty() );
_dst.create(size(), type());
GpuMat dst = _dst.getGpuMat();
CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
}
void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
{
CV_DbgAssert( !empty() );
_dst.create(size(), type());
GpuMat dst = _dst.getGpuMat();
cudaStream_t stream = StreamAccessor::getStream(_stream);
CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice, stream) );
}
namespace
{
template <size_t size> struct CopyToPolicy : DefaultTransformPolicy
{
};
template <> struct CopyToPolicy<4> : DefaultTransformPolicy
{
enum {
shift = 2
};
};
template <> struct CopyToPolicy<8> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
template <typename T>
void copyWithMask(const GpuMat& src, const GpuMat& dst, const GpuMat& mask, Stream& stream)
{
gridTransform_< CopyToPolicy<sizeof(typename VecTraits<T>::elem_type)> >(globPtr<T>(src), globPtr<T>(dst), identity<T>(), globPtr<uchar>(mask), stream);
}
}
void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& stream) const
{
CV_DbgAssert( !empty() );
CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
GpuMat mask = _mask.getGpuMat();
CV_DbgAssert( size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == channels()) );
_dst.create(size(), type());
GpuMat dst = _dst.getGpuMat();
typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs[9][4] =
{
{0,0,0,0},
{copyWithMask<uchar>, copyWithMask<uchar2>, copyWithMask<uchar3>, copyWithMask<uchar4>},
{copyWithMask<ushort>, copyWithMask<ushort2>, copyWithMask<ushort3>, copyWithMask<ushort4>},
{0,0,0,0},
{copyWithMask<int>, copyWithMask<int2>, copyWithMask<int3>, copyWithMask<int4>},
{0,0,0,0},
{0,0,0,0},
{0,0,0,0},
{copyWithMask<double>, copyWithMask<double2>, copyWithMask<double3>, copyWithMask<double4>}
};
if (mask.channels() == channels())
{
const func_t func = funcs[elemSize1()][0];
CV_DbgAssert( func != 0 );
func(reshape(1), dst.reshape(1), mask.reshape(1), stream);
}
else
{
const func_t func = funcs[elemSize1()][channels() - 1];
CV_DbgAssert( func != 0 );
func(*this, dst, mask, stream);
}
}
/////////////////////////////////////////////////////
/// setTo
namespace
{
template <typename T>
void setToWithOutMask(const GpuMat& mat, Scalar _scalar, Stream& stream)
{
Scalar_<typename VecTraits<T>::elem_type> scalar = _scalar;
gridTransform(constantPtr(VecTraits<T>::make(scalar.val), mat.rows, mat.cols), globPtr<T>(mat), identity<T>(), stream);
}
template <typename T>
void setToWithMask(const GpuMat& mat, const GpuMat& mask, Scalar _scalar, Stream& stream)
{
Scalar_<typename VecTraits<T>::elem_type> scalar = _scalar;
gridTransform(constantPtr(VecTraits<T>::make(scalar.val), mat.rows, mat.cols), globPtr<T>(mat), identity<T>(), globPtr<uchar>(mask), stream);
}
}
GpuMat& cv::gpu::GpuMat::setTo(Scalar value, Stream& stream)
{
CV_DbgAssert( !empty() );
CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
if (value[0] == 0.0 && value[1] == 0.0 && value[2] == 0.0 && value[3] == 0.0)
{
// Zero fill
if (stream)
CV_CUDEV_SAFE_CALL( cudaMemset2DAsync(data, step, 0, cols * elemSize(), rows, StreamAccessor::getStream(stream)) );
else
CV_CUDEV_SAFE_CALL( cudaMemset2D(data, step, 0, cols * elemSize(), rows) );
return *this;
}
if (depth() == CV_8U)
{
const int cn = channels();
if (cn == 1
|| (cn == 2 && value[0] == value[1])
|| (cn == 3 && value[0] == value[1] && value[0] == value[2])
|| (cn == 4 && value[0] == value[1] && value[0] == value[2] && value[0] == value[3]))
{
const int val = cv::saturate_cast<uchar>(value[0]);
if (stream)
CV_CUDEV_SAFE_CALL( cudaMemset2DAsync(data, step, val, cols * elemSize(), rows, StreamAccessor::getStream(stream)) );
else
CV_CUDEV_SAFE_CALL( cudaMemset2D(data, step, val, cols * elemSize(), rows) );
return *this;
}
}
typedef void (*func_t)(const GpuMat& mat, Scalar scalar, Stream& stream);
static const func_t funcs[7][4] =
{
{setToWithOutMask<uchar>,setToWithOutMask<uchar2>,setToWithOutMask<uchar3>,setToWithOutMask<uchar4>},
{setToWithOutMask<schar>,setToWithOutMask<char2>,setToWithOutMask<char3>,setToWithOutMask<char4>},
{setToWithOutMask<ushort>,setToWithOutMask<ushort2>,setToWithOutMask<ushort3>,setToWithOutMask<ushort4>},
{setToWithOutMask<short>,setToWithOutMask<short2>,setToWithOutMask<short3>,setToWithOutMask<short4>},
{setToWithOutMask<int>,setToWithOutMask<int2>,setToWithOutMask<int3>,setToWithOutMask<int4>},
{setToWithOutMask<float>,setToWithOutMask<float2>,setToWithOutMask<float3>,setToWithOutMask<float4>},
{setToWithOutMask<double>,setToWithOutMask<double2>,setToWithOutMask<double3>,setToWithOutMask<double4>}
};
funcs[depth()][channels() - 1](*this, value, stream);
return *this;
}
GpuMat& cv::gpu::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream)
{
CV_DbgAssert( !empty() );
CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
GpuMat mask = _mask.getGpuMat();
CV_DbgAssert( size() == mask.size() && mask.type() == CV_8UC1 );
typedef void (*func_t)(const GpuMat& mat, const GpuMat& mask, Scalar scalar, Stream& stream);
static const func_t funcs[7][4] =
{
{setToWithMask<uchar>,setToWithMask<uchar2>,setToWithMask<uchar3>,setToWithMask<uchar4>},
{setToWithMask<schar>,setToWithMask<char2>,setToWithMask<char3>,setToWithMask<char4>},
{setToWithMask<ushort>,setToWithMask<ushort2>,setToWithMask<ushort3>,setToWithMask<ushort4>},
{setToWithMask<short>,setToWithMask<short2>,setToWithMask<short3>,setToWithMask<short4>},
{setToWithMask<int>,setToWithMask<int2>,setToWithMask<int3>,setToWithMask<int4>},
{setToWithMask<float>,setToWithMask<float2>,setToWithMask<float3>,setToWithMask<float4>},
{setToWithMask<double>,setToWithMask<double2>,setToWithMask<double3>,setToWithMask<double4>}
};
funcs[depth()][channels() - 1](*this, mask, value, stream);
return *this;
}
/////////////////////////////////////////////////////
/// convertTo
namespace
{
template <typename T> struct ConvertToPolicy : DefaultTransformPolicy
{
};
template <> struct ConvertToPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
template <typename T, typename D>
void convertToNoScale(const GpuMat& src, const GpuMat& dst, Stream& stream)
{
typedef typename VecTraits<T>::elem_type src_elem_type;
typedef typename VecTraits<D>::elem_type dst_elem_type;
typedef typename LargerType<src_elem_type, float>::type larger_elem_type;
typedef typename LargerType<float, dst_elem_type>::type scalar_type;
gridTransform_< ConvertToPolicy<scalar_type> >(globPtr<T>(src), globPtr<D>(dst), saturate_cast_func<T, D>(), stream);
}
template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
{
S alpha;
S beta;
__device__ __forceinline__ D operator ()(typename TypeTraits<T>::parameter_type src) const
{
return cudev::saturate_cast<D>(alpha * src + beta);
}
};
template <typename T, typename D>
void convertToScale(const GpuMat& src, const GpuMat& dst, double alpha, double beta, Stream& stream)
{
typedef typename VecTraits<T>::elem_type src_elem_type;
typedef typename VecTraits<D>::elem_type dst_elem_type;
typedef typename LargerType<src_elem_type, float>::type larger_elem_type;
typedef typename LargerType<float, dst_elem_type>::type scalar_type;
Convertor<T, D, scalar_type> op;
op.alpha = cv::saturate_cast<scalar_type>(alpha);
op.beta = cv::saturate_cast<scalar_type>(beta);
gridTransform_< ConvertToPolicy<scalar_type> >(globPtr<T>(src), globPtr<D>(dst), op, stream);
}
}
void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) const
{
if (rtype < 0)
rtype = type();
else
rtype = CV_MAKE_TYPE(CV_MAT_DEPTH(rtype), channels());
const int sdepth = depth();
const int ddepth = CV_MAT_DEPTH(rtype);
if (sdepth == ddepth)
{
if (stream)
copyTo(_dst, stream);
else
copyTo(_dst);
return;
}
CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F );
GpuMat src = *this;
_dst.create(size(), rtype);
GpuMat dst = _dst.getGpuMat();
typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);
static const func_t funcs[7][7] =
{
{0, convertToNoScale<uchar, schar>, convertToNoScale<uchar, ushort>, convertToNoScale<uchar, short>, convertToNoScale<uchar, int>, convertToNoScale<uchar, float>, convertToNoScale<uchar, double>},
{convertToNoScale<schar, uchar>, 0, convertToNoScale<schar, ushort>, convertToNoScale<schar, short>, convertToNoScale<schar, int>, convertToNoScale<schar, float>, convertToNoScale<schar, double>},
{convertToNoScale<ushort, uchar>, convertToNoScale<ushort, schar>, 0, convertToNoScale<ushort, short>, convertToNoScale<ushort, int>, convertToNoScale<ushort, float>, convertToNoScale<ushort, double>},
{convertToNoScale<short, uchar>, convertToNoScale<short, schar>, convertToNoScale<short, ushort>, 0, convertToNoScale<short, int>, convertToNoScale<short, float>, convertToNoScale<short, double>},
{convertToNoScale<int, uchar>, convertToNoScale<int, schar>, convertToNoScale<int, ushort>, convertToNoScale<int, short>, 0, convertToNoScale<int, float>, convertToNoScale<int, double>},
{convertToNoScale<float, uchar>, convertToNoScale<float, schar>, convertToNoScale<float, ushort>, convertToNoScale<float, short>, convertToNoScale<float, int>, 0, convertToNoScale<float, double>},
{convertToNoScale<double, uchar>, convertToNoScale<double, schar>, convertToNoScale<double, ushort>, convertToNoScale<double, short>, convertToNoScale<double, int>, convertToNoScale<double, float>, 0}
};
funcs[sdepth][ddepth](reshape(1), dst.reshape(1), stream);
}
void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const
{
if (rtype < 0)
rtype = type();
else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
const int sdepth = depth();
const int ddepth = CV_MAT_DEPTH(rtype);
GpuMat src = *this;
_dst.create(size(), rtype);
GpuMat dst = _dst.getGpuMat();
typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, double alpha, double beta, Stream& stream);
static const func_t funcs[7][7] =
{
{convertToScale<uchar, uchar>, convertToScale<uchar, schar>, convertToScale<uchar, ushort>, convertToScale<uchar, short>, convertToScale<uchar, int>, convertToScale<uchar, float>, convertToScale<uchar, double>},
{convertToScale<schar, uchar>, convertToScale<schar, schar>, convertToScale<schar, ushort>, convertToScale<schar, short>, convertToScale<schar, int>, convertToScale<schar, float>, convertToScale<schar, double>},
{convertToScale<ushort, uchar>, convertToScale<ushort, schar>, convertToScale<ushort, ushort>, convertToScale<ushort, short>, convertToScale<ushort, int>, convertToScale<ushort, float>, convertToScale<ushort, double>},
{convertToScale<short, uchar>, convertToScale<short, schar>, convertToScale<short, ushort>, convertToScale<short, short>, convertToScale<short, int>, convertToScale<short, float>, convertToScale<short, double>},
{convertToScale<int, uchar>, convertToScale<int, schar>, convertToScale<int, ushort>, convertToScale<int, short>, convertToScale<int, int>, convertToScale<int, float>, convertToScale<int, double>},
{convertToScale<float, uchar>, convertToScale<float, schar>, convertToScale<float, ushort>, convertToScale<float, short>, convertToScale<float, int>, convertToScale<float, float>, convertToScale<float, double>},
{convertToScale<double, uchar>, convertToScale<double, schar>, convertToScale<double, ushort>, convertToScale<double, short>, convertToScale<double, int>, convertToScale<double, float>, convertToScale<double, double>}
};
funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream);
}
#endif
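For reference, a minimal host-side sketch of how the rewritten methods are exercised; it assumes the usual default Stream and beta arguments supplied by the public header, which are not shown in this diff:

#include "opencv2/core/gpu.hpp"

void roundTrip(const cv::Mat& host8u)
{
    cv::gpu::GpuMat d_src, d_dst;

    d_src.upload(host8u);                       // host -> device via cudaMemcpy2D

    cv::gpu::GpuMat mask(d_src.size(), CV_8UC1, cv::Scalar::all(255));
    d_src.setTo(cv::Scalar::all(0), mask);      // masked fill, dispatched by depth/channels

    d_src.convertTo(d_dst, CV_32F, 1.0 / 255);  // scaled conversion (alpha = 1/255, beta = 0)

    cv::Mat result;
    d_dst.download(result);                     // device -> host
}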
modules/core/src/cuda/matrix_operations.cu (deleted, 100644 → 0)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/type_traits.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "matrix_operations.hpp"
namespace cv { namespace gpu { namespace cudev
{
///////////////////////////////////////////////////////////////////////////
// copyWithMask
template <typename T>
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
if (multiChannelMask)
cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMask(mask), stream);
else
cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
}
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
static const func_t tab[] =
{
0,
copyWithMask<uchar>,
copyWithMask<ushort>,
0,
copyWithMask<int>,
0,
0,
0,
copyWithMask<double>
};
const func_t func = tab[elemSize1];
CV_DbgAssert( func != 0 );
func(src, dst, cn, mask, multiChannelMask, stream);
}
///////////////////////////////////////////////////////////////////////////
// set
template<typename T, class Mask>
__global__ void set(PtrStepSz<T> mat, const Mask mask, const int channels, const typename TypeVec<T, 4>::vec_type value)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= mat.cols * channels || y >= mat.rows)
return;
const T scalar[4] = {value.x, value.y, value.z, value.w};
if (mask(y, x / channels))
mat(y, x) = scalar[x % channels];
}
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream)
{
typedef typename TypeVec<T, 4>::vec_type scalar_t;
dim3 block(32, 8);
dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
set<T><<<grid, block, 0, stream>>>(mat, WithOutMask(), channels, VecTraits<scalar_t>::make(scalar));
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, int channels, cudaStream_t stream);
template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set<short >(PtrStepSz<short > mat, const short* scalar, int channels, cudaStream_t stream);
template void set<int >(PtrStepSz<int > mat, const int* scalar, int channels, cudaStream_t stream);
template void set<float >(PtrStepSz<float > mat, const float* scalar, int channels, cudaStream_t stream);
template void set<double>(PtrStepSz<double> mat, const double* scalar, int channels, cudaStream_t stream);
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
{
typedef typename TypeVec<T, 4>::vec_type scalar_t;
dim3 block(32, 8);
dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
set<T><<<grid, block, 0, stream>>>(mat, SingleMask(mask), channels, VecTraits<scalar_t>::make(scalar));
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<short >(PtrStepSz<short > mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<int >(PtrStepSz<int > mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<float >(PtrStepSz<float > mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<double>(PtrStepSz<double> mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////////////////
// convert
template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
{
Convertor(S alpha_, S beta_) : alpha(alpha_), beta(beta_) {}
__device__ __forceinline__ D operator()(typename TypeTraits<T>::ParameterType src) const
{
return saturate_cast<D>(alpha * src + beta);
}
S alpha, beta;
};
namespace detail
{
template <size_t src_size, size_t dst_size, typename F> struct ConvertTraitsDispatcher : DefaultTransformFunctorTraits<F>
{
};
template <typename F> struct ConvertTraitsDispatcher<1, 1, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 8 };
};
template <typename F> struct ConvertTraitsDispatcher<1, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<1, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<2, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<2, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 2 };
};
template <typename F> struct ConvertTraitsDispatcher<4, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<4, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 2 };
};
template <typename F> struct ConvertTraits : ConvertTraitsDispatcher<sizeof(typename F::argument_type), sizeof(typename F::result_type), F>
{
};
}
template <typename T, typename D, typename S> struct TransformFunctorTraits< Convertor<T, D, S> > : detail::ConvertTraits< Convertor<T, D, S> >
{
};
template<typename T, typename D, typename S>
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}
void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
static const caller_t tab[7][7] =
{
{
cvt_<uchar, uchar, float>,
cvt_<uchar, schar, float>,
cvt_<uchar, ushort, float>,
cvt_<uchar, short, float>,
cvt_<uchar, int, float>,
cvt_<uchar, float, float>,
cvt_<uchar, double, double>
},
{
cvt_<schar, uchar, float>,
cvt_<schar, schar, float>,
cvt_<schar, ushort, float>,
cvt_<schar, short, float>,
cvt_<schar, int, float>,
cvt_<schar, float, float>,
cvt_<schar, double, double>
},
{
cvt_<ushort, uchar, float>,
cvt_<ushort, schar, float>,
cvt_<ushort, ushort, float>,
cvt_<ushort, short, float>,
cvt_<ushort, int, float>,
cvt_<ushort, float, float>,
cvt_<ushort, double, double>
},
{
cvt_<short, uchar, float>,
cvt_<short, schar, float>,
cvt_<short, ushort, float>,
cvt_<short, short, float>,
cvt_<short, int, float>,
cvt_<short, float, float>,
cvt_<short, double, double>
},
{
cvt_<int, uchar, float>,
cvt_<int, schar, float>,
cvt_<int, ushort, float>,
cvt_<int, short, float>,
cvt_<int, int, double>,
cvt_<int, float, double>,
cvt_<int, double, double>
},
{
cvt_<float, uchar, float>,
cvt_<float, schar, float>,
cvt_<float, ushort, float>,
cvt_<float, short, float>,
cvt_<float, int, float>,
cvt_<float, float, float>,
cvt_<float, double, double>
},
{
cvt_<double, uchar, double>,
cvt_<double, schar, double>,
cvt_<double, ushort, double>,
cvt_<double, short, double>,
cvt_<double, int, double>,
cvt_<double, float, double>,
cvt_<double, double, double>
}
};
const caller_t func = tab[sdepth][ddepth];
func(src, dst, alpha, beta, stream);
}
}}} // namespace cv { namespace gpu { namespace cudev
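The deleted launchers above size their grids with divUp so that every pixel (times channel) receives a thread. A worked sketch of that arithmetic, with divUp written out (in the library it comes from the common CUDA headers):

#include <cuda_runtime.h>

static int divUp(int total, int grain)
{
    return (total + grain - 1) / grain;  // ceiling division
}

// For a 700x1000 CV_8UC3 image with the 32x8 block used by set<T> above:
//   grid.x = divUp(1000 * 3, 32) = 94   (columns * channels)
//   grid.y = divUp(700, 8)       = 88   (rows)
void launchGeometry(int rows, int cols, int channels)
{
    dim3 block(32, 8);
    dim3 grid(divUp(cols * channels, block.x), divUp(rows, block.y));
    (void)grid;  // a real launcher passes this via <<<grid, block, 0, stream>>>
}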
modules/core/src/cuda/matrix_operations.hpp (deleted, 100644 → 0)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/cuda/common.hpp"
namespace cv { namespace gpu { namespace cudev
{
    void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);

    template <typename T>
    void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream);

    template <typename T>
    void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);

    void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
}}}
modules/core/src/gpu_mat.cpp

@@ -46,504 +46,6 @@
using namespace cv;
using namespace cv::gpu;
/////////////////////////// matrix operations /////////////////////////
#ifdef HAVE_CUDA
// CUDA implementation
#include "cuda/matrix_operations.hpp"
namespace
{
    template <typename T> void cudaSet_(GpuMat& src, Scalar s, cudaStream_t stream)
    {
        Scalar_<T> sf = s;
        cudev::set<T>(PtrStepSz<T>(src), sf.val, src.channels(), stream);
    }

    void cudaSet(GpuMat& src, Scalar s, cudaStream_t stream)
    {
        typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream);
        static const func_t funcs[] =
        {
            cudaSet_<uchar>, cudaSet_<schar>, cudaSet_<ushort>, cudaSet_<short>, cudaSet_<int>, cudaSet_<float>, cudaSet_<double>
        };

        funcs[src.depth()](src, s, stream);
    }

    template <typename T> void cudaSet_(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream)
    {
        Scalar_<T> sf = s;
        cudev::set<T>(PtrStepSz<T>(src), sf.val, mask, src.channels(), stream);
    }

    void cudaSet(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
    {
        typedef void (*func_t)(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream);
        static const func_t funcs[] =
        {
            cudaSet_<uchar>, cudaSet_<schar>, cudaSet_<ushort>, cudaSet_<short>, cudaSet_<int>, cudaSet_<float>, cudaSet_<double>
        };

        funcs[src.depth()](src, s, mask, stream);
    }

    void cudaCopyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        cudev::copyWithMask(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
    }

    void cudaConvert(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
    {
        cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, stream);
    }

    void cudaConvert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream)
    {
        cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
    }
}
// NPP implementation
namespace
{
    //////////////////////////////////////////////////////////////////////////
    // Convert

    template <int SDEPTH, int DDEPTH> struct NppConvertFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
    };
    template <int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
    {
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
    };

    template <int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template <int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
    {
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    //////////////////////////////////////////////////////////////////////////
    // Set

    template <int SDEPTH, int SCN> struct NppSetFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template <int SDEPTH> struct NppSetFunc<SDEPTH, 1>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template <int SCN> struct NppSetFunc<CV_8S, SCN>
    {
        typedef NppStatus (*func_ptr)(Npp8s values[], Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template <> struct NppSetFunc<CV_8S, 1>
    {
        typedef NppStatus (*func_ptr)(Npp8s val, Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
    };

    template <int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template <int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    template <int SDEPTH, int SCN> struct NppSetMaskFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };
    template <int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };

    template <int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template <int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    //////////////////////////////////////////////////////////////////////////
    // CopyMasked

    template <int SDEPTH> struct NppCopyWithMaskFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, src_t* pDst, int nDstStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };

    template <int SDEPTH, typename NppCopyWithMaskFunc<SDEPTH>::func_ptr func> struct NppCopyWithMask
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}
// Dispatcher
namespace
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
    {
        CV_DbgAssert( src.size() == dst.size() && src.type() == dst.type() );
        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
        CV_Assert( src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()) );

        if (src.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
        static const func_t funcs[7][4] =
        {
            /*  8U */ {NppCopyWithMask<CV_8U , nppiCopy_8u_C1MR >::call, cudaCopyWithMask, NppCopyWithMask<CV_8U , nppiCopy_8u_C3MR >::call, NppCopyWithMask<CV_8U , nppiCopy_8u_C4MR >::call},
            /*  8S */ {cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask},
            /* 16U */ {NppCopyWithMask<CV_16U, nppiCopy_16u_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_16U, nppiCopy_16u_C3MR>::call, NppCopyWithMask<CV_16U, nppiCopy_16u_C4MR>::call},
            /* 16S */ {NppCopyWithMask<CV_16S, nppiCopy_16s_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_16S, nppiCopy_16s_C3MR>::call, NppCopyWithMask<CV_16S, nppiCopy_16s_C4MR>::call},
            /* 32S */ {NppCopyWithMask<CV_32S, nppiCopy_32s_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_32S, nppiCopy_32s_C3MR>::call, NppCopyWithMask<CV_32S, nppiCopy_32s_C4MR>::call},
            /* 32F */ {NppCopyWithMask<CV_32F, nppiCopy_32f_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_32F, nppiCopy_32f_C3MR>::call, NppCopyWithMask<CV_32F, nppiCopy_32f_C4MR>::call},
            /* 64F */ {cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask}
        };

        const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cudaCopyWithMask;

        func(src, dst, mask, stream);
    }

    void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream = 0)
    {
        CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() );
        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
        CV_Assert( dst.depth() <= CV_64F );

        if (src.depth() == CV_64F || dst.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
        static const func_t funcs[7][7][4] =
        {
            {
                /* 8U ->  8U */ {0, 0, 0, 0},
                /* 8U ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::call},
                /* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::call},
                /* 8U -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 8U -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 8S ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S ->  8S */ {0, 0, 0, 0},
                /* 8S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 16U ->  8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::call},
                /* 16U ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 16U */ {0, 0, 0, 0},
                /* 16U -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 16S ->  8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::call},
                /* 16S ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 16S */ {0, 0, 0, 0},
                /* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 32S ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 32S */ {0, 0, 0, 0},
                /* 32S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 32F ->  8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 32F ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 32F */ {0, 0, 0, 0},
                /* 32F -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 64F ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 64F */ {0, 0, 0, 0}
            }
        };

        const bool aligned = isAligned(src.data, 16) && isAligned(dst.data, 16);
        if (!aligned)
        {
            cudaConvert(src, dst, stream);
            return;
        }

        const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
        CV_DbgAssert( func != 0 );

        func(src, dst, stream);
    }

    void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
    {
        CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() );
        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
        CV_Assert( dst.depth() <= CV_64F );

        if (src.depth() == CV_64F || dst.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        cudaConvert(src, dst, alpha, beta, stream);
    }

    void set(GpuMat& m, Scalar s, cudaStream_t stream = 0)
    {
        if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
        {
            if (stream)
                cudaSafeCall( cudaMemset2DAsync(m.data, m.step, 0, m.cols * m.elemSize(), m.rows, stream) );
            else
                cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
            return;
        }

        if (m.depth() == CV_8U)
        {
            int cn = m.channels();

            if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
            {
                int val = saturate_cast<uchar>(s[0]);
                if (stream)
                    cudaSafeCall( cudaMemset2DAsync(m.data, m.step, val, m.cols * m.elemSize(), m.rows, stream) );
                else
                    cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
                return;
            }
        }

        typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream);
        static const func_t funcs[7][4] =
        {
            {NppSet<CV_8U , 1, nppiSet_8u_C1R >::call, cudaSet, cudaSet, NppSet<CV_8U , 4, nppiSet_8u_C4R >::call},
            {NppSet<CV_8S , 1, nppiSet_8s_C1R >::call, NppSet<CV_8S , 2, nppiSet_8s_C2R >::call, NppSet<CV_8S, 3, nppiSet_8s_C3R>::call, NppSet<CV_8S , 4, nppiSet_8s_C4R >::call},
            {NppSet<CV_16U, 1, nppiSet_16u_C1R>::call, NppSet<CV_16U, 2, nppiSet_16u_C2R>::call, cudaSet, NppSet<CV_16U, 4, nppiSet_16u_C4R>::call},
            {NppSet<CV_16S, 1, nppiSet_16s_C1R>::call, NppSet<CV_16S, 2, nppiSet_16s_C2R>::call, cudaSet, NppSet<CV_16S, 4, nppiSet_16s_C4R>::call},
            {NppSet<CV_32S, 1, nppiSet_32s_C1R>::call, cudaSet, cudaSet, NppSet<CV_32S, 4, nppiSet_32s_C4R>::call},
            {NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cudaSet, cudaSet, NppSet<CV_32F, 4, nppiSet_32f_C4R>::call},
            {cudaSet, cudaSet, cudaSet, cudaSet}
        };

        CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 );

        if (m.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        funcs[m.depth()][m.channels() - 1](m, s, stream);
    }

    void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0)
    {
        CV_DbgAssert( !mask.empty() );
        CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 );

        if (m.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
        static const func_t funcs[7][4] =
        {
            {NppSetMask<CV_8U , 1, nppiSet_8u_C1MR >::call, cudaSet, cudaSet, NppSetMask<CV_8U , 4, nppiSet_8u_C4MR >::call},
            {cudaSet, cudaSet, cudaSet, cudaSet},
            {NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::call},
            {NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::call},
            {NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::call},
            {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::call},
            {cudaSet, cudaSet, cudaSet, cudaSet}
        };

        funcs[m.depth()][m.channels() - 1](m, s, mask, stream);
    }
}
#endif // HAVE_CUDA
cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
    flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
    step(step_), data((uchar*)data_), refcount(0),
...
@@ -651,288 +153,6 @@ cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
    rows = cols = 0;
}
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{
#ifndef HAVE_CUDA
    (void) _rows;
    (void) _cols;
    (void) _type;
    throw_no_cuda();
#else
    _type &= Mat::TYPE_MASK;

    if (rows == _rows && cols == _cols && type() == _type && data)
        return;

    if (data)
        release();

    CV_DbgAssert( _rows >= 0 && _cols >= 0 );

    if (_rows > 0 && _cols > 0)
    {
        flags = Mat::MAGIC_VAL + _type;
        rows = _rows;
        cols = _cols;

        size_t esz = elemSize();

        void* devPtr;

        if (rows > 1 && cols > 1)
        {
            cudaSafeCall( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );
        }
        else
        {
            // Single row or single column must be continuous
            cudaSafeCall( cudaMalloc(&devPtr, esz * cols * rows) );
            step = esz * cols;
        }

        if (esz * cols == step)
            flags |= Mat::CONTINUOUS_FLAG;

        int64 _nettosize = static_cast<int64>(step) * rows;
        size_t nettosize = static_cast<size_t>(_nettosize);

        datastart = data = static_cast<uchar*>(devPtr);
        dataend = data + nettosize;

        refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
        *refcount = 1;
    }
#endif
}

void cv::gpu::GpuMat::release()
{
#ifdef HAVE_CUDA
    if (refcount && CV_XADD(refcount, -1) == 1)
    {
        cudaFree(datastart);
        fastFree(refcount);
    }

    data = datastart = dataend = 0;
    step = rows = cols = 0;
    refcount = 0;
#endif
}

void cv::gpu::GpuMat::upload(InputArray arr)
{
#ifndef HAVE_CUDA
    (void) arr;
    throw_no_cuda();
#else
    Mat mat = arr.getMat();

    CV_DbgAssert( !mat.empty() );

    create(mat.size(), mat.type());

    cudaSafeCall( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
#endif
}

void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
{
#ifndef HAVE_CUDA
    (void) arr;
    (void) _stream;
    throw_no_cuda();
#else
    Mat mat = arr.getMat();

    CV_DbgAssert( !mat.empty() );

    create(mat.size(), mat.type());

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    cudaSafeCall( cudaMemcpy2DAsync(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice, stream) );
#endif
}

void cv::gpu::GpuMat::download(OutputArray _dst) const
{
#ifndef HAVE_CUDA
    (void) _dst;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    _dst.create(size(), type());
    Mat dst = _dst.getMat();

    cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
#endif
}

void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
{
#ifndef HAVE_CUDA
    (void) _dst;
    (void) _stream;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    _dst.create(size(), type());
    Mat dst = _dst.getMat();

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost, stream) );
#endif
}

void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
{
#ifndef HAVE_CUDA
    (void) _dst;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    _dst.create(size(), type());
    GpuMat dst = _dst.getGpuMat();

    cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
#endif
}

void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
{
#ifndef HAVE_CUDA
    (void) _dst;
    (void) _stream;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    _dst.create(size(), type());
    GpuMat dst = _dst.getGpuMat();

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice, stream) );
#endif
}

void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const
{
#ifndef HAVE_CUDA
    (void) _dst;
    (void) _mask;
    (void) _stream;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    _dst.create(size(), type());
    GpuMat dst = _dst.getGpuMat();

    GpuMat mask = _mask.getGpuMat();

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    ::copyWithMask(*this, dst, mask, stream);
#endif
}

GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream)
{
#ifndef HAVE_CUDA
    (void) s;
    (void) _stream;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    ::set(*this, s, stream);
#endif

    return *this;
}

GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream)
{
#ifndef HAVE_CUDA
    (void) s;
    (void) _mask;
    (void) _stream;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    GpuMat mask = _mask.getGpuMat();

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    ::set(*this, s, mask, stream);
#endif

    return *this;
}

void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const
{
#ifndef HAVE_CUDA
    (void) _dst;
    (void) rtype;
    (void) _stream;
    throw_no_cuda();
#else
    if (rtype < 0)
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    const int sdepth = depth();
    const int ddepth = CV_MAT_DEPTH(rtype);
    if (sdepth == ddepth)
    {
        if (_stream)
            copyTo(_dst, _stream);
        else
            copyTo(_dst);
        return;
    }

    GpuMat src = *this;

    _dst.create(size(), rtype);
    GpuMat dst = _dst.getGpuMat();

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    ::convert(src, dst, stream);
#endif
}

void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const
{
#ifndef HAVE_CUDA
    (void) _dst;
    (void) rtype;
    (void) alpha;
    (void) beta;
    (void) _stream;
    throw_no_cuda();
#else
    if (rtype < 0)
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    GpuMat src = *this;

    _dst.create(size(), rtype);
    GpuMat dst = _dst.getGpuMat();

    cudaStream_t stream = StreamAccessor::getStream(_stream);
    ::convert(src, dst, alpha, beta, stream);
#endif
}

GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
{
    GpuMat hdr = *this;
...
@@ -1124,3 +344,101 @@ GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
    return mat = GpuMat(rows, cols, type);
}
#ifndef HAVE_CUDA
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{
    (void) _rows;
    (void) _cols;
    (void) _type;
    throw_no_cuda();
}

void cv::gpu::GpuMat::release()
{
}

void cv::gpu::GpuMat::upload(InputArray arr)
{
    (void) arr;
    throw_no_cuda();
}

void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
{
    (void) arr;
    (void) _stream;
    throw_no_cuda();
}

void cv::gpu::GpuMat::download(OutputArray _dst) const
{
    (void) _dst;
    throw_no_cuda();
}

void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
{
    (void) _dst;
    (void) _stream;
    throw_no_cuda();
}

void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
{
    (void) _dst;
    throw_no_cuda();
}

void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
{
    (void) _dst;
    (void) _stream;
    throw_no_cuda();
}

void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const
{
    (void) _dst;
    (void) _mask;
    (void) _stream;
    throw_no_cuda();
}

GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream)
{
    (void) s;
    (void) _stream;
    throw_no_cuda();
    return *this;
}

GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream)
{
    (void) s;
    (void) _mask;
    (void) _stream;
    throw_no_cuda();
    return *this;
}

void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const
{
    (void) _dst;
    (void) rtype;
    (void) _stream;
    throw_no_cuda();
}

void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const
{
    (void) _dst;
    (void) rtype;
    (void) alpha;
    (void) beta;
    (void) _stream;
    throw_no_cuda();
}
#endif
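Since a CPU-only build now throws from every GpuMat method (see the stubs above), callers typically probe for a usable device first. A hedged sketch, assuming the gpu module's device-count query is available:

#include "opencv2/core/gpu.hpp"

bool canUseGpuMat()
{
    // Returns 0 when OpenCV was built without CUDA or no device is present.
    return cv::gpu::getCudaEnabledDeviceCount() > 0;
}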