Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
cc34a8ac
Commit
cc34a8ac
authored
Apr 16, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new implementation for GpuMat::setTo (without constant memory)
parent
69be49ba
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
99 deletions
+31
-99
matrix_operations.cu
modules/core/src/cuda/matrix_operations.cu
+31
-99
No files found.
modules/core/src/cuda/matrix_operations.cu
View file @
cc34a8ac
...
...
@@ -44,42 +44,33 @@
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/type_traits.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "matrix_operations.hpp"
namespace cv { namespace gpu { namespace cudev
{
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// copyWithMask
template <typename T>
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
if (multiChannelMask)
cv::gpu::cudev::transform((PtrStepSz<T>)
src, (PtrStepSz<T>)
dst, identity<T>(), SingleMask(mask), stream);
cv::gpu::cudev::transform((PtrStepSz<T>)
src, (PtrStepSz<T>)
dst, identity<T>(), SingleMask(mask), stream);
else
cv::gpu::cudev::transform((PtrStepSz<T>)
src, (PtrStepSz<T>)
dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
cv::gpu::cudev::transform((PtrStepSz<T>)
src, (PtrStepSz<T>)
dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
}
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
static func_t tab[] =
static
const
func_t tab[] =
{
0,
copyWithMask<u
nsigned
char>,
copyWithMask<u
nsigned
short>,
copyWithMask<uchar>,
copyWithMask<ushort>,
0,
copyWithMask<int>,
0,
...
...
@@ -88,81 +79,39 @@ namespace cv { namespace gpu { namespace cudev
copyWithMask<double>
};
tab[elemSize1](src, dst, cn, mask, multiChannelMask, stream);
const func_t func = tab[elemSize1];
CV_DbgAssert( func != 0 );
func(src, dst, cn, mask, multiChannelMask, stream);
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// set
__constant__ uchar scalar_8u[4];
__constant__ schar scalar_8s[4];
__constant__ ushort scalar_16u[4];
__constant__ short scalar_16s[4];
__constant__ int scalar_32s[4];
__constant__ float scalar_32f[4];
__constant__ double scalar_64f[4];
template <typename T> __device__ __forceinline__ T readScalar(int i);
template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}
static inline void writeScalar(const uchar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
}
static inline void writeScalar(const schar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
}
static inline void writeScalar(const ushort* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
}
static inline void writeScalar(const short* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
}
static inline void writeScalar(const int* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
}
static inline void writeScalar(const float* vals)
template<typename T, class Mask>
__global__ void set(PtrStepSz<T> mat, const Mask mask, const int channels, const typename TypeVec<T, 4>::vec_type value)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
}
static inline void writeScalar(const double* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
}
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
template<typename T>
__global__ void set(T* mat, int cols, int rows, size_t step, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= mat.cols * channels || y >= mat.rows)
return;
if ((x < cols * channels ) && (y < rows))
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
const T scalar[4] = {value.x, value.y, value.z, value.w};
if (mask(y, x / channels))
mat(y, x) = scalar[x % channels];
}
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream)
{
writeScalar(scalar)
;
typedef typename TypeVec<T, 4>::vec_type scalar_t
;
dim3
threadsPerBlock(32, 8, 1
);
dim3
numBlocks(mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1
);
dim3
block(32, 8
);
dim3
grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y)
);
set<T><<<
numBlocks, threadsPerBlock, 0, stream>>>(mat.data, mat.cols, mat.rows, mat.step, channels
);
set<T><<<
grid, block, 0, stream>>>(mat, WithOutMask(), channels, VecTraits<scalar_t>::make(scalar)
);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
...
...
@@ -177,29 +126,15 @@ namespace cv { namespace gpu { namespace cudev
template void set<float >(PtrStepSz<float > mat, const float* scalar, int channels, cudaStream_t stream);
template void set<double>(PtrStepSz<double> mat, const double* scalar, int channels, cudaStream_t stream);
template<typename T>
__global__ void set(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
}
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
{
writeScalar(scalar)
;
typedef typename TypeVec<T, 4>::vec_type scalar_t
;
dim3
threadsPerBlock(32, 8, 1
);
dim3
numBlocks(mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1
);
dim3
block(32, 8
);
dim3
grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y)
);
set<T><<<
numBlocks, threadsPerBlock, 0, stream>>>(mat.data, mask.data, mat.cols, mat.rows, mat.step, channels, mask.step
);
set<T><<<
grid, block, 0, stream>>>(mat, SingleMask(mask), channels, VecTraits<scalar_t>::make(scalar)
);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
...
...
@@ -215,8 +150,7 @@ namespace cv { namespace gpu { namespace cudev
template void set<double>(PtrStepSz<double> mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// convert
template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
{
...
...
@@ -281,8 +215,6 @@ namespace cv { namespace gpu { namespace cudev
template<typename T, typename D, typename S>
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment