opencv: Commit bed8cb42, authored Mar 02, 2015 by Vadim Pisarevsky
Merge pull request #3768 from jet47:gpumat-set-to

Parents: 2c9547e3, 4f5d30a8
Showing 1 changed file with 40 additions and 67 deletions.
modules/dynamicuda/src/cuda/matrix_operations.cu (+40, -67)
...
...
@@ -44,6 +44,7 @@
 #include "opencv2/gpu/device/transform.hpp"
 #include "opencv2/gpu/device/functional.hpp"
 #include "opencv2/gpu/device/type_traits.hpp"
+#include "opencv2/gpu/device/vec_traits.hpp"
 
 namespace cv { namespace gpu { namespace device
 {
...
...
@@ -105,87 +106,59 @@ namespace cv { namespace gpu { namespace device
     ////////////////////////////////// SetTo //////////////////////////////////
     ///////////////////////////////////////////////////////////////////////////
 
-    __constant__ uchar scalar_8u[4];
-    __constant__ schar scalar_8s[4];
-    __constant__ ushort scalar_16u[4];
-    __constant__ short scalar_16s[4];
-    __constant__ int scalar_32s[4];
-    __constant__ float scalar_32f[4];
-    __constant__ double scalar_64f[4];
-
-    template <typename T> __device__ __forceinline__ T readScalar(int i);
-    template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
-    template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
-    template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
-    template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
-    template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
-    template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
-    template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}
-
-    void writeScalar(const uchar* vals)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
-    }
-    void writeScalar(const schar* vals)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
-    }
-    void writeScalar(const ushort* vals)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
-    }
-    void writeScalar(const short* vals)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
-    }
-    void writeScalar(const int* vals)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
-    }
-    void writeScalar(const float* vals)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
-    }
-    void writeScalar(const double* vals)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
-    }
-
     template<typename T>
-    __global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
+    __global__ void set_to_without_mask(PtrStepSz<T> mat, typename TypeVec<T, 4>::vec_type val, int channels)
     {
-        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
-        size_t y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int y = blockIdx.x * blockDim.y + threadIdx.y;
 
-        if ((x < cols * channels ) && (y < rows))
+        if (y < mat.rows)
         {
-            size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
-            mat[idx] = readScalar<T>(x % channels);
+            const T vals[] = {val.x, val.y, val.z, val.w};
+
+            T* row = mat.ptr(y);
+
+            for (int x = threadIdx.x; x < mat.cols * channels; x += 32)
+            {
+                row[x] = vals[x % channels];
+            }
         }
     }
 
     template<typename T>
-    __global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
+    __global__ void set_to_with_mask(PtrStepSz<T> mat, const PtrStepb mask, typename TypeVec<T, 4>::vec_type val, int channels)
     {
-        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
-        size_t y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int y = blockIdx.x * blockDim.y + threadIdx.y;
 
-        if ((x < cols * channels ) && (y < rows))
-            if (mask[y * step_mask + x / channels] != 0)
+        if (y < mat.rows)
+        {
+            const T vals[] = {val.x, val.y, val.z, val.w};
+
+            T* row = mat.ptr(y);
+            const uchar* mask_row = mask.ptr(y);
+
+            for (int x = threadIdx.x; x < mat.cols * channels; x += 32)
             {
-                size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
-                mat[idx] = readScalar<T>(x % channels);
+                if (mask_row[x / channels])
+                {
+                    row[x] = vals[x % channels];
+                }
             }
+        }
     }
 
     template <typename T>
     void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
     {
-        writeScalar(scalar);
+        typedef typename TypeVec<T, 4>::vec_type vec_type;
 
-        dim3 threadsPerBlock(32, 8, 1);
-        dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
+        dim3 block(32, 8);
+        dim3 grid(divUp(mat.rows, block.y));
 
-        set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
+        set_to_with_mask<T><<<grid, block, 0, stream>>>(PtrStepSz<T>(mat), mask, VecTraits<vec_type>::make(scalar), channels);
         cudaSafeCall( cudaGetLastError() );
 
         if (stream == 0)
...
...
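Note: the hunk above replaces the old one-thread-per-element kernels (driven by a __constant__ scalar written via writeScalar/readScalar) with a one-warp-per-row scheme where the scalar is passed by value as a CUDA vector type. The standalone sketch below only illustrates that scheme; the names fill_rows_kernel, fill_rows and div_up are made up for the illustration and are not part of this commit.

#include <cuda_runtime.h>

// Ceiling division used to size the grid; mirrors OpenCV's divUp but is defined
// locally so this sketch stands alone.
static int div_up(int total, int grain) { return (total + grain - 1) / grain; }

// Same one-warp-per-row scheme as the new set_to_without_mask: blockDim is (32, 8),
// threadIdx.y picks the row handled by this warp, and the 32 lanes of the warp
// stride across the row's elements in steps of 32.
__global__ void fill_rows_kernel(float* data, size_t step, int rows, int cols, float value)
{
    const int y = blockIdx.x * blockDim.y + threadIdx.y;

    if (y < rows)
    {
        // step is the row pitch in bytes, as with PtrStepSz::ptr().
        float* row = (float*)((char*)data + y * step);

        for (int x = threadIdx.x; x < cols; x += 32)
            row[x] = value;
    }
}

void fill_rows(float* d_data, size_t step, int rows, int cols, float value, cudaStream_t stream)
{
    // Same launch geometry as the patched set_to_gpu: a 1-D grid with one
    // 32x8 block per 8 rows.
    dim3 block(32, 8);
    dim3 grid(div_up(rows, block.y));

    fill_rows_kernel<<<grid, block, 0, stream>>>(d_data, step, rows, cols, value);
}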
@@ -203,12 +176,12 @@ namespace cv { namespace gpu { namespace device
     template <typename T>
     void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream)
     {
-        writeScalar(scalar);
+        typedef typename TypeVec<T, 4>::vec_type vec_type;
 
-        dim3 threadsPerBlock(32, 8, 1);
-        dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
+        dim3 block(32, 8);
+        dim3 grid(divUp(mat.rows, block.y));
 
-        set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
+        set_to_without_mask<T><<<grid, block, 0, stream>>>(PtrStepSz<T>(mat), VecTraits<vec_type>::make(scalar), channels);
         cudaSafeCall( cudaGetLastError() );
 
         if (stream == 0)
...
...
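For a rough sense of the launch-geometry change (an illustration, not a measurement from this commit): with a 1920x1080 3-channel 8-bit image, the old launchers built a 2-D grid of roughly one thread per matrix element, while the new launchers build a 1-D grid of divUp(rows, 8) blocks and let each warp of a block sweep one row. The small host-only program below, with a local div_up standing in for OpenCV's divUp, just prints the two block counts.

#include <cstdio>

// Local ceiling-division helper standing in for OpenCV's divUp.
static int div_up(int total, int grain) { return (total + grain - 1) / grain; }

int main()
{
    const int cols = 1920, rows = 1080, channels = 3;

    // Old scheme: 2-D grid of (cols * channels / 32 + 1) x (rows / 8 + 1)
    // blocks of 32x8 threads, one thread per matrix element.
    const int old_blocks = (cols * channels / 32 + 1) * (rows / 8 + 1);

    // New scheme: 1-D grid of divUp(rows, 8) blocks of 32x8 threads,
    // with one warp iterating over each row.
    const int new_blocks = div_up(rows, 8);

    std::printf("old grid: %d blocks, new grid: %d blocks\n", old_blocks, new_blocks);
    return 0;
}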