Commit da1526aa authored by Andrey Morozov

Added implementation of SetTo() without mask

parent 2de66d08
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
// //
//M*/ //M*/
#include <stddef.h>
#include "cuda_shared.hpp" #include "cuda_shared.hpp"
#include "cuda_runtime.h" #include "cuda_runtime.h"
...@@ -47,11 +48,30 @@ __constant__ float scalar_d[4]; ...@@ -47,11 +48,30 @@ __constant__ float scalar_d[4];
namespace mat_operators namespace mat_operators
{ {
// Compile-time unrolled fill helper.
//
// unroll<T, channels, count>::unroll_set(mat, i) stores one value at mat[i] and
// then recurses with count - 1 and i + 1, so a call with the default
// count == channels writes `channels` consecutive elements starting at index i.
// The recursion is resolved at compile time and ends at the count == 0
// specialization.
//
// The value stored at index i is scalar_d[i % channels], converted to T.
// scalar_d is the __constant__ float array declared earlier in this file.
// NOTE(review): scalar_d is declared with 4 entries, so `channels` is presumably
// expected to be in [1, 4]; nothing here enforces that.
template <typename T, int channels, int count = channels>
struct unroll
{
    __device__ static void unroll_set(T * mat, size_t i)
    {
        // Pick the scalar component that corresponds to this element's index.
        const size_t channel = i % channels;
        mat[i] = static_cast<T>(scalar_d[channel]);

        // Hand the remaining count - 1 elements to the next instantiation.
        typedef unroll<T, channels, count - 1> next_step;
        next_step::unroll_set(mat, i + 1);
    }
};
// Terminating case of the unroll recursion: with count == 0 there is nothing
// left to write, so unroll_set is a no-op and both arguments are ignored.
template <typename T, int channels>
struct unroll<T, channels, 0>
{
    __device__ static void unroll_set(T * /* mat */, size_t /* i */)
    {
    }
};
// Fills `mat` from the constant array scalar_d, without a mask.
//
// Layout: uses only the x dimension of the grid and block. Each thread derives
// a flat thread id (blockIdx.x * blockDim.x + threadIdx.x), scales it by
// sizeof(T) to get its starting element index, and lets unroll<T, channels>
// write `channels` consecutive elements from there.
//
// NOTE(review): `mat` is a T*, so the index is already in units of T. Scaling
// by sizeof(T) looks like a byte offset applied to an element index, and it
// leaves unwritten gaps whenever sizeof(T) > channels. The stride is kept
// unchanged here because the host-side launch configuration is not visible
// from this code; confirm against the launcher before changing it.
// NOTE(review): there is no bounds check and no size parameter, so the launch
// must not create threads whose write range falls outside the allocation.
template <typename T, int channels>
__global__ void kernel_set_to_without_mask(T * mat)
{
    // Widen before multiplying: blockIdx.x * blockDim.x would otherwise be
    // evaluated in 32-bit unsigned arithmetic and could wrap on large grids.
    const size_t thread_id = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t i = thread_id * sizeof(T);

    unroll<T, channels>::unroll_set(mat, i);
}
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment