Commit 2fe75ea3 authored by Andrey Morozov's avatar Andrey Morozov

fixed implementation of gpumat::setTo()

parent f37ac8e4
...@@ -76,19 +76,21 @@ namespace mat_operators ...@@ -76,19 +76,21 @@ namespace mat_operators
}; };
template <typename T, int channels> template <typename T, int channels>
__device__ size_t GetIndex(size_t i, int cols, int rows, int step) __device__ size_t GetIndex(size_t i, int cols, int step)
{ {
return ((i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T))) + size_t ret = (i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T)) +
(i % static_cast<size_t>(rows))*static_cast<size_t>(channels) ; (i % static_cast<size_t>(cols))*static_cast<size_t>(channels);
return ret;
} }
template <typename T, int channels> template <typename T, int channels>
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step) __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step)
{ {
size_t i = (blockIdx.x * blockDim.x + threadIdx.x); size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
if (i < cols * rows) if (i < cols * rows)
{ {
unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, rows, step)); unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, step));
} }
} }
...@@ -97,7 +99,7 @@ namespace mat_operators ...@@ -97,7 +99,7 @@ namespace mat_operators
{ {
size_t i = (blockIdx.x * blockDim.x + threadIdx.x); size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
if (i < cols * rows) if (i < cols * rows)
unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, rows, step)); unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, step));
} }
} }
...@@ -105,10 +107,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl ...@@ -105,10 +107,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
{ {
// download scalar to constant memory // download scalar to constant memory
float data[4]; float data[4];
data[0] = scalar[0]; data[0] = static_cast<float>(scalar[0]);
data[1] = scalar[1]; data[1] = static_cast<float>(scalar[1]);
data[2] = scalar[2]; data[2] = static_cast<float>(scalar[2]);
data[3] = scalar[3]; data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256,1,1); dim3 threadsPerBlock(256,1,1);
...@@ -144,10 +146,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl ...@@ -144,10 +146,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels) extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
{ {
float data[4]; float data[4];
data[0] = scalar[0]; data[0] = static_cast<float>(scalar[0]);
data[1] = scalar[1]; data[1] = static_cast<float>(scalar[1]);
data[2] = scalar[2]; data[2] = static_cast<float>(scalar[2]);
data[3] = scalar[3]; data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256, 1, 1); dim3 threadsPerBlock(256, 1, 1);
......
...@@ -21,6 +21,8 @@ class CV_GpuMatOpSetTo : public CvTest ...@@ -21,6 +21,8 @@ class CV_GpuMatOpSetTo : public CvTest
void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat"); void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat");
void run(int); void run(int);
bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
bool test_cv_8u_c1(); bool test_cv_8u_c1();
bool test_cv_8u_c2(); bool test_cv_8u_c2();
bool test_cv_8u_c3(); bool test_cv_8u_c3();
...@@ -34,15 +36,15 @@ class CV_GpuMatOpSetTo : public CvTest ...@@ -34,15 +36,15 @@ class CV_GpuMatOpSetTo : public CvTest
bool test_cv_32f_c4(); bool test_cv_32f_c4();
private: private:
int w; int rows;
int h; int cols;
Scalar s; Scalar s;
}; };
CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" ) CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" )
{ {
w = 100; rows = 127;
h = 100; cols = 129;
s.val[0] = 128.0; s.val[0] = 128.0;
s.val[1] = 128.0; s.val[1] = 128.0;
...@@ -66,13 +68,16 @@ void CV_GpuMatOpSetTo::print_mat(gpu::GpuMat & mat, std::string name) ...@@ -66,13 +68,16 @@ void CV_GpuMatOpSetTo::print_mat(gpu::GpuMat & mat, std::string name)
print_mat(newmat, name); print_mat(newmat, name);
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c1() bool CV_GpuMatOpSetTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
{ {
Mat cpumat(w, h, CV_8U, Scalar::all(0)); //int64 time = getTickCount();
GpuMat gpumat(cpumat);
cpumat.setTo(s); cpumat.setTo(s);
//int64 time1 = getTickCount();
gpumat.setTo(s); gpumat.setTo(s);
//int64 time2 = getTickCount();
//std::cout << "\ntime cpu:" << double((time1 - time) / getTickFrequency());
//std::cout << "\ntime gpu:" << double((time2 - time1) / getTickFrequency());
//std::cout << "\n";
#ifdef PRINT_MATRIX #ifdef PRINT_MATRIX
print_mat(cpumat); print_mat(cpumat);
...@@ -82,7 +87,7 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1() ...@@ -82,7 +87,7 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()
double ret = norm(cpumat, gpumat); double ret = norm(cpumat, gpumat);
if (ret < 0.1) if (ret < 1.0)
return true; return true;
else else
{ {
...@@ -91,205 +96,78 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1() ...@@ -91,205 +96,78 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()
} }
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c2()
bool CV_GpuMatOpSetTo::test_cv_8u_c1()
{ {
Mat cpumat(w, h, CV_8UC2, Scalar::all(0)); Mat cpumat(rows, cols, CV_8U, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s); }
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat); bool CV_GpuMatOpSetTo::test_cv_8u_c2()
{
Mat cpumat(rows, cols, CV_8UC2, Scalar::all(0));
GpuMat gpumat(cpumat);
if (ret < 0.1) return compare_matrix(cpumat, gpumat);
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c3() bool CV_GpuMatOpSetTo::test_cv_8u_c3()
{ {
Mat cpumat(w, h, CV_8UC3, Scalar::all(0)); Mat cpumat(rows, cols, CV_8UC3, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c4() bool CV_GpuMatOpSetTo::test_cv_8u_c4()
{ {
Mat cpumat(w, h, CV_8UC4, Scalar::all(0)); Mat cpumat(rows, cols, CV_8UC4, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_16u_c4() bool CV_GpuMatOpSetTo::test_cv_16u_c4()
{ {
Mat cpumat(w, h, CV_16UC4, Scalar::all(0)); Mat cpumat(rows, cols, CV_16UC4, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c1() bool CV_GpuMatOpSetTo::test_cv_32f_c1()
{ {
Mat cpumat(w, h, CV_32F, Scalar::all(0)); Mat cpumat(rows, cols, CV_32F, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c2() bool CV_GpuMatOpSetTo::test_cv_32f_c2()
{ {
Mat cpumat(w, h, CV_32FC2, Scalar::all(0)); Mat cpumat(rows, cols, CV_32FC2, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret;
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c3() bool CV_GpuMatOpSetTo::test_cv_32f_c3()
{ {
Mat cpumat(w, h, CV_32FC3, Scalar::all(0)); Mat cpumat(rows, cols, CV_32FC3, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret;
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c4() bool CV_GpuMatOpSetTo::test_cv_32f_c4()
{ {
Mat cpumat(w, h, CV_32FC4, Scalar::all(0)); Mat cpumat(rows, cols, CV_32FC4, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
void CV_GpuMatOpSetTo::run( int /* start_from */) void CV_GpuMatOpSetTo::run( int /* start_from */)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment