Commit 2fe75ea3 authored by Andrey Morozov

fixed implementation of gpumat::setTo()

parent f37ac8e4
......@@ -76,19 +76,21 @@ namespace mat_operators
};
// Maps a flat pixel index to the offset, counted in elements of T, of that
// pixel's first channel inside a row-padded matrix buffer.
//
//   i     - flat pixel index, row-major: i = row * cols + col
//   cols  - number of pixels per row
//   step  - distance between the starts of consecutive rows; it is divided by
//           sizeof(T) below, i.e. it is treated as a size in bytes
//
// The column must be derived with `i % cols`; taking the remainder by the
// row count only works for square matrices.
//
// Declared __host__ __device__ so the index arithmetic can also be checked
// on the host; device callers are unaffected.
template <typename T, int channels>
__host__ __device__ size_t GetIndex(size_t i, int cols, int step)
{
    const size_t width = static_cast<size_t>(cols);
    const size_t row   = i / width;
    const size_t col   = i % width;

    // Same evaluation order as before: (row * step) / sizeof(T).
    const size_t row_offset = row * static_cast<size_t>(step) / static_cast<size_t>(sizeof(T));
    return row_offset + col * static_cast<size_t>(channels);
}
// Sets every pixel of `mat` through unroll<T, channels>::unroll_set, one
// thread per pixel.
//
// Only the x dimension of the launch configuration is used, so the kernel
// expects a 1D grid of 1D blocks providing at least cols * rows threads;
// surplus threads in the last block do nothing.
//
//   mat   - matrix buffer the pixels are written to
//   cols  - pixels per row
//   rows  - number of rows
//   step  - row pitch forwarded to GetIndex (which divides it by sizeof(T))
template <typename T, int channels>
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step)
{
    // Widen before multiplying so neither the thread index nor the pixel
    // count is computed in 32-bit arithmetic.
    const size_t i     = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t total = static_cast<size_t>(cols) * static_cast<size_t>(rows);

    if (i < total)
    {
        unroll<T, channels>::unroll_set(mat, GetIndex<T, channels>(i, cols, step));
    }
}
......@@ -97,7 +99,7 @@ namespace mat_operators
{
size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
if (i < cols * rows)
unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, rows, step));
unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, step));
}
}
......@@ -105,10 +107,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
{
// download scalar to constant memory
float data[4];
data[0] = scalar[0];
data[1] = scalar[1];
data[2] = scalar[2];
data[3] = scalar[3];
data[0] = static_cast<float>(scalar[0]);
data[1] = static_cast<float>(scalar[1]);
data[2] = static_cast<float>(scalar[2]);
data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256,1,1);
......@@ -144,10 +146,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
{
float data[4];
data[0] = scalar[0];
data[1] = scalar[1];
data[2] = scalar[2];
data[3] = scalar[3];
data[0] = static_cast<float>(scalar[0]);
data[1] = static_cast<float>(scalar[1]);
data[2] = static_cast<float>(scalar[2]);
data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256, 1, 1);
......
......@@ -21,6 +21,8 @@ class CV_GpuMatOpSetTo : public CvTest
void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat");
void run(int);
bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
bool test_cv_8u_c1();
bool test_cv_8u_c2();
bool test_cv_8u_c3();
......@@ -34,15 +36,15 @@ class CV_GpuMatOpSetTo : public CvTest
bool test_cv_32f_c4();
private:
int w;
int h;
int rows;
int cols;
Scalar s;
};
CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" )
{
w = 100;
h = 100;
rows = 127;
cols = 129;
s.val[0] = 128.0;
s.val[1] = 128.0;
......@@ -66,13 +68,16 @@ void CV_GpuMatOpSetTo::print_mat(gpu::GpuMat & mat, std::string name)
print_mat(newmat, name);
}
bool CV_GpuMatOpSetTo::test_cv_8u_c1()
bool CV_GpuMatOpSetTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
{
Mat cpumat(w, h, CV_8U, Scalar::all(0));
GpuMat gpumat(cpumat);
//int64 time = getTickCount();
cpumat.setTo(s);
//int64 time1 = getTickCount();
gpumat.setTo(s);
//int64 time2 = getTickCount();
//std::cout << "\ntime cpu:" << double((time1 - time) / getTickFrequency());
//std::cout << "\ntime gpu:" << double((time2 - time1) / getTickFrequency());
//std::cout << "\n";
#ifdef PRINT_MATRIX
print_mat(cpumat);
......@@ -82,7 +87,7 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
if (ret < 1.0)
return true;
else
{
......@@ -91,205 +96,78 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()
}
}
// setTo() check for a single-channel 8-bit matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_8u_c1()
{
    Mat cpumat(rows, cols, CV_8U, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}

// setTo() check for a two-channel 8-bit matrix; same procedure as above.
bool CV_GpuMatOpSetTo::test_cv_8u_c2()
{
    Mat cpumat(rows, cols, CV_8UC2, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
// setTo() check for a three-channel 8-bit matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_8u_c3()
{
    Mat cpumat(rows, cols, CV_8UC3, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
// setTo() check for a four-channel 8-bit matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_8u_c4()
{
    Mat cpumat(rows, cols, CV_8UC4, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
// setTo() check for a four-channel 16-bit unsigned matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_16u_c4()
{
    Mat cpumat(rows, cols, CV_16UC4, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
// setTo() check for a single-channel 32-bit float matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_32f_c1()
{
    Mat cpumat(rows, cols, CV_32F, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
// setTo() check for a two-channel 32-bit float matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_32f_c2()
{
    Mat cpumat(rows, cols, CV_32FC2, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
// setTo() check for a three-channel 32-bit float matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_32f_c3()
{
    Mat cpumat(rows, cols, CV_32FC3, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
// setTo() check for a four-channel 32-bit float matrix.
// Builds a zero-filled rows x cols host matrix, uploads a copy to the GPU and
// lets compare_matrix() run setTo(s) on both and compare the results.
bool CV_GpuMatOpSetTo::test_cv_32f_c4()
{
    Mat cpumat(rows, cols, CV_32FC4, Scalar::all(0));
    GpuMat gpumat(cpumat);

    return compare_matrix(cpumat, gpumat);
}
void CV_GpuMatOpSetTo::run( int /* start_from */)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment