Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
98c8ecf8
Commit
98c8ecf8
authored
Jul 22, 2010
by
Andrey Morozov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Modified the setTo() kernel, added double type support, and improved the code
parent
3f5dd5f1
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
98 additions
and
190 deletions
+98
-190
cuda_shared.hpp
modules/gpu/src/cuda/cuda_shared.hpp
+2
-2
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+81
-81
matrix_operations.cpp
modules/gpu/src/matrix_operations.cpp
+3
-3
operator_set_to.cpp
tests/gpu/src/operator_set_to.cpp
+12
-104
No files found.
modules/gpu/src/cuda/cuda_shared.hpp
View file @
98c8ecf8
...
...
@@ -61,8 +61,8 @@ namespace cv
{
// Divides a by b and bumps the quotient by one whenever the division leaves
// a remainder. For non-negative a and positive b this is the number of
// b-sized chunks needed to cover a. b must be non-zero.
static inline int divUp(int a, int b)
{
    const int whole = a / b;
    const bool hasRemainder = (a % b) != 0;
    return hasRemainder ? whole + 1 : whole;
}
extern
"C"
void
set_to_without_mask
(
const
DevMem2D
&
mat
,
const
double
*
scalar
,
int
depth
,
int
channels
);
extern
"C"
void
set_to_with_mask
(
const
DevMem2D
&
mat
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
depth
,
int
channels
);
extern
"C"
void
set_to_without_mask
(
const
DevMem2D
&
mat
,
int
depth
,
const
double
*
scalar
,
int
channels
);
extern
"C"
void
set_to_with_mask
(
const
DevMem2D
&
mat
,
int
depth
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
);
extern
"C"
void
convert_to
(
const
DevMem2D
&
src
,
int
sdepth
,
DevMem2D
dst
,
int
ddepth
,
size_t
width
,
size_t
height
,
double
alpha
,
double
beta
);
}
...
...
modules/gpu/src/cuda/matrix_operations.cu
View file @
98c8ecf8
...
...
@@ -49,7 +49,7 @@
using namespace cv::gpu;
using namespace cv::gpu::impl;
__constant__ __align__(16)
float
scalar_d[4];
__constant__ __align__(16)
double
scalar_d[4];
namespace mat_operators
{
...
...
@@ -57,8 +57,8 @@ namespace mat_operators
// SetTo
//////////////////////////////////////////////////////////
template<typename T
, int channels
>
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step)
template<typename T>
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step
, int channels
)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
...
...
@@ -70,14 +70,14 @@ namespace mat_operators
}
}
template<typename T
, int channels
>
__global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int step_mask)
template<typename T>
__global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int
channels, int
step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if (mask[y * step_mask + x] != 0)
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * (step / sizeof(T)) + x;
mat[idx] = scalar_d[ x % channels ];
...
...
@@ -319,100 +319,100 @@ namespace mat_operators
} // namespace mat_operators
//////////////////////////////////////////////////////////////
// SetTo
//////////////////////////////////////////////////////////////
extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
namespace cv
{
float data[4];
data[0] = static_cast<float>(scalar[0]);
data[1] = static_cast<float>(scalar[1]);
data[2] = static_cast<float>(scalar[2]);
data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
namespace gpu
{
namespace impl
{
dim3 threadsPerBlock(16, 16, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
//////////////////////////////////////////////////////////////
// SetTo
//////////////////////////////////////////////////////////////
if (channels == 1)
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 1><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
}
if (channels == 2)
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels);
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels);
template <typename T>
void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels)
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 2><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
cudaSafeCall ( cudaThreadSynchronize() );
}
if (channels == 3)
template <typename T>
void set_to_without_mask_run(const DevMem2D& mat, int channels)
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 3><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
cudaSafeCall ( cudaThreadSynchronize() );
}
if (channels == 4)
extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels)
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 4><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 4><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 4><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
}
double data[4];
data[0] = scalar[0];
data[1] = scalar[1];
data[2] = scalar[2];
data[3] = scalar[3];
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
cudaSafeCall ( cudaThreadSynchronize() );
}
static SetToFunc_without_mask tab[8] =
{
set_to_without_mask_run<unsigned char>,
set_to_without_mask_run<char>,
set_to_without_mask_run<unsigned short>,
set_to_without_mask_run<short>,
set_to_without_mask_run<int>,
set_to_without_mask_run<float>,
set_to_without_mask_run<double>,
0
};
extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int elemSize1, int channels)
{
float data[4];
data[0] = static_cast<float>(scalar[0]);
data[1] = static_cast<float>(scalar[1]);
data[2] = static_cast<float>(scalar[2]);
data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
SetToFunc_without_mask func = tab[depth];
dim3 threadsPerBlock(16, 16, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
if (func == 0) error("Operation \'ConvertTo\' doesn't supported on your GPU model", __FILE__, __LINE__);
if (channels == 1)
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 1><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
}
if (channels == 2)
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 2><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
func(mat, channels);
}
if (channels == 3)
extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels)
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 3><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
}
if (channels == 4)
double data[4];
data[0] = scalar[0];
data[1] = scalar[1];
data[2] = scalar[2];
data[3] = scalar[3];
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
static SetToFunc_with_mask tab[8] =
{
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 4><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 4><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 4><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
set_to_with_mask_run<unsigned char>,
set_to_with_mask_run<char>,
set_to_with_mask_run<unsigned short>,
set_to_with_mask_run<short>,
set_to_with_mask_run<int>,
set_to_with_mask_run<float>,
set_to_with_mask_run<double>,
0
};
SetToFunc_with_mask func = tab[depth];
if (func == 0) error("Operation \'ConvertTo\' doesn't supported on your GPU model", __FILE__, __LINE__);
func(mat, mask, channels);
}
cudaSafeCall ( cudaThreadSynchronize() );
}
//////////////////////////////////////////////////////////////
// ConvertTo
//////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////
// ConvertTo
//////////////////////////////////////////////////////////////
namespace cv
{
namespace gpu
{
namespace impl
{
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta);
...
...
modules/gpu/src/matrix_operations.cpp
View file @
98c8ecf8
...
...
@@ -133,7 +133,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
GpuMat
&
GpuMat
::
operator
=
(
const
Scalar
&
s
)
{
cv
::
gpu
::
impl
::
set_to_without_mask
(
*
this
,
s
.
val
,
this
->
elemSize1
()
,
this
->
channels
());
cv
::
gpu
::
impl
::
set_to_without_mask
(
*
this
,
this
->
depth
(),
s
.
val
,
this
->
channels
());
return
*
this
;
}
...
...
@@ -145,11 +145,11 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
if
(
mask
.
empty
())
{
cv
::
gpu
::
impl
::
set_to_without_mask
(
*
this
,
s
.
val
,
this
->
elemSize1
()
,
this
->
channels
());
cv
::
gpu
::
impl
::
set_to_without_mask
(
*
this
,
this
->
depth
(),
s
.
val
,
this
->
channels
());
}
else
{
cv
::
gpu
::
impl
::
set_to_with_mask
(
*
this
,
s
.
val
,
mask
,
this
->
elemSize1
()
,
this
->
channels
());
cv
::
gpu
::
impl
::
set_to_with_mask
(
*
this
,
this
->
depth
(),
s
.
val
,
mask
,
this
->
channels
());
}
return
*
this
;
...
...
tests/gpu/src/operator_set_to.cpp
View file @
98c8ecf8
...
...
@@ -24,19 +24,6 @@ class CV_GpuMatOpSetTo : public CvTest
bool
compare_matrix
(
cv
::
Mat
&
cpumat
,
gpu
::
GpuMat
&
gpumat
);
bool
test_cv_8u_c1
();
bool
test_cv_8u_c2
();
bool
test_cv_8u_c3
();
bool
test_cv_8u_c4
();
bool
test_cv_16u_c4
();
bool
test_cv_32f_c1
();
bool
test_cv_32f_c2
();
bool
test_cv_32f_c3
();
bool
test_cv_32f_c4
();
private
:
int
rows
;
int
cols
;
...
...
@@ -45,13 +32,13 @@ class CV_GpuMatOpSetTo : public CvTest
CV_GpuMatOpSetTo
::
CV_GpuMatOpSetTo
()
:
CvTest
(
"GpuMatOperatorSetTo"
,
"setTo"
)
{
rows
=
129
;
cols
=
12
7
;
rows
=
256
;
cols
=
12
4
;
s
.
val
[
0
]
=
12
8
.0
;
s
.
val
[
1
]
=
12
8
.0
;
s
.
val
[
2
]
=
12
8
.0
;
s
.
val
[
3
]
=
12
8
.0
;
s
.
val
[
0
]
=
12
7
.0
;
s
.
val
[
1
]
=
12
7
.0
;
s
.
val
[
2
]
=
12
7
.0
;
s
.
val
[
3
]
=
12
7
.0
;
//#define PRINT_MATRIX
}
...
...
@@ -99,95 +86,16 @@ bool CV_GpuMatOpSetTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
}
}
bool
CV_GpuMatOpSetTo
::
test_cv_8u_c1
()
{
Mat
cpumat
(
rows
,
cols
,
CV_8U
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_8u_c2
()
{
Mat
cpumat
(
rows
,
cols
,
CV_8UC2
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_8u_c3
()
{
Mat
cpumat
(
rows
,
cols
,
CV_8UC3
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_8u_c4
()
{
Mat
cpumat
(
rows
,
cols
,
CV_8UC4
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_16u_c4
()
{
Mat
cpumat
(
rows
,
cols
,
CV_16UC4
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_32f_c1
()
{
Mat
cpumat
(
rows
,
cols
,
CV_32F
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_32f_c2
()
{
Mat
cpumat
(
rows
,
cols
,
CV_32FC2
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_32f_c3
()
{
Mat
cpumat
(
rows
,
cols
,
CV_32FC3
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
bool
CV_GpuMatOpSetTo
::
test_cv_32f_c4
()
{
Mat
cpumat
(
rows
,
cols
,
CV_32FC4
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
return
compare_matrix
(
cpumat
,
gpumat
);
}
void
CV_GpuMatOpSetTo
::
run
(
int
/* start_from */
)
{
bool
is_test_good
=
true
;
is_test_good
&=
test_cv_8u_c1
();
is_test_good
&=
test_cv_8u_c2
();
is_test_good
&=
test_cv_8u_c3
();
is_test_good
&=
test_cv_8u_c4
();
is_test_good
&=
test_cv_16u_c4
();
is_test_good
&=
test_cv_32f_c1
();
is_test_good
&=
test_cv_32f_c2
();
is_test_good
&=
test_cv_32f_c3
();
is_test_good
&=
test_cv_32f_c4
();
for
(
int
i
=
0
;
i
<
7
;
i
++
)
{
Mat
cpumat
(
rows
,
cols
,
i
,
Scalar
::
all
(
0
));
GpuMat
gpumat
(
cpumat
);
is_test_good
&=
compare_matrix
(
cpumat
,
gpumat
);
}
if
(
is_test_good
==
true
)
ts
->
set_failed_test_info
(
CvTS
::
OK
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment