Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b08f6082
Commit
b08f6082
authored
Oct 20, 2010
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added to gpu module linear filters for int and float source types.
refactored gpu module.
parent
ea040ce7
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
1070 additions
and
503 deletions
+1070
-503
devmem2d.hpp
modules/gpu/include/opencv2/gpu/devmem2d.hpp
+9
-4
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+10
-11
arithm.cpp
modules/gpu/src/arithm.cpp
+4
-4
color.cu
modules/gpu/src/cuda/color.cu
+54
-61
constantspacebp.cu
modules/gpu/src/cuda/constantspacebp.cu
+9
-11
cuda_shared.hpp
modules/gpu/src/cuda/cuda_shared.hpp
+1
-12
filters.cu
modules/gpu/src/cuda/filters.cu
+222
-0
imgproc.cu
modules/gpu/src/cuda/imgproc.cu
+24
-24
mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+44
-4
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+122
-218
saturate_cast.hpp
modules/gpu/src/cuda/saturate_cast.hpp
+124
-42
transform.hpp
modules/gpu/src/cuda/transform.hpp
+118
-0
vecmath.hpp
modules/gpu/src/cuda/vecmath.hpp
+127
-0
cudastream.cpp
modules/gpu/src/cudastream.cpp
+16
-0
filtering.cpp
modules/gpu/src/filtering.cpp
+147
-88
imgproc_gpu.cpp
modules/gpu/src/imgproc_gpu.cpp
+21
-21
matrix_operations.cpp
modules/gpu/src/matrix_operations.cpp
+16
-0
gputest_main.cpp
tests/gpu/src/gputest_main.cpp
+2
-3
No files found.
modules/gpu/include/opencv2/gpu/devmem2d.hpp
View file @
b08f6082
...
@@ -50,7 +50,7 @@ namespace cv
...
@@ -50,7 +50,7 @@ namespace cv
// Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes.
// Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes.
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
template
<
typename
T
=
unsigned
char
>
template
<
typename
T
>
struct
DevMem2D_
struct
DevMem2D_
{
{
typedef
T
elem_t
;
typedef
T
elem_t
;
...
@@ -60,16 +60,21 @@ namespace cv
...
@@ -60,16 +60,21 @@ namespace cv
int
rows
;
int
rows
;
T
*
ptr
;
T
*
ptr
;
size_t
step
;
size_t
step
;
size_t
elem_step
;
DevMem2D_
()
:
cols
(
0
),
rows
(
0
),
ptr
(
0
),
step
(
0
)
{}
DevMem2D_
()
:
cols
(
0
),
rows
(
0
),
ptr
(
0
),
step
(
0
)
,
elem_step
(
0
)
{}
DevMem2D_
(
int
rows_
,
int
cols_
,
T
*
ptr_
,
size_t
step_
)
DevMem2D_
(
int
rows_
,
int
cols_
,
T
*
ptr_
,
size_t
step_
)
:
cols
(
cols_
),
rows
(
rows_
),
ptr
(
ptr_
),
step
(
step_
)
{}
:
cols
(
cols_
),
rows
(
rows_
),
ptr
(
ptr_
),
step
(
step_
),
elem_step
(
step_
/
sizeof
(
T
))
{}
template
<
typename
U
>
explicit
DevMem2D_
(
const
DevMem2D_
<
U
>&
d
)
:
cols
(
d
.
cols
),
rows
(
d
.
rows
),
ptr
((
T
*
)
d
.
ptr
),
step
(
d
.
step
),
elem_step
(
d
.
step
/
sizeof
(
T
))
{}
size_t
elemSize
()
const
{
return
elem_size
;
}
size_t
elemSize
()
const
{
return
elem_size
;
}
};
};
typedef
DevMem2D_
<>
DevMem2D
;
typedef
DevMem2D_
<
unsigned
char
>
DevMem2D
;
typedef
DevMem2D_
<
float
>
DevMem2Df
;
typedef
DevMem2D_
<
float
>
DevMem2Df
;
typedef
DevMem2D_
<
int
>
DevMem2Di
;
typedef
DevMem2D_
<
int
>
DevMem2Di
;
}
}
...
...
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
b08f6082
...
@@ -636,7 +636,7 @@ namespace cv
...
@@ -636,7 +636,7 @@ namespace cv
//! returns the separable filter engine with the specified filters
//! returns the separable filter engine with the specified filters
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createSeparableFilter_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
rowFilter
,
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createSeparableFilter_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
rowFilter
,
const
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter
,
bool
rowFilterFirst
=
true
);
const
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter
);
//! returns horizontal 1D box filter
//! returns horizontal 1D box filter
//! supports only CV_8UC1 source type and CV_32FC1 sum type
//! supports only CV_8UC1 source type and CV_32FC1 sum type
...
@@ -658,7 +658,7 @@ namespace cv
...
@@ -658,7 +658,7 @@ namespace cv
//! only MORPH_ERODE and MORPH_DILATE are supported
//! only MORPH_ERODE and MORPH_DILATE are supported
//! supports CV_8UC1 and CV_8UC4 types
//! supports CV_8UC1 and CV_8UC4 types
//! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
//! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
CV_EXPORTS
Ptr
<
BaseFilter_GPU
>
getMorphologyFilter_GPU
(
int
op
,
int
type
,
const
Gpu
Mat
&
kernel
,
const
Size
&
ksize
,
CV_EXPORTS
Ptr
<
BaseFilter_GPU
>
getMorphologyFilter_GPU
(
int
op
,
int
type
,
const
Mat
&
kernel
,
const
Size
&
ksize
,
Point
anchor
=
Point
(
-
1
,
-
1
));
Point
anchor
=
Point
(
-
1
,
-
1
));
//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
...
@@ -667,25 +667,24 @@ namespace cv
...
@@ -667,25 +667,24 @@ namespace cv
//! returns 2D filter with the specified kernel
//! returns 2D filter with the specified kernel
//! supports CV_8UC1 and CV_8UC4 types
//! supports CV_8UC1 and CV_8UC4 types
//! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
CV_EXPORTS
Ptr
<
BaseFilter_GPU
>
getLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
kernel
,
const
Size
&
ksize
,
CV_EXPORTS
Ptr
<
BaseFilter_GPU
>
getLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
GpuMat
&
kernel
,
const
Size
&
ksize
,
Point
anchor
=
Point
(
-
1
,
-
1
));
Point
anchor
=
Point
(
-
1
,
-
1
),
int
nDivisor
=
1
);
//! returns the non-separable linear filter engine
//! returns the non-separable linear filter engine
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
kernel
,
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
kernel
,
const
Point
&
anchor
=
Point
(
-
1
,
-
1
));
const
Point
&
anchor
=
Point
(
-
1
,
-
1
));
//! returns the primitive row filter with the specified kernel
//! returns the primitive row filter with the specified kernel
CV_EXPORTS
Ptr
<
BaseRowFilter_GPU
>
getLinearRowFilter_GPU
(
int
srcType
,
int
bufType
,
const
Gpu
Mat
&
rowKernel
,
CV_EXPORTS
Ptr
<
BaseRowFilter_GPU
>
getLinearRowFilter_GPU
(
int
srcType
,
int
bufType
,
const
Mat
&
rowKernel
,
int
anchor
=
-
1
,
int
nDivisor
=
1
);
int
anchor
=
-
1
);
//! returns the primitive column filter with the specified kernel
//! returns the primitive column filter with the specified kernel
CV_EXPORTS
Ptr
<
BaseColumnFilter_GPU
>
getLinearColumnFilter_GPU
(
int
bufType
,
int
dstType
,
const
Gpu
Mat
&
columnKernel
,
CV_EXPORTS
Ptr
<
BaseColumnFilter_GPU
>
getLinearColumnFilter_GPU
(
int
bufType
,
int
dstType
,
const
Mat
&
columnKernel
,
int
anchor
=
-
1
,
int
nDivisor
=
1
);
int
anchor
=
-
1
);
//! returns the separable linear filter engine
//! returns the separable linear filter engine
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createSeparableLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
rowKernel
,
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createSeparableLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
rowKernel
,
const
Mat
&
columnKernel
,
const
Point
&
anchor
=
Point
(
-
1
,
-
1
)
,
bool
rowFilterFirst
=
true
);
const
Mat
&
columnKernel
,
const
Point
&
anchor
=
Point
(
-
1
,
-
1
));
//! returns filter engine for the generalized Sobel operator
//! returns filter engine for the generalized Sobel operator
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createDerivFilter_GPU
(
int
srcType
,
int
dstType
,
int
dx
,
int
dy
,
int
ksize
);
CV_EXPORTS
Ptr
<
FilterEngine_GPU
>
createDerivFilter_GPU
(
int
srcType
,
int
dstType
,
int
dx
,
int
dy
,
int
ksize
);
...
@@ -720,7 +719,7 @@ namespace cv
...
@@ -720,7 +719,7 @@ namespace cv
//! applies separable 2D linear filter to the image
//! applies separable 2D linear filter to the image
CV_EXPORTS
void
sepFilter2D
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
const
Mat
&
kernelX
,
const
Mat
&
kernelY
,
CV_EXPORTS
void
sepFilter2D
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
const
Mat
&
kernelX
,
const
Mat
&
kernelY
,
Point
anchor
=
Point
(
-
1
,
-
1
)
,
bool
rowFilterFirst
=
true
);
Point
anchor
=
Point
(
-
1
,
-
1
));
//! applies generalized Sobel operator to the image
//! applies generalized Sobel operator to the image
CV_EXPORTS
void
Sobel
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
dx
,
int
dy
,
int
ksize
=
3
,
double
scale
=
1
);
CV_EXPORTS
void
Sobel
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
dx
,
int
dy
,
int
ksize
=
3
,
double
scale
=
1
);
...
...
modules/gpu/src/arithm.cpp
View file @
b08f6082
...
@@ -316,9 +316,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
...
@@ -316,9 +316,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// compare
// compare
namespace
cv
{
namespace
gpu
{
namespace
mat
rix_operations
namespace
cv
{
namespace
gpu
{
namespace
mat
hfunc
{
{
void
compare_ne_8u
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
);
void
compare_ne_8u
c4
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
);
void
compare_ne_32f
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
);
void
compare_ne_32f
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
);
}}}
}}}
...
@@ -346,7 +346,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
...
@@ -346,7 +346,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
}
}
else
else
{
{
mat
rix_operations
::
compare_ne_8u
(
src1
,
src2
,
dst
);
mat
hfunc
::
compare_ne_8uc4
(
src1
,
src2
,
dst
);
}
}
}
}
else
else
...
@@ -359,7 +359,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
...
@@ -359,7 +359,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
}
}
else
else
{
{
mat
rix_operations
::
compare_ne_32f
(
src1
,
src2
,
dst
);
mat
hfunc
::
compare_ne_32f
(
src1
,
src2
,
dst
);
}
}
}
}
}
}
...
...
modules/gpu/src/cuda/color.cu
View file @
b08f6082
...
@@ -42,6 +42,7 @@
...
@@ -42,6 +42,7 @@
#include "cuda_shared.hpp"
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "saturate_cast.hpp"
#include "vecmath.hpp"
using namespace cv::gpu;
using namespace cv::gpu;
...
@@ -53,16 +54,8 @@ using namespace cv::gpu;
...
@@ -53,16 +54,8 @@ using namespace cv::gpu;
#define FLT_EPSILON 1.192092896e-07F
#define FLT_EPSILON 1.192092896e-07F
#endif
#endif
namespace imgproc
namespace imgproc
_krnls
{
{
template<typename T, int N> struct TypeVec {};
template<> struct TypeVec<uchar, 3> { typedef uchar3 vec_t; };
template<> struct TypeVec<uchar, 4> { typedef uchar4 vec_t; };
template<> struct TypeVec<ushort, 3> { typedef ushort3 vec_t; };
template<> struct TypeVec<ushort, 4> { typedef ushort4 vec_t; };
template<> struct TypeVec<float, 3> { typedef float3 vec_t; };
template<> struct TypeVec<float, 4> { typedef float4 vec_t; };
template<typename T> struct ColorChannel {};
template<typename T> struct ColorChannel {};
template<> struct ColorChannel<uchar>
template<> struct ColorChannel<uchar>
{
{
...
@@ -106,7 +99,7 @@ namespace imgproc
...
@@ -106,7 +99,7 @@ namespace imgproc
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
template <int SRCCN, int DSTCN, typename T>
template <int SRCCN, int DSTCN, typename T>
__global__ void RGB2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
__global__ void RGB2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
...
@@ -132,7 +125,7 @@ namespace imgproc
...
@@ -132,7 +125,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T, int SRCCN, int DSTCN>
template <typename T, int SRCCN, int DSTCN>
void RGB2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
void RGB2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
...
@@ -143,7 +136,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -143,7 +136,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -189,7 +182,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -189,7 +182,7 @@ namespace cv { namespace gpu { namespace improc
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
namespace imgproc
namespace imgproc
_krnls
{
{
template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};
template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {};
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>
...
@@ -281,7 +274,7 @@ namespace imgproc
...
@@ -281,7 +274,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <int GREEN_BITS, int DSTCN>
template <int GREEN_BITS, int DSTCN>
void RGB5x52RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
void RGB5x52RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
...
@@ -292,7 +285,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -292,7 +285,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -320,7 +313,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -320,7 +313,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -342,7 +335,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -342,7 +335,7 @@ namespace cv { namespace gpu { namespace improc
///////////////////////////////// Grayscale to Color ////////////////////////////////
///////////////////////////////// Grayscale to Color ////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
template <int DSTCN, typename T>
template <int DSTCN, typename T>
__global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
__global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
...
@@ -396,7 +389,7 @@ namespace imgproc
...
@@ -396,7 +389,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T, int DSTCN>
template <typename T, int DSTCN>
void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
...
@@ -407,7 +400,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -407,7 +400,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols);
dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0)
if (stream == 0)
...
@@ -447,7 +440,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -447,7 +440,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols);
dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0)
if (stream == 0)
...
@@ -468,7 +461,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -468,7 +461,7 @@ namespace cv { namespace gpu { namespace improc
///////////////////////////////// Color to Grayscale ////////////////////////////////
///////////////////////////////// Color to Grayscale ////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
#undef R2Y
#undef R2Y
#undef G2Y
#undef G2Y
...
@@ -550,7 +543,7 @@ namespace imgproc
...
@@ -550,7 +543,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T, int SRCCN>
template <typename T, int SRCCN>
void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
...
@@ -561,7 +554,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -561,7 +554,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -601,7 +594,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -601,7 +594,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols);
dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0)
if (stream == 0)
...
@@ -622,7 +615,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -622,7 +615,7 @@ namespace cv { namespace gpu { namespace improc
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
__constant__ float cYCrCbCoeffs_f[5];
__constant__ float cYCrCbCoeffs_f[5];
__constant__ int cYCrCbCoeffs_i[5];
__constant__ int cYCrCbCoeffs_i[5];
...
@@ -721,7 +714,7 @@ namespace imgproc
...
@@ -721,7 +714,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T, int SRCCN, int DSTCN>
template <typename T, int SRCCN, int DSTCN>
void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
...
@@ -732,7 +725,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -732,7 +725,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -748,7 +741,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -748,7 +741,7 @@ namespace cv { namespace gpu { namespace improc
{RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
{RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}
...
@@ -762,7 +755,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -762,7 +755,7 @@ namespace cv { namespace gpu { namespace improc
{RGB2YCrCb_caller<ushort, 4, 3>, RGB2YCrCb_caller<ushort, 4, 4>}
{RGB2YCrCb_caller<ushort, 4, 3>, RGB2YCrCb_caller<ushort, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}
...
@@ -776,7 +769,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -776,7 +769,7 @@ namespace cv { namespace gpu { namespace improc
{RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
{RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}
...
@@ -790,7 +783,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -790,7 +783,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -806,7 +799,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -806,7 +799,7 @@ namespace cv { namespace gpu { namespace improc
{YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
{YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}
...
@@ -820,7 +813,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -820,7 +813,7 @@ namespace cv { namespace gpu { namespace improc
{YCrCb2RGB_caller<ushort, 4, 3>, YCrCb2RGB_caller<ushort, 4, 4>}
{YCrCb2RGB_caller<ushort, 4, 3>, YCrCb2RGB_caller<ushort, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}
...
@@ -834,7 +827,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -834,7 +827,7 @@ namespace cv { namespace gpu { namespace improc
{YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
{YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
}
}
...
@@ -842,7 +835,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -842,7 +835,7 @@ namespace cv { namespace gpu { namespace improc
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
__constant__ float cXYZ_D65f[9];
__constant__ float cXYZ_D65f[9];
__constant__ int cXYZ_D65i[9];
__constant__ int cXYZ_D65i[9];
...
@@ -931,7 +924,7 @@ namespace imgproc
...
@@ -931,7 +924,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T, int SRCCN, int DSTCN>
template <typename T, int SRCCN, int DSTCN>
void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
...
@@ -942,7 +935,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -942,7 +935,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols);
dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0)
if (stream == 0)
...
@@ -958,7 +951,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -958,7 +951,7 @@ namespace cv { namespace gpu { namespace improc
{RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
{RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
}
}
...
@@ -972,7 +965,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -972,7 +965,7 @@ namespace cv { namespace gpu { namespace improc
{RGB2XYZ_caller<ushort, 4, 3>, RGB2XYZ_caller<ushort, 4, 4>}
{RGB2XYZ_caller<ushort, 4, 3>, RGB2XYZ_caller<ushort, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
}
}
...
@@ -986,7 +979,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -986,7 +979,7 @@ namespace cv { namespace gpu { namespace improc
{RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
{RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
}
}
...
@@ -1000,7 +993,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1000,7 +993,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols);
dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0)
if (stream == 0)
...
@@ -1016,7 +1009,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1016,7 +1009,7 @@ namespace cv { namespace gpu { namespace improc
{XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
{XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
}
}
...
@@ -1030,7 +1023,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1030,7 +1023,7 @@ namespace cv { namespace gpu { namespace improc
{XYZ2RGB_caller<ushort, 4, 3>, XYZ2RGB_caller<ushort, 4, 4>}
{XYZ2RGB_caller<ushort, 4, 3>, XYZ2RGB_caller<ushort, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
}
}
...
@@ -1044,7 +1037,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1044,7 +1037,7 @@ namespace cv { namespace gpu { namespace improc
{XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
{XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
}
}
...
@@ -1052,7 +1045,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1052,7 +1045,7 @@ namespace cv { namespace gpu { namespace improc
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
__constant__ int cHsvDivTable[256];
__constant__ int cHsvDivTable[256];
...
@@ -1229,7 +1222,7 @@ namespace imgproc
...
@@ -1229,7 +1222,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T, int SRCCN, int DSTCN>
template <typename T, int SRCCN, int DSTCN>
void RGB2HSV_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
void RGB2HSV_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
...
@@ -1241,10 +1234,10 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1241,10 +1234,10 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
if (hrange == 180)
if (hrange == 180)
imgproc::RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
else
else
imgproc::RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -1295,7 +1288,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1295,7 +1288,7 @@ namespace cv { namespace gpu { namespace improc
4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229,
4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229,
4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096
4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096
};
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvDivTable, div_table, sizeof(div_table)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cHsvDivTable, div_table, sizeof(div_table)) );
RGB2HSV_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
RGB2HSV_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
}
}
...
@@ -1323,10 +1316,10 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1323,10 +1316,10 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
if (hrange == 180)
if (hrange == 180)
imgproc::HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
else
else
imgproc::HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -1345,7 +1338,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1345,7 +1338,7 @@ namespace cv { namespace gpu { namespace improc
static const int sector_data[][3] =
static const int sector_data[][3] =
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvSectorData, sector_data, sizeof(sector_data)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cHsvSectorData, sector_data, sizeof(sector_data)) );
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
}
}
...
@@ -1362,7 +1355,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1362,7 +1355,7 @@ namespace cv { namespace gpu { namespace improc
static const int sector_data[][3] =
static const int sector_data[][3] =
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvSectorData, sector_data, sizeof(sector_data)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cHsvSectorData, sector_data, sizeof(sector_data)) );
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
}
}
...
@@ -1370,7 +1363,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1370,7 +1363,7 @@ namespace cv { namespace gpu { namespace improc
/////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
/////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
template<typename T, int HR> struct RGB2HLSConvertor;
template<typename T, int HR> struct RGB2HLSConvertor;
template<int HR> struct RGB2HLSConvertor<float, HR>
template<int HR> struct RGB2HLSConvertor<float, HR>
...
@@ -1541,7 +1534,7 @@ namespace imgproc
...
@@ -1541,7 +1534,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T, int SRCCN, int DSTCN>
template <typename T, int SRCCN, int DSTCN>
void RGB2HLS_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
void RGB2HLS_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
...
@@ -1553,10 +1546,10 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1553,10 +1546,10 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
if (hrange == 180)
if (hrange == 180)
imgproc::RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
else
else
imgproc::RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -1598,10 +1591,10 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1598,10 +1591,10 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
if (hrange == 180)
if (hrange == 180)
imgproc::HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
else
else
imgproc::HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
imgproc
_krnls
::HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx);
dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0)
if (stream == 0)
...
@@ -1620,7 +1613,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1620,7 +1613,7 @@ namespace cv { namespace gpu { namespace improc
static const int sector_data[][3]=
static const int sector_data[][3]=
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHlsSectorData, sector_data, sizeof(sector_data)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cHlsSectorData, sector_data, sizeof(sector_data)) );
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
}
}
...
@@ -1637,7 +1630,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -1637,7 +1630,7 @@ namespace cv { namespace gpu { namespace improc
static const int sector_data[][3]=
static const int sector_data[][3]=
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHlsSectorData, sector_data, sizeof(sector_data)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cHlsSectorData, sector_data, sizeof(sector_data)) );
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
}
}
...
...
modules/gpu/src/cuda/constantspacebp.cu
View file @
b08f6082
...
@@ -54,20 +54,18 @@ using namespace cv::gpu;
...
@@ -54,20 +54,18 @@ using namespace cv::gpu;
#define SHRT_MAX 32767
#define SHRT_MAX 32767
#endif
#endif
template <typename T>
namespace csbp_krnls
struct TypeLimits {};
template <>
struct TypeLimits<short>
{
{
template <typename T> struct TypeLimits;
template <> struct TypeLimits<short>
{
static __device__ short max() {return SHRT_MAX;}
static __device__ short max() {return SHRT_MAX;}
};
};
template <> struct TypeLimits<float>
template <>
{
struct TypeLimits<float>
{
static __device__ float max() {return FLT_MAX;}
static __device__ float max() {return FLT_MAX;}
};
};
}
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
/////////////////////// load constants ////////////////////////
...
...
modules/gpu/src/cuda/cuda_shared.hpp
View file @
b08f6082
...
@@ -58,19 +58,8 @@ namespace cv
...
@@ -58,19 +58,8 @@ namespace cv
static
inline
int
divUp
(
int
a
,
int
b
)
{
return
(
a
%
b
==
0
)
?
a
/
b
:
a
/
b
+
1
;
}
static
inline
int
divUp
(
int
a
,
int
b
)
{
return
(
a
%
b
==
0
)
?
a
/
b
:
a
/
b
+
1
;
}
namespace
matrix_operations
{
extern
"C"
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
extern
"C"
void
set_to_without_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
extern
"C"
void
set_to_with_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
extern
"C"
void
convert_to
(
const
DevMem2D
&
src
,
int
sdepth
,
DevMem2D
dst
,
int
ddepth
,
int
channels
,
double
alpha
,
double
beta
,
const
cudaStream_t
&
stream
=
0
);
}
template
<
class
T
>
template
<
class
T
>
inline
void
uploadConstant
(
const
char
*
name
,
const
T
&
value
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
name
,
&
value
,
sizeof
(
T
))
);
}
static
inline
void
uploadConstant
(
const
char
*
name
,
const
T
&
value
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
name
,
&
value
,
sizeof
(
T
))
);
}
}
}
}
}
...
...
modules/gpu/src/cuda/
bilateral_filter
.cu
→
modules/gpu/src/cuda/
filters
.cu
View file @
b08f6082
...
@@ -43,6 +43,7 @@
...
@@ -43,6 +43,7 @@
#include "opencv2/gpu/devmem2d.hpp"
#include "opencv2/gpu/devmem2d.hpp"
#include "saturate_cast.hpp"
#include "saturate_cast.hpp"
#include "safe_call.hpp"
#include "safe_call.hpp"
#include "cuda_shared.hpp"
using namespace cv::gpu;
using namespace cv::gpu;
...
@@ -50,6 +51,227 @@ using namespace cv::gpu;
...
@@ -50,6 +51,227 @@ using namespace cv::gpu;
#define FLT_MAX 3.402823466e+30F
#define FLT_MAX 3.402823466e+30F
#endif
#endif
/////////////////////////////////////////////////////////////////////////////////////////////////
// Linear filters
#define MAX_KERNEL_SIZE 16
namespace filter_krnls
{
__constant__ float cLinearKernel[MAX_KERNEL_SIZE];
}
namespace cv { namespace gpu { namespace filters
{
void loadLinearKernel(const float kernel[], int ksize)
{
cudaSafeCall( cudaMemcpyToSymbol(filter_krnls::cLinearKernel, kernel, ksize * sizeof(float)) );
}
}}}
namespace filter_krnls
{
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
__global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
{
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
const int blockStartX = blockDim.x * blockIdx.x;
const int blockStartY = blockDim.y * blockIdx.y;
const int threadX = blockStartX + threadIdx.x;
const int prevThreadX = threadX - blockDim.x;
const int nextThreadX = threadX + blockDim.x;
const int threadY = blockStartY + threadIdx.y;
T* sDataRow = smem + threadIdx.y * blockDim.x * 3;
if (threadY < height)
{
const T* rowSrc = src + threadY * src_step;
sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : 0;
sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : 0;
sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : 0;
__syncthreads();
if (threadX < width)
{
float sum = 0;
sDataRow += threadIdx.x + blockDim.x - anchor;
#pragma unroll
for(int i = 0; i < KERNEL_SIZE; ++i)
sum += cLinearKernel[i] * sDataRow[i];
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
}
}
}
}
namespace cv { namespace gpu { namespace filters
{
template <int KERNEL_SIZE, typename T, typename D>
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
{
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T, typename D>
inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
static const caller_t callers[] =
{linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>,
linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>,
linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>,
linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>,
linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>,
linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>,
linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>,
linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>};
loadLinearKernel(kernel, ksize);
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
}
void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
}
}}}
namespace filter_krnls
{
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
__global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
{
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
const int blockStartX = blockDim.x * blockIdx.x;
const int blockStartY = blockDim.y * blockIdx.y;
const int threadX = blockStartX + threadIdx.x;
const int threadY = blockStartY + threadIdx.y;
const int prevThreadY = threadY - blockDim.y;
const int nextThreadY = threadY + blockDim.y;
const int smem_step = blockDim.x;
T* sDataColumn = smem + threadIdx.x;
if (threadX < width)
{
const T* colSrc = src + threadX;
sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : 0;
sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : 0;
sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : 0;
__syncthreads();
if (threadY < height)
{
float sum = 0;
sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step;
#pragma unroll
for(int i = 0; i < KERNEL_SIZE; ++i)
sum += cLinearKernel[i] * sDataColumn[i * smem_step];
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
}
}
}
}
namespace cv { namespace gpu { namespace filters
{
template <int KERNEL_SIZE, typename T, typename D>
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
{
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T, typename D>
inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
static const caller_t callers[] =
{linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>,
linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>,
linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>,
linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>,
linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>,
linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>,
linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>,
linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>};
loadLinearKernel(kernel, ksize);
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
}
void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
}
}}}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Bilateral filters
namespace bf_krnls
namespace bf_krnls
{
{
__constant__ float* ctable_color;
__constant__ float* ctable_color;
...
...
modules/gpu/src/cuda/imgproc.cu
View file @
b08f6082
...
@@ -45,7 +45,7 @@
...
@@ -45,7 +45,7 @@
using namespace cv::gpu;
using namespace cv::gpu;
/////////////////////////////////// Remap ///////////////////////////////////////////////
/////////////////////////////////// Remap ///////////////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
...
@@ -123,7 +123,7 @@ namespace imgproc
...
@@ -123,7 +123,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
{
{
...
@@ -132,15 +132,15 @@ namespace cv { namespace gpu { namespace improc
...
@@ -132,15 +132,15 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(dst.cols, threads.x);
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
grid.y = divUp(dst.rows, threads.y);
imgproc::tex_remap.filterMode = cudaFilterModeLinear;
imgproc
_krnls
::tex_remap.filterMode = cudaFilterModeLinear;
imgproc
::tex_remap.addressMode[0] = imgproc
::tex_remap.addressMode[1] = cudaAddressModeWrap;
imgproc
_krnls::tex_remap.addressMode[0] = imgproc_krnls
::tex_remap.addressMode[1] = cudaAddressModeWrap;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );
cudaSafeCall( cudaBindTexture2D(0, imgproc
_krnls
::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );
imgproc::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
imgproc
_krnls
::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) );
cudaSafeCall( cudaUnbindTexture(imgproc
_krnls
::tex_remap) );
}
}
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
...
@@ -150,7 +150,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -150,7 +150,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(dst.cols, threads.x);
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
grid.y = divUp(dst.rows, threads.y);
imgproc::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
imgproc
_krnls
::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
}
...
@@ -159,7 +159,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -159,7 +159,7 @@ namespace cv { namespace gpu { namespace improc
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
texture<uchar4, 2> tex_meanshift;
texture<uchar4, 2> tex_meanshift;
...
@@ -254,7 +254,7 @@ namespace imgproc
...
@@ -254,7 +254,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
{
{
...
@@ -264,11 +264,11 @@ namespace cv { namespace gpu { namespace improc
...
@@ -264,11 +264,11 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
cudaSafeCall( cudaBindTexture2D( 0, imgproc
_krnls
::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
imgproc
_krnls
::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
cudaSafeCall( cudaUnbindTexture( imgproc
_krnls
::tex_meanshift ) );
}
}
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
{
{
...
@@ -278,17 +278,17 @@ namespace cv { namespace gpu { namespace improc
...
@@ -278,17 +278,17 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
cudaSafeCall( cudaBindTexture2D( 0, imgproc
_krnls
::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
imgproc::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
imgproc
_krnls
::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
cudaSafeCall( cudaUnbindTexture( imgproc
_krnls
::tex_meanshift ) );
}
}
}}}
}}}
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
template <typename T>
template <typename T>
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
...
@@ -391,7 +391,7 @@ namespace imgproc
...
@@ -391,7 +391,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
{
{
...
@@ -400,7 +400,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -400,7 +400,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x << 2);
grid.x = divUp(src.cols, threads.x << 2);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
imgproc
_krnls
::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -413,7 +413,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -413,7 +413,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x << 1);
grid.x = divUp(src.cols, threads.x << 1);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
imgproc
_krnls
::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -422,7 +422,7 @@ namespace cv { namespace gpu { namespace improc
...
@@ -422,7 +422,7 @@ namespace cv { namespace gpu { namespace improc
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
namespace imgproc
namespace imgproc
_krnls
{
{
__constant__ float cq[16];
__constant__ float cq[16];
...
@@ -457,7 +457,7 @@ namespace imgproc
...
@@ -457,7 +457,7 @@ namespace imgproc
}
}
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace im
g
proc
{
{
template <typename T>
template <typename T>
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
...
@@ -467,9 +467,9 @@ namespace cv { namespace gpu { namespace improc
...
@@ -467,9 +467,9 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(disp.cols, threads.x);
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
grid.y = divUp(disp.rows, threads.y);
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cq, q, 16 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc
_krnls
::cq, q, 16 * sizeof(float)) );
imgproc::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);
imgproc
_krnls
::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
...
modules/gpu/src/cuda/mathfunc.cu
View file @
b08f6082
...
@@ -41,6 +41,9 @@
...
@@ -41,6 +41,9 @@
//M*/
//M*/
#include "cuda_shared.hpp"
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "transform.hpp"
#include "vecmath.hpp"
using namespace cv::gpu;
using namespace cv::gpu;
...
@@ -48,6 +51,9 @@ using namespace cv::gpu;
...
@@ -48,6 +51,9 @@ using namespace cv::gpu;
#define CV_PI 3.1415926535897932384626433832795f
#define CV_PI 3.1415926535897932384626433832795f
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
namespace mathfunc_krnls
namespace mathfunc_krnls
{
{
struct Nothing
struct Nothing
...
@@ -143,8 +149,8 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -143,8 +149,8 @@ namespace cv { namespace gpu { namespace mathfunc
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
mathfunc_krnls::cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
mathfunc_krnls::cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
x.ptr, x.
step / sizeof(float), y.ptr, y.step / sizeof(float)
,
x.ptr, x.
elem_step, y.ptr, y.elem_step
,
mag.ptr, mag.
step / sizeof(float), angle.ptr, angle.step / sizeof(float)
, scale, x.cols, x.rows);
mag.ptr, mag.
elem_step, angle.ptr, angle.elem_step
, scale, x.cols, x.rows);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -191,8 +197,8 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -191,8 +197,8 @@ namespace cv { namespace gpu { namespace mathfunc
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.
step / sizeof(float)
,
mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.
elem_step
,
angle.ptr, angle.
step / sizeof(float), scale, x.ptr, x.step / sizeof(float), y.ptr, y.step / sizeof(float)
, mag.cols, mag.rows);
angle.ptr, angle.
elem_step, scale, x.ptr, x.elem_step, y.ptr, y.elem_step
, mag.cols, mag.rows);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -210,3 +216,37 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -210,3 +216,37 @@ namespace cv { namespace gpu { namespace mathfunc
callers[mag.ptr == 0](mag, angle, x, y, angleInDegrees, stream);
callers[mag.ptr == 0](mag, angle, x, y, angleInDegrees, stream);
}
}
}}}
}}}
//////////////////////////////////////////////////////////////////////////////////////
// Compare
namespace mathfunc_krnls
{
template <typename T1, typename T2>
struct NotEqual
{
__device__ uchar operator()(const T1& src1, const T2& src2, int, int)
{
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
};
}
namespace cv { namespace gpu { namespace mathfunc
{
template <typename T1, typename T2>
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
mathfunc_krnls::NotEqual<T1, T2> op;
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
}
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<uint, uint>(src1, src2, dst);
}
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<float, float>(src1, src2, dst);
}
}}}
modules/gpu/src/cuda/matrix_operations.cu
View file @
b08f6082
...
@@ -47,120 +47,17 @@
...
@@ -47,120 +47,17 @@
#include "saturate_cast.hpp"
#include "saturate_cast.hpp"
using namespace cv::gpu;
using namespace cv::gpu;
using namespace cv::gpu::matrix_operations;
namespace matop_krnls
namespace mat_operators
{
{
__constant__ double scalar_d[4];
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
template <typename T>
template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
class shift_and_sizeof;
template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <>
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
class shift_and_sizeof<char>
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
{
public:
enum { shift = 0 };
};
template <>
class shift_and_sizeof<unsigned char>
{
public:
enum { shift = 0 };
};
template <>
class shift_and_sizeof<short>
{
public:
enum { shift = 1 };
};
template <>
class shift_and_sizeof<unsigned short>
{
public:
enum { shift = 1 };
};
template <>
class shift_and_sizeof<int>
{
public:
enum { shift = 2 };
};
template <>
class shift_and_sizeof<float>
{
public:
enum { shift = 2 };
};
template <>
class shift_and_sizeof<double>
{
public:
enum { shift = 3 };
};
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template<typename T>
__global__ void kernel_copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
mat_dst[idx] = mat_src[idx];
}
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template<typename T>
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = scalar_d[ x % channels ];
}
}
template<typename T>
__global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = scalar_d[ x % channels ];
}
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
struct ReadWriteTraits
struct ReadWriteTraits
...
@@ -218,74 +115,31 @@ namespace mat_operators
...
@@ -218,74 +115,31 @@ namespace mat_operators
typedef int2 read_type;
typedef int2 read_type;
typedef short2 write_type;
typedef short2 write_type;
};
};
}
template <typename T, typename DT>
///////////////////////////////////////////////////////////////////////////
__global__ static void kernel_convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
////////////////////////////////// CopyTo /////////////////////////////////
{
///////////////////////////////////////////////////////////////////////////
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
namespace matop_krnls
const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
{
template<typename T>
if (y < height)
__global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
{
const T* src = (const T*)(srcmat + src_step * y);
DT* dst = (DT*)(dstmat + dst_step * y);
if ((x * shift) + shift - 1 < width)
{
read_type srcn_el = ((read_type*)src)[x];
write_type dstn_el;
const T* src1_el = (const T*) &srcn_el;
DT* dst1_el = (DT*) &dstn_el;
for (int i = 0; i < shift; ++i)
dst1_el[i] = saturate_cast<DT>(alpha * src1_el[i] + beta);
((write_type*)dst)[x] = dstn_el;
}
else
{
for (int i = 0; i < shift - 1; ++i)
if ((x * shift) + i < width)
dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
}
}
}
///////////////////////////////////////////////////////////////////////////
/////////////////////////////// compare_ne ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template <typename T>
__global__ void kernel_compare_ne(uchar* src1, size_t src1_step, uchar* src2, size_t src2_step, uchar* dst, size_t dst_step, int cols, int rows)
{
{
const size_t x = threadIdx.x + blockIdx.x * blockDim
.x;
size_t x = blockIdx.x * blockDim.x + threadIdx
.x;
const size_t y = threadIdx.y + blockIdx.y * blockDim
.y;
size_t y = blockIdx.y * blockDim.y + threadIdx
.y;
if (x < cols && y < rows)
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
{
T src1_pix = ((T*)(src1 + y * src1_step))[x];
size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
T src2_pix = ((T*)(src2 + y * src2_step))[x];
mat_dst[idx] = mat_src[idx];
uchar res = (uchar)(src1_pix != src2_pix) * 255;
((dst + y * dst_step))[x] = res;
}
}
}
}
}
// namespace mat_operators
}
namespace cv
namespace cv
{ namespace gpu { namespace matrix_operations
{
{
namespace gpu
{
namespace matrix_operations
{
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
template<typename T>
template<typename T>
...
@@ -295,18 +149,18 @@ namespace cv
...
@@ -295,18 +149,18 @@ namespace cv
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
if (stream == 0)
if (stream == 0)
{
{
::mat_operators::kernel_
copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
::matop_krnls::
copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
cudaSafeCall ( cudaThreadSynchronize() );
cudaSafeCall ( cudaThreadSynchronize() );
}
}
else
else
{
{
::mat_operators::kernel_
copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
::matop_krnls::
copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
}
}
}
}
extern "C"
void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
{
{
static CopyToFunc tab[8] =
static CopyToFunc tab[8] =
{
{
...
@@ -326,12 +180,46 @@ namespace cv
...
@@ -326,12 +180,46 @@ namespace cv
func(mat_src, mat_dst, mask, channels, stream);
func(mat_src, mat_dst, mask, channels, stream);
}
}
}}}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
namespace matop_krnls
////////////////////////////////// SetTo //////////////////////////////////
{
///////////////////////////////////////////////////////////////////////////
__constant__ double scalar_d[4];
template<typename T>
__global__ void set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = scalar_d[ x % channels ];
}
}
template<typename T>
__global__ void set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = scalar_d[ x % channels ];
}
}
}
namespace cv { namespace gpu { namespace matrix_operations
{
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
...
@@ -343,12 +231,12 @@ namespace cv
...
@@ -343,12 +231,12 @@ namespace cv
if (stream == 0)
if (stream == 0)
{
{
::mat_operators::kernel_
set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
::matop_krnls::
set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
cudaSafeCall ( cudaThreadSynchronize() );
cudaSafeCall ( cudaThreadSynchronize() );
}
}
else
else
{
{
::mat_operators::kernel_
set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
::matop_krnls::
set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
}
}
}
}
...
@@ -361,18 +249,18 @@ namespace cv
...
@@ -361,18 +249,18 @@ namespace cv
if (stream == 0)
if (stream == 0)
{
{
mat_operators::kernel_
set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
matop_krnls::
set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
cudaSafeCall ( cudaThreadSynchronize() );
cudaSafeCall ( cudaThreadSynchronize() );
}
}
else
else
{
{
mat_operators::kernel_
set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
matop_krnls::
set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
}
}
}
}
extern "C"
void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
{
{
cudaSafeCall( cudaMemcpyToSymbol(mat_operator
s::scalar_d, scalar, sizeof(double) * 4));
cudaSafeCall( cudaMemcpyToSymbol(matop_krnl
s::scalar_d, scalar, sizeof(double) * 4));
static SetToFunc_without_mask tab[8] =
static SetToFunc_without_mask tab[8] =
{
{
...
@@ -394,10 +282,9 @@ namespace cv
...
@@ -394,10 +282,9 @@ namespace cv
func(mat, channels, stream);
func(mat, channels, stream);
}
}
void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
extern "C" void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
{
{
cudaSafeCall( cudaMemcpyToSymbol(mat_operator
s::scalar_d, scalar, sizeof(double) * 4));
cudaSafeCall( cudaMemcpyToSymbol(matop_krnl
s::scalar_d, scalar, sizeof(double) * 4));
static SetToFunc_with_mask tab[8] =
static SetToFunc_with_mask tab[8] =
{
{
...
@@ -418,34 +305,75 @@ namespace cv
...
@@ -418,34 +305,75 @@ namespace cv
func(mat, mask, channels, stream);
func(mat, mask, channels, stream);
}
}
}}}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
namespace matop_krnls
//////////////////////////////// ConvertTo ////////////////////////////////
{
///////////////////////////////////////////////////////////////////////////
template <typename T, typename DT>
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
{
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
if (y < height)
{
const T* src = (const T*)(srcmat + src_step * y);
DT* dst = (DT*)(dstmat + dst_step * y);
if ((x * shift) + shift - 1 < width)
{
read_type srcn_el = ((read_type*)src)[x];
write_type dstn_el;
const T* src1_el = (const T*) &srcn_el;
DT* dst1_el = (DT*) &dstn_el;
for (int i = 0; i < shift; ++i)
dst1_el[i] = saturate_cast<DT>(alpha * src1_el[i] + beta);
((write_type*)dst)[x] = dstn_el;
}
else
{
for (int i = 0; i < shift - 1; ++i)
if ((x * shift) + i < width)
dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
}
}
}
}
namespace cv { namespace gpu { namespace matrix_operations
{
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
template<typename T, typename DT>
template<typename T, typename DT>
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
{
{
const int shift = ::mat_operator
s::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
const int shift = ::matop_krnl
s::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
dim3 block(32, 8);
dim3 block(32, 8);
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
if (stream == 0)
if (stream == 0)
{
{
mat_operators::kernel_
convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
matop_krnls::
convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
}
else
else
{
{
mat_operators::kernel_
convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
matop_krnls::
convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
}
}
}
}
extern "C"
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
{
{
static CvtFunc tab[8][8] =
static CvtFunc tab[8][8] =
{
{
...
@@ -478,28 +406,4 @@ namespace cv
...
@@ -478,28 +406,4 @@ namespace cv
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
}
}
}}}
///////////////////////////////////////////////////////////////////////////
/////////////////////////////// compare_ne ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
void compare_ne_8u(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
dim3 block(32, 8);
dim3 grid(divUp(src1.cols, block.x), divUp(src1.rows, block.y));
mat_operators::kernel_compare_ne<uint><<<grid, block>>>(src1.ptr, src1.step, src2.ptr, src2.step, dst.ptr, dst.step, src1.cols, src1.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
dim3 block(32, 8);
dim3 grid(divUp(src1.cols, block.x), divUp(src1.rows, block.y));
mat_operators::kernel_compare_ne<float><<<grid, block>>>(src1.ptr, src1.step, src2.ptr, src2.step, dst.ptr, dst.step, src1.cols, src1.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
} // namespace matrix_operations
} // namespace gpu
} // namespace cv
modules/gpu/src/cuda/saturate_cast.hpp
View file @
b08f6082
...
@@ -49,32 +49,29 @@ namespace cv
...
@@ -49,32 +49,29 @@ namespace cv
{
{
namespace
gpu
namespace
gpu
{
{
// To fix link error: this func already defined in other obj file
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uchar
v
)
{
return
_Tp
(
v
);
}
namespace
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
schar
v
)
{
return
_Tp
(
v
);
}
{
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
ushort
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
uchar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
short
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
schar
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uint
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
ushort
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
int
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
short
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
float
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
uint
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
double
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
int
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
float
v
)
{
return
_Tp
(
v
);
}
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
schar
v
)
template
<
typename
_Tp
>
__device__
_Tp
saturate_cast
(
double
v
)
{
return
_Tp
(
v
);
}
template
<>
__device__
uchar
saturate_cast
<
uchar
>
(
schar
v
)
{
return
(
uchar
)
max
((
int
)
v
,
0
);
}
{
return
(
uchar
)
max
((
int
)
v
,
0
);
}
template
<>
__device__
uchar
saturate_cast
<
uchar
>
(
ushort
v
)
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
ushort
v
)
{
return
(
uchar
)
min
((
uint
)
v
,
(
uint
)
UCHAR_MAX
);
}
{
return
(
uchar
)
min
((
uint
)
v
,
(
uint
)
UCHAR_MAX
);
}
template
<>
__device__
uchar
saturate_cast
<
uchar
>
(
int
v
)
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
int
v
)
{
return
(
uchar
)((
uint
)
v
<=
UCHAR_MAX
?
v
:
v
>
0
?
UCHAR_MAX
:
0
);
}
{
return
(
uchar
)((
uint
)
v
<=
UCHAR_MAX
?
v
:
v
>
0
?
UCHAR_MAX
:
0
);
}
template
<>
__device__
uchar
saturate_cast
<
uchar
>
(
uint
v
)
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
uint
v
)
{
return
(
uchar
)
min
(
v
,
(
uint
)
UCHAR_MAX
);
}
{
return
(
uchar
)
min
(
v
,
(
uint
)
UCHAR_MAX
);
}
template
<>
__device__
uchar
saturate_cast
<
uchar
>
(
short
v
)
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
short
v
)
{
return
saturate_cast
<
uchar
>
((
uint
)
v
);
}
{
return
saturate_cast
<
uchar
>
((
uint
)
v
);
}
template
<>
__device__
uchar
saturate_cast
<
uchar
>
(
float
v
)
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
}
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
}
template
<>
__device__
uchar
saturate_cast
<
uchar
>
(
double
v
)
template
<>
static
__device__
uchar
saturate_cast
<
uchar
>
(
double
v
)
{
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
uchar
>
(
iv
);
...
@@ -83,23 +80,23 @@ namespace cv
...
@@ -83,23 +80,23 @@ namespace cv
#endif
#endif
}
}
template
<>
__device__
schar
saturate_cast
<
schar
>
(
uchar
v
)
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
uchar
v
)
{
return
(
schar
)
min
((
int
)
v
,
SCHAR_MAX
);
}
{
return
(
schar
)
min
((
int
)
v
,
SCHAR_MAX
);
}
template
<>
__device__
schar
saturate_cast
<
schar
>
(
ushort
v
)
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
ushort
v
)
{
return
(
schar
)
min
((
uint
)
v
,
(
uint
)
SCHAR_MAX
);
}
{
return
(
schar
)
min
((
uint
)
v
,
(
uint
)
SCHAR_MAX
);
}
template
<>
__device__
schar
saturate_cast
<
schar
>
(
int
v
)
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
int
v
)
{
{
return
(
schar
)((
uint
)(
v
-
SCHAR_MIN
)
<=
(
uint
)
UCHAR_MAX
?
return
(
schar
)((
uint
)(
v
-
SCHAR_MIN
)
<=
(
uint
)
UCHAR_MAX
?
v
:
v
>
0
?
SCHAR_MAX
:
SCHAR_MIN
);
v
:
v
>
0
?
SCHAR_MAX
:
SCHAR_MIN
);
}
}
template
<>
__device__
schar
saturate_cast
<
schar
>
(
short
v
)
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
short
v
)
{
return
saturate_cast
<
schar
>
((
int
)
v
);
}
{
return
saturate_cast
<
schar
>
((
int
)
v
);
}
template
<>
__device__
schar
saturate_cast
<
schar
>
(
uint
v
)
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
uint
v
)
{
return
(
schar
)
min
(
v
,
(
uint
)
SCHAR_MAX
);
}
{
return
(
schar
)
min
(
v
,
(
uint
)
SCHAR_MAX
);
}
template
<>
__device__
schar
saturate_cast
<
schar
>
(
float
v
)
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
}
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
}
template
<>
__device__
schar
saturate_cast
<
schar
>
(
double
v
)
template
<>
static
__device__
schar
saturate_cast
<
schar
>
(
double
v
)
{
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
schar
>
(
iv
);
...
@@ -108,17 +105,17 @@ namespace cv
...
@@ -108,17 +105,17 @@ namespace cv
#endif
#endif
}
}
template
<>
__device__
ushort
saturate_cast
<
ushort
>
(
schar
v
)
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
schar
v
)
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
template
<>
__device__
ushort
saturate_cast
<
ushort
>
(
short
v
)
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
short
v
)
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
{
return
(
ushort
)
max
((
int
)
v
,
0
);
}
template
<>
__device__
ushort
saturate_cast
<
ushort
>
(
int
v
)
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
int
v
)
{
return
(
ushort
)((
uint
)
v
<=
(
uint
)
USHRT_MAX
?
v
:
v
>
0
?
USHRT_MAX
:
0
);
}
{
return
(
ushort
)((
uint
)
v
<=
(
uint
)
USHRT_MAX
?
v
:
v
>
0
?
USHRT_MAX
:
0
);
}
template
<>
__device__
ushort
saturate_cast
<
ushort
>
(
uint
v
)
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
uint
v
)
{
return
(
ushort
)
min
(
v
,
(
uint
)
USHRT_MAX
);
}
{
return
(
ushort
)
min
(
v
,
(
uint
)
USHRT_MAX
);
}
template
<>
__device__
ushort
saturate_cast
<
ushort
>
(
float
v
)
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
}
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
}
template
<>
__device__
ushort
saturate_cast
<
ushort
>
(
double
v
)
template
<>
static
__device__
ushort
saturate_cast
<
ushort
>
(
double
v
)
{
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
ushort
>
(
iv
);
...
@@ -127,18 +124,18 @@ namespace cv
...
@@ -127,18 +124,18 @@ namespace cv
#endif
#endif
}
}
template
<>
__device__
short
saturate_cast
<
short
>
(
ushort
v
)
template
<>
static
__device__
short
saturate_cast
<
short
>
(
ushort
v
)
{
return
(
short
)
min
((
int
)
v
,
SHRT_MAX
);
}
{
return
(
short
)
min
((
int
)
v
,
SHRT_MAX
);
}
template
<>
__device__
short
saturate_cast
<
short
>
(
int
v
)
template
<>
static
__device__
short
saturate_cast
<
short
>
(
int
v
)
{
{
return
(
short
)((
uint
)(
v
-
SHRT_MIN
)
<=
(
uint
)
USHRT_MAX
?
return
(
short
)((
uint
)(
v
-
SHRT_MIN
)
<=
(
uint
)
USHRT_MAX
?
v
:
v
>
0
?
SHRT_MAX
:
SHRT_MIN
);
v
:
v
>
0
?
SHRT_MAX
:
SHRT_MIN
);
}
}
template
<>
__device__
short
saturate_cast
<
short
>
(
uint
v
)
template
<>
static
__device__
short
saturate_cast
<
short
>
(
uint
v
)
{
return
(
short
)
min
(
v
,
(
uint
)
SHRT_MAX
);
}
{
return
(
short
)
min
(
v
,
(
uint
)
SHRT_MAX
);
}
template
<>
__device__
short
saturate_cast
<
short
>
(
float
v
)
template
<>
static
__device__
short
saturate_cast
<
short
>
(
float
v
)
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
}
{
int
iv
=
__float2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
}
template
<>
__device__
short
saturate_cast
<
short
>
(
double
v
)
template
<>
static
__device__
short
saturate_cast
<
short
>
(
double
v
)
{
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
int
iv
=
__double2int_rn
(
v
);
return
saturate_cast
<
short
>
(
iv
);
...
@@ -147,8 +144,8 @@ namespace cv
...
@@ -147,8 +144,8 @@ namespace cv
#endif
#endif
}
}
template
<>
__device__
int
saturate_cast
<
int
>
(
float
v
)
{
return
__float2int_rn
(
v
);
}
template
<>
static
__device__
int
saturate_cast
<
int
>
(
float
v
)
{
return
__float2int_rn
(
v
);
}
template
<>
__device__
int
saturate_cast
<
int
>
(
double
v
)
template
<>
static
__device__
int
saturate_cast
<
int
>
(
double
v
)
{
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return
__double2int_rn
(
v
);
return
__double2int_rn
(
v
);
...
@@ -157,8 +154,8 @@ namespace cv
...
@@ -157,8 +154,8 @@ namespace cv
#endif
#endif
}
}
template
<>
__device__
uint
saturate_cast
<
uint
>
(
float
v
){
return
__float2uint_rn
(
v
);
}
template
<>
static
__device__
uint
saturate_cast
<
uint
>
(
float
v
){
return
__float2uint_rn
(
v
);
}
template
<>
__device__
uint
saturate_cast
<
uint
>
(
double
v
)
template
<>
static
__device__
uint
saturate_cast
<
uint
>
(
double
v
)
{
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
return
__double2uint_rn
(
v
);
return
__double2uint_rn
(
v
);
...
@@ -166,7 +163,92 @@ namespace cv
...
@@ -166,7 +163,92 @@ namespace cv
return
saturate_cast
<
uint
>
((
float
)
v
);
return
saturate_cast
<
uint
>
((
float
)
v
);
#endif
#endif
}
}
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uchar4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
char4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
ushort4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
short4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uint4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
int4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
float4
v
)
{
return
_Tp
(
v
);
}
// uchar4 specializations: each channel is clamped independently to
// [0, 255] by the scalar saturate_cast<uchar>.
// NOTE(review): 'static' on an explicit specialization is non-standard
// C++ (accepted by the nvcc/MSVC toolchains of this era); GCC rejects it.
template<> static __device__ uchar4 saturate_cast<uchar4>(char4 v)
{
    return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y),
                       saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w));
}
template<> static __device__ uchar4 saturate_cast<uchar4>(ushort4 v)
{
    return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y),
                       saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w));
}
template<> static __device__ uchar4 saturate_cast<uchar4>(short4 v)
{
    return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y),
                       saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w));
}
template<> static __device__ uchar4 saturate_cast<uchar4>(uint4 v)
{
    return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y),
                       saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w));
}
template<> static __device__ uchar4 saturate_cast<uchar4>(int4 v)
{
    return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y),
                       saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w));
}
template<> static __device__ uchar4 saturate_cast<uchar4>(float4 v)
{
    return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y),
                       saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w));
}
// char4 specializations: each channel is clamped independently to
// [-128, 127] by the scalar saturate_cast<char>.
template<> static __device__ char4 saturate_cast<char4>(uchar4 v)
{
    return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y),
                      saturate_cast<char>(v.z), saturate_cast<char>(v.w));
}
template<> static __device__ char4 saturate_cast<char4>(ushort4 v)
{
    return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y),
                      saturate_cast<char>(v.z), saturate_cast<char>(v.w));
}
template<> static __device__ char4 saturate_cast<char4>(short4 v)
{
    return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y),
                      saturate_cast<char>(v.z), saturate_cast<char>(v.w));
}
template<> static __device__ char4 saturate_cast<char4>(uint4 v)
{
    return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y),
                      saturate_cast<char>(v.z), saturate_cast<char>(v.w));
}
template<> static __device__ char4 saturate_cast<char4>(int4 v)
{
    return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y),
                      saturate_cast<char>(v.z), saturate_cast<char>(v.w));
}
template<> static __device__ char4 saturate_cast<char4>(float4 v)
{
    return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y),
                      saturate_cast<char>(v.z), saturate_cast<char>(v.w));
}
// ushort4 specializations. uchar channels always fit in ushort, so that
// conversion is a plain widening; all other sources go through the scalar
// saturate_cast<ushort> which clamps to [0, 65535].
template<> static __device__ ushort4 saturate_cast<ushort4>(uchar4 v)
{
    return make_ushort4(v.x, v.y, v.z, v.w);
}
template<> static __device__ ushort4 saturate_cast<ushort4>(char4 v)
{
    return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y),
                        saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w));
}
template<> static __device__ ushort4 saturate_cast<ushort4>(short4 v)
{
    return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y),
                        saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w));
}
template<> static __device__ ushort4 saturate_cast<ushort4>(uint4 v)
{
    return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y),
                        saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w));
}
template<> static __device__ ushort4 saturate_cast<ushort4>(int4 v)
{
    return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y),
                        saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w));
}
template<> static __device__ ushort4 saturate_cast<ushort4>(float4 v)
{
    return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y),
                        saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w));
}
// short4 specializations. uchar and char channels always fit in short,
// so those conversions are plain widenings; wider / floating sources go
// through the scalar saturate_cast<short> which clamps to [-32768, 32767].
template<> static __device__ short4 saturate_cast<short4>(uchar4 v)
{
    return make_short4(v.x, v.y, v.z, v.w);
}
template<> static __device__ short4 saturate_cast<short4>(char4 v)
{
    return make_short4(v.x, v.y, v.z, v.w);
}
template<> static __device__ short4 saturate_cast<short4>(ushort4 v)
{
    return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y),
                       saturate_cast<short>(v.z), saturate_cast<short>(v.w));
}
template<> static __device__ short4 saturate_cast<short4>(uint4 v)
{
    return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y),
                       saturate_cast<short>(v.z), saturate_cast<short>(v.w));
}
template<> static __device__ short4 saturate_cast<short4>(int4 v)
{
    return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y),
                       saturate_cast<short>(v.z), saturate_cast<short>(v.w));
}
template<> static __device__ short4 saturate_cast<short4>(float4 v)
{
    return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y),
                       saturate_cast<short>(v.z), saturate_cast<short>(v.w));
}
// uint4 specializations. Unsigned uchar/ushort channels always fit in
// uint and are widened directly; signed and floating sources go through
// the scalar saturate_cast<uint> (which clamps negatives to 0).
template<> static __device__ uint4 saturate_cast<uint4>(uchar4 v)
{
    return make_uint4(v.x, v.y, v.z, v.w);
}
template<> static __device__ uint4 saturate_cast<uint4>(char4 v)
{
    return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y),
                      saturate_cast<uint>(v.z), saturate_cast<uint>(v.w));
}
template<> static __device__ uint4 saturate_cast<uint4>(ushort4 v)
{
    return make_uint4(v.x, v.y, v.z, v.w);
}
template<> static __device__ uint4 saturate_cast<uint4>(short4 v)
{
    return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y),
                      saturate_cast<uint>(v.z), saturate_cast<uint>(v.w));
}
template<> static __device__ uint4 saturate_cast<uint4>(int4 v)
{
    return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y),
                      saturate_cast<uint>(v.z), saturate_cast<uint>(v.w));
}
template<> static __device__ uint4 saturate_cast<uint4>(float4 v)
{
    return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y),
                      saturate_cast<uint>(v.z), saturate_cast<uint>(v.w));
}
// int4 specializations. All 8/16-bit integer channels fit in int and are
// widened directly; uint may exceed INT_MAX and float needs rounding, so
// those go through the scalar saturate_cast<int>.
template<> static __device__ int4 saturate_cast<int4>(uchar4 v)
{
    return make_int4(v.x, v.y, v.z, v.w);
}
template<> static __device__ int4 saturate_cast<int4>(char4 v)
{
    return make_int4(v.x, v.y, v.z, v.w);
}
template<> static __device__ int4 saturate_cast<int4>(ushort4 v)
{
    return make_int4(v.x, v.y, v.z, v.w);
}
template<> static __device__ int4 saturate_cast<int4>(short4 v)
{
    return make_int4(v.x, v.y, v.z, v.w);
}
template<> static __device__ int4 saturate_cast<int4>(uint4 v)
{
    return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y),
                     saturate_cast<int>(v.z), saturate_cast<int>(v.w));
}
template<> static __device__ int4 saturate_cast<int4>(float4 v)
{
    return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y),
                     saturate_cast<int>(v.z), saturate_cast<int>(v.w));
}
}
}
}
}
...
...
modules/gpu/src/cuda/transform.hpp
0 → 100644
View file @
b08f6082
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "vecmath.hpp"
namespace cv { namespace gpu { namespace algo_krnls
{
    // Unary per-pixel kernel: dst(y, x) = op(src(y, x), x, y).
    // src_step / dst_step are row strides expressed in elements.
    template <typename T, typename D, typename UnOp>
    static __global__ void transform(const T* src, size_t src_step,
        D* dst, size_t dst_step, int width, int height, UnOp op)
    {
        const int col = blockDim.x * blockIdx.x + threadIdx.x;
        const int row = blockDim.y * blockIdx.y + threadIdx.y;

        if (col < width && row < height)
        {
            T val = src[row * src_step + col];
            dst[row * dst_step + col] = op(val, col, row);
        }
    }

    // Binary per-pixel kernel: dst(y, x) = op(src1(y, x), src2(y, x), x, y).
    // All steps are row strides expressed in elements.
    template <typename T1, typename T2, typename D, typename BinOp>
    static __global__ void transform(const T1* src1, size_t src1_step,
        const T2* src2, size_t src2_step,
        D* dst, size_t dst_step, int width, int height, BinOp op)
    {
        const int col = blockDim.x * blockIdx.x + threadIdx.x;
        const int row = blockDim.y * blockIdx.y + threadIdx.y;

        if (col < width && row < height)
        {
            T1 lhs = src1[row * src1_step + col];
            T2 rhs = src2[row * src2_step + col];
            dst[row * dst_step + col] = op(lhs, rhs, col, row);
        }
    }
}}}
namespace
cv
{
namespace
gpu
{
template
<
typename
T
,
typename
D
,
typename
UnOp
>
static
void
transform
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
cudaStream_t
stream
)
{
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
algo_krnls
::
transform
<<<
grid
,
threads
,
0
,
stream
>>>
(
src
.
ptr
,
src
.
elem_step
,
dst
.
ptr
,
dst
.
elem_step
,
src
.
cols
,
src
.
rows
,
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
static
void
transform
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
BinOp
op
,
cudaStream_t
stream
)
{
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
algo_krnls
::
transform
<<<
grid
,
threads
,
0
,
stream
>>>
(
src1
.
ptr
,
src1
.
elem_step
,
src2
.
ptr
,
src2
.
elem_step
,
dst
.
ptr
,
dst
.
elem_step
,
src1
.
cols
,
src1
.
rows
,
op
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
}
#endif // __OPENCV_GPU_TRANSFORM_HPP__
modules/gpu/src/cuda/vecmath.hpp
0 → 100644
View file @
b08f6082
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__
#include "cuda_shared.hpp"
namespace
cv
{
namespace
gpu
{
template
<
typename
T
,
int
N
>
struct
TypeVec
;
template
<
typename
T
>
struct
TypeVec
<
T
,
1
>
{
typedef
T
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
char
,
2
>
{
typedef
uchar2
vec_t
;
};
template
<>
struct
TypeVec
<
uchar2
,
2
>
{
typedef
uchar2
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
char
,
3
>
{
typedef
uchar3
vec_t
;
};;
template
<>
struct
TypeVec
<
uchar3
,
3
>
{
typedef
uchar3
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
char
,
4
>
{
typedef
uchar4
vec_t
;
};;
template
<>
struct
TypeVec
<
uchar4
,
4
>
{
typedef
uchar4
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
2
>
{
typedef
char2
vec_t
;
};
template
<>
struct
TypeVec
<
char2
,
2
>
{
typedef
char2
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
3
>
{
typedef
char3
vec_t
;
};
template
<>
struct
TypeVec
<
char3
,
3
>
{
typedef
char3
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
4
>
{
typedef
char4
vec_t
;
};
template
<>
struct
TypeVec
<
char4
,
4
>
{
typedef
char4
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
short
,
2
>
{
typedef
ushort2
vec_t
;
};
template
<>
struct
TypeVec
<
ushort2
,
2
>
{
typedef
ushort2
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
short
,
3
>
{
typedef
ushort3
vec_t
;
};
template
<>
struct
TypeVec
<
ushort3
,
3
>
{
typedef
ushort3
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
short
,
4
>
{
typedef
ushort4
vec_t
;
};
template
<>
struct
TypeVec
<
ushort4
,
4
>
{
typedef
ushort4
vec_t
;
};
template
<>
struct
TypeVec
<
short
,
2
>
{
typedef
short2
vec_t
;
};
template
<>
struct
TypeVec
<
short2
,
2
>
{
typedef
short2
vec_t
;
};
template
<>
struct
TypeVec
<
short
,
3
>
{
typedef
short3
vec_t
;
};
template
<>
struct
TypeVec
<
short3
,
3
>
{
typedef
short3
vec_t
;
};
template
<>
struct
TypeVec
<
short
,
4
>
{
typedef
short4
vec_t
;
};
template
<>
struct
TypeVec
<
short4
,
4
>
{
typedef
short4
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
int
,
2
>
{
typedef
uint2
vec_t
;
};
template
<>
struct
TypeVec
<
uint2
,
2
>
{
typedef
uint2
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
int
,
3
>
{
typedef
uint3
vec_t
;
};
template
<>
struct
TypeVec
<
uint3
,
3
>
{
typedef
uint3
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
int
,
4
>
{
typedef
uint4
vec_t
;
};
template
<>
struct
TypeVec
<
uint4
,
4
>
{
typedef
uint4
vec_t
;
};
template
<>
struct
TypeVec
<
int
,
2
>
{
typedef
int2
vec_t
;
};
template
<>
struct
TypeVec
<
int2
,
2
>
{
typedef
int2
vec_t
;
};
template
<>
struct
TypeVec
<
int
,
3
>
{
typedef
int3
vec_t
;
};
template
<>
struct
TypeVec
<
int3
,
3
>
{
typedef
int3
vec_t
;
};
template
<>
struct
TypeVec
<
int
,
4
>
{
typedef
int4
vec_t
;
};
template
<>
struct
TypeVec
<
int4
,
4
>
{
typedef
int4
vec_t
;
};
template
<>
struct
TypeVec
<
float
,
2
>
{
typedef
float2
vec_t
;
};
template
<>
struct
TypeVec
<
float2
,
2
>
{
typedef
float2
vec_t
;
};
template
<>
struct
TypeVec
<
float
,
3
>
{
typedef
float3
vec_t
;
};
template
<>
struct
TypeVec
<
float3
,
3
>
{
typedef
float3
vec_t
;
};
template
<>
struct
TypeVec
<
float
,
4
>
{
typedef
float4
vec_t
;
};
template
<>
struct
TypeVec
<
float4
,
4
>
{
typedef
float4
vec_t
;
};
// Component-wise arithmetic for uchar4. The int-promoted per-channel
// results are converted back to unsigned char by make_uchar4, i.e. values
// wrap modulo 256 rather than saturate.
static __device__ uchar4 operator+(const uchar4& a, const uchar4& b)
{
    return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
static __device__ uchar4 operator-(const uchar4& a, const uchar4& b)
{
    return make_uchar4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}
static __device__ uchar4 operator*(const uchar4& a, const uchar4& b)
{
    return make_uchar4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}
// Per-channel integer division; a zero channel in b is undefined
// behavior, exactly as for scalar integer division.
static __device__ uchar4 operator/(const uchar4& a, const uchar4& b)
{
    return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}
// Scale every channel by a scalar s (e.g. float kernel coefficients);
// each product is truncated back to unsigned char.
template <typename T> static __device__ uchar4 operator*(const uchar4& a, T s)
{
    return make_uchar4(a.x * s, a.y * s, a.z * s, a.w * s);
}
// Commuted overload: s * a == a * s.
template <typename T> static __device__ uchar4 operator*(T s, const uchar4& a)
{
    return a * s;
}
}
}
#endif // __OPENCV_GPU_VECMATH_HPP__
\ No newline at end of file
modules/gpu/src/cudastream.cpp
View file @
b08f6082
...
@@ -69,6 +69,22 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int
...
@@ -69,6 +69,22 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int
#include "opencv2/gpu/stream_accessor.hpp"
#include "opencv2/gpu/stream_accessor.hpp"
namespace cv { namespace gpu { namespace matrix_operations
{
    // Forward declarations of the CUDA-side matrix operations (presumably
    // implemented in matrix_operations.cu — confirm). A default (0) stream
    // runs the operation on the default CUDA stream. 'depth'/'sdepth'/'ddepth'
    // look like CV_* element depths and 'channels' the channel count —
    // verify against the definitions.

    // Copy src into dst, presumably only where mask is non-zero.
    void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t& stream = 0);

    // Fill all of dst with 'scalar' (one double per channel).
    void set_to_without_mask(DevMem2D dst, int depth, const double* scalar, int channels, const cudaStream_t& stream = 0);

    // Fill dst with 'scalar', presumably only where mask is non-zero.
    void set_to_with_mask(DevMem2D dst, int depth, const double* scalar, const DevMem2D& mask, int channels, const cudaStream_t& stream = 0);

    // Depth conversion sdepth -> ddepth; alpha/beta suggest the usual
    // dst = src * alpha + beta contract — confirm against the kernel.
    void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t& stream = 0);
}}}
struct
Stream
::
Impl
struct
Stream
::
Impl
{
{
cudaStream_t
stream
;
cudaStream_t
stream
;
...
...
modules/gpu/src/filtering
_npp
.cpp
→
modules/gpu/src/filtering.cpp
View file @
b08f6082
...
@@ -49,18 +49,18 @@ using namespace cv::gpu;
...
@@ -49,18 +49,18 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA)
#if !defined (HAVE_CUDA)
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createFilter2D_GPU
(
const
Ptr
<
BaseFilter_GPU
>
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createFilter2D_GPU
(
const
Ptr
<
BaseFilter_GPU
>
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableFilter_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
,
const
Ptr
<
BaseColumnFilter_GPU
>&
,
bool
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableFilter_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
,
const
Ptr
<
BaseColumnFilter_GPU
>&
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
BaseRowFilter_GPU
>
cv
::
gpu
::
getRowSumFilter_GPU
(
int
,
int
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseRowFilter_GPU
>
(
0
);
}
Ptr
<
BaseRowFilter_GPU
>
cv
::
gpu
::
getRowSumFilter_GPU
(
int
,
int
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseRowFilter_GPU
>
(
0
);
}
Ptr
<
BaseColumnFilter_GPU
>
cv
::
gpu
::
getColumnSumFilter_GPU
(
int
,
int
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseColumnFilter_GPU
>
(
0
);
}
Ptr
<
BaseColumnFilter_GPU
>
cv
::
gpu
::
getColumnSumFilter_GPU
(
int
,
int
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseColumnFilter_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getBoxFilter_GPU
(
int
,
int
,
const
Size
&
,
Point
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getBoxFilter_GPU
(
int
,
int
,
const
Size
&
,
Point
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createBoxFilter_GPU
(
int
,
int
,
const
Size
&
,
const
Point
&
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createBoxFilter_GPU
(
int
,
int
,
const
Size
&
,
const
Point
&
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getMorphologyFilter_GPU
(
int
,
int
,
const
Gpu
Mat
&
,
const
Size
&
,
Point
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getMorphologyFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Size
&
,
Point
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createMorphologyFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Point
&
,
int
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createMorphologyFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Point
&
,
int
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getLinearFilter_GPU
(
int
,
int
,
const
GpuMat
&
,
const
Size
&
,
Point
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getLinearFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Size
&
,
Po
int
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createLinearFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Point
&
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createLinearFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Point
&
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
BaseRowFilter_GPU
>
cv
::
gpu
::
getLinearRowFilter_GPU
(
int
,
int
,
const
GpuMat
&
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseRowFilter_GPU
>
(
0
);
}
Ptr
<
BaseRowFilter_GPU
>
cv
::
gpu
::
getLinearRowFilter_GPU
(
int
,
int
,
const
Mat
&
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseRowFilter_GPU
>
(
0
);
}
Ptr
<
BaseColumnFilter_GPU
>
cv
::
gpu
::
getLinearColumnFilter_GPU
(
int
,
int
,
const
GpuMat
&
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseColumnFilter_GPU
>
(
0
);
}
Ptr
<
BaseColumnFilter_GPU
>
cv
::
gpu
::
getLinearColumnFilter_GPU
(
int
,
int
,
const
Mat
&
,
int
)
{
throw_nogpu
();
return
Ptr
<
BaseColumnFilter_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableLinearFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Mat
&
,
const
Point
&
,
bool
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableLinearFilter_GPU
(
int
,
int
,
const
Mat
&
,
const
Mat
&
,
const
Point
&
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createDerivFilter_GPU
(
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createDerivFilter_GPU
(
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createGaussianFilter_GPU
(
int
,
Size
,
double
,
double
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createGaussianFilter_GPU
(
int
,
Size
,
double
,
double
)
{
throw_nogpu
();
return
Ptr
<
FilterEngine_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getMaxFilter_GPU
(
int
,
int
,
const
Size
&
,
Point
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getMaxFilter_GPU
(
int
,
int
,
const
Size
&
,
Point
)
{
throw_nogpu
();
return
Ptr
<
BaseFilter_GPU
>
(
0
);
}
...
@@ -71,7 +71,7 @@ void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nog
...
@@ -71,7 +71,7 @@ void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nog
void
cv
::
gpu
::
dilate
(
const
GpuMat
&
,
GpuMat
&
,
const
Mat
&
,
Point
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
dilate
(
const
GpuMat
&
,
GpuMat
&
,
const
Mat
&
,
Point
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
morphologyEx
(
const
GpuMat
&
,
GpuMat
&
,
int
,
const
Mat
&
,
Point
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
morphologyEx
(
const
GpuMat
&
,
GpuMat
&
,
int
,
const
Mat
&
,
Point
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
filter2D
(
const
GpuMat
&
,
GpuMat
&
,
int
,
const
Mat
&
,
Point
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
filter2D
(
const
GpuMat
&
,
GpuMat
&
,
int
,
const
Mat
&
,
Point
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
sepFilter2D
(
const
GpuMat
&
,
GpuMat
&
,
int
,
const
Mat
&
,
const
Mat
&
,
Point
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
sepFilter2D
(
const
GpuMat
&
,
GpuMat
&
,
int
,
const
Mat
&
,
const
Mat
&
,
Point
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Sobel
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
int
,
int
,
double
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Sobel
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
int
,
int
,
double
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Scharr
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
int
,
double
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Scharr
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
int
,
double
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
GaussianBlur
(
const
GpuMat
&
,
GpuMat
&
,
Size
,
double
,
double
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
GaussianBlur
(
const
GpuMat
&
,
GpuMat
&
,
Size
,
double
,
double
)
{
throw_nogpu
();
}
...
@@ -164,28 +164,10 @@ Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filt
...
@@ -164,28 +164,10 @@ Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filt
namespace
namespace
{
{
struct
RowColumnFilterApply
class
SeparableFilterEngine_GPU
:
public
FilterEngine_GPU
{
void
operator
()(
Ptr
<
BaseRowFilter_GPU
>&
rowFilter
,
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter
,
GpuMat
&
srcROI
,
GpuMat
&
dstROI
,
GpuMat
&
dstBufROI
)
{
(
*
rowFilter
)(
srcROI
,
dstBufROI
);
(
*
columnFilter
)(
dstBufROI
,
dstROI
);
}
};
struct
ColumnRowFilterApply
{
void
operator
()(
Ptr
<
BaseRowFilter_GPU
>&
rowFilter
,
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter
,
GpuMat
&
srcROI
,
GpuMat
&
dstROI
,
GpuMat
&
dstBufROI
)
{
(
*
columnFilter
)(
srcROI
,
dstBufROI
);
(
*
rowFilter
)(
dstBufROI
,
dstROI
);
}
};
class
SeparableFilterEngine_GPU_base
:
public
FilterEngine_GPU
{
{
public
:
public
:
SeparableFilterEngine_GPU
_base
(
const
Ptr
<
BaseRowFilter_GPU
>&
rowFilter_
,
SeparableFilterEngine_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
rowFilter_
,
const
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter_
)
:
const
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter_
)
:
rowFilter
(
rowFilter_
),
columnFilter
(
columnFilter_
)
rowFilter
(
rowFilter_
),
columnFilter
(
columnFilter_
)
{
{
...
@@ -208,6 +190,9 @@ namespace
...
@@ -208,6 +190,9 @@ namespace
srcROI
=
src
(
roi
);
srcROI
=
src
(
roi
);
dstROI
=
dst
(
roi
);
dstROI
=
dst
(
roi
);
dstBufROI
=
dstBuf
(
roi
);
dstBufROI
=
dstBuf
(
roi
);
(
*
rowFilter
)(
srcROI
,
dstBufROI
);
(
*
columnFilter
)(
dstBufROI
,
dstROI
);
}
}
Ptr
<
BaseRowFilter_GPU
>
rowFilter
;
Ptr
<
BaseRowFilter_GPU
>
rowFilter
;
...
@@ -219,32 +204,12 @@ namespace
...
@@ -219,32 +204,12 @@ namespace
GpuMat
dstROI
;
GpuMat
dstROI
;
GpuMat
dstBufROI
;
GpuMat
dstBufROI
;
};
};
template
<
typename
FA
>
class
SeparableFilterEngine_GPU
:
public
SeparableFilterEngine_GPU_base
{
public
:
SeparableFilterEngine_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
rowFilter_
,
const
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter_
,
FA
fa_
)
:
SeparableFilterEngine_GPU_base
(
rowFilter_
,
columnFilter_
),
fa
(
fa_
)
{
}
virtual
void
apply
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Rect
roi
=
Rect
(
0
,
0
,
-
1
,
-
1
))
{
SeparableFilterEngine_GPU_base
::
apply
(
src
,
dst
,
roi
);
fa
(
rowFilter
,
columnFilter
,
srcROI
,
dstROI
,
dstBufROI
);
}
FA
fa
;
};
}
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableFilter_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
rowFilter
,
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableFilter_GPU
(
const
Ptr
<
BaseRowFilter_GPU
>&
rowFilter
,
const
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter
,
bool
rowFilterFirst
)
const
Ptr
<
BaseColumnFilter_GPU
>&
columnFilter
)
{
{
if
(
rowFilterFirst
)
return
Ptr
<
FilterEngine_GPU
>
(
new
SeparableFilterEngine_GPU
(
rowFilter
,
columnFilter
));
return
Ptr
<
FilterEngine_GPU
>
(
new
SeparableFilterEngine_GPU
<
RowColumnFilterApply
>
(
rowFilter
,
columnFilter
,
RowColumnFilterApply
()));
return
Ptr
<
FilterEngine_GPU
>
(
new
SeparableFilterEngine_GPU
<
ColumnRowFilterApply
>
(
rowFilter
,
columnFilter
,
ColumnRowFilterApply
()));
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
...
@@ -398,7 +363,7 @@ namespace
...
@@ -398,7 +363,7 @@ namespace
};
};
}
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getMorphologyFilter_GPU
(
int
op
,
int
type
,
const
Gpu
Mat
&
kernel
,
const
Size
&
ksize
,
Point
anchor
)
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getMorphologyFilter_GPU
(
int
op
,
int
type
,
const
Mat
&
kernel
,
const
Size
&
ksize
,
Point
anchor
)
{
{
static
const
nppMorfFilter_t
nppMorfFilter_callers
[
2
][
5
]
=
static
const
nppMorfFilter_t
nppMorfFilter_callers
[
2
][
5
]
=
{
{
...
@@ -408,11 +373,12 @@ Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const Gpu
...
@@ -408,11 +373,12 @@ Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const Gpu
CV_Assert
(
op
==
MORPH_ERODE
||
op
==
MORPH_DILATE
);
CV_Assert
(
op
==
MORPH_ERODE
||
op
==
MORPH_DILATE
);
CV_Assert
(
type
==
CV_8UC1
||
type
==
CV_8UC4
);
CV_Assert
(
type
==
CV_8UC1
||
type
==
CV_8UC4
);
CV_Assert
(
kernel
.
type
()
==
CV_8UC1
&&
kernel
.
rows
==
1
&&
kernel
.
cols
==
ksize
.
area
());
GpuMat
gpu_krnl
;
normalizeKernel
(
kernel
,
gpu_krnl
);
normalizeAnchor
(
anchor
,
ksize
);
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseFilter_GPU
>
(
new
NPPMorphFilter
(
ksize
,
anchor
,
kerne
l
,
nppMorfFilter_callers
[
op
][
CV_MAT_CN
(
type
)]));
return
Ptr
<
BaseFilter_GPU
>
(
new
NPPMorphFilter
(
ksize
,
anchor
,
gpu_krn
l
,
nppMorfFilter_callers
[
op
][
CV_MAT_CN
(
type
)]));
}
}
namespace
namespace
...
@@ -447,10 +413,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, cons
...
@@ -447,10 +413,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, cons
Size
ksize
=
kernel
.
size
();
Size
ksize
=
kernel
.
size
();
GpuMat
gpu_krnl
;
Ptr
<
BaseFilter_GPU
>
filter2D
=
getMorphologyFilter_GPU
(
op
,
type
,
kernel
,
ksize
,
anchor
);
normalizeKernel
(
kernel
,
gpu_krnl
);
Ptr
<
BaseFilter_GPU
>
filter2D
=
getMorphologyFilter_GPU
(
op
,
type
,
gpu_krnl
,
ksize
,
anchor
);
return
Ptr
<
FilterEngine_GPU
>
(
new
MorphologyFilterEngine_GPU
(
filter2D
,
iterations
));
return
Ptr
<
FilterEngine_GPU
>
(
new
MorphologyFilterEngine_GPU
(
filter2D
,
iterations
));
}
}
...
@@ -575,27 +538,25 @@ namespace
...
@@ -575,27 +538,25 @@ namespace
};
};
}
}
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
GpuMat
&
kernel
,
const
Size
&
ksize
,
Point
anchor
,
int
nDivis
or
)
Ptr
<
BaseFilter_GPU
>
cv
::
gpu
::
getLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
kernel
,
const
Size
&
ksize
,
Point
anch
or
)
{
{
static
const
nppFilter2D_t
cppFilter2D_callers
[]
=
{
0
,
nppiFilter_8u_C1R
,
0
,
0
,
nppiFilter_8u_C4R
};
static
const
nppFilter2D_t
cppFilter2D_callers
[]
=
{
0
,
nppiFilter_8u_C1R
,
0
,
0
,
nppiFilter_8u_C4R
};
CV_Assert
((
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
&&
dstType
==
srcType
);
CV_Assert
((
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
&&
dstType
==
srcType
);
CV_Assert
(
kernel
.
type
()
==
CV_32SC1
&&
kernel
.
rows
==
1
&&
kernel
.
cols
==
ksize
.
area
());
GpuMat
gpu_krnl
;
int
nDivisor
;
normalizeKernel
(
kernel
,
gpu_krnl
,
CV_32S
,
&
nDivisor
,
true
);
normalizeAnchor
(
anchor
,
ksize
);
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseFilter_GPU
>
(
new
NPPLinearFilter
(
ksize
,
anchor
,
kerne
l
,
nDivisor
,
cppFilter2D_callers
[
CV_MAT_CN
(
srcType
)]));
return
Ptr
<
BaseFilter_GPU
>
(
new
NPPLinearFilter
(
ksize
,
anchor
,
gpu_krn
l
,
nDivisor
,
cppFilter2D_callers
[
CV_MAT_CN
(
srcType
)]));
}
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
kernel
,
const
Point
&
anchor
)
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
kernel
,
const
Point
&
anchor
)
{
{
Size
ksize
=
kernel
.
size
();
Size
ksize
=
kernel
.
size
();
GpuMat
gpu_krnl
;
Ptr
<
BaseFilter_GPU
>
linearFilter
=
getLinearFilter_GPU
(
srcType
,
dstType
,
kernel
,
ksize
,
anchor
);
int
nDivisor
;
normalizeKernel
(
kernel
,
gpu_krnl
,
CV_32S
,
&
nDivisor
,
true
);
Ptr
<
BaseFilter_GPU
>
linearFilter
=
getLinearFilter_GPU
(
srcType
,
dstType
,
gpu_krnl
,
ksize
,
anchor
,
nDivisor
);
return
createFilter2D_GPU
(
linearFilter
);
return
createFilter2D_GPU
(
linearFilter
);
}
}
...
@@ -614,11 +575,26 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
...
@@ -614,11 +575,26 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter
// Separable Linear Filter
namespace
cv
{
namespace
gpu
{
namespace
filters
{
void
linearRowFilter_gpu_32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32s32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
}}}
namespace
namespace
{
{
typedef
NppStatus
(
*
nppFilter1D_t
)(
const
Npp8u
*
pSrc
,
Npp32s
nSrcStep
,
Npp8u
*
pDst
,
Npp32s
nDstStep
,
NppiSize
oROI
,
typedef
NppStatus
(
*
nppFilter1D_t
)(
const
Npp8u
*
pSrc
,
Npp32s
nSrcStep
,
Npp8u
*
pDst
,
Npp32s
nDstStep
,
NppiSize
oROI
,
const
Npp32s
*
pKernel
,
Npp32s
nMaskSize
,
Npp32s
nAnchor
,
Npp32s
nDivisor
);
const
Npp32s
*
pKernel
,
Npp32s
nMaskSize
,
Npp32s
nAnchor
,
Npp32s
nDivisor
);
typedef
void
(
*
gpuFilter1D_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
class
NppLinearRowFilter
:
public
BaseRowFilter_GPU
class
NppLinearRowFilter
:
public
BaseRowFilter_GPU
{
{
public
:
public
:
...
@@ -638,20 +614,64 @@ namespace
...
@@ -638,20 +614,64 @@ namespace
Npp32s
nDivisor
;
Npp32s
nDivisor
;
nppFilter1D_t
func
;
nppFilter1D_t
func
;
};
};
class
GpuLinearRowFilter
:
public
BaseRowFilter_GPU
{
public
:
GpuLinearRowFilter
(
int
ksize_
,
int
anchor_
,
const
Mat
&
kernel_
,
gpuFilter1D_t
func_
)
:
BaseRowFilter_GPU
(
ksize_
,
anchor_
),
kernel
(
kernel_
),
func
(
func_
)
{}
virtual
void
operator
()(
const
GpuMat
&
src
,
GpuMat
&
dst
)
{
func
(
src
,
dst
,
kernel
.
ptr
<
float
>
(),
ksize
,
anchor
);
}
Mat
kernel
;
gpuFilter1D_t
func
;
};
}
}
Ptr
<
BaseRowFilter_GPU
>
cv
::
gpu
::
getLinearRowFilter_GPU
(
int
srcType
,
int
bufType
,
const
GpuMat
&
rowKernel
,
int
anchor
,
int
nDivis
or
)
Ptr
<
BaseRowFilter_GPU
>
cv
::
gpu
::
getLinearRowFilter_GPU
(
int
srcType
,
int
bufType
,
const
Mat
&
rowKernel
,
int
anch
or
)
{
{
using
namespace
cv
::
gpu
::
filters
;
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterRow_8u_C1R
,
0
,
0
,
nppiFilterRow_8u_C4R
};
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterRow_8u_C1R
,
0
,
0
,
nppiFilterRow_8u_C4R
};
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
{
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32s32s
,
linearRowFilter_gpu_32s32f
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32f32s
,
linearRowFilter_gpu_32f32f
}
};
CV_Assert
((
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
&&
bufType
==
srcType
);
if
((
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
&&
bufType
==
srcType
)
CV_Assert
(
rowKernel
.
type
()
==
CV_32SC1
&&
rowKernel
.
rows
==
1
);
{
GpuMat
gpu_row_krnl
;
int
nDivisor
;
normalizeKernel
(
rowKernel
,
gpu_row_krnl
,
CV_32S
,
&
nDivisor
,
true
);
int
ksize
=
rowKernel
.
cols
;
int
ksize
=
gpu_row_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseRowFilter_GPU
>
(
new
NppLinearRowFilter
(
ksize
,
anchor
,
gpu_row_krnl
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
srcType
)]));
}
else
if
((
srcType
==
CV_32SC1
||
srcType
==
CV_32FC1
)
&&
(
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
))
{
Mat
temp
(
rowKernel
.
size
(),
CV_32FC1
);
rowKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseRowFilter_GPU
>
(
new
NppLinearRowFilter
(
ksize
,
anchor
,
rowKernel
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
srcType
)]));
return
Ptr
<
BaseRowFilter_GPU
>
(
new
GpuLinearRowFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
srcType
)][
CV_MAT_DEPTH
(
bufType
)]));
}
CV_Assert
(
!
"Unsupported types"
);
return
Ptr
<
BaseRowFilter_GPU
>
(
0
);
}
}
namespace
namespace
...
@@ -675,49 +695,88 @@ namespace
...
@@ -675,49 +695,88 @@ namespace
Npp32s
nDivisor
;
Npp32s
nDivisor
;
nppFilter1D_t
func
;
nppFilter1D_t
func
;
};
};
class
GpuLinearColumnFilter
:
public
BaseColumnFilter_GPU
{
public
:
GpuLinearColumnFilter
(
int
ksize_
,
int
anchor_
,
const
Mat
&
kernel_
,
gpuFilter1D_t
func_
)
:
BaseColumnFilter_GPU
(
ksize_
,
anchor_
),
kernel
(
kernel_
),
func
(
func_
)
{}
virtual
void
operator
()(
const
GpuMat
&
src
,
GpuMat
&
dst
)
{
func
(
src
,
dst
,
kernel
.
ptr
<
float
>
(),
ksize
,
anchor
);
}
Mat
kernel
;
gpuFilter1D_t
func
;
};
}
}
Ptr
<
BaseColumnFilter_GPU
>
cv
::
gpu
::
getLinearColumnFilter_GPU
(
int
bufType
,
int
dstType
,
const
GpuMat
&
columnKernel
,
int
anchor
,
int
nDivis
or
)
Ptr
<
BaseColumnFilter_GPU
>
cv
::
gpu
::
getLinearColumnFilter_GPU
(
int
bufType
,
int
dstType
,
const
Mat
&
columnKernel
,
int
anch
or
)
{
{
using
namespace
cv
::
gpu
::
filters
;
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterColumn_8u_C1R
,
0
,
0
,
nppiFilterColumn_8u_C4R
};
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterColumn_8u_C1R
,
0
,
0
,
nppiFilterColumn_8u_C4R
};
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
{
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32s32s
,
linearColumnFilter_gpu_32s32f
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32f32s
,
linearColumnFilter_gpu_32f32f
}
};
CV_Assert
((
bufType
==
CV_8UC1
||
bufType
==
CV_8UC4
)
&&
dstType
==
bufType
);
if
((
bufType
==
CV_8UC1
||
bufType
==
CV_8UC4
)
&&
dstType
==
bufType
)
CV_Assert
(
columnKernel
.
type
()
==
CV_32SC1
&&
columnKernel
.
rows
==
1
);
{
GpuMat
gpu_col_krnl
;
int
nDivisor
;
normalizeKernel
(
columnKernel
,
gpu_col_krnl
,
CV_32S
,
&
nDivisor
,
true
);
int
ksize
=
columnKernel
.
cols
;
int
ksize
=
gpu_col_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
NppLinearColumnFilter
(
ksize
,
anchor
,
gpu_col_krnl
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
bufType
)]));
}
else
if
((
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
)
&&
(
dstType
==
CV_32SC1
||
dstType
==
CV_32FC1
))
{
Mat
temp
(
columnKernel
.
size
(),
CV_32FC1
);
columnKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
NppLinearColumnFilter
(
ksize
,
anchor
,
columnKernel
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
bufType
)]));
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
GpuLinearColumnFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
bufType
)][
CV_MAT_DEPTH
(
dstType
)]));
}
CV_Assert
(
!
"Unsupported types"
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
0
);
}
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
rowKernel
,
const
Mat
&
columnKernel
,
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
rowKernel
,
const
Mat
&
columnKernel
,
const
Point
&
anchor
,
bool
rowFilterFirst
)
const
Point
&
anchor
)
{
{
int
sdepth
=
CV_MAT_DEPTH
(
srcType
),
ddepth
=
CV_MAT_DEPTH
(
dstType
);
int
sdepth
=
CV_MAT_DEPTH
(
srcType
),
ddepth
=
CV_MAT_DEPTH
(
dstType
);
int
cn
=
CV_MAT_CN
(
srcType
);
int
cn
=
CV_MAT_CN
(
srcType
);
int
bdepth
=
std
::
max
(
sdepth
,
ddepth
);
int
bdepth
=
std
::
max
(
sdepth
,
ddepth
);
int
bufType
=
CV_MAKETYPE
(
bdepth
,
cn
);
int
bufType
=
CV_MAKETYPE
(
bdepth
,
cn
);
GpuMat
gpu_row_krnl
,
gpu_col_krnl
;
Ptr
<
BaseRowFilter_GPU
>
rowFilter
=
getLinearRowFilter_GPU
(
srcType
,
bufType
,
rowKernel
,
anchor
.
x
);
int
nRowDivisor
,
nColDivisor
;
Ptr
<
BaseColumnFilter_GPU
>
columnFilter
=
getLinearColumnFilter_GPU
(
bufType
,
dstType
,
columnKernel
,
anchor
.
y
);
normalizeKernel
(
rowKernel
,
gpu_row_krnl
,
CV_32S
,
&
nRowDivisor
,
true
);
normalizeKernel
(
columnKernel
,
gpu_col_krnl
,
CV_32S
,
&
nColDivisor
,
true
);
Ptr
<
BaseRowFilter_GPU
>
rowFilter
=
getLinearRowFilter_GPU
(
srcType
,
bufType
,
gpu_row_krnl
,
anchor
.
x
,
nRowDivisor
);
Ptr
<
BaseColumnFilter_GPU
>
columnFilter
=
getLinearColumnFilter_GPU
(
bufType
,
dstType
,
gpu_col_krnl
,
anchor
.
y
,
nColDivisor
);
return
createSeparableFilter_GPU
(
rowFilter
,
columnFilter
,
rowFilterFirst
);
return
createSeparableFilter_GPU
(
rowFilter
,
columnFilter
);
}
}
void
cv
::
gpu
::
sepFilter2D
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
const
Mat
&
kernelX
,
const
Mat
&
kernelY
,
Point
anchor
,
bool
rowFilterFirst
)
void
cv
::
gpu
::
sepFilter2D
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
const
Mat
&
kernelX
,
const
Mat
&
kernelY
,
Point
anchor
)
{
{
if
(
ddepth
<
0
)
if
(
ddepth
<
0
)
ddepth
=
src
.
depth
();
ddepth
=
src
.
depth
();
dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
src
.
channels
()));
dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
src
.
channels
()));
Ptr
<
FilterEngine_GPU
>
f
=
createSeparableLinearFilter_GPU
(
src
.
type
(),
dst
.
type
(),
kernelX
,
kernelY
,
anchor
,
rowFilterFirst
);
Ptr
<
FilterEngine_GPU
>
f
=
createSeparableLinearFilter_GPU
(
src
.
type
(),
dst
.
type
(),
kernelX
,
kernelY
,
anchor
);
f
->
apply
(
src
,
dst
);
f
->
apply
(
src
,
dst
);
}
}
...
@@ -728,7 +787,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, i
...
@@ -728,7 +787,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, i
{
{
Mat
kx
,
ky
;
Mat
kx
,
ky
;
getDerivKernels
(
kx
,
ky
,
dx
,
dy
,
ksize
,
false
,
CV_32F
);
getDerivKernels
(
kx
,
ky
,
dx
,
dy
,
ksize
,
false
,
CV_32F
);
return
createSeparableLinearFilter_GPU
(
srcType
,
dstType
,
kx
,
ky
,
Point
(
-
1
,
-
1
)
,
dx
>=
dy
);
return
createSeparableLinearFilter_GPU
(
srcType
,
dstType
,
kx
,
ky
,
Point
(
-
1
,
-
1
));
}
}
void
cv
::
gpu
::
Sobel
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
dx
,
int
dy
,
int
ksize
,
double
scale
)
void
cv
::
gpu
::
Sobel
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
dx
,
int
dy
,
int
ksize
,
double
scale
)
...
@@ -746,7 +805,7 @@ void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
...
@@ -746,7 +805,7 @@ void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
ky
*=
scale
;
ky
*=
scale
;
}
}
sepFilter2D
(
src
,
dst
,
ddepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
)
,
dx
>=
dy
);
sepFilter2D
(
src
,
dst
,
ddepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
));
}
}
void
cv
::
gpu
::
Scharr
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
dx
,
int
dy
,
double
scale
)
void
cv
::
gpu
::
Scharr
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
dx
,
int
dy
,
double
scale
)
...
@@ -764,7 +823,7 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
...
@@ -764,7 +823,7 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
ky
*=
scale
;
ky
*=
scale
;
}
}
sepFilter2D
(
src
,
dst
,
ddepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
)
,
dx
>=
dy
);
sepFilter2D
(
src
,
dst
,
ddepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
));
}
}
void
cv
::
gpu
::
Laplacian
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
ksize
,
double
scale
)
void
cv
::
gpu
::
Laplacian
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
ddepth
,
int
ksize
,
double
scale
)
...
...
modules/gpu/src/imgproc_gpu.cpp
View file @
b08f6082
...
@@ -75,7 +75,7 @@ void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu();
...
@@ -75,7 +75,7 @@ void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu();
namespace
cv
{
namespace
gpu
namespace
cv
{
namespace
gpu
{
{
namespace
improc
namespace
im
g
proc
{
{
void
remap_gpu_1c
(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
void
remap_gpu_1c
(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
void
remap_gpu_3c
(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
void
remap_gpu_3c
(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
...
@@ -142,7 +142,7 @@ namespace cv { namespace gpu
...
@@ -142,7 +142,7 @@ namespace cv { namespace gpu
void
cv
::
gpu
::
remap
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
xmap
,
const
GpuMat
&
ymap
)
void
cv
::
gpu
::
remap
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
xmap
,
const
GpuMat
&
ymap
)
{
{
typedef
void
(
*
remap_gpu_t
)(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
typedef
void
(
*
remap_gpu_t
)(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
static
const
remap_gpu_t
callers
[]
=
{
im
proc
::
remap_gpu_1c
,
0
,
im
proc
::
remap_gpu_3c
};
static
const
remap_gpu_t
callers
[]
=
{
im
gproc
::
remap_gpu_1c
,
0
,
img
proc
::
remap_gpu_3c
};
CV_Assert
((
src
.
type
()
==
CV_8U
||
src
.
type
()
==
CV_8UC3
)
&&
xmap
.
type
()
==
CV_32F
&&
ymap
.
type
()
==
CV_32F
);
CV_Assert
((
src
.
type
()
==
CV_8U
||
src
.
type
()
==
CV_8UC3
)
&&
xmap
.
type
()
==
CV_32F
&&
ymap
.
type
()
==
CV_32F
);
...
@@ -180,7 +180,7 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
...
@@ -180,7 +180,7 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
eps
=
1.
f
;
eps
=
1.
f
;
eps
=
(
float
)
std
::
max
(
criteria
.
epsilon
,
0.0
);
eps
=
(
float
)
std
::
max
(
criteria
.
epsilon
,
0.0
);
improc
::
meanShiftFiltering_gpu
(
src
,
dst
,
sp
,
sr
,
maxIter
,
eps
);
im
g
proc
::
meanShiftFiltering_gpu
(
src
,
dst
,
sp
,
sr
,
maxIter
,
eps
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -207,7 +207,7 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
...
@@ -207,7 +207,7 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
eps
=
1.
f
;
eps
=
1.
f
;
eps
=
(
float
)
std
::
max
(
criteria
.
epsilon
,
0.0
);
eps
=
(
float
)
std
::
max
(
criteria
.
epsilon
,
0.0
);
improc
::
meanShiftProc_gpu
(
src
,
dstr
,
dstsp
,
sp
,
sr
,
maxIter
,
eps
);
im
g
proc
::
meanShiftProc_gpu
(
src
,
dstr
,
dstsp
,
sp
,
sr
,
maxIter
,
eps
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -223,7 +223,7 @@ namespace
...
@@ -223,7 +223,7 @@ namespace
out
=
dst
;
out
=
dst
;
out
.
create
(
src
.
size
(),
CV_8UC4
);
out
.
create
(
src
.
size
(),
CV_8UC4
);
improc
::
drawColorDisp_gpu
((
DevMem2D_
<
T
>
)
src
,
out
,
ndisp
,
stream
);
im
g
proc
::
drawColorDisp_gpu
((
DevMem2D_
<
T
>
)
src
,
out
,
ndisp
,
stream
);
dst
=
out
;
dst
=
out
;
}
}
...
@@ -256,7 +256,7 @@ namespace
...
@@ -256,7 +256,7 @@ namespace
void
reprojectImageTo3D_caller
(
const
GpuMat
&
disp
,
GpuMat
&
xyzw
,
const
Mat
&
Q
,
const
cudaStream_t
&
stream
)
void
reprojectImageTo3D_caller
(
const
GpuMat
&
disp
,
GpuMat
&
xyzw
,
const
Mat
&
Q
,
const
cudaStream_t
&
stream
)
{
{
xyzw
.
create
(
disp
.
rows
,
disp
.
cols
,
CV_32FC4
);
xyzw
.
create
(
disp
.
rows
,
disp
.
cols
,
CV_32FC4
);
improc
::
reprojectImageTo3D_gpu
((
DevMem2D_
<
T
>
)
disp
,
xyzw
,
Q
.
ptr
<
float
>
(),
stream
);
im
g
proc
::
reprojectImageTo3D_gpu
((
DevMem2D_
<
T
>
)
disp
,
xyzw
,
Q
.
ptr
<
float
>
(),
stream
);
}
}
typedef
void
(
*
reprojectImageTo3D_caller_t
)(
const
GpuMat
&
disp
,
GpuMat
&
xyzw
,
const
Mat
&
Q
,
const
cudaStream_t
&
stream
);
typedef
void
(
*
reprojectImageTo3D_caller_t
)(
const
GpuMat
&
disp
,
GpuMat
&
xyzw
,
const
Mat
&
Q
,
const
cudaStream_t
&
stream
);
...
@@ -313,7 +313,7 @@ namespace
...
@@ -313,7 +313,7 @@ namespace
case
CV_RGBA2BGR
:
case
CV_RGB2BGR
:
case
CV_BGRA2RGBA
:
case
CV_RGBA2BGR
:
case
CV_RGB2BGR
:
case
CV_BGRA2RGBA
:
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
cudaStream_t
stream
);
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
im
proc
::
RGB2RGB_gpu_8u
,
0
,
improc
::
RGB2RGB_gpu_16u
,
0
,
0
,
im
proc
::
RGB2RGB_gpu_32f
};
static
const
func_t
funcs
[]
=
{
im
gproc
::
RGB2RGB_gpu_8u
,
0
,
imgproc
::
RGB2RGB_gpu_16u
,
0
,
0
,
img
proc
::
RGB2RGB_gpu_32f
};
CV_Assert
(
scn
==
3
||
scn
==
4
);
CV_Assert
(
scn
==
3
||
scn
==
4
);
...
@@ -338,7 +338,7 @@ namespace
...
@@ -338,7 +338,7 @@ namespace
dst
.
create
(
sz
,
CV_8UC2
);
dst
.
create
(
sz
,
CV_8UC2
);
improc
::
RGB2RGB5x5_gpu
(
src
,
scn
,
dst
,
green_bits
,
bidx
,
stream
);
im
g
proc
::
RGB2RGB5x5_gpu
(
src
,
scn
,
dst
,
green_bits
,
bidx
,
stream
);
break
;
break
;
}
}
...
@@ -356,14 +356,14 @@ namespace
...
@@ -356,14 +356,14 @@ namespace
dst
.
create
(
sz
,
CV_MAKETYPE
(
depth
,
dcn
));
dst
.
create
(
sz
,
CV_MAKETYPE
(
depth
,
dcn
));
improc
::
RGB5x52RGB_gpu
(
src
,
green_bits
,
dst
,
dcn
,
bidx
,
stream
);
im
g
proc
::
RGB5x52RGB_gpu
(
src
,
green_bits
,
dst
,
dcn
,
bidx
,
stream
);
break
;
break
;
}
}
case
CV_BGR2GRAY
:
case
CV_BGRA2GRAY
:
case
CV_RGB2GRAY
:
case
CV_RGBA2GRAY
:
case
CV_BGR2GRAY
:
case
CV_BGRA2GRAY
:
case
CV_RGB2GRAY
:
case
CV_RGBA2GRAY
:
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
bidx
,
cudaStream_t
stream
);
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
bidx
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
im
proc
::
RGB2Gray_gpu_8u
,
0
,
improc
::
RGB2Gray_gpu_16u
,
0
,
0
,
im
proc
::
RGB2Gray_gpu_32f
};
static
const
func_t
funcs
[]
=
{
im
gproc
::
RGB2Gray_gpu_8u
,
0
,
imgproc
::
RGB2Gray_gpu_16u
,
0
,
0
,
img
proc
::
RGB2Gray_gpu_32f
};
CV_Assert
(
scn
==
3
||
scn
==
4
);
CV_Assert
(
scn
==
3
||
scn
==
4
);
...
@@ -383,14 +383,14 @@ namespace
...
@@ -383,14 +383,14 @@ namespace
dst
.
create
(
sz
,
CV_8UC1
);
dst
.
create
(
sz
,
CV_8UC1
);
improc
::
RGB5x52Gray_gpu
(
src
,
green_bits
,
dst
,
stream
);
im
g
proc
::
RGB5x52Gray_gpu
(
src
,
green_bits
,
dst
,
stream
);
break
;
break
;
}
}
case
CV_GRAY2BGR
:
case
CV_GRAY2BGRA
:
case
CV_GRAY2BGR
:
case
CV_GRAY2BGRA
:
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
dstcn
,
cudaStream_t
stream
);
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
dstcn
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
im
proc
::
Gray2RGB_gpu_8u
,
0
,
improc
::
Gray2RGB_gpu_16u
,
0
,
0
,
im
proc
::
Gray2RGB_gpu_32f
};
static
const
func_t
funcs
[]
=
{
im
gproc
::
Gray2RGB_gpu_8u
,
0
,
imgproc
::
Gray2RGB_gpu_16u
,
0
,
0
,
img
proc
::
Gray2RGB_gpu_32f
};
if
(
dcn
<=
0
)
dcn
=
3
;
if
(
dcn
<=
0
)
dcn
=
3
;
...
@@ -410,7 +410,7 @@ namespace
...
@@ -410,7 +410,7 @@ namespace
dst
.
create
(
sz
,
CV_8UC2
);
dst
.
create
(
sz
,
CV_8UC2
);
improc
::
Gray2RGB5x5_gpu
(
src
,
dst
,
green_bits
,
stream
);
im
g
proc
::
Gray2RGB5x5_gpu
(
src
,
dst
,
green_bits
,
stream
);
break
;
break
;
}
}
...
@@ -419,7 +419,7 @@ namespace
...
@@ -419,7 +419,7 @@ namespace
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
const
void
*
coeffs
,
cudaStream_t
stream
);
const
void
*
coeffs
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
im
proc
::
RGB2YCrCb_gpu_8u
,
0
,
improc
::
RGB2YCrCb_gpu_16u
,
0
,
0
,
im
proc
::
RGB2YCrCb_gpu_32f
};
static
const
func_t
funcs
[]
=
{
im
gproc
::
RGB2YCrCb_gpu_8u
,
0
,
imgproc
::
RGB2YCrCb_gpu_16u
,
0
,
0
,
img
proc
::
RGB2YCrCb_gpu_32f
};
if
(
dcn
<=
0
)
dcn
=
3
;
if
(
dcn
<=
0
)
dcn
=
3
;
CV_Assert
((
scn
==
3
||
scn
==
4
)
&&
(
dcn
==
3
||
dcn
==
4
));
CV_Assert
((
scn
==
3
||
scn
==
4
)
&&
(
dcn
==
3
||
dcn
==
4
));
...
@@ -456,7 +456,7 @@ namespace
...
@@ -456,7 +456,7 @@ namespace
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
const
void
*
coeffs
,
cudaStream_t
stream
);
const
void
*
coeffs
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
im
proc
::
YCrCb2RGB_gpu_8u
,
0
,
improc
::
YCrCb2RGB_gpu_16u
,
0
,
0
,
im
proc
::
YCrCb2RGB_gpu_32f
};
static
const
func_t
funcs
[]
=
{
im
gproc
::
YCrCb2RGB_gpu_8u
,
0
,
imgproc
::
YCrCb2RGB_gpu_16u
,
0
,
0
,
img
proc
::
YCrCb2RGB_gpu_32f
};
if
(
dcn
<=
0
)
dcn
=
3
;
if
(
dcn
<=
0
)
dcn
=
3
;
...
@@ -485,7 +485,7 @@ namespace
...
@@ -485,7 +485,7 @@ namespace
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
const
void
*
coeffs
,
cudaStream_t
stream
);
const
void
*
coeffs
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
im
proc
::
RGB2XYZ_gpu_8u
,
0
,
improc
::
RGB2XYZ_gpu_16u
,
0
,
0
,
im
proc
::
RGB2XYZ_gpu_32f
};
static
const
func_t
funcs
[]
=
{
im
gproc
::
RGB2XYZ_gpu_8u
,
0
,
imgproc
::
RGB2XYZ_gpu_16u
,
0
,
0
,
img
proc
::
RGB2XYZ_gpu_32f
};
if
(
dcn
<=
0
)
dcn
=
3
;
if
(
dcn
<=
0
)
dcn
=
3
;
...
@@ -534,7 +534,7 @@ namespace
...
@@ -534,7 +534,7 @@ namespace
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
const
void
*
coeffs
,
cudaStream_t
stream
);
const
void
*
coeffs
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
im
proc
::
XYZ2RGB_gpu_8u
,
0
,
improc
::
XYZ2RGB_gpu_16u
,
0
,
0
,
im
proc
::
XYZ2RGB_gpu_32f
};
static
const
func_t
funcs
[]
=
{
im
gproc
::
XYZ2RGB_gpu_8u
,
0
,
imgproc
::
XYZ2RGB_gpu_16u
,
0
,
0
,
img
proc
::
XYZ2RGB_gpu_32f
};
if
(
dcn
<=
0
)
dcn
=
3
;
if
(
dcn
<=
0
)
dcn
=
3
;
...
@@ -584,8 +584,8 @@ namespace
...
@@ -584,8 +584,8 @@ namespace
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
int
hrange
,
cudaStream_t
stream
);
int
hrange
,
cudaStream_t
stream
);
static
const
func_t
funcs_hsv
[]
=
{
im
proc
::
RGB2HSV_gpu_8u
,
0
,
0
,
0
,
0
,
im
proc
::
RGB2HSV_gpu_32f
};
static
const
func_t
funcs_hsv
[]
=
{
im
gproc
::
RGB2HSV_gpu_8u
,
0
,
0
,
0
,
0
,
img
proc
::
RGB2HSV_gpu_32f
};
static
const
func_t
funcs_hls
[]
=
{
im
proc
::
RGB2HLS_gpu_8u
,
0
,
0
,
0
,
0
,
im
proc
::
RGB2HLS_gpu_32f
};
static
const
func_t
funcs_hls
[]
=
{
im
gproc
::
RGB2HLS_gpu_8u
,
0
,
0
,
0
,
0
,
img
proc
::
RGB2HLS_gpu_32f
};
if
(
dcn
<=
0
)
dcn
=
3
;
if
(
dcn
<=
0
)
dcn
=
3
;
...
@@ -610,8 +610,8 @@ namespace
...
@@ -610,8 +610,8 @@ namespace
{
{
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
int
srccn
,
const
DevMem2D
&
dst
,
int
dstcn
,
int
bidx
,
int
hrange
,
cudaStream_t
stream
);
int
hrange
,
cudaStream_t
stream
);
static
const
func_t
funcs_hsv
[]
=
{
im
proc
::
HSV2RGB_gpu_8u
,
0
,
0
,
0
,
0
,
im
proc
::
HSV2RGB_gpu_32f
};
static
const
func_t
funcs_hsv
[]
=
{
im
gproc
::
HSV2RGB_gpu_8u
,
0
,
0
,
0
,
0
,
img
proc
::
HSV2RGB_gpu_32f
};
static
const
func_t
funcs_hls
[]
=
{
im
proc
::
HLS2RGB_gpu_8u
,
0
,
0
,
0
,
0
,
im
proc
::
HLS2RGB_gpu_32f
};
static
const
func_t
funcs_hls
[]
=
{
im
gproc
::
HLS2RGB_gpu_8u
,
0
,
0
,
0
,
0
,
img
proc
::
HLS2RGB_gpu_32f
};
if
(
dcn
<=
0
)
dcn
=
3
;
if
(
dcn
<=
0
)
dcn
=
3
;
...
...
modules/gpu/src/matrix_operations.cpp
View file @
b08f6082
...
@@ -77,6 +77,22 @@ namespace cv
...
@@ -77,6 +77,22 @@ namespace cv
#else
/* !defined (HAVE_CUDA) */
#else
/* !defined (HAVE_CUDA) */
namespace
cv
{
namespace
gpu
{
namespace
matrix_operations
{
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
set_to_without_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
set_to_with_mask
(
DevMem2D
dst
,
int
depth
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
convert_to
(
const
DevMem2D
&
src
,
int
sdepth
,
DevMem2D
dst
,
int
ddepth
,
int
channels
,
double
alpha
,
double
beta
,
const
cudaStream_t
&
stream
=
0
);
}
}
}
void
cv
::
gpu
::
GpuMat
::
upload
(
const
Mat
&
m
)
void
cv
::
gpu
::
GpuMat
::
upload
(
const
Mat
&
m
)
{
{
CV_DbgAssert
(
!
m
.
empty
());
CV_DbgAssert
(
!
m
.
empty
());
...
...
tests/gpu/src/gputest_main.cpp
View file @
b08f6082
...
@@ -53,7 +53,6 @@ const char* blacklist[] =
...
@@ -53,7 +53,6 @@ const char* blacklist[] =
//"GPU-NppImageMeanStdDev", // different precision
//"GPU-NppImageMeanStdDev", // different precision
//"GPU-NppImageExp", // different precision
//"GPU-NppImageExp", // different precision
//"GPU-NppImageLog", // different precision
//"GPU-NppImageLog", // different precision
//"GPU-NppImageMagnitude", // different precision
"GPU-NppImageCanny"
,
// NPP_TEXTURE_BIND_ERROR
"GPU-NppImageCanny"
,
// NPP_TEXTURE_BIND_ERROR
//"GPU-NppImageResize", // different precision
//"GPU-NppImageResize", // different precision
...
@@ -61,8 +60,8 @@ const char* blacklist[] =
...
@@ -61,8 +60,8 @@ const char* blacklist[] =
//"GPU-NppImageWarpPerspective", // different precision
//"GPU-NppImageWarpPerspective", // different precision
//"GPU-NppImageIntegral", // different precision
//"GPU-NppImageIntegral", // different precision
//"GPU-NppImageSobel", //
???
//"GPU-NppImageSobel", //
sign error
//"GPU-NppImageScharr", //
???
//"GPU-NppImageScharr", //
sign error
//"GPU-NppImageGaussianBlur", // different precision
//"GPU-NppImageGaussianBlur", // different precision
0
0
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment