Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
d661b8e3
Commit
d661b8e3
authored
Oct 31, 2010
by
Anatoly Baksheev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added PtrStep PtrElemStep structures. Refactored name spaces,
parent
501e81eb
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
179 additions
and
227 deletions
+179
-227
devmem2d.hpp
modules/gpu/include/opencv2/gpu/devmem2d.hpp
+56
-33
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+1
-0
matrix_operations.hpp
modules/gpu/include/opencv2/gpu/matrix_operations.hpp
+1
-0
beliefpropagation.cu
modules/gpu/src/cuda/beliefpropagation.cu
+0
-0
color.cu
modules/gpu/src/cuda/color.cu
+0
-0
constantspacebp.cu
modules/gpu/src/cuda/constantspacebp.cu
+0
-0
cuda_shared.hpp
modules/gpu/src/cuda/cuda_shared.hpp
+2
-2
filters.cu
modules/gpu/src/cuda/filters.cu
+13
-11
imgproc.cu
modules/gpu/src/cuda/imgproc.cu
+18
-39
mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+19
-30
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+13
-61
split_merge.cu
modules/gpu/src/cuda/split_merge.cu
+24
-24
stereobm.cu
modules/gpu/src/cuda/stereobm.cu
+0
-0
transform.hpp
modules/gpu/src/cuda/transform.hpp
+14
-20
vecmath.hpp
modules/gpu/src/cuda/vecmath.hpp
+8
-1
filtering.cpp
modules/gpu/src/filtering.cpp
+6
-2
stereobm_gpu.cpp
modules/gpu/src/stereobm_gpu.cpp
+4
-4
No files found.
modules/gpu/include/opencv2/gpu/devmem2d.hpp
View file @
d661b8e3
...
@@ -50,56 +50,79 @@ namespace cv
...
@@ -50,56 +50,79 @@ namespace cv
// Simple lightweight structures that encapsulate information about an image on the device.
// Simple lightweight structures that encapsulate information about an image on the device.
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
template
<
typename
T
>
struct
PtrStep_
{
T
*
ptr
;
size_t
step
;
typedef
T
elem_type
;
enum
{
elem_size
=
sizeof
(
elem_type
)
};
#if defined(__CUDACC__)
#if defined(__CUDACC__)
__host__
__device__
#define __CV_GPU_HOST_DEVICE__ __host__ __device__
#else
#define __CV_GPU_HOST_DEVICE__
#endif
#endif
size_t
elemSize
()
const
{
return
elem_size
;
}
};
template
<
typename
T
>
struct
DevMem2D_
template
<
typename
T
>
struct
DevMem2D_
{
{
int
cols
;
int
cols
;
int
rows
;
int
rows
;
T
*
ptr
;
T
*
data
;
size_t
step
;
size_t
step
;
size_t
elem_step
;
/*__host__*/
DevMem2D_
()
:
cols
(
0
),
rows
(
0
),
ptr
(
0
),
step
(
0
),
elem_step
(
0
)
{}
/*__host__*/
DevMem2D_
(
int
rows_
,
int
cols_
,
T
*
ptr_
,
size_t
step_
)
:
cols
(
cols_
),
rows
(
rows_
),
ptr
(
ptr_
),
step
(
step_
),
elem_step
(
step_
/
sizeof
(
T
))
{}
template
<
typename
U
>
DevMem2D_
()
:
cols
(
0
),
rows
(
0
),
data
(
0
),
step
(
0
)
{}
/*__host__*/
DevMem2D_
(
int
rows_
,
int
cols_
,
T
*
data_
,
size_t
step_
)
:
cols
(
cols_
),
rows
(
rows_
),
data
(
data_
),
step
(
step_
)
{}
template
<
typename
U
>
explicit
DevMem2D_
(
const
DevMem2D_
<
U
>&
d
)
explicit
DevMem2D_
(
const
DevMem2D_
<
U
>&
d
)
:
cols
(
d
.
cols
),
rows
(
d
.
rows
),
ptr
((
T
*
)
d
.
ptr
),
step
(
d
.
step
),
elem_step
(
d
.
step
/
sizeof
(
T
))
{}
:
cols
(
d
.
cols
),
rows
(
d
.
rows
),
data
((
T
*
)
d
.
data
),
step
(
d
.
step
)
{}
typedef
T
elem_type
;
enum
{
elem_size
=
sizeof
(
elem_type
)
};
template
<
typename
U
>
__CV_GPU_HOST_DEVICE__
size_t
elemSize
()
const
{
return
elem_size
;
}
/*__host__*/
__CV_GPU_HOST_DEVICE__
T
*
ptr
(
int
y
=
0
)
{
return
(
T
*
)(
(
char
*
)
data
+
y
*
step
);
}
operator
PtrStep_
<
U
>
()
const
{
PtrStep_
<
U
>
dt
;
dt
.
ptr
=
ptr
;
dt
.
step
=
step
;
return
dt
;
}
__CV_GPU_HOST_DEVICE__
const
T
*
ptr
(
int
y
=
0
)
const
{
return
(
const
T
*
)(
(
const
char
*
)
data
+
y
*
step
);
}
};
template
<
typename
T
>
struct
PtrStep_
{
T
*
data
;
size_t
step
;
typedef
typename
PtrStep_
<
T
>::
elem_type
elem_type
;
PtrStep_
()
:
data
(
0
),
step
(
0
)
{}
enum
{
elem_size
=
PtrStep_
<
T
>::
elem_size
};
PtrStep_
(
const
DevMem2D_
<
T
>&
mem
)
:
data
(
mem
.
data
),
step
(
mem
.
step
)
{}
#if defined(__CUDACC__)
__host__
__device__
typedef
T
elem_type
;
#endif
enum
{
elem_size
=
sizeof
(
elem_type
)
};
size_t
elemSize
()
const
{
return
elem_size
;
}
__CV_GPU_HOST_DEVICE__
size_t
elemSize
()
const
{
return
elem_size
;
}
__CV_GPU_HOST_DEVICE__
T
*
ptr
(
int
y
=
0
)
{
return
(
T
*
)(
(
char
*
)
data
+
y
*
step
);
}
__CV_GPU_HOST_DEVICE__
const
T
*
ptr
(
int
y
=
0
)
const
{
return
(
const
T
*
)(
(
const
char
*
)
data
+
y
*
step
);
}
};
template
<
typename
T
>
struct
PtrElemStep_
:
public
PtrStep_
<
T
>
{
PtrElemStep_
(
const
DevMem2D_
<
T
>&
mem
)
:
PtrStep_
<
T
>
(
mem
)
{
step
/=
elem_size
;
}
private
:
template
<
bool
>
struct
StaticCheck
;
template
<>
struct
StaticCheck
<
true
>
{};
StaticCheck
<
256
%
sizeof
(
T
)
==
0
>
ElemStepTypeCheck
;
};
};
typedef
DevMem2D_
<
unsigned
char
>
DevMem2D
;
typedef
DevMem2D_
<
unsigned
char
>
DevMem2D
;
typedef
DevMem2D_
<
float
>
DevMem2Df
;
typedef
DevMem2D_
<
float
>
DevMem2Df
;
typedef
DevMem2D_
<
int
>
DevMem2Di
;
typedef
DevMem2D_
<
int
>
DevMem2Di
;
}
typedef
PtrStep_
<
unsigned
char
>
PtrStep
;
typedef
PtrStep_
<
float
>
PtrStepf
;
typedef
PtrStep_
<
int
>
PtrStepi
;
typedef
PtrElemStep_
<
unsigned
char
>
PtrElemStep
;
typedef
PtrElemStep_
<
float
>
PtrElemStepf
;
typedef
PtrElemStep_
<
int
>
PtrElemStepi
;
#undef __CV_GPU_HOST_DEVICE__
}
}
}
#endif
/* __OPENCV_GPU_DEVMEM2D_HPP__ */
#endif
/* __OPENCV_GPU_DEVMEM2D_HPP__ */
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
d661b8e3
...
@@ -109,6 +109,7 @@ namespace cv
...
@@ -109,6 +109,7 @@ namespace cv
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
// Contains just image size, data ptr and step.
// Contains just image size, data ptr and step.
template
<
class
T
>
operator
DevMem2D_
<
T
>
()
const
;
template
<
class
T
>
operator
DevMem2D_
<
T
>
()
const
;
template
<
class
T
>
operator
PtrStep_
<
T
>
()
const
;
//! performs a blocking upload of data to GpuMat.
//! performs a blocking upload of data to GpuMat.
void
upload
(
const
cv
::
Mat
&
m
);
void
upload
(
const
cv
::
Mat
&
m
);
...
...
modules/gpu/include/opencv2/gpu/matrix_operations.hpp
View file @
d661b8e3
...
@@ -207,6 +207,7 @@ inline GpuMat& GpuMat::operator = (const GpuMat& m)
...
@@ -207,6 +207,7 @@ inline GpuMat& GpuMat::operator = (const GpuMat& m)
inline
GpuMat
&
GpuMat
::
operator
=
(
const
Mat
&
m
)
{
upload
(
m
);
return
*
this
;
}
inline
GpuMat
&
GpuMat
::
operator
=
(
const
Mat
&
m
)
{
upload
(
m
);
return
*
this
;
}
template
<
class
T
>
inline
GpuMat
::
operator
DevMem2D_
<
T
>
()
const
{
return
DevMem2D_
<
T
>
(
rows
,
cols
,
(
T
*
)
data
,
step
);
}
template
<
class
T
>
inline
GpuMat
::
operator
DevMem2D_
<
T
>
()
const
{
return
DevMem2D_
<
T
>
(
rows
,
cols
,
(
T
*
)
data
,
step
);
}
template
<
class
T
>
inline
GpuMat
::
operator
PtrStep_
<
T
>
()
const
{
return
PtrStep_
<
T
>
(
*
this
);
}
//CPP: void GpuMat::upload(const Mat& m);
//CPP: void GpuMat::upload(const Mat& m);
...
...
modules/gpu/src/cuda/beliefpropagation.cu
View file @
d661b8e3
This diff is collapsed.
Click to expand it.
modules/gpu/src/cuda/color.cu
View file @
d661b8e3
This diff is collapsed.
Click to expand it.
modules/gpu/src/cuda/constantspacebp.cu
View file @
d661b8e3
This diff is collapsed.
Click to expand it.
modules/gpu/src/cuda/cuda_shared.hpp
View file @
d661b8e3
...
@@ -54,9 +54,9 @@ namespace cv
...
@@ -54,9 +54,9 @@ namespace cv
typedef
unsigned
char
uchar
;
typedef
unsigned
char
uchar
;
typedef
signed
char
schar
;
typedef
signed
char
schar
;
typedef
unsigned
short
ushort
;
typedef
unsigned
short
ushort
;
typedef
unsigned
int
uint
;
typedef
unsigned
int
uint
;
static
inline
int
divUp
(
int
a
,
int
b
)
{
return
(
a
%
b
==
0
)
?
a
/
b
:
a
/
b
+
1
;
}
static
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
template
<
class
T
>
template
<
class
T
>
static
inline
void
uploadConstant
(
const
char
*
name
,
const
T
&
value
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
name
,
&
value
,
sizeof
(
T
))
);
}
static
inline
void
uploadConstant
(
const
char
*
name
,
const
T
&
value
)
{
cudaSafeCall
(
cudaMemcpyToSymbol
(
name
,
&
value
,
sizeof
(
T
))
);
}
...
...
modules/gpu/src/cuda/filters.cu
View file @
d661b8e3
...
@@ -128,8 +128,8 @@ namespace cv { namespace gpu { namespace filters
...
@@ -128,8 +128,8 @@ namespace cv { namespace gpu { namespace filters
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.
ptr, src.elem_step
,
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.
data, src.step/src.elemSize()
,
dst.
ptr, dst.elem_step
, anchor, src.cols, src.rows);
dst.
data, dst.step/dst.elemSize()
, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
}
...
@@ -152,10 +152,12 @@ namespace cv { namespace gpu { namespace filters
...
@@ -152,10 +152,12 @@ namespace cv { namespace gpu { namespace filters
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
}
}
void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
template void linearRowFilter_gpu<4, uchar4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);
/* void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
{
linearRowFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor);
linearRowFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor);
}
}
*/
void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
{
linearRowFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor);
linearRowFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor);
...
@@ -262,8 +264,8 @@ namespace cv { namespace gpu { namespace filters
...
@@ -262,8 +264,8 @@ namespace cv { namespace gpu { namespace filters
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.
ptr, src.elem_step
,
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE, CN><<<blocks, threads>>>(src.
data, src.step/src.elemSize()
,
dst.
ptr, dst.elem_step
, anchor, src.cols, src.rows);
dst.
data, dst.step/dst.elemSize()
, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
}
...
@@ -357,7 +359,7 @@ namespace cv { namespace gpu { namespace bf
...
@@ -357,7 +359,7 @@ namespace cv { namespace gpu { namespace bf
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
{
{
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.
ptr, sizeof(table_space.ptr
)) );
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.
data, sizeof(table_space.data
)) );
size_t table_space_step = table_space.step / sizeof(float);
size_t table_space_step = table_space.step / sizeof(float);
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
...
@@ -491,15 +493,15 @@ namespace cv { namespace gpu { namespace bf
...
@@ -491,15 +493,15 @@ namespace cv { namespace gpu { namespace bf
case 1:
case 1:
for (int i = 0; i < iters; ++i)
for (int i = 0; i < iters; ++i)
{
{
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.
ptr, disp.step/sizeof(T), img.ptr
, img.step, disp.rows, disp.cols);
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.
data, disp.step/sizeof(T), img.data
, img.step, disp.rows, disp.cols);
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.
ptr, disp.step/sizeof(T), img.ptr
, img.step, disp.rows, disp.cols);
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.
data, disp.step/sizeof(T), img.data
, img.step, disp.rows, disp.cols);
}
}
break;
break;
case 3:
case 3:
for (int i = 0; i < iters; ++i)
for (int i = 0; i < iters; ++i)
{
{
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.
ptr, disp.step/sizeof(T), img.ptr
, img.step, disp.rows, disp.cols);
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.
data, disp.step/sizeof(T), img.data
, img.step, disp.rows, disp.cols);
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.
ptr, disp.step/sizeof(T), img.ptr
, img.step, disp.rows, disp.cols);
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.
data, disp.step/sizeof(T), img.data
, img.step, disp.rows, disp.cols);
}
}
break;
break;
default:
default:
...
...
modules/gpu/src/cuda/imgproc.cu
View file @
d661b8e3
...
@@ -45,7 +45,7 @@
...
@@ -45,7 +45,7 @@
using namespace cv::gpu;
using namespace cv::gpu;
/////////////////////////////////// Remap ///////////////////////////////////////////////
/////////////////////////////////// Remap ///////////////////////////////////////////////
namespace
imgproc_krnls
namespace
cv { namespace gpu { namespace imgproc
{
{
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
...
@@ -121,10 +121,7 @@ namespace imgproc_krnls
...
@@ -121,10 +121,7 @@ namespace imgproc_krnls
*(dst + y * dst_step + 3 * x + 2) = out.z;
*(dst + y * dst_step + 3 * x + 2) = out.z;
}
}
}
}
}
namespace cv { namespace gpu { namespace imgproc
{
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
{
{
dim3 threads(16, 16, 1);
dim3 threads(16, 16, 1);
...
@@ -132,15 +129,15 @@ namespace cv { namespace gpu { namespace imgproc
...
@@ -132,15 +129,15 @@ namespace cv { namespace gpu { namespace imgproc
grid.x = divUp(dst.cols, threads.x);
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
grid.y = divUp(dst.rows, threads.y);
imgproc_krnls::
tex_remap.filterMode = cudaFilterModeLinear;
tex_remap.filterMode = cudaFilterModeLinear;
imgproc_krnls::tex_remap.addressMode[0] = imgproc_krnls::
tex_remap.addressMode[1] = cudaAddressModeWrap;
tex_remap.addressMode[0] =
tex_remap.addressMode[1] = cudaAddressModeWrap;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D(0,
imgproc_krnls::tex_remap, src.ptr
, desc, src.cols, src.rows, src.step) );
cudaSafeCall( cudaBindTexture2D(0,
tex_remap, src.data
, desc, src.cols, src.rows, src.step) );
imgproc_krnls::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr
, dst.step, dst.cols, dst.rows);
remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data
, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture(
imgproc_krnls::
tex_remap) );
cudaSafeCall( cudaUnbindTexture(tex_remap) );
}
}
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
...
@@ -150,17 +147,13 @@ namespace cv { namespace gpu { namespace imgproc
...
@@ -150,17 +147,13 @@ namespace cv { namespace gpu { namespace imgproc
grid.x = divUp(dst.cols, threads.x);
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
grid.y = divUp(dst.rows, threads.y);
imgproc_krnls::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr
, dst.step, dst.cols, dst.rows);
remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data
, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
}
}}}
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
namespace imgproc_krnls
{
texture<uchar4, 2> tex_meanshift;
texture<uchar4, 2> tex_meanshift;
__device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
__device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
...
@@ -252,10 +245,7 @@ namespace imgproc_krnls
...
@@ -252,10 +245,7 @@ namespace imgproc_krnls
*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
}
}
}
}
}
namespace cv { namespace gpu { namespace imgproc
{
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
{
{
dim3 grid(1, 1, 1);
dim3 grid(1, 1, 1);
...
@@ -264,11 +254,11 @@ namespace cv { namespace gpu { namespace imgproc
...
@@ -264,11 +254,11 @@ namespace cv { namespace gpu { namespace imgproc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0,
imgproc_krnls::tex_meanshift, src.ptr
, desc, src.cols, src.rows, src.step ) );
cudaSafeCall( cudaBindTexture2D( 0,
tex_meanshift, src.data
, desc, src.cols, src.rows, src.step ) );
imgproc_krnls::meanshift_kernel<<< grid, threads >>>( dst.ptr
, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
meanshift_kernel<<< grid, threads >>>( dst.data
, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture(
imgproc_krnls::
tex_meanshift ) );
cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
}
}
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
{
{
...
@@ -278,18 +268,15 @@ namespace cv { namespace gpu { namespace imgproc
...
@@ -278,18 +268,15 @@ namespace cv { namespace gpu { namespace imgproc
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0,
imgproc_krnls::tex_meanshift, src.ptr
, desc, src.cols, src.rows, src.step ) );
cudaSafeCall( cudaBindTexture2D( 0,
tex_meanshift, src.data
, desc, src.cols, src.rows, src.step ) );
imgproc_krnls::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr
, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
meanshiftproc_kernel<<< grid, threads >>>( dstr.data, dstr.step, dstsp.data
, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture(
imgproc_krnls::
tex_meanshift ) );
cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
}
}
}}}
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
namespace imgproc_krnls
{
template <typename T>
template <typename T>
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
{
{
...
@@ -389,10 +376,8 @@ namespace imgproc_krnls
...
@@ -389,10 +376,8 @@ namespace imgproc_krnls
line[x >> 1] = res;
line[x >> 1] = res;
}
}
}
}
}
namespace cv { namespace gpu { namespace imgproc
{
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
{
{
dim3 threads(16, 16, 1);
dim3 threads(16, 16, 1);
...
@@ -400,7 +385,7 @@ namespace cv { namespace gpu { namespace imgproc
...
@@ -400,7 +385,7 @@ namespace cv { namespace gpu { namespace imgproc
grid.x = divUp(src.cols, threads.x << 2);
grid.x = divUp(src.cols, threads.x << 2);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr
, dst.step, src.cols, src.rows, ndisp);
drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data
, dst.step, src.cols, src.rows, ndisp);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -413,17 +398,14 @@ namespace cv { namespace gpu { namespace imgproc
...
@@ -413,17 +398,14 @@ namespace cv { namespace gpu { namespace imgproc
grid.x = divUp(src.cols, threads.x << 1);
grid.x = divUp(src.cols, threads.x << 1);
grid.y = divUp(src.rows, threads.y);
grid.y = divUp(src.rows, threads.y);
imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr
, dst.step, src.cols, src.rows, ndisp);
drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data
, dst.step, src.cols, src.rows, ndisp);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
}
}}}
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
namespace imgproc_krnls
{
__constant__ float cq[16];
__constant__ float cq[16];
template <typename T>
template <typename T>
...
@@ -455,10 +437,7 @@ namespace imgproc_krnls
...
@@ -455,10 +437,7 @@ namespace imgproc_krnls
*(float4*)(xyzw + xyzw_step * y + (x * 4)) = v;
*(float4*)(xyzw + xyzw_step * y + (x * 4)) = v;
}
}
}
}
}
namespace cv { namespace gpu { namespace imgproc
{
template <typename T>
template <typename T>
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
{
{
...
@@ -467,9 +446,9 @@ namespace cv { namespace gpu { namespace imgproc
...
@@ -467,9 +446,9 @@ namespace cv { namespace gpu { namespace imgproc
grid.x = divUp(disp.cols, threads.x);
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
grid.y = divUp(disp.rows, threads.y);
cudaSafeCall( cudaMemcpyToSymbol(
imgproc_krnls::
cq, q, 16 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
imgproc_krnls::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr
, xyzw.step / sizeof(float), disp.rows, disp.cols);
reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.data, disp.step / sizeof(T), xyzw.data
, xyzw.step / sizeof(float), disp.rows, disp.cols);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
...
modules/gpu/src/cuda/mathfunc.cu
View file @
d661b8e3
...
@@ -41,9 +41,7 @@
...
@@ -41,9 +41,7 @@
//M*/
//M*/
#include "cuda_shared.hpp"
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "transform.hpp"
#include "transform.hpp"
#include "vecmath.hpp"
using namespace cv::gpu;
using namespace cv::gpu;
...
@@ -54,7 +52,7 @@ using namespace cv::gpu;
...
@@ -54,7 +52,7 @@ using namespace cv::gpu;
//////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
// Cart <-> Polar
namespace
mathfunc_krnls
namespace
cv { namespace gpu { namespace mathfunc
{
{
struct Nothing
struct Nothing
{
{
...
@@ -133,10 +131,7 @@ namespace mathfunc_krnls
...
@@ -133,10 +131,7 @@ namespace mathfunc_krnls
yptr[y * y_step + x] = mag_data * sin_a;
yptr[y * y_step + x] = mag_data * sin_a;
}
}
}
}
}
namespace cv { namespace gpu { namespace mathfunc
{
template <typename Mag, typename Angle>
template <typename Mag, typename Angle>
void cartToPolar_caller(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream)
void cartToPolar_caller(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream)
{
{
...
@@ -148,9 +143,9 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -148,9 +143,9 @@ namespace cv { namespace gpu { namespace mathfunc
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
mathfunc_krnls::
cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
x.
ptr, x.elem_step, y.ptr, y.elem_step
,
x.
data, x.step/x.elemSize(), y.data, y.step/y.elemSize()
,
mag.
ptr, mag.elem_step, angle.ptr, angle.elem_step
, scale, x.cols, x.rows);
mag.
data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize()
, scale, x.cols, x.rows);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -163,27 +158,27 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -163,27 +158,27 @@ namespace cv { namespace gpu { namespace mathfunc
{
{
{
{
{
{
cartToPolar_caller<
mathfunc_krnls::Magnitude, mathfunc_krnls::
Atan2>,
cartToPolar_caller<
Magnitude,
Atan2>,
cartToPolar_caller<
mathfunc_krnls::Magnitude, mathfunc_krnls::
Nothing>
cartToPolar_caller<
Magnitude,
Nothing>
},
},
{
{
cartToPolar_caller<
mathfunc_krnls::MagnitudeSqr, mathfunc_krnls::
Atan2>,
cartToPolar_caller<
MagnitudeSqr,
Atan2>,
cartToPolar_caller<
mathfunc_krnls::MagnitudeSqr, mathfunc_krnls::
Nothing>,
cartToPolar_caller<
MagnitudeSqr,
Nothing>,
}
}
},
},
{
{
{
{
cartToPolar_caller<
mathfunc_krnls::Nothing, mathfunc_krnls::
Atan2>,
cartToPolar_caller<
Nothing,
Atan2>,
cartToPolar_caller<
mathfunc_krnls::Nothing, mathfunc_krnls::
Nothing>
cartToPolar_caller<
Nothing,
Nothing>
},
},
{
{
cartToPolar_caller<
mathfunc_krnls::Nothing, mathfunc_krnls::
Atan2>,
cartToPolar_caller<
Nothing,
Atan2>,
cartToPolar_caller<
mathfunc_krnls::Nothing, mathfunc_krnls::
Nothing>,
cartToPolar_caller<
Nothing,
Nothing>,
}
}
}
}
};
};
callers[mag.
ptr == 0][magSqr][angle.ptr
== 0](x, y, mag, angle, angleInDegrees, stream);
callers[mag.
data == 0][magSqr][angle.data
== 0](x, y, mag, angle, angleInDegrees, stream);
}
}
template <typename Mag>
template <typename Mag>
...
@@ -197,8 +192,8 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -197,8 +192,8 @@ namespace cv { namespace gpu { namespace mathfunc
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.elem_step
,
polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize()
,
angle.
ptr, angle.elem_step, scale, x.ptr, x.elem_step, y.ptr, y.elem_step
, mag.cols, mag.rows);
angle.
data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize()
, mag.cols, mag.rows);
if (stream == 0)
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
...
@@ -209,19 +204,16 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -209,19 +204,16 @@ namespace cv { namespace gpu { namespace mathfunc
typedef void (*caller_t)(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
typedef void (*caller_t)(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] =
static const caller_t callers[2] =
{
{
polarToCart_caller<
mathfunc_krnls::
NonEmptyMag>,
polarToCart_caller<NonEmptyMag>,
polarToCart_caller<
mathfunc_krnls::
EmptyMag>
polarToCart_caller<EmptyMag>
};
};
callers[mag.
ptr
== 0](mag, angle, x, y, angleInDegrees, stream);
callers[mag.
data
== 0](mag, angle, x, y, angleInDegrees, stream);
}
}
}}}
//////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////
// Compare
// Compare
namespace mathfunc_krnls
{
template <typename T1, typename T2>
template <typename T1, typename T2>
struct NotEqual
struct NotEqual
{
{
...
@@ -230,14 +222,11 @@ namespace mathfunc_krnls
...
@@ -230,14 +222,11 @@ namespace mathfunc_krnls
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
}
};
};
}
namespace cv { namespace gpu { namespace mathfunc
{
template <typename T1, typename T2>
template <typename T1, typename T2>
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
{
mathfunc_krnls::
NotEqual<T1, T2> op;
NotEqual<T1, T2> op;
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
}
}
...
...
modules/gpu/src/cuda/matrix_operations.cu
View file @
d661b8e3
...
@@ -40,16 +40,11 @@
...
@@ -40,16 +40,11 @@
//
//
//M*/
//M*/
#include <stddef.h>
#include <stdio.h>
#include "cuda_shared.hpp"
#include "cuda_shared.hpp"
#include "cuda_runtime.h"
#include "saturate_cast.hpp"
#include "saturate_cast.hpp"
using namespace cv::gpu;
namespace cv { namespace gpu { namespace matrix_operations {
namespace matop_krnls
{
template <typename T> struct shift_and_sizeof;
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
...
@@ -115,14 +110,11 @@ namespace matop_krnls
...
@@ -115,14 +110,11 @@ namespace matop_krnls
typedef int2 read_type;
typedef int2 read_type;
typedef short2 write_type;
typedef short2 write_type;
};
};
}
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
namespace matop_krnls
{
template<typename T>
template<typename T>
__global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
__global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
{
{
...
@@ -136,10 +128,6 @@ namespace matop_krnls
...
@@ -136,10 +128,6 @@ namespace matop_krnls
mat_dst[idx] = mat_src[idx];
mat_dst[idx] = mat_src[idx];
}
}
}
}
}
namespace cv { namespace gpu { namespace matrix_operations
{
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
template<typename T>
template<typename T>
...
@@ -147,17 +135,12 @@ namespace cv { namespace gpu { namespace matrix_operations
...
@@ -147,17 +135,12 @@ namespace cv { namespace gpu { namespace matrix_operations
{
{
dim3 threadsPerBlock(16,16, 1);
dim3 threadsPerBlock(16,16, 1);
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
((T*)mat_src.data, (T*)mat_dst.data, (unsigned char*)mask.data, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
if (stream == 0)
if (stream == 0)
{
cudaSafeCall ( cudaThreadSynchronize() );
::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
cudaSafeCall ( cudaThreadSynchronize() );
}
else
{
::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
}
}
}
void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
...
@@ -180,14 +163,11 @@ namespace cv { namespace gpu { namespace matrix_operations
...
@@ -180,14 +163,11 @@ namespace cv { namespace gpu { namespace matrix_operations
func(mat_src, mat_dst, mask, channels, stream);
func(mat_src, mat_dst, mask, channels, stream);
}
}
}}}
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
namespace matop_krnls
{
__constant__ double scalar_d[4];
__constant__ double scalar_d[4];
template<typename T>
template<typename T>
...
@@ -216,10 +196,6 @@ namespace matop_krnls
...
@@ -216,10 +196,6 @@ namespace matop_krnls
mat[idx] = scalar_d[ x % channels ];
mat[idx] = scalar_d[ x % channels ];
}
}
}
}
}
namespace cv { namespace gpu { namespace matrix_operations
{
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
...
@@ -229,16 +205,9 @@ namespace cv { namespace gpu { namespace matrix_operations
...
@@ -229,16 +205,9 @@ namespace cv { namespace gpu { namespace matrix_operations
dim3 threadsPerBlock(32, 8, 1);
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.data, (unsigned char *)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
if (stream == 0)
if (stream == 0)
{
::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
cudaSafeCall ( cudaThreadSynchronize() );
cudaSafeCall ( cudaThreadSynchronize() );
}
else
{
::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
}
}
}
template <typename T>
template <typename T>
...
@@ -247,20 +216,15 @@ namespace cv { namespace gpu { namespace matrix_operations
...
@@ -247,20 +216,15 @@ namespace cv { namespace gpu { namespace matrix_operations
dim3 threadsPerBlock(32, 8, 1);
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
if (stream == 0)
if (stream == 0)
{
matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
cudaSafeCall ( cudaThreadSynchronize() );
cudaSafeCall ( cudaThreadSynchronize() );
}
else
{
matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
}
}
}
void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
{
{
cudaSafeCall( cudaMemcpyToSymbol(
matop_krnls::
scalar_d, scalar, sizeof(double) * 4));
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, scalar, sizeof(double) * 4));
static SetToFunc_without_mask tab[8] =
static SetToFunc_without_mask tab[8] =
{
{
...
@@ -284,7 +248,7 @@ namespace cv { namespace gpu { namespace matrix_operations
...
@@ -284,7 +248,7 @@ namespace cv { namespace gpu { namespace matrix_operations
void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
{
{
cudaSafeCall( cudaMemcpyToSymbol(
matop_krnls::
scalar_d, scalar, sizeof(double) * 4));
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, scalar, sizeof(double) * 4));
static SetToFunc_with_mask tab[8] =
static SetToFunc_with_mask tab[8] =
{
{
...
@@ -305,14 +269,11 @@ namespace cv { namespace gpu { namespace matrix_operations
...
@@ -305,14 +269,11 @@ namespace cv { namespace gpu { namespace matrix_operations
func(mat, mask, channels, stream);
func(mat, mask, channels, stream);
}
}
}}}
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
namespace matop_krnls
{
template <typename T, typename DT>
template <typename T, typename DT>
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
{
{
...
@@ -348,29 +309,20 @@ namespace matop_krnls
...
@@ -348,29 +309,20 @@ namespace matop_krnls
}
}
}
}
}
}
}
namespace cv { namespace gpu { namespace matrix_operations
{
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
template<typename T, typename DT>
template<typename T, typename DT>
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
{
{
const int shift =
::matop_krnls::
ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
dim3 block(32, 8);
dim3 block(32, 8);
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
convert_to<T, DT><<<grid, block, 0, stream>>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta);
if (stream == 0)
if (stream == 0)
{
matop_krnls::convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaThreadSynchronize() );
}
else
{
matop_krnls::convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
}
}
}
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
...
...
modules/gpu/src/cuda/split_merge.cu
View file @
d661b8e3
...
@@ -230,9 +230,9 @@ namespace cv { namespace gpu { namespace split_merge {
...
@@ -230,9 +230,9 @@ namespace cv { namespace gpu { namespace split_merge {
dim3 blockDim(32, 8);
dim3 blockDim(32, 8);
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(
src[0].
ptr
, src[0].step,
src[0].
data
, src[0].step,
src[1].
ptr
, src[1].step,
src[1].
data
, src[1].step,
dst.rows, dst.cols, dst.
ptr
, dst.step);
dst.rows, dst.cols, dst.
data
, dst.step);
if (stream == 0)
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}
}
...
@@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace split_merge {
...
@@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace split_merge {
dim3 blockDim(32, 8);
dim3 blockDim(32, 8);
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(
src[0].
ptr
, src[0].step,
src[0].
data
, src[0].step,
src[1].
ptr
, src[1].step,
src[1].
data
, src[1].step,
src[2].
ptr
, src[2].step,
src[2].
data
, src[2].step,
dst.rows, dst.cols, dst.
ptr
, dst.step);
dst.rows, dst.cols, dst.
data
, dst.step);
if (stream == 0)
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}
}
...
@@ -259,11 +259,11 @@ namespace cv { namespace gpu { namespace split_merge {
...
@@ -259,11 +259,11 @@ namespace cv { namespace gpu { namespace split_merge {
dim3 blockDim(32, 8);
dim3 blockDim(32, 8);
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));
mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(
src[0].
ptr
, src[0].step,
src[0].
data
, src[0].step,
src[1].
ptr
, src[1].step,
src[1].
data
, src[1].step,
src[2].
ptr
, src[2].step,
src[2].
data
, src[2].step,
src[3].
ptr
, src[3].step,
src[3].
data
, src[3].step,
dst.rows, dst.cols, dst.
ptr
, dst.step);
dst.rows, dst.cols, dst.
data
, dst.step);
if (stream == 0)
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}
}
...
@@ -433,9 +433,9 @@ namespace cv { namespace gpu { namespace split_merge {
...
@@ -433,9 +433,9 @@ namespace cv { namespace gpu { namespace split_merge {
dim3 blockDim(32, 8);
dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
splitC2_<T><<<gridDim, blockDim, 0, stream>>>(
src.
ptr
, src.step, src.rows, src.cols,
src.
data
, src.step, src.rows, src.cols,
dst[0].
ptr
, dst[0].step,
dst[0].
data
, dst[0].step,
dst[1].
ptr
, dst[1].step);
dst[1].
data
, dst[1].step);
if (stream == 0)
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}
}
...
@@ -447,10 +447,10 @@ namespace cv { namespace gpu { namespace split_merge {
...
@@ -447,10 +447,10 @@ namespace cv { namespace gpu { namespace split_merge {
dim3 blockDim(32, 8);
dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
splitC3_<T><<<gridDim, blockDim, 0, stream>>>(
src.
ptr
, src.step, src.rows, src.cols,
src.
data
, src.step, src.rows, src.cols,
dst[0].
ptr
, dst[0].step,
dst[0].
data
, dst[0].step,
dst[1].
ptr
, dst[1].step,
dst[1].
data
, dst[1].step,
dst[2].
ptr
, dst[2].step);
dst[2].
data
, dst[2].step);
if (stream == 0)
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}
}
...
@@ -462,11 +462,11 @@ namespace cv { namespace gpu { namespace split_merge {
...
@@ -462,11 +462,11 @@ namespace cv { namespace gpu { namespace split_merge {
dim3 blockDim(32, 8);
dim3 blockDim(32, 8);
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));
splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
splitC4_<T><<<gridDim, blockDim, 0, stream>>>(
src.
ptr
, src.step, src.rows, src.cols,
src.
data
, src.step, src.rows, src.cols,
dst[0].
ptr
, dst[0].step,
dst[0].
data
, dst[0].step,
dst[1].
ptr
, dst[1].step,
dst[1].
data
, dst[1].step,
dst[2].
ptr
, dst[2].step,
dst[2].
data
, dst[2].step,
dst[3].
ptr
, dst[3].step);
dst[3].
data
, dst[3].step);
if (stream == 0)
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}
}
...
...
modules/gpu/src/cuda/stereobm.cu
View file @
d661b8e3
This diff is collapsed.
Click to expand it.
modules/gpu/src/cuda/transform.hpp
View file @
d661b8e3
...
@@ -44,36 +44,32 @@
...
@@ -44,36 +44,32 @@
#define __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "cuda_shared.hpp"
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "vecmath.hpp"
namespace
cv
{
namespace
gpu
{
namespace
algo_krnls
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
template
<
typename
T
,
typename
D
,
typename
UnOp
>
template
<
typename
T
,
typename
D
,
typename
UnOp
>
static
__global__
void
transform
(
const
T
*
src
,
size_t
src_step
,
static
__global__
void
transform
(
const
DevMem2D_
<
T
>
src
,
PtrStep_
<
D
>
dst
,
UnOp
op
)
D
*
dst
,
size_t
dst_step
,
int
width
,
int
height
,
UnOp
op
)
{
{
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
if
(
x
<
width
&&
y
<
height
)
if
(
x
<
src
.
cols
&&
y
<
src
.
rows
)
{
{
T
src_data
=
src
[
y
*
src_step
+
x
];
T
src_data
=
src
.
ptr
(
y
)[
x
];
dst
[
y
*
dst_step
+
x
]
=
op
(
src_data
,
x
,
y
);
dst
.
ptr
(
y
)[
x
]
=
op
(
src_data
,
x
,
y
);
}
}
}
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
static
__global__
void
transform
(
const
T1
*
src1
,
size_t
src1_step
,
const
T2
*
src2
,
size_t
src2_step
,
static
__global__
void
transform
(
const
DevMem2D_
<
T1
>
src1
,
const
PtrStep_
<
T2
>
src2
,
PtrStep_
<
D
>
dst
,
BinOp
op
)
D
*
dst
,
size_t
dst_step
,
int
width
,
int
height
,
BinOp
op
)
{
{
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
x
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
const
int
y
=
blockDim
.
y
*
blockIdx
.
y
+
threadIdx
.
y
;
if
(
x
<
width
&&
y
<
height
)
if
(
x
<
src1
.
cols
&&
y
<
src1
.
rows
)
{
{
T1
src1_data
=
src1
[
y
*
src1_step
+
x
];
T1
src1_data
=
src1
.
ptr
(
y
)[
x
];
T2
src2_data
=
src2
[
y
*
src2_step
+
x
];
T2
src2_data
=
src2
.
ptr
(
y
)[
x
];
dst
[
y
*
dst_step
+
x
]
=
op
(
src1_data
,
src2_data
,
x
,
y
);
dst
.
ptr
(
y
)[
x
]
=
op
(
src1_data
,
src2_data
,
x
,
y
);
}
}
}
}
}}}
}}}
...
@@ -83,7 +79,7 @@ namespace cv
...
@@ -83,7 +79,7 @@ namespace cv
namespace
gpu
namespace
gpu
{
{
template
<
typename
T
,
typename
D
,
typename
UnOp
>
template
<
typename
T
,
typename
D
,
typename
UnOp
>
static
void
transform
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
cudaStream_t
stream
)
static
void
transform
2
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
UnOp
op
,
cudaStream_t
stream
)
{
{
dim3
threads
(
16
,
16
,
1
);
dim3
threads
(
16
,
16
,
1
);
dim3
grid
(
1
,
1
,
1
);
dim3
grid
(
1
,
1
,
1
);
...
@@ -91,8 +87,7 @@ namespace cv
...
@@ -91,8 +87,7 @@ namespace cv
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
);
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
algo_krnls
::
transform
<<<
grid
,
threads
,
0
,
stream
>>>
(
src
.
ptr
,
src
.
elem_step
,
device
::
transform
<
T
,
D
,
UnOp
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
op
);
dst
.
ptr
,
dst
.
elem_step
,
src
.
cols
,
src
.
rows
,
op
);
if
(
stream
==
0
)
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
...
@@ -106,11 +101,10 @@ namespace cv
...
@@ -106,11 +101,10 @@ namespace cv
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
);
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
algo_krnls
::
transform
<<<
grid
,
threads
,
0
,
stream
>>>
(
src1
.
ptr
,
src1
.
elem_step
,
device
::
transform
<
T1
,
T2
,
D
,
BinOp
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
op
);
src2
.
ptr
,
src2
.
elem_step
,
dst
.
ptr
,
dst
.
elem_step
,
src1
.
cols
,
src1
.
rows
,
op
);
if
(
stream
==
0
)
if
(
stream
==
0
)
cudaSafeCall
(
cudaThreadSynchronize
()
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
}
}
}
}
...
...
modules/gpu/src/cuda/vecmath.hpp
View file @
d661b8e3
...
@@ -384,7 +384,14 @@ namespace cv
...
@@ -384,7 +384,14 @@ namespace cv
template
<
typename
VecD
,
typename
VecS
>
static
__device__
VecD
saturate_cast_caller
(
const
VecS
&
v
)
template
<
typename
VecD
,
typename
VecS
>
static
__device__
VecD
saturate_cast_caller
(
const
VecS
&
v
)
{
{
SatCast
<
VecTraits
<
VecD
>::
cn
,
VecD
>
cast
;
SatCast
<
VecTraits
<
VecD
>::
cn
,
VecD
>
cast
;
return
cast
(
v
);
return
cast
(
v
);
}
}
...
...
modules/gpu/src/filtering.cpp
View file @
d661b8e3
...
@@ -577,7 +577,10 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
...
@@ -577,7 +577,10 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
namespace
cv
{
namespace
gpu
{
namespace
filters
namespace
cv
{
namespace
gpu
{
namespace
filters
{
{
void
linearRowFilter_gpu_8u_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
template
<
int
CN
,
typename
T
,
typename
D
>
void
linearRowFilter_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
//void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);
void
linearRowFilter_gpu_8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
...
@@ -653,7 +656,8 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
...
@@ -653,7 +656,8 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterRow_8u_C1R
,
0
,
0
,
nppiFilterRow_8u_C4R
};
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterRow_8u_C1R
,
0
,
0
,
nppiFilterRow_8u_C4R
};
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
{
{
{
linearRowFilter_gpu_8u_8u_c4
,
linearRowFilter_gpu_8u_8s_c4
,
0
,
0
,
0
,
0
},
{
linearRowFilter_gpu
<
4
,
uchar4
,
uchar4
>
/*linearRowFilter_gpu_8u_8u_c4*/
,
linearRowFilter_gpu_8u_8s_c4
,
0
,
0
,
0
,
0
},
{
linearRowFilter_gpu_8s_8u_c4
,
linearRowFilter_gpu_8s_8s_c4
,
0
,
0
,
0
,
0
},
{
linearRowFilter_gpu_8s_8u_c4
,
linearRowFilter_gpu_8s_8s_c4
,
0
,
0
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16u_16u_c2
,
linearRowFilter_gpu_16u_16s_c2
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16u_16u_c2
,
linearRowFilter_gpu_16u_16s_c2
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16s_16u_c2
,
linearRowFilter_gpu_16s_16s_c2
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16s_16u_c2
,
linearRowFilter_gpu_16s_16s_c2
,
0
,
0
},
...
...
modules/gpu/src/stereobm_gpu.cpp
View file @
d661b8e3
...
@@ -61,9 +61,9 @@ namespace cv { namespace gpu
...
@@ -61,9 +61,9 @@ namespace cv { namespace gpu
namespace
bm
namespace
bm
{
{
//extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
//extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
extern
"C"
void
stereoBM_GPU
(
const
DevMem2D
&
left
,
const
DevMem2D
&
right
,
const
DevMem2D
&
disp
,
int
ndisp
,
int
winsz
,
const
DevMem2D_
<
uint
>&
minSSD_buf
,
c
onst
c
udaStream_t
&
stream
);
extern
"C"
void
stereoBM_GPU
(
const
DevMem2D
&
left
,
const
DevMem2D
&
right
,
const
DevMem2D
&
disp
,
int
ndisp
,
int
winsz
,
const
DevMem2D_
<
uint
>&
minSSD_buf
,
cudaStream_t
&
stream
);
extern
"C"
void
prefilter_xsobel
(
const
DevMem2D
&
input
,
const
DevMem2D
&
output
,
int
prefilterCap
/*= 31*/
,
const
cudaStream_t
&
stream
);
extern
"C"
void
prefilter_xsobel
(
const
DevMem2D
&
input
,
const
DevMem2D
output
,
int
prefilterCap
/*= 31*/
,
cudaStream_t
&
stream
);
extern
"C"
void
postfilter_textureness
(
const
DevMem2D
&
input
,
int
winsz
,
float
avgTexturenessThreshold
,
const
DevMem2D
&
disp
,
c
onst
c
udaStream_t
&
stream
);
extern
"C"
void
postfilter_textureness
(
const
DevMem2D
&
input
,
int
winsz
,
float
avgTexturenessThreshold
,
const
DevMem2D
&
disp
,
cudaStream_t
&
stream
);
}
}
}}
}}
...
@@ -98,7 +98,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
...
@@ -98,7 +98,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
return
false
;
return
false
;
}
}
static
void
stereo_bm_gpu_operator
(
GpuMat
&
minSSD
,
GpuMat
&
leBuf
,
GpuMat
&
riBuf
,
int
preset
,
int
ndisp
,
int
winSize
,
float
avergeTexThreshold
,
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disparity
,
c
onst
cudaStream_t
&
stream
)
static
void
stereo_bm_gpu_operator
(
GpuMat
&
minSSD
,
GpuMat
&
leBuf
,
GpuMat
&
riBuf
,
int
preset
,
int
ndisp
,
int
winSize
,
float
avergeTexThreshold
,
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disparity
,
c
udaStream_t
stream
)
{
{
CV_DbgAssert
(
left
.
rows
==
right
.
rows
&&
left
.
cols
==
right
.
cols
);
CV_DbgAssert
(
left
.
rows
==
right
.
rows
&&
left
.
cols
==
right
.
cols
);
CV_DbgAssert
(
left
.
type
()
==
CV_8UC1
);
CV_DbgAssert
(
left
.
type
()
==
CV_8UC1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment