Commit 045a856c authored Aug 26, 2013 by Vladislav Vinogradov
used new device layer for cv::gpu::minMax
parent b705e0d8
Showing 3 changed files with 54 additions and 238 deletions

modules/cudaarithm/src/cuda/minmax.cu                        +51  -182
modules/cudaarithm/src/reductions.cpp                         +0   -47
modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp    +3    -9
modules/cudaarithm/src/cuda/minmax.cu
@@ -40,208 +40,77 @@
 //
 //M*/
-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/vec_traits.hpp"
-#include "opencv2/core/cuda/vec_math.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/reduce.hpp"
-#include "opencv2/core/cuda/emulation.hpp"
-#include "opencv2/core/cuda/limits.hpp"
-#include "opencv2/core/cuda/utility.hpp"
+#ifndef HAVE_OPENCV_CUDEV
-using namespace cv::cuda;
-using namespace cv::cuda::device;
+#error "opencv_cudev is required"
-namespace minMax
-{
-    __device__ unsigned int blocks_finished = 0;
-    // To avoid shared bank conflicts we convert each value into value of
-    // appropriate type (32 bits minimum)
-    template <typename T> struct MinMaxTypeTraits;
-    template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<schar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<ushort> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
-    template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
-    template <int BLOCK_SIZE, typename R>
-    struct GlobalReduce
-    {
-        static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval)
-        {
-        #if __CUDA_ARCH__ >= 200
-            if (tid == 0)
-            {
-                Emulation::glob::atomicMin(minval, mymin);
-                Emulation::glob::atomicMax(maxval, mymax);
-            }
-        #else
-            __shared__ bool is_last;
-            if (tid == 0)
-            {
-                minval[bid] = mymin;
-                maxval[bid] = mymax;
-                __threadfence();
-                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
-                is_last = (ticket == gridDim.x * gridDim.y - 1);
-            }
-            __syncthreads();
-            if (is_last)
-            {
-                int idx = ::min(tid, gridDim.x * gridDim.y - 1);
-                mymin = minval[idx];
-                mymax = maxval[idx];
-                const minimum<R> minOp;
-                const maximum<R> maxOp;
-                device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
-                if (tid == 0)
-                {
-                    minval[0] = mymin;
-                    maxval[0] = mymax;
-                    blocks_finished = 0;
-                }
-            }
-        #endif
-        }
-    };
-    template <int BLOCK_SIZE, typename T, typename R, class Mask>
-    __global__ void kernel(const PtrStepSz<T> src, const Mask mask, R* minval, R* maxval, const int twidth, const int theight)
-    {
-        __shared__ R sminval[BLOCK_SIZE];
-        __shared__ R smaxval[BLOCK_SIZE];
-        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
-        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
-        R mymin = numeric_limits<R>::max();
-        R mymax = -numeric_limits<R>::max();
-        const minimum<R> minOp;
-        const maximum<R> maxOp;
-        for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
-        {
-            const T* ptr = src.ptr(y);
+#else
-            for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
-            {
-                if (mask(y, x))
-                {
-                    const R srcVal = ptr[x];
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"
-                    mymin = minOp(mymin, srcVal);
-                    mymax = maxOp(mymax, srcVal);
-                }
-            }
-        }
+using namespace cv::cudev;
-        device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
-        GlobalReduce<BLOCK_SIZE, R>::run(mymin, mymax, minval, maxval, tid, bid, sminval, smaxval);
-    }
-    const int threads_x = 32;
-    const int threads_y = 8;
-    void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
+namespace
 {
+    template <typename T>
+    void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal)
+    {
-        block = dim3(threads_x, threads_y);
-        grid = dim3(divUp(cols, block.x * block.y),
-                    divUp(rows, block.y * block.x));
+        typedef typename SelectIf<
+                TypesEquals<T, double>::value,
+                double,
+                typename SelectIf<TypesEquals<T, float>::value, float, int>::type
+        >::type work_type;
-        grid.x = ::min(grid.x, block.x);
-        grid.y = ::min(grid.y, block.y);
-    }
+        GpuMat_<T> src(_src);
+        GpuMat_<work_type> buf(_buf);
-    void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
-    {
-        dim3 block, grid;
-        getLaunchCfg(cols, rows, block, grid);
+        if (mask.empty())
+            gridFindMinMaxVal(src, buf);
+        else
+            gridFindMinMaxVal(src, buf, globPtr<uchar>(mask));
-        bufcols = grid.x * grid.y * sizeof(double);
-        bufrows = 2;
-    }
+        work_type data[2];
+        buf.download(cv::Mat(1, 2, buf.type(), data));
-    __global__ void setDefaultKernel(int* minval_buf, int* maxval_buf)
-    {
-        *minval_buf = numeric_limits<int>::max();
-        *maxval_buf = numeric_limits<int>::min();
-    }
-    __global__ void setDefaultKernel(float* minval_buf, float* maxval_buf)
-    {
-        *minval_buf = numeric_limits<float>::max();
-        *maxval_buf = -numeric_limits<float>::max();
-    }
-    __global__ void setDefaultKernel(double* minval_buf, double* maxval_buf)
-    {
-        *minval_buf = numeric_limits<double>::max();
-        *maxval_buf = -numeric_limits<double>::max();
-    }
+        if (minVal)
+            *minVal = data[0];
-    template <typename R>
-    void setDefault(R* minval_buf, R* maxval_buf)
-    {
-        setDefaultKernel<<<1, 1>>>(minval_buf, maxval_buf);
+        if (maxVal)
+            *maxVal = data[1];
     }
+}
-    template <typename T>
-    void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
+void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
 {
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal);
+    static const func_t funcs[] =
+    {
-        typedef typename MinMaxTypeTraits<T>::best_type R;
-        dim3 block, grid;
-        getLaunchCfg(src.cols, src.rows, block, grid);
-        const int twidth = divUp(divUp(src.cols, grid.x), block.x);
-        const int theight = divUp(divUp(src.rows, grid.y), block.y);
-        R* minval_buf = (R*) buf.ptr(0);
-        R* maxval_buf = (R*) buf.ptr(1);
+        minMaxImpl<uchar>,
+        minMaxImpl<schar>,
+        minMaxImpl<ushort>,
+        minMaxImpl<short>,
+        minMaxImpl<int>,
+        minMaxImpl<float>,
+        minMaxImpl<double>
+    };
-        setDefault(minval_buf, maxval_buf);
+    GpuMat src = _src.getGpuMat();
+    GpuMat mask = _mask.getGpuMat();
-        if (mask.data)
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, twidth, theight);
-        else
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, twidth, theight);
+    CV_Assert( src.channels() == 1 );
+    CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
-        cudaSafeCall( cudaGetLastError() );
+    const int depth = src.depth();
-        cudaSafeCall( cudaDeviceSynchronize() );
+    const int work_type = depth == CV_64F ? CV_64F : depth == CV_32F ? CV_32F : CV_32S;
+    ensureSizeIsEnough(1, 2, work_type, buf);
-        R minval_, maxval_;
-        cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
-        cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
-        *minval = minval_;
-        *maxval = maxval_;
-    }
+    const func_t func = funcs[src.depth()];
-    template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<int >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
+    func(src, mask, buf, minVal, maxVal);
 }
-#endif // CUDA_DISABLER
+#endif
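For reference, here is a minimal caller-side sketch of the wrapper defined above. This is hypothetical example code, not part of the commit, and it assumes the cudaarithm headers of this development branch; the signature it calls is the one shown in the diff.

// Hypothetical usage sketch (not part of this commit): calls the
// cv::cuda::minMax overload whose signature appears in the diff above.
#include <cstdio>
#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>

int main()
{
    // Single-channel 8-bit image with random values, uploaded to the device.
    cv::Mat host(480, 640, CV_8UC1);
    cv::randu(host, cv::Scalar(0), cv::Scalar(256));

    cv::cuda::GpuMat src;
    src.upload(host);

    double minVal = 0.0, maxVal = 0.0;
    cv::cuda::GpuMat buf; // scratch buffer; the wrapper resizes it to 1x2 of the work type
    cv::cuda::minMax(src, &minVal, &maxVal, cv::noArray(), buf);

    std::printf("min = %f, max = %f\n", minVal, maxVal);
    return 0;
}

Passing cv::noArray() for the mask takes the unmasked gridFindMinMaxVal path in minMaxImpl; a CV_8U mask of the same size selects the masked path.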
modules/cudaarithm/src/reductions.cpp
@@ -186,53 +186,6 @@ double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normT
     return retVal;
 }
-////////////////////////////////////////////////////////////////////////
-// minMax
-namespace minMax
-{
-    void getBufSize(int cols, int rows, int& bufcols, int& bufrows);
-    template <typename T>
-    void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-}
-void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
-{
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
-    typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
-    static const func_t funcs[] =
-    {
-        ::minMax::run<uchar>,
-        ::minMax::run<schar>,
-        ::minMax::run<ushort>,
-        ::minMax::run<short>,
-        ::minMax::run<int>,
-        ::minMax::run<float>,
-        ::minMax::run<double>
-    };
-    CV_Assert( src.channels() == 1 );
-    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
-    if (src.depth() == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-    Size buf_size;
-    ::minMax::getBufSize(src.cols, src.rows, buf_size.width, buf_size.height);
-    ensureSizeIsEnough(buf_size, CV_8U, buf);
-    const func_t func = funcs[src.depth()];
-    double temp1, temp2;
-    func(src, mask, minVal ? minVal : &temp1, maxVal ? maxVal : &temp2, buf);
-}
 ////////////////////////////////////////////////////////////////////////
 // minMaxLoc
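Both the wrapper removed here and its replacement in minmax.cu pick the implementation through a function-pointer table indexed by src.depth(). Below is a small standalone sketch of that dispatch pattern; it is hypothetical code, not OpenCV API. The table order has to follow the OpenCV depth codes CV_8U=0 through CV_64F=6, which is why both funcs[] arrays list uchar through double in that order.

// Hypothetical standalone sketch of depth-indexed dispatch (not OpenCV code).
#include <cstdio>

typedef double (*first_as_double_t)(const void* data);

template <typename T>
double firstAsDouble(const void* data)
{
    // Reinterpret the untyped buffer as T and widen its first element to double.
    return static_cast<double>(static_cast<const T*>(data)[0]);
}

static const first_as_double_t table[] =
{
    firstAsDouble<unsigned char>,   // 0: CV_8U
    firstAsDouble<signed char>,     // 1: CV_8S
    firstAsDouble<unsigned short>,  // 2: CV_16U
    firstAsDouble<short>,           // 3: CV_16S
    firstAsDouble<int>,             // 4: CV_32S
    firstAsDouble<float>,           // 5: CV_32F
    firstAsDouble<double>           // 6: CV_64F
};

int main()
{
    const float sample[] = { 3.5f };
    // Depth code 5 (CV_32F) selects the float instantiation at run time.
    std::printf("%f\n", table[5](sample));
    return 0;
}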
modules/cudev/include/opencv2/cudev/grid/detail/reduce.hpp
@@ -440,30 +440,24 @@ namespace grid_reduce_detail
     __host__ void minVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
     {
         typedef typename PtrTraits<SrcPtr>::value_type src_type;
-        const int cn = VecTraits<src_type>::cn;
-        typedef typename MakeVec<ResType, cn>::type work_type;
-        glob_reduce<MinMaxReductor<minop<work_type>, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
+        glob_reduce<MinMaxReductor<minop<ResType>, src_type, ResType>, Policy>(src, result, mask, rows, cols, stream);
     }
     template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
     __host__ void maxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
     {
         typedef typename PtrTraits<SrcPtr>::value_type src_type;
-        const int cn = VecTraits<src_type>::cn;
-        typedef typename MakeVec<ResType, cn>::type work_type;
-        glob_reduce<MinMaxReductor<maxop<work_type>, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
+        glob_reduce<MinMaxReductor<maxop<ResType>, src_type, ResType>, Policy>(src, result, mask, rows, cols, stream);
     }
     template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
     __host__ void minMaxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
     {
         typedef typename PtrTraits<SrcPtr>::value_type src_type;
-        const int cn = VecTraits<src_type>::cn;
-        typedef typename MakeVec<ResType, cn>::type work_type;
-        glob_reduce<MinMaxReductor<both, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
+        glob_reduce<MinMaxReductor<both, src_type, ResType>, Policy>(src, result, mask, rows, cols, stream);
     }
 }
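The reduce.hpp change instantiates MinMaxReductor on ResType, the element type of the result buffer, instead of on MakeVec<ResType, cn>::type. For the single-channel sources accepted by the new cv::cuda::minMax wrapper the two types coincide; here is a hypothetical compile-time sketch of that collapse (not part of the commit), assuming a C++11 compiler and the CUDA headers that the cudev module pulls in.

// Hypothetical compile-time check (not part of this commit): for one channel,
// MakeVec<ResType, 1>::type is ResType itself, so the reductor's work type and
// its result type are the same in the single-channel case.
#include <type_traits>
#include <opencv2/cudev/util/vec_traits.hpp>

int main()
{
    static_assert(std::is_same<cv::cudev::MakeVec<int,   1>::type, int  >::value, "int stays int");
    static_assert(std::is_same<cv::cudev::MakeVec<float, 1>::type, float>::value, "float stays float");
    return 0;
}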