Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7b3bbcea
Commit
7b3bbcea
authored
Aug 26, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::transpose
parent
6dbb32a0
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
82 additions
and
127 deletions
+82
-127
core.cpp
modules/cudaarithm/src/core.cpp
+0
-46
transpose.cu
modules/cudaarithm/src/cuda/transpose.cu
+34
-64
transpose.hpp
...les/cudev/include/opencv2/cudev/grid/detail/transpose.hpp
+11
-14
transpose.hpp
modules/cudev/include/opencv2/cudev/grid/transpose.hpp
+37
-3
No files found.
modules/cudaarithm/src/core.cpp
View file @
7b3bbcea
...
...
@@ -63,52 +63,6 @@ void cv::cuda::copyMakeBorder(InputArray, OutputArray, int, int, int, int, int,
#else
/* !defined (HAVE_CUDA) */
////////////////////////////////////////////////////////////////////////
// transpose
namespace
arithm
{
template
<
typename
T
>
void
transpose
(
PtrStepSz
<
T
>
src
,
PtrStepSz
<
T
>
dst
,
cudaStream_t
stream
);
}
void
cv
::
cuda
::
transpose
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
_stream
)
{
GpuMat
src
=
_src
.
getGpuMat
();
CV_Assert
(
src
.
elemSize
()
==
1
||
src
.
elemSize
()
==
4
||
src
.
elemSize
()
==
8
);
_dst
.
create
(
src
.
cols
,
src
.
rows
,
src
.
type
()
);
GpuMat
dst
=
_dst
.
getGpuMat
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
if
(
src
.
elemSize
()
==
1
)
{
NppStreamHandler
h
(
stream
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiTranspose_8u_C1R
(
src
.
ptr
<
Npp8u
>
(),
static_cast
<
int
>
(
src
.
step
),
dst
.
ptr
<
Npp8u
>
(),
static_cast
<
int
>
(
dst
.
step
),
sz
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
else
if
(
src
.
elemSize
()
==
4
)
{
arithm
::
transpose
<
int
>
(
src
,
dst
,
stream
);
}
else
// if (src.elemSize() == 8)
{
if
(
!
deviceSupports
(
NATIVE_DOUBLE
))
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"The device doesn't support double"
);
arithm
::
transpose
<
double
>
(
src
,
dst
,
stream
);
}
}
////////////////////////////////////////////////////////////////////////
// flip
...
...
modules/cudaarithm/src/cuda/transpose.cu
View file @
7b3bbcea
...
...
@@ -40,83 +40,53 @@
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#ifndef HAVE_OPENCV_CUDEV
using namespace cv::cuda;
using namespace cv::cuda::device;
#error "opencv_cudev is required"
namespace arithm
{
const int TRANSPOSE_TILE_DIM = 16;
const int TRANSPOSE_BLOCK_ROWS = 16;
template <typename T>
__global__ void transposeKernel(const PtrStepSz<T> src, PtrStep<T> dst)
{
__shared__ T tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM + 1];
#else
int blockIdx_x, blockIdx_y;
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
// do diagonal reordering
if (gridDim.x == gridDim.y)
{
blockIdx_y = blockIdx.x;
blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x;
}
else
{
int bid = blockIdx.x + gridDim.x * blockIdx.y;
blockIdx_y = bid % gridDim.y;
blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;
}
using namespace cv::cudev;
int xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
int yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
if (xIndex < src.cols)
{
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
{
if (yIndex + i < src.rows)
{
tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex);
}
}
}
void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src = _src.getGpuMat();
__syncthreads();
const size_t elemSize = src.elemSize();
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 );
if (xIndex < src.rows)
{
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
{
if (yIndex + i < src.cols)
{
dst(yIndex + i, xIndex) = tile[threadIdx.x][threadIdx.y + i];
}
}
}
}
_dst.create( src.cols, src.rows, src.type() );
GpuMat dst = _dst.getGpuMat();
template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
if (elemSize == 1)
{
const dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_TILE_DIM);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
NppStreamHandler h(StreamAccessor::getStream(stream));
transposeKernel<<<grid, block, 0, stream>>>(src, dst);
cudaSafeCall( cudaGetLastError() );
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
template void transpose<int>(PtrStepSz<int> src, PtrStepSz<int> dst, cudaStream_t stream);
template void transpose<double>(PtrStepSz<double> src, PtrStepSz<double> dst, cudaStream_t stream);
if (!stream)
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
else if (elemSize == 4)
{
gridTranspose(globPtr<int>(src), globPtr<int>(dst), stream);
}
else // if (elemSize == 8)
{
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream);
}
}
#endif
// CUDA_DISABLER
#endif
modules/cudev/include/opencv2/cudev/grid/detail/transpose.hpp
View file @
7b3bbcea
...
...
@@ -55,15 +55,12 @@ namespace cv { namespace cudev {
namespace
transpose_detail
{
const
int
TRANSPOSE_TILE_DIM
=
16
;
const
int
TRANSPOSE_BLOCK_ROWS
=
16
;
template
<
class
SrcPtr
,
typename
DstType
>
template
<
int
TILE_DIM
,
int
BLOCK_DIM_Y
,
class
SrcPtr
,
typename
DstType
>
__global__
void
transpose
(
const
SrcPtr
src
,
GlobPtr
<
DstType
>
dst
,
const
int
rows
,
const
int
cols
)
{
typedef
typename
PtrTraits
<
SrcPtr
>::
value_type
src_type
;
__shared__
src_type
tile
[
TRANSPOSE_TILE_DIM
][
TRANSPOSE_TILE_DIM
+
1
];
__shared__
src_type
tile
[
TILE_DIM
][
TILE_DIM
+
1
];
int
blockIdx_x
,
blockIdx_y
;
...
...
@@ -80,12 +77,12 @@ namespace transpose_detail
blockIdx_x
=
((
bid
/
gridDim
.
y
)
+
blockIdx_y
)
%
gridDim
.
x
;
}
int
xIndex
=
blockIdx_x
*
TRANSPOSE_TILE_DIM
+
threadIdx
.
x
;
int
yIndex
=
blockIdx_y
*
TRANSPOSE_TILE_DIM
+
threadIdx
.
y
;
int
xIndex
=
blockIdx_x
*
TILE_DIM
+
threadIdx
.
x
;
int
yIndex
=
blockIdx_y
*
TILE_DIM
+
threadIdx
.
y
;
if
(
xIndex
<
cols
)
{
for
(
int
i
=
0
;
i
<
TRANSPOSE_TILE_DIM
;
i
+=
TRANSPOSE_BLOCK_ROWS
)
for
(
int
i
=
0
;
i
<
TILE_DIM
;
i
+=
BLOCK_DIM_Y
)
{
if
(
yIndex
+
i
<
rows
)
{
...
...
@@ -96,12 +93,12 @@ namespace transpose_detail
__syncthreads
();
xIndex
=
blockIdx_y
*
TRANSPOSE_TILE_DIM
+
threadIdx
.
x
;
yIndex
=
blockIdx_x
*
TRANSPOSE_TILE_DIM
+
threadIdx
.
y
;
xIndex
=
blockIdx_y
*
TILE_DIM
+
threadIdx
.
x
;
yIndex
=
blockIdx_x
*
TILE_DIM
+
threadIdx
.
y
;
if
(
xIndex
<
rows
)
{
for
(
int
i
=
0
;
i
<
TRANSPOSE_TILE_DIM
;
i
+=
TRANSPOSE_BLOCK_ROWS
)
for
(
int
i
=
0
;
i
<
TILE_DIM
;
i
+=
BLOCK_DIM_Y
)
{
if
(
yIndex
+
i
<
cols
)
{
...
...
@@ -111,13 +108,13 @@ namespace transpose_detail
}
}
template
<
class
SrcPtr
,
typename
DstType
>
template
<
class
Policy
,
class
SrcPtr
,
typename
DstType
>
__host__
void
transpose
(
const
SrcPtr
&
src
,
const
GlobPtr
<
DstType
>&
dst
,
int
rows
,
int
cols
,
cudaStream_t
stream
)
{
const
dim3
block
(
TRANSPOSE_TILE_DIM
,
TRANSPOSE_TILE_DIM
);
const
dim3
block
(
Policy
::
tile_dim
,
Policy
::
block_dim_y
);
const
dim3
grid
(
divUp
(
cols
,
block
.
x
),
divUp
(
rows
,
block
.
y
));
transpose
<<<
grid
,
block
,
0
,
stream
>>>
(
src
,
dst
,
rows
,
cols
);
transpose
<
Policy
::
tile_dim
,
Policy
::
block_dim_y
>
<<<
grid
,
block
,
0
,
stream
>>>
(
src
,
dst
,
rows
,
cols
);
CV_CUDEV_SAFE_CALL
(
cudaGetLastError
()
);
if
(
stream
==
0
)
...
...
modules/cudev/include/opencv2/cudev/grid/transpose.hpp
View file @
7b3bbcea
...
...
@@ -49,19 +49,53 @@
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/glob.hpp"
#include "detail/transpose.hpp"
namespace
cv
{
namespace
cudev
{
template
<
class
SrcPtr
,
typename
DstType
>
__host__
void
gridTranspose
(
const
SrcPtr
&
src
,
GpuMat_
<
DstType
>&
dst
,
Stream
&
stream
=
Stream
::
Null
())
template
<
class
Policy
,
class
SrcPtr
,
typename
DstType
>
__host__
void
gridTranspose_
(
const
SrcPtr
&
src
,
GpuMat_
<
DstType
>&
dst
,
Stream
&
stream
=
Stream
::
Null
())
{
const
int
rows
=
getRows
(
src
);
const
int
cols
=
getCols
(
src
);
dst
.
create
(
cols
,
rows
);
transpose_detail
::
transpose
(
shrinkPtr
(
src
),
shrinkPtr
(
dst
),
rows
,
cols
,
StreamAccessor
::
getStream
(
stream
));
transpose_detail
::
transpose
<
Policy
>
(
shrinkPtr
(
src
),
shrinkPtr
(
dst
),
rows
,
cols
,
StreamAccessor
::
getStream
(
stream
));
}
template
<
class
Policy
,
class
SrcPtr
,
typename
DstType
>
__host__
void
gridTranspose_
(
const
SrcPtr
&
src
,
const
GlobPtrSz
<
DstType
>&
dst
,
Stream
&
stream
=
Stream
::
Null
())
{
const
int
rows
=
getRows
(
src
);
const
int
cols
=
getCols
(
src
);
CV_Assert
(
getRows
(
dst
)
==
cols
&&
getCols
(
dst
)
==
rows
);
transpose_detail
::
transpose
<
Policy
>
(
shrinkPtr
(
src
),
shrinkPtr
(
dst
),
rows
,
cols
,
StreamAccessor
::
getStream
(
stream
));
}
// Default Policy
struct
DefaultTransposePolicy
{
enum
{
tile_dim
=
16
,
block_dim_y
=
16
};
};
template
<
class
SrcPtr
,
typename
DstType
>
__host__
void
gridTranspose
(
const
SrcPtr
&
src
,
GpuMat_
<
DstType
>&
dst
,
Stream
&
stream
=
Stream
::
Null
())
{
gridTranspose_
<
DefaultTransposePolicy
>
(
src
,
dst
,
stream
);
}
template
<
class
SrcPtr
,
typename
DstType
>
__host__
void
gridTranspose
(
const
SrcPtr
&
src
,
const
GlobPtrSz
<
DstType
>&
dst
,
Stream
&
stream
=
Stream
::
Null
())
{
gridTranspose_
<
DefaultTransposePolicy
>
(
src
,
dst
,
stream
);
}
}}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment