Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
776c0cb0
Commit
776c0cb0
authored
Jul 29, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
switched to new device layer in gpucodec module
parent
298a1d50
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
86 additions
and
88 deletions
+86
-88
CMakeLists.txt
modules/cudacodec/CMakeLists.txt
+2
-2
nv12_to_rgb.cu
modules/cudacodec/src/cuda/nv12_to_rgb.cu
+37
-18
rgb_to_yv12.cu
modules/cudacodec/src/cuda/rgb_to_yv12.cu
+41
-44
video_reader.cpp
modules/cudacodec/src/video_reader.cpp
+2
-17
video_writer.cpp
modules/cudacodec/src/video_writer.cpp
+2
-5
glob.hpp
modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp
+2
-2
No files found.
modules/cudacodec/CMakeLists.txt
View file @
776c0cb0
...
...
@@ -4,9 +4,9 @@ endif()
set
(
the_description
"CUDA-accelerated Video Encoding/Decoding"
)
ocv_warnings_disable
(
CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef
-Wmissing-declarations
)
ocv_warnings_disable
(
CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef
)
ocv_add_module
(
cudacodec opencv_highgui
)
ocv_add_module
(
cudacodec opencv_highgui
OPTIONAL opencv_cudev
)
ocv_module_include_directories
()
ocv_glob_module_sources
()
...
...
modules/cudacodec/src/cuda/nv12_to_rgb.cu
View file @
776c0cb0
...
...
@@ -47,13 +47,26 @@
* source and converts to output in ARGB format
*/
#include "opencv2/
core/cuda/common
.hpp"
#include "opencv2/
opencv_modules
.hpp"
namespace cv { namespace cuda { namespace device
#ifndef HAVE_OPENCV_CUDEV
#error "opencv_cudev is required"
#else
#include "opencv2/cudev/common.hpp"
using namespace cv;
using namespace cv::cudev;
void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height);
namespace
{
__constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};
__device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
__device__
static
void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
{
float luma, chromaCb, chromaCr;
...
...
@@ -76,7 +89,7 @@ namespace cv { namespace cuda { namespace device
(chromaCr * constHueColorSpaceMat[8]);
}
__device__ uint RGBA_pack_10bit(float red, float green, float blue, uint alpha)
__device__
static
uint RGBA_pack_10bit(float red, float green, float blue, uint alpha)
{
uint ARGBpixel = 0;
...
...
@@ -99,9 +112,9 @@ namespace cv { namespace cuda { namespace device
#define COLOR_COMPONENT_BIT_SIZE 10
#define COLOR_COMPONENT_MASK 0x3FF
__global__ void NV12_to_RGB(uchar* srcImage, size_t nSourcePitch,
uint* dstImage, size_t nDestPitch,
uint width, uint height)
__global__ void NV12_to_RGB(
const
uchar* srcImage, size_t nSourcePitch,
uint* dstImage, size_t nDestPitch,
uint width, uint height)
{
// Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
...
...
@@ -171,18 +184,24 @@ namespace cv { namespace cuda { namespace device
dstImage[y * dstImagePitch + x ] = RGBA_pack_10bit(red[0], green[0], blue[0], ((uint)0xff << 24));
dstImage[y * dstImagePitch + x + 1 ] = RGBA_pack_10bit(red[1], green[1], blue[1], ((uint)0xff << 24));
}
}
void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));
void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height)
{
// Final Stage: NV12toARGB color space conversion
NV12_to_RGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
interopFrame.cols, interopFrame.rows
);
_outFrame.create(height, width, CV_8UC4);
GpuMat outFrame = _outFrame.getGpuMat(
);
cudaSafeCall( cudaGetLastError() );
dim3 block(32, 8);
dim3 grid(divUp(width, 2 * block.x), divUp(height, block.y));
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}}}
NV12_to_RGB<<<grid, block>>>(decodedFrame.ptr<uchar>(), decodedFrame.step,
outFrame.ptr<uint>(), outFrame.step,
width, height);
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
#endif
modules/cudacodec/src/cuda/rgb_to_yv12.cu
View file @
776c0cb0
...
...
@@ -40,10 +40,21 @@
//
//M*/
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/opencv_modules.hpp"
namespace cv { namespace cuda { namespace device
#ifndef HAVE_OPENCV_CUDEV
#error "opencv_cudev is required"
#else
#include "opencv2/cudev/ptr2d/glob.hpp"
using namespace cv::cudev;
void RGB_to_YV12(const GpuMat& src, GpuMat& dst);
namespace
{
__device__ __forceinline__ void rgb_to_y(const uchar b, const uchar g, const uchar r, uchar& y)
{
...
...
@@ -57,7 +68,7 @@ namespace cv { namespace cuda { namespace device
v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
}
__global__ void Gray_to_YV12(const
PtrStepSzb src, PtrStepb
dst)
__global__ void Gray_to_YV12(const
GlobPtrSz<uchar> src, GlobPtr<uchar>
dst)
{
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
...
...
@@ -67,9 +78,9 @@ namespace cv { namespace cuda { namespace device
// get pointers to the data
const size_t planeSize = src.rows * dst.step;
PtrStepb y_plane
(dst.data, dst.step);
PtrStepb u_plane
(y_plane.data + planeSize, dst.step / 2);
PtrStepb v_plane
(u_plane.data + (planeSize / 4), dst.step / 2);
GlobPtr<uchar> y_plane = globPtr
(dst.data, dst.step);
GlobPtr<uchar> u_plane = globPtr
(y_plane.data + planeSize, dst.step / 2);
GlobPtr<uchar> v_plane = globPtr
(u_plane.data + (planeSize / 4), dst.step / 2);
uchar pix;
uchar y_val, u_val, v_val;
...
...
@@ -94,7 +105,7 @@ namespace cv { namespace cuda { namespace device
}
template <typename T>
__global__ void RGB_to_YV12(const
PtrStepSz<T> src, PtrStepb
dst)
__global__ void RGB_to_YV12(const
GlobPtrSz<T> src, GlobPtr<uchar>
dst)
{
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
...
...
@@ -104,9 +115,9 @@ namespace cv { namespace cuda { namespace device
// get pointers to the data
const size_t planeSize = src.rows * dst.step;
PtrStepb y_plane
(dst.data, dst.step);
PtrStepb u_plane
(y_plane.data + planeSize, dst.step / 2);
PtrStepb v_plane
(u_plane.data + (planeSize / 4), dst.step / 2);
GlobPtr<uchar> y_plane = globPtr
(dst.data, dst.step);
GlobPtr<uchar> u_plane = globPtr
(y_plane.data + planeSize, dst.step / 2);
GlobPtr<uchar> v_plane = globPtr
(u_plane.data + (planeSize / 4), dst.step / 2);
T pix;
uchar y_val, u_val, v_val;
...
...
@@ -129,42 +140,28 @@ namespace cv { namespace cuda { namespace device
u_plane(y / 2, x / 2) = u_val;
v_plane(y / 2, x / 2) = v_val;
}
}
void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
Gray_to_YV12<<<grid, block, 0, stream>>>(src, dst);
cudaSafeCall( cudaGetLastError() );
void RGB_to_YV12(const GpuMat& src, GpuMat& dst)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void RGB_to_YV12_caller(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream)
switch (src.channels())
{
typedef typename TypeVec<uchar, cn>::vec_type src_t;
dim3 block(32, 8);
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
RGB_to_YV12<<<grid, block, 0, stream>>>(static_cast< PtrStepSz<src_t> >(src), dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
case 1:
Gray_to_YV12<<<grid, block>>>(globPtr<uchar>(src), globPtr<uchar>(dst));
break;
case 3:
RGB_to_YV12<<<grid, block>>>(globPtr<uchar3>(src), globPtr<uchar>(dst));
break;
case 4:
RGB_to_YV12<<<grid, block>>>(globPtr<uchar4>(src), globPtr<uchar>(dst));
break;
}
void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream)
{
typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst, cudaStream_t stream);
static const func_t funcs[] =
{
0, Gray_to_YV12_caller, 0, RGB_to_YV12_caller<3>, RGB_to_YV12_caller<4>
};
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
funcs[cn](src, dst, stream);
}
}}}
#endif
modules/cudacodec/src/video_reader.cpp
View file @
776c0cb0
...
...
@@ -53,10 +53,7 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&) {
#else // HAVE_NVCUVID
namespace
cv
{
namespace
cuda
{
namespace
device
{
void
NV12_to_RGB
(
const
PtrStepb
decodedFrame
,
PtrStepSz
<
uint
>
interopFrame
,
cudaStream_t
stream
=
0
);
}}}
void
videoDecPostProcessFrame
(
const
GpuMat
&
decodedFrame
,
OutputArray
_outFrame
,
int
width
,
int
height
);
using
namespace
cv
::
cudacodec
::
detail
;
...
...
@@ -125,18 +122,6 @@ namespace
CUvideoctxlock
m_lock
;
};
void
cudaPostProcessFrame
(
const
GpuMat
&
decodedFrame
,
OutputArray
_outFrame
,
int
width
,
int
height
)
{
using
namespace
cv
::
cuda
::
device
;
// Final Stage: NV12toARGB color space conversion
_outFrame
.
create
(
height
,
width
,
CV_8UC4
);
GpuMat
outFrame
=
_outFrame
.
getGpuMat
();
NV12_to_RGB
(
decodedFrame
,
outFrame
);
}
bool
VideoReaderImpl
::
nextFrame
(
OutputArray
frame
)
{
if
(
videoSource_
->
hasError
()
||
videoParser_
->
hasError
())
...
...
@@ -195,7 +180,7 @@ namespace
// perform post processing on the CUDA surface (performs colors space conversion and post processing)
// comment this out if we inclue the line of code seen above
cuda
PostProcessFrame
(
decodedFrame
,
frame
,
videoDecoder_
->
targetWidth
(),
videoDecoder_
->
targetHeight
());
videoDec
PostProcessFrame
(
decodedFrame
,
frame
,
videoDecoder_
->
targetWidth
(),
videoDecoder_
->
targetHeight
());
// unmap video frame
// unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding)
...
...
modules/cudacodec/src/video_writer.cpp
View file @
776c0cb0
...
...
@@ -62,10 +62,7 @@ Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>&, S
#else // !defined HAVE_CUDA || !defined WIN32
namespace
cv
{
namespace
cuda
{
namespace
device
{
void
RGB_to_YV12
(
const
PtrStepSzb
src
,
int
cn
,
PtrStepSzb
dst
,
cudaStream_t
stream
=
0
);
}}}
void
RGB_to_YV12
(
const
GpuMat
&
src
,
GpuMat
&
dst
);
///////////////////////////////////////////////////////////////////////////
// VideoWriterImpl
...
...
@@ -642,7 +639,7 @@ namespace
if
(
inputFormat_
==
SF_BGR
)
{
device
::
RGB_to_YV12
(
frame
,
frame
.
channels
()
,
videoFrame_
);
RGB_to_YV12
(
frame
,
videoFrame_
);
}
else
{
...
...
modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp
View file @
776c0cb0
...
...
@@ -72,7 +72,7 @@ template <typename T> struct GlobPtrSz : GlobPtr<T>
};
template
<
typename
T
>
__host__
GlobPtr
<
T
>
globPtr
(
T
*
data
,
size_t
step
)
__host__
__device__
GlobPtr
<
T
>
globPtr
(
T
*
data
,
size_t
step
)
{
GlobPtr
<
T
>
p
;
p
.
data
=
data
;
...
...
@@ -81,7 +81,7 @@ __host__ GlobPtr<T> globPtr(T* data, size_t step)
}
template
<
typename
T
>
__host__
GlobPtrSz
<
T
>
globPtr
(
T
*
data
,
size_t
step
,
int
rows
,
int
cols
)
__host__
__device__
GlobPtrSz
<
T
>
globPtr
(
T
*
data
,
size_t
step
,
int
rows
,
int
cols
)
{
GlobPtrSz
<
T
>
p
;
p
.
data
=
data
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment