Commit 0e439762 authored by Anatoly Baksheev

1) more convenient naming for the GPU samples

2) added mask support to the device 'transform' function
3) HOG GPU sample: waitKey(1) -> waitKey(3); otherwise the image is not displayed.
parent 790cd2ef
...@@ -217,7 +217,7 @@ namespace cv { namespace gpu { namespace mathfunc ...@@ -217,7 +217,7 @@ namespace cv { namespace gpu { namespace mathfunc
template <typename T1, typename T2> template <typename T1, typename T2>
struct NotEqual struct NotEqual
{ {
__device__ uchar operator()(const T1& src1, const T2& src2, int, int) __device__ uchar operator()(const T1& src1, const T2& src2)
{ {
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255); return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
} }
......
...@@ -47,31 +47,49 @@ ...@@ -47,31 +47,49 @@
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
template <typename T, typename D, typename UnOp> //! Mask accessor
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, UnOp op) template<class T> struct MaskReader_
{
PtrStep_<T> mask;
explicit MaskReader_(PtrStep_<T> mask): mask(mask) {}
__device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
};
//! Stub mask accessor
struct NoMask
{
__device__ bool operator()(int y, int x) const { return true; }
};
//! Transform kernels
template <typename T, typename D, typename Mask, typename UnOp>
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
{ {
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows) if (x < src.cols && y < src.rows && mask(y, x))
{ {
T src_data = src.ptr(y)[x]; T src_data = src.ptr(y)[x];
dst.ptr(y)[x] = op(src_data, x, y); dst.ptr(y)[x] = op(src_data);
} }
} }
template <typename T1, typename T2, typename D, typename BinOp>
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, BinOp op) template <typename T1, typename T2, typename D, typename Mask, typename BinOp>
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
{ {
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows) if (x < src1.cols && y < src1.rows && mask(y, x))
{ {
T1 src1_data = src1.ptr(y)[x]; T1 src1_data = src1.ptr(y)[x];
T2 src2_data = src2.ptr(y)[x]; T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data, x, y); dst.ptr(y)[x] = op(src1_data, src2_data);
} }
} }
}}} }}}
namespace cv namespace cv
...@@ -87,7 +105,7 @@ namespace cv ...@@ -87,7 +105,7 @@ namespace cv
grid.x = divUp(src.cols, threads.x); grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y); grid.y = divUp(src.rows, threads.y);
device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, op); device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
...@@ -101,7 +119,7 @@ namespace cv ...@@ -101,7 +119,7 @@ namespace cv
grid.x = divUp(src1.cols, threads.x); grid.x = divUp(src1.cols, threads.x);
grid.y = divUp(src1.rows, threads.y); grid.y = divUp(src1.rows, threads.y);
device::transform<T1, T2, D, BinOp><<<grid, threads, 0, stream>>>(src1, src2, dst, op); device::transform<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, device::NoMask(), op);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
......
...@@ -24,11 +24,11 @@ if (BUILD_EXAMPLES) ...@@ -24,11 +24,11 @@ if (BUILD_EXAMPLES)
# Define executable targets # Define executable targets
# --------------------------------------------- # ---------------------------------------------
MACRO(MY_DEFINE_EXAMPLE name srcs) MACRO(MY_DEFINE_EXAMPLE name srcs)
set(the_target "example_${name}") set(the_target "example_gpu_${name}")
add_executable(${the_target} ${srcs}) add_executable(${the_target} ${srcs})
set_target_properties(${the_target} PROPERTIES set_target_properties(${the_target} PROPERTIES
OUTPUT_NAME "${name}" OUTPUT_NAME "${name}"
PROJECT_LABEL "(EXAMPLE) ${name}") PROJECT_LABEL "(EXAMPLE_GPU) ${name}")
add_dependencies(${the_target} opencv_core opencv_flann opencv_imgproc opencv_highgui add_dependencies(${the_target} opencv_core opencv_flann opencv_imgproc opencv_highgui
opencv_ml opencv_video opencv_objdetect opencv_features2d opencv_ml opencv_video opencv_objdetect opencv_features2d
opencv_calib3d opencv_legacy opencv_contrib opencv_gpu) opencv_calib3d opencv_legacy opencv_contrib opencv_gpu)
......
...@@ -283,7 +283,7 @@ void App::RunOpencvGui() ...@@ -283,7 +283,7 @@ void App::RunOpencvGui()
// Show results // Show results
putText(img_to_show, GetPerformanceSummary(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2); putText(img_to_show, GetPerformanceSummary(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2);
imshow("opencv_gpu_hog", img_to_show); imshow("opencv_gpu_hog", img_to_show);
HandleKey((char)waitKey(1)); HandleKey((char)waitKey(3));
if (settings.src_is_video) if (settings.src_is_video)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment