submodule / opencv / Commits / 4b5bbb77

Commit 4b5bbb77 authored Sep 27, 2012 by Anatoly Baksheev

fast_nlm initial version

parent 2446c932
Showing 16 changed files with 879 additions and 49 deletions.
modules/core/src/cuda/matrix_operations.cu      +0    -4
modules/core/src/gpumat.cpp                     +9    -10
modules/gpu/CMakeLists.txt                      +1    -1
modules/gpu/doc/image_processing.rst            +1    -1
modules/gpu/include/opencv2/gpu/gpu.hpp         +2    -0
modules/gpu/perf/perf_denoising.cpp             +48   -0
modules/gpu/perf/perf_precomp.hpp               +1    -0
modules/gpu/src/cascadeclassifier.cpp           +4    -0
modules/gpu/src/cuda/nlm.cu                     +297  -0
modules/gpu/src/cudastream.cpp                  +1    -1
modules/gpu/src/denoising.cpp                   +230  -2
modules/gpu/src/imgproc.cpp                     +0    -25
modules/gpu/src/nvidia/core/NCV.cu              +2    -2
modules/gpu/src/opencv2/gpu/device/block.hpp    +205  -0
modules/gpu/src/precomp.cpp                     +30   -0
modules/gpu/test/test_denoising.cpp             +48   -3
modules/core/src/cuda/matrix_operations.cu

@@ -40,7 +40,6 @@
//
//M*/

-#if !defined CUDA_DISABLER

#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/transform.hpp"

@@ -342,5 +341,3 @@ namespace cv { namespace gpu { namespace device
# pragma clang diagnostic pop
#endif
}}} // namespace cv { namespace gpu { namespace device
-#endif /* CUDA_DISABLER */
\ No newline at end of file
modules/core/src/gpumat.cpp

@@ -94,7 +94,7 @@ namespace
bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
{
-#if defined HAVE_CUDA && !defined(CUDA_DISABLER)
+#if defined (HAVE_CUDA)
    return ::compareToSet(CUDA_ARCH_FEATURES, feature_set, std::greater_equal<int>());
#else
    (void)feature_set;

@@ -109,7 +109,7 @@ bool cv::gpu::TargetArchs::has(int major, int minor)
bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
{
-#if defined HAVE_CUDA && !defined(CUDA_DISABLER)
+#if defined (HAVE_CUDA)
    return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to<int>());
#else
    (void)major;

@@ -120,7 +120,7 @@ bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
bool cv::gpu::TargetArchs::hasBin(int major, int minor)
{
-#if defined (HAVE_CUDA) && !defined(CUDA_DISABLER)
+#if defined (HAVE_CUDA)
    return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to<int>());
#else
    (void)major;

@@ -131,7 +131,7 @@ bool cv::gpu::TargetArchs::hasBin(int major, int minor)
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
{
-#if defined HAVE_CUDA && !defined(CUDA_DISABLER)
+#if defined (HAVE_CUDA)
    return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::less_equal<int>());
#else

@@ -149,9 +149,8 @@ bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor)
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
{
-#if defined HAVE_CUDA && !defined(CUDA_DISABLER)
-    return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::greater_equal<int>());
+#if defined (HAVE_CUDA)
+    return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::greater_equal<int>());
#else
    (void)major;
    (void)minor;

@@ -161,7 +160,7 @@ bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
{
-#if defined HAVE_CUDA && !defined(CUDA_DISABLER)
+#if defined (HAVE_CUDA)
    return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor, std::greater_equal<int>());
#else

@@ -171,7 +170,7 @@ bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
#endif
}

-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+#if !defined (HAVE_CUDA)
#define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")

@@ -728,7 +727,7 @@ namespace
};
}

-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+#if !defined HAVE_CUDA || defined(CUDA_DISABLER_)
namespace
{
modules/gpu/CMakeLists.txt

@@ -3,7 +3,7 @@ if(ANDROID OR IOS)
endif()

set(the_description "GPU-accelerated Computer Vision")
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_nonfree opencv_legacy)
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_nonfree opencv_photo opencv_legacy)
ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda" "${CMAKE_CURRENT_SOURCE_DIR}/../highgui/src")
modules/gpu/doc/image_processing.rst

@@ -851,7 +851,7 @@ Performs pure non local means denoising without any simplification, and thus it
.. ocv:function:: void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_widow_size = 11, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null())

-    :param src: Source image. Supports only CV_8UC1, CV_8UC3.
+    :param src: Source image. Supports only CV_8UC1, CV_8UC2 and CV_8UC3.

    :param dst: Destination imagwe.
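For orientation, a minimal host-side sketch of calling the documented function (an editorial illustration, not part of the diff; the image file names are placeholders):

    #include <opencv2/opencv.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        // Upload a CV_8UC3 image, denoise on the GPU with h = 10 and the
        // default 11x11 search window / 7x7 block documented above.
        cv::gpu::GpuMat src(cv::imread("noisy.png")), dst;
        cv::gpu::nonLocalMeans(src, dst, 10.f);
        cv::imwrite("denoised.png", cv::Mat(dst)); // download and save
        return 0;
    }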
modules/gpu/include/opencv2/gpu/gpu.hpp

@@ -777,6 +777,8 @@ CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size,
CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_widow_size = 11, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());

+//! Fast (but approximate)version of non-local means algorith similar to CPU function (running sums technique)
+CV_EXPORTS void fastNlMeansDenoising(const GpuMat& src, GpuMat& dst, float h, int search_radius = 10, int block_radius = 3, Stream& s = Stream::Null());

struct CV_EXPORTS CannyBuf;
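A matching sketch for the new entry point (editorial, not part of the diff; note it takes radii rather than window sizes, with the defaults declared above):

    #include <opencv2/opencv.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::Mat gray = cv::imread("noisy.png", cv::IMREAD_GRAYSCALE); // CV_8UC1
        cv::gpu::GpuMat d_src(gray), d_dst;

        // Fast approximate non-local means with h = 10 and the default
        // search_radius = 10, block_radius = 3 (i.e. 21x21 and 7x7 windows).
        cv::gpu::fastNlMeansDenoising(d_src, d_dst, 10.f);

        cv::imwrite("denoised.png", cv::Mat(d_dst));
        return 0;
    }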
modules/gpu/perf/perf_denoising.cpp

@@ -96,3 +96,50 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
        FAIL();
    }
}

//////////////////////////////////////////////////////////////////////
// fastNonLocalMeans

DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, int, int, int);

PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
            Combine(GPU_TYPICAL_MAT_SIZES,
                    Values<MatDepth>(CV_8U),
                    Values(1),
                    Values(21),
                    Values(5, 7)))
{
    declare.time(30.0);

    cv::Size size = GET_PARAM(0);
    int depth = GET_PARAM(1);
    int channels = GET_PARAM(2);
    int search_widow_size = GET_PARAM(3);
    int block_size = GET_PARAM(4);

    float h = 10;
    int type = CV_MAKE_TYPE(depth, channels);

    cv::Mat src(size, type);
    fillRandom(src);

    if (runOnGpu)
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        cv::gpu::fastNlMeansDenoising(d_src, d_dst, h, search_widow_size / 2, block_size / 2);

        TEST_CYCLE()
        {
            cv::gpu::fastNlMeansDenoising(d_src, d_dst, h, search_widow_size / 2, block_size / 2);
        }
    }
    else
    {
        cv::Mat dst;
        cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);

        TEST_CYCLE()
        {
            cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size);
        }
    }
}
\ No newline at end of file
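Note the two argument conventions exercised above: the GPU entry point takes window radii (hence the / 2), while the CPU cv::fastNlMeansDenoising takes full odd window sizes. A tiny self-checking model of the relation (editorial, not part of the commit):

    #include <cassert>

    int main()
    {
        const int sizes[] = { 5, 7, 21 };       // odd window sizes used by the test
        for (int i = 0; i < 3; ++i)
        {
            int radius = sizes[i] / 2;          // what the GPU function is given
            assert(2 * radius + 1 == sizes[i]); // size and radius are equivalent
        }
        return 0;
    }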
modules/gpu/perf/perf_precomp.hpp

@@ -26,6 +26,7 @@
#include "opencv2/video/video.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/legacy/legacy.hpp"
+#include "opencv2/photo/photo.hpp"

#include "utility.hpp"
modules/gpu/src/cascadeclassifier.cpp

@@ -721,8 +721,12 @@ bool cv::gpu::CascadeClassifier_GPU::load(const string& filename)
    return !this->empty();
}

#endif

//////////////////////////////////////////////////////////////////////////////////////////////////////

+#if defined (HAVE_CUDA)

struct RectConvert
{
    Rect operator()(const NcvRect32u& nr) const
    {
        return Rect(nr.x, nr.y, nr.width, nr.height);
    }
modules/gpu/src/cuda/nlm.cu

@@ -47,6 +47,7 @@
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/block.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"

using namespace cv::gpu;

@@ -167,8 +168,303 @@ namespace cv { namespace gpu { namespace device
        }

        template void nlm_bruteforce_gpu<uchar>(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t);
+       template void nlm_bruteforce_gpu<uchar2>(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t);
        template void nlm_bruteforce_gpu<uchar3>(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t);
    }
}}}

//////////////////////////////////////////////////////////////////////////////////
//// Non Local Means Denosing (fast approximate version)

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        __device__ __forceinline__ int calcDist(const uchar& a, const uchar& b) { return (a-b)*(a-b); }
        __device__ __forceinline__ int calcDist(const uchar2& a, const uchar2& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y); }
        __device__ __forceinline__ int calcDist(const uchar3& a, const uchar3& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y) + (a.z-b.z)*(a.z-b.z); }

        template <class T> struct FastNonLocalMenas
        {
            enum
            {
                CTA_SIZE = 256,

                //TILE_COLS = 256,
                //TILE_ROWS = 32,

                TILE_COLS = 256,
                TILE_ROWS = 32,

                STRIDE = CTA_SIZE
            };

            struct plus
            {
                __device__ __forceinline__ float operator()(float v1, float v2) const { return v1 + v2; }
            };

            int search_radius;
            int block_radius;

            int search_window;
            int block_window;
            float minus_h2_inv;

            FastNonLocalMenas(int search_window_, int block_window_, float h) : search_radius(search_window_/2), block_radius(block_window_/2),
                search_window(search_window_), block_window(block_window_), minus_h2_inv(-1.f/(h * h * VecTraits<T>::cn)) {}

            PtrStep<T> src;
            mutable PtrStepi buffer;

            __device__ __forceinline__ void initSums_TileFistColumn(int i, int j, int* dist_sums, PtrStepi& col_dist_sums, PtrStepi& up_col_dist_sums) const
            {
                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    dist_sums[index] = 0;

                    for(int tx = 0; tx < block_window; ++tx)
                        col_dist_sums(tx, index) = 0;

                    int y = index / search_window;
                    int x = index - y * search_window;

                    int ay = i;
                    int ax = j;

                    int by = i + y - search_radius;
                    int bx = j + x - search_radius;

#if 1
                    for (int tx = -block_radius; tx <= block_radius; ++tx)
                    {
                        int col_dist_sums_tx_block_radius_index = 0;

                        for (int ty = -block_radius; ty <= block_radius; ++ty)
                        {
                            int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx));

                            dist_sums[index] += dist;
                            col_dist_sums_tx_block_radius_index += dist;
                        }

                        col_dist_sums(tx + block_radius, index) = col_dist_sums_tx_block_radius_index;
                    }
#else
                    for (int ty = -block_radius; ty <= block_radius; ++ty)
                        for (int tx = -block_radius; tx <= block_radius; ++tx)
                        {
                            int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx));

                            dist_sums[index] += dist;
                            col_dist_sums(tx + block_radius, index) += dist;
                        }
#endif

                    up_col_dist_sums(j, index) = col_dist_sums(block_window - 1, index);
                }
            }

            __device__ __forceinline__ void shiftLeftSums_TileFirstRow(int i, int j, int first_col, int* dist_sums, PtrStepi& col_dist_sums, PtrStepi& up_col_dist_sums) const
            {
                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    int y = index / search_window;
                    int x = index - y * search_window;

                    int ay = i;
                    int ax = j + block_radius;

                    int by = i + y - search_radius;
                    int bx = j + x - search_radius + block_radius;

                    int col_dist_sum = 0;

                    for (int ty = -block_radius; ty <= block_radius; ++ty)
                        col_dist_sum += calcDist(src(ay + ty, ax), src(by + ty, bx));

                    int old_dist_sums = dist_sums[index];
                    int old_col_sum = col_dist_sums(first_col, index);
                    dist_sums[index] += col_dist_sum - old_col_sum;

                    col_dist_sums(first_col, index) = col_dist_sum;
                    up_col_dist_sums(j, index) = col_dist_sum;
                }
            }

            __device__ __forceinline__ void shiftLeftSums_UsingUpSums(int i, int j, int first_col, int* dist_sums, PtrStepi& col_dist_sums, PtrStepi& up_col_dist_sums) const
            {
                int ay = i;
                int ax = j + block_radius;

                int start_by = i - search_radius;
                int start_bx = j - search_radius + block_radius;

                T a_up   = src(ay - block_radius - 1, ax);
                T a_down = src(ay + block_radius, ax);

                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    dist_sums[index] -= col_dist_sums(first_col, index);

                    int y = index / search_window;
                    int x = index - y * search_window;

                    int by = start_by + y;
                    int bx = start_bx + x;

                    T b_up   = src(by - block_radius - 1, bx);
                    T b_down = src(by + block_radius, bx);

                    int col_dist_sums_first_col_index = up_col_dist_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up);

                    col_dist_sums(first_col, index) = col_dist_sums_first_col_index;
                    dist_sums[index] += col_dist_sums_first_col_index;
                    up_col_dist_sums(j, index) = col_dist_sums_first_col_index;
                }
            }

            __device__ __forceinline__ void convolve_search_window(int i, int j, const int* dist_sums, PtrStepi& col_dist_sums, PtrStepi& up_col_dist_sums, T& dst) const
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_type;

                float weights_sum = 0;
                sum_type sum = VecTraits<sum_type>::all(0);

                float bw2_inv = 1.f/(block_window * block_window);

                int start_x = j - search_radius;
                int start_y = i - search_radius;

                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    int y = index / search_window;
                    int x = index - y * search_window;

                    float avg_dist = dist_sums[index] * bw2_inv;
                    float weight = __expf(avg_dist * minus_h2_inv);
                    weights_sum += weight;

                    sum = sum + weight * saturate_cast<sum_type>(src(start_y + y, start_x + x));
                }

                volatile __shared__ float cta_buffer[CTA_SIZE];

                int tid = threadIdx.x;

                cta_buffer[tid] = weights_sum;
                __syncthreads();
                Block::reduce<CTA_SIZE>(cta_buffer, plus());

                if (tid == 0)
                    weights_sum = cta_buffer[0];

                __syncthreads();

                for(int n = 0; n < VecTraits<T>::cn; ++n)
                {
                    cta_buffer[tid] = reinterpret_cast<float*>(&sum)[n];
                    __syncthreads();
                    Block::reduce<CTA_SIZE>(cta_buffer, plus());

                    if (tid == 0)
                        reinterpret_cast<float*>(&sum)[n] = cta_buffer[0];
                    __syncthreads();
                }

                if (tid == 0)
                    dst = saturate_cast<T>(sum/weights_sum);
            }

            __device__ __forceinline__ void operator()(PtrStepSz<T>& dst) const
            {
                int tbx = blockIdx.x * TILE_COLS;
                int tby = blockIdx.y * TILE_ROWS;

                int tex = ::min(tbx + TILE_COLS, dst.cols);
                int tey = ::min(tby + TILE_ROWS, dst.rows);

                PtrStepi col_dist_sums;
                col_dist_sums.data = buffer.ptr(dst.cols + blockIdx.x * block_window) + blockIdx.y * search_window * search_window;
                col_dist_sums.step = buffer.step;

                PtrStepi up_col_dist_sums;
                up_col_dist_sums.data = buffer.data + blockIdx.y * search_window * search_window;
                up_col_dist_sums.step = buffer.step;

                extern __shared__ int dist_sums[]; //search_window * search_window

                int first_col = -1;

                for (int i = tby; i < tey; ++i)
                    for (int j = tbx; j < tex; ++j)
                    {
                        __syncthreads();

                        if (j == tbx)
                        {
                            initSums_TileFistColumn(i, j, dist_sums, col_dist_sums, up_col_dist_sums);
                            first_col = 0;
                        }
                        else
                        {
                            if (i == tby)
                                shiftLeftSums_TileFirstRow(i, j, first_col, dist_sums, col_dist_sums, up_col_dist_sums);
                            else
                                shiftLeftSums_UsingUpSums(i, j, first_col, dist_sums, col_dist_sums, up_col_dist_sums);

                            first_col = (first_col + 1) % block_window;
                        }

                        __syncthreads();

                        convolve_search_window(i, j, dist_sums, col_dist_sums, up_col_dist_sums, dst(i, j));
                    }
            }
        };

        template<typename T>
        __global__ void fast_nlm_kernel(const FastNonLocalMenas<T> fnlm, PtrStepSz<T> dst) { fnlm(dst); }

        void nln_fast_get_buffer_size(const PtrStepSzb& src, int search_window, int block_window, int& buffer_cols, int& buffer_rows)
        {
            typedef FastNonLocalMenas<uchar> FNLM;
            dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));

            buffer_cols = search_window * search_window * grid.y;
            buffer_rows = src.cols + block_window * grid.x;
        }

        template<typename T>
        void nlm_fast_gpu(const PtrStepSzb& src, PtrStepSzb dst, PtrStepi buffer,
                          int search_window, int block_window, float h, cudaStream_t stream)
        {
            typedef FastNonLocalMenas<T> FNLM;
            FNLM fnlm(search_window, block_window, h);

            fnlm.src = (PtrStepSz<T>)src;
            fnlm.buffer = buffer;

            dim3 block(FNLM::CTA_SIZE, 1);
            dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));
            int smem = search_window * search_window * sizeof(int);

            fast_nlm_kernel<<<grid, block, smem>>>(fnlm, (PtrStepSz<T>)dst);
            cudaSafeCall ( cudaGetLastError () );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template void nlm_fast_gpu<uchar>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
        template void nlm_fast_gpu<uchar2>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
        template void nlm_fast_gpu<uchar3>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
    }
}}}

#endif /* CUDA_DISABLER */
\ No newline at end of file
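The heart of the kernel is the running-sums bookkeeping: for every offset in the search window it keeps the full patch distance (dist_sums), the per-column partial sums of that distance (col_dist_sums, cycled via first_col), and the column sums computed on the row above (up_col_dist_sums). Stepping one pixel to the right then costs one column subtract and one column add instead of a full block comparison, and the entering column itself is derived from the row above with a single subtract/add of its top and bottom elements. A single-threaded CPU sketch of the horizontal part of this scheme (an editorial illustration under simplified assumptions: one fixed search offset, interior pixels only):

    #include <cstdio>
    #include <vector>

    int main()
    {
        const int W = 16, H = 8, R = 2;                // width, height, block radius
        std::vector<std::vector<int> > img(H, std::vector<int>(W));
        for (int y = 0; y < H; ++y)
            for (int x = 0; x < W; ++x)
                img[y][x] = (x * 7 + y * 13) % 32;     // arbitrary test pattern

        const int ay = 3, by = 4;                      // one fixed search offset (dy = 1)
        const int block_window = 2 * R + 1;

        // Squared distance between the two block windows restricted to column x+dx.
        auto colDist = [&](int x, int dx)
        {
            int s = 0;
            for (int t = -R; t <= R; ++t)
            {
                int d = img[ay + t][x + dx] - img[by + t][x + dx];
                s += d * d;
            }
            return s;
        };

        // Initialise the full patch distance and its per-column sums at x = R.
        int x = R, dist_sum = 0;
        std::vector<int> col(block_window);
        for (int dx = -R; dx <= R; ++dx)
        {
            col[dx + R] = colDist(x, dx);
            dist_sum += col[dx + R];
        }

        // Slide right: one column leaves on the left, one enters on the right.
        // first_col cycles exactly like the kernel's (first_col + 1) % block_window.
        for (int first_col = 0; x + 1 + R < W; first_col = (first_col + 1) % block_window)
        {
            ++x;
            int entering = colDist(x, R);              // new rightmost column
            dist_sum += entering - col[first_col];     // O(1) update of the patch SSD
            col[first_col] = entering;
            std::printf("x = %2d  patch SSD = %d\n", x, dist_sum);
        }
        return 0;
    }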
modules/gpu/src/cudastream.cpp

@@ -64,7 +64,7 @@ CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
#endif /* !defined (HAVE_CUDA) */

-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA)

void cv::gpu::Stream::create() { throw_nogpu(); }
void cv::gpu::Stream::release() { throw_nogpu(); }
modules/gpu/src/denoising.cpp

@@ -49,9 +49,12 @@ using namespace cv::gpu;

void cv::gpu::bilateralFilter(const GpuMat&, GpuMat&, int, float, float, int, Stream&) { throw_nogpu(); }
void cv::gpu::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_nogpu(); }
+void cv::gpu::fastNlMeansDenoising(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_nogpu(); }

#else

//////////////////////////////////////////////////////////////////////////////////
//// Non Local Means Denosing (brute force)

namespace cv { namespace gpu { namespace device
{

@@ -106,9 +109,9 @@ void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_
    using cv::gpu::device::imgproc::nlm_bruteforce_gpu;
    typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);

-    static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, 0 /*nlm_bruteforce_gpu<uchar2>*/ , nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ };
+    static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ };

-    CV_Assert(src.type() == CV_8U || src.type() == CV_8UC3);
+    CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3);

    const func_t func = funcs[src.channels() - 1];
    CV_Assert(func != 0);

@@ -127,10 +130,235 @@ void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_
}

//////////////////////////////////////////////////////////////////////////////////
//// Non Local Means Denosing (fast approxinate)

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        void nln_fast_get_buffer_size(const PtrStepSzb& src, int search_window, int block_window, int& buffer_cols, int& buffer_rows);

        template<typename T>
        void nlm_fast_gpu(const PtrStepSzb& src, PtrStepSzb dst, PtrStepi buffer,
                          int search_window, int block_window, float h, cudaStream_t stream);
    }
}}}

//class CV_EXPORTS FastNonLocalMeansDenoising
//{
//public:
//    FastNonLocalMeansDenoising(float h, int search_radius, int block_radius, const Size& image_size = Size())
//    {
//        if (size.area() != 0)
//            allocate_buffers(image_size);
//    }
//
//    void operator()(const GpuMat& src, GpuMat& dst);
//
//private:
//    void allocate_buffers(const Size& image_size)
//    {
//        col_dist_sums.create(block_window_, search_window_ * search_window_, CV_32S);
//        up_col_dist_sums.create(image_size.width, search_window_ * search_window_, CV_32S);
//    }
//
//    int search_radius_;
//    int block_radius;
//    GpuMat col_dist_sums_;
//    GpuMat up_col_dist_sums_;
//};

void cv::gpu::fastNlMeansDenoising(const GpuMat& src, GpuMat& dst, float h, int search_radius, int block_radius, Stream& s)
{
    dst.create(src.size(), src.type());
    CV_Assert(src.depth() == CV_8U && src.channels() < 4);

    GpuMat extended_src, src_hdr;
    int border_size = search_radius + block_radius;
    cv::gpu::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
    src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));

    using namespace cv::gpu::device::imgproc;
    typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
    static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};

    int search_window = 2 * search_radius + 1;
    int block_window = 2 * block_radius + 1;

    int bcols, brows;
    nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);

    //GpuMat col_dist_sums(block_window * gx, search_window * search_window * gy, CV_32S);
    //GpuMat up_col_dist_sums(src.cols, search_window * search_window * gy, CV_32S);
    GpuMat buffer(brows, bcols, CV_32S);

    funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
}
//void cv::gpu::fastNlMeansDenoisingColored( const GpuMat& src, GpuMat& dst, float h, float hForColorComponents, int templateWindowSize, int searchWindowSize)
//{
// Mat src = _src.getMat();
// _dst.create(src.size(), src.type());
// Mat dst = _dst.getMat();
// if (src.type() != CV_8UC3) {
// CV_Error(CV_StsBadArg, "Type of input image should be CV_8UC3!");
// return;
// }
// Mat src_lab;
// cvtColor(src, src_lab, CV_LBGR2Lab);
// Mat l(src.size(), CV_8U);
// Mat ab(src.size(), CV_8UC2);
// Mat l_ab[] = { l, ab };
// int from_to[] = { 0,0, 1,1, 2,2 };
// mixChannels(&src_lab, 1, l_ab, 2, from_to, 3);
// fastNlMeansDenoising(l, l, h, templateWindowSize, searchWindowSize);
// fastNlMeansDenoising(ab, ab, hForColorComponents, templateWindowSize, searchWindowSize);
// Mat l_ab_denoised[] = { l, ab };
// Mat dst_lab(src.size(), src.type());
// mixChannels(l_ab_denoised, 2, &dst_lab, 1, from_to, 3);
// cvtColor(dst_lab, dst, CV_Lab2LBGR);
//}
//static void fastNlMeansDenoisingMultiCheckPreconditions(
// const std::vector<Mat>& srcImgs,
// int imgToDenoiseIndex, int temporalWindowSize,
// int templateWindowSize, int searchWindowSize)
//{
// int src_imgs_size = (int)srcImgs.size();
// if (src_imgs_size == 0) {
// CV_Error(CV_StsBadArg, "Input images vector should not be empty!");
// }
// if (temporalWindowSize % 2 == 0 ||
// searchWindowSize % 2 == 0 ||
// templateWindowSize % 2 == 0) {
// CV_Error(CV_StsBadArg, "All windows sizes should be odd!");
// }
// int temporalWindowHalfSize = temporalWindowSize / 2;
// if (imgToDenoiseIndex - temporalWindowHalfSize < 0 ||
// imgToDenoiseIndex + temporalWindowHalfSize >= src_imgs_size)
// {
// CV_Error(CV_StsBadArg,
// "imgToDenoiseIndex and temporalWindowSize "
// "should be choosen corresponding srcImgs size!");
// }
// for (int i = 1; i < src_imgs_size; i++) {
// if (srcImgs[0].size() != srcImgs[i].size() || srcImgs[0].type() != srcImgs[i].type()) {
// CV_Error(CV_StsBadArg, "Input images should have the same size and type!");
// }
// }
//}
//void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
// int imgToDenoiseIndex, int temporalWindowSize,
// float h, int templateWindowSize, int searchWindowSize)
//{
// vector<Mat> srcImgs;
// _srcImgs.getMatVector(srcImgs);
// fastNlMeansDenoisingMultiCheckPreconditions(
// srcImgs, imgToDenoiseIndex,
// temporalWindowSize, templateWindowSize, searchWindowSize
// );
// _dst.create(srcImgs[0].size(), srcImgs[0].type());
// Mat dst = _dst.getMat();
// switch (srcImgs[0].type()) {
// case CV_8U:
// parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
// FastNlMeansMultiDenoisingInvoker<uchar>(
// srcImgs, imgToDenoiseIndex, temporalWindowSize,
// dst, templateWindowSize, searchWindowSize, h));
// break;
// case CV_8UC2:
// parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
// FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
// srcImgs, imgToDenoiseIndex, temporalWindowSize,
// dst, templateWindowSize, searchWindowSize, h));
// break;
// case CV_8UC3:
// parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
// FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
// srcImgs, imgToDenoiseIndex, temporalWindowSize,
// dst, templateWindowSize, searchWindowSize, h));
// break;
// default:
// CV_Error(CV_StsBadArg,
// "Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported");
// }
//}
//void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
// int imgToDenoiseIndex, int temporalWindowSize,
// float h, float hForColorComponents,
// int templateWindowSize, int searchWindowSize)
//{
// vector<Mat> srcImgs;
// _srcImgs.getMatVector(srcImgs);
// fastNlMeansDenoisingMultiCheckPreconditions(
// srcImgs, imgToDenoiseIndex,
// temporalWindowSize, templateWindowSize, searchWindowSize
// );
// _dst.create(srcImgs[0].size(), srcImgs[0].type());
// Mat dst = _dst.getMat();
// int src_imgs_size = (int)srcImgs.size();
// if (srcImgs[0].type() != CV_8UC3) {
// CV_Error(CV_StsBadArg, "Type of input images should be CV_8UC3!");
// return;
// }
// int from_to[] = { 0,0, 1,1, 2,2 };
// // TODO convert only required images
// vector<Mat> src_lab(src_imgs_size);
// vector<Mat> l(src_imgs_size);
// vector<Mat> ab(src_imgs_size);
// for (int i = 0; i < src_imgs_size; i++) {
// src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3);
// l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1);
// ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2);
// cvtColor(srcImgs[i], src_lab[i], CV_LBGR2Lab);
// Mat l_ab[] = { l[i], ab[i] };
// mixChannels(&src_lab[i], 1, l_ab, 2, from_to, 3);
// }
// Mat dst_l;
// Mat dst_ab;
// fastNlMeansDenoisingMulti(
// l, dst_l, imgToDenoiseIndex, temporalWindowSize,
// h, templateWindowSize, searchWindowSize);
// fastNlMeansDenoisingMulti(
// ab, dst_ab, imgToDenoiseIndex, temporalWindowSize,
// hForColorComponents, templateWindowSize, searchWindowSize);
// Mat l_ab_denoised[] = { dst_l, dst_ab };
// Mat dst_lab(srcImgs[0].size(), srcImgs[0].type());
// mixChannels(l_ab_denoised, 2, &dst_lab, 1, from_to, 3);
// cvtColor(dst_lab, dst, CV_Lab2LBGR);
//}
#endif
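For scale, a worked example of the scratch-buffer sizing done by nln_fast_get_buffer_size (editorial; the numbers are mine, derived from the TILE_COLS = 256, TILE_ROWS = 32 constants in nlm.cu and the default radii) for a 1920x1080 single-channel image:

    #include <cstdio>

    static int divUp(int a, int b) { return (a + b - 1) / b; }

    int main()
    {
        const int cols = 1920, rows = 1080;
        const int search_window = 2 * 10 + 1;  // default search_radius = 10 -> 21
        const int block_window  = 2 * 3 + 1;   // default block_radius  = 3 ->  7

        int grid_x = divUp(cols, 256);         // 8 tiles across
        int grid_y = divUp(rows, 32);          // 34 tiles down

        // Mirrors nln_fast_get_buffer_size: one search-window-squared slab of
        // up_col_dist_sums per tile row, with col_dist_sums appended past dst.cols.
        int buffer_cols = search_window * search_window * grid_y; // 21*21*34 = 14994
        int buffer_rows = cols + block_window * grid_x;           // 1920 + 7*8 = 1976

        std::printf("CV_32S buffer: %d x %d (%.1f MB)\n", buffer_rows, buffer_cols,
                    buffer_rows * (double)buffer_cols * sizeof(int) / (1024.0 * 1024.0));
        return 0;
    }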
modules/gpu/src/imgproc.cpp

@@ -1110,31 +1110,6 @@ namespace
    }
}

-bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
-{
-    switch (cpuBorderType)
-    {
-    case cv::BORDER_REFLECT101:
-        gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
-        return true;
-    case cv::BORDER_REPLICATE:
-        gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
-        return true;
-    case cv::BORDER_CONSTANT:
-        gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
-        return true;
-    case cv::BORDER_REFLECT:
-        gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
-        return true;
-    case cv::BORDER_WRAP:
-        gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
-        return true;
-    default:
-        return false;
-    };
-    return false;
-}

void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)
{
    GpuMat Dx, Dy;
modules/gpu/src/nvidia/core/NCV.cu

@@ -39,8 +39,6 @@
//
//M*/

-#if !defined CUDA_DISABLER

#include <iostream>
#include <string>

@@ -77,6 +75,8 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
    debugOutputHandler = func;
}

+#if !defined CUDA_DISABLER

//==============================================================================
//
modules/gpu/src/opencv2/gpu/device/block.hpp (new file, mode 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DEVICE_BLOCK_HPP__
#define __OPENCV_GPU_DEVICE_BLOCK_HPP__
namespace cv { namespace gpu { namespace device
{
    struct Block
    {
        static __device__ __forceinline__ unsigned int id()
        {
            return blockIdx.x;
        }

        static __device__ __forceinline__ unsigned int stride()
        {
            return blockDim.x * blockDim.y * blockDim.z;
        }

        static __device__ __forceinline__ void sync()
        {
            __syncthreads();
        }

        static __device__ __forceinline__ int flattenedThreadId()
        {
            return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
        }

        template<typename It, typename T>
        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
        {
            int STRIDE = stride();
            It t = beg + flattenedThreadId();

            for(; t < end; t += STRIDE)
                *t = value;
        }

        template<typename OutIt, typename T>
        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
        {
            int STRIDE = stride();
            int tid = flattenedThreadId();
            value += tid;

            for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }

        template<typename InIt, typename OutIt>
        static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
        {
            int STRIDE = stride();
            InIt t = beg + flattenedThreadId();
            OutIt o = out + (t - beg);

            for(; t < end; t += STRIDE, o += STRIDE)
                *o = *t;
        }

        template<typename InIt, typename OutIt, class UnOp>
        static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
        {
            int STRIDE = stride();
            InIt t = beg + flattenedThreadId();
            OutIt o = out + (t - beg);

            for(; t < end; t += STRIDE, o += STRIDE)
                *o = op(*t);
        }

        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
        static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
        {
            int STRIDE = stride();
            InIt1 t1 = beg1 + flattenedThreadId();
            InIt2 t2 = beg2 + flattenedThreadId();
            OutIt o = out + (t1 - beg1);

            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
                *o = op(*t1, *t2);
        }

        template<int CTA_SIZE, typename T, class BinOp>
        static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
        {
            int tid = flattenedThreadId();
            T val = buffer[tid];

            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }

            if (tid < 32)
            {
                if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
                if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
                if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid +  8]); }
                if (CTA_SIZE >=  8) { buffer[tid] = val = op(val, buffer[tid +  4]); }
                if (CTA_SIZE >=  4) { buffer[tid] = val = op(val, buffer[tid +  2]); }
                if (CTA_SIZE >=  2) { buffer[tid] = val = op(val, buffer[tid +  1]); }
            }
        }

        template<int CTA_SIZE, typename T, class BinOp>
        static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
        {
            int tid = flattenedThreadId();
            T val = buffer[tid] = init;
            __syncthreads();

            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }

            if (tid < 32)
            {
                if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
                if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
                if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid +  8]); }
                if (CTA_SIZE >=  8) { buffer[tid] = val = op(val, buffer[tid +  4]); }
                if (CTA_SIZE >=  4) { buffer[tid] = val = op(val, buffer[tid +  2]); }
                if (CTA_SIZE >=  2) { buffer[tid] = val = op(val, buffer[tid +  1]); }
            }
            __syncthreads();
            return buffer[0];
        }

        template <typename T, class BinOp>
        static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
        {
            int ftid = flattenedThreadId();
            int sft = stride();

            if (sft < n)
            {
                for (unsigned int i = sft + ftid; i < n; i += sft)
                    data[ftid] = op(data[ftid], data[i]);

                __syncthreads();
                n = sft;
            }

            while (n > 1)
            {
                unsigned int half = n / 2;

                if (ftid < half)
                    data[ftid] = op(data[ftid], data[n - ftid - 1]);

                __syncthreads();
                n = n - half;
            }
        }
    };
}}}

#endif /* __OPENCV_GPU_DEVICE_BLOCK_HPP__ */
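Block::reduce above is what convolve_search_window in nlm.cu uses to sum the per-thread weights across the CTA. A host-side model of the halving scheme (editorial illustration; on the GPU each stage's iterations execute in parallel, and the final steps below a width of 32 rely on warp-synchronous execution over the volatile buffer, which is why no __syncthreads() appears inside the tid < 32 branch):

    #include <cstdio>

    int main()
    {
        const int CTA_SIZE = 256;
        float buffer[CTA_SIZE];
        for (int i = 0; i < CTA_SIZE; ++i)
            buffer[i] = 1.0f;                       // reducing 256 ones should give 256

        // Each stage folds the upper half onto the lower half, mirroring the
        // "if (CTA_SIZE >= n) ... op(val, buffer[tid + n/2])" ladder above.
        for (int active = CTA_SIZE / 2; active >= 1; active /= 2)
            for (int tid = 0; tid < active; ++tid)  // done in parallel by the threads
                buffer[tid] += buffer[tid + active];

        std::printf("reduced value = %g\n", buffer[0]); // prints 256
        return 0;
    }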
modules/gpu/src/precomp.cpp

@@ -41,4 +41,34 @@
#include "precomp.hpp"

+bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
+{
+    switch (cpuBorderType)
+    {
+    case cv::BORDER_REFLECT101:
+        gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
+        return true;
+    case cv::BORDER_REPLICATE:
+        gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
+        return true;
+    case cv::BORDER_CONSTANT:
+        gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
+        return true;
+    case cv::BORDER_REFLECT:
+        gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
+        return true;
+    case cv::BORDER_WRAP:
+        gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
+        return true;
+    default:
+        return false;
+    };
+    return false;
+}

/* End of file. */
modules/gpu/test/test_denoising.cpp

@@ -96,7 +96,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Denoising, BilateralFilter, testing::Combine(
////////////////////////////////////////////////////////
// Brute Force Non local means

-struct NonLocalMeans: testing::TestWithParam<cv::gpu::DeviceInfo>
+struct BruteForceNonLocalMeans: testing::TestWithParam<cv::gpu::DeviceInfo>
{
    cv::gpu::DeviceInfo devInfo;

@@ -107,7 +107,7 @@ struct NonLocalMeans: testing::TestWithParam<cv::gpu::DeviceInfo>
    }
};

-TEST_P(NonLocalMeans, Regression)
+TEST_P(BruteForceNonLocalMeans, Regression)
{
    using cv::gpu::GpuMat;

@@ -134,7 +134,52 @@ TEST_P(NonLocalMeans, Regression)
    EXPECT_MAT_NEAR(gray_gold, dgray, 1e-4);
}

-INSTANTIATE_TEST_CASE_P(GPU_Denoising, NonLocalMeans, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Denoising, BruteForceNonLocalMeans, ALL_DEVICES);

////////////////////////////////////////////////////////
// Fast Force Non local means

struct FastNonLocalMeans: testing::TestWithParam<cv::gpu::DeviceInfo>
{
    cv::gpu::DeviceInfo devInfo;

    virtual void SetUp()
    {
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
    }
};

TEST_P(FastNonLocalMeans, Regression)
{
    using cv::gpu::GpuMat;

    cv::Mat bgr = readImage("denoising/lena_noised_gaussian_sigma=20_multi_0.png", cv::IMREAD_COLOR);
    ASSERT_FALSE(bgr.empty());

    cv::Mat gray;
    cv::cvtColor(bgr, gray, CV_BGR2GRAY);

    GpuMat dbgr, dgray;
    cv::gpu::fastNlMeansDenoising(GpuMat(gray), dgray, 10);

#if 0
    //dumpImage("denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));
    dumpImage("denoising/fnlm_denoised_lena_gray.png", cv::Mat(dgray));
#endif

    //cv::Mat bgr_gold = readImage("denoising/denoised_lena_bgr.png", cv::IMREAD_COLOR);
    cv::Mat gray_gold = readImage("denoising/fnlm_denoised_lena_gray.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(/*bgr_gold.empty() || */ gray_gold.empty());

    //EXPECT_MAT_NEAR(bgr_gold, dbgr, 1e-4);
    EXPECT_MAT_NEAR(gray_gold, dgray, 1e-4);
}

INSTANTIATE_TEST_CASE_P(GPU_Denoising, FastNonLocalMeans, ALL_DEVICES);

#endif // HAVE_CUDA