Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6b6cfa89
Commit
6b6cfa89
authored
Mar 13, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Mar 13, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2382 from ilya-lavrenov:tapi_nlmeans
parents
553673ee
9b31e6cd
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
903 additions
and
303 deletions
+903
-303
perf_denoising.cpp
modules/photo/perf/opencl/perf_denoising.cpp
+4
-4
arrays.hpp
modules/photo/src/arrays.hpp
+50
-30
denoising.cpp
modules/photo/src/denoising.cpp
+29
-18
fast_nlmeans_denoising_invoker.hpp
modules/photo/src/fast_nlmeans_denoising_invoker.hpp
+95
-107
fast_nlmeans_denoising_invoker_commons.hpp
modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
+59
-13
fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+162
-0
fast_nlmeans_multi_denoising_invoker.hpp
modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
+116
-131
nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+291
-0
precomp.hpp
modules/photo/src/precomp.hpp
+2
-0
test_denoising.cpp
modules/photo/test/ocl/test_denoising.cpp
+95
-0
No files found.
modules/photo/perf/opencl/perf_denoising.cpp
View file @
6b6cfa89
...
@@ -26,7 +26,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscale)
...
@@ -26,7 +26,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscale)
OCL_TEST_CYCLE
()
OCL_TEST_CYCLE
()
cv
::
fastNlMeansDenoising
(
original
,
result
,
10
);
cv
::
fastNlMeansDenoising
(
original
,
result
,
10
);
SANITY_CHECK
(
result
);
SANITY_CHECK
(
result
,
1
);
}
}
OCL_PERF_TEST
(
Photo
,
DenoisingColored
)
OCL_PERF_TEST
(
Photo
,
DenoisingColored
)
...
@@ -42,10 +42,10 @@ OCL_PERF_TEST(Photo, DenoisingColored)
...
@@ -42,10 +42,10 @@ OCL_PERF_TEST(Photo, DenoisingColored)
OCL_TEST_CYCLE
()
OCL_TEST_CYCLE
()
cv
::
fastNlMeansDenoisingColored
(
original
,
result
,
10
,
10
);
cv
::
fastNlMeansDenoisingColored
(
original
,
result
,
10
,
10
);
SANITY_CHECK
(
result
);
SANITY_CHECK
(
result
,
2
);
}
}
OCL_PERF_TEST
(
Photo
,
DenoisingGrayscaleMulti
)
OCL_PERF_TEST
(
Photo
,
D
ISABLED_D
enoisingGrayscaleMulti
)
{
{
const
int
imgs_count
=
3
;
const
int
imgs_count
=
3
;
...
@@ -68,7 +68,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti)
...
@@ -68,7 +68,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti)
SANITY_CHECK
(
result
);
SANITY_CHECK
(
result
);
}
}
OCL_PERF_TEST
(
Photo
,
DenoisingColoredMulti
)
OCL_PERF_TEST
(
Photo
,
D
ISABLED_D
enoisingColoredMulti
)
{
{
const
int
imgs_count
=
3
;
const
int
imgs_count
=
3
;
...
...
modules/photo/src/arrays.hpp
View file @
6b6cfa89
...
@@ -39,10 +39,14 @@
...
@@ -39,10 +39,14 @@
//
//
//M*/
//M*/
#include "opencv2/core/base.hpp"
#ifndef __OPENCV_DENOISING_ARRAYS_HPP__
#ifndef __OPENCV_DENOISING_ARRAYS_HPP__
#define __OPENCV_DENOISING_ARRAYS_HPP__
#define __OPENCV_DENOISING_ARRAYS_HPP__
template
<
class
T
>
struct
Array2d
{
template
<
class
T
>
struct
Array2d
{
T
*
a
;
T
*
a
;
int
n1
,
n2
;
int
n1
,
n2
;
bool
needToDeallocArray
;
bool
needToDeallocArray
;
...
@@ -50,14 +54,16 @@ template <class T> struct Array2d {
...
@@ -50,14 +54,16 @@ template <class T> struct Array2d {
Array2d
(
const
Array2d
&
array2d
)
:
Array2d
(
const
Array2d
&
array2d
)
:
a
(
array2d
.
a
),
n1
(
array2d
.
n1
),
n2
(
array2d
.
n2
),
needToDeallocArray
(
false
)
a
(
array2d
.
a
),
n1
(
array2d
.
n1
),
n2
(
array2d
.
n2
),
needToDeallocArray
(
false
)
{
{
if
(
array2d
.
needToDeallocArray
)
{
if
(
array2d
.
needToDeallocArray
)
// copy constructor for self allocating arrays not supported
{
throw
new
std
::
exception
(
);
CV_Error
(
Error
::
BadDataPtr
,
"Copy constructor for self allocating arrays not supported"
);
}
}
}
}
Array2d
(
T
*
_a
,
int
_n1
,
int
_n2
)
:
Array2d
(
T
*
_a
,
int
_n1
,
int
_n2
)
:
a
(
_a
),
n1
(
_n1
),
n2
(
_n2
),
needToDeallocArray
(
false
)
{}
a
(
_a
),
n1
(
_n1
),
n2
(
_n2
),
needToDeallocArray
(
false
)
{
}
Array2d
(
int
_n1
,
int
_n2
)
:
Array2d
(
int
_n1
,
int
_n2
)
:
n1
(
_n1
),
n2
(
_n2
),
needToDeallocArray
(
true
)
n1
(
_n1
),
n2
(
_n2
),
needToDeallocArray
(
true
)
...
@@ -65,28 +71,34 @@ template <class T> struct Array2d {
...
@@ -65,28 +71,34 @@ template <class T> struct Array2d {
a
=
new
T
[
n1
*
n2
];
a
=
new
T
[
n1
*
n2
];
}
}
~
Array2d
()
{
~
Array2d
()
if
(
needToDeallocArray
)
{
{
if
(
needToDeallocArray
)
delete
[]
a
;
delete
[]
a
;
}
}
}
T
*
operator
[]
(
int
i
)
{
T
*
operator
[]
(
int
i
)
{
return
a
+
i
*
n2
;
return
a
+
i
*
n2
;
}
}
inline
T
*
row_ptr
(
int
i
)
{
inline
T
*
row_ptr
(
int
i
)
{
return
(
*
this
)[
i
];
return
(
*
this
)[
i
];
}
}
};
};
template
<
class
T
>
struct
Array3d
{
template
<
class
T
>
struct
Array3d
{
T
*
a
;
T
*
a
;
int
n1
,
n2
,
n3
;
int
n1
,
n2
,
n3
;
bool
needToDeallocArray
;
bool
needToDeallocArray
;
Array3d
(
T
*
_a
,
int
_n1
,
int
_n2
,
int
_n3
)
:
Array3d
(
T
*
_a
,
int
_n1
,
int
_n2
,
int
_n3
)
:
a
(
_a
),
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
needToDeallocArray
(
false
)
{}
a
(
_a
),
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
needToDeallocArray
(
false
)
{
}
Array3d
(
int
_n1
,
int
_n2
,
int
_n3
)
:
Array3d
(
int
_n1
,
int
_n2
,
int
_n3
)
:
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
needToDeallocArray
(
true
)
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
needToDeallocArray
(
true
)
...
@@ -94,64 +106,72 @@ template <class T> struct Array3d {
...
@@ -94,64 +106,72 @@ template <class T> struct Array3d {
a
=
new
T
[
n1
*
n2
*
n3
];
a
=
new
T
[
n1
*
n2
*
n3
];
}
}
~
Array3d
()
{
~
Array3d
()
if
(
needToDeallocArray
)
{
{
if
(
needToDeallocArray
)
delete
[]
a
;
delete
[]
a
;
}
}
}
Array2d
<
T
>
operator
[]
(
int
i
)
{
Array2d
<
T
>
operator
[]
(
int
i
)
{
Array2d
<
T
>
array2d
(
a
+
i
*
n2
*
n3
,
n2
,
n3
);
Array2d
<
T
>
array2d
(
a
+
i
*
n2
*
n3
,
n2
,
n3
);
return
array2d
;
return
array2d
;
}
}
inline
T
*
row_ptr
(
int
i1
,
int
i2
)
{
inline
T
*
row_ptr
(
int
i1
,
int
i2
)
{
return
a
+
i1
*
n2
*
n3
+
i2
*
n3
;
return
a
+
i1
*
n2
*
n3
+
i2
*
n3
;
}
}
};
};
template
<
class
T
>
struct
Array4d
{
template
<
class
T
>
struct
Array4d
{
T
*
a
;
T
*
a
;
int
n1
,
n2
,
n3
,
n4
;
int
n1
,
n2
,
n3
,
n4
;
bool
needToDeallocArray
;
bool
needToDeallocArray
;
int
steps
[
4
];
int
steps
[
4
];
void
init_steps
()
{
void
init_steps
()
{
steps
[
0
]
=
n2
*
n3
*
n4
;
steps
[
0
]
=
n2
*
n3
*
n4
;
steps
[
1
]
=
n3
*
n4
;
steps
[
1
]
=
n3
*
n4
;
steps
[
2
]
=
n4
;
steps
[
2
]
=
n4
;
steps
[
3
]
=
1
;
steps
[
3
]
=
1
;
}
}
Array4d
(
T
*
_a
,
int
_n1
,
int
_n2
,
int
_n3
,
int
_n4
)
:
Array4d
(
T
*
_a
,
int
_n1
,
int
_n2
,
int
_n3
,
int
_n4
)
:
a
(
_a
),
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
n4
(
_n4
),
needToDeallocArray
(
false
)
a
(
_a
),
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
n4
(
_n4
),
needToDeallocArray
(
false
)
{
{
init_steps
();
init_steps
();
}
}
Array4d
(
int
_n1
,
int
_n2
,
int
_n3
,
int
_n4
)
:
Array4d
(
int
_n1
,
int
_n2
,
int
_n3
,
int
_n4
)
:
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
n4
(
_n4
),
needToDeallocArray
(
true
)
n1
(
_n1
),
n2
(
_n2
),
n3
(
_n3
),
n4
(
_n4
),
needToDeallocArray
(
true
)
{
{
a
=
new
T
[
n1
*
n2
*
n3
*
n4
];
a
=
new
T
[
n1
*
n2
*
n3
*
n4
];
init_steps
();
init_steps
();
}
}
~
Array4d
()
{
~
Array4d
()
if
(
needToDeallocArray
)
{
{
if
(
needToDeallocArray
)
delete
[]
a
;
delete
[]
a
;
}
}
}
Array3d
<
T
>
operator
[]
(
int
i
)
{
Array3d
<
T
>
operator
[]
(
int
i
)
{
Array3d
<
T
>
array3d
(
a
+
i
*
n2
*
n3
*
n4
,
n2
,
n3
,
n4
);
Array3d
<
T
>
array3d
(
a
+
i
*
n2
*
n3
*
n4
,
n2
,
n3
,
n4
);
return
array3d
;
return
array3d
;
}
}
inline
T
*
row_ptr
(
int
i1
,
int
i2
,
int
i3
)
{
inline
T
*
row_ptr
(
int
i1
,
int
i2
,
int
i3
)
{
return
a
+
i1
*
n2
*
n3
*
n4
+
i2
*
n3
*
n4
+
i3
*
n4
;
return
a
+
i1
*
n2
*
n3
*
n4
+
i2
*
n3
*
n4
+
i3
*
n4
;
}
}
inline
int
step_size
(
int
dimension
)
{
inline
int
step_size
(
int
dimension
)
{
return
steps
[
dimension
];
return
steps
[
dimension
];
}
}
};
};
...
...
modules/photo/src/denoising.cpp
View file @
6b6cfa89
...
@@ -40,14 +40,17 @@
...
@@ -40,14 +40,17 @@
//M*/
//M*/
#include "precomp.hpp"
#include "precomp.hpp"
#include "opencv2/photo.hpp"
#include "opencv2/imgproc.hpp"
#include "fast_nlmeans_denoising_invoker.hpp"
#include "fast_nlmeans_denoising_invoker.hpp"
#include "fast_nlmeans_multi_denoising_invoker.hpp"
#include "fast_nlmeans_multi_denoising_invoker.hpp"
#include "fast_nlmeans_denoising_opencl.hpp"
void
cv
::
fastNlMeansDenoising
(
InputArray
_src
,
OutputArray
_dst
,
float
h
,
void
cv
::
fastNlMeansDenoising
(
InputArray
_src
,
OutputArray
_dst
,
float
h
,
int
templateWindowSize
,
int
searchWindowSize
)
int
templateWindowSize
,
int
searchWindowSize
)
{
{
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
(
_src
.
isUMat
()
||
_dst
.
isUMat
()),
ocl_fastNlMeansDenoising
(
_src
,
_dst
,
h
,
templateWindowSize
,
searchWindowSize
))
Mat
src
=
_src
.
getMat
();
Mat
src
=
_src
.
getMat
();
_dst
.
create
(
src
.
size
(),
src
.
type
());
_dst
.
create
(
src
.
size
(),
src
.
type
());
Mat
dst
=
_dst
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
...
@@ -83,15 +86,20 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
...
@@ -83,15 +86,20 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
float
h
,
float
hForColorComponents
,
float
h
,
float
hForColorComponents
,
int
templateWindowSize
,
int
searchWindowSize
)
int
templateWindowSize
,
int
searchWindowSize
)
{
{
Mat
src
=
_src
.
getMat
();
if
(
_src
.
type
()
!=
CV_8UC3
)
_dst
.
create
(
src
.
size
(),
src
.
type
());
{
Mat
dst
=
_dst
.
getMat
();
if
(
src
.
type
()
!=
CV_8UC3
)
{
CV_Error
(
Error
::
StsBadArg
,
"Type of input image should be CV_8UC3!"
);
CV_Error
(
Error
::
StsBadArg
,
"Type of input image should be CV_8UC3!"
);
return
;
return
;
}
}
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
(
_dst
.
isUMat
()
||
_src
.
isUMat
()),
ocl_fastNlMeansDenoisingColored
(
_src
,
_dst
,
h
,
hForColorComponents
,
templateWindowSize
,
searchWindowSize
))
Mat
src
=
_src
.
getMat
();
_dst
.
create
(
src
.
size
(),
src
.
type
());
Mat
dst
=
_dst
.
getMat
();
Mat
src_lab
;
Mat
src_lab
;
cvtColor
(
src
,
src_lab
,
COLOR_LBGR2Lab
);
cvtColor
(
src
,
src_lab
,
COLOR_LBGR2Lab
);
...
@@ -117,7 +125,8 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
...
@@ -117,7 +125,8 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
int
templateWindowSize
,
int
searchWindowSize
)
int
templateWindowSize
,
int
searchWindowSize
)
{
{
int
src_imgs_size
=
static_cast
<
int
>
(
srcImgs
.
size
());
int
src_imgs_size
=
static_cast
<
int
>
(
srcImgs
.
size
());
if
(
src_imgs_size
==
0
)
{
if
(
src_imgs_size
==
0
)
{
CV_Error
(
Error
::
StsBadArg
,
"Input images vector should not be empty!"
);
CV_Error
(
Error
::
StsBadArg
,
"Input images vector should not be empty!"
);
}
}
...
@@ -136,11 +145,11 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
...
@@ -136,11 +145,11 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
"should be chosen corresponding srcImgs size!"
);
"should be chosen corresponding srcImgs size!"
);
}
}
for
(
int
i
=
1
;
i
<
src_imgs_size
;
i
++
)
{
for
(
int
i
=
1
;
i
<
src_imgs_size
;
i
++
)
if
(
srcImgs
[
0
].
size
()
!=
srcImgs
[
i
].
size
()
||
srcImgs
[
0
].
type
()
!=
srcImgs
[
i
].
type
())
{
if
(
srcImgs
[
0
].
size
()
!=
srcImgs
[
i
].
size
()
||
srcImgs
[
0
].
type
()
!=
srcImgs
[
i
].
type
())
{
CV_Error
(
Error
::
StsBadArg
,
"Input images should have the same size and type!"
);
CV_Error
(
Error
::
StsBadArg
,
"Input images should have the same size and type!"
);
}
}
}
}
}
void
cv
::
fastNlMeansDenoisingMulti
(
InputArrayOfArrays
_srcImgs
,
OutputArray
_dst
,
void
cv
::
fastNlMeansDenoisingMulti
(
InputArrayOfArrays
_srcImgs
,
OutputArray
_dst
,
...
@@ -152,12 +161,13 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
...
@@ -152,12 +161,13 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
fastNlMeansDenoisingMultiCheckPreconditions
(
fastNlMeansDenoisingMultiCheckPreconditions
(
srcImgs
,
imgToDenoiseIndex
,
srcImgs
,
imgToDenoiseIndex
,
temporalWindowSize
,
templateWindowSize
,
searchWindowSize
temporalWindowSize
,
templateWindowSize
,
searchWindowSize
);
);
_dst
.
create
(
srcImgs
[
0
].
size
(),
srcImgs
[
0
].
type
());
_dst
.
create
(
srcImgs
[
0
].
size
(),
srcImgs
[
0
].
type
());
Mat
dst
=
_dst
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
switch
(
srcImgs
[
0
].
type
())
{
switch
(
srcImgs
[
0
].
type
())
{
case
CV_8U
:
case
CV_8U
:
parallel_for_
(
cv
::
Range
(
0
,
srcImgs
[
0
].
rows
),
parallel_for_
(
cv
::
Range
(
0
,
srcImgs
[
0
].
rows
),
FastNlMeansMultiDenoisingInvoker
<
uchar
>
(
FastNlMeansMultiDenoisingInvoker
<
uchar
>
(
...
@@ -192,15 +202,15 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
...
@@ -192,15 +202,15 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
fastNlMeansDenoisingMultiCheckPreconditions
(
fastNlMeansDenoisingMultiCheckPreconditions
(
srcImgs
,
imgToDenoiseIndex
,
srcImgs
,
imgToDenoiseIndex
,
temporalWindowSize
,
templateWindowSize
,
searchWindowSize
temporalWindowSize
,
templateWindowSize
,
searchWindowSize
);
);
_dst
.
create
(
srcImgs
[
0
].
size
(),
srcImgs
[
0
].
type
());
_dst
.
create
(
srcImgs
[
0
].
size
(),
srcImgs
[
0
].
type
());
Mat
dst
=
_dst
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
int
src_imgs_size
=
static_cast
<
int
>
(
srcImgs
.
size
());
int
src_imgs_size
=
static_cast
<
int
>
(
srcImgs
.
size
());
if
(
srcImgs
[
0
].
type
()
!=
CV_8UC3
)
{
if
(
srcImgs
[
0
].
type
()
!=
CV_8UC3
)
{
CV_Error
(
Error
::
StsBadArg
,
"Type of input images should be CV_8UC3!"
);
CV_Error
(
Error
::
StsBadArg
,
"Type of input images should be CV_8UC3!"
);
return
;
return
;
}
}
...
@@ -211,7 +221,8 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
...
@@ -211,7 +221,8 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
std
::
vector
<
Mat
>
src_lab
(
src_imgs_size
);
std
::
vector
<
Mat
>
src_lab
(
src_imgs_size
);
std
::
vector
<
Mat
>
l
(
src_imgs_size
);
std
::
vector
<
Mat
>
l
(
src_imgs_size
);
std
::
vector
<
Mat
>
ab
(
src_imgs_size
);
std
::
vector
<
Mat
>
ab
(
src_imgs_size
);
for
(
int
i
=
0
;
i
<
src_imgs_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
src_imgs_size
;
i
++
)
{
src_lab
[
i
]
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
CV_8UC3
);
src_lab
[
i
]
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
CV_8UC3
);
l
[
i
]
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
CV_8UC1
);
l
[
i
]
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
CV_8UC1
);
ab
[
i
]
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
CV_8UC2
);
ab
[
i
]
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
CV_8UC2
);
...
...
modules/photo/src/fast_nlmeans_denoising_invoker.hpp
View file @
6b6cfa89
...
@@ -51,61 +51,61 @@
...
@@ -51,61 +51,61 @@
using
namespace
cv
;
using
namespace
cv
;
template
<
typename
T
>
template
<
typename
T
>
struct
FastNlMeansDenoisingInvoker
:
ParallelLoopBody
{
struct
FastNlMeansDenoisingInvoker
:
public
:
public
ParallelLoopBody
FastNlMeansDenoisingInvoker
(
const
Mat
&
src
,
Mat
&
dst
,
{
int
template_window_size
,
int
search_window_size
,
const
float
h
);
public
:
FastNlMeansDenoisingInvoker
(
const
Mat
&
src
,
Mat
&
dst
,
int
template_window_size
,
int
search_window_size
,
const
float
h
);
void
operator
()
(
const
Range
&
range
)
const
;
void
operator
()
(
const
Range
&
range
)
const
;
private
:
private
:
void
operator
=
(
const
FastNlMeansDenoisingInvoker
&
);
void
operator
=
(
const
FastNlMeansDenoisingInvoker
&
);
const
Mat
&
src_
;
const
Mat
&
src_
;
Mat
&
dst_
;
Mat
&
dst_
;
Mat
extended_src_
;
Mat
extended_src_
;
int
border_size_
;
int
border_size_
;
int
template_window_size_
;
int
template_window_size_
;
int
search_window_size_
;
int
search_window_size_
;
int
template_window_half_size_
;
int
template_window_half_size_
;
int
search_window_half_size_
;
int
search_window_half_size_
;
int
fixed_point_mult_
;
int
fixed_point_mult_
;
int
almost_template_window_size_sq_bin_shift_
;
int
almost_template_window_size_sq_bin_shift_
;
std
::
vector
<
int
>
almost_dist2weight_
;
std
::
vector
<
int
>
almost_dist2weight_
;
void
calcDistSumsForFirstElementInRow
(
void
calcDistSumsForFirstElementInRow
(
int
i
,
int
i
,
Array2d
<
int
>&
dist_sums
,
Array2d
<
int
>&
dist_sums
,
Array3d
<
int
>&
col_dist_sums
,
Array3d
<
int
>&
col_dist_sums
,
Array3d
<
int
>&
up_col_dist_sums
)
const
;
Array3d
<
int
>&
up_col_dist_sums
)
const
;
void
calcDistSumsForElementInFirstRow
(
void
calcDistSumsForElementInFirstRow
(
int
i
,
int
i
,
int
j
,
int
first_col_num
,
int
j
,
Array2d
<
int
>&
dist_sums
,
int
first_col_num
,
Array3d
<
int
>&
col_dist_sums
,
Array2d
<
int
>&
dist_sums
,
Array3d
<
int
>&
up_col_dist_sums
)
const
;
Array3d
<
int
>&
col_dist_sums
,
Array3d
<
int
>&
up_col_dist_sums
)
const
;
};
};
inline
int
getNearestPowerOf2
(
int
value
)
inline
int
getNearestPowerOf2
(
int
value
)
{
{
int
p
=
0
;
int
p
=
0
;
while
(
1
<<
p
<
value
)
++
p
;
while
(
1
<<
p
<
value
)
++
p
;
return
p
;
return
p
;
}
}
template
<
class
T
>
template
<
class
T
>
FastNlMeansDenoisingInvoker
<
T
>::
FastNlMeansDenoisingInvoker
(
FastNlMeansDenoisingInvoker
<
T
>::
FastNlMeansDenoisingInvoker
(
const
cv
::
Mat
&
src
,
const
Mat
&
src
,
Mat
&
dst
,
cv
::
Mat
&
dst
,
int
template_window_size
,
int
template_window_size
,
int
search_window_size
,
int
search_window_size
,
const
float
h
)
:
src_
(
src
),
dst_
(
dst
)
const
float
h
)
:
src_
(
src
),
dst_
(
dst
)
{
{
CV_Assert
(
src
.
channels
()
==
sizeof
(
T
));
//T is Vec1b or Vec2b or Vec3b
CV_Assert
(
src
.
channels
()
==
sizeof
(
T
));
//T is Vec1b or Vec2b or Vec3b
...
@@ -115,26 +115,25 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
...
@@ -115,26 +115,25 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
search_window_size_
=
search_window_half_size_
*
2
+
1
;
search_window_size_
=
search_window_half_size_
*
2
+
1
;
border_size_
=
search_window_half_size_
+
template_window_half_size_
;
border_size_
=
search_window_half_size_
+
template_window_half_size_
;
copyMakeBorder
(
src_
,
extended_src_
,
copyMakeBorder
(
src_
,
extended_src_
,
border_size_
,
border_size_
,
border_size_
,
border_size_
,
BORDER_DEFAULT
);
border_size_
,
border_size_
,
border_size_
,
border_size_
,
cv
::
BORDER_DEFAULT
);
const
int
max_estimate_sum_value
=
search_window_size_
*
search_window_size_
*
255
;
const
int
max_estimate_sum_value
=
search_window_size_
*
search_window_size_
*
255
;
fixed_point_mult_
=
std
::
numeric_limits
<
int
>::
max
()
/
max_estimate_sum_value
;
fixed_point_mult_
=
std
::
numeric_limits
<
int
>::
max
()
/
max_estimate_sum_value
;
// precalc weight for every possible l2 dist between blocks
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
// additional optimization of precalced weights to replace division(averaging) by binary shift
CV_Assert
(
template_window_size_
<=
46340
);
// sqrt(INT_MAX)
CV_Assert
(
template_window_size_
<=
46340
);
// sqrt(INT_MAX)
int
template_window_size_sq
=
template_window_size_
*
template_window_size_
;
int
template_window_size_sq
=
template_window_size_
*
template_window_size_
;
almost_template_window_size_sq_bin_shift_
=
getNearestPowerOf2
(
template_window_size_sq
);
almost_template_window_size_sq_bin_shift_
=
getNearestPowerOf2
(
template_window_size_sq
);
double
almost_dist2actual_dist_multiplier
=
((
double
)(
1
<<
almost_template_window_size_sq_bin_shift_
))
/
template_window_size_sq
;
double
almost_dist2actual_dist_multiplier
=
((
double
)(
1
<<
almost_template_window_size_sq_bin_shift_
))
/
template_window_size_sq
;
int
max_dist
=
255
*
255
*
sizeof
(
T
);
int
max_dist
=
255
*
255
*
sizeof
(
T
);
int
almost_max_dist
=
(
int
)
(
max_dist
/
almost_dist2actual_dist_multiplier
+
1
);
int
almost_max_dist
=
(
int
)(
max_dist
/
almost_dist2actual_dist_multiplier
+
1
);
almost_dist2weight_
.
resize
(
almost_max_dist
);
almost_dist2weight_
.
resize
(
almost_max_dist
);
const
double
WEIGHT_THRESHOLD
=
0.001
;
const
double
WEIGHT_THRESHOLD
=
0.001
;
for
(
int
almost_dist
=
0
;
almost_dist
<
almost_max_dist
;
almost_dist
++
)
{
for
(
int
almost_dist
=
0
;
almost_dist
<
almost_max_dist
;
almost_dist
++
)
{
double
dist
=
almost_dist
*
almost_dist2actual_dist_multiplier
;
double
dist
=
almost_dist
*
almost_dist2actual_dist_multiplier
;
int
weight
=
cvRound
(
fixed_point_mult_
*
std
::
exp
(
-
dist
/
(
h
*
h
*
sizeof
(
T
))));
int
weight
=
cvRound
(
fixed_point_mult_
*
std
::
exp
(
-
dist
/
(
h
*
h
*
sizeof
(
T
))));
...
@@ -144,50 +143,56 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
...
@@ -144,50 +143,56 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
almost_dist2weight_
[
almost_dist
]
=
weight
;
almost_dist2weight_
[
almost_dist
]
=
weight
;
}
}
CV_Assert
(
almost_dist2weight_
[
0
]
==
fixed_point_mult_
);
CV_Assert
(
almost_dist2weight_
[
0
]
==
fixed_point_mult_
);
// additional optimization init end
if
(
dst_
.
empty
())
{
// additional optimization init end
if
(
dst_
.
empty
())
dst_
=
Mat
::
zeros
(
src_
.
size
(),
src_
.
type
());
dst_
=
Mat
::
zeros
(
src_
.
size
(),
src_
.
type
());
}
}
}
template
<
class
T
>
template
<
class
T
>
void
FastNlMeansDenoisingInvoker
<
T
>::
operator
()
(
const
Range
&
range
)
const
{
void
FastNlMeansDenoisingInvoker
<
T
>::
operator
()
(
const
Range
&
range
)
const
{
int
row_from
=
range
.
start
;
int
row_from
=
range
.
start
;
int
row_to
=
range
.
end
-
1
;
int
row_to
=
range
.
end
-
1
;
// sums of cols anf rows for current pixel p
Array2d
<
int
>
dist_sums
(
search_window_size_
,
search_window_size_
);
Array2d
<
int
>
dist_sums
(
search_window_size_
,
search_window_size_
);
// for lazy calc optimization
// for lazy calc optimization
(sum of cols for current pixel)
Array3d
<
int
>
col_dist_sums
(
template_window_size_
,
search_window_size_
,
search_window_size_
);
Array3d
<
int
>
col_dist_sums
(
template_window_size_
,
search_window_size_
,
search_window_size_
);
int
first_col_num
=
-
1
;
int
first_col_num
=
-
1
;
// last elements of column sum (for each element in row)
Array3d
<
int
>
up_col_dist_sums
(
src_
.
cols
,
search_window_size_
,
search_window_size_
);
Array3d
<
int
>
up_col_dist_sums
(
src_
.
cols
,
search_window_size_
,
search_window_size_
);
for
(
int
i
=
row_from
;
i
<=
row_to
;
i
++
)
{
for
(
int
i
=
row_from
;
i
<=
row_to
;
i
++
)
for
(
int
j
=
0
;
j
<
src_
.
cols
;
j
++
)
{
{
for
(
int
j
=
0
;
j
<
src_
.
cols
;
j
++
)
{
int
search_window_y
=
i
-
search_window_half_size_
;
int
search_window_y
=
i
-
search_window_half_size_
;
int
search_window_x
=
j
-
search_window_half_size_
;
int
search_window_x
=
j
-
search_window_half_size_
;
// calc dist_sums
// calc dist_sums
if
(
j
==
0
)
{
if
(
j
==
0
)
{
calcDistSumsForFirstElementInRow
(
i
,
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
calcDistSumsForFirstElementInRow
(
i
,
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
first_col_num
=
0
;
first_col_num
=
0
;
}
}
else
{
// calc cur dist_sums using previous dist_sums
else
if
(
i
==
row_from
)
{
{
// calc cur dist_sums using previous dist_sums
if
(
i
==
row_from
)
{
calcDistSumsForElementInFirstRow
(
i
,
j
,
first_col_num
,
calcDistSumsForElementInFirstRow
(
i
,
j
,
first_col_num
,
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
}
}
else
{
else
{
int
ay
=
border_size_
+
i
;
int
ay
=
border_size_
+
i
;
int
ax
=
border_size_
+
j
+
template_window_half_size_
;
int
ax
=
border_size_
+
j
+
template_window_half_size_
;
int
start_by
=
int
start_by
=
border_size_
+
i
-
search_window_half_size_
;
border_size_
+
i
-
search_window_half_size_
;
int
start_bx
=
border_size_
+
j
-
search_window_half_size_
+
template_window_half_size_
;
int
start_bx
=
border_size_
+
j
-
search_window_half_size_
+
template_window_half_size_
;
T
a_up
=
extended_src_
.
at
<
T
>
(
ay
-
template_window_half_size_
-
1
,
ax
);
T
a_up
=
extended_src_
.
at
<
T
>
(
ay
-
template_window_half_size_
-
1
,
ax
);
T
a_down
=
extended_src_
.
at
<
T
>
(
ay
+
template_window_half_size_
,
ax
);
T
a_down
=
extended_src_
.
at
<
T
>
(
ay
+
template_window_half_size_
,
ax
);
...
@@ -195,33 +200,25 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
...
@@ -195,33 +200,25 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
// copy class member to local variable for optimization
// copy class member to local variable for optimization
int
search_window_size
=
search_window_size_
;
int
search_window_size
=
search_window_size_
;
for
(
int
y
=
0
;
y
<
search_window_size
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size
;
y
++
)
int
*
dist_sums_row
=
dist_sums
.
row_ptr
(
y
);
{
int
*
dist_sums_row
=
dist_sums
.
row_ptr
(
y
);
int
*
col_dist_sums_row
=
col_dist_sums
.
row_ptr
(
first_col_num
,
y
);
int
*
col_dist_sums_row
=
col_dist_sums
.
row_ptr
(
first_col_num
,
y
);
int
*
up_col_dist_sums_row
=
up_col_dist_sums
.
row_ptr
(
j
,
y
);
int
*
up_col_dist_sums_row
=
up_col_dist_sums
.
row_ptr
(
j
,
y
);
const
T
*
b_up_ptr
=
extended_src_
.
ptr
<
T
>
(
start_by
-
template_window_half_size_
-
1
+
y
);
const
T
*
b_down_ptr
=
const
T
*
b_up_ptr
=
extended_src_
.
ptr
<
T
>
(
start_by
-
template_window_half_size_
-
1
+
y
);
extended_src_
.
ptr
<
T
>
(
start_by
+
template_window_half_size_
+
y
);
const
T
*
b_down_ptr
=
extended_src_
.
ptr
<
T
>
(
start_by
+
template_window_half_size_
+
y
);
for
(
int
x
=
0
;
x
<
search_window_size
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size
;
x
++
)
{
// remove from current pixel sum column sum with index "first_col_num"
dist_sums_row
[
x
]
-=
col_dist_sums_row
[
x
];
dist_sums_row
[
x
]
-=
col_dist_sums_row
[
x
];
col_dist_sums_row
[
x
]
=
int
bx
=
start_bx
+
x
;
up_col_dist_sums_row
[
x
]
+
col_dist_sums_row
[
x
]
=
up_col_dist_sums_row
[
x
]
+
calcUpDownDist
(
a_up
,
a_down
,
b_up_ptr
[
bx
],
b_down_ptr
[
bx
]);
calcUpDownDist
(
a_up
,
a_down
,
b_up_ptr
[
start_bx
+
x
],
b_down_ptr
[
start_bx
+
x
]
);
dist_sums_row
[
x
]
+=
col_dist_sums_row
[
x
];
dist_sums_row
[
x
]
+=
col_dist_sums_row
[
x
];
up_col_dist_sums_row
[
x
]
=
col_dist_sums_row
[
x
];
up_col_dist_sums_row
[
x
]
=
col_dist_sums_row
[
x
];
}
}
}
}
}
}
...
@@ -230,20 +227,17 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
...
@@ -230,20 +227,17 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
}
}
// calc weights
// calc weights
int
weights_sum
=
0
;
int
estimation
[
3
],
weights_sum
=
0
;
for
(
size_t
channel_num
=
0
;
channel_num
<
sizeof
(
T
);
channel_num
++
)
int
estimation
[
3
];
for
(
size_t
channel_num
=
0
;
channel_num
<
sizeof
(
T
);
channel_num
++
)
{
estimation
[
channel_num
]
=
0
;
estimation
[
channel_num
]
=
0
;
}
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
const
T
*
cur_row_ptr
=
extended_src_
.
ptr
<
T
>
(
border_size_
+
search_window_y
+
y
);
const
T
*
cur_row_ptr
=
extended_src_
.
ptr
<
T
>
(
border_size_
+
search_window_y
+
y
);
int
*
dist_sums_row
=
dist_sums
.
row_ptr
(
y
);
int
*
dist_sums_row
=
dist_sums
.
row_ptr
(
y
);
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
int
almostAvgDist
=
{
dist_sums_row
[
x
]
>>
almost_template_window_size_sq_bin_shift_
;
int
almostAvgDist
=
dist_sums_row
[
x
]
>>
almost_template_window_size_sq_bin_shift_
;
int
weight
=
almost_dist2weight_
[
almostAvgDist
];
int
weight
=
almost_dist2weight_
[
almostAvgDist
];
weights_sum
+=
weight
;
weights_sum
+=
weight
;
...
@@ -269,18 +263,19 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
...
@@ -269,18 +263,19 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
{
{
int
j
=
0
;
int
j
=
0
;
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
dist_sums
[
y
][
x
]
=
0
;
dist_sums
[
y
][
x
]
=
0
;
for
(
int
tx
=
0
;
tx
<
template_window_size_
;
tx
++
)
{
for
(
int
tx
=
0
;
tx
<
template_window_size_
;
tx
++
)
col_dist_sums
[
tx
][
y
][
x
]
=
0
;
col_dist_sums
[
tx
][
y
][
x
]
=
0
;
}
int
start_y
=
i
+
y
-
search_window_half_size_
;
int
start_y
=
i
+
y
-
search_window_half_size_
;
int
start_x
=
j
+
x
-
search_window_half_size_
;
int
start_x
=
j
+
x
-
search_window_half_size_
;
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
{
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
for
(
int
tx
=
-
template_window_half_size_
;
tx
<=
template_window_half_size_
;
tx
++
)
{
for
(
int
tx
=
-
template_window_half_size_
;
tx
<=
template_window_half_size_
;
tx
++
)
{
int
dist
=
calcDist
<
T
>
(
extended_src_
,
int
dist
=
calcDist
<
T
>
(
extended_src_
,
border_size_
+
i
+
ty
,
border_size_
+
j
+
tx
,
border_size_
+
i
+
ty
,
border_size_
+
j
+
tx
,
border_size_
+
start_y
+
ty
,
border_size_
+
start_x
+
tx
);
border_size_
+
start_y
+
ty
,
border_size_
+
start_x
+
tx
);
...
@@ -288,18 +283,14 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
...
@@ -288,18 +283,14 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
dist_sums
[
y
][
x
]
+=
dist
;
dist_sums
[
y
][
x
]
+=
dist
;
col_dist_sums
[
tx
+
template_window_half_size_
][
y
][
x
]
+=
dist
;
col_dist_sums
[
tx
+
template_window_half_size_
][
y
][
x
]
+=
dist
;
}
}
}
up_col_dist_sums
[
j
][
y
][
x
]
=
col_dist_sums
[
template_window_size_
-
1
][
y
][
x
];
up_col_dist_sums
[
j
][
y
][
x
]
=
col_dist_sums
[
template_window_size_
-
1
][
y
][
x
];
}
}
}
}
}
template
<
class
T
>
template
<
class
T
>
inline
void
FastNlMeansDenoisingInvoker
<
T
>::
calcDistSumsForElementInFirstRow
(
inline
void
FastNlMeansDenoisingInvoker
<
T
>::
calcDistSumsForElementInFirstRow
(
int
i
,
int
i
,
int
j
,
int
first_col_num
,
int
j
,
int
first_col_num
,
Array2d
<
int
>&
dist_sums
,
Array2d
<
int
>&
dist_sums
,
Array3d
<
int
>&
col_dist_sums
,
Array3d
<
int
>&
col_dist_sums
,
Array3d
<
int
>&
up_col_dist_sums
)
const
Array3d
<
int
>&
up_col_dist_sums
)
const
...
@@ -312,23 +303,20 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
...
@@ -312,23 +303,20 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
int
new_last_col_num
=
first_col_num
;
int
new_last_col_num
=
first_col_num
;
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
dist_sums
[
y
][
x
]
-=
col_dist_sums
[
first_col_num
][
y
][
x
];
dist_sums
[
y
][
x
]
-=
col_dist_sums
[
first_col_num
][
y
][
x
];
col_dist_sums
[
new_last_col_num
][
y
][
x
]
=
0
;
col_dist_sums
[
new_last_col_num
][
y
][
x
]
=
0
;
int
by
=
start_by
+
y
;
int
by
=
start_by
+
y
;
int
bx
=
start_bx
+
x
;
int
bx
=
start_bx
+
x
;
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
{
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
col_dist_sums
[
new_last_col_num
][
y
][
x
]
+=
col_dist_sums
[
new_last_col_num
][
y
][
x
]
+=
calcDist
<
T
>
(
extended_src_
,
ay
+
ty
,
ax
,
by
+
ty
,
bx
);
calcDist
<
T
>
(
extended_src_
,
ay
+
ty
,
ax
,
by
+
ty
,
bx
);
}
dist_sums
[
y
][
x
]
+=
col_dist_sums
[
new_last_col_num
][
y
][
x
];
dist_sums
[
y
][
x
]
+=
col_dist_sums
[
new_last_col_num
][
y
][
x
];
up_col_dist_sums
[
j
][
y
][
x
]
=
col_dist_sums
[
new_last_col_num
][
y
][
x
];
up_col_dist_sums
[
j
][
y
][
x
]
=
col_dist_sums
[
new_last_col_num
][
y
][
x
];
}
}
}
}
}
#endif
#endif
modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
View file @
6b6cfa89
...
@@ -46,29 +46,35 @@ using namespace cv;
...
@@ -46,29 +46,35 @@ using namespace cv;
template
<
typename
T
>
static
inline
int
calcDist
(
const
T
a
,
const
T
b
);
template
<
typename
T
>
static
inline
int
calcDist
(
const
T
a
,
const
T
b
);
template
<>
inline
int
calcDist
(
const
uchar
a
,
const
uchar
b
)
{
template
<>
inline
int
calcDist
(
const
uchar
a
,
const
uchar
b
)
{
return
(
a
-
b
)
*
(
a
-
b
);
return
(
a
-
b
)
*
(
a
-
b
);
}
}
template
<>
inline
int
calcDist
(
const
Vec2b
a
,
const
Vec2b
b
)
{
template
<>
inline
int
calcDist
(
const
Vec2b
a
,
const
Vec2b
b
)
{
return
(
a
[
0
]
-
b
[
0
])
*
(
a
[
0
]
-
b
[
0
])
+
(
a
[
1
]
-
b
[
1
])
*
(
a
[
1
]
-
b
[
1
]);
return
(
a
[
0
]
-
b
[
0
])
*
(
a
[
0
]
-
b
[
0
])
+
(
a
[
1
]
-
b
[
1
])
*
(
a
[
1
]
-
b
[
1
]);
}
}
template
<>
inline
int
calcDist
(
const
Vec3b
a
,
const
Vec3b
b
)
{
template
<>
inline
int
calcDist
(
const
Vec3b
a
,
const
Vec3b
b
)
{
return
(
a
[
0
]
-
b
[
0
])
*
(
a
[
0
]
-
b
[
0
])
+
(
a
[
1
]
-
b
[
1
])
*
(
a
[
1
]
-
b
[
1
])
+
(
a
[
2
]
-
b
[
2
])
*
(
a
[
2
]
-
b
[
2
]);
return
(
a
[
0
]
-
b
[
0
])
*
(
a
[
0
]
-
b
[
0
])
+
(
a
[
1
]
-
b
[
1
])
*
(
a
[
1
]
-
b
[
1
])
+
(
a
[
2
]
-
b
[
2
])
*
(
a
[
2
]
-
b
[
2
]);
}
}
template
<
typename
T
>
static
inline
int
calcDist
(
const
Mat
&
m
,
int
i1
,
int
j1
,
int
i2
,
int
j2
)
{
template
<
typename
T
>
static
inline
int
calcDist
(
const
Mat
&
m
,
int
i1
,
int
j1
,
int
i2
,
int
j2
)
{
const
T
a
=
m
.
at
<
T
>
(
i1
,
j1
);
const
T
a
=
m
.
at
<
T
>
(
i1
,
j1
);
const
T
b
=
m
.
at
<
T
>
(
i2
,
j2
);
const
T
b
=
m
.
at
<
T
>
(
i2
,
j2
);
return
calcDist
<
T
>
(
a
,
b
);
return
calcDist
<
T
>
(
a
,
b
);
}
}
template
<
typename
T
>
static
inline
int
calcUpDownDist
(
T
a_up
,
T
a_down
,
T
b_up
,
T
b_down
)
{
template
<
typename
T
>
static
inline
int
calcUpDownDist
(
T
a_up
,
T
a_down
,
T
b_up
,
T
b_down
)
return
calcDist
(
a_down
,
b_down
)
-
calcDist
(
a_up
,
b_up
);
{
return
calcDist
(
a_down
,
b_down
)
-
calcDist
(
a_up
,
b_up
);
}
}
template
<>
inline
int
calcUpDownDist
(
uchar
a_up
,
uchar
a_down
,
uchar
b_up
,
uchar
b_down
)
{
template
<>
inline
int
calcUpDownDist
(
uchar
a_up
,
uchar
a_down
,
uchar
b_up
,
uchar
b_down
)
{
int
A
=
a_down
-
b_down
;
int
A
=
a_down
-
b_down
;
int
B
=
a_up
-
b_up
;
int
B
=
a_up
-
b_up
;
return
(
A
-
B
)
*
(
A
+
B
);
return
(
A
-
B
)
*
(
A
+
B
);
...
@@ -76,16 +82,37 @@ template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uch
...
@@ -76,16 +82,37 @@ template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uch
template
<
typename
T
>
static
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
T
p
);
template
<
typename
T
>
static
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
T
p
);
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
uchar
p
)
{
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
uchar
p
)
{
estimation
[
0
]
+=
weight
*
p
;
estimation
[
0
]
+=
weight
*
p
;
}
}
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
Vec2b
p
)
{
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
Vec2b
p
)
{
estimation
[
0
]
+=
weight
*
p
[
0
];
estimation
[
0
]
+=
weight
*
p
[
0
];
estimation
[
1
]
+=
weight
*
p
[
1
];
estimation
[
1
]
+=
weight
*
p
[
1
];
}
}
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
Vec3b
p
)
{
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
Vec3b
p
)
{
estimation
[
0
]
+=
weight
*
p
[
0
];
estimation
[
1
]
+=
weight
*
p
[
1
];
estimation
[
2
]
+=
weight
*
p
[
2
];
}
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
int
p
)
{
estimation
[
0
]
+=
weight
*
p
;
}
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
Vec2i
p
)
{
estimation
[
0
]
+=
weight
*
p
[
0
];
estimation
[
1
]
+=
weight
*
p
[
1
];
}
template
<>
inline
void
incWithWeight
(
int
*
estimation
,
int
weight
,
Vec3i
p
)
{
estimation
[
0
]
+=
weight
*
p
[
0
];
estimation
[
0
]
+=
weight
*
p
[
0
];
estimation
[
1
]
+=
weight
*
p
[
1
];
estimation
[
1
]
+=
weight
*
p
[
1
];
estimation
[
2
]
+=
weight
*
p
[
2
];
estimation
[
2
]
+=
weight
*
p
[
2
];
...
@@ -93,18 +120,21 @@ template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) {
...
@@ -93,18 +120,21 @@ template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) {
template
<
typename
T
>
static
inline
T
saturateCastFromArray
(
int
*
estimation
);
template
<
typename
T
>
static
inline
T
saturateCastFromArray
(
int
*
estimation
);
template
<>
inline
uchar
saturateCastFromArray
(
int
*
estimation
)
{
template
<>
inline
uchar
saturateCastFromArray
(
int
*
estimation
)
{
return
saturate_cast
<
uchar
>
(
estimation
[
0
]);
return
saturate_cast
<
uchar
>
(
estimation
[
0
]);
}
}
template
<>
inline
Vec2b
saturateCastFromArray
(
int
*
estimation
)
{
template
<>
inline
Vec2b
saturateCastFromArray
(
int
*
estimation
)
{
Vec2b
res
;
Vec2b
res
;
res
[
0
]
=
saturate_cast
<
uchar
>
(
estimation
[
0
]);
res
[
0
]
=
saturate_cast
<
uchar
>
(
estimation
[
0
]);
res
[
1
]
=
saturate_cast
<
uchar
>
(
estimation
[
1
]);
res
[
1
]
=
saturate_cast
<
uchar
>
(
estimation
[
1
]);
return
res
;
return
res
;
}
}
template
<>
inline
Vec3b
saturateCastFromArray
(
int
*
estimation
)
{
template
<>
inline
Vec3b
saturateCastFromArray
(
int
*
estimation
)
{
Vec3b
res
;
Vec3b
res
;
res
[
0
]
=
saturate_cast
<
uchar
>
(
estimation
[
0
]);
res
[
0
]
=
saturate_cast
<
uchar
>
(
estimation
[
0
]);
res
[
1
]
=
saturate_cast
<
uchar
>
(
estimation
[
1
]);
res
[
1
]
=
saturate_cast
<
uchar
>
(
estimation
[
1
]);
...
@@ -112,4 +142,20 @@ template <> inline Vec3b saturateCastFromArray(int* estimation) {
...
@@ -112,4 +142,20 @@ template <> inline Vec3b saturateCastFromArray(int* estimation) {
return
res
;
return
res
;
}
}
template
<>
inline
int
saturateCastFromArray
(
int
*
estimation
)
{
return
estimation
[
0
];
}
template
<>
inline
Vec2i
saturateCastFromArray
(
int
*
estimation
)
{
estimation
[
1
]
=
0
;
return
Vec2i
(
estimation
);
}
template
<>
inline
Vec3i
saturateCastFromArray
(
int
*
estimation
)
{
return
Vec3i
(
estimation
);
}
#endif
#endif
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
0 → 100644
View file @
6b6cfa89
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#ifndef __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
#define __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#ifdef HAVE_OPENCL
namespace
cv
{
enum
{
BLOCK_ROWS
=
32
,
BLOCK_COLS
=
32
,
CTA_SIZE
=
256
};
static
int
divUp
(
int
a
,
int
b
)
{
return
(
a
+
b
-
1
)
/
b
;
}
template
<
typename
FT
>
static
bool
ocl_calcAlmostDist2Weight
(
UMat
&
almostDist2Weight
,
int
searchWindowSize
,
int
templateWindowSize
,
FT
h
,
int
cn
,
int
&
almostTemplateWindowSizeSqBinShift
)
{
const
int
maxEstimateSumValue
=
searchWindowSize
*
searchWindowSize
*
255
;
int
fixedPointMult
=
std
::
numeric_limits
<
int
>::
max
()
/
maxEstimateSumValue
;
int
depth
=
DataType
<
FT
>::
depth
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
CV_Assert
(
templateWindowSize
<=
46340
);
// sqrt(INT_MAX)
int
templateWindowSizeSq
=
templateWindowSize
*
templateWindowSize
;
almostTemplateWindowSizeSqBinShift
=
getNearestPowerOf2
(
templateWindowSizeSq
);
FT
almostDist2ActualDistMultiplier
=
(
FT
)(
1
<<
almostTemplateWindowSizeSqBinShift
)
/
templateWindowSizeSq
;
const
FT
WEIGHT_THRESHOLD
=
1e-3
f
;
int
maxDist
=
255
*
255
*
cn
;
int
almostMaxDist
=
(
int
)(
maxDist
/
almostDist2ActualDistMultiplier
+
1
);
FT
den
=
1.0
f
/
(
h
*
h
*
cn
);
almostDist2Weight
.
create
(
1
,
almostMaxDist
,
CV_32SC1
);
ocl
::
Kernel
k
(
"calcAlmostDist2Weight"
,
ocl
::
photo
::
nlmeans_oclsrc
,
format
(
"-D OP_CALC_WEIGHTS -D FT=%s%s"
,
ocl
::
typeToStr
(
depth
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
PtrWriteOnly
(
almostDist2Weight
),
almostMaxDist
,
almostDist2ActualDistMultiplier
,
fixedPointMult
,
den
,
WEIGHT_THRESHOLD
);
size_t
globalsize
[
1
]
=
{
almostMaxDist
};
return
k
.
run
(
1
,
globalsize
,
NULL
,
false
);
}
static
bool
ocl_fastNlMeansDenoising
(
InputArray
_src
,
OutputArray
_dst
,
float
h
,
int
templateWindowSize
,
int
searchWindowSize
)
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
Size
size
=
_src
.
size
();
if
(
type
!=
CV_8UC1
||
type
!=
CV_8UC2
||
type
!=
CV_8UC4
)
return
false
;
int
templateWindowHalfWize
=
templateWindowSize
/
2
;
int
searchWindowHalfSize
=
searchWindowSize
/
2
;
templateWindowSize
=
templateWindowHalfWize
*
2
+
1
;
searchWindowSize
=
searchWindowHalfSize
*
2
+
1
;
int
nblocksx
=
divUp
(
size
.
width
,
BLOCK_COLS
),
nblocksy
=
divUp
(
size
.
height
,
BLOCK_ROWS
);
int
almostTemplateWindowSizeSqBinShift
=
-
1
;
char
cvt
[
2
][
40
];
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D CTA_SIZE2=%d -D convert_uchar_t=%s"
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
BLOCK_COLS
,
BLOCK_ROWS
,
CTA_SIZE
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
cn
,
CTA_SIZE
>>
1
,
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]));
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
almostDist2Weight
;
if
(
!
ocl_calcAlmostDist2Weight
<
float
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
almostTemplateWindowSizeSqBinShift
))
return
false
;
CV_Assert
(
almostTemplateWindowSizeSqBinShift
>=
0
);
UMat
srcex
;
int
borderSize
=
searchWindowHalfSize
+
templateWindowHalfWize
;
copyMakeBorder
(
_src
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
_dst
.
create
(
size
,
type
);
UMat
dst
=
_dst
.
getUMat
();
int
searchWindowSizeSq
=
searchWindowSize
*
searchWindowSize
;
Size
upColSumSize
(
size
.
width
,
searchWindowSizeSq
*
nblocksy
);
Size
colSumSize
(
nblocksx
*
templateWindowSize
,
searchWindowSizeSq
*
nblocksy
);
UMat
buffer
(
upColSumSize
+
colSumSize
,
CV_32SC
(
cn
));
srcex
=
srcex
(
Rect
(
Point
(
borderSize
,
borderSize
),
size
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
srcex
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
almostDist2Weight
),
ocl
::
KernelArg
::
PtrReadOnly
(
buffer
),
almostTemplateWindowSizeSqBinShift
);
size_t
globalsize
[
2
]
=
{
nblocksx
*
CTA_SIZE
,
nblocksy
},
localsize
[
2
]
=
{
CTA_SIZE
,
1
};
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
static
bool
ocl_fastNlMeansDenoisingColored
(
InputArray
_src
,
OutputArray
_dst
,
float
h
,
float
hForColorComponents
,
int
templateWindowSize
,
int
searchWindowSize
)
{
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
size
(),
src
.
type
());
UMat
dst
=
_dst
.
getUMat
();
UMat
src_lab
;
cvtColor
(
src
,
src_lab
,
COLOR_LBGR2Lab
);
UMat
l
(
src
.
size
(),
CV_8U
);
UMat
ab
(
src
.
size
(),
CV_8UC2
);
std
::
vector
<
UMat
>
l_ab
(
2
),
l_ab_denoised
(
2
);
l_ab
[
0
]
=
l
;
l_ab
[
1
]
=
ab
;
l_ab_denoised
[
0
].
create
(
src
.
size
(),
CV_8U
);
l_ab_denoised
[
1
].
create
(
src
.
size
(),
CV_8UC2
);
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
mixChannels
(
std
::
vector
<
UMat
>
(
1
,
src_lab
),
l_ab
,
from_to
,
3
);
fastNlMeansDenoising
(
l_ab
[
0
],
l_ab_denoised
[
0
],
h
,
templateWindowSize
,
searchWindowSize
);
fastNlMeansDenoising
(
l_ab
[
1
],
l_ab_denoised
[
1
],
hForColorComponents
,
templateWindowSize
,
searchWindowSize
);
UMat
dst_lab
(
src
.
size
(),
src
.
type
());
mixChannels
(
l_ab_denoised
,
std
::
vector
<
UMat
>
(
1
,
dst_lab
),
from_to
,
3
);
cvtColor
(
dst_lab
,
dst
,
COLOR_Lab2LBGR
);
return
true
;
}
}
#endif
#endif
modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
View file @
6b6cfa89
...
@@ -51,51 +51,47 @@
...
@@ -51,51 +51,47 @@
using
namespace
cv
;
using
namespace
cv
;
template
<
typename
T
>
template
<
typename
T
>
struct
FastNlMeansMultiDenoisingInvoker
:
ParallelLoopBody
{
struct
FastNlMeansMultiDenoisingInvoker
:
public
:
ParallelLoopBody
FastNlMeansMultiDenoisingInvoker
(
{
const
std
::
vector
<
Mat
>&
srcImgs
,
int
imgToDenoiseIndex
,
int
temporalWindowSize
,
public
:
Mat
&
dst
,
int
template_window_size
,
int
search_window_size
,
const
float
h
);
FastNlMeansMultiDenoisingInvoker
(
const
std
::
vector
<
Mat
>&
srcImgs
,
int
imgToDenoiseIndex
,
int
temporalWindowSize
,
Mat
&
dst
,
int
template_window_size
,
void
operator
()
(
const
Range
&
range
)
const
;
int
search_window_size
,
const
float
h
);
private
:
void
operator
()
(
const
Range
&
range
)
const
;
void
operator
=
(
const
FastNlMeansMultiDenoisingInvoker
&
);
private
:
int
rows_
;
void
operator
=
(
const
FastNlMeansMultiDenoisingInvoker
&
);
int
cols_
;
int
rows_
;
Mat
&
dst_
;
int
cols_
;
std
::
vector
<
Mat
>
extended_srcs_
;
Mat
&
dst_
;
Mat
main_extended_src_
;
int
border_size_
;
std
::
vector
<
Mat
>
extended_srcs_
;
Mat
main_extended_src_
;
int
template_window_size_
;
int
border_size_
;
int
search_window_size_
;
int
temporal_window_size_
;
int
template_window_size_
;
int
search_window_size_
;
int
template_window_half_size_
;
int
temporal_window_size_
;
int
search_window_half_size_
;
int
temporal_window_half_size_
;
int
template_window_half_size_
;
int
search_window_half_size_
;
int
fixed_point_mult_
;
int
temporal_window_half_size_
;
int
almost_template_window_size_sq_bin_shift
;
std
::
vector
<
int
>
almost_dist2weight
;
int
fixed_point_mult_
;
int
almost_template_window_size_sq_bin_shift
;
void
calcDistSumsForFirstElementInRow
(
std
::
vector
<
int
>
almost_dist2weight
;
int
i
,
Array3d
<
int
>&
dist_sums
,
void
calcDistSumsForFirstElementInRow
(
int
i
,
Array3d
<
int
>&
dist_sums
,
Array4d
<
int
>&
col_dist_sums
,
Array4d
<
int
>&
col_dist_sums
,
Array4d
<
int
>&
up_col_dist_sums
)
const
;
Array4d
<
int
>&
up_col_dist_sums
)
const
;
void
calcDistSumsForElementInFirstRow
(
void
calcDistSumsForElementInFirstRow
(
int
i
,
int
j
,
int
first_col_num
,
int
i
,
Array3d
<
int
>&
dist_sums
,
Array4d
<
int
>&
col_dist_sums
,
int
j
,
Array4d
<
int
>&
up_col_dist_sums
)
const
;
int
first_col_num
,
Array3d
<
int
>&
dist_sums
,
Array4d
<
int
>&
col_dist_sums
,
Array4d
<
int
>&
up_col_dist_sums
)
const
;
};
};
template
<
class
T
>
template
<
class
T
>
...
@@ -106,7 +102,8 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
...
@@ -106,7 +102,8 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
cv
::
Mat
&
dst
,
cv
::
Mat
&
dst
,
int
template_window_size
,
int
template_window_size
,
int
search_window_size
,
int
search_window_size
,
const
float
h
)
:
dst_
(
dst
),
extended_srcs_
(
srcImgs
.
size
())
const
float
h
)
:
dst_
(
dst
),
extended_srcs_
(
srcImgs
.
size
())
{
{
CV_Assert
(
srcImgs
.
size
()
>
0
);
CV_Assert
(
srcImgs
.
size
()
>
0
);
CV_Assert
(
srcImgs
[
0
].
channels
()
==
sizeof
(
T
));
CV_Assert
(
srcImgs
[
0
].
channels
()
==
sizeof
(
T
));
...
@@ -123,85 +120,84 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
...
@@ -123,85 +120,84 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
temporal_window_size_
=
temporal_window_half_size_
*
2
+
1
;
temporal_window_size_
=
temporal_window_half_size_
*
2
+
1
;
border_size_
=
search_window_half_size_
+
template_window_half_size_
;
border_size_
=
search_window_half_size_
+
template_window_half_size_
;
for
(
int
i
=
0
;
i
<
temporal_window_size_
;
i
++
)
{
for
(
int
i
=
0
;
i
<
temporal_window_size_
;
i
++
)
copyMakeBorder
(
copyMakeBorder
(
srcImgs
[
imgToDenoiseIndex
-
temporal_window_half_size_
+
i
],
extended_srcs_
[
i
],
srcImgs
[
imgToDenoiseIndex
-
temporal_window_half_size_
+
i
],
extended_srcs_
[
i
],
border_size_
,
border_size_
,
border_size_
,
border_size_
,
cv
::
BORDER_DEFAULT
);
border_size_
,
border_size_
,
border_size_
,
border_size_
,
cv
::
BORDER_DEFAULT
);
}
main_extended_src_
=
extended_srcs_
[
temporal_window_half_size_
];
const
int
max_estimate_sum_value
=
temporal_window_size_
*
search_window_size_
*
search_window_size_
*
255
;
main_extended_src_
=
extended_srcs_
[
temporal_window_half_size_
];
const
int
max_estimate_sum_value
=
temporal_window_size_
*
search_window_size_
*
search_window_size_
*
255
;
fixed_point_mult_
=
std
::
numeric_limits
<
int
>::
max
()
/
max_estimate_sum_value
;
fixed_point_mult_
=
std
::
numeric_limits
<
int
>::
max
()
/
max_estimate_sum_value
;
// precalc weight for every possible l2 dist between blocks
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
// additional optimization of precalced weights to replace division(averaging) by binary shift
int
template_window_size_sq
=
template_window_size_
*
template_window_size_
;
int
template_window_size_sq
=
template_window_size_
*
template_window_size_
;
almost_template_window_size_sq_bin_shift
=
0
;
almost_template_window_size_sq_bin_shift
=
0
;
while
(
1
<<
almost_template_window_size_sq_bin_shift
<
template_window_size_sq
)
{
while
(
1
<<
almost_template_window_size_sq_bin_shift
<
template_window_size_sq
)
almost_template_window_size_sq_bin_shift
++
;
almost_template_window_size_sq_bin_shift
++
;
}
int
almost_template_window_size_sq
=
1
<<
almost_template_window_size_sq_bin_shift
;
int
almost_template_window_size_sq
=
1
<<
almost_template_window_size_sq_bin_shift
;
double
almost_dist2actual_dist_multiplier
=
double
almost_dist2actual_dist_multiplier
=
(
double
)
almost_template_window_size_sq
/
template_window_size_sq
;
((
double
)
almost_template_window_size_sq
)
/
template_window_size_sq
;
int
max_dist
=
255
*
255
*
sizeof
(
T
);
int
max_dist
=
255
*
255
*
sizeof
(
T
);
int
almost_max_dist
=
(
int
)
(
max_dist
/
almost_dist2actual_dist_multiplier
+
1
);
int
almost_max_dist
=
(
int
)
(
max_dist
/
almost_dist2actual_dist_multiplier
+
1
);
almost_dist2weight
.
resize
(
almost_max_dist
);
almost_dist2weight
.
resize
(
almost_max_dist
);
const
double
WEIGHT_THRESHOLD
=
0.001
;
const
double
WEIGHT_THRESHOLD
=
0.001
;
for
(
int
almost_dist
=
0
;
almost_dist
<
almost_max_dist
;
almost_dist
++
)
{
for
(
int
almost_dist
=
0
;
almost_dist
<
almost_max_dist
;
almost_dist
++
)
{
double
dist
=
almost_dist
*
almost_dist2actual_dist_multiplier
;
double
dist
=
almost_dist
*
almost_dist2actual_dist_multiplier
;
int
weight
=
cvRound
(
fixed_point_mult_
*
std
::
exp
(
-
dist
/
(
h
*
h
*
sizeof
(
T
))));
int
weight
=
cvRound
(
fixed_point_mult_
*
std
::
exp
(
-
dist
/
(
h
*
h
*
sizeof
(
T
))));
if
(
weight
<
WEIGHT_THRESHOLD
*
fixed_point_mult_
)
{
if
(
weight
<
WEIGHT_THRESHOLD
*
fixed_point_mult_
)
weight
=
0
;
weight
=
0
;
}
almost_dist2weight
[
almost_dist
]
=
weight
;
almost_dist2weight
[
almost_dist
]
=
weight
;
}
}
CV_Assert
(
almost_dist2weight
[
0
]
==
fixed_point_mult_
);
CV_Assert
(
almost_dist2weight
[
0
]
==
fixed_point_mult_
);
// additional optimization init end
if
(
dst_
.
empty
())
{
// additional optimization init end
if
(
dst_
.
empty
())
dst_
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
srcImgs
[
0
].
type
());
dst_
=
Mat
::
zeros
(
srcImgs
[
0
].
size
(),
srcImgs
[
0
].
type
());
}
}
}
template
<
class
T
>
template
<
class
T
>
void
FastNlMeansMultiDenoisingInvoker
<
T
>::
operator
()
(
const
Range
&
range
)
const
{
void
FastNlMeansMultiDenoisingInvoker
<
T
>::
operator
()
(
const
Range
&
range
)
const
{
int
row_from
=
range
.
start
;
int
row_from
=
range
.
start
;
int
row_to
=
range
.
end
-
1
;
int
row_to
=
range
.
end
-
1
;
Array3d
<
int
>
dist_sums
(
temporal_window_size_
,
search_window_size_
,
search_window_size_
);
Array3d
<
int
>
dist_sums
(
temporal_window_size_
,
search_window_size_
,
search_window_size_
);
// for lazy calc optimization
// for lazy calc optimization
Array4d
<
int
>
col_dist_sums
(
Array4d
<
int
>
col_dist_sums
(
template_window_size_
,
temporal_window_size_
,
search_window_size_
,
search_window_size_
);
template_window_size_
,
temporal_window_size_
,
search_window_size_
,
search_window_size_
);
int
first_col_num
=
-
1
;
int
first_col_num
=
-
1
;
Array4d
<
int
>
up_col_dist_sums
(
cols_
,
temporal_window_size_
,
search_window_size_
,
search_window_size_
);
Array4d
<
int
>
up_col_dist_sums
(
for
(
int
i
=
row_from
;
i
<=
row_to
;
i
++
)
cols_
,
temporal_window_size_
,
search_window_size_
,
search_window_size_
);
{
for
(
int
j
=
0
;
j
<
cols_
;
j
++
)
for
(
int
i
=
row_from
;
i
<=
row_to
;
i
++
)
{
{
for
(
int
j
=
0
;
j
<
cols_
;
j
++
)
{
int
search_window_y
=
i
-
search_window_half_size_
;
int
search_window_y
=
i
-
search_window_half_size_
;
int
search_window_x
=
j
-
search_window_half_size_
;
int
search_window_x
=
j
-
search_window_half_size_
;
// calc dist_sums
// calc dist_sums
if
(
j
==
0
)
{
if
(
j
==
0
)
{
calcDistSumsForFirstElementInRow
(
i
,
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
calcDistSumsForFirstElementInRow
(
i
,
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
first_col_num
=
0
;
first_col_num
=
0
;
}
}
else
{
// calc cur dist_sums using previous dist_sums
else
if
(
i
==
row_from
)
{
{
// calc cur dist_sums using previous dist_sums
if
(
i
==
row_from
)
{
calcDistSumsForElementInFirstRow
(
i
,
j
,
first_col_num
,
calcDistSumsForElementInFirstRow
(
i
,
j
,
first_col_num
,
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
dist_sums
,
col_dist_sums
,
up_col_dist_sums
);
}
else
{
}
else
{
int
ay
=
border_size_
+
i
;
int
ay
=
border_size_
+
i
;
int
ax
=
border_size_
+
j
+
template_window_half_size_
;
int
ax
=
border_size_
+
j
+
template_window_half_size_
;
...
@@ -217,36 +213,31 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
...
@@ -217,36 +213,31 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
// copy class member to local variable for optimization
// copy class member to local variable for optimization
int
search_window_size
=
search_window_size_
;
int
search_window_size
=
search_window_size_
;
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
Mat
cur_extended_src
=
extended_srcs_
[
d
];
Mat
cur_extended_src
=
extended_srcs_
[
d
];
Array2d
<
int
>
cur_dist_sums
=
dist_sums
[
d
];
Array2d
<
int
>
cur_dist_sums
=
dist_sums
[
d
];
Array2d
<
int
>
cur_col_dist_sums
=
col_dist_sums
[
first_col_num
][
d
];
Array2d
<
int
>
cur_col_dist_sums
=
col_dist_sums
[
first_col_num
][
d
];
Array2d
<
int
>
cur_up_col_dist_sums
=
up_col_dist_sums
[
j
][
d
];
Array2d
<
int
>
cur_up_col_dist_sums
=
up_col_dist_sums
[
j
][
d
];
for
(
int
y
=
0
;
y
<
search_window_size
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size
;
y
++
)
{
int
*
dist_sums_row
=
cur_dist_sums
.
row_ptr
(
y
);
int
*
dist_sums_row
=
cur_dist_sums
.
row_ptr
(
y
);
int
*
col_dist_sums_row
=
cur_col_dist_sums
.
row_ptr
(
y
);
int
*
col_dist_sums_row
=
cur_col_dist_sums
.
row_ptr
(
y
);
int
*
up_col_dist_sums_row
=
cur_up_col_dist_sums
.
row_ptr
(
y
);
int
*
up_col_dist_sums_row
=
cur_up_col_dist_sums
.
row_ptr
(
y
);
const
T
*
b_up_ptr
=
const
T
*
b_up_ptr
=
cur_extended_src
.
ptr
<
T
>
(
start_by
-
template_window_half_size_
-
1
+
y
);
cur_extended_src
.
ptr
<
T
>
(
start_by
-
template_window_half_size_
-
1
+
y
);
const
T
*
b_down_ptr
=
cur_extended_src
.
ptr
<
T
>
(
start_by
+
template_window_half_size_
+
y
);
const
T
*
b_down_ptr
=
cur_extended_src
.
ptr
<
T
>
(
start_by
+
template_window_half_size_
+
y
);
for
(
int
x
=
0
;
x
<
search_window_size
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size
;
x
++
)
{
dist_sums_row
[
x
]
-=
col_dist_sums_row
[
x
];
dist_sums_row
[
x
]
-=
col_dist_sums_row
[
x
];
col_dist_sums_row
[
x
]
=
up_col_dist_sums_row
[
x
]
+
col_dist_sums_row
[
x
]
=
up_col_dist_sums_row
[
x
]
+
calcUpDownDist
(
calcUpDownDist
(
a_up
,
a_down
,
b_up_ptr
[
start_bx
+
x
],
b_down_ptr
[
start_bx
+
x
]);
a_up
,
a_down
,
b_up_ptr
[
start_bx
+
x
],
b_down_ptr
[
start_bx
+
x
]
);
dist_sums_row
[
x
]
+=
col_dist_sums_row
[
x
];
dist_sums_row
[
x
]
+=
col_dist_sums_row
[
x
];
up_col_dist_sums_row
[
x
]
=
col_dist_sums_row
[
x
];
up_col_dist_sums_row
[
x
]
=
col_dist_sums_row
[
x
];
}
}
}
}
}
}
...
@@ -259,19 +250,21 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
...
@@ -259,19 +250,21 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
int
weights_sum
=
0
;
int
weights_sum
=
0
;
int
estimation
[
3
];
int
estimation
[
3
];
for
(
size_t
channel_num
=
0
;
channel_num
<
sizeof
(
T
);
channel_num
++
)
{
for
(
size_t
channel_num
=
0
;
channel_num
<
sizeof
(
T
);
channel_num
++
)
estimation
[
channel_num
]
=
0
;
estimation
[
channel_num
]
=
0
;
}
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
const
Mat
&
esrc_d
=
extended_srcs_
[
d
];
const
Mat
&
esrc_d
=
extended_srcs_
[
d
];
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
const
T
*
cur_row_ptr
=
esrc_d
.
ptr
<
T
>
(
border_size_
+
search_window_y
+
y
);
const
T
*
cur_row_ptr
=
esrc_d
.
ptr
<
T
>
(
border_size_
+
search_window_y
+
y
);
int
*
dist_sums_row
=
dist_sums
.
row_ptr
(
d
,
y
);
int
*
dist_sums_row
=
dist_sums
.
row_ptr
(
d
,
y
);
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
int
almostAvgDist
=
{
dist_sums_row
[
x
]
>>
almost_template_window_size_sq_bin_shift
;
int
almostAvgDist
=
dist_sums_row
[
x
]
>>
almost_template_window_size_sq_bin_shift
;
int
weight
=
almost_dist2weight
[
almostAvgDist
];
int
weight
=
almost_dist2weight
[
almostAvgDist
];
weights_sum
+=
weight
;
weights_sum
+=
weight
;
...
@@ -293,21 +286,19 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
...
@@ -293,21 +286,19 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
template
<
class
T
>
template
<
class
T
>
inline
void
FastNlMeansMultiDenoisingInvoker
<
T
>::
calcDistSumsForFirstElementInRow
(
inline
void
FastNlMeansMultiDenoisingInvoker
<
T
>::
calcDistSumsForFirstElementInRow
(
int
i
,
int
i
,
Array3d
<
int
>&
dist_sums
,
Array4d
<
int
>&
col_dist_sums
,
Array4d
<
int
>&
up_col_dist_sums
)
const
Array3d
<
int
>&
dist_sums
,
Array4d
<
int
>&
col_dist_sums
,
Array4d
<
int
>&
up_col_dist_sums
)
const
{
{
int
j
=
0
;
int
j
=
0
;
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
Mat
cur_extended_src
=
extended_srcs_
[
d
];
Mat
cur_extended_src
=
extended_srcs_
[
d
];
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
dist_sums
[
d
][
y
][
x
]
=
0
;
dist_sums
[
d
][
y
][
x
]
=
0
;
for
(
int
tx
=
0
;
tx
<
template_window_size_
;
tx
++
)
{
for
(
int
tx
=
0
;
tx
<
template_window_size_
;
tx
++
)
col_dist_sums
[
tx
][
d
][
y
][
x
]
=
0
;
col_dist_sums
[
tx
][
d
][
y
][
x
]
=
0
;
}
int
start_y
=
i
+
y
-
search_window_half_size_
;
int
start_y
=
i
+
y
-
search_window_half_size_
;
int
start_x
=
j
+
x
-
search_window_half_size_
;
int
start_x
=
j
+
x
-
search_window_half_size_
;
...
@@ -315,14 +306,13 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
...
@@ -315,14 +306,13 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
int
*
dist_sums_ptr
=
&
dist_sums
[
d
][
y
][
x
];
int
*
dist_sums_ptr
=
&
dist_sums
[
d
][
y
][
x
];
int
*
col_dist_sums_ptr
=
&
col_dist_sums
[
0
][
d
][
y
][
x
];
int
*
col_dist_sums_ptr
=
&
col_dist_sums
[
0
][
d
][
y
][
x
];
int
col_dist_sums_step
=
col_dist_sums
.
step_size
(
0
);
int
col_dist_sums_step
=
col_dist_sums
.
step_size
(
0
);
for
(
int
tx
=
-
template_window_half_size_
;
tx
<=
template_window_half_size_
;
tx
++
)
{
for
(
int
tx
=
-
template_window_half_size_
;
tx
<=
template_window_half_size_
;
tx
++
)
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
{
{
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
{
int
dist
=
calcDist
<
T
>
(
int
dist
=
calcDist
<
T
>
(
main_extended_src_
.
at
<
T
>
(
main_extended_src_
.
at
<
T
>
(
border_size_
+
i
+
ty
,
border_size_
+
j
+
tx
),
border_size_
+
i
+
ty
,
border_size_
+
j
+
tx
),
cur_extended_src
.
at
<
T
>
(
border_size_
+
start_y
+
ty
,
border_size_
+
start_x
+
tx
));
cur_extended_src
.
at
<
T
>
(
border_size_
+
start_y
+
ty
,
border_size_
+
start_x
+
tx
)
);
*
dist_sums_ptr
+=
dist
;
*
dist_sums_ptr
+=
dist
;
*
col_dist_sums_ptr
+=
dist
;
*
col_dist_sums_ptr
+=
dist
;
...
@@ -332,18 +322,13 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
...
@@ -332,18 +322,13 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
up_col_dist_sums
[
j
][
d
][
y
][
x
]
=
col_dist_sums
[
template_window_size_
-
1
][
d
][
y
][
x
];
up_col_dist_sums
[
j
][
d
][
y
][
x
]
=
col_dist_sums
[
template_window_size_
-
1
][
d
][
y
][
x
];
}
}
}
}
}
}
}
template
<
class
T
>
template
<
class
T
>
inline
void
FastNlMeansMultiDenoisingInvoker
<
T
>::
calcDistSumsForElementInFirstRow
(
inline
void
FastNlMeansMultiDenoisingInvoker
<
T
>::
calcDistSumsForElementInFirstRow
(
int
i
,
int
i
,
int
j
,
int
first_col_num
,
Array3d
<
int
>&
dist_sums
,
int
j
,
Array4d
<
int
>&
col_dist_sums
,
Array4d
<
int
>&
up_col_dist_sums
)
const
int
first_col_num
,
Array3d
<
int
>&
dist_sums
,
Array4d
<
int
>&
col_dist_sums
,
Array4d
<
int
>&
up_col_dist_sums
)
const
{
{
int
ay
=
border_size_
+
i
;
int
ay
=
border_size_
+
i
;
int
ax
=
border_size_
+
j
+
template_window_half_size_
;
int
ax
=
border_size_
+
j
+
template_window_half_size_
;
...
@@ -353,10 +338,12 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
...
@@ -353,10 +338,12 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
int
new_last_col_num
=
first_col_num
;
int
new_last_col_num
=
first_col_num
;
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
for
(
int
d
=
0
;
d
<
temporal_window_size_
;
d
++
)
{
Mat
cur_extended_src
=
extended_srcs_
[
d
];
Mat
cur_extended_src
=
extended_srcs_
[
d
];
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
{
for
(
int
y
=
0
;
y
<
search_window_size_
;
y
++
)
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
for
(
int
x
=
0
;
x
<
search_window_size_
;
x
++
)
{
dist_sums
[
d
][
y
][
x
]
-=
col_dist_sums
[
first_col_num
][
d
][
y
][
x
];
dist_sums
[
d
][
y
][
x
]
-=
col_dist_sums
[
first_col_num
][
d
][
y
][
x
];
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
]
=
0
;
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
]
=
0
;
...
@@ -364,19 +351,17 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
...
@@ -364,19 +351,17 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
int
bx
=
start_bx
+
x
;
int
bx
=
start_bx
+
x
;
int
*
col_dist_sums_ptr
=
&
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
];
int
*
col_dist_sums_ptr
=
&
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
];
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
{
for
(
int
ty
=
-
template_window_half_size_
;
ty
<=
template_window_half_size_
;
ty
++
)
*
col_dist_sums_ptr
+=
{
calcDist
<
T
>
(
*
col_dist_sums_ptr
+=
calcDist
<
T
>
(
main_extended_src_
.
at
<
T
>
(
ay
+
ty
,
ax
),
main_extended_src_
.
at
<
T
>
(
ay
+
ty
,
ax
),
cur_extended_src
.
at
<
T
>
(
by
+
ty
,
bx
)
cur_extended_src
.
at
<
T
>
(
by
+
ty
,
bx
));
);
}
}
dist_sums
[
d
][
y
][
x
]
+=
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
];
dist_sums
[
d
][
y
][
x
]
+=
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
];
up_col_dist_sums
[
j
][
d
][
y
][
x
]
=
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
];
up_col_dist_sums
[
j
][
d
][
y
][
x
]
=
col_dist_sums
[
new_last_col_num
][
d
][
y
][
x
];
}
}
}
}
}
}
}
...
...
modules/photo/src/opencl/nlmeans.cl
0 → 100644
View file @
6b6cfa89
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
cl_amd_printf
#
pragma
OPENCL_EXTENSION
cl_amd_printf:enable
#
endif
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
ifdef
OP_CALC_WEIGHTS
__kernel
void
calcAlmostDist2Weight
(
__global
int
*
almostDist2Weight,
int
almostMaxDist,
FT
almostDist2ActualDistMultiplier,
int
fixedPointMult,
FT
den,
FT
WEIGHT_THRESHOLD
)
{
int
almostDist
=
get_global_id
(
0
)
;
if
(
almostDist
<
almostMaxDist
)
{
FT
dist
=
almostDist
*
almostDist2ActualDistMultiplier
;
int
weight
=
convert_int_sat_rte
(
fixedPointMult
*
exp
(
-dist
*
den
))
;
if
(
weight
<
WEIGHT_THRESHOLD
*
fixedPointMult
)
weight
=
0
;
almostDist2Weight[almostDist]
=
weight
;
}
}
#
elif
defined
OP_CALC_FASTNLMEANS
#
define
noconvert
#
define
SEARCH_SIZE_SQ
(
SEARCH_SIZE
*
SEARCH_SIZE
)
inline
int
calcDist
(
uchar_t
a,
uchar_t
b
)
{
int_t
diff
=
convert_int_t
(
a
)
-
convert_int_t
(
b
)
;
int_t
retval
=
diff
*
diff
;
#
if
cn
==
1
return
retval
;
#
elif
cn
==
2
return
retval.x
+
retval.y
;
#
else
#
error
"cn should be either 1 or 2"
#
endif
}
inline
int
calcDistUpDown
(
uchar_t
down_value,
uchar_t
down_value_t,
uchar_t
up_value,
uchar_t
up_value_t
)
{
int_t
A
=
convert_int_t
(
down_value
)
-
convert_int_t
(
down_value_t
)
;
int_t
B
=
convert_int_t
(
up_value
)
-
convert_int_t
(
up_value_t
)
;
int_t
retval
=
(
A
-
B
)
*
(
A
+
B
)
;
#
if
cn
==
1
return
retval
;
#
elif
cn
==
2
return
retval.x
+
retval.y
;
#
else
#
error
"cn should be either 1 or 2"
#
endif
}
#
define
COND
if
(
x
==
0
&&
y
==
0
)
inline
void
calcFirstElementInRow
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
int
y,
int
x,
int
id,
__global
int
*
col_dists,
__global
int
*
up_col_dists
)
{
y
-=
TEMPLATE_SIZE2
;
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2
;
int
col_dists_current_private[TEMPLATE_SIZE]
;
for
(
int
i
=
id,
size
=
SEARCH_SIZE_SQ
; i < size; i += CTA_SIZE)
{
int
dist
=
0
,
value
;
__global
const
uchar_t
*
src_template
=
(
__global
const
uchar_t
*
)(
src
+
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
cn,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
__global
const
uchar_t
*
src_current
=
(
__global
const
uchar_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn,
x,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
i
*
TEMPLATE_SIZE
;
#
pragma
unroll
for
(
int
j
=
0
; j < TEMPLATE_SIZE; ++j)
col_dists_current_private[j]
=
0
;
for
(
int
ty
=
0
; ty < TEMPLATE_SIZE; ++ty)
{
#
pragma
unroll
for
(
int
tx
=
-TEMPLATE_SIZE2
; tx <= TEMPLATE_SIZE2; ++tx)
{
value
=
calcDist
(
src_template[tx],
src_current[tx]
)
;
col_dists_current_private[tx
+
TEMPLATE_SIZE2]
+=
value
;
dist
+=
value
;
}
src_current
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_current
+
src_step
)
;
src_template
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_template
+
src_step
)
;
}
#
pragma
unroll
for
(
int
j
=
0
; j < TEMPLATE_SIZE; ++j)
col_dists_current[j]
=
col_dists_current_private[j]
;
dists[i]
=
dist
;
up_col_dists[0
+
i]
=
col_dists[TEMPLATE_SIZE
-
1]
;
}
}
inline
void
calcElementInFirstRow
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
int
y,
int
x0,
int
x,
int
id,
int
first,
__global
int
*
col_dists,
__global
int
*
up_col_dists
)
{
x
+=
TEMPLATE_SIZE2
;
y
-=
TEMPLATE_SIZE2
;
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2
;
for
(
int
i
=
id,
size
=
SEARCH_SIZE_SQ
; i < size; i += CTA_SIZE)
{
__global
const
uchar_t
*
src_current
=
(
__global
const
uchar_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn,
x,
src_offset
)))
;
__global
const
uchar_t
*
src_template
=
(
__global
const
uchar_t
*
)(
src
+
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
cn,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
TEMPLATE_SIZE
*
i
;
int
col_dist
=
0
;
#
pragma
unroll
for
(
int
ty
=
0
; ty < TEMPLATE_SIZE; ++ty)
{
col_dist
+=
calcDist
(
src_current[0],
src_template[0]
)
;
src_current
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_current
+
src_step
)
;
src_template
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_template
+
src_step
)
;
}
dists[i]
+=
col_dist
-
col_dists_current[first]
;
col_dists_current[first]
=
col_dist
;
up_col_dists[mad24
(
x0,
SEARCH_SIZE_SQ,
i
)
]
=
col_dist
;
}
}
inline
void
calcElement
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
int
y,
int
x0,
int
x,
int
id,
int
first,
__global
int
*
col_dists,
__global
int
*
up_col_dists
)
{
int
sx
=
x
+
TEMPLATE_SIZE2
;
int
sy_up
=
y
-
TEMPLATE_SIZE2
-
1
;
int
sy_down
=
y
+
TEMPLATE_SIZE2
;
uchar_t
up_value
=
*
(
__global
const
uchar_t
*
)(
src
+
mad24
(
sy_up,
src_step,
mad24
(
cn,
sx,
src_offset
)))
;
uchar_t
down_value
=
*
(
__global
const
uchar_t
*
)(
src
+
mad24
(
sy_down,
src_step,
mad24
(
cn,
sx,
src_offset
)))
;
sx
-=
SEARCH_SIZE2
;
sy_up
-=
SEARCH_SIZE2
;
sy_down
-=
SEARCH_SIZE2
;
for
(
int
i
=
id,
size
=
SEARCH_SIZE_SQ
; i < size; i += CTA_SIZE)
{
int
wx
=
i
%
SEARCH_SIZE,
wy
=
i
/
SEARCH_SIZE
;
uchar_t
up_value_t
=
*
(
__global
const
uchar_t
*
)(
src
+
mad24
(
sy_up
+
wy,
src_step,
mad24
(
cn,
sx
+
wx,
src_offset
)))
;
uchar_t
down_value_t
=
*
(
__global
const
uchar_t
*
)(
src
+
mad24
(
sy_down
+
wy,
src_step,
mad24
(
cn,
sx
+
wx,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
mad24
(
i,
TEMPLATE_SIZE,
first
)
;
__global
int
*
up_col_dists_current
=
up_col_dists
+
mad24
(
x0,
SEARCH_SIZE_SQ,
i
)
;
int
col_dist
=
up_col_dists_current[0]
+
calcDistUpDown
(
down_value,
down_value_t,
up_value,
up_value_t
)
;
dists[i]
+=
col_dist
-
col_dists_current[0]
;
col_dists_current[0]
=
col_dist
;
up_col_dists_current[0]
=
col_dist
;
}
}
inline
void
convolveWindow
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
__global
const
int
*
almostDist2Weight,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
y,
int
x,
int
id,
__local
int
*
weights_local,
__local
int_t
*
weighted_sum_local,
int
almostTemplateWindowSizeSqBinShift
)
{
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2,
weights
=
0
;
int_t
weighted_sum
=
(
int_t
)(
0
)
;
for
(
int
i
=
id,
size
=
SEARCH_SIZE_SQ
; i < size; i += CTA_SIZE)
{
int
src_index
=
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
i
%
SEARCH_SIZE
+
sx,
cn,
src_offset
))
;
int_t
src_value
=
convert_int_t
(
*
(
__global
const
uchar_t
*
)(
src
+
src_index
))
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
int
weight
=
almostDist2Weight[almostAvgDist]
;
weights
+=
weight
;
weighted_sum
+=
(
int_t
)(
weight
)
*
src_value
;
}
if
(
id
>=
CTA_SIZE2
)
{
int
id2
=
id
-
CTA_SIZE2
;
weights_local[id2]
=
weights
;
weighted_sum_local[id2]
=
weighted_sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
id
<
CTA_SIZE2
)
{
weights_local[id]
+=
weights
;
weighted_sum_local[id]
+=
weighted_sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
CTA_SIZE2
>>
1
; lsize > 2; lsize >>= 1)
{
if
(
id
<
lsize
)
{
int
id2
=
lsize
+
id
;
weights_local[id]
+=
weights_local[id2]
;
weighted_sum_local[id]
+=
weighted_sum_local[id2]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
id
==
0
)
{
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
cn,
x,
dst_offset
))
;
int_t
weighted_sum_local_0
=
weighted_sum_local[0]
+
weighted_sum_local[1]
+
weighted_sum_local[2]
+
weighted_sum_local[3]
;
int
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
*
(
__global
uchar_t
*
)(
dst
+
dst_index
)
=
convert_uchar_t
(
weighted_sum_local_0
/
(
int_t
)(
weights_local_0
))
;
}
}
__kernel
void
fastNlMeansDenoising
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
const
int
*
almostDist2Weight,
__global
uchar
*
buffer,
int
almostTemplateWindowSizeSqBinShift
)
{
int
block_x
=
get_group_id
(
0
)
,
nblocks_x
=
get_num_groups
(
0
)
;
int
block_y
=
get_group_id
(
1
)
;
int
id
=
get_local_id
(
0
)
,
first
;
__local
int
dists[SEARCH_SIZE_SQ],
weights[CTA_SIZE2]
;
__local
int_t
weighted_sum[CTA_SIZE2]
;
int
x0
=
block_x
*
BLOCK_COLS,
x1
=
min
(
x0
+
BLOCK_COLS,
dst_cols
)
;
int
y0
=
block_y
*
BLOCK_ROWS,
y1
=
min
(
y0
+
BLOCK_ROWS,
dst_rows
)
;
//
for
each
group
we
need
SEARCH_SIZE_SQ
*
TEMPLATE_SIZE
integer
buffer
for
storing
part
column
sum
for
current
element
//
and
SEARCH_SIZE_SQ
*
BLOCK_COLS
integer
buffer
for
storing
last
column
sum
for
each
element
of
search
window
of
up
row
int
block_data_start
=
SEARCH_SIZE_SQ
*
(
mad24
(
block_y,
dst_cols,
x0
)
+
mad24
(
block_y,
nblocks_x,
block_x
)
*
TEMPLATE_SIZE
)
;
__global
int
*
col_dists
=
(
__global
int
*
)(
buffer
+
block_data_start
*
sizeof
(
int
))
;
__global
int
*
up_col_dists
=
col_dists
+
SEARCH_SIZE_SQ
*
TEMPLATE_SIZE
;
for
(
int
y
=
y0
; y < y1; ++y)
for
(
int
x
=
x0
; x < x1; ++x)
{
if
(
x
==
x0
)
{
calcFirstElementInRow
(
src,
src_step,
src_offset,
dists,
y,
x,
id,
col_dists,
up_col_dists
)
;
first
=
0
;
}
else
{
if
(
y
==
y0
)
calcElementInFirstRow
(
src,
src_step,
src_offset,
dists,
y,
x
-
x0,
x,
id,
first,
col_dists,
up_col_dists
)
;
else
calcElement
(
src,
src_step,
src_offset,
dists,
y,
x
-
x0,
x,
id,
first,
col_dists,
up_col_dists
)
;
first
=
(
first
+
1
)
%
TEMPLATE_SIZE
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
convolveWindow
(
src,
src_step,
src_offset,
dists,
almostDist2Weight,
dst,
dst_step,
dst_offset,
y,
x,
id,
weights,
weighted_sum,
almostTemplateWindowSizeSqBinShift
)
;
}
}
#
endif
modules/photo/src/precomp.hpp
View file @
6b6cfa89
...
@@ -46,6 +46,8 @@
...
@@ -46,6 +46,8 @@
#include "opencv2/core/private.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/photo.hpp"
#include "opencv2/photo.hpp"
#include "opencv2/core/ocl.hpp"
#include "opencv2/imgproc.hpp"
#ifdef HAVE_TEGRA_OPTIMIZATION
#ifdef HAVE_TEGRA_OPTIMIZATION
#include "opencv2/photo/photo_tegra.hpp"
#include "opencv2/photo/photo_tegra.hpp"
...
...
modules/photo/test/ocl/test_denoising.cpp
0 → 100644
View file @
6b6cfa89
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace
cvtest
{
namespace
ocl
{
PARAM_TEST_CASE
(
FastNlMeansDenoisingTestBase
,
Channels
,
bool
)
{
int
cn
,
templateWindowSize
,
searchWindowSize
;
float
h
;
bool
use_roi
;
TEST_DECLARE_INPUT_PARAMETER
(
src
)
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
)
virtual
void
SetUp
()
{
cn
=
GET_PARAM
(
0
);
use_roi
=
GET_PARAM
(
1
);
templateWindowSize
=
7
;
searchWindowSize
=
21
;
h
=
3.0
f
;
}
virtual
void
generateTestData
()
{
Mat
image
;
if
(
cn
==
1
)
{
image
=
readImage
(
"denoising/lena_noised_gaussian_sigma=10.png"
,
IMREAD_GRAYSCALE
);
ASSERT_FALSE
(
image
.
empty
());
}
const
int
type
=
CV_8UC
(
cn
);
Size
roiSize
=
cn
==
1
?
image
.
size
()
:
randomSize
(
1
,
MAX_VALUE
);
Border
srcBorder
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src
,
src_roi
,
roiSize
,
srcBorder
,
type
,
0
,
255
);
if
(
cn
==
1
)
image
.
copyTo
(
src_roi
);
Border
dstBorder
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
dst
,
dst_roi
,
roiSize
,
dstBorder
,
type
,
0
,
255
);
UMAT_UPLOAD_INPUT_PARAMETER
(
src
)
UMAT_UPLOAD_OUTPUT_PARAMETER
(
dst
)
}
};
typedef
FastNlMeansDenoisingTestBase
FastNlMeansDenoising
;
OCL_TEST_P
(
FastNlMeansDenoising
,
Mat
)
{
for
(
int
j
=
0
;
j
<
test_loop_times
;
j
++
)
{
generateTestData
();
OCL_OFF
(
cv
::
fastNlMeansDenoising
(
src_roi
,
dst_roi
,
h
,
templateWindowSize
,
searchWindowSize
));
OCL_ON
(
cv
::
fastNlMeansDenoising
(
usrc_roi
,
udst_roi
,
h
,
templateWindowSize
,
searchWindowSize
));
OCL_EXPECT_MATS_NEAR
(
dst
,
1
)
}
}
typedef
FastNlMeansDenoisingTestBase
fastNlMeansDenoisingColored
;
OCL_TEST_P
(
fastNlMeansDenoisingColored
,
Mat
)
{
for
(
int
j
=
0
;
j
<
test_loop_times
;
j
++
)
{
generateTestData
();
OCL_OFF
(
cv
::
fastNlMeansDenoisingColored
(
src_roi
,
dst_roi
,
h
,
h
,
templateWindowSize
,
searchWindowSize
));
OCL_ON
(
cv
::
fastNlMeansDenoisingColored
(
usrc_roi
,
udst_roi
,
h
,
h
,
templateWindowSize
,
searchWindowSize
));
OCL_EXPECT_MATS_NEAR
(
dst
,
1
)
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
Photo
,
FastNlMeansDenoising
,
Combine
(
Values
(
1
,
2
),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Photo
,
fastNlMeansDenoisingColored
,
Combine
(
Values
(
Channels
(
3
)),
Bool
()));
}
}
// namespace cvtest::ocl
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment