Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
73663dcd
Commit
73663dcd
authored
Mar 02, 2015
by
Erik Karlsson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added support for 16-bit input
parent
3bde9e93
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
56 additions
and
32 deletions
+56
-32
fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+37
-20
nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+19
-12
No files found.
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
View file @
73663dcd
...
...
@@ -28,12 +28,14 @@ static int divUp(int a, int b)
return
(
a
+
b
-
1
)
/
b
;
}
template
<
typename
FT
>
template
<
typename
FT
,
typename
ST
,
typename
WT
>
static
bool
ocl_calcAlmostDist2Weight
(
UMat
&
almostDist2Weight
,
int
searchWindowSize
,
int
templateWindowSize
,
FT
h
,
int
cn
,
int
&
almostTemplateWindowSizeSqBinShift
,
bool
abs
)
{
const
int
maxEstimateSumValue
=
searchWindowSize
*
searchWindowSize
*
255
;
int
fixedPointMult
=
std
::
numeric_limits
<
int
>::
max
()
/
maxEstimateSumValue
;
const
WT
maxEstimateSumValue
=
searchWindowSize
*
searchWindowSize
*
std
::
numeric_limits
<
ST
>::
max
();
int
fixedPointMult
=
(
int
)
std
::
min
<
WT
>
(
std
::
numeric_limits
<
WT
>::
max
()
/
maxEstimateSumValue
,
std
::
numeric_limits
<
int
>::
max
());
int
depth
=
DataType
<
FT
>::
depth
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
...
...
@@ -48,7 +50,8 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
FT
almostDist2ActualDistMultiplier
=
(
FT
)(
1
<<
almostTemplateWindowSizeSqBinShift
)
/
templateWindowSizeSq
;
const
FT
WEIGHT_THRESHOLD
=
1e-3
f
;
int
maxDist
=
abs
?
255
*
cn
:
255
*
255
*
cn
;
int
maxDist
=
abs
?
std
::
numeric_limits
<
ST
>::
max
()
*
cn
:
std
::
numeric_limits
<
ST
>::
max
()
*
std
::
numeric_limits
<
ST
>::
max
()
*
cn
;
int
almostMaxDist
=
(
int
)(
maxDist
/
almostDist2ActualDistMultiplier
+
1
);
FT
den
=
1.0
f
/
(
h
*
h
*
cn
);
...
...
@@ -74,7 +77,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
Size
size
=
_src
.
size
();
if
(
type
!=
CV_8UC1
&&
type
!=
CV_8UC2
&&
type
!=
CV_8UC3
)
if
(
cn
!=
1
&&
cn
!=
2
&&
cn
!=
3
&&
depth
!=
CV_8U
&&
(
!
abs
||
depth
!=
CV_16U
)
)
return
false
;
int
templateWindowHalfWize
=
templateWindowSize
/
2
;
...
...
@@ -84,45 +87,60 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int
nblocksx
=
divUp
(
size
.
width
,
BLOCK_COLS
),
nblocksy
=
divUp
(
size
.
height
,
BLOCK_ROWS
);
int
almostTemplateWindowSizeSqBinShift
=
-
1
;
char
cvt
[
2
][
40
];
char
buf
[
4
][
40
];
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D sample_t=%s -D pixel_t=%s -D int_t=%s"
" -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s%s"
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32S
)
:
"long"
,
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32SC
(
cn
))
:
(
sprintf
(
buf
[
0
],
"long%d"
,
cn
),
buf
[
0
]),
depth
==
CV_8U
?
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
1
])
:
(
sprintf
(
buf
[
1
],
"convert_long%d"
,
cn
),
buf
[
1
]),
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
type
==
CV_8UC
3
?
4
:
cn
,
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]),
abs
?
" -D ABS"
:
""
);
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
2
]),
cn
==
3
?
4
:
cn
,
ocl
::
convertTypeStr
(
CV_32S
,
depth
,
cn
,
buf
[
3
]),
abs
?
" -D ABS"
:
""
);
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
almostDist2Weight
;
if
(
!
ocl_calcAlmostDist2Weight
<
float
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
))
if
((
depth
==
CV_8U
&&
!
ocl_calcAlmostDist2Weight
<
float
,
uchar
,
int
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
))
||
(
depth
==
CV_16U
&&
!
ocl_calcAlmostDist2Weight
<
float
,
ushort
,
int64
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
)))
return
false
;
CV_Assert
(
almostTemplateWindowSizeSqBinShift
>=
0
);
UMat
srcex
;
int
borderSize
=
searchWindowHalfSize
+
templateWindowHalfWize
;
if
(
type
==
CV_8UC
3
)
{
Mat
src_rgb
=
_src
.
getMat
(),
src_rgba
(
size
,
CV_8UC4
);
if
(
cn
==
3
)
{
UMat
tmp
(
size
,
CV_MAKE_TYPE
(
depth
,
4
)
);
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
mixChannels
(
&
src_rgb
,
1
,
&
src_rgba
,
1
,
from_to
,
3
);
copyMakeBorder
(
src_rgba
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
mixChannels
(
std
::
vector
<
UMat
>
(
1
,
_src
.
getUMat
()),
std
::
vector
<
UMat
>
(
1
,
tmp
),
from_to
,
3
);
copyMakeBorder
(
tmp
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
}
else
copyMakeBorder
(
_src
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
_dst
.
create
(
size
,
type
);
UMat
dst
;
if
(
type
==
CV_8UC
3
)
dst
.
create
(
size
,
CV_
8UC4
);
if
(
cn
==
3
)
dst
.
create
(
size
,
CV_
MAKE_TYPE
(
depth
,
4
)
);
else
dst
=
_dst
.
getUMat
();
...
...
@@ -139,10 +157,9 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
size_t
globalsize
[
2
]
=
{
nblocksx
*
ctaSize
,
nblocksy
},
localsize
[
2
]
=
{
ctaSize
,
1
};
if
(
!
k
.
run
(
2
,
globalsize
,
localsize
,
false
))
return
false
;
if
(
type
==
CV_8UC3
)
{
Mat
dst_rgba
=
dst
.
getMat
(
ACCESS_READ
),
dst_rgb
=
_dst
.
getMat
();
if
(
cn
==
3
)
{
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
mixChannels
(
&
dst_rgba
,
1
,
&
dst_rgb
,
1
,
from_to
,
3
);
mixChannels
(
std
::
vector
<
UMat
>
(
1
,
dst
),
std
::
vector
<
UMat
>
(
1
,
_dst
.
getUMat
())
,
from_to
,
3
);
}
return
true
;
...
...
modules/photo/src/opencl/nlmeans.cl
View file @
73663dcd
...
...
@@ -206,22 +206,23 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
inline
void
convolveWindow
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
__global
const
int
*
almostDist2Weight,
__global
sample_t
*
dst,
int
dst_step,
int
dst_offset,
int
y,
int
x,
int
id,
__local
in
t
*
weights_local,
__local
int
_t
*
weighted_sum_local,
int
almostTemplateWindowSizeSqBinShift
)
int
y,
int
x,
int
id,
__local
weight_
t
*
weights_local,
__local
sum
_t
*
weighted_sum_local,
int
almostTemplateWindowSizeSqBinShift
)
{
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2,
weights
=
0
;
int_t
weighted_sum
=
(
int_t
)(
0
)
;
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2
;
weight_t
weights
=
0
;
sum_t
weighted_sum
=
(
sum_t
)(
0
)
;
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
int
src_index
=
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
i
%
SEARCH_SIZE
+
sx,
cn,
src_offset
))
;
int_t
src_value
=
convert_int
_t
(
*
(
__global
const
pixel_t
*
)(
src
+
src_index
))
;
sum_t
src_value
=
convert_sum
_t
(
*
(
__global
const
pixel_t
*
)(
src
+
src_index
))
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
int
weight
=
almostDist2Weight[almostAvgDist]
;
weights
+=
weight
;
weighted_sum
+=
(
int
_t
)(
weight
)
*
src_value
;
weights
+=
(
weight_t
)
weight
;
weighted_sum
+=
(
sum
_t
)(
weight
)
*
src_value
;
}
weights_local[id]
=
weights
;
...
...
@@ -242,11 +243,11 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
if
(
id
==
0
)
{
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
cn,
x,
dst_offset
))
;
int
_t
weighted_sum_local_0
=
weighted_sum_local[0]
+
weighted_sum_local[1]
+
sum
_t
weighted_sum_local_0
=
weighted_sum_local[0]
+
weighted_sum_local[1]
+
weighted_sum_local[2]
+
weighted_sum_local[3]
;
in
t
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
weight_
t
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
*
(
__global
pixel_t
*
)(
dst
+
dst_index
)
=
convert_pixel_t
(
weighted_sum_local_0
/
(
int
_t
)(
weights_local_0
))
;
*
(
__global
pixel_t
*
)(
dst
+
dst_index
)
=
convert_pixel_t
(
weighted_sum_local_0
/
(
sum
_t
)(
weights_local_0
))
;
}
}
...
...
@@ -259,8 +260,9 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
int
block_y
=
get_group_id
(
1
)
;
int
id
=
get_local_id
(
0
)
,
first
;
__local
int
dists[SEARCH_SIZE_SQ],
weights[CTA_SIZE]
;
__local
int_t
weighted_sum[CTA_SIZE]
;
__local
int
dists[SEARCH_SIZE_SQ]
;
__local
weight_t
weights[CTA_SIZE]
;
__local
sum_t
weighted_sum[CTA_SIZE]
;
int
x0
=
block_x
*
BLOCK_COLS,
x1
=
min
(
x0
+
BLOCK_COLS,
dst_cols
)
;
int
y0
=
block_y
*
BLOCK_ROWS,
y1
=
min
(
y0
+
BLOCK_ROWS,
dst_rows
)
;
...
...
@@ -271,6 +273,11 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
__global
int
*
col_dists
=
(
__global
int
*
)(
buffer
+
block_data_start
*
sizeof
(
int
))
;
__global
int
*
up_col_dists
=
col_dists
+
SEARCH_SIZE_SQ
*
TEMPLATE_SIZE
;
src_step
/=
sizeof
(
sample_t
)
;
src_offset
/=
sizeof
(
sample_t
)
;
dst_step
/=
sizeof
(
sample_t
)
;
dst_offset
/=
sizeof
(
sample_t
)
;
for
(
int
y
=
y0
; y < y1; ++y)
for
(
int
x
=
x0
; x < x1; ++x)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment