Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
73663dcd
Commit
73663dcd
authored
Mar 02, 2015
by
Erik Karlsson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added support for 16-bit input
parent
3bde9e93
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
56 additions
and
32 deletions
+56
-32
fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+37
-20
nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+19
-12
No files found.
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
View file @
73663dcd
...
@@ -28,12 +28,14 @@ static int divUp(int a, int b)
...
@@ -28,12 +28,14 @@ static int divUp(int a, int b)
return
(
a
+
b
-
1
)
/
b
;
return
(
a
+
b
-
1
)
/
b
;
}
}
template
<
typename
FT
>
template
<
typename
FT
,
typename
ST
,
typename
WT
>
static
bool
ocl_calcAlmostDist2Weight
(
UMat
&
almostDist2Weight
,
int
searchWindowSize
,
int
templateWindowSize
,
FT
h
,
int
cn
,
static
bool
ocl_calcAlmostDist2Weight
(
UMat
&
almostDist2Weight
,
int
searchWindowSize
,
int
templateWindowSize
,
FT
h
,
int
cn
,
int
&
almostTemplateWindowSizeSqBinShift
,
bool
abs
)
int
&
almostTemplateWindowSizeSqBinShift
,
bool
abs
)
{
{
const
int
maxEstimateSumValue
=
searchWindowSize
*
searchWindowSize
*
255
;
const
WT
maxEstimateSumValue
=
searchWindowSize
*
searchWindowSize
*
int
fixedPointMult
=
std
::
numeric_limits
<
int
>::
max
()
/
maxEstimateSumValue
;
std
::
numeric_limits
<
ST
>::
max
();
int
fixedPointMult
=
(
int
)
std
::
min
<
WT
>
(
std
::
numeric_limits
<
WT
>::
max
()
/
maxEstimateSumValue
,
std
::
numeric_limits
<
int
>::
max
());
int
depth
=
DataType
<
FT
>::
depth
;
int
depth
=
DataType
<
FT
>::
depth
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
...
@@ -48,7 +50,8 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
...
@@ -48,7 +50,8 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
FT
almostDist2ActualDistMultiplier
=
(
FT
)(
1
<<
almostTemplateWindowSizeSqBinShift
)
/
templateWindowSizeSq
;
FT
almostDist2ActualDistMultiplier
=
(
FT
)(
1
<<
almostTemplateWindowSizeSqBinShift
)
/
templateWindowSizeSq
;
const
FT
WEIGHT_THRESHOLD
=
1e-3
f
;
const
FT
WEIGHT_THRESHOLD
=
1e-3
f
;
int
maxDist
=
abs
?
255
*
cn
:
255
*
255
*
cn
;
int
maxDist
=
abs
?
std
::
numeric_limits
<
ST
>::
max
()
*
cn
:
std
::
numeric_limits
<
ST
>::
max
()
*
std
::
numeric_limits
<
ST
>::
max
()
*
cn
;
int
almostMaxDist
=
(
int
)(
maxDist
/
almostDist2ActualDistMultiplier
+
1
);
int
almostMaxDist
=
(
int
)(
maxDist
/
almostDist2ActualDistMultiplier
+
1
);
FT
den
=
1.0
f
/
(
h
*
h
*
cn
);
FT
den
=
1.0
f
/
(
h
*
h
*
cn
);
...
@@ -74,7 +77,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
...
@@ -74,7 +77,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
Size
size
=
_src
.
size
();
Size
size
=
_src
.
size
();
if
(
type
!=
CV_8UC1
&&
type
!=
CV_8UC2
&&
type
!=
CV_8UC3
)
if
(
cn
!=
1
&&
cn
!=
2
&&
cn
!=
3
&&
depth
!=
CV_8U
&&
(
!
abs
||
depth
!=
CV_16U
)
)
return
false
;
return
false
;
int
templateWindowHalfWize
=
templateWindowSize
/
2
;
int
templateWindowHalfWize
=
templateWindowSize
/
2
;
...
@@ -84,45 +87,60 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
...
@@ -84,45 +87,60 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int
nblocksx
=
divUp
(
size
.
width
,
BLOCK_COLS
),
nblocksy
=
divUp
(
size
.
height
,
BLOCK_ROWS
);
int
nblocksx
=
divUp
(
size
.
width
,
BLOCK_COLS
),
nblocksy
=
divUp
(
size
.
height
,
BLOCK_ROWS
);
int
almostTemplateWindowSizeSqBinShift
=
-
1
;
int
almostTemplateWindowSizeSqBinShift
=
-
1
;
char
cvt
[
2
][
40
];
char
buf
[
4
][
40
];
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D sample_t=%s -D pixel_t=%s -D int_t=%s"
" -D sample_t=%s -D pixel_t=%s -D int_t=%s"
" -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s%s"
,
" -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s%s"
,
templateWindowSize
,
searchWindowSize
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32S
)
:
"long"
,
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32SC
(
cn
))
:
(
sprintf
(
buf
[
0
],
"long%d"
,
cn
),
buf
[
0
]),
depth
==
CV_8U
?
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
1
])
:
(
sprintf
(
buf
[
1
],
"convert_long%d"
,
cn
),
buf
[
1
]),
BLOCK_COLS
,
BLOCK_ROWS
,
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
type
==
CV_8UC
3
?
4
:
cn
,
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
2
]),
cn
==
3
?
4
:
cn
,
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]),
abs
?
" -D ABS"
:
""
);
ocl
::
convertTypeStr
(
CV_32S
,
depth
,
cn
,
buf
[
3
]),
abs
?
" -D ABS"
:
""
);
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
UMat
almostDist2Weight
;
UMat
almostDist2Weight
;
if
(
!
ocl_calcAlmostDist2Weight
<
float
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
if
((
depth
==
CV_8U
&&
h
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
))
!
ocl_calcAlmostDist2Weight
<
float
,
uchar
,
int
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
))
||
(
depth
==
CV_16U
&&
!
ocl_calcAlmostDist2Weight
<
float
,
ushort
,
int64
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
)))
return
false
;
return
false
;
CV_Assert
(
almostTemplateWindowSizeSqBinShift
>=
0
);
CV_Assert
(
almostTemplateWindowSizeSqBinShift
>=
0
);
UMat
srcex
;
UMat
srcex
;
int
borderSize
=
searchWindowHalfSize
+
templateWindowHalfWize
;
int
borderSize
=
searchWindowHalfSize
+
templateWindowHalfWize
;
if
(
type
==
CV_8UC
3
)
{
if
(
cn
==
3
)
{
Mat
src_rgb
=
_src
.
getMat
(),
src_rgba
(
size
,
CV_8UC4
);
UMat
tmp
(
size
,
CV_MAKE_TYPE
(
depth
,
4
)
);
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
mixChannels
(
&
src_rgb
,
1
,
&
src_rgba
,
1
,
from_to
,
3
);
mixChannels
(
std
::
vector
<
UMat
>
(
1
,
_src
.
getUMat
()),
std
::
vector
<
UMat
>
(
1
,
tmp
),
from_to
,
3
);
copyMakeBorder
(
src_rgba
,
srcex
,
copyMakeBorder
(
tmp
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
}
}
else
else
copyMakeBorder
(
_src
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
copyMakeBorder
(
_src
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
_dst
.
create
(
size
,
type
);
_dst
.
create
(
size
,
type
);
UMat
dst
;
UMat
dst
;
if
(
type
==
CV_8UC
3
)
if
(
cn
==
3
)
dst
.
create
(
size
,
CV_
8UC4
);
dst
.
create
(
size
,
CV_
MAKE_TYPE
(
depth
,
4
)
);
else
else
dst
=
_dst
.
getUMat
();
dst
=
_dst
.
getUMat
();
...
@@ -139,10 +157,9 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
...
@@ -139,10 +157,9 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
size_t
globalsize
[
2
]
=
{
nblocksx
*
ctaSize
,
nblocksy
},
localsize
[
2
]
=
{
ctaSize
,
1
};
size_t
globalsize
[
2
]
=
{
nblocksx
*
ctaSize
,
nblocksy
},
localsize
[
2
]
=
{
ctaSize
,
1
};
if
(
!
k
.
run
(
2
,
globalsize
,
localsize
,
false
))
return
false
;
if
(
!
k
.
run
(
2
,
globalsize
,
localsize
,
false
))
return
false
;
if
(
type
==
CV_8UC3
)
{
if
(
cn
==
3
)
{
Mat
dst_rgba
=
dst
.
getMat
(
ACCESS_READ
),
dst_rgb
=
_dst
.
getMat
();
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
mixChannels
(
&
dst_rgba
,
1
,
&
dst_rgb
,
1
,
from_to
,
3
);
mixChannels
(
std
::
vector
<
UMat
>
(
1
,
dst
),
std
::
vector
<
UMat
>
(
1
,
_dst
.
getUMat
())
,
from_to
,
3
);
}
}
return
true
;
return
true
;
...
...
modules/photo/src/opencl/nlmeans.cl
View file @
73663dcd
...
@@ -206,22 +206,23 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
...
@@ -206,22 +206,23 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
inline
void
convolveWindow
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
inline
void
convolveWindow
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
__global
const
int
*
almostDist2Weight,
__local
int
*
dists,
__global
const
int
*
almostDist2Weight,
__global
sample_t
*
dst,
int
dst_step,
int
dst_offset,
__global
sample_t
*
dst,
int
dst_step,
int
dst_offset,
int
y,
int
x,
int
id,
__local
in
t
*
weights_local,
int
y,
int
x,
int
id,
__local
weight_
t
*
weights_local,
__local
int
_t
*
weighted_sum_local,
int
almostTemplateWindowSizeSqBinShift
)
__local
sum
_t
*
weighted_sum_local,
int
almostTemplateWindowSizeSqBinShift
)
{
{
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2,
weights
=
0
;
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2
;
int_t
weighted_sum
=
(
int_t
)(
0
)
;
weight_t
weights
=
0
;
sum_t
weighted_sum
=
(
sum_t
)(
0
)
;
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
{
int
src_index
=
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
i
%
SEARCH_SIZE
+
sx,
cn,
src_offset
))
;
int
src_index
=
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
i
%
SEARCH_SIZE
+
sx,
cn,
src_offset
))
;
int_t
src_value
=
convert_int
_t
(
*
(
__global
const
pixel_t
*
)(
src
+
src_index
))
;
sum_t
src_value
=
convert_sum
_t
(
*
(
__global
const
pixel_t
*
)(
src
+
src_index
))
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
int
weight
=
almostDist2Weight[almostAvgDist]
;
int
weight
=
almostDist2Weight[almostAvgDist]
;
weights
+=
weight
;
weights
+=
(
weight_t
)
weight
;
weighted_sum
+=
(
int
_t
)(
weight
)
*
src_value
;
weighted_sum
+=
(
sum
_t
)(
weight
)
*
src_value
;
}
}
weights_local[id]
=
weights
;
weights_local[id]
=
weights
;
...
@@ -242,11 +243,11 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
...
@@ -242,11 +243,11 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
if
(
id
==
0
)
if
(
id
==
0
)
{
{
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
cn,
x,
dst_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
cn,
x,
dst_offset
))
;
int
_t
weighted_sum_local_0
=
weighted_sum_local[0]
+
weighted_sum_local[1]
+
sum
_t
weighted_sum_local_0
=
weighted_sum_local[0]
+
weighted_sum_local[1]
+
weighted_sum_local[2]
+
weighted_sum_local[3]
;
weighted_sum_local[2]
+
weighted_sum_local[3]
;
in
t
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
weight_
t
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
*
(
__global
pixel_t
*
)(
dst
+
dst_index
)
=
convert_pixel_t
(
weighted_sum_local_0
/
(
int
_t
)(
weights_local_0
))
;
*
(
__global
pixel_t
*
)(
dst
+
dst_index
)
=
convert_pixel_t
(
weighted_sum_local_0
/
(
sum
_t
)(
weights_local_0
))
;
}
}
}
}
...
@@ -259,8 +260,9 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
...
@@ -259,8 +260,9 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
int
block_y
=
get_group_id
(
1
)
;
int
block_y
=
get_group_id
(
1
)
;
int
id
=
get_local_id
(
0
)
,
first
;
int
id
=
get_local_id
(
0
)
,
first
;
__local
int
dists[SEARCH_SIZE_SQ],
weights[CTA_SIZE]
;
__local
int
dists[SEARCH_SIZE_SQ]
;
__local
int_t
weighted_sum[CTA_SIZE]
;
__local
weight_t
weights[CTA_SIZE]
;
__local
sum_t
weighted_sum[CTA_SIZE]
;
int
x0
=
block_x
*
BLOCK_COLS,
x1
=
min
(
x0
+
BLOCK_COLS,
dst_cols
)
;
int
x0
=
block_x
*
BLOCK_COLS,
x1
=
min
(
x0
+
BLOCK_COLS,
dst_cols
)
;
int
y0
=
block_y
*
BLOCK_ROWS,
y1
=
min
(
y0
+
BLOCK_ROWS,
dst_rows
)
;
int
y0
=
block_y
*
BLOCK_ROWS,
y1
=
min
(
y0
+
BLOCK_ROWS,
dst_rows
)
;
...
@@ -271,6 +273,11 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
...
@@ -271,6 +273,11 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
__global
int
*
col_dists
=
(
__global
int
*
)(
buffer
+
block_data_start
*
sizeof
(
int
))
;
__global
int
*
col_dists
=
(
__global
int
*
)(
buffer
+
block_data_start
*
sizeof
(
int
))
;
__global
int
*
up_col_dists
=
col_dists
+
SEARCH_SIZE_SQ
*
TEMPLATE_SIZE
;
__global
int
*
up_col_dists
=
col_dists
+
SEARCH_SIZE_SQ
*
TEMPLATE_SIZE
;
src_step
/=
sizeof
(
sample_t
)
;
src_offset
/=
sizeof
(
sample_t
)
;
dst_step
/=
sizeof
(
sample_t
)
;
dst_offset
/=
sizeof
(
sample_t
)
;
for
(
int
y
=
y0
; y < y1; ++y)
for
(
int
y
=
y0
; y < y1; ++y)
for
(
int
x
=
x0
; x < x1; ++x)
for
(
int
x
=
x0
; x < x1; ++x)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment