Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
324fa268
Commit
324fa268
authored
Mar 06, 2015
by
Erik Karlsson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Refactoring of OpenCL implementation
parent
41ffcc27
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
31 deletions
+44
-31
denoising.cpp
modules/photo/src/denoising.cpp
+4
-2
fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+25
-13
nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+15
-16
No files found.
modules/photo/src/denoising.cpp
View file @
324fa268
...
...
@@ -51,7 +51,8 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
Size
src_size
=
_src
.
size
();
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
(
_src
.
isUMat
()
||
_dst
.
isUMat
())
&&
src_size
.
width
>
5
&&
src_size
.
height
>
5
,
// low accuracy on small sizes
ocl_fastNlMeansDenoising
(
_src
,
_dst
,
h
,
templateWindowSize
,
searchWindowSize
,
false
))
ocl_fastNlMeansDenoising
(
_src
,
_dst
,
&
h
,
1
,
templateWindowSize
,
searchWindowSize
,
false
))
Mat
src
=
_src
.
getMat
();
_dst
.
create
(
src_size
,
src
.
type
());
...
...
@@ -95,7 +96,8 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h,
Size
src_size
=
_src
.
size
();
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
(
_src
.
isUMat
()
||
_dst
.
isUMat
())
&&
src_size
.
width
>
5
&&
src_size
.
height
>
5
,
// low accuracy on small sizes
ocl_fastNlMeansDenoising
(
_src
,
_dst
,
h
,
templateWindowSize
,
searchWindowSize
,
true
))
ocl_fastNlMeansDenoising
(
_src
,
_dst
,
&
h
,
1
,
templateWindowSize
,
searchWindowSize
,
true
))
Mat
src
=
_src
.
getMat
();
_dst
.
create
(
src_size
,
src
.
type
());
...
...
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
View file @
324fa268
...
...
@@ -29,7 +29,7 @@ static int divUp(int a, int b)
}
template
<
typename
FT
,
typename
ST
,
typename
WT
>
static
bool
ocl_calcAlmostDist2Weight
(
UMat
&
almostDist2Weight
,
int
searchWindowSize
,
int
templateWindowSize
,
FT
h
,
int
cn
,
static
bool
ocl_calcAlmostDist2Weight
(
UMat
&
almostDist2Weight
,
int
searchWindowSize
,
int
templateWindowSize
,
FT
*
h
,
int
hn
,
int
cn
,
int
&
almostTemplateWindowSizeSqBinShift
,
bool
abs
)
{
const
WT
maxEstimateSumValue
=
searchWindowSize
*
searchWindowSize
*
...
...
@@ -53,24 +53,32 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
int
maxDist
=
abs
?
std
::
numeric_limits
<
ST
>::
max
()
*
cn
:
std
::
numeric_limits
<
ST
>::
max
()
*
std
::
numeric_limits
<
ST
>::
max
()
*
cn
;
int
almostMaxDist
=
(
int
)(
maxDist
/
almostDist2ActualDistMultiplier
+
1
);
FT
den
=
1.0
f
/
(
h
*
h
*
cn
);
FT
den
[
4
];
CV_Assert
(
hn
>
0
&&
hn
<=
4
);
for
(
int
i
=
0
;
i
<
hn
;
i
++
)
den
[
i
]
=
1.0
f
/
(
h
[
i
]
*
h
[
i
]
*
cn
);
almostDist2Weight
.
create
(
1
,
almostMaxDist
,
CV_32SC
1
);
almostDist2Weight
.
create
(
1
,
almostMaxDist
,
CV_32SC
(
hn
==
3
?
4
:
hn
)
);
char
buf
[
40
];
ocl
::
Kernel
k
(
"calcAlmostDist2Weight"
,
ocl
::
photo
::
nlmeans_oclsrc
,
format
(
"-D OP_CALC_WEIGHTS -D FT=%s%s%s"
,
ocl
::
typeToStr
(
depth
),
format
(
"-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
" -D wlut_t=%s -D convert_wlut_t=%s%s%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
hn
)),
ocl
::
typeToStr
(
CV_32SC
(
hn
)),
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
hn
,
buf
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
abs
?
" -D ABS"
:
""
));
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
PtrWriteOnly
(
almostDist2Weight
),
almostMaxDist
,
almostDist2ActualDistMultiplier
,
fixedPointMult
,
den
,
WEIGHT_THRESHOLD
);
almostDist2ActualDistMultiplier
,
fixedPointMult
,
ocl
::
KernelArg
::
Constant
(
den
,
(
hn
==
3
?
4
:
hn
)
*
sizeof
(
FT
)),
WEIGHT_THRESHOLD
);
size_t
globalsize
[
1
]
=
{
almostMaxDist
};
return
k
.
run
(
1
,
globalsize
,
NULL
,
false
);
}
static
bool
ocl_fastNlMeansDenoising
(
InputArray
_src
,
OutputArray
_dst
,
float
h
,
static
bool
ocl_fastNlMeansDenoising
(
InputArray
_src
,
OutputArray
_dst
,
float
*
h
,
int
hn
,
int
templateWindowSize
,
int
searchWindowSize
,
bool
abs
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
...
...
@@ -89,18 +97,22 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
char
buf
[
4
][
40
];
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D pixel_t=%s -D int_t=%s"
" -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D pixel_t=%s -D int_t=%s
-D wlut_t=%s
"
" -D weight_t=%s -D
convert_weight_t=%s -D
sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s"
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32S
)
:
"long"
,
ocl
::
typeToStr
(
CV_32SC
(
hn
)),
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32SC
(
hn
))
:
format
(
"long%s"
,
hn
>
1
?
format
(
"%d"
,
hn
).
c_str
()
:
""
).
c_str
(),
depth
==
CV_8U
?
ocl
::
convertTypeStr
(
CV_32S
,
CV_32S
,
hn
,
buf
[
0
])
:
format
(
"convert_long%s"
,
hn
>
1
?
format
(
"%d"
,
hn
).
c_str
()
:
""
).
c_str
(),
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32SC
(
cn
))
:
(
sprintf
(
buf
[
0
],
"long%d"
,
cn
),
buf
[
0
]
),
format
(
"long%s"
,
cn
>
1
?
format
(
"%d"
,
cn
).
c_str
()
:
""
).
c_str
(
),
depth
==
CV_8U
?
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
1
])
:
(
sprintf
(
buf
[
1
],
"convert_long%d"
,
cn
),
buf
[
1
]
),
format
(
"convert_long%s"
,
cn
>
1
?
format
(
"%d"
,
cn
).
c_str
()
:
""
).
c_str
(
),
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
2
]),
cn
,
...
...
@@ -115,13 +127,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
if
((
depth
==
CV_8U
&&
!
ocl_calcAlmostDist2Weight
<
float
,
uchar
,
int
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
h
,
hn
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
))
||
(
depth
==
CV_16U
&&
!
ocl_calcAlmostDist2Weight
<
float
,
ushort
,
int64
>
(
almostDist2Weight
,
searchWindowSize
,
templateWindowSize
,
h
,
cn
,
h
,
hn
,
cn
,
almostTemplateWindowSizeSqBinShift
,
abs
)))
return
false
;
...
...
modules/photo/src/opencl/nlmeans.cl
View file @
324fa268
...
...
@@ -20,9 +20,9 @@
#
ifdef
OP_CALC_WEIGHTS
__kernel
void
calcAlmostDist2Weight
(
__global
in
t
*
almostDist2Weight,
int
almostMaxDist,
__kernel
void
calcAlmostDist2Weight
(
__global
wlut_
t
*
almostDist2Weight,
int
almostMaxDist,
FT
almostDist2ActualDistMultiplier,
int
fixedPointMult,
FT
den,
FT
WEIGHT_THRESHOLD
)
w_t
den,
FT
WEIGHT_THRESHOLD
)
{
int
almostDist
=
get_global_id
(
0
)
;
...
...
@@ -30,14 +30,13 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
{
FT
dist
=
almostDist
*
almostDist2ActualDistMultiplier
;
#
ifdef
ABS
int
weight
=
convert_int_sat_rte
(
fixedPointMult
*
exp
(
-dist*dist
*
den
)
)
;
w_t
w
=
exp
((
w_t
)(
-dist*dist
)
*
den
)
;
#
else
int
weight
=
convert_int_sat_rte
(
fixedPointMult
*
exp
(
-dist
*
den
)
)
;
w_t
w
=
exp
((
w_t
)(
-dist
)
*
den
)
;
#
endif
if
(
weight
<
WEIGHT_THRESHOLD
*
fixedPointMult
)
weight
=
0
;
almostDist2Weight[almostDist]
=
weight
;
wlut_t
weight
=
convert_wlut_t
(
fixedPointMult
*
(
isnan
(
w
)
?
(
w_t
)
1.0
:
w
))
;
almostDist2Weight[almostDist]
=
weight
<
WEIGHT_THRESHOLD
*
fixedPointMult
?
(
wlut_t
)
0
:
weight
;
}
}
...
...
@@ -208,14 +207,14 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
}
inline
void
convolveWindow
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
__global
const
in
t
*
almostDist2Weight,
__local
int
*
dists,
__global
const
wlut_
t
*
almostDist2Weight,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
y,
int
x,
int
id,
__local
weight_t
*
weights_local,
__local
sum_t
*
weighted_sum_local,
int
almostTemplateWindowSizeSqBinShift
)
{
int
sx
=
x
-
SEARCH_SIZE2,
sy
=
y
-
SEARCH_SIZE2
;
weight_t
weights
=
0
;
sum_t
weighted_sum
=
(
sum_t
)
(
0
)
;
weight_t
weights
=
(
weight_t
)
0
;
sum_t
weighted_sum
=
(
sum_t
)
0
;
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
...
...
@@ -223,10 +222,10 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
sum_t
src_value
=
convert_sum_t
(
*
(
__global
const
pixel_t
*
)(
src
+
src_index
))
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
int
weight
=
almostDist2Weight[almostAvgDist]
;
weight_t
weight
=
convert_weight_t
(
almostDist2Weight[almostAvgDist]
)
;
weights
+=
(
weight_t
)
weight
;
weighted_sum
+=
(
sum_t
)
(
weight
)
*
src_value
;
weights
+=
weight
;
weighted_sum
+=
(
sum_t
)
weight
*
src_value
;
}
weights_local[id]
=
weights
;
...
...
@@ -251,13 +250,13 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
weighted_sum_local[2]
+
weighted_sum_local[3]
;
weight_t
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
*
(
__global
pixel_t
*
)(
dst
+
dst_index
)
=
convert_pixel_t
(
weighted_sum_local_0
/
(
sum_t
)
(
weights_local_0
)
)
;
*
(
__global
pixel_t
*
)(
dst
+
dst_index
)
=
convert_pixel_t
(
weighted_sum_local_0
/
(
sum_t
)
weights_local_0
)
;
}
}
__kernel
void
fastNlMeansDenoising
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
const
in
t
*
almostDist2Weight,
__global
uchar
*
buffer,
__global
const
wlut_
t
*
almostDist2Weight,
__global
uchar
*
buffer,
int
almostTemplateWindowSizeSqBinShift
)
{
int
block_x
=
get_group_id
(
0
)
,
nblocks_x
=
get_num_groups
(
0
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment