Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a16a11f3
Commit
a16a11f3
authored
Nov 13, 2014
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3413 from wangyan42164:denoising_opencl_improvement
parents
4763d803
a8b4c99e
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
22 deletions
+12
-22
fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+7
-5
nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+5
-17
No files found.
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
View file @
a16a11f3
...
...
@@ -19,7 +19,8 @@ enum
{
BLOCK_ROWS
=
32
,
BLOCK_COLS
=
32
,
CTA_SIZE
=
256
CTA_SIZE_INTEL
=
64
,
CTA_SIZE_DEFAULT
=
256
};
static
int
divUp
(
int
a
,
int
b
)
...
...
@@ -70,6 +71,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int
templateWindowSize
,
int
searchWindowSize
)
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
Size
size
=
_src
.
size
();
if
(
type
!=
CV_8UC1
&&
type
!=
CV_8UC2
&&
type
!=
CV_8UC4
)
...
...
@@ -86,12 +88,12 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D
CTA_SIZE2=%d -D
convert_uchar_t=%s"
,
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s"
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
BLOCK_COLS
,
BLOCK_ROWS
,
CTA_SIZE
,
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
cn
,
CTA_SIZE
>>
1
,
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]));
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]));
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
if
(
k
.
empty
())
...
...
@@ -120,7 +122,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
ocl
::
KernelArg
::
PtrReadOnly
(
almostDist2Weight
),
ocl
::
KernelArg
::
PtrReadOnly
(
buffer
),
almostTemplateWindowSizeSqBinShift
);
size_t
globalsize
[
2
]
=
{
nblocksx
*
CTA_SIZE
,
nblocksy
},
localsize
[
2
]
=
{
CTA_SIZE
,
1
};
size_t
globalsize
[
2
]
=
{
nblocksx
*
ctaSize
,
nblocksy
},
localsize
[
2
]
=
{
ctaSize
,
1
};
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
...
...
modules/photo/src/opencl/nlmeans.cl
View file @
a16a11f3
...
...
@@ -206,22 +206,11 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
weighted_sum
+=
(
int_t
)(
weight
)
*
src_value
;
}
if
(
id
>=
CTA_SIZE2
)
{
int
id2
=
id
-
CTA_SIZE2
;
weights_local[id2]
=
weights
;
weighted_sum_local[id2]
=
weighted_sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
id
<
CTA_SIZE2
)
{
weights_local[id]
+=
weights
;
weighted_sum_local[id]
+=
weighted_sum
;
}
weights_local[id]
=
weights
;
weighted_sum_local[id]
=
weighted_sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
CTA_SIZE
2
>>
1
; lsize > 2; lsize >>= 1)
for
(
int
lsize
=
CTA_SIZE
>>
1
; lsize > 2; lsize >>= 1)
{
if
(
id
<
lsize
)
{
...
...
@@ -252,8 +241,8 @@ __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int
int
block_y
=
get_group_id
(
1
)
;
int
id
=
get_local_id
(
0
)
,
first
;
__local
int
dists[SEARCH_SIZE_SQ],
weights[CTA_SIZE
2
]
;
__local
int_t
weighted_sum[CTA_SIZE
2
]
;
__local
int
dists[SEARCH_SIZE_SQ],
weights[CTA_SIZE]
;
__local
int_t
weighted_sum[CTA_SIZE]
;
int
x0
=
block_x
*
BLOCK_COLS,
x1
=
min
(
x0
+
BLOCK_COLS,
dst_cols
)
;
int
y0
=
block_y
*
BLOCK_ROWS,
y1
=
min
(
y0
+
BLOCK_ROWS,
dst_rows
)
;
...
...
@@ -281,7 +270,6 @@ __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int
first
=
(
first
+
1
)
%
TEMPLATE_SIZE
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
convolveWindow
(
src,
src_step,
src_offset,
dists,
almostDist2Weight,
dst,
dst_step,
dst_offset,
y,
x,
id,
weights,
weighted_sum,
almostTemplateWindowSizeSqBinShift
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment