Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
87760d13
Commit
87760d13
authored
Mar 02, 2015
by
Erik Karlsson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Cleanup and addition of 4-component support for ocl_fastNlMeansDenoising
parent
50bb14a0
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
15 deletions
+19
-15
fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+3
-3
nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+16
-12
No files found.
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
View file @
87760d13
...
@@ -77,7 +77,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
...
@@ -77,7 +77,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
Size
size
=
_src
.
size
();
Size
size
=
_src
.
size
();
if
(
cn
!=
1
&&
cn
!=
2
&&
cn
!=
3
&&
depth
!=
CV_8U
&&
(
!
abs
||
depth
!=
CV_16U
))
if
(
cn
!=
1
&&
cn
!=
2
&&
cn
!=
3
&&
cn
!=
4
&&
depth
!=
CV_8U
&&
(
!
abs
||
depth
!=
CV_16U
))
return
false
;
return
false
;
int
templateWindowHalfWize
=
templateWindowSize
/
2
;
int
templateWindowHalfWize
=
templateWindowSize
/
2
;
...
@@ -93,7 +93,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
...
@@ -93,7 +93,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
" -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s%s"
,
" -D convert_int_t=%s -D cn=%d -D
psz=%d -D
convert_pixel_t=%s%s"
,
templateWindowSize
,
searchWindowSize
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32S
)
:
"long"
,
depth
==
CV_8U
?
ocl
::
typeToStr
(
CV_32S
)
:
"long"
,
...
@@ -103,7 +103,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
...
@@ -103,7 +103,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
(
sprintf
(
buf
[
1
],
"convert_long%d"
,
cn
),
buf
[
1
]),
(
sprintf
(
buf
[
1
],
"convert_long%d"
,
cn
),
buf
[
1
]),
BLOCK_COLS
,
BLOCK_ROWS
,
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
2
]),
cn
==
3
?
4
:
cn
,
ocl
::
convertTypeStr
(
depth
,
CV_32S
,
cn
,
buf
[
2
]),
cn
,
cn
==
3
?
4
:
cn
,
ocl
::
convertTypeStr
(
CV_32S
,
depth
,
cn
,
buf
[
3
]),
abs
?
" -D ABS"
:
""
);
ocl
::
convertTypeStr
(
CV_32S
,
depth
,
cn
,
buf
[
3
]),
abs
?
" -D ABS"
:
""
);
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
...
...
modules/photo/src/opencl/nlmeans.cl
View file @
87760d13
...
@@ -60,8 +60,10 @@ inline int calcDist(pixel_t a, pixel_t b)
...
@@ -60,8 +60,10 @@ inline int calcDist(pixel_t a, pixel_t b)
return
retval
;
return
retval
;
#
elif
cn
==
2
#
elif
cn
==
2
return
retval.x
+
retval.y
;
return
retval.x
+
retval.y
;
#
elif
cn
==
3
|
| cn == 4 /* A is ignored */
#
elif
cn
==
3
return
retval.x
+
retval.y
+
retval.z
;
return
retval.x
+
retval.y
+
retval.z
;
#
elif
cn
==
4
return
retval.x
+
retval.y
+
retval.z
+
retval.w
;
#
else
#
else
#
error
"cn should be either 1, 2, 3 or 4"
#
error
"cn should be either 1, 2, 3 or 4"
#
endif
#
endif
...
@@ -83,8 +85,10 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v
...
@@ -83,8 +85,10 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v
return
retval
;
return
retval
;
#
elif
cn
==
2
#
elif
cn
==
2
return
retval.x
+
retval.y
;
return
retval.x
+
retval.y
;
#elif cn == 3
|
|
cn
==
4
/*
A
is
ignored
*/
#
elif
cn
==
3
return
retval.x
+
retval.y
+
retval.z
;
return
retval.x
+
retval.y
+
retval.z
;
#
elif
cn
==
4
return
retval.x
+
retval.y
+
retval.z
+
retval.w
;
#
else
#
else
#
error
"cn should be either 1, 2, 3 or 4"
#
error
"cn should be either 1, 2, 3 or 4"
#
endif
#
endif
...
@@ -106,8 +110,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i
...
@@ -106,8 +110,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i
int
dist
=
0
,
value
;
int
dist
=
0
,
value
;
__global
const
pixel_t
*
src_template
=
(
__global
const
pixel_t
*
)(
src
+
__global
const
pixel_t
*
src_template
=
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
cn
,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
psz
,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
__global
const
pixel_t
*
src_current
=
(
__global
const
pixel_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn
,
x,
src_offset
)))
;
__global
const
pixel_t
*
src_current
=
(
__global
const
pixel_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
psz
,
x,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
i
*
TEMPLATE_SIZE
;
__global
int
*
col_dists_current
=
col_dists
+
i
*
TEMPLATE_SIZE
;
#
pragma
unroll
#
pragma
unroll
...
@@ -148,9 +152,9 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i
...
@@ -148,9 +152,9 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
{
__global
const
pixel_t
*
src_current
=
(
__global
const
pixel_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn
,
x,
src_offset
)))
;
__global
const
pixel_t
*
src_current
=
(
__global
const
pixel_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
psz
,
x,
src_offset
)))
;
__global
const
pixel_t
*
src_template
=
(
__global
const
pixel_t
*
)(
src
+
__global
const
pixel_t
*
src_template
=
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
cn
,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
psz
,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
TEMPLATE_SIZE
*
i
;
__global
int
*
col_dists_current
=
col_dists
+
TEMPLATE_SIZE
*
i
;
int
col_dist
=
0
;
int
col_dist
=
0
;
...
@@ -178,8 +182,8 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
...
@@ -178,8 +182,8 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
int
sy_up
=
y
-
TEMPLATE_SIZE2
-
1
;
int
sy_up
=
y
-
TEMPLATE_SIZE2
-
1
;
int
sy_down
=
y
+
TEMPLATE_SIZE2
;
int
sy_down
=
y
+
TEMPLATE_SIZE2
;
pixel_t
up_value
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_up,
src_step,
mad24
(
cn
,
sx,
src_offset
)))
;
pixel_t
up_value
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_up,
src_step,
mad24
(
psz
,
sx,
src_offset
)))
;
pixel_t
down_value
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_down,
src_step,
mad24
(
cn
,
sx,
src_offset
)))
;
pixel_t
down_value
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_down,
src_step,
mad24
(
psz
,
sx,
src_offset
)))
;
sx
-=
SEARCH_SIZE2
;
sx
-=
SEARCH_SIZE2
;
sy_up
-=
SEARCH_SIZE2
;
sy_up
-=
SEARCH_SIZE2
;
...
@@ -189,8 +193,8 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
...
@@ -189,8 +193,8 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
{
{
int
wx
=
i
%
SEARCH_SIZE,
wy
=
i
/
SEARCH_SIZE
;
int
wx
=
i
%
SEARCH_SIZE,
wy
=
i
/
SEARCH_SIZE
;
pixel_t
up_value_t
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_up
+
wy,
src_step,
mad24
(
cn
,
sx
+
wx,
src_offset
)))
;
pixel_t
up_value_t
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_up
+
wy,
src_step,
mad24
(
psz
,
sx
+
wx,
src_offset
)))
;
pixel_t
down_value_t
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_down
+
wy,
src_step,
mad24
(
cn
,
sx
+
wx,
src_offset
)))
;
pixel_t
down_value_t
=
*
(
__global
const
pixel_t
*
)(
src
+
mad24
(
sy_down
+
wy,
src_step,
mad24
(
psz
,
sx
+
wx,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
mad24
(
i,
TEMPLATE_SIZE,
first
)
;
__global
int
*
col_dists_current
=
col_dists
+
mad24
(
i,
TEMPLATE_SIZE,
first
)
;
__global
int
*
up_col_dists_current
=
up_col_dists
+
mad24
(
x0,
SEARCH_SIZE_SQ,
i
)
;
__global
int
*
up_col_dists_current
=
up_col_dists
+
mad24
(
x0,
SEARCH_SIZE_SQ,
i
)
;
...
@@ -215,7 +219,7 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
...
@@ -215,7 +219,7 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
{
int
src_index
=
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
i
%
SEARCH_SIZE
+
sx,
cn
,
src_offset
))
;
int
src_index
=
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
i
%
SEARCH_SIZE
+
sx,
psz
,
src_offset
))
;
sum_t
src_value
=
convert_sum_t
(
*
(
__global
const
pixel_t
*
)(
src
+
src_index
))
;
sum_t
src_value
=
convert_sum_t
(
*
(
__global
const
pixel_t
*
)(
src
+
src_index
))
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
...
@@ -242,7 +246,7 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
...
@@ -242,7 +246,7 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
if
(
id
==
0
)
if
(
id
==
0
)
{
{
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
cn
,
x,
dst_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
psz
,
x,
dst_offset
))
;
sum_t
weighted_sum_local_0
=
weighted_sum_local[0]
+
weighted_sum_local[1]
+
sum_t
weighted_sum_local_0
=
weighted_sum_local[0]
+
weighted_sum_local[1]
+
weighted_sum_local[2]
+
weighted_sum_local[3]
;
weighted_sum_local[2]
+
weighted_sum_local[3]
;
weight_t
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
weight_t
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment