Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0fdb95e1
Commit
0fdb95e1
authored
Mar 01, 2015
by
Erik Karlsson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Refactoring and addition of CV_8UC3 to ocl_fastNlMeansDenoising
parent
8e7aff44
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
78 additions
and
38 deletions
+78
-38
fast_nlmeans_denoising_opencl.hpp
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+33
-11
nlmeans.cl
modules/photo/src/opencl/nlmeans.cl
+45
-27
No files found.
modules/photo/src/fast_nlmeans_denoising_opencl.hpp
View file @
0fdb95e1
...
...
@@ -70,11 +70,11 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
static
bool
ocl_fastNlMeansDenoising
(
InputArray
_src
,
OutputArray
_dst
,
float
h
,
int
templateWindowSize
,
int
searchWindowSize
)
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
ctaSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
CTA_SIZE_INTEL
:
CTA_SIZE_DEFAULT
;
Size
size
=
_src
.
size
();
if
(
type
!=
CV_8UC1
&&
type
!=
CV_8UC2
&&
type
!=
CV_8UC
4
)
if
(
type
!=
CV_8UC1
&&
type
!=
CV_8UC2
&&
type
!=
CV_8UC
3
)
return
false
;
int
templateWindowHalfWize
=
templateWindowSize
/
2
;
...
...
@@ -86,13 +86,15 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
char
cvt
[
2
][
40
];
String
opts
=
format
(
"-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D sample_t=%s -D pixel_t=%s -D int_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s"
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
cn
,
" -D convert_int_t=%s -D cn=%d -D convert_pixel_t=%s"
,
templateWindowSize
,
searchWindowSize
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
BLOCK_COLS
,
BLOCK_ROWS
,
ctaSize
,
templateWindowHalfWize
,
searchWindowHalfSize
,
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
type
==
CV_8UC3
?
4
:
cn
,
ocl
::
convertTypeStr
(
CV_32S
,
CV_8U
,
cn
,
cvt
[
1
]));
ocl
::
Kernel
k
(
"fastNlMeansDenoising"
,
ocl
::
photo
::
nlmeans_oclsrc
,
opts
);
...
...
@@ -107,10 +109,22 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
UMat
srcex
;
int
borderSize
=
searchWindowHalfSize
+
templateWindowHalfWize
;
copyMakeBorder
(
_src
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
if
(
type
==
CV_8UC3
)
{
Mat
src_rgb
=
_src
.
getMat
(),
src_rgba
(
size
,
CV_8UC4
);
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
mixChannels
(
&
src_rgb
,
1
,
&
src_rgba
,
1
,
from_to
,
3
);
copyMakeBorder
(
src_rgba
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
}
else
copyMakeBorder
(
_src
,
srcex
,
borderSize
,
borderSize
,
borderSize
,
borderSize
,
BORDER_DEFAULT
);
_dst
.
create
(
size
,
type
);
UMat
dst
=
_dst
.
getUMat
();
UMat
dst
;
if
(
type
==
CV_8UC3
)
dst
.
create
(
size
,
CV_8UC4
);
else
dst
=
_dst
.
getUMat
();
int
searchWindowSizeSq
=
searchWindowSize
*
searchWindowSize
;
Size
upColSumSize
(
size
.
width
,
searchWindowSizeSq
*
nblocksy
);
...
...
@@ -123,7 +137,15 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
ocl
::
KernelArg
::
PtrReadOnly
(
buffer
),
almostTemplateWindowSizeSqBinShift
);
size_t
globalsize
[
2
]
=
{
nblocksx
*
ctaSize
,
nblocksy
},
localsize
[
2
]
=
{
ctaSize
,
1
};
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
if
(
!
k
.
run
(
2
,
globalsize
,
localsize
,
false
))
return
false
;
if
(
type
==
CV_8UC3
)
{
Mat
dst_rgba
=
dst
.
getMat
(
ACCESS_READ
),
dst_rgb
=
_dst
.
getMat
();
int
from_to
[]
=
{
0
,
0
,
1
,
1
,
2
,
2
};
mixChannels
(
&
dst_rgba
,
1
,
&
dst_rgb
,
1
,
from_to
,
3
);
}
return
true
;
}
static
bool
ocl_fastNlMeansDenoisingColored
(
InputArray
_src
,
OutputArray
_dst
,
...
...
modules/photo/src/opencl/nlmeans.cl
View file @
0fdb95e1
...
...
@@ -29,8 +29,11 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
if
(
almostDist
<
almostMaxDist
)
{
FT
dist
=
almostDist
*
almostDist2ActualDistMultiplier
;
#
ifdef
ABS
int
weight
=
convert_int_sat_rte
(
fixedPointMult
*
exp
(
-dist*dist
*
den
))
;
#
else
int
weight
=
convert_int_sat_rte
(
fixedPointMult
*
exp
(
-dist
*
den
))
;
#
endif
if
(
weight
<
WEIGHT_THRESHOLD
*
fixedPointMult
)
weight
=
0
;
...
...
@@ -44,21 +47,33 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
#
define
SEARCH_SIZE_SQ
(
SEARCH_SIZE
*
SEARCH_SIZE
)
inline
int
calcDist
(
uchar_t
a,
uchar
_t
b
)
inline
int
calcDist
(
pixel_t
a,
pixel
_t
b
)
{
#
ifdef
ABS
int_t
retval
=
convert_int_t
(
abs_diff
(
a,
b
))
;
#
else
int_t
diff
=
convert_int_t
(
a
)
-
convert_int_t
(
b
)
;
int_t
retval
=
diff
*
diff
;
#
endif
#
if
cn
==
1
return
retval
;
#
elif
cn
==
2
return
retval.x
+
retval.y
;
#
elif
cn
==
3
|
| cn == 4 /* A is ignored */
return retval.x + retval.y + retval.z;
#else
#
error
"cn should be either 1
or 2
"
#error "cn should be either 1
, 2, 3 or 4
"
#endif
}
inline
int
calcDistUpDown
(
uchar_t
down_value,
uchar_t
down_value_t,
uchar_t
up_value,
uchar_t
up_value_t
)
#ifdef ABS
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
{
return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t);
}
#else
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
{
int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
...
...
@@ -68,14 +83,17 @@ inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_v
return retval;
#elif cn == 2
return retval.x + retval.y;
#elif cn == 3 |
|
cn
==
4
/*
A
is
ignored
*/
return
retval.x
+
retval.y
+
retval.z
;
#
else
#
error
"cn should be either 1
or 2
"
#
error
"cn should be either 1
, 2, 3 or 4
"
#
endif
}
#
endif
#
define
COND
if
(
x
==
0
&&
y
==
0
)
inline
void
calcFirstElementInRow
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
inline
void
calcFirstElementInRow
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
int
y,
int
x,
int
id,
__global
int
*
col_dists,
__global
int
*
up_col_dists
)
{
...
...
@@ -87,9 +105,9 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
{
int
dist
=
0
,
value
;
__global
const
uchar_t
*
src_template
=
(
__global
const
uchar
_t
*
)(
src
+
__global
const
pixel_t
*
src_template
=
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
cn,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
__global
const
uchar_t
*
src_current
=
(
__global
const
uchar
_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn,
x,
src_offset
)))
;
__global
const
pixel_t
*
src_current
=
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn,
x,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
i
*
TEMPLATE_SIZE
;
#
pragma
unroll
...
...
@@ -107,8 +125,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
dist
+=
value
;
}
src_current
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_current
+
src_step
)
;
src_template
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_template
+
src_step
)
;
src_current
=
(
__global
const
pixel_t
*
)((
__global
const
sample_t
*
)
src_current
+
src_step
)
;
src_template
=
(
__global
const
pixel_t
*
)((
__global
const
sample_t
*
)
src_template
+
src_step
)
;
}
#
pragma
unroll
...
...
@@ -120,7 +138,7 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
}
}
inline
void
calcElementInFirstRow
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
inline
void
calcElementInFirstRow
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
int
y,
int
x0,
int
x,
int
id,
int
first,
__global
int
*
col_dists,
__global
int
*
up_col_dists
)
{
...
...
@@ -130,8 +148,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
__global
const
uchar_t
*
src_current
=
(
__global
const
uchar
_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn,
x,
src_offset
)))
;
__global
const
uchar_t
*
src_template
=
(
__global
const
uchar
_t
*
)(
src
+
__global
const
pixel_t
*
src_current
=
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
y,
src_step,
mad24
(
cn,
x,
src_offset
)))
;
__global
const
pixel_t
*
src_template
=
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
cn,
sx
+
i
%
SEARCH_SIZE,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
TEMPLATE_SIZE
*
i
;
...
...
@@ -142,8 +160,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
{
col_dist
+=
calcDist
(
src_current[0],
src_template[0]
)
;
src_current
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_current
+
src_step
)
;
src_template
=
(
__global
const
uchar_t
*
)((
__global
const
uchar
*
)
src_template
+
src_step
)
;
src_current
=
(
__global
const
pixel_t
*
)((
__global
const
sample_t
*
)
src_current
+
src_step
)
;
src_template
=
(
__global
const
pixel_t
*
)((
__global
const
sample_t
*
)
src_template
+
src_step
)
;
}
dists[i]
+=
col_dist
-
col_dists_current[first]
;
...
...
@@ -152,7 +170,7 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
}
}
inline
void
calcElement
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
inline
void
calcElement
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
int
y,
int
x0,
int
x,
int
id,
int
first,
__global
int
*
col_dists,
__global
int
*
up_col_dists
)
{
...
...
@@ -160,8 +178,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
int
sy_up
=
y
-
TEMPLATE_SIZE2
-
1
;
int
sy_down
=
y
+
TEMPLATE_SIZE2
;
uchar_t
up_value
=
*
(
__global
const
uchar
_t
*
)(
src
+
mad24
(
sy_up,
src_step,
mad24
(
cn,
sx,
src_offset
)))
;
uchar_t
down_value
=
*
(
__global
const
uchar
_t
*
)(
src
+
mad24
(
sy_down,
src_step,
mad24
(
cn,
sx,
src_offset
)))
;
pixel_t
up_value
=
*
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
sy_up,
src_step,
mad24
(
cn,
sx,
src_offset
)))
;
pixel_t
down_value
=
*
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
sy_down,
src_step,
mad24
(
cn,
sx,
src_offset
)))
;
sx
-=
SEARCH_SIZE2
;
sy_up
-=
SEARCH_SIZE2
;
...
...
@@ -171,8 +189,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
{
int
wx
=
i
%
SEARCH_SIZE,
wy
=
i
/
SEARCH_SIZE
;
uchar_t
up_value_t
=
*
(
__global
const
uchar
_t
*
)(
src
+
mad24
(
sy_up
+
wy,
src_step,
mad24
(
cn,
sx
+
wx,
src_offset
)))
;
uchar_t
down_value_t
=
*
(
__global
const
uchar
_t
*
)(
src
+
mad24
(
sy_down
+
wy,
src_step,
mad24
(
cn,
sx
+
wx,
src_offset
)))
;
pixel_t
up_value_t
=
*
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
sy_up
+
wy,
src_step,
mad24
(
cn,
sx
+
wx,
src_offset
)))
;
pixel_t
down_value_t
=
*
(
__global
const
pixel
_t
*
)(
src
+
mad24
(
sy_down
+
wy,
src_step,
mad24
(
cn,
sx
+
wx,
src_offset
)))
;
__global
int
*
col_dists_current
=
col_dists
+
mad24
(
i,
TEMPLATE_SIZE,
first
)
;
__global
int
*
up_col_dists_current
=
up_col_dists
+
mad24
(
x0,
SEARCH_SIZE_SQ,
i
)
;
...
...
@@ -185,9 +203,9 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
}
}
inline
void
convolveWindow
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
inline
void
convolveWindow
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
__local
int
*
dists,
__global
const
int
*
almostDist2Weight,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
__global
sample_t
*
dst,
int
dst_step,
int
dst_offset,
int
y,
int
x,
int
id,
__local
int
*
weights_local,
__local
int_t
*
weighted_sum_local,
int
almostTemplateWindowSizeSqBinShift
)
{
...
...
@@ -197,7 +215,7 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
for
(
int
i
=
id
; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
int
src_index
=
mad24
(
sy
+
i
/
SEARCH_SIZE,
src_step,
mad24
(
i
%
SEARCH_SIZE
+
sx,
cn,
src_offset
))
;
int_t
src_value
=
convert_int_t
(
*
(
__global
const
uchar
_t
*
)(
src
+
src_index
))
;
int_t
src_value
=
convert_int_t
(
*
(
__global
const
pixel
_t
*
)(
src
+
src_index
))
;
int
almostAvgDist
=
dists[i]
>>
almostTemplateWindowSizeSqBinShift
;
int
weight
=
almostDist2Weight[almostAvgDist]
;
...
...
@@ -228,13 +246,13 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
weighted_sum_local[2]
+
weighted_sum_local[3]
;
int
weights_local_0
=
weights_local[0]
+
weights_local[1]
+
weights_local[2]
+
weights_local[3]
;
*
(
__global
uchar_t
*
)(
dst
+
dst_index
)
=
convert_uchar
_t
(
weighted_sum_local_0
/
(
int_t
)(
weights_local_0
))
;
*
(
__global
pixel_t
*
)(
dst
+
dst_index
)
=
convert_pixel
_t
(
weighted_sum_local_0
/
(
int_t
)(
weights_local_0
))
;
}
}
__kernel
void
fastNlMeansDenoising
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
const
int
*
almostDist2Weight,
__global
uchar
*
buffer,
__kernel
void
fastNlMeansDenoising
(
__global
const
sample_t
*
src,
int
src_step,
int
src_offset,
__global
sample_t
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
const
int
*
almostDist2Weight,
__global
sample_t
*
buffer,
int
almostTemplateWindowSizeSqBinShift
)
{
int
block_x
=
get_group_id
(
0
)
,
nblocks_x
=
get_num_groups
(
0
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment