Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
8a456c5a
Commit
8a456c5a
authored
Dec 30, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Dec 30, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2073 from KonstantinMatskevich:ocl_tapi_clahe
parents
8151571d
55634c1f
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
375 additions
and
15 deletions
+375
-15
ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+1
-0
ocl.cpp
modules/core/src/ocl.cpp
+10
-0
clahe.cpp
modules/imgproc/src/clahe.cpp
+112
-15
clahe.cl
modules/imgproc/src/opencl/clahe.cl
+252
-0
No files found.
modules/core/include/opencv2/core/ocl.hpp
View file @
8a456c5a
...
...
@@ -489,6 +489,7 @@ public:
bool
runTask
(
bool
sync
,
const
Queue
&
q
=
Queue
());
size_t
workGroupSize
()
const
;
size_t
preferedWorkGroupSizeMultiple
()
const
;
bool
compileWorkGroupSize
(
size_t
wsz
[])
const
;
size_t
localMemSize
()
const
;
...
...
modules/core/src/ocl.cpp
View file @
8a456c5a
...
...
@@ -2817,6 +2817,16 @@ size_t Kernel::workGroupSize() const
sizeof
(
val
),
&
val
,
&
retsz
)
>=
0
?
val
:
0
;
}
size_t
Kernel
::
preferedWorkGroupSizeMultiple
()
const
{
if
(
!
p
)
return
0
;
size_t
val
=
0
,
retsz
=
0
;
cl_device_id
dev
=
(
cl_device_id
)
Device
::
getDefault
().
ptr
();
return
clGetKernelWorkGroupInfo
(
p
->
handle
,
dev
,
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
,
sizeof
(
val
),
&
val
,
&
retsz
)
>=
0
?
val
:
0
;
}
bool
Kernel
::
compileWorkGroupSize
(
size_t
wsz
[])
const
{
if
(
!
p
||
!
wsz
)
...
...
modules/imgproc/src/clahe.cpp
View file @
8a456c5a
...
...
@@ -40,10 +40,90 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
// ----------------------------------------------------------------------
// CLAHE
namespace
clahe
{
static
bool
calcLut
(
cv
::
InputArray
_src
,
cv
::
OutputArray
_dst
,
const
int
tilesX
,
const
int
tilesY
,
const
cv
::
Size
tileSize
,
const
int
clipLimit
,
const
float
lutScale
)
{
cv
::
ocl
::
Kernel
_k
(
"calcLut"
,
cv
::
ocl
::
imgproc
::
clahe_oclsrc
);
bool
is_cpu
=
cv
::
ocl
::
Device
::
getDefault
().
type
()
==
cv
::
ocl
::
Device
::
TYPE_CPU
;
cv
::
String
opts
;
if
(
is_cpu
)
opts
=
"-D CPU "
;
else
opts
=
cv
::
format
(
"-D WAVE_SIZE=%d"
,
_k
.
preferedWorkGroupSizeMultiple
());
cv
::
ocl
::
Kernel
k
(
"calcLut"
,
cv
::
ocl
::
imgproc
::
clahe_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
cv
::
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
tilesX
*
tilesY
,
256
,
CV_8UC1
);
cv
::
UMat
dst
=
_dst
.
getUMat
();
int
tile_size
[
2
];
tile_size
[
0
]
=
tileSize
.
width
;
tile_size
[
1
]
=
tileSize
.
height
;
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
tilesX
*
localThreads
[
0
],
tilesY
*
localThreads
[
1
],
1
};
int
idx
=
0
;
idx
=
k
.
set
(
idx
,
cv
::
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
));
idx
=
k
.
set
(
idx
,
cv
::
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
));
idx
=
k
.
set
(
idx
,
tile_size
);
idx
=
k
.
set
(
idx
,
tilesX
);
idx
=
k
.
set
(
idx
,
clipLimit
);
idx
=
k
.
set
(
idx
,
lutScale
);
if
(
!
k
.
run
(
2
,
globalThreads
,
localThreads
,
false
))
return
false
;
return
true
;
}
static
bool
transform
(
const
cv
::
InputArray
_src
,
cv
::
OutputArray
_dst
,
const
cv
::
InputArray
_lut
,
const
int
tilesX
,
const
int
tilesY
,
const
cv
::
Size
&
tileSize
)
{
cv
::
ocl
::
Kernel
k
(
"transform"
,
cv
::
ocl
::
imgproc
::
clahe_oclsrc
);
if
(
k
.
empty
())
return
false
;
int
tile_size
[
2
];
tile_size
[
0
]
=
tileSize
.
width
;
tile_size
[
1
]
=
tileSize
.
height
;
cv
::
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
size
(),
src
.
type
());
cv
::
UMat
dst
=
_dst
.
getUMat
();
cv
::
UMat
lut
=
_lut
.
getUMat
();
size_t
localThreads
[
3
]
=
{
32
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
src
.
rows
,
1
};
int
idx
=
0
;
idx
=
k
.
set
(
idx
,
cv
::
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
));
idx
=
k
.
set
(
idx
,
cv
::
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
));
idx
=
k
.
set
(
idx
,
cv
::
ocl
::
KernelArg
::
ReadOnlyNoSize
(
lut
));
idx
=
k
.
set
(
idx
,
src
.
cols
);
idx
=
k
.
set
(
idx
,
src
.
rows
);
idx
=
k
.
set
(
idx
,
tile_size
);
idx
=
k
.
set
(
idx
,
tilesX
);
idx
=
k
.
set
(
idx
,
tilesY
);
if
(
!
k
.
run
(
2
,
globalThreads
,
localThreads
,
false
))
return
false
;
return
true
;
}
}
namespace
{
class
CLAHE_CalcLut_Body
:
public
cv
::
ParallelLoopBody
...
...
@@ -241,7 +321,9 @@ namespace
int
tilesY_
;
cv
::
Mat
srcExt_
;
cv
::
UMat
usrcExt_
;
cv
::
Mat
lut_
;
cv
::
UMat
ulut_
;
};
CLAHE_Impl
::
CLAHE_Impl
(
double
clipLimit
,
int
tilesX
,
int
tilesY
)
:
...
...
@@ -256,31 +338,34 @@ namespace
void
CLAHE_Impl
::
apply
(
cv
::
InputArray
_src
,
cv
::
OutputArray
_dst
)
{
cv
::
Mat
src
=
_src
.
getMat
();
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
CV_Assert
(
_src
.
type
()
==
CV_8UC1
);
_dst
.
create
(
src
.
size
(),
src
.
type
()
);
cv
::
Mat
dst
=
_dst
.
getMat
();
bool
useOpenCL
=
cv
::
ocl
::
useOpenCL
()
&&
_src
.
isUMat
()
&&
_src
.
dims
()
<=
2
;
const
int
histSize
=
256
;
lut_
.
create
(
tilesX_
*
tilesY_
,
histSize
,
CV_8UC1
);
cv
::
Size
tileSize
;
cv
::
Mat
srcForLut
;
cv
::
_InputArray
_
srcForLut
;
if
(
src
.
cols
%
tilesX_
==
0
&&
src
.
rows
%
tilesY_
==
0
)
if
(
_src
.
size
().
width
%
tilesX_
==
0
&&
_src
.
size
().
height
%
tilesY_
==
0
)
{
tileSize
=
cv
::
Size
(
src
.
cols
/
tilesX_
,
src
.
rows
/
tilesY_
);
srcForLut
=
src
;
tileSize
=
cv
::
Size
(
_src
.
size
().
width
/
tilesX_
,
_src
.
size
().
height
/
tilesY_
);
_srcForLut
=
_
src
;
}
else
{
cv
::
copyMakeBorder
(
src
,
srcExt_
,
0
,
tilesY_
-
(
src
.
rows
%
tilesY_
),
0
,
tilesX_
-
(
src
.
cols
%
tilesX_
),
cv
::
BORDER_REFLECT_101
);
tileSize
=
cv
::
Size
(
srcExt_
.
cols
/
tilesX_
,
srcExt_
.
rows
/
tilesY_
);
srcForLut
=
srcExt_
;
if
(
useOpenCL
)
{
cv
::
copyMakeBorder
(
_src
,
usrcExt_
,
0
,
tilesY_
-
(
_src
.
size
().
height
%
tilesY_
),
0
,
tilesX_
-
(
_src
.
size
().
width
%
tilesX_
),
cv
::
BORDER_REFLECT_101
);
tileSize
=
cv
::
Size
(
usrcExt_
.
size
().
width
/
tilesX_
,
usrcExt_
.
size
().
height
/
tilesY_
);
_srcForLut
=
usrcExt_
;
}
else
{
cv
::
copyMakeBorder
(
_src
,
srcExt_
,
0
,
tilesY_
-
(
_src
.
size
().
height
%
tilesY_
),
0
,
tilesX_
-
(
_src
.
size
().
width
%
tilesX_
),
cv
::
BORDER_REFLECT_101
);
tileSize
=
cv
::
Size
(
srcExt_
.
size
().
width
/
tilesX_
,
srcExt_
.
size
().
height
/
tilesY_
);
_srcForLut
=
srcExt_
;
}
}
const
int
tileSizeTotal
=
tileSize
.
area
();
...
...
@@ -293,6 +378,16 @@ namespace
clipLimit
=
std
::
max
(
clipLimit
,
1
);
}
if
(
useOpenCL
&&
clahe
::
calcLut
(
_srcForLut
,
ulut_
,
tilesX_
,
tilesY_
,
tileSize
,
clipLimit
,
lutScale
)
)
if
(
clahe
::
transform
(
_src
,
_dst
,
ulut_
,
tilesX_
,
tilesY_
,
tileSize
)
)
return
;
cv
::
Mat
src
=
_src
.
getMat
();
_dst
.
create
(
src
.
size
(),
src
.
type
()
);
cv
::
Mat
dst
=
_dst
.
getMat
();
cv
::
Mat
srcForLut
=
_srcForLut
.
getMat
();
lut_
.
create
(
tilesX_
*
tilesY_
,
histSize
,
CV_8UC1
);
CLAHE_CalcLut_Body
calcLutBody
(
srcForLut
,
lut_
,
tileSize
,
tilesX_
,
tilesY_
,
clipLimit
,
lutScale
);
cv
::
parallel_for_
(
cv
::
Range
(
0
,
tilesX_
*
tilesY_
),
calcLutBody
);
...
...
@@ -325,6 +420,8 @@ namespace
{
srcExt_
.
release
();
lut_
.
release
();
usrcExt_
.
release
();
ulut_
.
release
();
}
}
...
...
modules/imgproc/src/opencl/clahe.cl
0 → 100644
View file @
8a456c5a
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Sen
Liu,
swjtuls1987@126.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
ifndef
WAVE_SIZE
#
define
WAVE_SIZE
1
#
endif
inline
int
calc_lut
(
__local
int*
smem,
int
val,
int
tid
)
{
smem[tid]
=
val
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
==
0
)
for
(
int
i
=
1
; i < 256; ++i)
smem[i]
+=
smem[i
-
1]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
return
smem[tid]
;
}
#
ifdef
CPU
inline
void
reduce
(
volatile
__local
int*
smem,
int
val,
int
tid
)
{
smem[tid]
=
val
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
128
)
smem[tid]
=
val
+=
smem[tid
+
128]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
64
)
smem[tid]
=
val
+=
smem[tid
+
64]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
smem[tid]
+=
smem[tid
+
32]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
smem[tid]
+=
smem[tid
+
16]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
smem[tid]
+=
smem[tid
+
8]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
smem[tid]
+=
smem[tid
+
4]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
smem[tid]
+=
smem[tid
+
2]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
smem[256]
=
smem[tid]
+
smem[tid
+
1]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
#
else
inline
void
reduce
(
__local
volatile
int*
smem,
int
val,
int
tid
)
{
smem[tid]
=
val
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
128
)
smem[tid]
=
val
+=
smem[tid
+
128]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
64
)
smem[tid]
=
val
+=
smem[tid
+
64]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
{
smem[tid]
+=
smem[tid
+
32]
;
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
smem[tid]
+=
smem[tid
+
16]
;
#
if
WAVE_SIZE
<
16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
smem[tid]
+=
smem[tid
+
8]
;
smem[tid]
+=
smem[tid
+
4]
;
smem[tid]
+=
smem[tid
+
2]
;
smem[tid]
+=
smem[tid
+
1]
;
}
}
#
endif
__kernel
void
calcLut
(
__global
__const
uchar
*
src,
const
int
srcStep,
const
int
src_offset,
__global
uchar
*
lut,
const
int
dstStep,
const
int
dst_offset,
const
int2
tileSize,
const
int
tilesX,
const
int
clipLimit,
const
float
lutScale
)
{
__local
int
smem[512]
;
int
tx
=
get_group_id
(
0
)
;
int
ty
=
get_group_id
(
1
)
;
int
tid
=
get_local_id
(
1
)
*
get_local_size
(
0
)
+
get_local_id
(
0
)
;
smem[tid]
=
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
i
=
get_local_id
(
1
)
; i < tileSize.y; i += get_local_size(1))
{
__global
const
uchar*
srcPtr
=
src
+
mad24
(
ty
*
tileSize.y
+
i,
srcStep,
tx
*
tileSize.x
+
src_offset
)
;
for
(
int
j
=
get_local_id
(
0
)
; j < tileSize.x; j += get_local_size(0))
{
const
int
data
=
srcPtr[j]
;
atomic_inc
(
&smem[data]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
tHistVal
=
smem[tid]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
clipLimit
>
0
)
{
//
clip
histogram
bar
int
clipped
=
0
;
if
(
tHistVal
>
clipLimit
)
{
clipped
=
tHistVal
-
clipLimit
;
tHistVal
=
clipLimit
;
}
//
find
number
of
overall
clipped
samples
reduce
(
smem,
clipped,
tid
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifdef
CPU
clipped
=
smem[256]
;
#
else
clipped
=
smem[0]
;
#
endif
//
broadcast
evaluated
value
__local
int
totalClipped
;
if
(
tid
==
0
)
totalClipped
=
clipped
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
redistribute
clipped
samples
evenly
int
redistBatch
=
totalClipped
/
256
;
tHistVal
+=
redistBatch
;
int
residual
=
totalClipped
-
redistBatch
*
256
;
if
(
tid
<
residual
)
++tHistVal
;
}
const
int
lutVal
=
calc_lut
(
smem,
tHistVal,
tid
)
;
uint
ires
=
(
uint
)
convert_int_rte
(
lutScale
*
lutVal
)
;
lut[
(
ty
*
tilesX
+
tx
)
*
dstStep
+
tid
+
dst_offset]
=
convert_uchar
(
clamp
(
ires,
(
uint
)
0
,
(
uint
)
255
))
;
}
__kernel
void
transform
(
__global
__const
uchar
*
src,
const
int
srcStep,
const
int
src_offset,
__global
uchar
*
dst,
const
int
dstStep,
const
int
dst_offset,
__global
uchar
*
lut,
const
int
lutStep,
int
lut_offset,
const
int
cols,
const
int
rows,
const
int2
tileSize,
const
int
tilesX,
const
int
tilesY
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
if
(
x
>=
cols
||
y
>=
rows
)
return
;
const
float
tyf
=
(
convert_float
(
y
)
/
tileSize.y
)
-
0.5f
;
int
ty1
=
convert_int_rtn
(
tyf
)
;
int
ty2
=
ty1
+
1
;
const
float
ya
=
tyf
-
ty1
;
ty1
=
max
(
ty1,
0
)
;
ty2
=
min
(
ty2,
tilesY
-
1
)
;
const
float
txf
=
(
convert_float
(
x
)
/
tileSize.x
)
-
0.5f
;
int
tx1
=
convert_int_rtn
(
txf
)
;
int
tx2
=
tx1
+
1
;
const
float
xa
=
txf
-
tx1
;
tx1
=
max
(
tx1,
0
)
;
tx2
=
min
(
tx2,
tilesX
-
1
)
;
const
int
srcVal
=
src[mad24
(
y,
srcStep,
x
+
src_offset
)
]
;
float
res
=
0
;
res
+=
lut[mad24
(
ty1
*
tilesX
+
tx1,
lutStep,
srcVal
+
lut_offset
)
]
*
((
1.0f
-
xa
)
*
(
1.0f
-
ya
))
;
res
+=
lut[mad24
(
ty1
*
tilesX
+
tx2,
lutStep,
srcVal
+
lut_offset
)
]
*
((
xa
)
*
(
1.0f
-
ya
))
;
res
+=
lut[mad24
(
ty2
*
tilesX
+
tx1,
lutStep,
srcVal
+
lut_offset
)
]
*
((
1.0f
-
xa
)
*
(
ya
))
;
res
+=
lut[mad24
(
ty2
*
tilesX
+
tx2,
lutStep,
srcVal
+
lut_offset
)
]
*
((
xa
)
*
(
ya
))
;
uint
ires
=
(
uint
)
convert_int_rte
(
res
)
;
dst[mad24
(
y,
dstStep,
x
+
dst_offset
)
]
=
convert_uchar
(
clamp
(
ires,
(
uint
)
0
,
(
uint
)
255
))
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment