Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
5726e80f
Commit
5726e80f
authored
Mar 14, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Mar 14, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2475 from ilya-lavrenov:ocl_2.4_fix
parents
836635d2
61c347fb
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
72 additions
and
628 deletions
+72
-628
perf_main.cpp
modules/nonfree/perf/perf_main.cpp
+1
-1
perf_surf_ocl.cpp
modules/nonfree/perf/perf_surf_ocl.cpp
+2
-2
arithm.cpp
modules/ocl/src/arithm.cpp
+0
-0
gftt.cpp
modules/ocl/src/gftt.cpp
+14
-15
arithm_bitwise.cl
modules/ocl/src/opencl/arithm_bitwise.cl
+31
-20
arithm_bitwise_binary_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
+0
-88
arithm_bitwise_binary_scalar.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
+0
-82
arithm_bitwise_binary_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+0
-86
arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+0
-253
arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+14
-64
arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+4
-9
arithm_sum.cl
modules/ocl/src/opencl/arithm_sum.cl
+4
-6
test_arithm.cpp
modules/ocl/test/test_arithm.cpp
+2
-2
No files found.
modules/nonfree/perf/perf_main.cpp
View file @
5726e80f
...
...
@@ -5,7 +5,7 @@ static const char * impls[] = {
#ifdef HAVE_CUDA
"cuda"
,
#endif
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENC
V_OC
L
"ocl"
,
#endif
"plain"
...
...
modules/nonfree/perf/perf_surf_ocl.cpp
View file @
5726e80f
...
...
@@ -59,7 +59,7 @@ typedef perf::TestBaseWithParam<std::string> OCL_SURF;
#define OCL_TEST_CYCLE() for( ; startTimer(), next(); cv::ocl::finish(), stopTimer())
PERF_TEST_P
(
OCL_SURF
,
with_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
PERF_TEST_P
(
OCL_SURF
,
DISABLED_
with_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
{
string
filename
=
getDataPath
(
GetParam
());
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
...
...
@@ -94,7 +94,7 @@ PERF_TEST_P(OCL_SURF, with_data_transfer, testing::Values(SURF_IMAGES))
SANITY_CHECK_NOTHING
();
}
PERF_TEST_P
(
OCL_SURF
,
without_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
PERF_TEST_P
(
OCL_SURF
,
DISABLED_
without_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
{
string
filename
=
getDataPath
(
GetParam
());
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
...
...
modules/ocl/src/arithm.cpp
View file @
5726e80f
This diff is collapsed.
Click to expand it.
modules/ocl/src/gftt.cpp
View file @
5726e80f
...
...
@@ -146,34 +146,33 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
*
2
*
src
.
elemSize
();
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
dst
);
cl_mem
dst_data
=
reinterpret_cast
<
cl_mem
>
(
dst
.
data
);
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;
int
offset
=
src
.
offset
/
src
.
elemSize
();
int
vElemSize
=
src
.
elemSize1
();
int
src_step
=
src
.
step
/
vElemSize
,
src_offset
=
src
.
offset
/
vElemSize
;
int
total
=
src
.
size
().
area
();
{
// first parallel pass
{
// first parallel pass
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_col
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
row
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
"arithm_op_minMax"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
"-D T=float -D DEPTH_5"
);
args
,
-
1
,
-
1
,
"-D T=float -D DEPTH_5
-D vlen=1
"
);
}
{
// run final "serial" kernel to find accumulate results from threads and reset corner counter
{
// run final "serial" kernel to find accumulate results from threads and reset corner counter
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
...
...
modules/ocl/src/opencl/arithm_bitwise
_binary
.cl
→
modules/ocl/src/opencl/arithm_bitwise.cl
View file @
5726e80f
...
...
@@ -48,35 +48,46 @@
///////////////////////////////////////////
bitwise_binary
//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
__kernel
void
arithm_bitwise
(
__global
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
#
ifdef
OP_BINARY
__global
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
#
elif
defined
HAVE_SCALAR
T
scalar,
#
endif
#
ifdef
HAVE_MASK
__global
uchar
*
mask,
int
mask_step,
int
mask_offset,
#
endif
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
dst_cols
&&
y
<
dst_
rows
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
ifdef
HAVE_MASK
mask
+=
mad24
(
y,
mask_step,
x
+
mask_offset
)
;
if
(
mask[0]
)
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
src2_index
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src1_offset
))
;
#
ifdef
OP_BINARY
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src2_offset
))
;
#
endif
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst_offset
))
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
__global
const
T
*
src1
=
(
__global
const
T
*
)(
src1ptr
+
src1_index
)
;
#
ifdef
OP_BINARY
__global
const
T
*
src2
=
(
__global
const
T
*
)(
src2ptr
+
src2_index
)
;
#
endif
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
#
ifdef
OP_BINARY
dst[0]
=
src1[0]
Operation
src2[0]
;
#
elif
defined
HAVE_SCALAR
dst[0]
=
src1[0]
Operation
scalar
;
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index
]
;
dst[0]
=
Operation
src1[0
]
;
#
endif
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
deleted
100644 → 0
View file @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary_mask
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
mask,
int
mask_step,
int
mask_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
cols1,
int
rows
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols1
&&
y
<
rows
)
{
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
x
)
;
if
(
mask[mask_index]
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
src2_index
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
#
endif
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
deleted
100644 → 0
View file @
836635d2
////////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
///////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary/////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary_scalar
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
src1_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[0]
;
#
endif
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
deleted
100644 → 0
View file @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary_scalar_mask
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
)
;
if
(
mask[mask_index]
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[0]
;
#
endif
}
}
}
modules/ocl/src/opencl/arithm_bitwise_not.cl
deleted
100644 → 0
View file @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_not_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
uchar4
src1_data
=
vload4
(
0
,
src1
+
src1_index
)
;
uchar4
dst_data
=
vload4
(
0
,
dst
+
dst_index
)
;
uchar4
tmp_data
=
~src1_data
;
dst_data.x
=
dst_index
+
0
<
dst_end
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
dst_index
+
1
<
dst_end
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
dst_index
+
2
<
dst_end
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
dst_index
+
3
<
dst_end
?
tmp_data.w
:
dst_data.w
;
vstore4
(
dst_data,
0
,
dst
+
dst_index
)
;
}
}
__kernel
void
arithm_bitwise_not_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
char4
src1_data
=
vload4
(
0
,
src1
+
src1_index
)
;
char4
dst_data
=
vload4
(
0
,
dst
+
dst_index
)
;
char4
tmp_data
=
~src1_data
;
dst_data.x
=
dst_index
+
0
<
dst_end
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
dst_index
+
1
<
dst_end
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
dst_index
+
2
<
dst_end
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
dst_index
+
3
<
dst_end
?
tmp_data.w
:
dst_data.w
;
vstore4
(
dst_data,
0
,
dst
+
dst_index
)
;
}
}
__kernel
void
arithm_bitwise_not_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
(
x
<<
1
)
&
(
int
)
0xfffffff8
)
;
ushort4
src1_data
=
vload4
(
0
,
(
__global
ushort
*
)((
__global
char
*
)
src1
+
src1_index
))
;
ushort4
dst_data
=
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
;
ushort4
tmp_data
=
~
src1_data
;
dst_data.x
=
((
dst_index
+
0
>=
dst_start
)
&&
(
dst_index
+
0
<
dst_end
))
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
((
dst_index
+
2
>=
dst_start
)
&&
(
dst_index
+
2
<
dst_end
))
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
((
dst_index
+
4
>=
dst_start
)
&&
(
dst_index
+
4
<
dst_end
))
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
((
dst_index
+
6
>=
dst_start
)
&&
(
dst_index
+
6
<
dst_end
))
?
tmp_data.w
:
dst_data.w
;
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
dst_data
;
}
}
__kernel
void
arithm_bitwise_not_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
(
x
<<
1
)
&
(
int
)
0xfffffff8
)
;
short4
src1_data
=
vload4
(
0
,
(
__global
short
*
)((
__global
char
*
)
src1
+
src1_index
))
;
short4
dst_data
=
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
;
short4
tmp_data
=
~
src1_data
;
dst_data.x
=
((
dst_index
+
0
>=
dst_start
)
&&
(
dst_index
+
0
<
dst_end
))
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
((
dst_index
+
2
>=
dst_start
)
&&
(
dst_index
+
2
<
dst_end
))
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
((
dst_index
+
4
>=
dst_start
)
&&
(
dst_index
+
4
<
dst_end
))
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
((
dst_index
+
6
>=
dst_start
)
&&
(
dst_index
+
6
<
dst_end
))
?
tmp_data.w
:
dst_data.w
;
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
dst_data
;
}
}
__kernel
void
arithm_bitwise_not_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
(
x
<<
2
)
+
dst_offset
)
;
int
data1
=
*
((
__global
int
*
)((
__global
char
*
)
src1
+
src1_index
))
;
int
tmp
=
~
data1
;
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
__kernel
void
arithm_bitwise_not_D5
(
__global
char
*src,
int
src_step,
int
src_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
(
x
<<
2
)
+
src_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
(
x
<<
2
)
+
dst_offset
)
;
char4
data
;
data
=
*
((
__global
char4
*
)((
__global
char
*
)
src
+
src_index
))
;
data
=
~
data
;
*
((
__global
char4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_not_D6
(
__global
char
*src,
int
src_step,
int
src_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
(
x
<<
3
)
+
src_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
(
x
<<
3
)
+
dst_offset
)
;
char8
data
;
data
=
*
((
__global
char8
*
)((
__global
char
*
)
src
+
src_index
))
;
data
=
~
data
;
*
((
__global
char8
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
endif
modules/ocl/src/opencl/arithm_minMax.cl
View file @
5726e80f
...
...
@@ -63,81 +63,31 @@
/**************************************Array
minMax**************************************/
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
if
(
lid
>
127
)
{
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
{
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
dst[gid]
=
localmem_min[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
}
}
__kernel
void
arithm_op_minMax_mask
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
const
__global
uchar
*
mask,
int
minvalid_cols,
int
moffset
)
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
int
total,
int
groupnum,
__global
T
*
dst
#
ifdef
WITH_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
int
y,
x
;
for
(
int
grainSize
=
groupnum
<<
8
; id <
elemnum
; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id <
total
; id += grainSize)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid
_cols
;
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid
_cols
;
y
=
id
/
src
_cols
;
x
=
id
%
src
_cols
;
if
(
mask[midx]
)
#
ifdef
WITH_MASK
if
(
mask[mad24
(
y,
mask_step,
x
+
mask_offset
)
]
)
#
endif
{
temp
=
src[
idx
]
;
temp
=
src[
mad24
(
y,
src_step,
x
+
src_offset
)
]
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
View file @
5726e80f
...
...
@@ -52,23 +52,18 @@
/**************************************Count
NonZero**************************************/
__kernel
void
arithm_op_nonzero
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
,
__global
srcT
*src,
__global
dstT
*
dst
)
__kernel
void
arithm_op_nonzero
(
__global
srcT
*
src,
int
src_step,
int
src_offset,
int
src_cols
,
int
total,
int
groupnum,
__global
dstT
*
dst
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_nonzero[128]
;
dstT
nonzero
=
(
dstT
)(
0
)
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
for
(
int
grain
=
groupnum
<<
8
; id < elemnum; id += grain)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
nonzero
+=
src[idx]
==
zero
?
zero
:
one
;
}
for
(
int
grain
=
groupnum
<<
8
; id < total; id += grain)
nonzero
+=
convertToDstT
(
src[mad24
(
id
/
src_cols,
src_step,
id
%
src_cols
+
src_offset
)
]
==
(
srcT
)(
0
))
?
(
dstT
)(
0
)
:
(
dstT
)(
1
)
;
if
(
lid
>
127
)
localmem_nonzero[lid
-
128]
=
nonzero
;
...
...
modules/ocl/src/opencl/arithm_sum.cl
View file @
5726e80f
...
...
@@ -63,21 +63,19 @@
/**************************************Array
buffer
SUM**************************************/
__kernel
void
arithm_op_sum
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum
,
__global
srcT
*src,
__global
dstT
*
dst
)
__kernel
void
arithm_op_sum
(
__global
srcT
*
src,
int
src_step,
int
src_offset,
int
src_cols
,
int
total,
int
groupnum,
__global
dstT
*
dst
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_sum[128]
;
dstT
sum
=
(
dstT
)(
0
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id <
elemnum
; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id <
total
; id += grainSize)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
convertToDstT
(
src[idx]
)
;
temp
=
convertToDstT
(
src[mad24
(
id
/
src_cols,
src_step,
id
%
src_cols
+
src_offset
)
]
)
;
FUNC
(
temp,
sum
)
;
}
...
...
modules/ocl/test/test_arithm.cpp
View file @
5726e80f
...
...
@@ -198,7 +198,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
Size
roiSize
=
randomSize
(
1
,
MAX_VALUE
);
Border
src1Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
2
,
11
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
-
11
,
11
);
Border
src2Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src2
,
src2_roi
,
roiSize
,
src2Border
,
type
,
-
1540
,
1740
);
...
...
@@ -1163,7 +1163,7 @@ OCL_TEST_P(CountNonZero, MAT)
int
cpures
=
cv
::
countNonZero
(
src1_roi
);
int
gpures
=
cv
::
ocl
::
countNonZero
(
gsrc1_roi
);
EXPECT_
DOUBLE_EQ
((
double
)
cpures
,
(
double
)
gpures
);
EXPECT_
EQ
(
cpures
,
gpures
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment