opencv / Commits / f0049fa2

Commit f0049fa2, authored Dec 06, 2013 by Andrey Pavlenko, committed by OpenCV Buildbot on Dec 06, 2013.

    Merge pull request #1937 from ilya-lavrenov:tapi_integral

Parents: b16f0a25, 3eaa8f14

Showing 9 changed files with 1218 additions and 58 deletions (+1218 / -58)
modules/core/include/opencv2/core/mat.hpp (+2 / -0)
modules/core/src/matrix.cpp (+79 / -0)
modules/imgproc/doc/miscellaneous_transformations.rst (+6 / -4)
modules/imgproc/include/opencv2/imgproc.hpp (+2 / -2)
modules/imgproc/src/opencl/integral_sqrsum.cl (+512 / -0)
modules/imgproc/src/opencl/integral_sum.cl (+413 / -0)
modules/imgproc/src/sumpixels.cpp (+155 / -36)
modules/imgproc/test/ocl/test_imgproc.cpp (+47 / -14)
modules/java/android_test/src/org/opencv/test/imgproc/ImgprocTest.java (+2 / -2)
modules/core/include/opencv2/core/mat.hpp
@@ -129,6 +129,8 @@ public:
     virtual bool isContinuous(int i=-1) const;
     virtual bool empty() const;
     virtual void copyTo(const _OutputArray& arr) const;
+    virtual size_t offset(int i=-1) const;
+    virtual size_t step(int i=-1) const;
     bool isMat() const;
     bool isUMat() const;
     bool isMatVectot() const;
...
modules/core/src/matrix.cpp
@@ -1792,6 +1792,85 @@ bool _InputArray::isContinuous(int i) const
     return false;
 }

+size_t _InputArray::offset(int i) const
+{
+    int k = kind();
+
+    if( k == MAT )
+    {
+        CV_Assert( i < 0 );
+        const Mat * const m = ((const Mat*)obj);
+        return (size_t)(m->data - m->datastart);
+    }
+
+    if( k == UMAT )
+    {
+        CV_Assert( i < 0 );
+        return ((const UMat*)obj)->offset;
+    }
+
+    if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR )
+        return 0;
+
+    if( k == STD_VECTOR_MAT )
+    {
+        const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
+        if( i < 0 )
+            return 1;
+        CV_Assert( i < (int)vv.size() );
+        return (size_t)(vv[i].data - vv[i].datastart);
+    }
+
+    if( k == GPU_MAT )
+    {
+        CV_Assert( i < 0 );
+        const cuda::GpuMat * const m = ((const cuda::GpuMat*)obj);
+        return (size_t)(m->data - m->datastart);
+    }
+
+    CV_Error(Error::StsNotImplemented, "");
+    return 0;
+}
+
+size_t _InputArray::step(int i) const
+{
+    int k = kind();
+
+    if( k == MAT )
+    {
+        CV_Assert( i < 0 );
+        return ((const Mat*)obj)->step;
+    }
+
+    if( k == UMAT )
+    {
+        CV_Assert( i < 0 );
+        return ((const UMat*)obj)->step;
+    }
+
+    if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR )
+        return 0;
+
+    if( k == STD_VECTOR_MAT )
+    {
+        const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
+        if( i < 0 )
+            return 1;
+        CV_Assert( i < (int)vv.size() );
+        return vv[i].step;
+    }
+
+    if( k == GPU_MAT )
+    {
+        CV_Assert( i < 0 );
+        return ((const cuda::GpuMat*)obj)->step;
+    }
+
+    CV_Error(Error::StsNotImplemented, "");
+    return 0;
+}
+
 void _InputArray::copyTo(const _OutputArray& arr) const
 {
     int k = kind();
...
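The two accessors added above give dispatch code a uniform way to query memory layout without caring whether it was handed a Mat, a UMat or a GpuMat. A minimal sketch of the kind of alignment check they make possible (the helper name is illustrative, not part of the patch):

    #include <opencv2/core.hpp>
    using namespace cv;

    // Hypothetical helper: true when the wrapped buffer starts and strides on a
    // 4-byte boundary -- the same precondition the OpenCL integral path later
    // checks via _src.offset() % vlen and _src.step() % vlen.
    static bool isVec4Aligned(InputArray arr)
    {
        return arr.offset() % 4 == 0 && arr.step() % 4 == 0;
    }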
modules/imgproc/doc/miscellaneous_transformations.rst
@@ -596,15 +596,15 @@ Calculates the integral of an image.
 .. ocv:function:: void integral( InputArray src, OutputArray sum, int sdepth=-1 )

-.. ocv:function:: void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth=-1 )
+.. ocv:function:: void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth=-1, int sqdepth=-1 )

-.. ocv:function:: void integral( InputArray src, OutputArray sum, OutputArray sqsum, OutputArray tilted, int sdepth=-1 )
+.. ocv:function:: void integral( InputArray src, OutputArray sum, OutputArray sqsum, OutputArray tilted, int sdepth=-1, int sqdepth=-1 )

 .. ocv:pyfunction:: cv2.integral(src[, sum[, sdepth]]) -> sum

-.. ocv:pyfunction:: cv2.integral2(src[, sum[, sqsum[, sdepth]]]) -> sum, sqsum
+.. ocv:pyfunction:: cv2.integral2(src[, sum[, sqsum[, sdepth[, sqdepth]]]]) -> sum, sqsum

-.. ocv:pyfunction:: cv2.integral3(src[, sum[, sqsum[, tilted[, sdepth]]]]) -> sum, sqsum, tilted
+.. ocv:pyfunction:: cv2.integral3(src[, sum[, sqsum[, tilted[, sdepth[, sqdepth]]]]]) -> sum, sqsum, tilted

 .. ocv:cfunction:: void cvIntegral( const CvArr* image, CvArr* sum, CvArr* sqsum=NULL, CvArr* tilted_sum=NULL )

@@ -618,6 +618,8 @@ Calculates the integral of an image.
     :param sdepth: desired depth of the integral and the tilted integral images, ``CV_32S``, ``CV_32F``, or ``CV_64F``.

+    :param sqdepth: desired depth of the integral image of squared pixel values, ``CV_32F`` or ``CV_64F``.

 The functions calculate one or more integral images for the source image as follows:

 .. math::
...
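As documented above, the squared-sum image no longer has to be CV_64F. A minimal C++ usage sketch of the extended signature (buffer names are illustrative):

    #include <opencv2/imgproc.hpp>
    using namespace cv;

    void integralImages(const Mat& img8u)
    {
        Mat sum, sqsum;
        // sdepth = CV_32S for the plain integral, sqdepth = CV_32F for the
        // integral of squared pixel values (CV_64F remains the default).
        integral(img8u, sum, sqsum, CV_32S, CV_32F);
    }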
modules/imgproc/include/opencv2/imgproc.hpp
@@ -1241,12 +1241,12 @@ CV_EXPORTS_W void integral( InputArray src, OutputArray sum, int sdepth = -1 );

 //! computes the integral image and integral for the squared image
-CV_EXPORTS_AS(integral2) void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth = -1 );
+CV_EXPORTS_AS(integral2) void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth = -1, int sqdepth = -1 );

 //! computes the integral image, integral for the squared image and the tilted integral image
-CV_EXPORTS_AS(integral3) void integral( InputArray src, OutputArray sum, OutputArray sqsum, OutputArray tilted, int sdepth = -1 );
+CV_EXPORTS_AS(integral3) void integral( InputArray src, OutputArray sum, OutputArray sqsum, OutputArray tilted, int sdepth = -1, int sqdepth = -1 );

 //! adds image to the accumulator (dst += src). Unlike cv::add, dst and src can have different types.
 CV_EXPORTS_W void accumulate( InputArray src, InputOutputArray dst,
...
modules/imgproc/src/opencl/integral_sqrsum.cl
0 → 100644
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Shengen Yan,yanshengen@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

#if sqdepth == 6
#define CONVERT(step) ((step)>>1)
#else
#define CONVERT(step) ((step))
#endif

#define LSIZE 256
#define LSIZE_1 255
#define LSIZE_2 254
#define HF_LSIZE 128
#define LOG_LSIZE 8
#define LOG_NUM_BANKS 5
#define NUM_BANKS 32
#define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)
#define noconvert

#if sdepth == 4

kernel void integral_cols(__global uchar4 *src, __global int *sum, __global TYPE *sqsum,
int src_offset, int pre_invalid, int rows, int cols, int src_step, int dst_step, int dst1_step)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
TYPE4 sqsum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local int* sum_p;
__local TYPE* sqsum_p;
src_step = src_step >> 2;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid, cols - 1)]) : 0);
src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : 0);
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
lm_sum[1][bf_loc] = src_t[1];
lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
kernel void integral_rows(__global int4 *srcsum, __global TYPE4 * srcsqsum,__global int *sum,
__global TYPE *sqsum, int rows, int cols, int src_step, int src1_step, int sum_step,
int sqsum_step, int sum_offset, int sqsum_offset)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
TYPE4 sqsrc_t[2],sqsum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local int *sum_p;
__local TYPE *sqsum_p;
src_step = src_step >> 4;
src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
gid <<= 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid ] : (int4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid ] : (TYPE4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid + 1] : (int4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid + 1] : (TYPE4)0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sqsum[0][bf_loc] = sqsrc_t[0];
lm_sum[1][bf_loc] = src_t[1];
lm_sqsum[1][bf_loc] = sqsrc_t[1];
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
sum[sum_offset + i + lid] = 0;
sqsum[sqsum_offset + i + lid] = 0;
}
if(i + lid == 0)
{
int loc0 = gid * sum_step;
int loc1 = gid * CONVERT(sqsum_step);
for(int k = 1; k <= 8; k++)
{
if(gid * 4 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
}
}
int loc_s0 = sum_offset + gid * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#elif sdepth == 5
kernel void integral_cols(__global uchar4 *src, __global float *sum, __global TYPE *sqsum,
int src_offset, int pre_invalid, int rows, int cols, int src_step, int dst_step, int dst1_step)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
TYPE4 sqsum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local float* sum_p;
__local TYPE* sqsum_p;
src_step = src_step >> 2;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid, cols - 1)]) : (float4)0);
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
// printf("%f\n", src_t[0].s0);
lm_sum[1][bf_loc] = src_t[1];
lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
}
sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
kernel void integral_rows(__global float4 *srcsum, __global TYPE4 *srcsqsum, __global float *sum,
__global TYPE *sqsum, int rows, int cols, int src_step, int src1_step, int sum_step,
int sqsum_step, int sum_offset, int sqsum_offset)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
TYPE4 sqsrc_t[2],sqsum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local float *sum_p;
__local TYPE *sqsum_p;
src_step = src_step >> 4;
src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2] : (TYPE4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2 + 1] : (TYPE4)0;
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sqsum[0][bf_loc] = sqsrc_t[0];
lm_sum[1][bf_loc] = src_t[1];
lm_sqsum[1][bf_loc] = sqsrc_t[1];
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
sum[sum_offset + i + lid] = 0;
sqsum[sqsum_offset + i + lid] = 0;
}
if(i + lid == 0)
{
int loc0 = gid * 2 * sum_step;
int loc1 = gid * 2 * CONVERT(sqsum_step);
for(int k = 1; k <= 8; k++)
{
if(gid * 8 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
}
}
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * 2 * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
if(lid > 0 && (i+lid) <= rows)
{
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 8 + k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
}
sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 8 + 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#endif
modules/imgproc/src/opencl/integral_sum.cl
0 → 100644
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Shengen Yan,yanshengen@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

#define LSIZE 256
#define LSIZE_1 255
#define LSIZE_2 254
#define HF_LSIZE 128
#define LOG_LSIZE 8
#define LOG_NUM_BANKS 5
#define NUM_BANKS 32
#define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)

#if sdepth == 4

kernel void integral_sum_cols(__global uchar4 *src, __global int *sum,
int src_offset, int pre_invalid, int rows, int cols, int src_step, int dst_step)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local int* sum_p;
src_step = src_step >> 2;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid]) : 0);
src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid + 1]) : 0);
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sum[1][bf_loc] = src_t[1];
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid > 0 && (i+lid) <= rows)
{
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
kernel void integral_sum_rows(__global int4 *srcsum, __global int *sum,
int rows, int cols, int src_step, int sum_step, int sum_offset)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local int *sum_p;
src_step = src_step >> 4;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : 0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : 0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sum[1][bf_loc] = src_t[1];
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
sum[sum_offset + i + lid] = 0;
}
if(i + lid == 0)
{
int loc0 = gid * 2 * sum_step;
for(int k = 1; k <= 8; k++)
{
if(gid * 8 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
}
}
if(lid > 0 && (i+lid) <= rows)
{
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 8 + k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 8 + 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#elif sdepth == 5
kernel void integral_sum_cols(__global uchar4 *src, __global float *sum,
int src_offset, int pre_invalid, int rows, int cols, int src_step, int dst_step)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local float* sum_p;
src_step = src_step >> 2;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid]) : (float4)0);
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid + 1]) : (float4)0);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sum[1][bf_loc] = src_t[1];
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid > 0 && (i+lid) <= rows)
{
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
}
sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
kernel void integral_sum_rows(__global float4 *srcsum, __global float *sum,
int rows, int cols, int src_step, int sum_step, int sum_offset)
{
int lid = get_local_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local float *sum_p;
src_step = src_step >> 4;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sum[1][bf_loc] = src_t[1];
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
{
barrier(CLK_LOCAL_MEM_FENCE);
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
}
offset <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 2)
{
lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
}
for(int d = 1; d < LSIZE; d <<= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);
offset >>= 1;
int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
ai += GET_CONFLICT_OFFSET(ai);
bi += GET_CONFLICT_OFFSET(bi);
if((lid & 127) < d)
{
lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
sum[sum_offset + i + lid] = 0;
}
if(i + lid == 0)
{
int loc0 = gid * 2 * sum_step;
for(int k = 1; k <= 8; k++)
{
if(gid * 8 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
}
}
if(lid > 0 && (i+lid) <= rows)
{
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 8 + k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
}
sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
for(int k = 0; k < 4; k++)
{
if(gid * 8 + 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#endif
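Both kernel files above build their per-column and per-row accumulations with a classic two-phase work-group scan (an up-sweep that reduces pairs in local memory, then a down-sweep that propagates partial sums back); GET_CONFLICT_OFFSET only pads local-memory indices to dodge bank conflicts. For reference, a plain serial C++ sketch of an exclusive prefix sum, the building block these kernels parallelise over LSIZE-wide tiles (illustrative only, not part of the patch):

    #include <vector>
    #include <cstddef>

    // out[i] holds the sum of tile[0..i-1] (exclusive prefix sum); a GPU scan
    // computes the same result in O(log n) parallel steps per tile.
    static std::vector<int> exclusiveScan(const std::vector<int>& tile)
    {
        std::vector<int> out(tile.size());
        int running = 0;
        for (std::size_t i = 0; i < tile.size(); ++i)
        {
            out[i] = running;
            running += tile[i];
        }
        return out;
    }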
modules/imgproc/src/sumpixels.cpp
@@ -41,6 +41,8 @@
 //M*/

 #include "precomp.hpp"
+#include "opencl_kernels.hpp"
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 static IppStatus sts = ippInit();
 #endif

@@ -215,28 +217,139 @@ static void integral_##suffix( T* src, size_t srcstep, ST* sum, size_t sumstep,
 { integral_(src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tiltedstep, size, cn); }

 DEF_INTEGRAL_FUNC(8u32s, uchar, int, double)
-DEF_INTEGRAL_FUNC(8u32f, uchar, float, double)
-DEF_INTEGRAL_FUNC(8u64f, uchar, double, double)
-DEF_INTEGRAL_FUNC(32f, float, float, double)
-DEF_INTEGRAL_FUNC(32f64f, float, double, double)
-DEF_INTEGRAL_FUNC(64f, double, double, double)
+DEF_INTEGRAL_FUNC(8u32f64f, uchar, float, double)
+DEF_INTEGRAL_FUNC(8u64f64f, uchar, double, double)
+DEF_INTEGRAL_FUNC(32f32f64f, float, float, double)
+DEF_INTEGRAL_FUNC(32f64f64f, float, double, double)
+DEF_INTEGRAL_FUNC(64f64f64f, double, double, double)
+DEF_INTEGRAL_FUNC(8u32s32f, uchar, int, float)
+DEF_INTEGRAL_FUNC(8u32f32f, uchar, float, float)
+DEF_INTEGRAL_FUNC(32f32f32f, float, float, float)

 typedef void (*IntegralFunc)(const uchar* src, size_t srcstep, uchar* sum, size_t sumstep,
                              uchar* sqsum, size_t sqsumstep, uchar* tilted, size_t tstep,
                              Size size, int cn );

 }

+enum { vlen = 4 };
+
+static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
+{
+    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
+         !(sdepth == CV_32S || sdepth == CV_32F) )
+        return false;
+
+    ocl::Kernel k1("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc,
+                   format("-D sdepth=%d", sdepth));
+    if (k1.empty())
+        return false;
+
+    Size size = _src.size(), t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width),
+         ssize(size.width + 1, size.height + 1);
+    _sum.create(ssize, sdepth);
+    UMat src = _src.getUMat(), t_sum(t_size, sdepth), sum = _sum.getUMat();
+    t_sum = t_sum(Range::all(), Range(0, size.height));
+
+    int offset = src.offset / vlen, pre_invalid = src.offset % vlen;
+    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
+    int sum_offset = sum.offset / vlen;
+
+    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
+            offset, pre_invalid, src.rows, src.cols, (int)src.step, (int)t_sum.step);
+    size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
+    if (!k1.run(1, &gt, &lt, false))
+        return false;
+
+    ocl::Kernel k2("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc,
+                   format("-D sdepth=%d", sdepth));
+    k2.args(ocl::KernelArg::PtrReadWrite(t_sum), ocl::KernelArg::PtrWriteOnly(sum),
+            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)sum.step, sum_offset);
+
+    size_t gt2 = t_sum.cols * 32, lt2 = 256;
+    return k2.run(1, &gt2, &lt2, false);
+}
+
-void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth )
+static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
 {
-    Mat src = _src.getMat(), sum, sqsum, tilted;
-    int depth = src.depth(), cn = src.channels();
-    Size isize(src.cols + 1, src.rows+1);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+
+    if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
+         (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
+        return false;
+
+    char cvt[40];
+    String opts = format("-D sdepth=%d -D sqdepth=%d -D TYPE=%s -D TYPE4=%s4 -D convert_TYPE4=%s%s",
+                         sdepth, sqdepth, ocl::typeToStr(sqdepth), ocl::typeToStr(sqdepth),
+                         ocl::convertTypeStr(sdepth, sqdepth, 4, cvt),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : "");
+
+    ocl::Kernel k1("integral_cols", ocl::imgproc::integral_sqrsum_oclsrc, opts);
+    if (k1.empty())
+        return false;
+
+    Size size = _src.size(), dsize = Size(size.width + 1, size.height + 1),
+         t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width);
+    UMat src = _src.getUMat(), t_sum(t_size, sdepth), t_sqsum(t_size, sqdepth);
+    t_sum = t_sum(Range::all(), Range(0, size.height));
+    t_sqsum = t_sqsum(Range::all(), Range(0, size.height));
+
+    _sum.create(dsize, sdepth);
+    _sqsum.create(dsize, sqdepth);
+    UMat sum = _sum.getUMat(), sqsum = _sqsum.getUMat();
+
+    int offset = src.offset / vlen;
+    int pre_invalid = src.offset % vlen;
+    int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
+    int sum_offset = sum.offset / sum.elemSize();
+    int sqsum_offset = sqsum.offset / sqsum.elemSize();
+    CV_Assert(sqsum.offset % sqsum.elemSize() == 0);
+
+    k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
+            ocl::KernelArg::PtrWriteOnly(t_sqsum), offset, pre_invalid, src.rows,
+            src.cols, (int)src.step, (int)t_sum.step, (int)t_sqsum.step);
+
+    size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
+    if (!k1.run(1, &gt, &lt, false))
+        return false;
+
+    ocl::Kernel k2("integral_rows", ocl::imgproc::integral_sqrsum_oclsrc, opts);
+    if (k2.empty())
+        return false;
+
+    k2.args(ocl::KernelArg::PtrReadOnly(t_sum), ocl::KernelArg::PtrReadOnly(t_sqsum),
+            ocl::KernelArg::PtrWriteOnly(sum), ocl::KernelArg::PtrWriteOnly(sqsum),
+            t_sum.rows, t_sum.cols, (int)t_sum.step, (int)t_sqsum.step,
+            (int)sum.step, (int)sqsum.step, sum_offset, sqsum_offset);
+
+    size_t gt2 = t_sum.cols * 32, lt2 = 256;
+    return k2.run(1, &gt2, &lt2, false);
+}
+
 }

+void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
 {
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

     if( sdepth <= 0 )
         sdepth = depth == CV_8U ? CV_32S : CV_64F;
-    sdepth = CV_MAT_DEPTH(sdepth);
+    if ( sqdepth <= 0 )
+        sqdepth = CV_64F;
+    sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth);
+
+    if (ocl::useOpenCL() && _sum.isUMat() && !_tilted.needed())
+    {
+        if (!_sqsum.needed())
+        {
+            if (ocl_integral(_src, _sum, sdepth))
+                return;
+        }
+        else if (_sqsum.isUMat())
+        {
+            if (ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth))
+                return;
+        }
+    }
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
     if( ( depth == CV_8U ) && ( !_tilted.needed() ) )

@@ -248,9 +361,9 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
             IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
             _sum.create( isize, CV_MAKETYPE( sdepth, cn ) );
             sum = _sum.getMat();
-            if( _sqsum.needed() )
+            if( _sqsum.needed() && sqdepth == CV_64F )
             {
-                _sqsum.create( isize, CV_MAKETYPE( CV_64F, cn ) );
+                _sqsum.create( isize, CV_MAKETYPE( sqdepth, cn ) );
                 sqsum = _sqsum.getMat();
                 ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step,
                                               (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
             }

@@ -268,9 +381,9 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
             IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
             _sum.create( isize, CV_MAKETYPE( sdepth, cn ) );
             sum = _sum.getMat();
-            if( _sqsum.needed() )
+            if( _sqsum.needed() && sqdepth == CV_64F )
             {
-                _sqsum.create( isize, CV_MAKETYPE( CV_64F, cn ) );
+                _sqsum.create( isize, CV_MAKETYPE( sqdepth, cn ) );
                 sqsum = _sqsum.getMat();
                 ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step,
                                               (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
             }

@@ -284,35 +397,41 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
     }
 #endif

+    Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1);
     _sum.create( isize, CV_MAKETYPE(sdepth, cn) );
-    sum = _sum.getMat();
+    Mat src = _src.getMat(), sum = _sum.getMat(), sqsum, tilted;

-    if( _tilted.needed() )
+    if( _sqsum.needed() )
     {
-        _tilted.create( isize, CV_MAKETYPE(sdepth, cn) );
-        tilted = _tilted.getMat();
+        _sqsum.create( isize, CV_MAKETYPE(sqdepth, cn) );
+        sqsum = _sqsum.getMat();
     }

-    if( _sqsum.needed() )
+    if( _tilted.needed() )
     {
-        _sqsum.create( isize, CV_MAKETYPE(CV_64F, cn) );
-        sqsum = _sqsum.getMat();
+        _tilted.create( isize, CV_MAKETYPE(sdepth, cn) );
+        tilted = _tilted.getMat();
     }

     IntegralFunc func = 0;
-    if( depth == CV_8U && sdepth == CV_32S )
+    if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
         func = (IntegralFunc)GET_OPTIMIZED(integral_8u32s);
-    else if( depth == CV_8U && sdepth == CV_32F )
-        func = (IntegralFunc)integral_8u32f;
-    else if( depth == CV_8U && sdepth == CV_64F )
-        func = (IntegralFunc)integral_8u64f;
-    else if( depth == CV_32F && sdepth == CV_32F )
-        func = (IntegralFunc)integral_32f;
-    else if( depth == CV_32F && sdepth == CV_64F )
-        func = (IntegralFunc)integral_32f64f;
-    else if( depth == CV_64F && sdepth == CV_64F )
-        func = (IntegralFunc)integral_64f;
+    else if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32F )
+        func = (IntegralFunc)integral_8u32s32f;
+    else if( depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F )
+        func = (IntegralFunc)integral_8u32f64f;
+    else if( depth == CV_8U && sdepth == CV_32F && sqdepth == CV_32F )
+        func = (IntegralFunc)integral_8u32f32f;
+    else if( depth == CV_8U && sdepth == CV_64F && sqdepth == CV_64F )
+        func = (IntegralFunc)integral_8u64f64f;
+    else if( depth == CV_32F && sdepth == CV_32F && sqdepth == CV_64F )
+        func = (IntegralFunc)integral_32f32f64f;
+    else if( depth == CV_32F && sdepth == CV_32F && sqdepth == CV_32F )
+        func = (IntegralFunc)integral_32f32f32f;
+    else if( depth == CV_32F && sdepth == CV_64F && sqdepth == CV_64F )
+        func = (IntegralFunc)integral_32f64f64f;
+    else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
+        func = (IntegralFunc)integral_64f64f64f;
     else
         CV_Error( CV_StsUnsupportedFormat, "" );

@@ -325,9 +444,9 @@ void cv::integral( InputArray src, OutputArray sum, int sdepth )
     integral( src, sum, noArray(), noArray(), sdepth );
 }

-void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth )
+void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
 {
-    integral( src, sum, sqsum, noArray(), sdepth );
+    integral( src, sum, sqsum, noArray(), sdepth, sqdepth );
 }
...
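Worth noting about the dispatch added above: the OpenCL branch is only attempted when OpenCL is usable, the outputs are UMat, and no tilted image is requested; otherwise the call falls through to the IPP and plain C++ paths. A minimal sketch of opting into that route through the transparent API (assumes an OpenCL-capable device; the same call silently runs on the CPU otherwise):

    #include <opencv2/imgproc.hpp>
    using namespace cv;

    void integralViaTapi(const UMat& img8u)
    {
        UMat sum, sqsum;   // UMat outputs make _sum.isUMat() / _sqsum.isUMat() true
        // With no tilted output requested, the new ocl_integral(...) route is
        // tried first and falls back to the CPU implementation if it cannot run.
        integral(img8u, sum, sqsum, CV_32S, CV_32F);
    }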
modules/imgproc/test/ocl/test_imgproc.cpp
@@ -271,15 +271,50 @@ OCL_TEST_P(CornerHarris, DISABLED_Mat)
 struct Integral :
         public ImgprocTestBase
 {
-    int sdepth;
+    int sdepth, sqdepth;
+
+    TEST_DECLARE_OUTPUT_PARAMETER(dst2)

     virtual void SetUp()
     {
         type = GET_PARAM(0);
-        blockSize = GET_PARAM(1);
-        sdepth = GET_PARAM(2);
+        sdepth = GET_PARAM(1);
+        sqdepth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
     }
+
+    virtual void random_roi()
+    {
+        ASSERT_EQ(CV_MAT_CN(type), 1);
+
+        Size roiSize = randomSize(1, MAX_VALUE), isize = Size(roiSize.width + 1, roiSize.height + 1);
+        Border srcBorder = randomBorder(0, useRoi ? 2 : 0);
+        randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
+
+        Border dstBorder = randomBorder(0, useRoi ? 2 : 0);
+        randomSubMat(dst, dst_roi, isize, dstBorder, sdepth, 5, 16);
+
+        Border dst2Border = randomBorder(0, useRoi ? 2 : 0);
+        randomSubMat(dst2, dst2_roi, isize, dst2Border, sqdepth, 5, 16);
+
+        UMAT_UPLOAD_INPUT_PARAMETER(src)
+        UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
+        UMAT_UPLOAD_OUTPUT_PARAMETER(dst2)
+    }
+
+    void Near2(double threshold = 0.0, bool relative = false)
+    {
+        if (relative)
+        {
+            EXPECT_MAT_NEAR_RELATIVE(dst2, udst2, threshold);
+            EXPECT_MAT_NEAR_RELATIVE(dst2_roi, udst2_roi, threshold);
+        }
+        else
+        {
+            EXPECT_MAT_NEAR(dst2, udst2, threshold);
+            EXPECT_MAT_NEAR(dst2_roi, udst2_roi, threshold);
+        }
+    }
 };

 OCL_TEST_P(Integral, Mat1)

@@ -297,19 +332,15 @@ OCL_TEST_P(Integral, Mat1)
 OCL_TEST_P(Integral, Mat2)
 {
-    Mat dst1;
-    UMat udst1;
-
     for (int j = 0; j < test_loop_times; j++)
     {
         random_roi();

-        OCL_OFF(cv::integral(src_roi, dst_roi, dst1, sdepth));
-        OCL_ON(cv::integral(usrc_roi, udst_roi, udst1, sdepth));
+        OCL_OFF(cv::integral(src_roi, dst_roi, dst2_roi, sdepth, sqdepth));
+        OCL_ON(cv::integral(usrc_roi, udst_roi, udst2_roi, sdepth, sqdepth));

         Near();
-        if (cv::ocl::Device::getDefault().doubleFPConfig() > 0)
-            EXPECT_MAT_NEAR(dst1, udst1, 0.);
+        sqdepth == CV_32F ? Near2(1e-6, true) : Near2();
     }
 }

@@ -412,19 +443,21 @@ OCL_INSTANTIATE_TEST_CASE_P(Imgproc, EqualizeHist, Combine(
 OCL_INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine(
                                 Values((MatType)CV_8UC1, (MatType)CV_32FC1),
                                 Values(3, 5),
-                                Values((int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT101),
+                                Values((BorderType)BORDER_CONSTANT, (BorderType)BORDER_REPLICATE, (BorderType)BORDER_REFLECT, (BorderType)BORDER_REFLECT101),
                                 Bool()));

 OCL_INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
                                 Values((MatType)CV_8UC1, CV_32FC1),
                                 Values(3, 5),
-                                Values((int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT_101),
+                                Values((BorderType)BORDER_CONSTANT, (BorderType)BORDER_REPLICATE, (BorderType)BORDER_REFLECT, (BorderType)BORDER_REFLECT_101),
                                 Bool()));

 OCL_INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine(
                                 Values((MatType)CV_8UC1), // TODO does not work with CV_32F, CV_64F
                                 Values(0), // not used
-                                Values((MatType)CV_32SC1, (MatType)CV_32FC1),
+                                Values(CV_32SC1, CV_32FC1), // desired sdepth
+                                Values(CV_32FC1, CV_64FC1), // desired sqdepth
                                 Bool()));

 OCL_INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
...
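The Mat2 test above compares the CV_32F squared-sum against the CPU result with a relative tolerance (Near2(1e-6, true)) because single-precision accumulation drifts with image size, while the CV_64F case keeps an absolute check. A rough sketch of such a relative comparison, in the spirit of EXPECT_MAT_NEAR_RELATIVE (helper name is hypothetical):

    #include <opencv2/core.hpp>
    #include <algorithm>

    // Error between test and reference, scaled by the reference magnitude.
    static bool nearRelative(const cv::Mat& ref, const cv::Mat& test, double eps)
    {
        double err = cv::norm(ref, test, cv::NORM_INF);
        double scale = std::max(cv::norm(ref, cv::NORM_INF), 1.0);
        return err <= eps * scale;
    }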
modules/java/android_test/src/org/opencv/test/imgproc/ImgprocTest.java
@@ -1225,7 +1225,7 @@ public class ImgprocTest extends OpenCVTestCase {
         expSqsum.put(2, 0, 0, 18, 36, 54);
         expSqsum.put(3, 0, 0, 27, 54, 81);

-        Imgproc.integral2(src, sum, sqsum, CvType.CV_64F);
+        Imgproc.integral2(src, sum, sqsum, CvType.CV_64F, CvType.CV_64F);

         assertMatEqual(expSum, sum, EPS);
         assertMatEqual(expSqsum, sqsum, EPS);

@@ -1274,7 +1274,7 @@ public class ImgprocTest extends OpenCVTestCase {
         expTilted.put(0, 0, 0, 0);
         expTilted.put(1, 0, 0, 1);

-        Imgproc.integral3(src, sum, sqsum, tilted, CvType.CV_64F);
+        Imgproc.integral3(src, sum, sqsum, tilted, CvType.CV_64F, CvType.CV_64F);

         assertMatEqual(expSum, sum, EPS);
         assertMatEqual(expSqsum, sqsum, EPS);
...