Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
09b3383a
Commit
09b3383a
authored
Jan 17, 2020
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
imgproc: dispatch sumpixels (integral)
parent
b4316af8
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
191 additions
and
770 deletions
+191
-770
CMakeLists.txt
modules/imgproc/CMakeLists.txt
+1
-0
sumpixels.avx512_skx.hpp
modules/imgproc/src/sumpixels.avx512_skx.hpp
+8
-11
sumpixels.dispatch.cpp
modules/imgproc/src/sumpixels.dispatch.cpp
+143
-316
sumpixels.hpp
modules/imgproc/src/sumpixels.hpp
+0
-25
sumpixels.simd.hpp
modules/imgproc/src/sumpixels.simd.hpp
+39
-418
No files found.
modules/imgproc/CMakeLists.txt
View file @
09b3383a
...
...
@@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file
(
median_blur SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
morph SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
smooth SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
sumpixels SSE2 AVX2 AVX512_SKX
)
ocv_add_dispatched_file
(
undistort SSE2 AVX2
)
ocv_define_module
(
imgproc opencv_core WRAP java python js
)
modules/imgproc/src/sumpixels.avx512_skx.hpp
View file @
09b3383a
...
...
@@ -2,14 +2,13 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#include "precomp.hpp"
#include "sumpixels.hpp"
// Copyright (C) 2019-2020, Intel Corporation, all rights reserved.
#include "opencv2/core/hal/intrin.hpp"
namespace
cv
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
namespace
cv
{
namespace
{
// Anonymous namespace to avoid exposing the implementation classes
//
...
...
@@ -432,16 +431,14 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c
}
// end of anonymous namespace
namespace
opt_AVX512_SKX
{
// This is the implementation for the external callers interface entry point.
// It should be the only function called into this file from outside
// Any new implementations should be directed from here
static
void
calculate_integral_avx512
(
const
uchar
*
src
,
size_t
_srcstep
,
double
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
_sqsumstep
,
int
width
,
int
height
,
int
cn
)
{
CV_INSTRUMENT_REGION
();
switch
(
cn
){
case
1
:
{
IntegralCalculator
<
1
>
calculator
;
...
...
@@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
}
}
// end namespace opt_AVX512_SXK
}
// end namespace cv
CV_CPU_OPTIMIZATION_NAMESPACE_END
}
}
// end namespace cv::hal
modules/imgproc/src/sumpixels.dispatch.cpp
View file @
09b3383a
...
...
@@ -10,7 +10,7 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-20
08,2019
Intel Corporation, all rights reserved.
// Copyright (C) 2000-20
20
Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
...
...
@@ -44,210 +44,157 @@
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace
cv
{
#include "sumpixels.simd.hpp"
#include "sumpixels.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
#ifdef HAVE_OPENCL
template
<
typename
T
,
typename
ST
,
typename
QT
>
struct
Integral_SIMD
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
int
sdepth
)
{
bool
operator
()(
const
T
*
,
size_t
,
ST
*
,
size_t
,
QT
*
,
size_t
,
ST
*
,
size_t
,
int
,
int
,
int
)
const
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
_src
.
type
()
!=
CV_8UC1
)
||
!
(
sdepth
==
CV_32S
||
sdepth
==
CV_32F
||
(
doubleSupport
&&
sdepth
==
CV_64F
)))
return
false
;
}
};
static
const
int
tileSize
=
16
;
template
<>
struct
Integral_SIMD
<
uchar
,
double
,
double
>
{
Integral_SIMD
()
{}
;
String
build_opt
=
format
(
"-D sumT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
)
;
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
bool
operator
()(
const
uchar
*
src
,
size_t
_srcstep
,
double
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
_sqsumstep
,
double
*
tilted
,
size_t
_tiltedstep
,
int
width
,
int
height
,
int
cn
)
const
{
#if CV_TRY_AVX512_SKX
CV_UNUSED
(
_tiltedstep
);
// TODO: Add support for 1 channel input (WIP)
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
(
cn
<=
4
)){
opt_AVX512_SKX
::
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
return
true
;
}
#else
// Avoid warnings in some builds
CV_UNUSED
(
src
);
CV_UNUSED
(
_srcstep
);
CV_UNUSED
(
sum
);
CV_UNUSED
(
_sumstep
);
CV_UNUSED
(
sqsum
);
CV_UNUSED
(
_sqsumstep
);
CV_UNUSED
(
tilted
);
CV_UNUSED
(
_tiltedstep
);
CV_UNUSED
(
width
);
CV_UNUSED
(
height
);
CV_UNUSED
(
cn
);
#endif
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
}
};
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
#if CV_SIMD && CV_SIMD_WIDTH <= 64
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
template
<>
struct
Integral_SIMD
<
uchar
,
int
,
double
>
{
Integral_SIMD
()
{
}
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnly
(
sum
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
bool
operator
()(
const
uchar
*
src
,
size_t
_srcstep
,
int
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
,
int
*
tilted
,
size_t
,
int
width
,
int
height
,
int
cn
)
const
{
if
(
sqsum
||
tilted
||
cn
!=
1
)
return
false
;
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
int
sdepth
,
int
sqdepth
)
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
// the first iteration
memset
(
sum
,
0
,
(
width
+
1
)
*
sizeof
(
int
))
;
if
(
_src
.
type
()
!=
CV_8UC1
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
sqdepth
==
CV_64F
))
)
return
false
;
// the others
for
(
int
i
=
0
;
i
<
height
;
++
i
)
{
const
uchar
*
src_row
=
src
+
_srcstep
*
i
;
int
*
prev_sum_row
=
(
int
*
)((
uchar
*
)
sum
+
_sumstep
*
i
)
+
1
;
int
*
sum_row
=
(
int
*
)((
uchar
*
)
sum
+
_sumstep
*
(
i
+
1
))
+
1
;
static
const
int
tileSize
=
16
;
sum_row
[
-
1
]
=
0
;
String
build_opt
=
format
(
"-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
sqdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
v_int32
prev
=
vx_setzero_s32
();
int
j
=
0
;
for
(
;
j
+
v_uint16
::
nlanes
<=
width
;
j
+=
v_uint16
::
nlanes
)
{
v_int16
el8
=
v_reinterpret_as_s16
(
vx_load_expand
(
src_row
+
j
));
v_int32
el4l
,
el4h
;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
__m256i
vsum
=
_mm256_add_epi16
(
el8
.
val
,
_mm256_slli_si256
(
el8
.
val
,
2
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
4
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
8
));
__m256i
shmask
=
_mm256_set1_epi32
(
7
);
el4l
.
val
=
_mm256_add_epi32
(
_mm256_cvtepi16_epi32
(
_v256_extract_low
(
vsum
)),
prev
.
val
);
el4h
.
val
=
_mm256_add_epi32
(
_mm256_cvtepi16_epi32
(
_v256_extract_high
(
vsum
)),
_mm256_permutevar8x32_epi32
(
el4l
.
val
,
shmask
));
prev
.
val
=
_mm256_permutevar8x32_epi32
(
el4h
.
val
,
shmask
);
#else
el8
+=
v_rotate_left
<
1
>
(
el8
);
el8
+=
v_rotate_left
<
2
>
(
el8
);
#if CV_SIMD_WIDTH >= 32
el8
+=
v_rotate_left
<
4
>
(
el8
);
#if CV_SIMD_WIDTH == 64
el8
+=
v_rotate_left
<
8
>
(
el8
);
#endif
#endif
v_expand
(
el8
,
el4l
,
el4h
);
el4l
+=
prev
;
el4h
+=
el4l
;
prev
=
v_broadcast_element
<
v_int32
::
nlanes
-
1
>
(
el4h
);
#endif
v_store
(
sum_row
+
j
,
el4l
+
vx_load
(
prev_sum_row
+
j
));
v_store
(
sum_row
+
j
+
v_int32
::
nlanes
,
el4h
+
vx_load
(
prev_sum_row
+
j
+
v_int32
::
nlanes
));
}
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
for
(
int
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
}
vx_cleanup
();
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
UMat
buf_sq
(
bufsize
,
sqdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf_sq
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
return
true
;
}
}
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
)
;
if
(
krows
.
empty
())
return
false
;
template
<>
struct
Integral_SIMD
<
uchar
,
float
,
double
>
{
Integral_SIMD
()
{}
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
_sqsum
.
create
(
sumsize
,
sqdepth
);
UMat
sum_sq
=
_sqsum
.
getUMat
();
bool
operator
()(
const
uchar
*
src
,
size_t
_srcstep
,
float
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
,
float
*
tilted
,
size_t
,
int
width
,
int
height
,
int
cn
)
const
{
if
(
sqsum
||
tilted
||
cn
!=
1
)
return
false
;
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf_sq
),
ocl
::
KernelArg
::
WriteOnly
(
sum
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
sum_sq
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
// the first iteration
memset
(
sum
,
0
,
(
width
+
1
)
*
sizeof
(
int
));
#endif // HAVE_OPENCL
// the others
for
(
int
i
=
0
;
i
<
height
;
++
i
)
{
const
uchar
*
src_row
=
src
+
_srcstep
*
i
;
float
*
prev_sum_row
=
(
float
*
)((
uchar
*
)
sum
+
_sumstep
*
i
)
+
1
;
float
*
sum_row
=
(
float
*
)((
uchar
*
)
sum
+
_sumstep
*
(
i
+
1
))
+
1
;
#ifdef HAVE_IPP
sum_row
[
-
1
]
=
0
;
static
bool
ipp_integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CV_INSTRUMENT_REGION_IPP
();
v_float32
prev
=
vx_setzero_f32
();
int
j
=
0
;
for
(;
j
+
v_uint16
::
nlanes
<=
width
;
j
+=
v_uint16
::
nlanes
)
{
v_int16
el8
=
v_reinterpret_as_s16
(
vx_load_expand
(
src_row
+
j
));
v_float32
el4l
,
el4h
;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
__m256i
vsum
=
_mm256_add_epi16
(
el8
.
val
,
_mm256_slli_si256
(
el8
.
val
,
2
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
4
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
8
));
__m256i
shmask
=
_mm256_set1_epi32
(
7
);
el4l
.
val
=
_mm256_add_ps
(
_mm256_cvtepi32_ps
(
_mm256_cvtepi16_epi32
(
_v256_extract_low
(
vsum
))),
prev
.
val
);
el4h
.
val
=
_mm256_add_ps
(
_mm256_cvtepi32_ps
(
_mm256_cvtepi16_epi32
(
_v256_extract_high
(
vsum
))),
_mm256_permutevar8x32_ps
(
el4l
.
val
,
shmask
));
prev
.
val
=
_mm256_permutevar8x32_ps
(
el4h
.
val
,
shmask
);
#else
el8
+=
v_rotate_left
<
1
>
(
el8
);
el8
+=
v_rotate_left
<
2
>
(
el8
);
#if CV_SIMD_WIDTH >= 32
el8
+=
v_rotate_left
<
4
>
(
el8
);
#if CV_SIMD_WIDTH == 64
el8
+=
v_rotate_left
<
8
>
(
el8
);
#endif
#endif
v_int32
el4li
,
el4hi
;
v_expand
(
el8
,
el4li
,
el4hi
);
el4l
=
v_cvt_f32
(
el4li
)
+
prev
;
el4h
=
v_cvt_f32
(
el4hi
)
+
el4l
;
prev
=
v_broadcast_element
<
v_float32
::
nlanes
-
1
>
(
el4h
);
#endif
v_store
(
sum_row
+
j
,
el4l
+
vx_load
(
prev_sum_row
+
j
));
v_store
(
sum_row
+
j
+
v_float32
::
nlanes
,
el4h
+
vx_load
(
prev_sum_row
+
j
+
v_float32
::
nlanes
));
}
IppiSize
size
=
{
width
,
height
};
for
(
float
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
}
vx_cleanup
();
if
(
cn
>
1
)
return
false
;
if
(
tilted
)
{
CV_UNUSED
(
tstep
);
return
false
;
}
return
true
;
if
(
!
sqsum
)
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_8UC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_32FC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_32f_C1R
,
(
const
Ipp32f
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
)
>=
0
;
else
return
false
;
}
};
else
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp32s
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32F
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32f64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
return
false
;
}
}
#endif
#endif
// HAVE_IPP
template
<
typename
T
,
typename
ST
,
typename
QT
>
namespace
hal
{
template
<
typename
T
,
typename
ST
,
typename
QT
>
static
void
integral_
(
const
T
*
src
,
size_t
_srcstep
,
ST
*
sum
,
size_t
_sumstep
,
QT
*
sqsum
,
size_t
_sqsumstep
,
ST
*
tilted
,
size_t
_tiltedstep
,
int
width
,
int
height
,
int
cn
)
{
int
x
,
y
,
k
;
if
(
Integral_SIMD
<
T
,
ST
,
QT
>
()(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
tilted
,
_tiltedstep
,
width
,
height
,
cn
))
return
;
int
srcstep
=
(
int
)(
_srcstep
/
sizeof
(
T
));
int
sumstep
=
(
int
)(
_sumstep
/
sizeof
(
ST
));
int
tiltedstep
=
(
int
)(
_tiltedstep
/
sizeof
(
ST
));
...
...
@@ -401,157 +348,36 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
}
}
#ifdef HAVE_OPENCL
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
int
sdepth
)
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
_src
.
type
()
!=
CV_8UC1
)
||
!
(
sdepth
==
CV_32S
||
sdepth
==
CV_32F
||
(
doubleSupport
&&
sdepth
==
CV_64F
)))
return
false
;
static
const
int
tileSize
=
16
;
String
build_opt
=
format
(
"-D sumT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnly
(
sum
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
int
sdepth
,
int
sqdepth
)
static
bool
integral_SIMD
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
_src
.
type
()
!=
CV_8UC1
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
sqdepth
==
CV_64F
))
)
return
false
;
static
const
int
tileSize
=
16
;
String
build_opt
=
format
(
"-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
sqdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
UMat
buf_sq
(
bufsize
,
sqdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf_sq
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
_sqsum
.
create
(
sumsize
,
sqdepth
);
UMat
sum_sq
=
_sqsum
.
getUMat
();
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf_sq
),
ocl
::
KernelArg
::
WriteOnly
(
sum
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
sum_sq
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
#endif
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
integral_SIMD
,
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
),
CV_CPU_DISPATCH_MODES_ALL
);
}
#if defined(HAVE_IPP)
namespace
cv
void
integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
static
bool
ipp_integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CV_INSTRUMENT_REGION_IPP
();
IppiSize
size
=
{
width
,
height
};
if
(
cn
>
1
)
return
false
;
if
(
tilted
)
{
CV_UNUSED
(
tstep
);
return
false
;
}
if
(
!
sqsum
)
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_8UC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_32FC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_32f_C1R
,
(
const
Ipp32f
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
)
>=
0
;
else
return
false
;
}
else
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp32s
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32F
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32f64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
return
false
;
}
}
}
#endif
namespace
cv
{
namespace
hal
{
CV_INSTRUMENT_REGION
();
void
integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CALL_HAL
(
integral
,
cv_hal_integral
,
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
);
CV_IPP_RUN_FAST
(
ipp_integral
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
));
if
(
integral_SIMD
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
))
return
;
#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
...
...
@@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth,
else
if
(
depth
==
CV_64F
&&
sdepth
==
CV_64F
&&
sqdepth
==
CV_64F
)
ONE_CALL
(
double
,
double
,
double
);
else
CV_Error
(
CV_StsUnsupportedFormat
,
""
);
CV_Error
(
Error
::
StsUnsupportedFormat
,
""
);
#undef ONE_CALL
}
}
}
// cv::hal::
}
// namespace hal
void
cv
::
integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
OutputArray
_tilted
,
int
sdepth
,
int
sqdepth
)
void
integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
OutputArray
_tilted
,
int
sdepth
,
int
sqdepth
)
{
CV_INSTRUMENT_REGION
();
...
...
@@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
src
.
cols
,
src
.
rows
,
cn
);
}
void
cv
::
integral
(
InputArray
src
,
OutputArray
sum
,
int
sdepth
)
void
integral
(
InputArray
src
,
OutputArray
sum
,
int
sdepth
)
{
CV_INSTRUMENT_REGION
();
integral
(
src
,
sum
,
noArray
(),
noArray
(),
sdepth
);
}
void
cv
::
integral
(
InputArray
src
,
OutputArray
sum
,
OutputArray
sqsum
,
int
sdepth
,
int
sqdepth
)
void
integral
(
InputArray
src
,
OutputArray
sum
,
OutputArray
sqsum
,
int
sdepth
,
int
sqdepth
)
{
CV_INSTRUMENT_REGION
();
integral
(
src
,
sum
,
sqsum
,
noArray
(),
sdepth
,
sqdepth
);
}
}
// namespace
CV_IMPL
void
cvIntegral
(
const
CvArr
*
image
,
CvArr
*
sumImage
,
...
...
modules/imgproc/src/sumpixels.hpp
deleted
100644 → 0
View file @
b4316af8
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
#define OPENCV_IMGPROC_SUM_PIXELS_HPP
namespace
cv
{
namespace
opt_AVX512_SKX
{
#if CV_TRY_AVX512_SKX
void
calculate_integral_avx512
(
const
uchar
*
src
,
size_t
_srcstep
,
double
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
_sqsumstep
,
int
width
,
int
height
,
int
cn
);
#endif
}
// end namespace opt_AVX512_SKX
}
// end namespace cv
#endif
modules/imgproc/src/sumpixels.simd.hpp
View file @
09b3383a
...
...
@@ -10,7 +10,7 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-20
08,2019
Intel Corporation, all rights reserved.
// Copyright (C) 2000-20
20
Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
...
...
@@ -41,13 +41,26 @@
//
//M*/
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace
cv
{
#if CV_AVX512_SKX
#include "sumpixels.avx512_skx.hpp"
#endif
namespace
cv
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
bool
integral_SIMD
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
namespace
{
template
<
typename
T
,
typename
ST
,
typename
QT
>
struct
Integral_SIMD
...
...
@@ -62,7 +75,7 @@ struct Integral_SIMD
}
};
#if CV_AVX512_SKX
template
<>
struct
Integral_SIMD
<
uchar
,
double
,
double
>
{
Integral_SIMD
()
{};
...
...
@@ -74,24 +87,19 @@ struct Integral_SIMD<uchar, double, double> {
double
*
tilted
,
size_t
_tiltedstep
,
int
width
,
int
height
,
int
cn
)
const
{
#if CV_TRY_AVX512_SKX
CV_UNUSED
(
_tiltedstep
);
// TODO: Add support for 1 channel input (WIP)
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
(
cn
<=
4
)){
opt_AVX512_SKX
::
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
if
(
!
tilted
&&
(
cn
<=
4
))
{
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
return
true
;
}
#else
// Avoid warnings in some builds
CV_UNUSED
(
src
);
CV_UNUSED
(
_srcstep
);
CV_UNUSED
(
sum
);
CV_UNUSED
(
_sumstep
);
CV_UNUSED
(
sqsum
);
CV_UNUSED
(
_sqsumstep
);
CV_UNUSED
(
tilted
);
CV_UNUSED
(
_tiltedstep
);
CV_UNUSED
(
width
);
CV_UNUSED
(
height
);
CV_UNUSED
(
cn
);
#endif
return
false
;
}
};
#endif
#if CV_SIMD && CV_SIMD_WIDTH <= 64
...
...
@@ -157,8 +165,6 @@ struct Integral_SIMD<uchar, int, double>
for
(
int
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
}
vx_cleanup
();
return
true
;
}
};
...
...
@@ -226,333 +232,26 @@ struct Integral_SIMD<uchar, float, double>
for
(
float
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
}
vx_cleanup
();
return
true
;
}
};
#endif
template
<
typename
T
,
typename
ST
,
typename
QT
>
void
integral_
(
const
T
*
src
,
size_t
_srcstep
,
ST
*
sum
,
size_t
_sumstep
,
QT
*
sqsum
,
size_t
_sqsumstep
,
ST
*
tilted
,
size_t
_tiltedstep
,
int
width
,
int
height
,
int
cn
)
{
int
x
,
y
,
k
;
if
(
Integral_SIMD
<
T
,
ST
,
QT
>
()(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
tilted
,
_tiltedstep
,
width
,
height
,
cn
))
return
;
}
// namespace anon
int
srcstep
=
(
int
)(
_srcstep
/
sizeof
(
T
));
int
sumstep
=
(
int
)(
_sumstep
/
sizeof
(
ST
));
int
tiltedstep
=
(
int
)(
_tiltedstep
/
sizeof
(
ST
));
int
sqsumstep
=
(
int
)(
_sqsumstep
/
sizeof
(
QT
));
width
*=
cn
;
memset
(
sum
,
0
,
(
width
+
cn
)
*
sizeof
(
sum
[
0
]));
sum
+=
sumstep
+
cn
;
if
(
sqsum
)
{
memset
(
sqsum
,
0
,
(
width
+
cn
)
*
sizeof
(
sqsum
[
0
]));
sqsum
+=
sqsumstep
+
cn
;
}
if
(
tilted
)
{
memset
(
tilted
,
0
,
(
width
+
cn
)
*
sizeof
(
tilted
[
0
]));
tilted
+=
tiltedstep
+
cn
;
}
if
(
sqsum
==
0
&&
tilted
==
0
)
{
for
(
y
=
0
;
y
<
height
;
y
++
,
src
+=
srcstep
-
cn
,
sum
+=
sumstep
-
cn
)
{
for
(
k
=
0
;
k
<
cn
;
k
++
,
src
++
,
sum
++
)
{
ST
s
=
sum
[
-
cn
]
=
0
;
for
(
x
=
0
;
x
<
width
;
x
+=
cn
)
{
s
+=
src
[
x
];
sum
[
x
]
=
sum
[
x
-
sumstep
]
+
s
;
}
}
}
}
else
if
(
tilted
==
0
)
{
for
(
y
=
0
;
y
<
height
;
y
++
,
src
+=
srcstep
-
cn
,
sum
+=
sumstep
-
cn
,
sqsum
+=
sqsumstep
-
cn
)
{
for
(
k
=
0
;
k
<
cn
;
k
++
,
src
++
,
sum
++
,
sqsum
++
)
{
ST
s
=
sum
[
-
cn
]
=
0
;
QT
sq
=
sqsum
[
-
cn
]
=
0
;
for
(
x
=
0
;
x
<
width
;
x
+=
cn
)
{
T
it
=
src
[
x
];
s
+=
it
;
sq
+=
(
QT
)
it
*
it
;
ST
t
=
sum
[
x
-
sumstep
]
+
s
;
QT
tq
=
sqsum
[
x
-
sqsumstep
]
+
sq
;
sum
[
x
]
=
t
;
sqsum
[
x
]
=
tq
;
}
}
}
}
else
{
AutoBuffer
<
ST
>
_buf
(
width
+
cn
);
ST
*
buf
=
_buf
.
data
();
ST
s
;
QT
sq
;
for
(
k
=
0
;
k
<
cn
;
k
++
,
src
++
,
sum
++
,
tilted
++
,
buf
++
)
{
sum
[
-
cn
]
=
tilted
[
-
cn
]
=
0
;
for
(
x
=
0
,
s
=
0
,
sq
=
0
;
x
<
width
;
x
+=
cn
)
{
T
it
=
src
[
x
];
buf
[
x
]
=
tilted
[
x
]
=
it
;
s
+=
it
;
sq
+=
(
QT
)
it
*
it
;
sum
[
x
]
=
s
;
if
(
sqsum
)
sqsum
[
x
]
=
sq
;
}
if
(
width
==
cn
)
buf
[
cn
]
=
0
;
if
(
sqsum
)
{
sqsum
[
-
cn
]
=
0
;
sqsum
++
;
}
}
for
(
y
=
1
;
y
<
height
;
y
++
)
{
src
+=
srcstep
-
cn
;
sum
+=
sumstep
-
cn
;
tilted
+=
tiltedstep
-
cn
;
buf
+=
-
cn
;
if
(
sqsum
)
sqsum
+=
sqsumstep
-
cn
;
for
(
k
=
0
;
k
<
cn
;
k
++
,
src
++
,
sum
++
,
tilted
++
,
buf
++
)
{
T
it
=
src
[
0
];
ST
t0
=
s
=
it
;
QT
tq0
=
sq
=
(
QT
)
it
*
it
;
sum
[
-
cn
]
=
0
;
if
(
sqsum
)
sqsum
[
-
cn
]
=
0
;
tilted
[
-
cn
]
=
tilted
[
-
tiltedstep
];
sum
[
0
]
=
sum
[
-
sumstep
]
+
t0
;
if
(
sqsum
)
sqsum
[
0
]
=
sqsum
[
-
sqsumstep
]
+
tq0
;
tilted
[
0
]
=
tilted
[
-
tiltedstep
]
+
t0
+
buf
[
cn
];
for
(
x
=
cn
;
x
<
width
-
cn
;
x
+=
cn
)
{
ST
t1
=
buf
[
x
];
buf
[
x
-
cn
]
=
t1
+
t0
;
t0
=
it
=
src
[
x
];
tq0
=
(
QT
)
it
*
it
;
s
+=
t0
;
sq
+=
tq0
;
sum
[
x
]
=
sum
[
x
-
sumstep
]
+
s
;
if
(
sqsum
)
sqsum
[
x
]
=
sqsum
[
x
-
sqsumstep
]
+
sq
;
t1
+=
buf
[
x
+
cn
]
+
t0
+
tilted
[
x
-
tiltedstep
-
cn
];
tilted
[
x
]
=
t1
;
}
if
(
width
>
cn
)
{
ST
t1
=
buf
[
x
];
buf
[
x
-
cn
]
=
t1
+
t0
;
t0
=
it
=
src
[
x
];
tq0
=
(
QT
)
it
*
it
;
s
+=
t0
;
sq
+=
tq0
;
sum
[
x
]
=
sum
[
x
-
sumstep
]
+
s
;
if
(
sqsum
)
sqsum
[
x
]
=
sqsum
[
x
-
sqsumstep
]
+
sq
;
tilted
[
x
]
=
t0
+
t1
+
tilted
[
x
-
tiltedstep
-
cn
];
buf
[
x
]
=
t0
;
}
if
(
sqsum
)
sqsum
++
;
}
}
}
}
#ifdef HAVE_OPENCL
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
int
sdepth
)
bool
integral_SIMD
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
_src
.
type
()
!=
CV_8UC1
)
||
!
(
sdepth
==
CV_32S
||
sdepth
==
CV_32F
||
(
doubleSupport
&&
sdepth
==
CV_64F
)))
return
false
;
static
const
int
tileSize
=
16
;
String
build_opt
=
format
(
"-D sumT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnly
(
sum
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
int
sdepth
,
int
sqdepth
)
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
_src
.
type
()
!=
CV_8UC1
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
sqdepth
==
CV_64F
))
)
return
false
;
static
const
int
tileSize
=
16
;
String
build_opt
=
format
(
"-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
sqdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
UMat
buf_sq
(
bufsize
,
sqdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf_sq
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
_sqsum
.
create
(
sumsize
,
sqdepth
);
UMat
sum_sq
=
_sqsum
.
getUMat
();
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf_sq
),
ocl
::
KernelArg
::
WriteOnly
(
sum
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
sum_sq
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
#endif
}
#if defined(HAVE_IPP)
namespace
cv
{
static
bool
ipp_integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CV_INSTRUMENT_REGION_IPP
();
IppiSize
size
=
{
width
,
height
};
if
(
cn
>
1
)
return
false
;
if
(
tilted
)
{
CV_UNUSED
(
tstep
);
return
false
;
}
if
(
!
sqsum
)
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_8UC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_32FC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_32f_C1R
,
(
const
Ipp32f
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
)
>=
0
;
else
return
false
;
}
else
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp32s
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32F
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32f64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
return
false
;
}
}
}
#endif
namespace
cv
{
namespace
hal
{
void
integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CALL_HAL
(
integral
,
cv_hal_integral
,
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
);
CV_IPP_RUN_FAST
(
ipp_integral
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
));
CV_INSTRUMENT_REGION
();
#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
#define ONE_CALL(T, ST, QT) \
return Integral_SIMD<T, ST, QT>()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn)
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
ONE_CALL
(
uchar
,
int
,
double
);
...
...
@@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth,
else
if
(
depth
==
CV_64F
&&
sdepth
==
CV_64F
&&
sqdepth
==
CV_64F
)
ONE_CALL
(
double
,
double
,
double
);
else
CV_Error
(
CV_StsUnsupportedFormat
,
""
)
;
return
false
;
#undef ONE_CALL
}
#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
}}
// cv::hal::
void
cv
::
integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
OutputArray
_tilted
,
int
sdepth
,
int
sqdepth
)
{
CV_INSTRUMENT_REGION
();
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
if
(
sdepth
<=
0
)
sdepth
=
depth
==
CV_8U
?
CV_32S
:
CV_64F
;
if
(
sqdepth
<=
0
)
sqdepth
=
CV_64F
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
),
sqdepth
=
CV_MAT_DEPTH
(
sqdepth
);
CV_OCL_RUN
(
_sum
.
isUMat
()
&&
!
_tilted
.
needed
(),
(
_sqsum
.
needed
()
?
ocl_integral
(
_src
,
_sum
,
_sqsum
,
sdepth
,
sqdepth
)
:
ocl_integral
(
_src
,
_sum
,
sdepth
)));
Size
ssize
=
_src
.
size
(),
isize
(
ssize
.
width
+
1
,
ssize
.
height
+
1
);
_sum
.
create
(
isize
,
CV_MAKETYPE
(
sdepth
,
cn
)
);
Mat
src
=
_src
.
getMat
(),
sum
=
_sum
.
getMat
(),
sqsum
,
tilted
;
if
(
_sqsum
.
needed
()
)
{
_sqsum
.
create
(
isize
,
CV_MAKETYPE
(
sqdepth
,
cn
)
);
sqsum
=
_sqsum
.
getMat
();
};
if
(
_tilted
.
needed
()
)
{
_tilted
.
create
(
isize
,
CV_MAKETYPE
(
sdepth
,
cn
)
);
tilted
=
_tilted
.
getMat
();
}
hal
::
integral
(
depth
,
sdepth
,
sqdepth
,
src
.
ptr
(),
src
.
step
,
sum
.
ptr
(),
sum
.
step
,
sqsum
.
ptr
(),
sqsum
.
step
,
tilted
.
ptr
(),
tilted
.
step
,
src
.
cols
,
src
.
rows
,
cn
);
}
void
cv
::
integral
(
InputArray
src
,
OutputArray
sum
,
int
sdepth
)
{
CV_INSTRUMENT_REGION
();
integral
(
src
,
sum
,
noArray
(),
noArray
(),
sdepth
);
}
void
cv
::
integral
(
InputArray
src
,
OutputArray
sum
,
OutputArray
sqsum
,
int
sdepth
,
int
sqdepth
)
{
CV_INSTRUMENT_REGION
();
integral
(
src
,
sum
,
sqsum
,
noArray
(),
sdepth
,
sqdepth
);
}
CV_IMPL
void
cvIntegral
(
const
CvArr
*
image
,
CvArr
*
sumImage
,
CvArr
*
sumSqImage
,
CvArr
*
tiltedSumImage
)
{
cv
::
Mat
src
=
cv
::
cvarrToMat
(
image
),
sum
=
cv
::
cvarrToMat
(
sumImage
),
sum0
=
sum
;
cv
::
Mat
sqsum0
,
sqsum
,
tilted0
,
tilted
;
cv
::
Mat
*
psqsum
=
0
,
*
ptilted
=
0
;
if
(
sumSqImage
)
{
sqsum0
=
sqsum
=
cv
::
cvarrToMat
(
sumSqImage
);
psqsum
=
&
sqsum
;
}
if
(
tiltedSumImage
)
{
tilted0
=
tilted
=
cv
::
cvarrToMat
(
tiltedSumImage
);
ptilted
=
&
tilted
;
}
cv
::
integral
(
src
,
sum
,
psqsum
?
cv
::
_OutputArray
(
*
psqsum
)
:
cv
::
_OutputArray
(),
ptilted
?
cv
::
_OutputArray
(
*
ptilted
)
:
cv
::
_OutputArray
(),
sum
.
depth
()
);
CV_Assert
(
sum
.
data
==
sum0
.
data
&&
sqsum
.
data
==
sqsum0
.
data
&&
tilted
.
data
==
tilted0
.
data
);
}
/* End of file. */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment