opencv / Commits / 09b3383a

Commit 09b3383a, authored Jan 17, 2020 by Alexander Alekhin
imgproc: dispatch sumpixels (integral)
parent b4316af8
Showing 5 changed files with 170 additions and 749 deletions (+170 -749)
modules/imgproc/CMakeLists.txt                 +1    -0
modules/imgproc/src/sumpixels.avx512_skx.hpp   +8    -11
modules/imgproc/src/sumpixels.dispatch.cpp     +129  -302
modules/imgproc/src/sumpixels.hpp              +0    -25
modules/imgproc/src/sumpixels.simd.hpp         +32   -411
modules/imgproc/CMakeLists.txt

@@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2)
 ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2)
 ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2)
 ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
+ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
 ocv_add_dispatched_file(undistort SSE2 AVX2)
 ocv_define_module(imgproc opencv_core WRAP java python js)
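A note on the build machinery, since this one-line change is the heart of the commit: ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) makes the build compile sumpixels.simd.hpp once per listed instruction set and generate sumpixels.simd_declarations.hpp, which defines CV_CPU_DISPATCH_MODES_ALL. The sketch below is a simplified model of the runtime selection this enables; the function names and the cpu_has_avx2() stub are illustrative placeholders, not the generated code.

#include <cstdio>

// Illustrative model of OpenCV's CPU dispatch (not the generated code).
// Each ISA listed in ocv_add_dispatched_file() yields a separately compiled
// copy of the kernel, wrapped in its own namespace:
namespace cpu_baseline { int integral_kernel() { std::puts("baseline (SSE2) path"); return 0; } }
namespace opt_AVX2     { int integral_kernel() { std::puts("AVX2 path"); return 0; } }

// Stand-in for the real CPUID-based feature test (cv::checkHardwareSupport).
static bool cpu_has_avx2() { return false; }

// CV_CPU_DISPATCH expands to roughly this: call the best variant the running
// CPU supports, falling back to the baseline build.
int integral_kernel_dispatch()
{
    if (cpu_has_avx2())
        return opt_AVX2::integral_kernel();
    return cpu_baseline::integral_kernel();
}

int main() { return integral_kernel_dispatch(); }

The point of the pattern is that one generic source file can exploit newer instruction sets at runtime without raising the minimum CPU requirement of the binary.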
modules/imgproc/src/sumpixels.avx512_skx.hpp

@@ -2,14 +2,13 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2019, Intel Corporation, all rights reserved.
+// Copyright (C) 2019-2020, Intel Corporation, all rights reserved.
 
-#include "precomp.hpp"
-#include "sumpixels.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 
-namespace cv { namespace hal {
+namespace cv {
+CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
 
 namespace { // Anonymous namespace to avoid exposing the implementation classes
 //
...
@@ -432,16 +431,14 @@ __m512d IntegralCalculator<4>::calculate_integral(const __m512i src_longs, c
 } // end of anonymous namespace
 
-namespace opt_AVX512_SKX {
+static
 // This is the implementation for the external callers interface entry point.
 // It should be the only function called into this file from outside
 // Any new implementations should be directed from here
 void calculate_integral_avx512(const uchar *src, size_t _srcstep,
                                double *sum,   size_t _sumstep,
                                double *sqsum, size_t _sqsumstep,
                                int width, int height, int cn)
 {
+    CV_INSTRUMENT_REGION();
+
     switch(cn){
     case 1: {
         IntegralCalculator<1> calculator;
...
@@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
 }
 
-} // end namespace opt_AVX512_SXK
-}} // end namespace cv::hal
+CV_CPU_OPTIMIZATION_NAMESPACE_END
+} // end namespace cv
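For context on what the file above specializes: calculate_integral_avx512 fills the integral image, where entry (Y, X) of the output holds the sum of all pixels src(y, x) with y < Y and x < X, and sqsum holds the analogous sum of squares. A minimal scalar reference for the single-channel sum, with byte strides as in the OpenCV-style signature, is sketched below; it is illustrative only, not the AVX-512 implementation.

#include <cstdio>

typedef unsigned char uchar;

// Scalar reference for the cn == 1 case of the function above.
// Strides are in bytes; the output is (height + 1) x (width + 1).
static void integral_ref(const uchar* src, size_t srcstep,
                         double* sum, size_t sumstep,
                         int width, int height)
{
    for (int x = 0; x <= width; x++)
        sum[x] = 0.0;                               // first output row: zeros
    for (int y = 0; y < height; y++)
    {
        const uchar* s = src + y * srcstep;
        const double* prev = (const double*)((const uchar*)sum + y * sumstep);
        double* cur = (double*)((uchar*)sum + (y + 1) * sumstep);
        cur[0] = 0.0;                               // first output column: zeros
        double rowsum = 0.0;
        for (int x = 0; x < width; x++)
        {
            rowsum += s[x];                         // running sum along the row
            cur[x + 1] = prev[x + 1] + rowsum;      // plus the row above
        }
    }
}

int main()
{
    const uchar img[2][2] = { {1, 2}, {3, 4} };
    double sum[3][3];
    integral_ref(&img[0][0], 2, &sum[0][0], 3 * sizeof(double), 2, 2);
    std::printf("%g\n", sum[2][2]);                 // prints 10 (= 1+2+3+4)
    return 0;
}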
modules/imgproc/src/sumpixels.dispatch.cpp

@@ -10,7 +10,7 @@
 //                          License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved.
+// Copyright (C) 2000-2020 Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 // Copyright (C) 2014, Itseez Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
...
@@ -44,210 +44,157 @@
 #include "precomp.hpp"
 #include "opencl_kernels_imgproc.hpp"
 #include "opencv2/core/hal/intrin.hpp"
-#include "sumpixels.hpp"
 
-namespace cv
-{
+#include "sumpixels.simd.hpp"
+#include "sumpixels.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
+
+namespace cv {
+
+#ifdef HAVE_OPENCL
+
+static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
+{
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+
+    if ( (_src.type() != CV_8UC1) ||
+        !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F)))
+        return false;
+
+    static const int tileSize = 16;
+
+    String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s",
+                                ocl::typeToStr(sdepth), tileSize,
+                                doubleSupport ? " -D DOUBLE_SUPPORT" : "");
+
+    ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
+    if (kcols.empty())
+        return false;
+
+    UMat src = _src.getUMat();
+    Size src_size = src.size();
+    Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize,
+                 ((src_size.width + tileSize - 1) / tileSize) * tileSize);
+    UMat buf(bufsize, sdepth);
+    kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf));
+    size_t gt = src.cols, lt = tileSize;
+    if (!kcols.run(1, &gt, &lt, false))
+        return false;
+
+    ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
+    if (krows.empty())
+        return false;
+
+    Size sumsize(src_size.width + 1, src_size.height + 1);
+    _sum.create(sumsize, sdepth);
+    UMat sum = _sum.getUMat();
+
+    krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum));
+    gt = src.rows;
+    return krows.run(1, &gt, &lt, false);
+}
+
+static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
+{
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+
+    if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
+        return false;
+
+    static const int tileSize = 16;
+
+    String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
+                              ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
+                              tileSize,
+                              doubleSupport ? " -D DOUBLE_SUPPORT" : "");
+
+    ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
+    if (kcols.empty())
+        return false;
+
+    UMat src = _src.getUMat();
+    Size src_size = src.size();
+    Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize,
+                 ((src_size.width + tileSize - 1) / tileSize) * tileSize);
+    UMat buf(bufsize, sdepth);
+    UMat buf_sq(bufsize, sqdepth);
+    kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf),
+               ocl::KernelArg::WriteOnlyNoSize(buf_sq));
+    size_t gt = src.cols, lt = tileSize;
+    if (!kcols.run(1, &gt, &lt, false))
+        return false;
+
+    ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
+    if (krows.empty())
+        return false;
+
+    Size sumsize(src_size.width + 1, src_size.height + 1);
+    _sum.create(sumsize, sdepth);
+    UMat sum = _sum.getUMat();
+    _sqsum.create(sumsize, sqdepth);
+    UMat sum_sq = _sqsum.getUMat();
+
+    krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq),
+               ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
+    gt = src.rows;
+    return krows.run(1, &gt, &lt, false);
+}
+
+#endif  // HAVE_OPENCL
+
+#ifdef HAVE_IPP
+static bool ipp_integral(
+        int depth, int sdepth, int sqdepth,
+        const uchar* src, size_t srcstep,
+        uchar* sum, size_t sumstep,
+        uchar* sqsum, size_t sqsumstep,
+        uchar* tilted, size_t tstep,
+        int width, int height, int cn)
+{
+    CV_INSTRUMENT_REGION_IPP();
+
+    IppiSize size = {width, height};
+
+    if (cn > 1)
+        return false;
+    if (tilted)
+    {
+        CV_UNUSED(tstep);
+        return false;
+    }
+
+    if (!sqsum)
+    {
+        if (depth == CV_8U && sdepth == CV_32S)
+            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0;
+        else if (depth == CV_8UC1 && sdepth == CV_32F)
+            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0;
+        else if (depth == CV_32FC1 && sdepth == CV_32F)
+            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0;
+        else
+            return false;
+    }
+    else
+    {
+        if (depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S)
+            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
+        else if (depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F)
+            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
+        else if (depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F)
+            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
+        else
+            return false;
+    }
+}
+#endif // HAVE_IPP
 
-template <typename T, typename ST, typename QT>
-struct Integral_SIMD
-{
-    bool operator()(const T *, size_t,
-                    ST *, size_t,
-                    QT *, size_t,
-                    ST *, size_t,
-                    int, int, int) const
-    {
-        return false;
-    }
-};
-
-template <>
-struct Integral_SIMD<uchar, double, double>
-{
-    Integral_SIMD() {};
-
-    bool operator()(const uchar *src, size_t _srcstep,
-                    double *sum,      size_t _sumstep,
-                    double *sqsum,    size_t _sqsumstep,
-                    double *tilted,   size_t _tiltedstep,
-                    int width, int height, int cn) const
-    {
-#if CV_TRY_AVX512_SKX
-        CV_UNUSED(_tiltedstep);
-        // TODO: Add support for 1 channel input (WIP)
-        if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){
-            opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep,
-                                                      sqsum, _sqsumstep, width, height, cn);
-            return true;
-        }
-#else
-        // Avoid warnings in some builds
-        CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep);
-        CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep);
-        CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn);
-#endif
-        return false;
-    }
-};
-
-#if CV_SIMD && CV_SIMD_WIDTH <= 64
-
-template <>
-struct Integral_SIMD<uchar, int, double>
-{
-    Integral_SIMD() {}
-
-    bool operator()(const uchar * src, size_t _srcstep,
-                    int * sum, size_t _sumstep,
-                    double * sqsum, size_t,
-                    int * tilted, size_t,
-                    int width, int height, int cn) const
-    {
-        if (sqsum || tilted || cn != 1)
-            return false;
-
-        // the first iteration
-        memset(sum, 0, (width + 1) * sizeof(int));
-
-        // the others
-        for (int i = 0; i < height; ++i)
-        {
-            const uchar * src_row = src + _srcstep * i;
-            int * prev_sum_row = (int *)((uchar *)sum + _sumstep * i) + 1;
-            int * sum_row = (int *)((uchar *)sum + _sumstep * (i + 1)) + 1;
-
-            sum_row[-1] = 0;
-
-            v_int32 prev = vx_setzero_s32();
-            int j = 0;
-            for ( ; j + v_uint16::nlanes <= width; j += v_uint16::nlanes)
-            {
-                v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j));
-                v_int32 el4l, el4h;
-#if CV_AVX2 && CV_SIMD_WIDTH == 32
-                __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2));
-                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4));
-                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
-                __m256i shmask = _mm256_set1_epi32(7);
-                el4l.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_low(vsum)), prev.val);
-                el4h.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_high(vsum)), _mm256_permutevar8x32_epi32(el4l.val, shmask));
-                prev.val = _mm256_permutevar8x32_epi32(el4h.val, shmask);
-#else
-                el8 += v_rotate_left<1>(el8);
-                el8 += v_rotate_left<2>(el8);
-#if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
-#if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
-#endif
-#endif
-                v_expand(el8, el4l, el4h);
-                el4l += prev;
-                el4h += el4l;
-                prev = v_broadcast_element<v_int32::nlanes - 1>(el4h);
-#endif
-                v_store(sum_row + j, el4l + vx_load(prev_sum_row + j));
-                v_store(sum_row + j + v_int32::nlanes, el4h + vx_load(prev_sum_row + j + v_int32::nlanes));
-            }
-
-            for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
-                sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
-        }
-        vx_cleanup();
-
-        return true;
-    }
-};
-
-template <>
-struct Integral_SIMD<uchar, float, double>
-{
-    Integral_SIMD() {}
-
-    bool operator()(const uchar * src, size_t _srcstep,
-                    float * sum, size_t _sumstep,
-                    double * sqsum, size_t,
-                    float * tilted, size_t,
-                    int width, int height, int cn) const
-    {
-        if (sqsum || tilted || cn != 1)
-            return false;
-
-        // the first iteration
-        memset(sum, 0, (width + 1) * sizeof(int));
-
-        // the others
-        for (int i = 0; i < height; ++i)
-        {
-            const uchar * src_row = src + _srcstep * i;
-            float * prev_sum_row = (float *)((uchar *)sum + _sumstep * i) + 1;
-            float * sum_row = (float *)((uchar *)sum + _sumstep * (i + 1)) + 1;
-
-            sum_row[-1] = 0;
-
-            v_float32 prev = vx_setzero_f32();
-            int j = 0;
-            for (; j + v_uint16::nlanes <= width; j += v_uint16::nlanes)
-            {
-                v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j));
-                v_float32 el4l, el4h;
-#if CV_AVX2 && CV_SIMD_WIDTH == 32
-                __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2));
-                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4));
-                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
-                __m256i shmask = _mm256_set1_epi32(7);
-                el4l.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_low(vsum))), prev.val);
-                el4h.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_high(vsum))), _mm256_permutevar8x32_ps(el4l.val, shmask));
-                prev.val = _mm256_permutevar8x32_ps(el4h.val, shmask);
-#else
-                el8 += v_rotate_left<1>(el8);
-                el8 += v_rotate_left<2>(el8);
-#if CV_SIMD_WIDTH >= 32
-                el8 += v_rotate_left<4>(el8);
-#if CV_SIMD_WIDTH == 64
-                el8 += v_rotate_left<8>(el8);
-#endif
-#endif
-                v_int32 el4li, el4hi;
-                v_expand(el8, el4li, el4hi);
-                el4l = v_cvt_f32(el4li) + prev;
-                el4h = v_cvt_f32(el4hi) + el4l;
-                prev = v_broadcast_element<v_float32::nlanes - 1>(el4h);
-#endif
-                v_store(sum_row + j, el4l + vx_load(prev_sum_row + j));
-                v_store(sum_row + j + v_float32::nlanes, el4h + vx_load(prev_sum_row + j + v_float32::nlanes));
-            }
-
-            for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
-                sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
-        }
-        vx_cleanup();
-
-        return true;
-    }
-};
-
-#endif
+namespace hal {
 
 template<typename T, typename ST, typename QT>
+static
 void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
                 QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep,
                 int width, int height, int cn )
 {
     int x, y, k;
 
-    if (Integral_SIMD<T, ST, QT>()(src, _srcstep,
-                                   sum, _sumstep,
-                                   sqsum, _sqsumstep,
-                                   tilted, _tiltedstep,
-                                   width, height, cn))
-        return;
-
     int srcstep = (int)(_srcstep/sizeof(T));
     int sumstep = (int)(_sumstep/sizeof(ST));
     int tiltedstep = (int)(_tiltedstep/sizeof(ST));
...
@@ -401,100 +348,7 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
     }
 }
 
-#ifdef HAVE_OPENCL
-
-static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
-{
-    ...
-}
-
-static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
-{
-    ...
-}
-
-#endif
-
-}
-
-#if defined(HAVE_IPP)
-namespace cv
-{
...
@@ -502,56 +356,28 @@ static bool ipp_integral(
-static bool ipp_integral(
-        int depth, int sdepth, int sqdepth,
-        const uchar* src, size_t srcstep,
-        uchar* sum, size_t sumstep,
-        uchar* sqsum, size_t sqsumstep,
-        uchar* tilted, size_t tstep,
-        int width, int height, int cn)
-{
-    CV_INSTRUMENT_REGION_IPP();
-
-    IppiSize size = {width, height};
-
-    if (cn > 1)
-        return false;
-    if (tilted)
-    {
-        CV_UNUSED(tstep);
-        return false;
-    }
-    ...
-}
-}
-#endif
-
-namespace cv { namespace hal {
+static bool integral_SIMD(
+        int depth, int sdepth, int sqdepth,
+        const uchar* src, size_t srcstep,
+        uchar* sum, size_t sumstep,
+        uchar* sqsum, size_t sqsumstep,
+        uchar* tilted, size_t tstep,
+        int width, int height, int cn)
+{
+    CV_INSTRUMENT_REGION();
+
+    CV_CPU_DISPATCH(integral_SIMD, (depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn),
+        CV_CPU_DISPATCH_MODES_ALL);
+}
 
 void integral(int depth, int sdepth, int sqdepth,
               const uchar* src, size_t srcstep,
               uchar* sum, size_t sumstep,
               uchar* sqsum, size_t sqsumstep,
               uchar* tilted, size_t tstep,
               int width, int height, int cn)
 {
+    CV_INSTRUMENT_REGION();
+
     CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn);
     CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn));
 
+    if (integral_SIMD(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn))
+        return;
+
 #define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
 
     if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
...
@@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth,
     else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
         ONE_CALL(double, double, double);
     else
-        CV_Error( CV_StsUnsupportedFormat, "" );
+        CV_Error( Error::StsUnsupportedFormat, "" );
 
 #undef ONE_CALL
 }
 
-}} // cv::hal::
+} // namespace hal
 
-void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
+void integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
 {
     CV_INSTRUMENT_REGION();
 
...
@@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
                   src.cols, src.rows, cn );
 }
 
-void cv::integral( InputArray src, OutputArray sum, int sdepth )
+void integral( InputArray src, OutputArray sum, int sdepth )
 {
     CV_INSTRUMENT_REGION();
 
     integral( src, sum, noArray(), noArray(), sdepth );
 }
 
-void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
+void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
 {
     CV_INSTRUMENT_REGION();
 
     integral( src, sum, sqsum, noArray(), sdepth, sqdepth );
 }
 
+} // namespace
+
 CV_IMPL void
 cvIntegral( const CvArr* image, CvArr* sumImage,
...
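None of this changes the public API: callers still go through cv::integral, and the new integral_SIMD dispatcher (or the OpenCL/IPP paths) is selected underneath. A small usage sketch:

#include <opencv2/imgproc.hpp>
#include <cstdio>

int main()
{
    cv::Mat img = cv::Mat::ones(4, 4, CV_8UC1);   // 4x4 image, every pixel = 1
    cv::Mat sum, sqsum;
    cv::integral(img, sum, sqsum);                // sum: 5x5 CV_32S, sqsum: 5x5 CV_64F
    // The bottom-right entry is the sum over the whole image.
    std::printf("%d\n", sum.at<int>(4, 4));       // prints 16
    return 0;
}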
modules/imgproc/src/sumpixels.hpp (deleted, 100644 → 0)

-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-//
-// Copyright (C) 2019, Intel Corporation, all rights reserved.
-
-#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
-#define OPENCV_IMGPROC_SUM_PIXELS_HPP
-
-namespace cv { namespace opt_AVX512_SKX {
-
-#if CV_TRY_AVX512_SKX
-void calculate_integral_avx512(
-        const uchar *src, size_t _srcstep,
-        double *sum, size_t _sumstep,
-        double *sqsum, size_t _sqsumstep,
-        int width, int height, int cn);
-#endif
-
-} // end namespace opt_AVX512_SKX
-} // end namespace cv
-
-#endif
modules/imgproc/src/sumpixels.simd.hpp

@@ -10,7 +10,7 @@
 //                          License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved.
+// Copyright (C) 2000-2020 Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 // Copyright (C) 2014, Itseez Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
...
@@ -41,13 +41,26 @@
 //
 //M*/
 
-#include "precomp.hpp"
-#include "opencl_kernels_imgproc.hpp"
 #include "opencv2/core/hal/intrin.hpp"
-#include "sumpixels.hpp"
 
-namespace cv
-{
+#if CV_AVX512_SKX
+#include "sumpixels.avx512_skx.hpp"
+#endif
+
+namespace cv {
+namespace hal {
+CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
+
+// forward declarations
+bool integral_SIMD(
+        int depth, int sdepth, int sqdepth,
+        const uchar* src, size_t srcstep,
+        uchar* sum, size_t sumstep,
+        uchar* sqsum, size_t sqsumstep,
+        uchar* tilted, size_t tstep,
+        int width, int height, int cn);
+
+#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
+
+namespace {
 
 template <typename T, typename ST, typename QT>
 struct Integral_SIMD
...
@@ -62,7 +75,7 @@ struct Integral_SIMD
     }
 };
 
+#if CV_AVX512_SKX
 template <>
 struct Integral_SIMD<uchar, double, double> {
     Integral_SIMD() {};
...
@@ -74,24 +87,19 @@ struct Integral_SIMD<uchar, double, double> {
                     double *tilted,   size_t _tiltedstep,
                     int width, int height, int cn) const
     {
-#if CV_TRY_AVX512_SKX
         CV_UNUSED(_tiltedstep);
         // TODO: Add support for 1 channel input (WIP)
-        if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){
-            opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep,
-                                                      sqsum, _sqsumstep, width, height, cn);
+        if (!tilted && (cn <= 4))
+        {
+            calculate_integral_avx512(src, _srcstep, sum, _sumstep,
+                                      sqsum, _sqsumstep, width, height, cn);
             return true;
         }
-#else
-        // Avoid warnings in some builds
-        CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep);
-        CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep);
-        CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn);
-#endif
         return false;
     }
 };
+#endif
 
 #if CV_SIMD && CV_SIMD_WIDTH <= 64
...
@@ -157,8 +165,6 @@ struct Integral_SIMD<uchar, int, double>
             for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
                 sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
         }
-        vx_cleanup();
-
         return true;
     }
 };
...
@@ -226,275 +232,15 @@ struct Integral_SIMD<uchar, float, double>
             for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
                 sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
         }
-        vx_cleanup();
-
         return true;
     }
 };
 
 #endif
 
-template<typename T, typename ST, typename QT>
-void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
-                QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep,
-                int width, int height, int cn )
-{
-    int x, y, k;
-
-    if (Integral_SIMD<T, ST, QT>()(src, _srcstep,
-                                   sum, _sumstep,
-                                   sqsum, _sqsumstep,
-                                   tilted, _tiltedstep,
-                                   width, height, cn))
-        return;
-
-    int srcstep = (int)(_srcstep/sizeof(T));
-    int sumstep = (int)(_sumstep/sizeof(ST));
-    int tiltedstep = (int)(_tiltedstep/sizeof(ST));
-    int sqsumstep = (int)(_sqsumstep/sizeof(QT));
-
-    width *= cn;
-
-    memset( sum, 0, (width+cn)*sizeof(sum[0]));
-    sum += sumstep + cn;
-
-    if( sqsum )
-    {
-        memset( sqsum, 0, (width+cn)*sizeof(sqsum[0]));
-        sqsum += sqsumstep + cn;
-    }
-
-    if( tilted )
-    {
-        memset( tilted, 0, (width+cn)*sizeof(tilted[0]));
-        tilted += tiltedstep + cn;
-    }
-
-    if( sqsum == 0 && tilted == 0 )
-    {
-        for( y = 0; y < height; y++, src += srcstep - cn, sum += sumstep - cn )
-        {
-            for( k = 0; k < cn; k++, src++, sum++ )
-            {
-                ST s = sum[-cn] = 0;
-                for( x = 0; x < width; x += cn )
-                {
-                    s += src[x];
-                    sum[x] = sum[x - sumstep] + s;
-                }
-            }
-        }
-    }
-    else if( tilted == 0 )
-    {
-        for( y = 0; y < height; y++, src += srcstep - cn,
-                        sum += sumstep - cn, sqsum += sqsumstep - cn )
-        {
-            for( k = 0; k < cn; k++, src++, sum++, sqsum++ )
-            {
-                ST s = sum[-cn] = 0;
-                QT sq = sqsum[-cn] = 0;
-                for( x = 0; x < width; x += cn )
-                {
-                    T it = src[x];
-                    s += it;
-                    sq += (QT)it*it;
-                    ST t = sum[x - sumstep] + s;
-                    QT tq = sqsum[x - sqsumstep] + sq;
-                    sum[x] = t;
-                    sqsum[x] = tq;
-                }
-            }
-        }
-    }
-    else
-    {
-        AutoBuffer<ST> _buf(width+cn);
-        ST* buf = _buf.data();
-        ST s;
-        QT sq;
-        for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ )
-        {
-            sum[-cn] = tilted[-cn] = 0;
-
-            for( x = 0, s = 0, sq = 0; x < width; x += cn )
-            {
-                T it = src[x];
-                buf[x] = tilted[x] = it;
-                s += it;
-                sq += (QT)it*it;
-                sum[x] = s;
-                if( sqsum )
-                    sqsum[x] = sq;
-            }
-
-            if( width == cn )
-                buf[cn] = 0;
-
-            if( sqsum )
-            {
-                sqsum[-cn] = 0;
-                sqsum++;
-            }
-        }
-
-        for( y = 1; y < height; y++ )
-        {
-            src += srcstep - cn;
-            sum += sumstep - cn;
-            tilted += tiltedstep - cn;
-            buf += -cn;
-
-            if( sqsum )
-                sqsum += sqsumstep - cn;
-
-            for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ )
-            {
-                T it = src[0];
-                ST t0 = s = it;
-                QT tq0 = sq = (QT)it*it;
-
-                sum[-cn] = 0;
-                if( sqsum )
-                    sqsum[-cn] = 0;
-                tilted[-cn] = tilted[-tiltedstep];
-
-                sum[0] = sum[-sumstep] + t0;
-                if( sqsum )
-                    sqsum[0] = sqsum[-sqsumstep] + tq0;
-                tilted[0] = tilted[-tiltedstep] + t0 + buf[cn];
-
-                for( x = cn; x < width - cn; x += cn )
-                {
-                    ST t1 = buf[x];
-                    buf[x - cn] = t1 + t0;
-                    t0 = it = src[x];
-                    tq0 = (QT)it*it;
-                    s += t0;
-                    sq += tq0;
-                    sum[x] = sum[x - sumstep] + s;
-                    if( sqsum )
-                        sqsum[x] = sqsum[x - sqsumstep] + sq;
-                    t1 += buf[x + cn] + t0 + tilted[x - tiltedstep - cn];
-                    tilted[x] = t1;
-                }
-
-                if( width > cn )
-                {
-                    ST t1 = buf[x];
-                    buf[x - cn] = t1 + t0;
-                    t0 = it = src[x];
-                    tq0 = (QT)it*it;
-                    s += t0;
-                    sq += tq0;
-                    sum[x] = sum[x - sumstep] + s;
-                    if( sqsum )
-                        sqsum[x] = sqsum[x - sqsumstep] + sq;
-                    tilted[x] = t0 + t1 + tilted[x - tiltedstep - cn];
-                    buf[x] = t0;
-                }
-
-                if( sqsum )
-                    sqsum++;
-            }
-        }
-    }
-}
-
-#ifdef HAVE_OPENCL
-
-static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
-{
-    ...
-}
-
-static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
-{
-    ...
-}
-
-#endif
-
-}
+} // namespace anon
...
@@ -502,57 +248,10 @@ static bool ipp_integral(
-#if defined(HAVE_IPP)
-namespace cv
-{
-static bool ipp_integral(
-        int depth, int sdepth, int sqdepth,
-        const uchar* src, size_t srcstep,
-        uchar* sum, size_t sumstep,
-        uchar* sqsum, size_t sqsumstep,
-        uchar* tilted, size_t tstep,
-        int width, int height, int cn)
-{
-    CV_INSTRUMENT_REGION_IPP();
-
-    IppiSize size = {width, height};
-
-    if (cn > 1)
-        return false;
-    if (tilted)
-    {
-        CV_UNUSED(tstep);
-        return false;
-    }
-    ...
-}
-}
-#endif
-
-namespace cv
-{
-namespace hal
-{
-
-void integral(int depth, int sdepth, int sqdepth,
-              const uchar* src, size_t srcstep,
-              uchar* sum, size_t sumstep,
-              uchar* sqsum, size_t sqsumstep,
-              uchar* tilted, size_t tstep,
-              int width, int height, int cn)
-{
-    CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn);
-    CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn));
-
-#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
+bool integral_SIMD(
+        int depth, int sdepth, int sqdepth,
+        const uchar* src, size_t srcstep,
+        uchar* sum, size_t sumstep,
+        uchar* sqsum, size_t sqsumstep,
+        uchar* tilted, size_t tstep,
+        int width, int height, int cn)
+{
+    CV_INSTRUMENT_REGION();
+
+#define ONE_CALL(T, ST, QT) \
+    return Integral_SIMD<T, ST, QT>()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn)
 
     if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
        ONE_CALL(uchar, int, double);
...
@@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth,
     else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
         ONE_CALL(double, double, double);
     else
-        CV_Error( CV_StsUnsupportedFormat, "" );
+        return false;
 
 #undef ONE_CALL
 }
 
+#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
+CV_CPU_OPTIMIZATION_NAMESPACE_END
 }} // cv::hal::
 
-void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
-{
-    CV_INSTRUMENT_REGION();
-
-    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
-    if( sdepth <= 0 )
-        sdepth = depth == CV_8U ? CV_32S : CV_64F;
-    if ( sqdepth <= 0 )
-         sqdepth = CV_64F;
-    sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth);
-
-    CV_OCL_RUN(_sum.isUMat() && !_tilted.needed(),
-               (_sqsum.needed() ? ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth) :
-                                  ocl_integral(_src, _sum, sdepth)))
-
-    Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1);
-    _sum.create( isize, CV_MAKETYPE(sdepth, cn) );
-    Mat src = _src.getMat(), sum = _sum.getMat(), sqsum, tilted;
-
-    if( _sqsum.needed() )
-    {
-        _sqsum.create( isize, CV_MAKETYPE(sqdepth, cn) );
-        sqsum = _sqsum.getMat();
-    };
-
-    if( _tilted.needed() )
-    {
-        _tilted.create( isize, CV_MAKETYPE(sdepth, cn) );
-        tilted = _tilted.getMat();
-    }
-
-    hal::integral(depth, sdepth, sqdepth, src.ptr(), src.step, sum.ptr(), sum.step,
-                  sqsum.ptr(), sqsum.step, tilted.ptr(), tilted.step,
-                  src.cols, src.rows, cn);
-}
-
-void cv::integral( InputArray src, OutputArray sum, int sdepth )
-{
-    CV_INSTRUMENT_REGION();
-
-    integral( src, sum, noArray(), noArray(), sdepth );
-}
-
-void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
-{
-    CV_INSTRUMENT_REGION();
-
-    integral( src, sum, sqsum, noArray(), sdepth, sqdepth );
-}
-
-CV_IMPL void
-cvIntegral( const CvArr* image, CvArr* sumImage,
-            CvArr* sumSqImage, CvArr* tiltedSumImage )
-{
-    cv::Mat src = cv::cvarrToMat(image), sum = cv::cvarrToMat(sumImage), sum0 = sum;
-    cv::Mat sqsum0, sqsum, tilted0, tilted;
-    cv::Mat *psqsum = 0, *ptilted = 0;
-
-    if( sumSqImage )
-    {
-        sqsum0 = sqsum = cv::cvarrToMat(sumSqImage);
-        psqsum = &sqsum;
-    }
-
-    if( tiltedSumImage )
-    {
-        tilted0 = tilted = cv::cvarrToMat(tiltedSumImage);
-        ptilted = &tilted;
-    }
-
-    cv::integral( src, sum, psqsum ? cv::_OutputArray(*psqsum) : cv::_OutputArray(),
-                  ptilted ? cv::_OutputArray(*ptilted) : cv::_OutputArray(), sum.depth() );
-
-    CV_Assert( sum.data == sum0.data && sqsum.data == sqsum0.data && tilted.data == tilted0.data );
-}
-
-/* End of file. */
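The Integral_SIMD specializations kept in this file build each row's running sum inside a vector register with log-step shifted adds (el8 += v_rotate_left<1>(el8); el8 += v_rotate_left<2>(el8); and so on), then carry the last lane over to the next vector via prev. The scalar model below emulates the lane shift with a descending loop; it illustrates the scan pattern only, not the intrinsics code.

#include <cstdio>

// Scalar model of the log-step inclusive prefix sum used by Integral_SIMD.
// v_rotate_left<k> shifts lanes up by k and fills the bottom with zeros, so
// after log2(n) shifted adds each lane holds the sum of all lanes up to it.
static void prefix_sum_inclusive(int v[], int n)    // n: power of two
{
    for (int shift = 1; shift < n; shift *= 2)
        for (int i = n - 1; i >= shift; i--)        // descending: reads pre-pass values
            v[i] += v[i - shift];
}

int main()
{
    int lanes[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    prefix_sum_inclusive(lanes, 8);
    for (int i = 0; i < 8; i++)
        std::printf("%d ", lanes[i]);               // prints: 1 3 6 10 15 21 28 36
    std::printf("\n");
    return 0;
}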