opencv: Commit 76c21b73
Authored Jan 24, 2020 by Alexander Alekhin

Merge pull request #16374 from alalek:imgproc_dispatch_sumpixels

Parents: 4d2da2de, 09b3383a
Showing 5 changed files with 426 additions and 338 deletions:

- modules/imgproc/CMakeLists.txt (+1, -0)
- modules/imgproc/src/sumpixels.avx512_skx.hpp (+8, -11)
- modules/imgproc/src/sumpixels.dispatch.cpp (+129, -302)
- modules/imgproc/src/sumpixels.hpp (+0, -25)
- modules/imgproc/src/sumpixels.simd.hpp (+288, -0)
modules/imgproc/CMakeLists.txt

@@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2)
 ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2)
 ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2)
 ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
+ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
 ocv_add_dispatched_file(undistort SSE2 AVX2)
 ocv_define_module(imgproc opencv_core WRAP java python js)
modules/imgproc/src/sumpixels.avx512_skx.cpp → modules/imgproc/src/sumpixels.avx512_skx.hpp
@@ -2,14 +2,13 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2019, Intel Corporation, all rights reserved.
-#include "precomp.hpp"
-#include "sumpixels.hpp"
+// Copyright (C) 2019-2020, Intel Corporation, all rights reserved.
+#include "opencv2/core/hal/intrin.hpp"

-namespace cv {
+namespace cv { namespace hal {
+CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN

 namespace { // Anonymous namespace to avoid exposing the implementation classes
 //
@@ -432,16 +431,14 @@ __m512d IntegralCalculator<4>::calculate_integral(const __m512i src_longs, c...
 }
 // end of anonymous namespace

-namespace opt_AVX512_SKX {
-
 // This is the implementation for the external callers interface entry point.
 // It should be the only function called into this file from outside
 // Any new implementations should be directed from here
-void calculate_integral_avx512(const uchar *src, size_t _srcstep,
+static void calculate_integral_avx512(const uchar *src, size_t _srcstep,
                                double *sum, size_t _sumstep,
                                double *sqsum, size_t _sqsumstep,
                                int width, int height, int cn)
 {
     CV_INSTRUMENT_REGION();

     switch (cn) {
     case 1: {
         IntegralCalculator<1> calculator;
@@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
 }
 }

-} // end namespace opt_AVX512_SXK
-} // end namespace cv
+CV_CPU_OPTIMIZATION_NAMESPACE_END
+}} // end namespace cv::hal
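The .cpp → .hpp rename follows from the dispatch mechanism: dispatched sources are textually included into each per-ISA translation unit rather than compiled standalone, so the AVX-512 body has to live in a header that sumpixels.simd.hpp can pull in under `#if CV_AVX512_SKX`. A toy model of that guarded-include pattern (names here are hypothetical; only the pattern mirrors the commit):

// Toy model: each per-ISA translation unit includes this header with
// different compiler flags, so the AVX-512 implementation is only
// instantiated in the build flavor that can actually use it.
#include <cstdio>

#if defined(__AVX512F__)          // set by -mavx512f in the SKX flavor
  #define IMPL_NAME "avx512_skx"  // stands in for: #include "sumpixels.avx512_skx.hpp"
#else
  #define IMPL_NAME "baseline"
#endif

int main() { std::printf("compiled-in implementation: %s\n", IMPL_NAME); }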
modules/imgproc/src/sumpixels.cpp → modules/imgproc/src/sumpixels.dispatch.cpp
@@ -10,7 +10,7 @@
 //                          License Agreement
 //                 For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved.
+// Copyright (C) 2000-2020 Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 // Copyright (C) 2014, Itseez Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
@@ -44,210 +44,157 @@
(The old side of this hunk held the Integral_SIMD template and its three uchar specializations; they move, essentially unchanged, into the new sumpixels.simd.hpp shown at the end of this commit. The new side pulls in the generated dispatch headers and keeps only the OpenCL and IPP paths:)

 #include "precomp.hpp"
 #include "opencl_kernels_imgproc.hpp"
 #include "opencv2/core/hal/intrin.hpp"
-#include "sumpixels.hpp"
+
+#include "sumpixels.simd.hpp"
+#include "sumpixels.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content

 namespace cv {

 #ifdef HAVE_OPENCL

 static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
 {
     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

     if ( (_src.type() != CV_8UC1) ||
         !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F)))
         return false;

     static const int tileSize = 16;

     String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s",
                               ocl::typeToStr(sdepth), tileSize,
                               doubleSupport ? " -D DOUBLE_SUPPORT" : "");

     ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
     if (kcols.empty())
         return false;

     UMat src = _src.getUMat();
     Size src_size = src.size();
     Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize,
                  ((src_size.width + tileSize - 1) / tileSize) * tileSize);
     UMat buf(bufsize, sdepth);
     kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf));
     size_t gt = src.cols, lt = tileSize;
     if (!kcols.run(1, &gt, &lt, false))
         return false;

     ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
     if (krows.empty())
         return false;

     Size sumsize(src_size.width + 1, src_size.height + 1);
     _sum.create(sumsize, sdepth);
     UMat sum = _sum.getUMat();

     krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum));
     gt = src.rows;
     return krows.run(1, &gt, &lt, false);
 }

 static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
 {
     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

     if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
         return false;

     static const int tileSize = 16;

     String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
                               ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth), tileSize,
                               doubleSupport ? " -D DOUBLE_SUPPORT" : "");

     ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
     if (kcols.empty())
         return false;

     UMat src = _src.getUMat();
     Size src_size = src.size();
     Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize,
                  ((src_size.width + tileSize - 1) / tileSize) * tileSize);
     UMat buf(bufsize, sdepth);
     UMat buf_sq(bufsize, sqdepth);
     kcols.args(ocl::KernelArg::ReadOnly(src),
                ocl::KernelArg::WriteOnlyNoSize(buf),
                ocl::KernelArg::WriteOnlyNoSize(buf_sq));
     size_t gt = src.cols, lt = tileSize;
     if (!kcols.run(1, &gt, &lt, false))
         return false;

     ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
     if (krows.empty())
         return false;

     Size sumsize(src_size.width + 1, src_size.height + 1);
     _sum.create(sumsize, sdepth);
     UMat sum = _sum.getUMat();
     _sqsum.create(sumsize, sqdepth);
     UMat sum_sq = _sqsum.getUMat();

     krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq),
                ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
     gt = src.rows;
     return krows.run(1, &gt, &lt, false);
 }

 #endif // HAVE_OPENCL

 #ifdef HAVE_IPP
 static bool ipp_integral(
         int depth, int sdepth, int sqdepth,
         const uchar* src, size_t srcstep,
         uchar* sum, size_t sumstep,
         uchar* sqsum, size_t sqsumstep,
         uchar* tilted, size_t tstep,
         int width, int height, int cn)
 {
     CV_INSTRUMENT_REGION_IPP();

     IppiSize size = { width, height };

     if (cn > 1)
         return false;
     if (tilted)
     {
         CV_UNUSED(tstep);
         return false;
     }

     if (!sqsum)
     {
         if (depth == CV_8U && sdepth == CV_32S)
             return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0;
         else if (depth == CV_8UC1 && sdepth == CV_32F)
             return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0;
         else if (depth == CV_32FC1 && sdepth == CV_32F)
             return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0;
         else
             return false;
     }
     else
     {
         if (depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S)
             return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
         else if (depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F)
             return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
         else if (depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F)
             return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
         else
             return false;
     }
 }
 #endif // HAVE_IPP
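All of these backends (OpenCL, IPP, and the SIMD code below) compute the same summed-area table. For reference, a minimal scalar sketch (mine, not part of the commit) of the recurrence they accelerate:

// Minimal scalar reference for what every backend here computes:
// sum(Y,X) = sum of src(y,x) over y<Y, x<X, with a zero first row and
// column, so the output is (height+1) x (width+1).
#include <cstdio>
#include <vector>

static std::vector<int> integral_ref(const std::vector<unsigned char>& src,
                                     int width, int height)
{
    std::vector<int> sum((height + 1) * (width + 1), 0);
    for (int y = 0; y < height; ++y)
        for (int x = 0; x < width; ++x)
            // sum(y+1,x+1) = src(y,x) + sum(y,x+1) + sum(y+1,x) - sum(y,x)
            sum[(y + 1) * (width + 1) + (x + 1)] =
                src[y * width + x]
                + sum[y * (width + 1) + (x + 1)]
                + sum[(y + 1) * (width + 1) + x]
                - sum[y * (width + 1) + x];
    return sum;
}

int main()
{
    std::vector<unsigned char> img = { 1, 2, 3, 4 }; // 2x2 test image
    std::vector<int> s = integral_ref(img, 2, 2);
    std::printf("total = %d\n", s[2 * 3 + 2]);       // bottom-right: 1+2+3+4 = 10
}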
 namespace hal {

 template <typename T, typename ST, typename QT>
 static void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
                        QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep,
                        int width, int height, int cn )
 {
     int x, y, k;

-    if (Integral_SIMD<T, ST, QT>()(src, _srcstep,
-                                   sum, _sumstep,
-                                   sqsum, _sqsumstep,
-                                   tilted, _tiltedstep,
-                                   width, height, cn))
-        return;

     int srcstep = (int)(_srcstep/sizeof(T));
     int sumstep = (int)(_sumstep/sizeof(ST));
     int tiltedstep = (int)(_tiltedstep/sizeof(ST));
@@ -401,100 +348,7 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
 }
 }

-#ifdef HAVE_OPENCL
-
-static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
-{
-    ... (definition moved above, unchanged)
-}
-
-static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
-{
-    ... (definition moved above, unchanged)
-}
-
-#endif
-}
-
-#if defined(HAVE_IPP)
-namespace cv {
@@ -502,56 +356,28 @@ static bool ipp_integral(
-static bool ipp_integral(
-        int depth, int sdepth, int sqdepth,
-        const uchar* src, size_t srcstep,
-        uchar* sum, size_t sumstep,
-        uchar* sqsum, size_t sqsumstep,
-        uchar* tilted, size_t tstep,
-        int width, int height, int cn)
-{
-    CV_INSTRUMENT_REGION_IPP();
-    ... (definition moved above, unchanged)
-}
-}
-#endif
+static bool integral_SIMD(
+        int depth, int sdepth, int sqdepth,
+        const uchar* src, size_t srcstep,
+        uchar* sum, size_t sumstep,
+        uchar* sqsum, size_t sqsumstep,
+        uchar* tilted, size_t tstep,
+        int width, int height, int cn)
+{
+    CV_INSTRUMENT_REGION();
+
+    CV_CPU_DISPATCH(integral_SIMD, (depth, sdepth, sqdepth, src, srcstep, sum, sumstep,
+        sqsum, sqsumstep, tilted, tstep, width, height, cn),
+        CV_CPU_DISPATCH_MODES_ALL);
+}
 void integral(int depth, int sdepth, int sqdepth,
               const uchar* src, size_t srcstep,
               uchar* sum, size_t sumstep,
               uchar* sqsum, size_t sqsumstep,
               uchar* tilted, size_t tstep,
               int width, int height, int cn)
 {
     CV_INSTRUMENT_REGION();

     CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep,
              sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn);
     CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep,
              sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn));

+    if (integral_SIMD(depth, sdepth, sqdepth, src, srcstep,
+            sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn))
+        return;

 #define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)

     if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
 ...
@@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth,
     else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
         ONE_CALL(double, double, double);
     else
-        CV_Error( CV_StsUnsupportedFormat, "" );
+        CV_Error( Error::StsUnsupportedFormat, "" );

 #undef ONE_CALL
 }

-} // cv::hal::
+} // namespace hal

-void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
+void integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
 {
     CV_INSTRUMENT_REGION();
 ...
@@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output...
              src.cols, src.rows, cn);
 }

-void cv::integral( InputArray src, OutputArray sum, int sdepth )
+void integral( InputArray src, OutputArray sum, int sdepth )
 {
     CV_INSTRUMENT_REGION();

     integral( src, sum, noArray(), noArray(), sdepth );
 }

-void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
+void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
 {
     CV_INSTRUMENT_REGION();

     integral( src, sum, sqsum, noArray(), sdepth, sqdepth );
 }

+} // namespace

 CV_IMPL void cvIntegral( const CvArr* image, CvArr* sumImage, ...
...
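The public entry points are untouched by the refactor, so callers see no behavior change, only different dispatch plumbing underneath. A minimal usage sketch against the standard cv::integral API:

// Minimal usage sketch of the public API this file implements.
#include <opencv2/imgproc.hpp>
#include <cstdio>

int main()
{
    cv::Mat img(64, 64, CV_8UC1, cv::Scalar(1)); // all-ones test image
    cv::Mat sum, sqsum;
    // sum is CV_32S, sqsum CV_64F; routed through HAL/IPP/SIMD/scalar as available
    cv::integral(img, sum, sqsum, CV_32S, CV_64F);
    std::printf("total = %d\n", sum.at<int>(64, 64)); // bottom-right corner: 64*64 = 4096
    return 0;
}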
modules/imgproc/src/sumpixels.hpp
deleted (100644 → 0)

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
#define OPENCV_IMGPROC_SUM_PIXELS_HPP

namespace cv {
namespace opt_AVX512_SKX {

#if CV_TRY_AVX512_SKX
void calculate_integral_avx512(
        const uchar *src, size_t _srcstep,
        double *sum, size_t _sumstep,
        double *sqsum, size_t _sqsumstep,
        int width, int height, int cn);
#endif

} // end namespace opt_AVX512_SKX
} // end namespace cv

#endif
modules/imgproc/src/sumpixels.simd.hpp
new file (0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2020 Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/hal/intrin.hpp"
#if CV_AVX512_SKX
#include "sumpixels.avx512_skx.hpp"
#endif
namespace cv { namespace hal {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN

// forward declarations
bool integral_SIMD(
        int depth, int sdepth, int sqdepth,
        const uchar* src, size_t srcstep,
        uchar* sum, size_t sumstep,
        uchar* sqsum, size_t sqsumstep,
        uchar* tilted, size_t tstep,
        int width, int height, int cn);

#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY

namespace {

template <typename T, typename ST, typename QT>
struct Integral_SIMD
{
    bool operator()(const T *, size_t,
                    ST *, size_t,
                    QT *, size_t,
                    ST *, size_t,
                    int, int, int) const
    {
        return false;
    }
};

#if CV_AVX512_SKX
template <>
struct Integral_SIMD<uchar, double, double>
{
    Integral_SIMD() {};

    bool operator()(const uchar *src, size_t _srcstep,
                    double *sum, size_t _sumstep,
                    double *sqsum, size_t _sqsumstep,
                    double *tilted, size_t _tiltedstep,
                    int width, int height, int cn) const
    {
        CV_UNUSED(_tiltedstep);
        // TODO: Add support for 1 channel input (WIP)
        if (!tilted && (cn <= 4))
        {
            calculate_integral_avx512(src, _srcstep, sum, _sumstep,
                                      sqsum, _sqsumstep, width, height, cn);
            return true;
        }
        return false;
    }
};
#endif

#if CV_SIMD && CV_SIMD_WIDTH <= 64

template <>
struct Integral_SIMD<uchar, int, double>
{
    Integral_SIMD() {}

    bool operator()(const uchar * src, size_t _srcstep,
                    int * sum, size_t _sumstep,
                    double * sqsum, size_t,
                    int * tilted, size_t,
                    int width, int height, int cn) const
    {
        if (sqsum || tilted || cn != 1)
            return false;

        // the first iteration
        memset(sum, 0, (width + 1) * sizeof(int));

        // the others
        for (int i = 0; i < height; ++i)
        {
            const uchar * src_row = src + _srcstep * i;
            int * prev_sum_row = (int *)((uchar *)sum + _sumstep * i) + 1;
            int * sum_row = (int *)((uchar *)sum + _sumstep * (i + 1)) + 1;

            sum_row[-1] = 0;

            v_int32 prev = vx_setzero_s32();
            int j = 0;
            for ( ; j + v_uint16::nlanes <= width; j += v_uint16::nlanes)
            {
                v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j));
                v_int32 el4l, el4h;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
                __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
                __m256i shmask = _mm256_set1_epi32(7);
                el4l.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_low(vsum)), prev.val);
                el4h.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_high(vsum)), _mm256_permutevar8x32_epi32(el4l.val, shmask));
                prev.val = _mm256_permutevar8x32_epi32(el4h.val, shmask);
#else
                el8 += v_rotate_left<1>(el8);
                el8 += v_rotate_left<2>(el8);
#if CV_SIMD_WIDTH >= 32
                el8 += v_rotate_left<4>(el8);
#if CV_SIMD_WIDTH == 64
                el8 += v_rotate_left<8>(el8);
#endif
#endif
                v_expand(el8, el4l, el4h);
                el4l += prev;
                el4h += el4l;
                prev = v_broadcast_element<v_int32::nlanes - 1>(el4h);
#endif
                v_store(sum_row + j, el4l + vx_load(prev_sum_row + j));
                v_store(sum_row + j + v_int32::nlanes, el4h + vx_load(prev_sum_row + j + v_int32::nlanes));
            }

            for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
                sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
        }

        return true;
    }
};

template <>
struct Integral_SIMD<uchar, float, double>
{
    Integral_SIMD() {}

    bool operator()(const uchar * src, size_t _srcstep,
                    float * sum, size_t _sumstep,
                    double * sqsum, size_t,
                    float * tilted, size_t,
                    int width, int height, int cn) const
    {
        if (sqsum || tilted || cn != 1)
            return false;

        // the first iteration
        memset(sum, 0, (width + 1) * sizeof(int));

        // the others
        for (int i = 0; i < height; ++i)
        {
            const uchar * src_row = src + _srcstep * i;
            float * prev_sum_row = (float *)((uchar *)sum + _sumstep * i) + 1;
            float * sum_row = (float *)((uchar *)sum + _sumstep * (i + 1)) + 1;

            sum_row[-1] = 0;

            v_float32 prev = vx_setzero_f32();
            int j = 0;
            for (; j + v_uint16::nlanes <= width; j += v_uint16::nlanes)
            {
                v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j));
                v_float32 el4l, el4h;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
                __m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4));
                vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
                __m256i shmask = _mm256_set1_epi32(7);
                el4l.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_low(vsum))), prev.val);
                el4h.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_high(vsum))), _mm256_permutevar8x32_ps(el4l.val, shmask));
                prev.val = _mm256_permutevar8x32_ps(el4h.val, shmask);
#else
                el8 += v_rotate_left<1>(el8);
                el8 += v_rotate_left<2>(el8);
#if CV_SIMD_WIDTH >= 32
                el8 += v_rotate_left<4>(el8);
#if CV_SIMD_WIDTH == 64
                el8 += v_rotate_left<8>(el8);
#endif
#endif
                v_int32 el4li, el4hi;
                v_expand(el8, el4li, el4hi);
                el4l = v_cvt_f32(el4li) + prev;
                el4h = v_cvt_f32(el4hi) + el4l;
                prev = v_broadcast_element<v_float32::nlanes - 1>(el4h);
#endif
                v_store(sum_row + j, el4l + vx_load(prev_sum_row + j));
                v_store(sum_row + j + v_float32::nlanes, el4h + vx_load(prev_sum_row + j + v_float32::nlanes));
            }

            for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
                sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
        }

        return true;
    }
};

#endif

} // namespace anon

bool integral_SIMD(
        int depth, int sdepth, int sqdepth,
        const uchar* src, size_t srcstep,
        uchar* sum, size_t sumstep,
        uchar* sqsum, size_t sqsumstep,
        uchar* tilted, size_t tstep,
        int width, int height, int cn)
{
    CV_INSTRUMENT_REGION();

#define ONE_CALL(T, ST, QT) \
    return Integral_SIMD<T, ST, QT>()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn)

    if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
        ONE_CALL(uchar, int, double);
    else if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32F )
        ONE_CALL(uchar, int, float);
    else if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S )
        ONE_CALL(uchar, int, int);
    else if( depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F )
        ONE_CALL(uchar, float, double);
    else if( depth == CV_8U && sdepth == CV_32F && sqdepth == CV_32F )
        ONE_CALL(uchar, float, float);
    else if( depth == CV_8U && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(uchar, double, double);
    else if( depth == CV_16U && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(ushort, double, double);
    else if( depth == CV_16S && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(short, double, double);
    else if( depth == CV_32F && sdepth == CV_32F && sqdepth == CV_64F )
        ONE_CALL(float, float, double);
    else if( depth == CV_32F && sdepth == CV_32F && sqdepth == CV_32F )
        ONE_CALL(float, float, float);
    else if( depth == CV_32F && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(float, double, double);
    else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
        ONE_CALL(double, double, double);
    else
        return false;

#undef ONE_CALL
}

#endif

CV_CPU_OPTIMIZATION_NAMESPACE_END
}} // cv::hal::
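The heart of both SIMD specializations is an in-register inclusive prefix sum: log2(lanes) shift-and-add rounds (v_rotate_left<1>, <2>, <4>, and <8> for 512-bit registers) turn a block of pixels into running sums, and v_broadcast_element carries the last lane into the next block. A scalar model of the same scheme (illustrative, not from the commit):

// Scalar model of the shift-and-add (Hillis-Steele) prefix sum the
// v_rotate_left<1>/<2>/<4> sequence performs inside one SIMD register:
// after log2(N) rounds, lane i holds the inclusive sum of lanes 0..i.
#include <cstdio>

int main()
{
    int lane[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };   // one "register" of 8 lanes

    for (int shift = 1; shift < 8; shift *= 2)  // rounds: shift = 1, 2, 4
    {
        int rotated[8] = { 0 };                 // v_rotate_left<shift>: zeros shift in
        for (int i = shift; i < 8; ++i)
            rotated[i] = lane[i - shift];
        for (int i = 0; i < 8; ++i)             // el8 += rotated
            lane[i] += rotated[i];
    }

    int carry = lane[7];                        // v_broadcast_element<last lane>: carry
    for (int i = 0; i < 8; ++i)
        std::printf("%d ", lane[i]);            // prints: 1 3 6 10 15 21 28 36
    std::printf("\ncarry into next block: %d\n", carry);
}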