Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
09b3383a
Commit
09b3383a
authored
Jan 17, 2020
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
imgproc: dispatch sumpixels (integral)
parent
b4316af8
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
191 additions
and
770 deletions
+191
-770
CMakeLists.txt
modules/imgproc/CMakeLists.txt
+1
-0
sumpixels.avx512_skx.hpp
modules/imgproc/src/sumpixels.avx512_skx.hpp
+8
-11
sumpixels.dispatch.cpp
modules/imgproc/src/sumpixels.dispatch.cpp
+143
-316
sumpixels.hpp
modules/imgproc/src/sumpixels.hpp
+0
-25
sumpixels.simd.hpp
modules/imgproc/src/sumpixels.simd.hpp
+39
-418
No files found.
modules/imgproc/CMakeLists.txt
View file @
09b3383a
...
@@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2)
...
@@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file
(
median_blur SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
median_blur SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
morph SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
morph SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
smooth SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
smooth SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
sumpixels SSE2 AVX2 AVX512_SKX
)
ocv_add_dispatched_file
(
undistort SSE2 AVX2
)
ocv_add_dispatched_file
(
undistort SSE2 AVX2
)
ocv_define_module
(
imgproc opencv_core WRAP java python js
)
ocv_define_module
(
imgproc opencv_core WRAP java python js
)
modules/imgproc/src/sumpixels.avx512_skx.hpp
View file @
09b3383a
...
@@ -2,14 +2,13 @@
...
@@ -2,14 +2,13 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// of this distribution and at http://opencv.org/license.html.
//
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
// Copyright (C) 2019-2020, Intel Corporation, all rights reserved.
#include "precomp.hpp"
#include "sumpixels.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/hal/intrin.hpp"
namespace
cv
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
namespace
cv
{
namespace
{
// Anonymous namespace to avoid exposing the implementation classes
namespace
{
// Anonymous namespace to avoid exposing the implementation classes
//
//
...
@@ -432,16 +431,14 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c
...
@@ -432,16 +431,14 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c
}
// end of anonymous namespace
}
// end of anonymous namespace
namespace
opt_AVX512_SKX
{
static
// This is the implementation for the external callers interface entry point.
// It should be the only function called into this file from outside
// Any new implementations should be directed from here
void
calculate_integral_avx512
(
const
uchar
*
src
,
size_t
_srcstep
,
void
calculate_integral_avx512
(
const
uchar
*
src
,
size_t
_srcstep
,
double
*
sum
,
size_t
_sumstep
,
double
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
_sqsumstep
,
double
*
sqsum
,
size_t
_sqsumstep
,
int
width
,
int
height
,
int
cn
)
int
width
,
int
height
,
int
cn
)
{
{
CV_INSTRUMENT_REGION
();
switch
(
cn
){
switch
(
cn
){
case
1
:
{
case
1
:
{
IntegralCalculator
<
1
>
calculator
;
IntegralCalculator
<
1
>
calculator
;
...
@@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
...
@@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
}
}
}
// end namespace opt_AVX512_SXK
CV_CPU_OPTIMIZATION_NAMESPACE_END
}
// end namespace cv
}
}
// end namespace cv::hal
modules/imgproc/src/sumpixels.dispatch.cpp
View file @
09b3383a
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
// License Agreement
// License Agreement
// For Open Source Computer Vision Library
// For Open Source Computer Vision Library
//
//
// Copyright (C) 2000-20
08,2019
Intel Corporation, all rights reserved.
// Copyright (C) 2000-20
20
Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Third party copyrights are property of their respective owners.
...
@@ -44,210 +44,157 @@
...
@@ -44,210 +44,157 @@
#include "precomp.hpp"
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace
cv
#include "sumpixels.simd.hpp"
{
#include "sumpixels.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
#ifdef HAVE_OPENCL
template
<
typename
T
,
typename
ST
,
typename
QT
>
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
int
sdepth
)
struct
Integral_SIMD
{
{
bool
operator
()(
const
T
*
,
size_t
,
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
ST
*
,
size_t
,
QT
*
,
size_t
,
if
(
(
_src
.
type
()
!=
CV_8UC1
)
||
ST
*
,
size_t
,
!
(
sdepth
==
CV_32S
||
sdepth
==
CV_32F
||
(
doubleSupport
&&
sdepth
==
CV_64F
)))
int
,
int
,
int
)
const
{
return
false
;
return
false
;
}
};
static
const
int
tileSize
=
16
;
template
<>
String
build_opt
=
format
(
"-D sumT=%s -D LOCAL_SUM_SIZE=%d%s"
,
struct
Integral_SIMD
<
uchar
,
double
,
double
>
{
ocl
::
typeToStr
(
sdepth
),
tileSize
,
Integral_SIMD
()
{}
;
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
)
;
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
bool
operator
()(
const
uchar
*
src
,
size_t
_srcstep
,
UMat
src
=
_src
.
getUMat
();
double
*
sum
,
size_t
_sumstep
,
Size
src_size
=
src
.
size
();
double
*
sqsum
,
size_t
_sqsumstep
,
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
double
*
tilted
,
size_t
_tiltedstep
,
UMat
buf
(
bufsize
,
sdepth
);
int
width
,
int
height
,
int
cn
)
const
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
));
{
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
#if CV_TRY_AVX512_SKX
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
CV_UNUSED
(
_tiltedstep
);
// TODO: Add support for 1 channel input (WIP)
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
(
cn
<=
4
)){
opt_AVX512_SKX
::
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
return
true
;
}
#else
// Avoid warnings in some builds
CV_UNUSED
(
src
);
CV_UNUSED
(
_srcstep
);
CV_UNUSED
(
sum
);
CV_UNUSED
(
_sumstep
);
CV_UNUSED
(
sqsum
);
CV_UNUSED
(
_sqsumstep
);
CV_UNUSED
(
tilted
);
CV_UNUSED
(
_tiltedstep
);
CV_UNUSED
(
width
);
CV_UNUSED
(
height
);
CV_UNUSED
(
cn
);
#endif
return
false
;
return
false
;
}
};
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
#if CV_SIMD && CV_SIMD_WIDTH <= 64
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
template
<>
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnly
(
sum
));
struct
Integral_SIMD
<
uchar
,
int
,
double
>
gt
=
src
.
rows
;
{
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
Integral_SIMD
()
{
}
}
bool
operator
()(
const
uchar
*
src
,
size_t
_srcstep
,
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
int
sdepth
,
int
sqdepth
)
int
*
sum
,
size_t
_sumstep
,
{
double
*
sqsum
,
size_t
,
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
int
*
tilted
,
size_t
,
int
width
,
int
height
,
int
cn
)
const
{
if
(
sqsum
||
tilted
||
cn
!=
1
)
return
false
;
// the first iteration
if
(
_src
.
type
()
!=
CV_8UC1
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
sqdepth
==
CV_64F
))
)
memset
(
sum
,
0
,
(
width
+
1
)
*
sizeof
(
int
))
;
return
false
;
// the others
static
const
int
tileSize
=
16
;
for
(
int
i
=
0
;
i
<
height
;
++
i
)
{
const
uchar
*
src_row
=
src
+
_srcstep
*
i
;
int
*
prev_sum_row
=
(
int
*
)((
uchar
*
)
sum
+
_sumstep
*
i
)
+
1
;
int
*
sum_row
=
(
int
*
)((
uchar
*
)
sum
+
_sumstep
*
(
i
+
1
))
+
1
;
sum_row
[
-
1
]
=
0
;
String
build_opt
=
format
(
"-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
sqdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
v_int32
prev
=
vx_setzero_s32
();
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
int
j
=
0
;
if
(
kcols
.
empty
())
for
(
;
j
+
v_uint16
::
nlanes
<=
width
;
j
+=
v_uint16
::
nlanes
)
return
false
;
{
v_int16
el8
=
v_reinterpret_as_s16
(
vx_load_expand
(
src_row
+
j
));
v_int32
el4l
,
el4h
;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
__m256i
vsum
=
_mm256_add_epi16
(
el8
.
val
,
_mm256_slli_si256
(
el8
.
val
,
2
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
4
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
8
));
__m256i
shmask
=
_mm256_set1_epi32
(
7
);
el4l
.
val
=
_mm256_add_epi32
(
_mm256_cvtepi16_epi32
(
_v256_extract_low
(
vsum
)),
prev
.
val
);
el4h
.
val
=
_mm256_add_epi32
(
_mm256_cvtepi16_epi32
(
_v256_extract_high
(
vsum
)),
_mm256_permutevar8x32_epi32
(
el4l
.
val
,
shmask
));
prev
.
val
=
_mm256_permutevar8x32_epi32
(
el4h
.
val
,
shmask
);
#else
el8
+=
v_rotate_left
<
1
>
(
el8
);
el8
+=
v_rotate_left
<
2
>
(
el8
);
#if CV_SIMD_WIDTH >= 32
el8
+=
v_rotate_left
<
4
>
(
el8
);
#if CV_SIMD_WIDTH == 64
el8
+=
v_rotate_left
<
8
>
(
el8
);
#endif
#endif
v_expand
(
el8
,
el4l
,
el4h
);
el4l
+=
prev
;
el4h
+=
el4l
;
prev
=
v_broadcast_element
<
v_int32
::
nlanes
-
1
>
(
el4h
);
#endif
v_store
(
sum_row
+
j
,
el4l
+
vx_load
(
prev_sum_row
+
j
));
v_store
(
sum_row
+
j
+
v_int32
::
nlanes
,
el4h
+
vx_load
(
prev_sum_row
+
j
+
v_int32
::
nlanes
));
}
for
(
int
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
UMat
src
=
_src
.
getUMat
();
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
Size
src_size
=
src
.
size
();
}
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
vx_cleanup
();
UMat
buf
(
bufsize
,
sdepth
);
UMat
buf_sq
(
bufsize
,
sqdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf_sq
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
return
true
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
)
;
}
if
(
krows
.
empty
())
}
;
return
false
;
template
<>
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
struct
Integral_SIMD
<
uchar
,
float
,
double
>
_sum
.
create
(
sumsize
,
sdepth
);
{
UMat
sum
=
_sum
.
getUMat
();
Integral_SIMD
()
{}
_sqsum
.
create
(
sumsize
,
sqdepth
);
UMat
sum_sq
=
_sqsum
.
getUMat
();
bool
operator
()(
const
uchar
*
src
,
size_t
_srcstep
,
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf_sq
),
ocl
::
KernelArg
::
WriteOnly
(
sum
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
sum_sq
));
float
*
sum
,
size_t
_sumstep
,
gt
=
src
.
rows
;
double
*
sqsum
,
size_t
,
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
float
*
tilted
,
size_t
,
}
int
width
,
int
height
,
int
cn
)
const
{
if
(
sqsum
||
tilted
||
cn
!=
1
)
return
false
;
// the first iteration
#endif // HAVE_OPENCL
memset
(
sum
,
0
,
(
width
+
1
)
*
sizeof
(
int
));
// the others
#ifdef HAVE_IPP
for
(
int
i
=
0
;
i
<
height
;
++
i
)
{
const
uchar
*
src_row
=
src
+
_srcstep
*
i
;
float
*
prev_sum_row
=
(
float
*
)((
uchar
*
)
sum
+
_sumstep
*
i
)
+
1
;
float
*
sum_row
=
(
float
*
)((
uchar
*
)
sum
+
_sumstep
*
(
i
+
1
))
+
1
;
sum_row
[
-
1
]
=
0
;
static
bool
ipp_integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CV_INSTRUMENT_REGION_IPP
();
v_float32
prev
=
vx_setzero_f32
();
IppiSize
size
=
{
width
,
height
};
int
j
=
0
;
for
(;
j
+
v_uint16
::
nlanes
<=
width
;
j
+=
v_uint16
::
nlanes
)
{
v_int16
el8
=
v_reinterpret_as_s16
(
vx_load_expand
(
src_row
+
j
));
v_float32
el4l
,
el4h
;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
__m256i
vsum
=
_mm256_add_epi16
(
el8
.
val
,
_mm256_slli_si256
(
el8
.
val
,
2
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
4
));
vsum
=
_mm256_add_epi16
(
vsum
,
_mm256_slli_si256
(
vsum
,
8
));
__m256i
shmask
=
_mm256_set1_epi32
(
7
);
el4l
.
val
=
_mm256_add_ps
(
_mm256_cvtepi32_ps
(
_mm256_cvtepi16_epi32
(
_v256_extract_low
(
vsum
))),
prev
.
val
);
el4h
.
val
=
_mm256_add_ps
(
_mm256_cvtepi32_ps
(
_mm256_cvtepi16_epi32
(
_v256_extract_high
(
vsum
))),
_mm256_permutevar8x32_ps
(
el4l
.
val
,
shmask
));
prev
.
val
=
_mm256_permutevar8x32_ps
(
el4h
.
val
,
shmask
);
#else
el8
+=
v_rotate_left
<
1
>
(
el8
);
el8
+=
v_rotate_left
<
2
>
(
el8
);
#if CV_SIMD_WIDTH >= 32
el8
+=
v_rotate_left
<
4
>
(
el8
);
#if CV_SIMD_WIDTH == 64
el8
+=
v_rotate_left
<
8
>
(
el8
);
#endif
#endif
v_int32
el4li
,
el4hi
;
v_expand
(
el8
,
el4li
,
el4hi
);
el4l
=
v_cvt_f32
(
el4li
)
+
prev
;
el4h
=
v_cvt_f32
(
el4hi
)
+
el4l
;
prev
=
v_broadcast_element
<
v_float32
::
nlanes
-
1
>
(
el4h
);
#endif
v_store
(
sum_row
+
j
,
el4l
+
vx_load
(
prev_sum_row
+
j
));
v_store
(
sum_row
+
j
+
v_float32
::
nlanes
,
el4h
+
vx_load
(
prev_sum_row
+
j
+
v_float32
::
nlanes
));
}
for
(
float
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
if
(
cn
>
1
)
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
return
false
;
}
if
(
tilted
)
vx_cleanup
();
{
CV_UNUSED
(
tstep
);
return
false
;
}
return
true
;
if
(
!
sqsum
)
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_8UC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_32FC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_32f_C1R
,
(
const
Ipp32f
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
)
>=
0
;
else
return
false
;
}
}
};
else
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp32s
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32F
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32f64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
return
false
;
}
}
#endif
#endif
// HAVE_IPP
template
<
typename
T
,
typename
ST
,
typename
QT
>
namespace
hal
{
template
<
typename
T
,
typename
ST
,
typename
QT
>
static
void
integral_
(
const
T
*
src
,
size_t
_srcstep
,
ST
*
sum
,
size_t
_sumstep
,
void
integral_
(
const
T
*
src
,
size_t
_srcstep
,
ST
*
sum
,
size_t
_sumstep
,
QT
*
sqsum
,
size_t
_sqsumstep
,
ST
*
tilted
,
size_t
_tiltedstep
,
QT
*
sqsum
,
size_t
_sqsumstep
,
ST
*
tilted
,
size_t
_tiltedstep
,
int
width
,
int
height
,
int
cn
)
int
width
,
int
height
,
int
cn
)
{
{
int
x
,
y
,
k
;
int
x
,
y
,
k
;
if
(
Integral_SIMD
<
T
,
ST
,
QT
>
()(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
tilted
,
_tiltedstep
,
width
,
height
,
cn
))
return
;
int
srcstep
=
(
int
)(
_srcstep
/
sizeof
(
T
));
int
srcstep
=
(
int
)(
_srcstep
/
sizeof
(
T
));
int
sumstep
=
(
int
)(
_sumstep
/
sizeof
(
ST
));
int
sumstep
=
(
int
)(
_sumstep
/
sizeof
(
ST
));
int
tiltedstep
=
(
int
)(
_tiltedstep
/
sizeof
(
ST
));
int
tiltedstep
=
(
int
)(
_tiltedstep
/
sizeof
(
ST
));
...
@@ -401,157 +348,36 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
...
@@ -401,157 +348,36 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
}
}
}
}
static
bool
integral_SIMD
(
#ifdef HAVE_OPENCL
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
int
sdepth
)
uchar
*
sum
,
size_t
sumstep
,
{
uchar
*
sqsum
,
size_t
sqsumstep
,
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
if
(
(
_src
.
type
()
!=
CV_8UC1
)
||
!
(
sdepth
==
CV_32S
||
sdepth
==
CV_32F
||
(
doubleSupport
&&
sdepth
==
CV_64F
)))
return
false
;
static
const
int
tileSize
=
16
;
String
build_opt
=
format
(
"-D sumT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnly
(
sum
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
static
bool
ocl_integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
int
sdepth
,
int
sqdepth
)
{
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
CV_INSTRUMENT_REGION
();
if
(
_src
.
type
()
!=
CV_8UC1
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
sqdepth
==
CV_64F
))
)
return
false
;
static
const
int
tileSize
=
16
;
String
build_opt
=
format
(
"-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
sqdepth
),
tileSize
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
kcols
(
"integral_sum_cols"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
kcols
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
Size
src_size
=
src
.
size
();
Size
bufsize
(((
src_size
.
height
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
,
((
src_size
.
width
+
tileSize
-
1
)
/
tileSize
)
*
tileSize
);
UMat
buf
(
bufsize
,
sdepth
);
UMat
buf_sq
(
bufsize
,
sqdepth
);
kcols
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
buf_sq
));
size_t
gt
=
src
.
cols
,
lt
=
tileSize
;
if
(
!
kcols
.
run
(
1
,
&
gt
,
&
lt
,
false
))
return
false
;
ocl
::
Kernel
krows
(
"integral_sum_rows"
,
ocl
::
imgproc
::
integral_sum_oclsrc
,
build_opt
);
if
(
krows
.
empty
())
return
false
;
Size
sumsize
(
src_size
.
width
+
1
,
src_size
.
height
+
1
);
_sum
.
create
(
sumsize
,
sdepth
);
UMat
sum
=
_sum
.
getUMat
();
_sqsum
.
create
(
sumsize
,
sqdepth
);
UMat
sum_sq
=
_sqsum
.
getUMat
();
krows
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
buf_sq
),
ocl
::
KernelArg
::
WriteOnly
(
sum
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
sum_sq
));
gt
=
src
.
rows
;
return
krows
.
run
(
1
,
&
gt
,
&
lt
,
false
);
}
#endif
CV_CPU_DISPATCH
(
integral_SIMD
,
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
),
CV_CPU_DISPATCH_MODES_ALL
);
}
}
#if defined(HAVE_IPP)
void
integral
(
namespace
cv
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
{
static
bool
ipp_integral
(
CV_INSTRUMENT_REGION
();
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CV_INSTRUMENT_REGION_IPP
();
IppiSize
size
=
{
width
,
height
};
if
(
cn
>
1
)
return
false
;
if
(
tilted
)
{
CV_UNUSED
(
tstep
);
return
false
;
}
if
(
!
sqsum
)
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_8UC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_8u32f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
,
0
)
>=
0
;
else
if
(
depth
==
CV_32FC1
&&
sdepth
==
CV_32F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiIntegral_32f_C1R
,
(
const
Ipp32f
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
size
)
>=
0
;
else
return
false
;
}
else
{
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_32S
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp32s
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32s64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32s
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32F
&&
sqdepth
==
CV_64F
)
return
CV_INSTRUMENT_FUN_IPP
(
ippiSqrIntegral_8u32f64f_C1R
,
(
const
Ipp8u
*
)
src
,
(
int
)
srcstep
,
(
Ipp32f
*
)
sum
,
(
int
)
sumstep
,
(
Ipp64f
*
)
sqsum
,
(
int
)
sqsumstep
,
size
,
0
,
0
)
>=
0
;
else
return
false
;
}
}
}
#endif
namespace
cv
{
namespace
hal
{
void
integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CALL_HAL
(
integral
,
cv_hal_integral
,
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
);
CALL_HAL
(
integral
,
cv_hal_integral
,
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
);
CV_IPP_RUN_FAST
(
ipp_integral
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
));
CV_IPP_RUN_FAST
(
ipp_integral
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
));
if
(
integral_SIMD
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
))
return
;
#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
...
@@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth,
...
@@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth,
else
if
(
depth
==
CV_64F
&&
sdepth
==
CV_64F
&&
sqdepth
==
CV_64F
)
else
if
(
depth
==
CV_64F
&&
sdepth
==
CV_64F
&&
sqdepth
==
CV_64F
)
ONE_CALL
(
double
,
double
,
double
);
ONE_CALL
(
double
,
double
,
double
);
else
else
CV_Error
(
CV_StsUnsupportedFormat
,
""
);
CV_Error
(
Error
::
StsUnsupportedFormat
,
""
);
#undef ONE_CALL
#undef ONE_CALL
}
}
}
}
// cv::hal::
}
// namespace hal
void
cv
::
integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
OutputArray
_tilted
,
int
sdepth
,
int
sqdepth
)
void
integral
(
InputArray
_src
,
OutputArray
_sum
,
OutputArray
_sqsum
,
OutputArray
_tilted
,
int
sdepth
,
int
sqdepth
)
{
{
CV_INSTRUMENT_REGION
();
CV_INSTRUMENT_REGION
();
...
@@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
...
@@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
src
.
cols
,
src
.
rows
,
cn
);
src
.
cols
,
src
.
rows
,
cn
);
}
}
void
cv
::
integral
(
InputArray
src
,
OutputArray
sum
,
int
sdepth
)
void
integral
(
InputArray
src
,
OutputArray
sum
,
int
sdepth
)
{
{
CV_INSTRUMENT_REGION
();
CV_INSTRUMENT_REGION
();
integral
(
src
,
sum
,
noArray
(),
noArray
(),
sdepth
);
integral
(
src
,
sum
,
noArray
(),
noArray
(),
sdepth
);
}
}
void
cv
::
integral
(
InputArray
src
,
OutputArray
sum
,
OutputArray
sqsum
,
int
sdepth
,
int
sqdepth
)
void
integral
(
InputArray
src
,
OutputArray
sum
,
OutputArray
sqsum
,
int
sdepth
,
int
sqdepth
)
{
{
CV_INSTRUMENT_REGION
();
CV_INSTRUMENT_REGION
();
integral
(
src
,
sum
,
sqsum
,
noArray
(),
sdepth
,
sqdepth
);
integral
(
src
,
sum
,
sqsum
,
noArray
(),
sdepth
,
sqdepth
);
}
}
}
// namespace
CV_IMPL
void
CV_IMPL
void
cvIntegral
(
const
CvArr
*
image
,
CvArr
*
sumImage
,
cvIntegral
(
const
CvArr
*
image
,
CvArr
*
sumImage
,
...
...
modules/imgproc/src/sumpixels.hpp
deleted
100644 → 0
View file @
b4316af8
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
#define OPENCV_IMGPROC_SUM_PIXELS_HPP
namespace
cv
{
namespace
opt_AVX512_SKX
{
#if CV_TRY_AVX512_SKX
void
calculate_integral_avx512
(
const
uchar
*
src
,
size_t
_srcstep
,
double
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
_sqsumstep
,
int
width
,
int
height
,
int
cn
);
#endif
}
// end namespace opt_AVX512_SKX
}
// end namespace cv
#endif
modules/imgproc/src/sumpixels.simd.hpp
View file @
09b3383a
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
// License Agreement
// License Agreement
// For Open Source Computer Vision Library
// For Open Source Computer Vision Library
//
//
// Copyright (C) 2000-20
08,2019
Intel Corporation, all rights reserved.
// Copyright (C) 2000-20
20
Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Third party copyrights are property of their respective owners.
...
@@ -41,13 +41,26 @@
...
@@ -41,13 +41,26 @@
//
//
//M*/
//M*/
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace
cv
#if CV_AVX512_SKX
{
#include "sumpixels.avx512_skx.hpp"
#endif
namespace
cv
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
bool
integral_SIMD
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
namespace
{
template
<
typename
T
,
typename
ST
,
typename
QT
>
template
<
typename
T
,
typename
ST
,
typename
QT
>
struct
Integral_SIMD
struct
Integral_SIMD
...
@@ -62,7 +75,7 @@ struct Integral_SIMD
...
@@ -62,7 +75,7 @@ struct Integral_SIMD
}
}
};
};
#if CV_AVX512_SKX
template
<>
template
<>
struct
Integral_SIMD
<
uchar
,
double
,
double
>
{
struct
Integral_SIMD
<
uchar
,
double
,
double
>
{
Integral_SIMD
()
{};
Integral_SIMD
()
{};
...
@@ -74,24 +87,19 @@ struct Integral_SIMD<uchar, double, double> {
...
@@ -74,24 +87,19 @@ struct Integral_SIMD<uchar, double, double> {
double
*
tilted
,
size_t
_tiltedstep
,
double
*
tilted
,
size_t
_tiltedstep
,
int
width
,
int
height
,
int
cn
)
const
int
width
,
int
height
,
int
cn
)
const
{
{
#if CV_TRY_AVX512_SKX
CV_UNUSED
(
_tiltedstep
);
CV_UNUSED
(
_tiltedstep
);
// TODO: Add support for 1 channel input (WIP)
// TODO: Add support for 1 channel input (WIP)
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
(
cn
<=
4
)){
if
(
!
tilted
&&
(
cn
<=
4
))
opt_AVX512_SKX
::
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
{
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
return
true
;
return
true
;
}
}
#else
// Avoid warnings in some builds
CV_UNUSED
(
src
);
CV_UNUSED
(
_srcstep
);
CV_UNUSED
(
sum
);
CV_UNUSED
(
_sumstep
);
CV_UNUSED
(
sqsum
);
CV_UNUSED
(
_sqsumstep
);
CV_UNUSED
(
tilted
);
CV_UNUSED
(
_tiltedstep
);
CV_UNUSED
(
width
);
CV_UNUSED
(
height
);
CV_UNUSED
(
cn
);
#endif
return
false
;
return
false
;
}
}
};
};
#endif
#if CV_SIMD && CV_SIMD_WIDTH <= 64
#if CV_SIMD && CV_SIMD_WIDTH <= 64
...
@@ -157,8 +165,6 @@ struct Integral_SIMD<uchar, int, double>
...
@@ -157,8 +165,6 @@ struct Integral_SIMD<uchar, int, double>
for
(
int
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
for
(
int
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
}
}
vx_cleanup
();
return
true
;
return
true
;
}
}
};
};
...
@@ -226,333 +232,26 @@ struct Integral_SIMD<uchar, float, double>
...
@@ -226,333 +232,26 @@ struct Integral_SIMD<uchar, float, double>
for
(
float
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
for
(
float
v
=
sum_row
[
j
-
1
]
-
prev_sum_row
[
j
-
1
];
j
<
width
;
++
j
)
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
sum_row
[
j
]
=
(
v
+=
src_row
[
j
])
+
prev_sum_row
[
j
];
}
}
vx_cleanup
();
return
true
;
return
true
;
}
}
};
};
#endif
#endif
// Scalar (non-SIMD) reference implementation of the integral image.
// Computes, for each channel independently:
//   sum(X,Y)    = sum over x<X, y<Y of src(x,y)
//   sqsum(X,Y)  = sum over x<X, y<Y of src(x,y)^2          (optional)
//   tilted(X,Y) = sum of src over the 45-degree rotated region (optional)
// Output images are one pixel larger than the source in each dimension;
// the first row/column is zero-filled.
//
// T  - source element type; ST - sum accumulator type; QT - squared-sum type.
// Steps (_srcstep, ...) are in bytes, as in the HAL convention; width/height
// are in pixels; cn is the channel count.
template<typename T, typename ST, typename QT>
void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
                QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep,
                int width, int height, int cn )
{
    int x, y, k;

    // Try the vectorized specialization first; it returns true when it
    // handled the whole job (only specialized for a few type combinations).
    if (Integral_SIMD<T, ST, QT>()(src, _srcstep,
                                   sum, _sumstep,
                                   sqsum, _sqsumstep,
                                   tilted, _tiltedstep,
                                   width, height, cn))
        return;

    // Convert byte steps to element steps.
    int srcstep = (int)(_srcstep/sizeof(T));
    int sumstep = (int)(_sumstep/sizeof(ST));
    int tiltedstep = (int)(_tiltedstep/sizeof(ST));
    int sqsumstep = (int)(_sqsumstep/sizeof(QT));

    // From here on, "width" counts interleaved elements, not pixels.
    width *= cn;

    // Zero the first output row, then advance past row 0 / column 0 so the
    // main loops can read sum[x - sumstep] and sum[-cn] unconditionally.
    memset( sum, 0, (width+cn)*sizeof(sum[0]));
    sum += sumstep + cn;

    if( sqsum )
    {
        memset( sqsum, 0, (width+cn)*sizeof(sqsum[0]));
        sqsum += sqsumstep + cn;
    }

    if( tilted )
    {
        memset( tilted, 0, (width+cn)*sizeof(tilted[0]));
        tilted += tiltedstep + cn;
    }

    if( sqsum == 0 && tilted == 0 )
    {
        // Plain sum only: row-wise running sum added to the row above.
        for( y = 0; y < height; y++, src += srcstep - cn, sum += sumstep - cn )
        {
            for( k = 0; k < cn; k++, src++, sum++ )
            {
                ST s = sum[-cn] = 0;
                for( x = 0; x < width; x += cn )
                {
                    s += src[x];
                    sum[x] = sum[x - sumstep] + s;
                }
            }
        }
    }
    else if( tilted == 0 )
    {
        // Sum + squared sum, same single-pass recurrence for both.
        for( y = 0; y < height; y++, src += srcstep - cn,
                        sum += sumstep - cn, sqsum += sqsumstep - cn )
        {
            for( k = 0; k < cn; k++, src++, sum++, sqsum++ )
            {
                ST s = sum[-cn] = 0;
                QT sq = sqsum[-cn] = 0;
                for( x = 0; x < width; x += cn )
                {
                    T it = src[x];
                    s += it;
                    sq += (QT)it*it;
                    ST t = sum[x - sumstep] + s;
                    QT tq = sqsum[x - sqsumstep] + sq;
                    sum[x] = t;
                    sqsum[x] = tq;
                }
            }
        }
    }
    else
    {
        // Tilted sum requested (sum always computed, sqsum optional).
        // "buf" keeps the previous row's source values; the tilted
        // recurrence combines the value above-left, the buffered diagonal
        // neighbors and the current pixel.
        AutoBuffer<ST> _buf(width+cn);
        ST* buf = _buf.data();
        ST s;
        QT sq;

        // First image row: all three outputs are just prefix sums of row 0.
        for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ )
        {
            sum[-cn] = tilted[-cn] = 0;

            for( x = 0, s = 0, sq = 0; x < width; x += cn )
            {
                T it = src[x];
                buf[x] = tilted[x] = it;
                s += it;
                sq += (QT)it*it;
                sum[x] = s;
                if( sqsum )
                    sqsum[x] = sq;
            }

            // Degenerate 1-pixel-wide image: make buf[cn] readable below.
            if( width == cn )
                buf[cn] = 0;

            if( sqsum )
            {
                sqsum[-cn] = 0;
                sqsum++;
            }
        }

        // Remaining rows.
        for( y = 1; y < height; y++ )
        {
            src += srcstep - cn;
            sum += sumstep - cn;
            tilted += tiltedstep - cn;
            buf += -cn;

            if( sqsum )
                sqsum += sqsumstep - cn;

            for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ )
            {
                T it = src[0];
                ST t0 = s = it;
                QT tq0 = sq = (QT)it*it;

                sum[-cn] = 0;
                if( sqsum )
                    sqsum[-cn] = 0;
                tilted[-cn] = tilted[-tiltedstep];

                // First column of the row.
                sum[0] = sum[-sumstep] + t0;
                if( sqsum )
                    sqsum[0] = sqsum[-sqsumstep] + tq0;
                tilted[0] = tilted[-tiltedstep] + t0 + buf[cn];

                // Interior columns: buf[x] still holds last row's src value
                // until it is overwritten with the diagonal pair sum.
                for( x = cn; x < width - cn; x += cn )
                {
                    ST t1 = buf[x];
                    buf[x - cn] = t1 + t0;
                    t0 = it = src[x];
                    tq0 = (QT)it*it;
                    s += t0;
                    sq += tq0;
                    sum[x] = sum[x - sumstep] + s;
                    if( sqsum )
                        sqsum[x] = sqsum[x - sqsumstep] + sq;
                    t1 += buf[x + cn] + t0 + tilted[x - tiltedstep - cn];
                    tilted[x] = t1;
                }

                // Last column (no right diagonal neighbor).
                if( width > cn )
                {
                    ST t1 = buf[x];
                    buf[x - cn] = t1 + t0;
                    t0 = it = src[x];
                    tq0 = (QT)it*it;
                    s += t0;
                    sq += tq0;
                    sum[x] = sum[x - sumstep] + s;
                    if( sqsum )
                        sqsum[x] = sqsum[x - sqsumstep] + sq;
                    tilted[x] = t0 + t1 + tilted[x - tiltedstep - cn];
                    buf[x] = t0;
                }

                if( sqsum )
                    sqsum++;
            }
        }
    }
}
#ifdef HAVE_OPENCL
// OpenCL path for the sum-only integral image.
// Returns false when the source type / accumulator depth combination is
// unsupported on the current device, so the caller can fall back to the CPU.
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
    // CV_64F accumulation requires device-side double support.
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    CV_INSTRUMENT_REGION();

    if ( (_src.type() != CV_8UC1) ||
        !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F)))
        return false;

    // The kernels process the image in tileSize x tileSize tiles.
    static const int tileSize = 16;

    String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s",
                              ocl::typeToStr(sdepth), tileSize,
                              doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    // Pass 1: per-tile column sums.
    ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (kcols.empty())
        return false;

    UMat src = _src.getUMat();
    Size src_size = src.size();
    // Intermediate buffer is laid out transposed and padded up to a
    // multiple of the tile size in both dimensions.
    Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize,
                 ((src_size.width + tileSize - 1) / tileSize) * tileSize);
    UMat buf(bufsize, sdepth);
    kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf));
    size_t gt = src.cols, lt = tileSize;
    if (!kcols.run(1, &gt, &lt, false))
        return false;

    // Pass 2: accumulate tile sums along rows into the final integral.
    ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (krows.empty())
        return false;

    // Integral image is one pixel larger than the source in each dimension.
    Size sumsize(src_size.width + 1, src_size.height + 1);
    _sum.create(sumsize, sdepth);
    UMat sum = _sum.getUMat();

    krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum));
    gt = src.rows;
    return krows.run(1, &gt, &lt, false);
}
// OpenCL path computing both the sum and the squared-sum integral images.
// Returns false when the combination is unsupported so the caller falls back.
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    // Either output at CV_64F needs device-side double support.
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
        return false;

    // The kernels process the image in tileSize x tileSize tiles;
    // SUM_SQUARE enables the squared-sum outputs in the same kernels.
    static const int tileSize = 16;

    String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
                              ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
                              tileSize,
                              doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    // Pass 1: per-tile column sums (and squared sums).
    ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (kcols.empty())
        return false;

    UMat src = _src.getUMat();
    Size src_size = src.size();
    // Intermediate buffers, transposed and padded to tile multiples.
    Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize,
                 ((src_size.width + tileSize - 1) / tileSize) * tileSize);
    UMat buf(bufsize, sdepth);
    UMat buf_sq(bufsize, sqdepth);
    kcols.args(ocl::KernelArg::ReadOnly(src),
               ocl::KernelArg::WriteOnlyNoSize(buf),
               ocl::KernelArg::WriteOnlyNoSize(buf_sq));
    size_t gt = src.cols, lt = tileSize;
    if (!kcols.run(1, &gt, &lt, false))
        return false;

    // Pass 2: accumulate along rows into the final integral images.
    ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
    if (krows.empty())
        return false;

    // Outputs are one pixel larger than the source in each dimension.
    Size sumsize(src_size.width + 1, src_size.height + 1);
    _sum.create(sumsize, sdepth);
    UMat sum = _sum.getUMat();
    _sqsum.create(sumsize, sqdepth);
    UMat sum_sq = _sqsum.getUMat();

    krows.args(ocl::KernelArg::ReadOnlyNoSize(buf),
               ocl::KernelArg::ReadOnlyNoSize(buf_sq),
               ocl::KernelArg::WriteOnly(sum),
               ocl::KernelArg::WriteOnlyNoSize(sum_sq));
    gt = src.rows;
    return krows.run(1, &gt, &lt, false);
}
#endif
}
#if defined(HAVE_IPP)
namespace
cv
{
// Intel IPP acceleration for the integral image.
// Returns true when IPP handled the request; false means "not supported
// here" and the caller must fall back to the generic implementation.
// Only single-channel, non-tilted cases are covered by IPP.
//
// depth/sdepth/sqdepth are CV_* depth constants; steps are in bytes.
static bool ipp_integral(
    int depth, int sdepth, int sqdepth,
    const uchar* src, size_t srcstep,
    uchar* sum, size_t sumstep,
    uchar* sqsum, size_t sqsumstep,
    uchar* tilted, size_t tstep,
    int width, int height, int cn)
{
    CV_INSTRUMENT_REGION_IPP();

    IppiSize size = {width, height};

    // IPP provides single-channel primitives only.
    if (cn > 1)
        return false;
    // No IPP counterpart for the rotated (tilted) integral image.
    if (tilted)
    {
        CV_UNUSED(tstep);
        return false;
    }

    if (!sqsum)
    {
        // Sum-only variants.
        // Note: compare against pure depth constants (CV_8U/CV_32F), not
        // full types like CV_8UC1/CV_32FC1 — the values coincide for a
        // single channel, but `depth` is a depth, not a type.
        if (depth == CV_8U && sdepth == CV_32S)
            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0;
        else if (depth == CV_8U && sdepth == CV_32F)
            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0;
        else if (depth == CV_32F && sdepth == CV_32F)
            return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0;
        else
            return false;
    }
    else
    {
        // Combined sum + squared-sum variants.
        if (depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S)
            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
        else if (depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F)
            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
        else if (depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F)
            return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
        else
            return false;
    }
}
}
#endif
namespace
cv
{
namespace
hal
{
void
integral
(
int
depth
,
int
sdepth
,
int
sqdepth
,
const
uchar
*
src
,
size_t
srcstep
,
uchar
*
sum
,
size_t
sumstep
,
uchar
*
sqsum
,
size_t
sqsumstep
,
uchar
*
tilted
,
size_t
tstep
,
int
width
,
int
height
,
int
cn
)
{
CALL_HAL
(
integral
,
cv_hal_integral
,
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
);
CV_IPP_RUN_FAST
(
ipp_integral
(
depth
,
sdepth
,
sqdepth
,
src
,
srcstep
,
sum
,
sumstep
,
sqsum
,
sqsumstep
,
tilted
,
tstep
,
width
,
height
,
cn
));
#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
#define ONE_CALL(T, ST, QT) \
return Integral_SIMD<T, ST, QT>()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn)
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
if
(
depth
==
CV_8U
&&
sdepth
==
CV_32S
&&
sqdepth
==
CV_64F
)
ONE_CALL
(
uchar
,
int
,
double
);
ONE_CALL
(
uchar
,
int
,
double
);
...
@@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth,
...
@@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth,
else
if
(
depth
==
CV_64F
&&
sdepth
==
CV_64F
&&
sqdepth
==
CV_64F
)
else
if
(
depth
==
CV_64F
&&
sdepth
==
CV_64F
&&
sqdepth
==
CV_64F
)
ONE_CALL
(
double
,
double
,
double
);
ONE_CALL
(
double
,
double
,
double
);
else
else
CV_Error
(
CV_StsUnsupportedFormat
,
""
)
;
return
false
;
#undef ONE_CALL
#undef ONE_CALL
}
}
#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
}}
// cv::hal::
}}
// cv::hal::
// Main dispatcher for cv::integral: resolves default accumulator depths,
// tries the OpenCL path, allocates the output images and forwards to the
// HAL entry point.
void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted,
                   int sdepth, int sqdepth )
{
    CV_INSTRUMENT_REGION();

    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    // Default accumulator depths: 32-bit int sums suffice for 8-bit
    // sources; everything else (and all squared sums) defaults to double.
    if( sdepth <= 0 )
        sdepth = depth == CV_8U ? CV_32S : CV_64F;
    if ( sqdepth <= 0 )
         sqdepth = CV_64F;
    sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth);

    // OpenCL path (no tilted-sum support there).
    CV_OCL_RUN( _sum.isUMat() && !_tilted.needed(),
                (_sqsum.needed() ? ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth) :
                                   ocl_integral(_src, _sum, sdepth)));

    // Integral images are one pixel larger than the source in each dimension.
    Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1);
    _sum.create( isize, CV_MAKETYPE(sdepth, cn) );
    Mat src = _src.getMat(), sum = _sum.getMat(), sqsum, tilted;

    if( _sqsum.needed() )
    {
        _sqsum.create( isize, CV_MAKETYPE(sqdepth, cn) );
        sqsum = _sqsum.getMat();
    };

    if( _tilted.needed() )
    {
        _tilted.create( isize, CV_MAKETYPE(sdepth, cn) );
        tilted = _tilted.getMat();
    }

    // Unrequested outputs are empty Mats; their ptr()/step are null/zero,
    // which the HAL interprets as "not needed".
    hal::integral( depth, sdepth, sqdepth,
                   src.ptr(), src.step,
                   sum.ptr(), sum.step,
                   sqsum.ptr(), sqsum.step,
                   tilted.ptr(), tilted.step,
                   src.cols, src.rows, cn );
}
// Convenience overload: compute only the sum integral image.
void cv::integral( InputArray src, OutputArray sum, int sdepth )
{
    CV_INSTRUMENT_REGION();

    // Delegate to the full overload, requesting neither the squared-sum
    // nor the tilted-sum output; sqdepth stays at its default (-1).
    cv::integral( src, sum, cv::noArray(), cv::noArray(), sdepth, -1 );
}
// Convenience overload: compute the sum and squared-sum integral images.
void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum,
                   int sdepth, int sqdepth )
{
    CV_INSTRUMENT_REGION();

    // Delegate to the full overload without the tilted-sum output.
    cv::integral( src, sum, sqsum, cv::noArray(), sdepth, sqdepth );
}
// Legacy C-API wrapper around cv::integral.
// The output arrays are pre-allocated by the caller; the wrapper verifies
// afterwards that cv::integral filled them in place (no reallocation).
CV_IMPL void
cvIntegral( const CvArr* image, CvArr* sumImage,
            CvArr* sumSqImage, CvArr* tiltedSumImage )
{
    // Wrap the C arrays as cv::Mat headers (no data is copied).
    cv::Mat srcMat = cv::cvarrToMat(image);
    cv::Mat sumMat = cv::cvarrToMat(sumImage);
    cv::Mat sumMat0 = sumMat;

    cv::Mat sqsumMat0, sqsumMat, tiltedMat0, tiltedMat;
    cv::Mat* sqsumPtr = 0;
    cv::Mat* tiltedPtr = 0;

    if( sumSqImage )
    {
        sqsumMat0 = sqsumMat = cv::cvarrToMat(sumSqImage);
        sqsumPtr = &sqsumMat;
    }

    if( tiltedSumImage )
    {
        tiltedMat0 = tiltedMat = cv::cvarrToMat(tiltedSumImage);
        tiltedPtr = &tiltedMat;
    }

    cv::integral( srcMat, sumMat,
                  sqsumPtr ? cv::_OutputArray(*sqsumPtr) : cv::_OutputArray(),
                  tiltedPtr ? cv::_OutputArray(*tiltedPtr) : cv::_OutputArray(),
                  sumMat.depth() );

    // The C API requires in-place results: cv::integral must not have
    // reallocated any of the caller-provided outputs.
    CV_Assert( sumMat.data == sumMat0.data &&
               sqsumMat.data == sqsumMat0.data &&
               tiltedMat.data == tiltedMat0.data );
}
/* End of file. */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment