Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1db5d82b
Commit
1db5d82b
authored
Feb 20, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #13844 from brad-kelly:integral_avx512_cn234
parents
3064e40d
507f8add
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
230 additions
and
96 deletions
+230
-96
perf_integral.cpp
modules/imgproc/perf/perf_integral.cpp
+7
-7
sumpixels.avx512_skx.cpp
modules/imgproc/src/sumpixels.avx512_skx.cpp
+220
-84
sumpixels.cpp
modules/imgproc/src/sumpixels.cpp
+2
-3
test_filter.cpp
modules/imgproc/test/test_filter.cpp
+1
-2
No files found.
modules/imgproc/perf/perf_integral.cpp
View file @
1db5d82b
...
...
@@ -11,7 +11,7 @@ typedef perf::TestBaseWithParam<Size_MatType_OutMatDepth_t> Size_MatType_OutMatD
PERF_TEST_P
(
Size_MatType_OutMatDepth
,
integral
,
testing
::
Combine
(
testing
::
Values
(
TYPICAL_MAT_SIZES
),
testing
::
Values
(
CV_8UC1
,
CV_8UC3
,
CV_8UC4
),
testing
::
Values
(
CV_8UC1
,
CV_8UC
2
,
CV_8UC
3
,
CV_8UC4
),
testing
::
Values
(
CV_32S
,
CV_32F
,
CV_64F
)
)
)
...
...
@@ -32,9 +32,9 @@ PERF_TEST_P(Size_MatType_OutMatDepth, integral,
PERF_TEST_P
(
Size_MatType_OutMatDepth
,
integral_sqsum
,
testing
::
Combine
(
testing
::
Values
(
::
perf
::
szVGA
,
::
perf
::
sz1080p
),
testing
::
Values
(
CV_8UC1
,
CV_8UC4
),
testing
::
Values
(
CV_32S
,
CV_32F
)
testing
::
Values
(
TYPICAL_MAT_SIZES
),
testing
::
Values
(
CV_8UC1
,
CV_8UC
2
,
CV_8UC3
,
CV_8UC
4
),
testing
::
Values
(
CV_32S
,
CV_32F
,
CV_64F
)
)
)
{
...
...
@@ -57,9 +57,9 @@ PERF_TEST_P(Size_MatType_OutMatDepth, integral_sqsum,
PERF_TEST_P
(
Size_MatType_OutMatDepth
,
integral_sqsum_tilted
,
testing
::
Combine
(
testing
::
Values
(
::
perf
::
szVGA
,
::
perf
::
szODD
,
::
perf
::
sz1080p
),
testing
::
Values
(
CV_8UC1
,
CV_8UC4
),
testing
::
Values
(
CV_32S
,
CV_32F
)
testing
::
Values
(
TYPICAL_MAT_SIZES
),
testing
::
Values
(
CV_8UC1
,
CV_8UC
2
,
CV_8UC3
,
CV_8UC
4
),
testing
::
Values
(
CV_32S
,
CV_32F
,
CV_64F
)
)
)
{
...
...
modules/imgproc/src/sumpixels.avx512_skx.cpp
View file @
1db5d82b
...
...
@@ -13,33 +13,34 @@ namespace { // Anonymous namespace to avoid exposing the implementation classes
// NOTE: Look at the bottom of the file for the entry-point function for external callers
//
// At the moment only 3 channel support untilted is supported
// More channel support coming soon.
// TODO: Add support for sqsum and 1,2, and 4 channels
class
IntegralCalculator_3Channel
{
// TODO: Add support for 1 channel input (WIP: currently hitting hardware glassjaw)
template
<
size_t
num_channels
>
class
IntegralCalculator
;
template
<
size_t
num_channels
>
class
IntegralCalculator
{
public
:
IntegralCalculator
_3Channel
()
{};
IntegralCalculator
()
{};
void
calculate_integral_avx512
(
const
uchar
*
src
,
size_t
_srcstep
,
double
*
sum
,
size_t
_sumstep
,
double
*
sqsum
,
size_t
_sqsumstep
,
int
width
,
int
height
,
int
cn
)
int
width
,
int
height
)
{
const
int
srcstep
=
(
int
)(
_srcstep
/
sizeof
(
uchar
));
const
int
sumstep
=
(
int
)(
_sumstep
/
sizeof
(
double
));
const
int
sqsumstep
=
(
int
)(
_sqsumstep
/
sizeof
(
double
));
const
int
ops_per_line
=
width
*
cn
;
const
int
ops_per_line
=
width
*
num_channels
;
// Clear the first line of the sum as per spec (see integral documentation)
// Also adjust the index of sum and sqsum to be at the real 0th element
// and not point to the border pixel so it stays in sync with the src pointer
memset
(
sum
,
0
,
(
ops_per_line
+
cn
)
*
sizeof
(
double
));
sum
+=
cn
;
memset
(
sum
,
0
,
(
ops_per_line
+
num_channels
)
*
sizeof
(
double
));
sum
+=
num_channels
;
if
(
sqsum
)
{
memset
(
sqsum
,
0
,
(
ops_per_line
+
cn
)
*
sizeof
(
double
));
sqsum
+=
cn
;
memset
(
sqsum
,
0
,
(
ops_per_line
+
num_channels
)
*
sizeof
(
double
));
sqsum
+=
num_channels
;
}
// Now calculate the integral over the whole image one line at a time
...
...
@@ -50,22 +51,22 @@ public:
double
*
sqsum_above
=
(
sqsum
)
?
&
sqsum
[
y
*
sqsumstep
]
:
NULL
;
double
*
sqsum_line
=
(
sqsum
)
?
&
sqsum_above
[
sqsumstep
]
:
NULL
;
integral_line_3channel_avx512
(
src_line
,
sum_line
,
sum_above
,
sqsum_line
,
sqsum_above
,
ops_per_line
);
calculate_integral_for_line
(
src_line
,
sum_line
,
sum_above
,
sqsum_line
,
sqsum_above
,
ops_per_line
);
}
}
static
inline
void
integral_line_3channel_avx512
(
const
uchar
*
srcs
,
double
*
sums
,
double
*
sums_above
,
double
*
sqsums
,
double
*
sqsums_above
,
int
num_ops_in_line
)
static
CV_ALWAYS_INLINE
void
calculate_integral_for_line
(
const
uchar
*
srcs
,
double
*
sums
,
double
*
sums_above
,
double
*
sqsums
,
double
*
sqsums_above
,
int
num_ops_in_line
)
{
__m512i
sum_accumulator
=
_mm512_setzero_si512
();
// holds rolling sums for the line
__m512i
sqsum_accumulator
=
_mm512_setzero_si512
();
// holds rolling sqsums for the line
// The first element on each line must be zeroes as per spec (see integral documentation)
set_border_pixel_value
(
sums
,
sqsums
);
zero_out_border_pixel
(
sums
,
sqsums
);
// Do all 64 byte chunk operations then do the last bits that don't fit in a 64 byte chunk
aligned_integral
(
srcs
,
sums
,
sums_above
,
sqsums
,
sqsums_above
,
sum_accumulator
,
sqsum_accumulator
,
num_ops_in_line
);
...
...
@@ -74,20 +75,18 @@ public:
}
static
inline
void
set_border_pixel_value
(
double
*
sums
,
double
*
sqsums
)
static
CV_ALWAYS_INLINE
void
zero_out_border_pixel
(
double
*
sums
,
double
*
sqsums
)
{
// Sets the border pixel value to 0s.
// Note the hard coded -3 and the 0x7 mask is because we only support 3 channel right now
__m512i
zeroes
=
_mm512_setzero_si512
();
_mm512_mask_storeu_epi64
(
&
sums
[
-
3
],
0x7
,
zeroes
);
// Note the negative index is because the sums/sqsums pointers point to the first real pixel
// after the border pixel so we have to look backwards
_mm512_mask_storeu_epi64
(
&
sums
[
-
num_channels
],
(
1
<<
num_channels
)
-
1
,
_mm512_setzero_si512
());
if
(
sqsums
)
_mm512_mask_storeu_epi64
(
&
sqsums
[
-
3
],
0x7
,
zeroes
);
_mm512_mask_storeu_epi64
(
&
sqsums
[
-
num_channels
],
(
1
<<
num_channels
)
-
1
,
_mm512_setzero_si512
()
);
}
static
inline
static
CV_ALWAYS_INLINE
void
aligned_integral
(
const
uchar
*&
srcs
,
double
*&
sums
,
double
*&
sums_above
,
double
*&
sqsum
,
double
*&
sqsum_above
,
...
...
@@ -110,7 +109,7 @@ public:
}
static
inline
static
CV_ALWAYS_INLINE
void
post_aligned_integral
(
const
uchar
*
srcs
,
const
double
*
sums
,
const
double
*
sums_above
,
const
double
*
sqsum
,
const
double
*
sqsum_above
,
...
...
@@ -132,50 +131,54 @@ public:
}
static
inline
static
CV_ALWAYS_INLINE
void
integral_64_operations_avx512
(
const
__m512i
*
srcs
,
__m512i
*
sums
,
const
__m512i
*
sums_above
,
__m512i
*
sqsums
,
const
__m512i
*
sqsums_above
,
__mmask64
data_mask
,
__m512i
&
sum_accumulator
,
__m512i
&
sqsum_accumulator
)
{
__m512i
src_64byte_chunk
=
read_64_bytes
(
srcs
,
data_mask
);
for
(
int
num_16byte_chunks
=
0
;
num_16byte_chunks
<
4
;
num_16byte_chunks
++
)
{
__m128i
src_16bytes
=
_mm512_extracti64x2_epi64
(
src_64byte_chunk
,
0x0
);
// Get lower 16 bytes of data
__m512i
src_64byte_chunk
=
read_64_bytes
(
srcs
,
data_mask
);
for
(
int
num_8byte_chunks
=
0
;
num_8byte_chunks
<
2
;
num_8byte_chunks
++
)
{
while
(
data_mask
)
{
__m128i
src_16bytes
=
extract_lower_16bytes
(
src_64byte_chunk
);
__m512i
src_longs
=
convert_lower_8bytes_to_longs
(
src_16bytes
);
__m512i
src_longs_lo
=
convert_lower_8bytes_to_longs
(
src_16bytes
);
__m512i
src_longs_hi
=
convert_lower_8bytes_to_longs
(
shift_right_8_bytes
(
src_16bytes
));
// Calculate integral for the sum on the 8 entries
integral_8_operations
(
src_longs
,
sums_above
,
data_mask
,
sums
,
sum_accumulator
);
sums
++
;
sums_above
++
;
// Calculate integral for the sum on the 8 lanes at a time
integral_8_operations
(
src_longs_lo
,
sums_above
,
data_mask
,
sums
,
sum_accumulator
);
integral_8_operations
(
src_longs_hi
,
sums_above
+
1
,
data_mask
>>
8
,
sums
+
1
,
sum_accumulator
)
;
if
(
sqsums
){
// Calculate integral for the sum on the 8 entries
__m512i
squared_source
=
_mm512_mullo_epi64
(
src_longs
,
src_longs
);
integral_8_operations
(
squared_source
,
sqsums_above
,
data_mask
,
sqsums
,
sqsum_accumulator
);
sqsums
++
;
sqsums_above
++
;
}
// Prepare for next iteration of inner loop
// shift source to align next 8 bytes to lane 0 and shift the mask
src_16bytes
=
shift_right_8_bytes
(
src_16bytes
);
data_mask
=
data_mask
>>
8
;
if
(
sqsums
)
{
__m512i
squared_source_lo
=
square_m512
(
src_longs_lo
);
__m512i
squared_source_hi
=
square_m512
(
src_longs_hi
);
integral_8_operations
(
squared_source_lo
,
sqsums_above
,
data_mask
,
sqsums
,
sqsum_accumulator
);
integral_8_operations
(
squared_source_hi
,
sqsums_above
+
1
,
data_mask
>>
8
,
sqsums
+
1
,
sqsum_accumulator
);
sqsums
+=
2
;
sqsums_above
+=
2
;
}
// Prepare for next iteration of outer loop
// Prepare for next iteration of loop
// shift source to align next 16 bytes to lane 0, shift the mask, and advance the pointers
sums
+=
2
;
sums_above
+=
2
;
data_mask
=
data_mask
>>
16
;
src_64byte_chunk
=
shift_right_16_bytes
(
src_64byte_chunk
);
}
}
static
inline
static
CV_ALWAYS_INLINE
void
integral_8_operations
(
const
__m512i
src_longs
,
const
__m512i
*
above_values_ptr
,
__mmask64
data_mask
,
__m512i
*
results_ptr
,
__m512i
&
accumulator
)
{
// NOTE that the calculate_integral function referenced here must be implemented in the templated
// derivatives because the algorithm depends heavily on the number of channels in the image
//
_mm512_mask_storeu_pd
(
results_ptr
,
// Store the result here
data_mask
,
// Using the data mask to avoid overrunning the line
...
...
@@ -188,59 +191,178 @@ public:
}
static
inline
__m512d
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
__m512i
carryover_idxs
=
_mm512_set_epi64
(
6
,
5
,
7
,
6
,
5
,
7
,
6
,
5
);
// Align data to prepare for the adds:
// shifts data left by 3 and 6 qwords(lanes) and gets rolling sum in all lanes
// Vertical LANES: 76543210
// src_longs : HGFEDCBA
// shited3lanes : + EDCBA
// shifted6lanes : + BA
// carry_over_idxs : + 65765765 (index position of result from previous iteration)
// = integral
__m512i
shifted3lanes
=
_mm512_maskz_expand_epi64
(
0xF8
,
src_longs
);
__m512i
shifted6lanes
=
_mm512_maskz_expand_epi64
(
0xC0
,
src_longs
);
__m512i
carry_over
=
_mm512_permutex2var_epi64
(
accumulator
,
carryover_idxs
,
accumulator
);
// Do the adds in tree form (shift3 + shift 6) + (current_source_values + accumulator)
__m512i
sum_shift3and6
=
_mm512_add_epi64
(
shifted3lanes
,
shifted6lanes
);
__m512i
sum_src_carry
=
_mm512_add_epi64
(
src_longs
,
carry_over
);
accumulator
=
_mm512_add_epi64
(
sum_shift3and6
,
sum_src_carry
);
// Convert to packed double and add to the line above to get the true integral value
__m512d
accumulator_pd
=
_mm512_cvtepu64_pd
(
accumulator
);
__m512d
integral_pd
=
_mm512_add_pd
(
accumulator_pd
,
above_values
);
return
integral_pd
;
}
// The calculate_integral function referenced here must be implemented in the templated derivatives
// because the algorithm depends heavily on the number of channels in the image
// This is the incomplete definition (just the prototype) here.
//
static
CV_ALWAYS_INLINE
__m512d
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
);
static
inline
static
CV_ALWAYS_INLINE
__m512i
read_64_bytes
(
const
__m512i
*
srcs
,
__mmask64
data_mask
)
{
return
_mm512_maskz_loadu_epi8
(
data_mask
,
srcs
);
}
static
inline
static
CV_ALWAYS_INLINE
__m128i
extract_lower_16bytes
(
__m512i
src_64byte_chunk
)
{
return
_mm512_extracti64x2_epi64
(
src_64byte_chunk
,
0x0
);
}
static
CV_ALWAYS_INLINE
__m512i
convert_lower_8bytes_to_longs
(
__m128i
src_16bytes
)
{
return
_mm512_cvtepu8_epi64
(
src_16bytes
);
}
static
inline
static
CV_ALWAYS_INLINE
__m512i
square_m512
(
__m512i
src_longs
)
{
return
_mm512_mullo_epi64
(
src_longs
,
src_longs
);
}
static
CV_ALWAYS_INLINE
__m128i
shift_right_8_bytes
(
__m128i
src_16bytes
)
{
return
_mm_maskz_compress_epi64
(
2
,
src_16bytes
);
}
static
inline
static
CV_ALWAYS_INLINE
__m512i
shift_right_16_bytes
(
__m512i
src_64byte_chunk
)
{
return
_mm512_maskz_compress_epi64
(
0xFC
,
src_64byte_chunk
);
}
};
//============================================================================================================
// This the only section that needs to change with respect to algorithm based on the number of channels
// It is responsible for returning the calculation of 8 lanes worth of the integral and returning in the
// accumulated sums in the accumulator parameter (NOTE: accumulator is an input and output parameter)
//
// The function prototype that needs to be implemented is:
//
// __m512d calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator){ ... }
//
// Description of parameters:
// INPUTS:
// src_longs : 8 lanes worth of the source bytes converted to 64 bit integers
// above_values: 8 lanes worth of the result values from the line above (See the integral spec)
// accumulator : 8 lanes worth of sums from the previous iteration
// IMPORTANT NOTE: This parameter is both an INPUT AND OUTPUT parameter to this function
//
// OUTPUTS:
// return value: The actual integral value for all 8 lanes which is defined by the spec as
// the sum of all channel values to the left of a given pixel plus the result
// written to the line directly above the current line.
// accumulator: This is an input and and output. This parameter should be left with the accumulated
// sums for the current 8 lanes keeping all entries in the proper lane (do not shuffle it)
//
// Below here is the channel specific implementation
//
//========================================
// 2 Channel Integral Implementation
//========================================
template
<>
CV_ALWAYS_INLINE
__m512d
IntegralCalculator
<
2
>
::
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
__m512i
carryover_idxs
=
_mm512_set_epi64
(
7
,
6
,
7
,
6
,
7
,
6
,
7
,
6
);
// Align data to prepare for the adds:
// shifts data left by 3 and 6 qwords(lanes) and gets rolling sum in all lanes
// Vertical LANES : 76543210
// src_longs : HGFEDCBA
// shift2lanes : + FEDCBA
// shift4lanes : + DCBA
// shift6lanes : + BA
// carry_over_idxs : + 76767676 (index position of result from previous iteration)
// = integral
__m512i
shift2lanes
=
_mm512_maskz_expand_epi64
(
0xFC
,
src_longs
);
__m512i
shift4lanes
=
_mm512_maskz_expand_epi64
(
0xF0
,
src_longs
);
__m512i
shift6lanes
=
_mm512_maskz_expand_epi64
(
0xC0
,
src_longs
);
__m512i
carry_over
=
_mm512_permutex2var_epi64
(
accumulator
,
carryover_idxs
,
accumulator
);
// Add all values in tree form for perf ((0+2) + (4+6))
__m512i
sum_shift_02
=
_mm512_add_epi64
(
src_longs
,
shift2lanes
);
__m512i
sum_shift_46
=
_mm512_add_epi64
(
shift4lanes
,
shift6lanes
);
__m512i
sum_all
=
_mm512_add_epi64
(
sum_shift_02
,
sum_shift_46
);
accumulator
=
_mm512_add_epi64
(
sum_all
,
carry_over
);
// Convert to packed double and add to the line above to get the true integral value
__m512d
accumulator_pd
=
_mm512_cvtepu64_pd
(
accumulator
);
__m512d
integral_pd
=
_mm512_add_pd
(
accumulator_pd
,
above_values
);
return
integral_pd
;
}
//========================================
// 3 Channel Integral Implementation
//========================================
template
<>
CV_ALWAYS_INLINE
__m512d
IntegralCalculator
<
3
>
::
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
__m512i
carryover_idxs
=
_mm512_set_epi64
(
6
,
5
,
7
,
6
,
5
,
7
,
6
,
5
);
// Align data to prepare for the adds:
// shifts data left by 3 and 6 qwords(lanes) and gets rolling sum in all lanes
// Vertical LANES: 76543210
// src_longs : HGFEDCBA
// shit3lanes : + EDCBA
// shift6lanes : + BA
// carry_over_idxs : + 65765765 (index position of result from previous iteration)
// = integral
__m512i
shift3lanes
=
_mm512_maskz_expand_epi64
(
0xF8
,
src_longs
);
__m512i
shift6lanes
=
_mm512_maskz_expand_epi64
(
0xC0
,
src_longs
);
__m512i
carry_over
=
_mm512_permutex2var_epi64
(
accumulator
,
carryover_idxs
,
accumulator
);
// Do the adds in tree form
__m512i
sum_shift_03
=
_mm512_add_epi64
(
src_longs
,
shift3lanes
);
__m512i
sum_rest
=
_mm512_add_epi64
(
shift6lanes
,
carry_over
);
accumulator
=
_mm512_add_epi64
(
sum_shift_03
,
sum_rest
);
// Convert to packed double and add to the line above to get the true integral value
__m512d
accumulator_pd
=
_mm512_cvtepu64_pd
(
accumulator
);
__m512d
integral_pd
=
_mm512_add_pd
(
accumulator_pd
,
above_values
);
return
integral_pd
;
}
//========================================
// 4 Channel Integral Implementation
//========================================
template
<>
CV_ALWAYS_INLINE
__m512d
IntegralCalculator
<
4
>
::
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
__m512i
carryover_idxs
=
_mm512_set_epi64
(
7
,
6
,
5
,
4
,
7
,
6
,
5
,
4
);
// Align data to prepare for the adds:
// shifts data left by 3 and 6 qwords(lanes) and gets rolling sum in all lanes
// Vertical LANES: 76543210
// src_longs : HGFEDCBA
// shit4lanes : + DCBA
// carry_over_idxs : + 76547654 (index position of result from previous iteration)
// = integral
__m512i
shifted4lanes
=
_mm512_maskz_expand_epi64
(
0xF0
,
src_longs
);
__m512i
carry_over
=
_mm512_permutex2var_epi64
(
accumulator
,
carryover_idxs
,
accumulator
);
// Add data pixels and carry over from last iteration
__m512i
sum_shift_04
=
_mm512_add_epi64
(
src_longs
,
shifted4lanes
);
accumulator
=
_mm512_add_epi64
(
sum_shift_04
,
carry_over
);
// Convert to packed double and add to the line above to get the true integral value
__m512d
accumulator_pd
=
_mm512_cvtepu64_pd
(
accumulator
);
__m512d
integral_pd
=
_mm512_add_pd
(
accumulator_pd
,
above_values
);
return
integral_pd
;
}
}
// end of anonymous namespace
namespace
opt_AVX512_SKX
{
...
...
@@ -253,8 +375,22 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
double
*
sqsum
,
size_t
_sqsumstep
,
int
width
,
int
height
,
int
cn
)
{
IntegralCalculator_3Channel
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
switch
(
cn
){
case
2
:
{
IntegralCalculator
<
2
>
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
break
;
}
case
3
:
{
IntegralCalculator
<
3
>
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
break
;
}
case
4
:
{
IntegralCalculator
<
4
>
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
}
}
}
...
...
modules/imgproc/src/sumpixels.cpp
View file @
1db5d82b
...
...
@@ -46,7 +46,6 @@
#include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace
cv
{
...
...
@@ -77,8 +76,8 @@ struct Integral_SIMD<uchar, double, double> {
{
#if CV_TRY_AVX512_SKX
CV_UNUSED
(
_tiltedstep
);
// TODO:
Add support for 1,2, and 4 channels
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
cn
==
3
){
// TODO:
Add support for 1 channel input (WIP)
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
((
cn
>=
2
)
&&
(
cn
<=
4
))
){
opt_AVX512_SKX
::
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
return
true
;
...
...
modules/imgproc/test/test_filter.cpp
View file @
1db5d82b
...
...
@@ -1667,12 +1667,11 @@ void CV_IntegralTest::get_test_array_types_and_sizes( int test_case_idx,
{
RNG
&
rng
=
ts
->
get_rng
();
int
depth
=
cvtest
::
randInt
(
rng
)
%
2
,
sum_depth
;
int
cn
=
cvtest
::
randInt
(
rng
)
%
3
+
1
;
int
cn
=
cvtest
::
randInt
(
rng
)
%
4
+
1
;
cvtest
::
ArrayTest
::
get_test_array_types_and_sizes
(
test_case_idx
,
sizes
,
types
);
Size
sum_size
;
depth
=
depth
==
0
?
CV_8U
:
CV_32F
;
cn
+=
cn
==
2
;
int
b
=
(
cvtest
::
randInt
(
rng
)
&
1
)
!=
0
;
sum_depth
=
depth
==
CV_8U
&&
b
?
CV_32S
:
b
?
CV_32F
:
CV_64F
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment