Commit 09b3383a authored by Alexander Alekhin's avatar Alexander Alekhin

imgproc: dispatch sumpixels (integral)

parent b4316af8
...@@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2) ...@@ -9,5 +9,6 @@ ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(median_blur SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(morph SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
ocv_add_dispatched_file(undistort SSE2 AVX2) ocv_add_dispatched_file(undistort SSE2 AVX2)
ocv_define_module(imgproc opencv_core WRAP java python js) ocv_define_module(imgproc opencv_core WRAP java python js)
...@@ -2,14 +2,13 @@ ...@@ -2,14 +2,13 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory // It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html. // of this distribution and at http://opencv.org/license.html.
// //
// Copyright (C) 2019, Intel Corporation, all rights reserved. // Copyright (C) 2019-2020, Intel Corporation, all rights reserved.
#include "precomp.hpp"
#include "sumpixels.hpp"
#include "opencv2/core/hal/intrin.hpp" #include "opencv2/core/hal/intrin.hpp"
namespace cv { namespace hal {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
namespace cv {
namespace { // Anonymous namespace to avoid exposing the implementation classes namespace { // Anonymous namespace to avoid exposing the implementation classes
// //
...@@ -432,16 +431,14 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c ...@@ -432,16 +431,14 @@ __m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, c
} // end of anonymous namespace } // end of anonymous namespace
namespace opt_AVX512_SKX { static
// This is the implementation for the external callers interface entry point.
// It should be the only function called into this file from outside
// Any new implementations should be directed from here
void calculate_integral_avx512(const uchar *src, size_t _srcstep, void calculate_integral_avx512(const uchar *src, size_t _srcstep,
double *sum, size_t _sumstep, double *sum, size_t _sumstep,
double *sqsum, size_t _sqsumstep, double *sqsum, size_t _sqsumstep,
int width, int height, int cn) int width, int height, int cn)
{ {
CV_INSTRUMENT_REGION();
switch(cn){ switch(cn){
case 1: { case 1: {
IntegralCalculator< 1 > calculator; IntegralCalculator< 1 > calculator;
...@@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep, ...@@ -466,5 +463,5 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
} }
} // end namespace opt_AVX512_SXK CV_CPU_OPTIMIZATION_NAMESPACE_END
} // end namespace cv }} // end namespace cv::hal
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
// License Agreement // License Agreement
// For Open Source Computer Vision Library // For Open Source Computer Vision Library
// //
// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved. // Copyright (C) 2000-2020 Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved. // Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
...@@ -44,210 +44,157 @@ ...@@ -44,210 +44,157 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp" #include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp" #include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace cv #include "sumpixels.simd.hpp"
{ #include "sumpixels.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace cv {
#ifdef HAVE_OPENCL
template <typename T, typename ST, typename QT> static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
struct Integral_SIMD
{ {
bool operator()(const T *, size_t, bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
ST *, size_t,
QT *, size_t, if ( (_src.type() != CV_8UC1) ||
ST *, size_t, !(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F)))
int, int, int) const
{
return false; return false;
}
};
static const int tileSize = 16;
template <> String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s",
struct Integral_SIMD<uchar, double, double> { ocl::typeToStr(sdepth), tileSize,
Integral_SIMD() {}; doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (kcols.empty())
return false;
bool operator()(const uchar *src, size_t _srcstep, UMat src = _src.getUMat();
double *sum, size_t _sumstep, Size src_size = src.size();
double *sqsum, size_t _sqsumstep, Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
double *tilted, size_t _tiltedstep, UMat buf(bufsize, sdepth);
int width, int height, int cn) const kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf));
{ size_t gt = src.cols, lt = tileSize;
#if CV_TRY_AVX512_SKX if (!kcols.run(1, &gt, &lt, false))
CV_UNUSED(_tiltedstep); return false;
// TODO: Add support for 1 channel input (WIP)
if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){ ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep, if (krows.empty())
sqsum, _sqsumstep, width, height, cn);
return true;
}
#else
// Avoid warnings in some builds
CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep);
CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep);
CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn);
#endif
return false; return false;
}
}; Size sumsize(src_size.width + 1, src_size.height + 1);
_sum.create(sumsize, sdepth);
UMat sum = _sum.getUMat();
#if CV_SIMD && CV_SIMD_WIDTH <= 64 krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum));
gt = src.rows;
return krows.run(1, &gt, &lt, false);
}
template <> static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
struct Integral_SIMD<uchar, int, double>
{ {
Integral_SIMD() {} bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
bool operator()(const uchar * src, size_t _srcstep, if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
int * sum, size_t _sumstep,
double * sqsum, size_t,
int * tilted, size_t,
int width, int height, int cn) const
{
if (sqsum || tilted || cn != 1)
return false; return false;
// the first iteration static const int tileSize = 16;
memset(sum, 0, (width + 1) * sizeof(int));
// the others String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
for (int i = 0; i < height; ++i) ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
{ tileSize,
const uchar * src_row = src + _srcstep * i; doubleSupport ? " -D DOUBLE_SUPPORT" : "");
int * prev_sum_row = (int *)((uchar *)sum + _sumstep * i) + 1;
int * sum_row = (int *)((uchar *)sum + _sumstep * (i + 1)) + 1;
sum_row[-1] = 0; ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (kcols.empty())
return false;
v_int32 prev = vx_setzero_s32(); UMat src = _src.getUMat();
int j = 0; Size src_size = src.size();
for ( ; j + v_uint16::nlanes <= width; j += v_uint16::nlanes) Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
{ UMat buf(bufsize, sdepth);
v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j)); UMat buf_sq(bufsize, sqdepth);
v_int32 el4l, el4h; kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq));
#if CV_AVX2 && CV_SIMD_WIDTH == 32 size_t gt = src.cols, lt = tileSize;
__m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2)); if (!kcols.run(1, &gt, &lt, false))
vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4)); return false;
vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
__m256i shmask = _mm256_set1_epi32(7);
el4l.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_low(vsum)), prev.val);
el4h.val = _mm256_add_epi32(_mm256_cvtepi16_epi32(_v256_extract_high(vsum)), _mm256_permutevar8x32_epi32(el4l.val, shmask));
prev.val = _mm256_permutevar8x32_epi32(el4h.val, shmask);
#else
el8 += v_rotate_left<1>(el8);
el8 += v_rotate_left<2>(el8);
#if CV_SIMD_WIDTH >= 32
el8 += v_rotate_left<4>(el8);
#if CV_SIMD_WIDTH == 64
el8 += v_rotate_left<8>(el8);
#endif
#endif
v_expand(el8, el4l, el4h);
el4l += prev;
el4h += el4l;
prev = v_broadcast_element<v_int32::nlanes - 1>(el4h);
#endif
v_store(sum_row + j , el4l + vx_load(prev_sum_row + j ));
v_store(sum_row + j + v_int32::nlanes, el4h + vx_load(prev_sum_row + j + v_int32::nlanes));
}
for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; if (krows.empty())
} return false;
vx_cleanup();
return true; Size sumsize(src_size.width + 1, src_size.height + 1);
} _sum.create(sumsize, sdepth);
}; UMat sum = _sum.getUMat();
_sqsum.create(sumsize, sqdepth);
UMat sum_sq = _sqsum.getUMat();
template <> krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
struct Integral_SIMD<uchar, float, double> gt = src.rows;
{ return krows.run(1, &gt, &lt, false);
Integral_SIMD() {} }
bool operator()(const uchar * src, size_t _srcstep, #endif // HAVE_OPENCL
float * sum, size_t _sumstep,
double * sqsum, size_t,
float * tilted, size_t,
int width, int height, int cn) const
{
if (sqsum || tilted || cn != 1)
return false;
// the first iteration #ifdef HAVE_IPP
memset(sum, 0, (width + 1) * sizeof(int));
// the others static bool ipp_integral(
for (int i = 0; i < height; ++i) int depth, int sdepth, int sqdepth,
{ const uchar* src, size_t srcstep,
const uchar * src_row = src + _srcstep * i; uchar* sum, size_t sumstep,
float * prev_sum_row = (float *)((uchar *)sum + _sumstep * i) + 1; uchar* sqsum, size_t sqsumstep,
float * sum_row = (float *)((uchar *)sum + _sumstep * (i + 1)) + 1; uchar* tilted, size_t tstep,
int width, int height, int cn)
{
CV_INSTRUMENT_REGION_IPP();
sum_row[-1] = 0; IppiSize size = {width, height};
v_float32 prev = vx_setzero_f32(); if(cn > 1)
int j = 0; return false;
for (; j + v_uint16::nlanes <= width; j += v_uint16::nlanes) if(tilted)
{ {
v_int16 el8 = v_reinterpret_as_s16(vx_load_expand(src_row + j)); CV_UNUSED(tstep);
v_float32 el4l, el4h; return false;
#if CV_AVX2 && CV_SIMD_WIDTH == 32
__m256i vsum = _mm256_add_epi16(el8.val, _mm256_slli_si256(el8.val, 2));
vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 4));
vsum = _mm256_add_epi16(vsum, _mm256_slli_si256(vsum, 8));
__m256i shmask = _mm256_set1_epi32(7);
el4l.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_low(vsum))), prev.val);
el4h.val = _mm256_add_ps(_mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(_v256_extract_high(vsum))), _mm256_permutevar8x32_ps(el4l.val, shmask));
prev.val = _mm256_permutevar8x32_ps(el4h.val, shmask);
#else
el8 += v_rotate_left<1>(el8);
el8 += v_rotate_left<2>(el8);
#if CV_SIMD_WIDTH >= 32
el8 += v_rotate_left<4>(el8);
#if CV_SIMD_WIDTH == 64
el8 += v_rotate_left<8>(el8);
#endif
#endif
v_int32 el4li, el4hi;
v_expand(el8, el4li, el4hi);
el4l = v_cvt_f32(el4li) + prev;
el4h = v_cvt_f32(el4hi) + el4l;
prev = v_broadcast_element<v_float32::nlanes - 1>(el4h);
#endif
v_store(sum_row + j , el4l + vx_load(prev_sum_row + j ));
v_store(sum_row + j + v_float32::nlanes, el4h + vx_load(prev_sum_row + j + v_float32::nlanes));
} }
for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) if(!sqsum)
sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; {
if(depth == CV_8U && sdepth == CV_32S)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0;
else if(depth == CV_8UC1 && sdepth == CV_32F)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0;
else if(depth == CV_32FC1 && sdepth == CV_32F)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0;
else
return false;
} }
vx_cleanup(); else
{
return true; if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else
return false;
} }
}; }
#endif // HAVE_IPP
#endif namespace hal {
template<typename T, typename ST, typename QT> template<typename T, typename ST, typename QT> static
void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep, QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep,
int width, int height, int cn ) int width, int height, int cn )
{ {
int x, y, k; int x, y, k;
if (Integral_SIMD<T, ST, QT>()(src, _srcstep,
sum, _sumstep,
sqsum, _sqsumstep,
tilted, _tiltedstep,
width, height, cn))
return;
int srcstep = (int)(_srcstep/sizeof(T)); int srcstep = (int)(_srcstep/sizeof(T));
int sumstep = (int)(_sumstep/sizeof(ST)); int sumstep = (int)(_sumstep/sizeof(ST));
int tiltedstep = (int)(_tiltedstep/sizeof(ST)); int tiltedstep = (int)(_tiltedstep/sizeof(ST));
...@@ -401,100 +348,7 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep, ...@@ -401,100 +348,7 @@ void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
} }
} }
static bool integral_SIMD(
#ifdef HAVE_OPENCL
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ( (_src.type() != CV_8UC1) ||
!(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F)))
return false;
static const int tileSize = 16;
String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s",
ocl::typeToStr(sdepth), tileSize,
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (kcols.empty())
return false;
UMat src = _src.getUMat();
Size src_size = src.size();
Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
UMat buf(bufsize, sdepth);
kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf));
size_t gt = src.cols, lt = tileSize;
if (!kcols.run(1, &gt, &lt, false))
return false;
ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (krows.empty())
return false;
Size sumsize(src_size.width + 1, src_size.height + 1);
_sum.create(sumsize, sdepth);
UMat sum = _sum.getUMat();
krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum));
gt = src.rows;
return krows.run(1, &gt, &lt, false);
}
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
return false;
static const int tileSize = 16;
String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
tileSize,
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (kcols.empty())
return false;
UMat src = _src.getUMat();
Size src_size = src.size();
Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
UMat buf(bufsize, sdepth);
UMat buf_sq(bufsize, sqdepth);
kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq));
size_t gt = src.cols, lt = tileSize;
if (!kcols.run(1, &gt, &lt, false))
return false;
ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (krows.empty())
return false;
Size sumsize(src_size.width + 1, src_size.height + 1);
_sum.create(sumsize, sdepth);
UMat sum = _sum.getUMat();
_sqsum.create(sumsize, sqdepth);
UMat sum_sq = _sqsum.getUMat();
krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
gt = src.rows;
return krows.run(1, &gt, &lt, false);
}
#endif
}
#if defined(HAVE_IPP)
namespace cv
{
static bool ipp_integral(
int depth, int sdepth, int sqdepth, int depth, int sdepth, int sqdepth,
const uchar* src, size_t srcstep, const uchar* src, size_t srcstep,
uchar* sum, size_t sumstep, uchar* sum, size_t sumstep,
...@@ -502,56 +356,28 @@ static bool ipp_integral( ...@@ -502,56 +356,28 @@ static bool ipp_integral(
uchar* tilted, size_t tstep, uchar* tilted, size_t tstep,
int width, int height, int cn) int width, int height, int cn)
{ {
CV_INSTRUMENT_REGION_IPP(); CV_INSTRUMENT_REGION();
IppiSize size = {width, height};
if(cn > 1)
return false;
if(tilted)
{
CV_UNUSED(tstep);
return false;
}
if(!sqsum) CV_CPU_DISPATCH(integral_SIMD, (depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn),
{ CV_CPU_DISPATCH_MODES_ALL);
if(depth == CV_8U && sdepth == CV_32S)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0;
else if(depth == CV_8UC1 && sdepth == CV_32F)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0;
else if(depth == CV_32FC1 && sdepth == CV_32F)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0;
else
return false;
}
else
{
if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else
return false;
}
} }
}
#endif
namespace cv { namespace hal {
void integral(int depth, int sdepth, int sqdepth, void integral(
int depth, int sdepth, int sqdepth,
const uchar* src, size_t srcstep, const uchar* src, size_t srcstep,
uchar* sum, size_t sumstep, uchar* sum, size_t sumstep,
uchar* sqsum, size_t sqsumstep, uchar* sqsum, size_t sqsumstep,
uchar* tilted, size_t tstep, uchar* tilted, size_t tstep,
int width, int height, int cn) int width, int height, int cn)
{ {
CV_INSTRUMENT_REGION();
CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn); CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn);
CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn)); CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn));
if (integral_SIMD(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn))
return;
#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn) #define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn)
if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F ) if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
...@@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth, ...@@ -579,14 +405,14 @@ void integral(int depth, int sdepth, int sqdepth,
else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F ) else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
ONE_CALL(double, double, double); ONE_CALL(double, double, double);
else else
CV_Error( CV_StsUnsupportedFormat, "" ); CV_Error(Error::StsUnsupportedFormat, "");
#undef ONE_CALL #undef ONE_CALL
} }
}} // cv::hal:: } // namespace hal
void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth ) void integral(InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
{ {
CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION();
...@@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output ...@@ -624,20 +450,21 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
src.cols, src.rows, cn); src.cols, src.rows, cn);
} }
void cv::integral( InputArray src, OutputArray sum, int sdepth ) void integral( InputArray src, OutputArray sum, int sdepth )
{ {
CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION();
integral( src, sum, noArray(), noArray(), sdepth ); integral( src, sum, noArray(), noArray(), sdepth );
} }
void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth ) void integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
{ {
CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION();
integral( src, sum, sqsum, noArray(), sdepth, sqdepth ); integral( src, sum, sqsum, noArray(), sdepth, sqdepth );
} }
} // namespace
CV_IMPL void CV_IMPL void
cvIntegral( const CvArr* image, CvArr* sumImage, cvIntegral( const CvArr* image, CvArr* sumImage,
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
#define OPENCV_IMGPROC_SUM_PIXELS_HPP
namespace cv
{
namespace opt_AVX512_SKX
{
#if CV_TRY_AVX512_SKX
void calculate_integral_avx512(
const uchar *src, size_t _srcstep,
double *sum, size_t _sumstep,
double *sqsum, size_t _sqsumstep,
int width, int height, int cn);
#endif
} // end namespace opt_AVX512_SKX
} // end namespace cv
#endif
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
// License Agreement // License Agreement
// For Open Source Computer Vision Library // For Open Source Computer Vision Library
// //
// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved. // Copyright (C) 2000-2020 Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved. // Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
...@@ -41,13 +41,26 @@ ...@@ -41,13 +41,26 @@
// //
//M*/ //M*/
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp" #include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace cv #if CV_AVX512_SKX
{ #include "sumpixels.avx512_skx.hpp"
#endif
namespace cv { namespace hal {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
bool integral_SIMD(
int depth, int sdepth, int sqdepth,
const uchar* src, size_t srcstep,
uchar* sum, size_t sumstep,
uchar* sqsum, size_t sqsumstep,
uchar* tilted, size_t tstep,
int width, int height, int cn);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
namespace {
template <typename T, typename ST, typename QT> template <typename T, typename ST, typename QT>
struct Integral_SIMD struct Integral_SIMD
...@@ -62,7 +75,7 @@ struct Integral_SIMD ...@@ -62,7 +75,7 @@ struct Integral_SIMD
} }
}; };
#if CV_AVX512_SKX
template <> template <>
struct Integral_SIMD<uchar, double, double> { struct Integral_SIMD<uchar, double, double> {
Integral_SIMD() {}; Integral_SIMD() {};
...@@ -74,24 +87,19 @@ struct Integral_SIMD<uchar, double, double> { ...@@ -74,24 +87,19 @@ struct Integral_SIMD<uchar, double, double> {
double *tilted, size_t _tiltedstep, double *tilted, size_t _tiltedstep,
int width, int height, int cn) const int width, int height, int cn) const
{ {
#if CV_TRY_AVX512_SKX
CV_UNUSED(_tiltedstep); CV_UNUSED(_tiltedstep);
// TODO: Add support for 1 channel input (WIP) // TODO: Add support for 1 channel input (WIP)
if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){ if (!tilted && (cn <= 4))
opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep, {
calculate_integral_avx512(src, _srcstep, sum, _sumstep,
sqsum, _sqsumstep, width, height, cn); sqsum, _sqsumstep, width, height, cn);
return true; return true;
} }
#else
// Avoid warnings in some builds
CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep);
CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep);
CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn);
#endif
return false; return false;
} }
}; };
#endif
#if CV_SIMD && CV_SIMD_WIDTH <= 64 #if CV_SIMD && CV_SIMD_WIDTH <= 64
...@@ -157,8 +165,6 @@ struct Integral_SIMD<uchar, int, double> ...@@ -157,8 +165,6 @@ struct Integral_SIMD<uchar, int, double>
for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) for (int v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
} }
vx_cleanup();
return true; return true;
} }
}; };
...@@ -226,275 +232,15 @@ struct Integral_SIMD<uchar, float, double> ...@@ -226,275 +232,15 @@ struct Integral_SIMD<uchar, float, double>
for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j) for (float v = sum_row[j - 1] - prev_sum_row[j - 1]; j < width; ++j)
sum_row[j] = (v += src_row[j]) + prev_sum_row[j]; sum_row[j] = (v += src_row[j]) + prev_sum_row[j];
} }
vx_cleanup();
return true; return true;
} }
}; };
#endif #endif
template<typename T, typename ST, typename QT> } // namespace anon
void integral_( const T* src, size_t _srcstep, ST* sum, size_t _sumstep,
QT* sqsum, size_t _sqsumstep, ST* tilted, size_t _tiltedstep,
int width, int height, int cn )
{
int x, y, k;
if (Integral_SIMD<T, ST, QT>()(src, _srcstep,
sum, _sumstep,
sqsum, _sqsumstep,
tilted, _tiltedstep,
width, height, cn))
return;
int srcstep = (int)(_srcstep/sizeof(T));
int sumstep = (int)(_sumstep/sizeof(ST));
int tiltedstep = (int)(_tiltedstep/sizeof(ST));
int sqsumstep = (int)(_sqsumstep/sizeof(QT));
width *= cn;
memset( sum, 0, (width+cn)*sizeof(sum[0]));
sum += sumstep + cn;
if( sqsum )
{
memset( sqsum, 0, (width+cn)*sizeof(sqsum[0]));
sqsum += sqsumstep + cn;
}
if( tilted )
{
memset( tilted, 0, (width+cn)*sizeof(tilted[0]));
tilted += tiltedstep + cn;
}
if( sqsum == 0 && tilted == 0 )
{
for( y = 0; y < height; y++, src += srcstep - cn, sum += sumstep - cn )
{
for( k = 0; k < cn; k++, src++, sum++ )
{
ST s = sum[-cn] = 0;
for( x = 0; x < width; x += cn )
{
s += src[x];
sum[x] = sum[x - sumstep] + s;
}
}
}
}
else if( tilted == 0 )
{
for( y = 0; y < height; y++, src += srcstep - cn,
sum += sumstep - cn, sqsum += sqsumstep - cn )
{
for( k = 0; k < cn; k++, src++, sum++, sqsum++ )
{
ST s = sum[-cn] = 0;
QT sq = sqsum[-cn] = 0;
for( x = 0; x < width; x += cn )
{
T it = src[x];
s += it;
sq += (QT)it*it;
ST t = sum[x - sumstep] + s;
QT tq = sqsum[x - sqsumstep] + sq;
sum[x] = t;
sqsum[x] = tq;
}
}
}
}
else
{
AutoBuffer<ST> _buf(width+cn);
ST* buf = _buf.data();
ST s;
QT sq;
for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ )
{
sum[-cn] = tilted[-cn] = 0;
for( x = 0, s = 0, sq = 0; x < width; x += cn )
{
T it = src[x];
buf[x] = tilted[x] = it;
s += it;
sq += (QT)it*it;
sum[x] = s;
if( sqsum )
sqsum[x] = sq;
}
if( width == cn )
buf[cn] = 0;
if( sqsum )
{
sqsum[-cn] = 0;
sqsum++;
}
}
for( y = 1; y < height; y++ )
{
src += srcstep - cn;
sum += sumstep - cn;
tilted += tiltedstep - cn;
buf += -cn;
if( sqsum )
sqsum += sqsumstep - cn;
for( k = 0; k < cn; k++, src++, sum++, tilted++, buf++ )
{
T it = src[0];
ST t0 = s = it;
QT tq0 = sq = (QT)it*it;
sum[-cn] = 0;
if( sqsum )
sqsum[-cn] = 0;
tilted[-cn] = tilted[-tiltedstep];
sum[0] = sum[-sumstep] + t0;
if( sqsum )
sqsum[0] = sqsum[-sqsumstep] + tq0;
tilted[0] = tilted[-tiltedstep] + t0 + buf[cn];
for( x = cn; x < width - cn; x += cn )
{
ST t1 = buf[x];
buf[x - cn] = t1 + t0;
t0 = it = src[x];
tq0 = (QT)it*it;
s += t0;
sq += tq0;
sum[x] = sum[x - sumstep] + s;
if( sqsum )
sqsum[x] = sqsum[x - sqsumstep] + sq;
t1 += buf[x + cn] + t0 + tilted[x - tiltedstep - cn];
tilted[x] = t1;
}
if( width > cn )
{
ST t1 = buf[x];
buf[x - cn] = t1 + t0;
t0 = it = src[x];
tq0 = (QT)it*it;
s += t0;
sq += tq0;
sum[x] = sum[x - sumstep] + s;
if( sqsum )
sqsum[x] = sqsum[x - sqsumstep] + sq;
tilted[x] = t0 + t1 + tilted[x - tiltedstep - cn];
buf[x] = t0;
}
if( sqsum )
sqsum++;
}
}
}
}
#ifdef HAVE_OPENCL
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ( (_src.type() != CV_8UC1) ||
!(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F)))
return false;
static const int tileSize = 16;
String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s",
ocl::typeToStr(sdepth), tileSize,
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (kcols.empty())
return false;
UMat src = _src.getUMat();
Size src_size = src.size();
Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
UMat buf(bufsize, sdepth);
kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf));
size_t gt = src.cols, lt = tileSize;
if (!kcols.run(1, &gt, &lt, false))
return false;
ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (krows.empty())
return false;
Size sumsize(src_size.width + 1, src_size.height + 1);
_sum.create(sumsize, sdepth);
UMat sum = _sum.getUMat();
krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum));
gt = src.rows;
return krows.run(1, &gt, &lt, false);
}
// OpenCL path: sum and squared-sum integral images for an 8UC1 source,
// using the same two-pass tiled scheme as the sum-only overload.
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
    const bool haveDoubles = ocl::Device::getDefault().doubleFPConfig() > 0;

    if( _src.type() != CV_8UC1 )
        return false;
    // Either accumulator at 64F requires device FP64 support.
    if( !haveDoubles && (sdepth == CV_64F || sqdepth == CV_64F) )
        return false;

    static const int tileSize = 16;

    String opts = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
                         ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
                         tileSize,
                         haveDoubles ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel colsKernel("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, opts);
    if (colsKernel.empty())
        return false;

    UMat src = _src.getUMat();
    Size srcSize = src.size();

    // Transposed, tile-aligned scratch buffers (one per accumulator type).
    Size scratchSize(((srcSize.height + tileSize - 1) / tileSize) * tileSize,
                     ((srcSize.width  + tileSize - 1) / tileSize) * tileSize);
    UMat scratch(scratchSize, sdepth);
    UMat scratchSq(scratchSize, sqdepth);

    colsKernel.args(ocl::KernelArg::ReadOnly(src),
                    ocl::KernelArg::WriteOnlyNoSize(scratch),
                    ocl::KernelArg::WriteOnlyNoSize(scratchSq));
    size_t globalSize = src.cols, localSize = tileSize;
    if (!colsKernel.run(1, &globalSize, &localSize, false))
        return false;

    ocl::Kernel rowsKernel("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, opts);
    if (rowsKernel.empty())
        return false;

    Size sumSize(srcSize.width + 1, srcSize.height + 1);
    _sum.create(sumSize, sdepth);
    UMat sum = _sum.getUMat();
    _sqsum.create(sumSize, sqdepth);
    UMat sumSq = _sqsum.getUMat();

    rowsKernel.args(ocl::KernelArg::ReadOnlyNoSize(scratch),
                    ocl::KernelArg::ReadOnlyNoSize(scratchSq),
                    ocl::KernelArg::WriteOnly(sum),
                    ocl::KernelArg::WriteOnlyNoSize(sumSq));
    globalSize = src.rows;
    return rowsKernel.run(1, &globalSize, &localSize, false);
}
#endif
}
#if defined(HAVE_IPP) bool integral_SIMD(
namespace cv
{
static bool ipp_integral(
int depth, int sdepth, int sqdepth, int depth, int sdepth, int sqdepth,
const uchar* src, size_t srcstep, const uchar* src, size_t srcstep,
uchar* sum, size_t sumstep, uchar* sum, size_t sumstep,
...@@ -502,57 +248,10 @@ static bool ipp_integral( ...@@ -502,57 +248,10 @@ static bool ipp_integral(
uchar* tilted, size_t tstep, uchar* tilted, size_t tstep,
int width, int height, int cn) int width, int height, int cn)
{ {
CV_INSTRUMENT_REGION_IPP(); CV_INSTRUMENT_REGION();
IppiSize size = {width, height};
if(cn > 1)
return false;
if(tilted)
{
CV_UNUSED(tstep);
return false;
}
if(!sqsum)
{
if(depth == CV_8U && sdepth == CV_32S)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, size, 0) >= 0;
else if(depth == CV_8UC1 && sdepth == CV_32F)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_8u32f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size, 0) >= 0;
else if(depth == CV_32FC1 && sdepth == CV_32F)
return CV_INSTRUMENT_FUN_IPP(ippiIntegral_32f_C1R, (const Ipp32f*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, size) >= 0;
else
return false;
}
else
{
if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_32S)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp32s*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else if(depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32s64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32s*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else if(depth == CV_8U && sdepth == CV_32F && sqdepth == CV_64F)
return CV_INSTRUMENT_FUN_IPP(ippiSqrIntegral_8u32f64f_C1R, (const Ipp8u*)src, (int)srcstep, (Ipp32f*)sum, (int)sumstep, (Ipp64f*)sqsum, (int)sqsumstep, size, 0, 0) >= 0;
else
return false;
}
}
}
#endif
namespace cv { namespace hal {
void integral(int depth, int sdepth, int sqdepth,
const uchar* src, size_t srcstep,
uchar* sum, size_t sumstep,
uchar* sqsum, size_t sqsumstep,
uchar* tilted, size_t tstep,
int width, int height, int cn)
{
CALL_HAL(integral, cv_hal_integral, depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn);
CV_IPP_RUN_FAST(ipp_integral(depth, sdepth, sqdepth, src, srcstep, sum, sumstep, sqsum, sqsumstep, tilted, tstep, width, height, cn));
#define ONE_CALL(A, B, C) integral_<A, B, C>((const A*)src, srcstep, (B*)sum, sumstep, (C*)sqsum, sqsumstep, (B*)tilted, tstep, width, height, cn) #define ONE_CALL(T, ST, QT) \
return Integral_SIMD<T, ST, QT>()((const T*)src, srcstep, (ST*)sum, sumstep, (QT*)sqsum, sqsumstep, (ST*)tilted, tstep, width, height, cn)
if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F ) if( depth == CV_8U && sdepth == CV_32S && sqdepth == CV_64F )
ONE_CALL(uchar, int, double); ONE_CALL(uchar, int, double);
...@@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth, ...@@ -579,89 +278,11 @@ void integral(int depth, int sdepth, int sqdepth,
else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F ) else if( depth == CV_64F && sdepth == CV_64F && sqdepth == CV_64F )
ONE_CALL(double, double, double); ONE_CALL(double, double, double);
else else
CV_Error( CV_StsUnsupportedFormat, "" ); return false;
#undef ONE_CALL #undef ONE_CALL
} }
#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
}} // cv::hal:: }} // cv::hal::
// Computes the integral image of _src, and optionally the squared-sum
// (_sqsum) and 45-degree tilted (_tilted) integrals. Outputs are one pixel
// larger than the source in each dimension (isize = ssize + 1).
// sdepth <= 0 selects CV_32S for 8U sources and CV_64F otherwise;
// sqdepth <= 0 selects CV_64F.
// Fix vs. original: removed a stray empty statement ("};") after the
// _sqsum block.
void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth )
{
    CV_INSTRUMENT_REGION();

    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    if( sdepth <= 0 )
        sdepth = depth == CV_8U ? CV_32S : CV_64F;
    if ( sqdepth <= 0 )
        sqdepth = CV_64F;
    sdepth = CV_MAT_DEPTH(sdepth), sqdepth = CV_MAT_DEPTH(sqdepth);

    // The OpenCL path handles sum (and optionally sqsum) but not tilted.
    CV_OCL_RUN(_sum.isUMat() && !_tilted.needed(),
        (_sqsum.needed() ? ocl_integral(_src, _sum, _sqsum, sdepth, sqdepth) : ocl_integral(_src, _sum, sdepth)));

    Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1);
    _sum.create( isize, CV_MAKETYPE(sdepth, cn) );
    Mat src = _src.getMat(), sum =_sum.getMat(), sqsum, tilted;

    if( _sqsum.needed() )
    {
        _sqsum.create( isize, CV_MAKETYPE(sqdepth, cn) );
        sqsum = _sqsum.getMat();
    }

    if( _tilted.needed() )
    {
        _tilted.create( isize, CV_MAKETYPE(sdepth, cn) );
        tilted = _tilted.getMat();
    }

    // Outputs that were not requested stay empty Mats; their ptr() is NULL,
    // which the HAL dispatcher treats as "do not compute this output".
    hal::integral(depth, sdepth, sqdepth,
                  src.ptr(), src.step,
                  sum.ptr(), sum.step,
                  sqsum.ptr(), sqsum.step,
                  tilted.ptr(), tilted.step,
                  src.cols, src.rows, cn);
}
// Convenience overload: compute only the sum integral image.
// Forwards to the full overload with squared and tilted outputs disabled.
void cv::integral( InputArray src, OutputArray sum, int sdepth )
{
    CV_INSTRUMENT_REGION();
    integral( src, sum, noArray(), noArray(), sdepth );
}
// Convenience overload: compute the sum and squared-sum integral images
// (no tilted output). Forwards to the full overload.
void cv::integral( InputArray src, OutputArray sum, OutputArray sqsum, int sdepth, int sqdepth )
{
    CV_INSTRUMENT_REGION();
    integral( src, sum, sqsum, noArray(), sdepth, sqdepth );
}
// Legacy C-API wrapper around cv::integral(). The caller supplies
// pre-allocated destination arrays; the final CV_Assert verifies that
// cv::integral wrote into those buffers in place (no reallocation).
CV_IMPL void
cvIntegral( const CvArr* image, CvArr* sumImage,
            CvArr* sumSqImage, CvArr* tiltedSumImage )
{
    cv::Mat src = cv::cvarrToMat(image);
    cv::Mat sum = cv::cvarrToMat(sumImage);
    cv::Mat sumHeader = sum;  // keep original data pointer for the check below

    cv::Mat sqsum, sqsumHeader, tilted, tiltedHeader;
    cv::Mat* sqsumPtr = 0;
    cv::Mat* tiltedPtr = 0;

    if( sumSqImage )
    {
        sqsum = cv::cvarrToMat(sumSqImage);
        sqsumHeader = sqsum;
        sqsumPtr = &sqsum;
    }
    if( tiltedSumImage )
    {
        tilted = cv::cvarrToMat(tiltedSumImage);
        tiltedHeader = tilted;
        tiltedPtr = &tilted;
    }

    cv::integral( src, sum,
                  sqsumPtr ? cv::_OutputArray(*sqsumPtr) : cv::_OutputArray(),
                  tiltedPtr ? cv::_OutputArray(*tiltedPtr) : cv::_OutputArray(),
                  sum.depth() );

    // The destination buffers must not have been reallocated.
    CV_Assert( sum.data == sumHeader.data &&
               sqsum.data == sqsumHeader.data &&
               tilted.data == tiltedHeader.data );
}
/* End of file. */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment