Commit 8965f3ae authored by Sayed Adel

imgproc:simd Enable VSX and wide universal intrinsics for accumulate operations

  - improve CPU dispatching calls to allow more SIMD extensions
    (SSE4.1, AVX2, VSX)
  - wide universal intrinsics
  - replace dummy v_expand with v_expand_low
  - replace v_expand + v_mul_wrap with v_mul_expand for product accumulate operations
  - use FMA for accumulate operations
  - add mask and more types to accumulate's performance tests
parent 5771fd69
set(the_description "Image Processing") set(the_description "Image Processing")
ocv_add_dispatched_file(accum SSE2 AVX NEON) ocv_add_dispatched_file(accum SSE4_1 AVX AVX2)
ocv_define_module(imgproc opencv_core WRAP java python js) ocv_define_module(imgproc opencv_core WRAP java python js)
...@@ -5,94 +5,102 @@ ...@@ -5,94 +5,102 @@
namespace opencv_test { namespace opencv_test {
#ifdef HAVE_OPENVX typedef Size_MatType Accumulate;
PERF_TEST_P(Size_MatType, Accumulate,
testing::Combine( #define MAT_TYPES_ACCUMLATE CV_8UC1, CV_16UC1, CV_32FC1
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p), #define MAT_TYPES_ACCUMLATE_C MAT_TYPES_ACCUMLATE, CV_8UC3, CV_16UC3, CV_32FC3
testing::Values(CV_16SC1, CV_32FC1) #define MAT_TYPES_ACCUMLATE_D MAT_TYPES_ACCUMLATE, CV_64FC1
) #define MAT_TYPES_ACCUMLATE_D_C MAT_TYPES_ACCUMLATE_C, CV_64FC1, CV_64FC1
)
#else #define PERF_ACCUMULATE_INIT(_FLTC) \
PERF_TEST_P( Size_MatType, Accumulate, const Size srcSize = get<0>(GetParam()); \
testing::Combine( const int srcType = get<1>(GetParam()); \
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p), const int dstType = _FLTC(CV_MAT_CN(srcType)); \
testing::Values(CV_32FC1) Mat src1(srcSize, srcType), dst(srcSize, dstType); \
) declare.in(src1, dst, WARMUP_RNG).out(dst);
)
#endif #define PERF_ACCUMULATE_MASK_INIT(_FLTC) \
{ PERF_ACCUMULATE_INIT(_FLTC) \
Size sz = get<0>(GetParam()); Mat mask(srcSize, CV_8UC1); \
int dstType = get<1>(GetParam()); declare.in(mask, WARMUP_RNG);
Mat src(sz, CV_8UC1); #define PERF_TEST_P_ACCUMULATE(_NAME, _TYPES, _INIT, _FUN) \
Mat dst(sz, dstType); PERF_TEST_P(Accumulate, _NAME, \
testing::Combine( \
declare.time(100); testing::Values(sz1080p, sz720p, szVGA, szQVGA, szODD), \
declare.in(src, WARMUP_RNG).out(dst); testing::Values(_TYPES) \
) \
TEST_CYCLE() accumulate(src, dst); ) \
{ \
SANITY_CHECK_NOTHING(); _INIT \
} TEST_CYCLE() _FUN; \
SANITY_CHECK_NOTHING(); \
#ifdef HAVE_OPENVX }
PERF_TEST_P(Size_MatType, AccumulateSquare,
testing::Combine( /////////////////////////////////// Accumulate ///////////////////////////////////
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p),
testing::Values(CV_16SC1, CV_32FC1) PERF_TEST_P_ACCUMULATE(Accumulate, MAT_TYPES_ACCUMLATE,
) PERF_ACCUMULATE_INIT(CV_32FC), accumulate(src1, dst))
)
#else PERF_TEST_P_ACCUMULATE(AccumulateMask, MAT_TYPES_ACCUMLATE_C,
PERF_TEST_P( Size_MatType, AccumulateSquare, PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulate(src1, dst, mask))
testing::Combine(
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p), PERF_TEST_P_ACCUMULATE(AccumulateDouble, MAT_TYPES_ACCUMLATE_D,
testing::Values(CV_32FC1) PERF_ACCUMULATE_INIT(CV_64FC), accumulate(src1, dst))
)
) PERF_TEST_P_ACCUMULATE(AccumulateDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
#endif PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulate(src1, dst, mask))
{
Size sz = get<0>(GetParam()); ///////////////////////////// AccumulateSquare ///////////////////////////////////
int dstType = get<1>(GetParam());
PERF_TEST_P_ACCUMULATE(Square, MAT_TYPES_ACCUMLATE,
Mat src(sz, CV_8UC1); PERF_ACCUMULATE_INIT(CV_32FC), accumulateSquare(src1, dst))
Mat dst(sz, dstType);
PERF_TEST_P_ACCUMULATE(SquareMask, MAT_TYPES_ACCUMLATE_C,
declare.time(100); PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulateSquare(src1, dst, mask))
declare.in(src, WARMUP_RNG).out(dst);
PERF_TEST_P_ACCUMULATE(SquareDouble, MAT_TYPES_ACCUMLATE_D,
TEST_CYCLE() accumulateSquare(src, dst); PERF_ACCUMULATE_INIT(CV_64FC), accumulateSquare(src1, dst))
SANITY_CHECK_NOTHING(); PERF_TEST_P_ACCUMULATE(SquareDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
} PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulateSquare(src1, dst, mask))
#ifdef HAVE_OPENVX ///////////////////////////// AccumulateProduct ///////////////////////////////////
PERF_TEST_P(Size_MatType, AccumulateWeighted,
testing::Combine( #define PERF_ACCUMULATE_INIT_2(_FLTC) \
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p), PERF_ACCUMULATE_INIT(_FLTC) \
testing::Values(CV_8UC1, CV_32FC1) Mat src2(srcSize, srcType); \
) declare.in(src2);
)
#else #define PERF_ACCUMULATE_MASK_INIT_2(_FLTC) \
PERF_TEST_P( Size_MatType, AccumulateWeighted, PERF_ACCUMULATE_MASK_INIT(_FLTC) \
testing::Combine( Mat src2(srcSize, srcType); \
testing::Values(::perf::szODD, ::perf::szQVGA, ::perf::szVGA, ::perf::sz1080p), declare.in(src2);
testing::Values(CV_32FC1)
) PERF_TEST_P_ACCUMULATE(Product, MAT_TYPES_ACCUMLATE,
) PERF_ACCUMULATE_INIT_2(CV_32FC), accumulateProduct(src1, src2, dst))
#endif
{ PERF_TEST_P_ACCUMULATE(ProductMask, MAT_TYPES_ACCUMLATE_C,
Size sz = get<0>(GetParam()); PERF_ACCUMULATE_MASK_INIT_2(CV_32FC), accumulateProduct(src1, src2, dst, mask))
int dstType = get<1>(GetParam());
PERF_TEST_P_ACCUMULATE(ProductDouble, MAT_TYPES_ACCUMLATE_D,
Mat src(sz, CV_8UC1); PERF_ACCUMULATE_INIT_2(CV_64FC), accumulateProduct(src1, src2, dst))
Mat dst(sz, dstType);
PERF_TEST_P_ACCUMULATE(ProductDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
declare.time(100); PERF_ACCUMULATE_MASK_INIT_2(CV_64FC), accumulateProduct(src1, src2, dst, mask))
declare.in(src, WARMUP_RNG).out(dst);
///////////////////////////// AccumulateWeighted ///////////////////////////////////
TEST_CYCLE() accumulateWeighted(src, dst, 0.314);
PERF_TEST_P_ACCUMULATE(Weighted, MAT_TYPES_ACCUMLATE,
SANITY_CHECK_NOTHING(); PERF_ACCUMULATE_INIT(CV_32FC), accumulateWeighted(src1, dst, 0.123))
}
PERF_TEST_P_ACCUMULATE(WeightedMask, MAT_TYPES_ACCUMLATE_C,
PERF_ACCUMULATE_MASK_INIT(CV_32FC), accumulateWeighted(src1, dst, 0.123, mask))
PERF_TEST_P_ACCUMULATE(WeightedDouble, MAT_TYPES_ACCUMLATE_D,
PERF_ACCUMULATE_INIT(CV_64FC), accumulateWeighted(src1, dst, 0.123456))
PERF_TEST_P_ACCUMULATE(WeightedDoubleMask, MAT_TYPES_ACCUMLATE_D_C,
PERF_ACCUMULATE_MASK_INIT(CV_64FC), accumulateWeighted(src1, dst, 0.123456, mask))
} // namespace } // namespace
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment