Commit 82c477c9 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge remote-tracking branch 'upstream/3.4' into merge-3.4

parents 57bead3a 67f79aab
......@@ -1402,15 +1402,19 @@ if(WITH_HALIDE OR HAVE_HALIDE)
status(" Halide:" HAVE_HALIDE THEN "YES (${HALIDE_LIBRARIES} ${HALIDE_INCLUDE_DIRS})" ELSE NO)
endif()
if(WITH_INF_ENGINE OR HAVE_INF_ENGINE)
if(HAVE_INF_ENGINE)
set(__msg "YES")
if(DEFINED INF_ENGINE_VERSION)
set(__msg "YES (ver ${INF_ENGINE_VERSION})")
if(WITH_INF_ENGINE OR INF_ENGINE_TARGET)
if(INF_ENGINE_TARGET)
set(__msg "YES (${INF_ENGINE_RELEASE} / ${INF_ENGINE_VERSION})")
get_target_property(_lib ${INF_ENGINE_TARGET} IMPORTED_LOCATION)
if(NOT _lib)
get_target_property(_lib_rel ${INF_ENGINE_TARGET} IMPORTED_IMPLIB_RELEASE)
get_target_property(_lib_dbg ${INF_ENGINE_TARGET} IMPORTED_IMPLIB_DEBUG)
set(_lib "${_lib_rel} / ${_lib_dbg}")
endif()
get_target_property(_inc ${INF_ENGINE_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
status(" Inference Engine:" "${__msg}")
status(" libs:" "${INF_ENGINE_LIBRARIES}")
status(" includes:" "${INF_ENGINE_INCLUDE_DIRS}")
status(" libs:" "${_lib}")
status(" includes:" "${_inc}")
else()
status(" Inference Engine:" "NO")
endif()
......
......@@ -700,12 +700,21 @@ macro(ocv_compiler_optimization_fill_cpu_config)
list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT})
endforeach()
list(REMOVE_DUPLICATES __dispatch_modes)
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "")
foreach(OPT ${__dispatch_modes})
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
#define CV_CPU_DISPATCH_COMPILE_${OPT} 1")
endforeach()
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
\n\n#define CV_CPU_DISPATCH_FEATURES 0 \\")
foreach(OPT ${__dispatch_modes})
if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
, CV_CPU_${OPT} \\")
endif()
endforeach()
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}\n")
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n")
foreach(OPT ${CPU_ALL_OPTIMIZATIONS})
if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
......
# The script detects Intel(R) Inference Engine installation
#
# Parameters:
# INTEL_CVSDK_DIR - Path to Inference Engine root folder
# IE_PLUGINS_PATH - Path to folder with Inference Engine plugins
# Cache variables:
# INF_ENGINE_OMP_DIR - directory with OpenMP library to link with (needed by some versions of IE)
# INF_ENGINE_RELEASE - a number reflecting IE source interface (linked with OpenVINO release)
#
# On return this will define:
# Detect parameters:
# 1. Native cmake IE package:
# - environment variable InferenceEngine_DIR is set to location of cmake module
# 2. Custom location:
# - INF_ENGINE_INCLUDE_DIRS - headers search location
# - INF_ENGINE_LIB_DIRS - library search location
# 3. OpenVINO location:
# - environment variable INTEL_CVSDK_DIR is set to location of OpenVINO installation dir
# - INF_ENGINE_PLATFORM - part of name of library directory representing its platform (default ubuntu_16.04)
#
# HAVE_INF_ENGINE - True if Intel Inference Engine was found
# INF_ENGINE_INCLUDE_DIRS - Inference Engine include folder
# INF_ENGINE_LIBRARIES - Inference Engine libraries and its dependencies
# Result:
# INF_ENGINE_TARGET - set to name of imported library target representing InferenceEngine
#
# Mark the Inference Engine as unavailable and stop the detection.
# This is a macro, not a function: its body is substituted into the call site,
# so return() leaves the file (or function) that invoked ie_fail(), not just
# the macro itself.
macro(ie_fail)
set(HAVE_INF_ENGINE FALSE)
return()
endmacro()
find_package(InferenceEngine QUIET)
if(InferenceEngine_FOUND)
set(INF_ENGINE_LIBRARIES "${InferenceEngine_LIBRARIES}")
set(INF_ENGINE_INCLUDE_DIRS "${InferenceEngine_INCLUDE_DIRS}")
set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}")
set(HAVE_INF_ENGINE TRUE)
return()
if(NOT HAVE_CXX11)
message(WARNING "DL Inference engine requires C++11. You can turn it on via ENABLE_CXX11=ON CMake flag.")
return()
endif()
ocv_check_environment_variables(INTEL_CVSDK_DIR INF_ENGINE_ROOT_DIR IE_PLUGINS_PATH)
# =======================
if(NOT INF_ENGINE_ROOT_DIR OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/include/inference_engine.hpp")
set(ie_root_paths "${INF_ENGINE_ROOT_DIR}")
if(DEFINED INTEL_CVSDK_DIR)
list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}/")
list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}/deployment_tools/inference_engine")
endif()
# Register a manually located Inference Engine build as the imported target
# `inference_engine`.
#
# Arguments:
#   _inc     - directory with the Inference Engine headers
#   _lib     - library location (stored as IMPORTED_LOCATION)
#   _lib_rel - Release library (stored as IMPORTED_IMPLIB_RELEASE)
#   _lib_dbg - Debug library (stored as IMPORTED_IMPLIB_DEBUG)
#   _msg     - text appended to the "Detected InferenceEngine" status message
#
# Nothing happens unless the include directory and at least one of the three
# library arguments evaluate to true. On success INF_ENGINE_TARGET is set in
# the caller's scope and INF_ENGINE_VERSION is added to the cache as "Unknown"
# (an already cached value is not overwritten, since FORCE is not used).
function(add_custom_ie_build _inc _lib _lib_rel _lib_dbg _msg)
  # Headers plus at least one library flavour are mandatory.
  if(NOT (_inc AND (_lib OR _lib_rel OR _lib_dbg)))
    return()
  endif()

  add_library(inference_engine UNKNOWN IMPORTED)
  set_target_properties(inference_engine
    PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${_inc}"
      IMPORTED_LOCATION "${_lib}"
      IMPORTED_IMPLIB_RELEASE "${_lib_rel}"
      IMPORTED_IMPLIB_DEBUG "${_lib_dbg}"
  )

  # The OpenMP library (iomp5) is searched only in INF_ENGINE_OMP_DIR;
  # a miss is reported but does not abort the detection.
  find_library(omp_lib iomp5 PATHS "${INF_ENGINE_OMP_DIR}" NO_DEFAULT_PATH)
  if(omp_lib)
    set_target_properties(inference_engine PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${omp_lib}")
  else()
    message(WARNING "OpenMP for IE have not been found. Set INF_ENGINE_OMP_DIR variable if you experience build errors.")
  endif()

  # Publish the result: target name to the caller, version to the cache.
  set(INF_ENGINE_TARGET inference_engine PARENT_SCOPE)
  set(INF_ENGINE_VERSION "Unknown" CACHE STRING "")
  message(STATUS "Detected InferenceEngine: ${_msg}")
endfunction()
if(NOT ie_root_paths)
list(APPEND ie_root_paths "/opt/intel/computer_vision_sdk/deployment_tools/inference_engine/")
endif()
# ======================
find_path(INF_ENGINE_ROOT_DIR include/inference_engine.hpp PATHS ${ie_root_paths})
if(INF_ENGINE_ROOT_DIR MATCHES "-NOTFOUND$")
unset(INF_ENGINE_ROOT_DIR CACHE)
endif()
find_package(InferenceEngine QUIET)
if(InferenceEngine_FOUND)
set(INF_ENGINE_TARGET IE::inference_engine)
set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}" CACHE STRING "")
message(STATUS "Detected InferenceEngine: cmake package")
endif()
set(INF_ENGINE_INCLUDE_DIRS "${INF_ENGINE_ROOT_DIR}/include" CACHE PATH "Path to Inference Engine include directory")
if(NOT INF_ENGINE_ROOT_DIR
OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}"
OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/include/inference_engine.hpp"
)
message(WARNING "DL IE: Can't detect INF_ENGINE_ROOT_DIR location.")
ie_fail()
if(NOT INF_ENGINE_TARGET AND INF_ENGINE_LIB_DIRS AND INF_ENGINE_INCLUDE_DIRS)
find_path(ie_custom_inc "inference_engine.hpp" PATHS "${INF_ENGINE_INCLUDE_DIRS}" NO_DEFAULT_PATH)
find_library(ie_custom_lib "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}" NO_DEFAULT_PATH)
find_library(ie_custom_lib_rel "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Release" NO_DEFAULT_PATH)
find_library(ie_custom_lib_dbg "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Debug" NO_DEFAULT_PATH)
add_custom_ie_build("${ie_custom_inc}" "${ie_custom_lib}" "${ie_custom_lib_rel}" "${ie_custom_lib_dbg}" "INF_ENGINE_{INCLUDE,LIB}_DIRS")
endif()
set(INF_ENGINE_LIBRARIES "")
set(ie_lib_list inference_engine)
if(NOT IS_ABSOLUTE "${IE_PLUGINS_PATH}")
set(IE_PLUGINS_PATH "${INF_ENGINE_ROOT_DIR}/${IE_PLUGINS_PATH}")
set(_loc "$ENV{INTEL_CVSDK_DIR}")
if(NOT INF_ENGINE_TARGET AND _loc)
set(INF_ENGINE_PLATFORM "ubuntu_16.04" CACHE STRING "InferenceEngine platform (library dir)")
find_path(ie_custom_env_inc "inference_engine.hpp" PATHS "${_loc}/deployment_tools/inference_engine/include" NO_DEFAULT_PATH)
find_library(ie_custom_env_lib "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/${INF_ENGINE_PLATFORM}/intel64" NO_DEFAULT_PATH)
find_library(ie_custom_env_lib_rel "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Release" NO_DEFAULT_PATH)
find_library(ie_custom_env_lib_dbg "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Debug" NO_DEFAULT_PATH)
add_custom_ie_build("${ie_custom_env_inc}" "${ie_custom_env_lib}" "${ie_custom_env_lib_rel}" "${ie_custom_env_lib_dbg}" "OpenVINO (${_loc})")
endif()
link_directories(
${INF_ENGINE_ROOT_DIR}/external/mkltiny_lnx/lib
${INF_ENGINE_ROOT_DIR}/external/cldnn/lib
)
foreach(lib ${ie_lib_list})
find_library(${lib} NAMES ${lib} HINTS ${IE_PLUGINS_PATH})
if(NOT ${lib})
message(WARNING "DL IE: Can't find library: '${lib}'")
ie_fail()
endif()
list(APPEND INF_ENGINE_LIBRARIES ${${lib}})
endforeach()
# Add more features to the target
set(HAVE_INF_ENGINE TRUE)
if(INF_ENGINE_TARGET)
if(NOT INF_ENGINE_RELEASE)
message(WARNING "InferenceEngine version have not been set, 2018R2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
endif()
set(INF_ENGINE_RELEASE "2018020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
)
endif()
......@@ -1132,7 +1132,7 @@ function(ocv_add_perf_tests)
source_group("Src" FILES "${${the_target}_pch}")
ocv_add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch})
ocv_target_include_modules(${the_target} ${perf_deps} "${perf_path}")
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_PERF_${the_module}_DEPS})
add_dependencies(opencv_perf_tests ${the_target})
set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};PerfTest")
......@@ -1175,7 +1175,7 @@ function(ocv_add_perf_tests)
endfunction()
# this is a command for adding OpenCV accuracy/regression tests to the module
# ocv_add_accuracy_tests([FILES <source group name> <list of sources>] [DEPENDS_ON] <list of extra dependencies>)
# ocv_add_accuracy_tests(<list of extra dependencies>)
function(ocv_add_accuracy_tests)
ocv_debug_message("ocv_add_accuracy_tests(" ${ARGN} ")")
......@@ -1211,7 +1211,7 @@ function(ocv_add_accuracy_tests)
source_group("Src" FILES "${${the_target}_pch}")
ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch})
ocv_target_include_modules(${the_target} ${test_deps} "${test_path}")
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_TEST_${the_module}_DEPS})
add_dependencies(opencv_tests ${the_target})
set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};AccuracyTest")
......
......@@ -1016,3 +1016,17 @@
year = {2017},
organization = {IEEE}
}
@ARTICLE{gonzalez,
title={Digital Image Fundamentals, Digital Imaging Processing},
author={Gonzalez, Rafael C and others},
year={1987},
publisher={Addison Wesley Publishing Company}
}
@ARTICLE{gruzman,
title={Цифровая обработка изображений в информационных системах},
author={Грузман, И.С. and Киричук, В.С. and Косых, В.П. and Перетягин, Г.И. and Спектор, А.А.},
year={2000},
publisher={Изд-во НГТУ Новосибирск}
}
doc/tutorials/imgproc/out_of_focus_deblur_filter/images/original.jpg

14.1 KB

doc/tutorials/imgproc/out_of_focus_deblur_filter/images/psf.png

630 Bytes

doc/tutorials/imgproc/out_of_focus_deblur_filter/images/recovered.jpg

41.7 KB

Out-of-focus Deblur Filter {#tutorial_out_of_focus_deblur_filter}
==========================
Goal
----
In this tutorial you will learn:
- what is a degradation image model
- what is PSF of out-of-focus image
- how to restore a blurred image
- what is Wiener filter
Theory
------
@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and an article [SmartDeblur].
@note The out-of-focus image on this page is a real-world image. It was defocused manually using the camera optics.
### What is a degradation image model?
A mathematical model of the image degradation in frequency domain representation is:
\f[S = H\cdot U + N\f]
where
\f$S\f$ is a spectrum of blurred (degraded) image,
\f$U\f$ is a spectrum of original true (undegraded) image,
\f$H\f$ is frequency response of point spread function (PSF),
\f$N\f$ is a spectrum of additive noise.
Circular PSF is a good approximation of out-of-focus distortion. Such PSF is specified by only one parameter - radius \f$R\f$. Circular PSF is used in this work.
![Circular point spread function](psf.png)
### How to restore a blurred image?
The objective of restoration (deblurring) is to obtain an estimate of the original image. Restoration formula in frequency domain is:
\f[U' = H_w\cdot S\f]
where
\f$U'\f$ is spectrum of estimation of original image \f$U\f$,
\f$H_w\f$ is restoration filter, for example, Wiener filter.
### What is Wiener filter?
The Wiener filter is a way to restore a blurred image. Let's suppose that the PSF is a real and symmetric signal, and that the power spectra of the original true image and of the noise are not known;
then the simplified Wiener formula is:
\f[H_w = \frac{H}{|H|^2+\frac{1}{SNR}} \f]
where
\f$SNR\f$ is signal-to-noise ratio.
So, in order to recover an out-of-focus image with the Wiener filter, one needs to know the \f$SNR\f$ and the radius \f$R\f$ of the circular PSF.
Source code
-----------
You can find source code in the `samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp` of the OpenCV source code library.
@include cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp
Explanation
-----------
An out-of-focus image recovering algorithm consists of PSF generation, Wiener filter generation and filtering a blurred image in the frequency domain:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp main
The function calcPSF() forms a circular PSF according to the input parameter radius \f$R\f$:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcPSF
The function calcWnrFilter() synthesizes the simplified Wiener filter \f$H_w\f$ according to the formula described above:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcWnrFilter
The function fftshift() rearranges the PSF. This code was just copied from the tutorial @ref tutorial_discrete_fourier_transform "Discrete Fourier Transform":
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp fftshift
The function filter2DFreq() filters a blurred image in the frequency domain:
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp filter2DFreq
Result
------
Below you can see a real out-of-focus image:
![Out-of-focus image](images/original.jpg)
The result below was obtained with the parameters \f$R\f$ = 53 and \f$SNR\f$ = 5200:
![The restored (deblurred) image](images/recovered.jpg)
The Wiener filter was used; the values of \f$R\f$ and \f$SNR\f$ were selected manually to give the best possible visual result.
We can see that the result is not perfect, but it gives us a hint to the image content. With some difficulty, the text is readable.
@note The parameter \f$R\f$ is the most important. So you should adjust \f$R\f$ first, then \f$SNR\f$.
@note Sometimes you can observe a ringing effect in a restored image. This effect can be reduced by several methods. For example, you can taper the input image edges.
You can also find a quick video demonstration of this on
[YouTube](https://youtu.be/0bEcE4B0XP4).
@youtube{0bEcE4B0XP4}
References
------
- [Image Deblurring in Matlab] - Image Deblurring in Matlab
- [SmartDeblur] - SmartDeblur site
<!-- invisible references list -->
[Digital Image Processing]: http://web.ipac.caltech.edu/staff/fmasci/home/astro_refs/Digital_Image_Processing_2ndEd.pdf
[Image Deblurring in Matlab]: https://www.mathworks.com/help/images/image-deblurring.html
[SmartDeblur]: http://yuzhikov.com/articles/BlurredImagesRestoration1.htm
......@@ -292,3 +292,13 @@ In this section you will learn about the image processing (manipulation) functio
*Author:* Theodore Tsesmelis
Where we learn to segment objects using Laplacian filtering, the Distance Transformation and the Watershed algorithm.
- @subpage tutorial_out_of_focus_deblur_filter
*Languages:* C++
*Compatibility:* \> OpenCV 2.0
*Author:* Karpushin Vladislav
You will learn how to recover an out-of-focus image by Wiener filter.
......@@ -60,6 +60,17 @@
// access from within opencv code more accessible
namespace cv {
namespace hal {
/** Store mode selector accepted by the mode-taking store intrinsics
    (e.g. the extra trailing argument of v_store_interleave()). */
enum StoreMode
{
// no alignment requirement on the destination pointer
STORE_UNALIGNED = 0,
// destination pointer is expected to be suitably aligned for the vector type
STORE_ALIGNED = 1,
// aligned store that additionally asks to bypass the cache where the backend
// supports it; backends without such a store may treat it as a plain store
STORE_ALIGNED_NOCACHE = 2
};
}
template<typename _Tp> struct V_TypeTraits
{
};
......@@ -154,7 +165,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// available instruction set) will get vx_ prefix
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v245_load())
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
#if CV_AVX2
#include "opencv2/core/hal/intrin_avx.hpp"
......@@ -214,14 +225,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); }
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
......@@ -316,7 +329,7 @@ template<typename _Tp> struct V_RegTraits
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
inline void vx_cleanup() { v256_cleanup(); }
#elif CV_SIMD128
#elif CV_SIMD128 || CV_SIMD128_CPP
typedef v_uint8x16 v_uint8;
typedef v_int8x16 v_int8;
typedef v_uint16x8 v_uint16;
......
......@@ -1319,7 +1319,8 @@ Scheme:
For all types except 64-bit. */
template<typename _Tp, int n>
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b)
const v_reg<_Tp, n>& b,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
int i, i2;
for( i = i2 = 0; i < n; i++, i2 += 2 )
......@@ -1339,7 +1340,8 @@ Scheme:
For all types except 64-bit. */
template<typename _Tp, int n>
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
int i, i3;
for( i = i3 = 0; i < n; i++, i3 += 3 )
......@@ -1360,7 +1362,8 @@ Scheme:
For all types except 64-bit. */
template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
const v_reg<_Tp, n>& d)
const v_reg<_Tp, n>& d,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
int i, i4;
for( i = i4 = 0; i < n; i++, i4 += 4 )
......@@ -1430,6 +1433,20 @@ inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
ptr[i] = a.s[i];
}
/** @brief Store register contents to memory ("nocache" aligned variant, generic C++ fallback)

Copies all n lanes of the register to ptr[0] .. ptr[n-1] with plain
element-wise assignments; no special store instruction is involved here. */
template<typename _Tp, int n>
inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
{
    int lane = 0;
    while( lane < n )
    {
        ptr[lane] = a.s[lane];
        ++lane;
    }
}
/** @brief Store register contents to memory with an explicit store mode (generic C++ fallback)

The mode argument is accepted for interface compatibility only and is ignored:
all n lanes are copied to ptr[0] .. ptr[n-1] with plain element-wise assignments. */
template<typename _Tp, int n>
inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
{
    int lane = 0;
    while( lane < n )
    {
        ptr[lane] = a.s[lane];
        ++lane;
    }
}
/** @brief Combine vector from first elements of two vectors
Scheme:
......
......@@ -319,6 +319,9 @@ static inline void cv_vst1_f16(void* ptr, float16x4_t a)
#endif
}
// Fallback that is active only when vdup_n_f16 is not already defined as a macro:
// builds a float16x4_t whose four lanes are all set to v.
// NOTE: v is expanded four times, so do not pass expressions with side effects.
#ifndef vdup_n_f16
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
#endif
struct v_float16x8
{
......@@ -864,6 +867,10 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
......@@ -889,6 +896,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
inline v_float16x8 v_load_f16_aligned(const short* ptr)
{ return v_float16x8(cv_vld1q_f16(ptr)); }
inline v_float16x8 v_load_f16_low(const short* ptr)
{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr), vdup_n_f16((float16_t)0))); }
inline v_float16x8 v_load_f16_halves(const short* ptr0, const short* ptr1)
{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr0), cv_vld1_f16(ptr1))); }
inline void v_store(short* ptr, const v_float16x8& a)
{ cv_vst1q_f16(ptr, a.val); }
inline void v_store_aligned(short* ptr, const v_float16x8& a)
......@@ -1292,14 +1304,16 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
c.val = v.val[2]; \
d.val = v.val[3]; \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ \
_Tpvec##x2_t v; \
v.val[0] = a.val; \
v.val[1] = b.val; \
vst2q_##suffix(ptr, v); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ \
_Tpvec##x3_t v; \
v.val[0] = a.val; \
......@@ -1308,7 +1322,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
vst3q_##suffix(ptr, v); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
const v_##_Tpvec& c, const v_##_Tpvec& d) \
const v_##_Tpvec& c, const v_##_Tpvec& d, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
{ \
_Tpvec##x4_t v; \
v.val[0] = a.val; \
......@@ -1360,7 +1375,8 @@ inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \
d = v_##tp##x2(vcombine_##suffix(d0, d1)); \
} \
\
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b ) \
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ \
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
......@@ -1369,7 +1385,8 @@ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2&
} \
\
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
const v_##tp##x2& b, const v_##tp##x2& c ) \
const v_##tp##x2& b, const v_##tp##x2& c, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ \
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
......@@ -1380,7 +1397,8 @@ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
} \
\
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
const v_##tp##x2& c, const v_##tp##x2& d ) \
const v_##tp##x2& c, const v_##tp##x2& d, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ \
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
......
......@@ -249,6 +249,10 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ st(a.val, 0, ptr); } \
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
{ st_a(a.val, 0, ptr); } \
inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
{ st_a(a.val, 0, ptr); } \
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vec_st_l8(a.val, ptr); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
......@@ -281,13 +285,16 @@ inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \
_Tpvec& c, _Tpvec& d) \
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ vec_st_interleave(a.val, b.val, ptr); } \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \
const _Tpvec& b, const _Tpvec& c) \
const _Tpvec& b, const _Tpvec& c, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ vec_st_interleave(a.val, b.val, c.val, ptr); } \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
const _Tpvec& c, const _Tpvec& d) \
const _Tpvec& c, const _Tpvec& d, \
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
......
......@@ -457,6 +457,18 @@ Returns empty string if feature is not defined
*/
CV_EXPORTS_W String getHardwareFeatureName(int feature);
/** @brief Returns list of CPU features enabled during compilation.
Returned value is a string containing space separated list of CPU features with following markers:
- no markers - baseline features
- prefix `*` - features enabled in dispatcher
- suffix `?` - features enabled but not available in HW
Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?`
*/
CV_EXPORTS std::string getCPUFeaturesLine();
/** @brief Returns the number of logical CPUs available for the process.
*/
CV_EXPORTS_W int getNumberOfCPUs();
......
......@@ -1180,7 +1180,8 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
op == CMP_NE || op == CMP_GE || op == CMP_GT );
if(_src1.empty() || _src2.empty())
CV_Assert(_src1.empty() == _src2.empty());
if (_src1.empty() && _src2.empty())
{
_dst.release();
return;
......
......@@ -411,7 +411,8 @@ Mat& Mat::operator = (const Scalar& s)
{
CV_INSTRUMENT_REGION()
if (empty()) return *this;
if (this->empty())
return *this;
const Mat* arrays[] = { this };
uchar* dptr;
......
......@@ -515,17 +515,17 @@ void exp32f( const float *_x, float *y, int n )
#if CV_SIMD
const int VECSZ = v_float32::nlanes;
static const v_float32 vprescale = vx_setall_f32((float)exp_prescale);
static const v_float32 vpostscale = vx_setall_f32((float)exp_postscale);
static const v_float32 vminval = vx_setall_f32(minval);
static const v_float32 vmaxval = vx_setall_f32(maxval);
const v_float32 vprescale = vx_setall_f32((float)exp_prescale);
const v_float32 vpostscale = vx_setall_f32((float)exp_postscale);
const v_float32 vminval = vx_setall_f32(minval);
const v_float32 vmaxval = vx_setall_f32(maxval);
static const v_float32 vA1 = vx_setall_f32((float)A1);
static const v_float32 vA2 = vx_setall_f32((float)A2);
static const v_float32 vA3 = vx_setall_f32((float)A3);
static const v_float32 vA4 = vx_setall_f32((float)A4);
const v_float32 vA1 = vx_setall_f32((float)A1);
const v_float32 vA2 = vx_setall_f32((float)A2);
const v_float32 vA3 = vx_setall_f32((float)A3);
const v_float32 vA4 = vx_setall_f32((float)A4);
static const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
bool y_aligned = (size_t)(void*)y % 32 == 0;
for( ; i < n; i += VECSZ*2 )
......@@ -627,18 +627,18 @@ void exp64f( const double *_x, double *y, int n )
#if CV_SIMD_64F
const int VECSZ = v_float64::nlanes;
static const v_float64 vprescale = vx_setall_f64(exp_prescale);
static const v_float64 vpostscale = vx_setall_f64(exp_postscale);
static const v_float64 vminval = vx_setall_f64(minval);
static const v_float64 vmaxval = vx_setall_f64(maxval);
static const v_float64 vA1 = vx_setall_f64(A1);
static const v_float64 vA2 = vx_setall_f64(A2);
static const v_float64 vA3 = vx_setall_f64(A3);
static const v_float64 vA4 = vx_setall_f64(A4);
static const v_float64 vA5 = vx_setall_f64(A5);
static const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
const v_float64 vprescale = vx_setall_f64(exp_prescale);
const v_float64 vpostscale = vx_setall_f64(exp_postscale);
const v_float64 vminval = vx_setall_f64(minval);
const v_float64 vmaxval = vx_setall_f64(maxval);
const v_float64 vA1 = vx_setall_f64(A1);
const v_float64 vA2 = vx_setall_f64(A2);
const v_float64 vA3 = vx_setall_f64(A3);
const v_float64 vA4 = vx_setall_f64(A4);
const v_float64 vA5 = vx_setall_f64(A5);
const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
bool y_aligned = (size_t)(void*)y % 32 == 0;
for( ; i < n; i += VECSZ*2 )
......@@ -1024,13 +1024,13 @@ void log32f( const float *_x, float *y, int n )
#if CV_SIMD
const int VECSZ = v_float32::nlanes;
static const v_float32 vln2 = vx_setall_f32((float)ln_2);
static const v_float32 v1 = vx_setall_f32(1.f);
static const v_float32 vshift = vx_setall_f32(-1.f/512);
const v_float32 vln2 = vx_setall_f32((float)ln_2);
const v_float32 v1 = vx_setall_f32(1.f);
const v_float32 vshift = vx_setall_f32(-1.f/512);
static const v_float32 vA0 = vx_setall_f32(A0);
static const v_float32 vA1 = vx_setall_f32(A1);
static const v_float32 vA2 = vx_setall_f32(A2);
const v_float32 vA0 = vx_setall_f32(A0);
const v_float32 vA1 = vx_setall_f32(A1);
const v_float32 vA2 = vx_setall_f32(A2);
for( ; i < n; i += VECSZ )
{
......@@ -1097,9 +1097,9 @@ void log64f( const double *x, double *y, int n )
#if CV_SIMD_64F
const int VECSZ = v_float64::nlanes;
static const v_float64 vln2 = vx_setall_f64(ln_2);
const v_float64 vln2 = vx_setall_f64(ln_2);
static const v_float64
const v_float64
vA0 = vx_setall_f64(A0), vA1 = vx_setall_f64(A1),
vA2 = vx_setall_f64(A2), vA3 = vx_setall_f64(A3),
vA4 = vx_setall_f64(A4), vA5 = vx_setall_f64(A5),
......
......@@ -602,13 +602,13 @@ void Mat::pop_back(size_t nelems)
void Mat::push_back_(const void* elem)
{
int r = size.p[0];
size_t r = size.p[0];
if( isSubmatrix() || dataend + step.p[0] > datalimit )
reserve( std::max(r + 1, (r*3+1)/2) );
size_t esz = elemSize();
memcpy(data + r*step.p[0], elem, esz);
size.p[0] = r + 1;
size.p[0] = int(r + 1);
dataend += step.p[0];
uint64 tsz = size.p[0];
for( int i = 1; i < dims; i++ )
......@@ -709,7 +709,8 @@ void Mat::resize(size_t nelems, const Scalar& s)
void Mat::push_back(const Mat& elems)
{
int r = size.p[0], delta = elems.size.p[0];
size_t r = size.p[0];
size_t delta = elems.size.p[0];
if( delta == 0 )
return;
if( this == &elems )
......@@ -726,7 +727,7 @@ void Mat::push_back(const Mat& elems)
size.p[0] = elems.size.p[0];
bool eq = size == elems.size;
size.p[0] = r;
size.p[0] = int(r);
if( !eq )
CV_Error(CV_StsUnmatchedSizes, "Pushed vector length is not equal to matrix row length");
if( type() != elems.type() )
......@@ -735,7 +736,7 @@ void Mat::push_back(const Mat& elems)
if( isSubmatrix() || dataend + step.p[0]*delta > datalimit )
reserve( std::max(r + delta, (r*3+1)/2) );
size.p[0] += delta;
size.p[0] += int(delta);
dataend += step.p[0]*delta;
//updateContinuityFlag(*this);
......@@ -744,7 +745,7 @@ void Mat::push_back(const Mat& elems)
memcpy(data + r*step.p[0], elems.data, elems.total()*elems.elemSize());
else
{
Mat part = rowRange(r, r + delta);
Mat part = rowRange(int(r), int(r + delta));
elems.copyTo(part);
}
}
......
......@@ -766,11 +766,13 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
{
CV_INSTRUMENT_REGION()
CV_Assert(!_src.empty());
CV_Assert( _mask.empty() || _mask.type() == CV_8UC1 );
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_meanStdDev(_src, _mean, _sdv, _mask))
Mat src = _src.getMat(), mask = _mask.getMat();
CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MEAN_STDDEV>(src.cols, src.rows),
openvx_meanStdDev(src, _mean, _sdv, mask))
......
......@@ -9,21 +9,58 @@
namespace cv { namespace hal {
#if CV_SIMD
/*
The trick with STORE_UNALIGNED/STORE_ALIGNED_NOCACHE is the following:
on IA there are instructions, such as movntps, to which
v_store_interleave(...., STORE_ALIGNED_NOCACHE) is mapped.
Those instructions write directly into memory without touching the cache,
which results in dramatic speed improvements, especially on
large arrays (FullHD, 4K etc.).
Those intrinsics require the destination address to be aligned
to 16/32 bytes (with SSE2 and AVX2, respectively).
So we potentially split the processing into 3 stages:
1) the optional prefix part [0:i0), where we use simple unaligned stores.
2) the optional main part [i0:len - VECSZ], where we use "nocache" mode.
   But in some cases we have to use unaligned stores in this part.
3) the optional suffix part (the tail) (len - VECSZ:len), where we switch back to "unaligned" mode
   to process the last VECSZ elements (this final vector may overlap elements that were already written).
In principle there can be very poorly aligned data where there is no main part.
For that we set i0=0 and use unaligned stores for the whole array.
*/
template<typename T, typename VecT> static void
vecmerge_( const T** src, T* dst, int len, int cn )
{
int i;
const int VECSZ = VecT::nlanes;
int i, i0 = 0;
const T* src0 = src[0];
const T* src1 = src[1];
const int VECSZ = VecT::nlanes;
int r = (int)((size_t)(void*)dst % (VECSZ*sizeof(T)));
hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE;
if( r != 0 )
{
mode = hal::STORE_UNALIGNED;
if( r % cn == 0 && len > VECSZ )
i0 = VECSZ - (r / cn);
}
if( cn == 2 )
{
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
if( i > len - VECSZ )
{
i = len - VECSZ;
mode = hal::STORE_UNALIGNED;
}
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
v_store_interleave(dst + i*cn, a, b);
v_store_interleave(dst + i*cn, a, b, mode);
if( i < i0 )
{
i = i0 - VECSZ;
mode = hal::STORE_ALIGNED_NOCACHE;
}
}
}
else if( cn == 3 )
......@@ -31,9 +68,18 @@ vecmerge_( const T** src, T* dst, int len, int cn )
const T* src2 = src[2];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
if( i > len - VECSZ )
{
i = len - VECSZ;
mode = hal::STORE_UNALIGNED;
}
VecT a = vx_load(src0 + i), b = vx_load(src1 + i), c = vx_load(src2 + i);
v_store_interleave(dst + i*cn, a, b, c);
v_store_interleave(dst + i*cn, a, b, c, mode);
if( i < i0 )
{
i = i0 - VECSZ;
mode = hal::STORE_ALIGNED_NOCACHE;
}
}
}
else
......@@ -43,10 +89,19 @@ vecmerge_( const T** src, T* dst, int len, int cn )
const T* src3 = src[3];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
if( i > len - VECSZ )
{
i = len - VECSZ;
mode = hal::STORE_UNALIGNED;
}
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
VecT c = vx_load(src2 + i), d = vx_load(src3 + i);
v_store_interleave(dst + i*cn, a, b, c, d);
v_store_interleave(dst + i*cn, a, b, c, d, mode);
if( i < i0 )
{
i = i0 - VECSZ;
mode = hal::STORE_ALIGNED_NOCACHE;
}
}
}
vx_cleanup();
......
......@@ -2834,7 +2834,22 @@ extern "C" {
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
{
((cv::ocl::Kernel::Impl*)p)->finit(e);
try
{
((cv::ocl::Kernel::Impl*)p)->finit(e);
}
catch (const cv::Exception& exc)
{
CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
}
catch (const std::exception& exc)
{
CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
}
catch (...)
{
CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
}
}
}
......
......@@ -511,8 +511,8 @@ static RandnScaleFunc randnScaleTab[] =
void RNG::fill( InputOutputArray _mat, int disttype,
InputArray _param1arg, InputArray _param2arg, bool saturateRange )
{
if (_mat.empty())
return;
CV_Assert(!_mat.empty());
Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
int depth = mat.depth(), cn = mat.channels();
AutoBuffer<double> _parambuf;
......
......@@ -9,23 +9,46 @@
namespace cv { namespace hal {
#if CV_SIMD
// see the comments for vecmerge_ in merge.cpp
template<typename T, typename VecT> static void
vecsplit_( const T* src, T** dst, int len, int cn )
{
int i;
const int VECSZ = VecT::nlanes;
int i, i0 = 0;
T* dst0 = dst[0];
T* dst1 = dst[1];
const int VECSZ = VecT::nlanes;
int r0 = (int)((size_t)(void*)dst0 % (VECSZ*sizeof(T)));
int r1 = (int)((size_t)(void*)dst1 % (VECSZ*sizeof(T)));
int r2 = cn > 2 ? (int)((size_t)(void*)dst[2] % (VECSZ*sizeof(T))) : r0;
int r3 = cn > 3 ? (int)((size_t)(void*)dst[3] % (VECSZ*sizeof(T))) : r0;
hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE;
if( (r0|r1|r2|r3) != 0 )
{
mode = hal::STORE_UNALIGNED;
if( r0 == r1 && r0 == r2 && r0 == r3 && r0 % cn == 0 && len > VECSZ )
i0 = VECSZ - (r0 / cn);
}
if( cn == 2 )
{
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
if( i > len - VECSZ )
{
i = len - VECSZ;
mode = hal::STORE_UNALIGNED;
}
VecT a, b;
v_load_deinterleave(src + i*cn, a, b);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
v_store(dst0 + i, a, mode);
v_store(dst1 + i, b, mode);
if( i < i0 )
{
i = i0 - VECSZ;
mode = hal::STORE_ALIGNED_NOCACHE;
}
}
}
else if( cn == 3 )
......@@ -33,12 +56,21 @@ vecsplit_( const T* src, T** dst, int len, int cn )
T* dst2 = dst[2];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
if( i > len - VECSZ )
{
i = len - VECSZ;
mode = hal::STORE_UNALIGNED;
}
VecT a, b, c;
v_load_deinterleave(src + i*cn, a, b, c);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
v_store(dst2 + i, c);
v_store(dst0 + i, a, mode);
v_store(dst1 + i, b, mode);
v_store(dst2 + i, c, mode);
if( i < i0 )
{
i = i0 - VECSZ;
mode = hal::STORE_ALIGNED_NOCACHE;
}
}
}
else
......@@ -48,13 +80,22 @@ vecsplit_( const T* src, T** dst, int len, int cn )
T* dst3 = dst[3];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
if( i > len - VECSZ )
{
i = len - VECSZ;
mode = hal::STORE_UNALIGNED;
}
VecT a, b, c, d;
v_load_deinterleave(src + i*cn, a, b, c, d);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
v_store(dst2 + i, c);
v_store(dst3 + i, d);
v_store(dst0 + i, a, mode);
v_store(dst1 + i, b, mode);
v_store(dst2 + i, c, mode);
v_store(dst3 + i, d, mode);
if( i < i0 )
{
i = i0 - VECSZ;
mode = hal::STORE_ALIGNED_NOCACHE;
}
}
}
vx_cleanup();
......
......@@ -654,6 +654,27 @@ String getHardwareFeatureName(int feature)
return name ? String(name) : String();
}
/**
 * Builds a one-line, space-separated summary of the CPU features listed in
 * CV_CPU_BASELINE_FEATURES followed by CV_CPU_DISPATCH_FEATURES.
 *
 * Formatting rules (all visible in the loop below):
 *  - a 0 entry is not printed; every feature after it is prefixed with "*";
 *  - a feature for which checkHardwareSupport() returns false gets a "?" suffix.
 *
 * @return the formatted line; empty when no feature is listed.
 */
std::string getCPUFeaturesLine()
{
    const int features[] = { CV_CPU_BASELINE_FEATURES, CV_CPU_DISPATCH_FEATURES };
    const int sz = sizeof(features) / sizeof(features[0]);
    std::string result;
    std::string prefix;
    // features[0] is skipped on purpose.
    // NOTE(review): presumably it is the leading 0 that each generated feature list starts with
    // (the CMake template emits "#define CV_CPU_DISPATCH_FEATURES 0 \") - confirm for the baseline macro.
    for (int i = 1; i < sz; ++i)
    {
        if (features[i] == 0)
        {
            // Separator entry: everything that follows is marked with "*".
            prefix = "*";
            continue;
        }
        // Separate entries with a single space. Testing the accumulated string (instead of the
        // index) avoids a leading space when the first printed feature is not at index 1,
        // e.g. when the baseline list is empty and the separator sits at index 1.
        if (!result.empty()) result.append(" ");
        result.append(prefix);
        result.append(getHWFeatureNameSafe(features[i]));
        if (!checkHardwareSupport(features[i])) result.append("?");
    }
    return result;
}
volatile bool useOptimizedFlag = true;
void setUseOptimized( bool flag )
......
......@@ -84,14 +84,11 @@ UMatData::~UMatData()
allocatorFlags_ = 0;
if (originalUMatData)
{
UMatData* u = originalUMatData;
CV_XADD(&(u->urefcount), -1);
CV_XADD(&(u->refcount), -1);
bool showWarn = false;
if (u->refcount == 0)
UMatData* u = originalUMatData;
bool zero_Ref = CV_XADD(&(u->refcount), -1) == 1;
if (zero_Ref)
{
if (u->urefcount > 0)
showWarn = true;
// simulate Mat::deallocate
if (u->mapcount != 0)
{
......@@ -102,7 +99,10 @@ UMatData::~UMatData()
// we don't do "map", so we can't do "unmap"
}
}
if (u->refcount == 0 && u->urefcount == 0) // oops, we need to free resources
bool zero_URef = CV_XADD(&(u->urefcount), -1) == 1;
if (zero_Ref && !zero_URef)
showWarn = true;
if (zero_Ref && zero_URef) // oops, we need to free resources
{
showWarn = true;
// simulate UMat::deallocate
......
......@@ -2008,11 +2008,9 @@ TEST(Subtract, scalarc4_matc4)
TEST(Compare, empty)
{
cv::Mat temp, dst1, dst2;
cv::compare(temp, temp, dst1, cv::CMP_EQ);
dst2 = temp > 5;
EXPECT_NO_THROW(cv::compare(temp, temp, dst1, cv::CMP_EQ));
EXPECT_TRUE(dst1.empty());
EXPECT_TRUE(dst2.empty());
EXPECT_THROW(dst2 = temp > 5, cv::Exception);
}
TEST(Compare, regression_8999)
......@@ -2020,9 +2018,7 @@ TEST(Compare, regression_8999)
Mat_<double> A(4,1); A << 1, 3, 2, 4;
Mat_<double> B(1,1); B << 2;
Mat C;
ASSERT_ANY_THROW({
cv::compare(A, B, C, CMP_LT);
});
EXPECT_THROW(cv::compare(A, B, C, CMP_LT), cv::Exception);
}
......
......@@ -43,106 +43,35 @@
namespace opencv_test { namespace {
class Core_ConcatenationTest : public cvtest::BaseTest
TEST(Core_Concatenation, empty)
{
public:
Core_ConcatenationTest(bool horizontal, bool firstEmpty, bool secondEmpty);
protected:
int prepare_test_case( int );
void run_func();
int validate_test_results( int );
const Mat mat0x5(0,5, CV_8U, Scalar::all(1));
const Mat mat10x5(10,5, CV_8U, Scalar::all(1));
const Mat mat20x5(20,5, CV_8U, Scalar::all(1));
Mat mat0x5;
Mat mat10x5;
Mat mat20x5;
Mat mat5x0;
Mat mat5x10;
Mat mat5x20;
const Mat mat5x0(5,0, CV_8U, Scalar::all(1));
const Mat mat5x10(5,10, CV_8U, Scalar::all(1));
const Mat mat5x20(5,20, CV_8U, Scalar::all(1));
Mat result;
bool horizontal;
bool firstEmpty;
bool secondEmpty;
private:
static bool areEqual(const Mat& m1, const Mat& m2);
};
Core_ConcatenationTest::Core_ConcatenationTest(bool horizontal_, bool firstEmpty_, bool secondEmpty_)
: horizontal(horizontal_)
, firstEmpty(firstEmpty_)
, secondEmpty(secondEmpty_)
{
test_case_count = 1;
mat0x5 = Mat::ones(0,5, CV_8U);
mat10x5 = Mat::ones(10,5, CV_8U);
mat20x5 = Mat::ones(20,5, CV_8U);
mat5x0 = Mat::ones(5,0, CV_8U);
mat5x10 = Mat::ones(5,10, CV_8U);
mat5x20 = Mat::ones(5,20, CV_8U);
}
int Core_ConcatenationTest::prepare_test_case( int test_case_idx )
{
cvtest::BaseTest::prepare_test_case( test_case_idx );
return 1;
}
void Core_ConcatenationTest::run_func()
{
if (horizontal)
{
cv::hconcat((firstEmpty ? mat5x0 : mat5x10),
(secondEmpty ? mat5x0 : mat5x10),
result);
} else {
cv::vconcat((firstEmpty ? mat0x5 : mat10x5),
(secondEmpty ? mat0x5 : mat10x5),
result);
}
}
int Core_ConcatenationTest::validate_test_results( int )
{
Mat expected;
if (firstEmpty && secondEmpty)
expected = (horizontal ? mat5x0 : mat0x5);
else if ((firstEmpty && !secondEmpty) || (!firstEmpty && secondEmpty))
expected = (horizontal ? mat5x10 : mat10x5);
else
expected = (horizontal ? mat5x20 : mat20x5);
if (areEqual(expected, result))
{
return cvtest::TS::OK;
} else
{
ts->printf( cvtest::TS::LOG, "Concatenation failed");
ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH );
}
return cvtest::TS::OK;
}
bool Core_ConcatenationTest::areEqual(const Mat &m1, const Mat &m2)
{
return m1.size() == m2.size()
&& m1.type() == m2.type()
&& countNonZero(m1 != m2) == 0;
cv::hconcat(mat5x0, mat5x0, result);
EXPECT_MAT_N_DIFF(result, mat5x0, 0);
cv::hconcat(mat5x0, mat5x10, result);
EXPECT_MAT_N_DIFF(result, mat5x10, 0);
cv::hconcat(mat5x10, mat5x0, result);
EXPECT_MAT_N_DIFF(result, mat5x10, 0);
cv::hconcat(mat5x10, mat5x10, result);
EXPECT_MAT_N_DIFF(result, mat5x20, 0);
cv::vconcat(mat0x5, mat0x5, result);
EXPECT_MAT_N_DIFF(result, mat0x5, 0);
cv::vconcat(mat0x5, mat10x5, result);
EXPECT_MAT_N_DIFF(result, mat10x5, 0);
cv::vconcat(mat10x5, mat0x5, result);
EXPECT_MAT_N_DIFF(result, mat10x5, 0);
cv::vconcat(mat10x5, mat10x5, result);
EXPECT_MAT_N_DIFF(result, mat20x5, 0);
}
TEST(Core_Concatenation, hconcat_empty_nonempty) { Core_ConcatenationTest test(true, true, false); test.safe_run(); }
TEST(Core_Concatenation, hconcat_nonempty_empty) { Core_ConcatenationTest test(true, false, true); test.safe_run(); }
TEST(Core_Concatenation, hconcat_empty_empty) { Core_ConcatenationTest test(true, true, true); test.safe_run(); }
TEST(Core_Concatenation, vconcat_empty_nonempty) { Core_ConcatenationTest test(false, true, false); test.safe_run(); }
TEST(Core_Concatenation, vconcat_nonempty_empty) { Core_ConcatenationTest test(false, false, true); test.safe_run(); }
TEST(Core_Concatenation, vconcat_empty_empty) { Core_ConcatenationTest test(false, true, true); test.safe_run(); }
}} // namespace
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "test_intrin.simd.hpp"
\ No newline at end of file
......@@ -2,249 +2,101 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "test_intrin.simd.hpp"
#include "test_intrin_utils.hpp"
#define CV_CPU_SIMD_FILENAME "test_intrin_utils.hpp"
#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp"
#define CV_CPU_DISPATCH_MODE FP16
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
using namespace cv;
#define CV_CPU_DISPATCH_MODE AVX2
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
namespace opencv_test { namespace hal {
using namespace CV_CPU_OPTIMIZATION_NAMESPACE;
//============= 8-bit integer =====================================================================
TEST(hal_intrin, uint8x16) {
TheTest<v_uint8x16>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_expand_q()
.test_addsub()
.test_addsub_wrap()
.test_cmp()
.test_logic()
.test_min_max()
.test_absdiff()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
;
}
TEST(hal_intrin, uint8x16)
{ test_hal_intrin_uint8(); }
TEST(hal_intrin, int8x16) {
TheTest<v_int8x16>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_expand_q()
.test_addsub()
.test_addsub_wrap()
.test_cmp()
.test_logic()
.test_min_max()
.test_absdiff()
.test_abs()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
;
}
TEST(hal_intrin, int8x16)
{ test_hal_intrin_int8(); }
//============= 16-bit integer =====================================================================
TEST(hal_intrin, uint16x8) {
TheTest<v_uint16x8>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_addsub_wrap()
.test_mul()
.test_mul_expand()
.test_cmp()
.test_shift<1>()
.test_shift<8>()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
;
}
TEST(hal_intrin, uint16x8)
{ test_hal_intrin_uint16(); }
TEST(hal_intrin, int16x8) {
TheTest<v_int16x8>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_addsub_wrap()
.test_mul()
.test_mul_expand()
.test_cmp()
.test_shift<1>()
.test_shift<8>()
.test_dot_prod()
.test_logic()
.test_min_max()
.test_absdiff()
.test_abs()
.test_reduce()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
;
}
TEST(hal_intrin, int16x8)
{ test_hal_intrin_int16(); }
//============= 32-bit integer =====================================================================
TEST(hal_intrin, uint32x4) {
TheTest<v_uint32x4>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_mul()
.test_mul_expand()
.test_cmp()
.test_shift<1>()
.test_shift<8>()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
.test_transpose()
;
}
TEST(hal_intrin, int32x4)
{ test_hal_intrin_int32(); }
TEST(hal_intrin, int32x4) {
TheTest<v_int32x4>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_mul()
.test_abs()
.test_cmp()
.test_popcount()
.test_shift<1>().test_shift<8>()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_mask()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
.test_float_cvt32()
.test_float_cvt64()
.test_transpose()
;
}
TEST(hal_intrin, uint32x4)
{ test_hal_intrin_uint32(); }
//============= 64-bit integer =====================================================================
TEST(hal_intrin, uint64x2) {
TheTest<v_uint64x2>()
.test_loadstore()
.test_addsub()
.test_shift<1>().test_shift<8>()
.test_logic()
.test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>()
;
}
TEST(hal_intrin, uint64x2)
{ test_hal_intrin_uint64(); }
TEST(hal_intrin, int64x2) {
TheTest<v_int64x2>()
.test_loadstore()
.test_addsub()
.test_shift<1>().test_shift<8>()
.test_logic()
.test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>()
;
}
TEST(hal_intrin, int64x2)
{ test_hal_intrin_int64(); }
//============= Floating point =====================================================================
TEST(hal_intrin, float32x4) {
TheTest<v_float32x4>()
.test_loadstore()
.test_interleave()
.test_interleave_2channel()
.test_addsub()
.test_mul()
.test_div()
.test_cmp()
.test_sqrt_abs()
.test_min_max()
.test_float_absdiff()
.test_reduce()
.test_mask()
.test_unpack()
.test_float_math()
.test_float_cvt64()
.test_matmul()
.test_transpose()
.test_reduce_sum4()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
;
}
TEST(hal_intrin, float32x4)
{ test_hal_intrin_float32(); }
#if CV_SIMD128_64F
TEST(hal_intrin, float64x2) {
TheTest<v_float64x2>()
.test_loadstore()
.test_addsub()
.test_mul()
.test_div()
.test_cmp()
.test_sqrt_abs()
.test_min_max()
.test_float_absdiff()
.test_mask()
.test_unpack()
.test_float_math()
.test_float_cvt32()
.test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>()
;
}
#endif
TEST(hal_intrin, float64x2)
{ test_hal_intrin_float64(); }
TEST(hal_intrin,float16)
TEST(hal_intrin, float16x8)
{
CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
throw SkipTestException("Unsupported hardware: FP16 is not available");
}
}}
// Helpers for the 256-bit test wrappers in this file.
//  DISPATCH_SIMD_MODES - dispatch mode list handed to CV_CPU_DISPATCH.
//  DISPATCH_SIMD_NAME  - human-readable name used in the skip message.
//  DISPATCH_SIMD(fun)  - invokes CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); if control reaches
//                        the next statement, the test is skipped via SkipTestException.
// NOTE(review): presumably CV_CPU_DISPATCH returns from the enclosing function after calling `fun`
// when one of the listed modes is supported at run time - confirm against its definition.
#define DISPATCH_SIMD_MODES AVX2
#define DISPATCH_SIMD_NAME "SIMD256"
#define DISPATCH_SIMD(fun) \
do { \
CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \
throw SkipTestException( \
"Unsupported hardware: " \
DISPATCH_SIMD_NAME \
" is not available" \
); \
} while(0)
// 256-bit variants of the intrinsic tests: each test forwards one test_hal_intrin_* routine
// through DISPATCH_SIMD, which throws SkipTestException when it falls through.
TEST(hal_intrin256, uint8x32)
{ DISPATCH_SIMD(test_hal_intrin_uint8); }
TEST(hal_intrin256, int8x32)
{ DISPATCH_SIMD(test_hal_intrin_int8); }
TEST(hal_intrin256, uint16x16)
{ DISPATCH_SIMD(test_hal_intrin_uint16); }
TEST(hal_intrin256, int16x16)
{ DISPATCH_SIMD(test_hal_intrin_int16); }
TEST(hal_intrin256, uint32x8)
{ DISPATCH_SIMD(test_hal_intrin_uint32); }
TEST(hal_intrin256, int32x8)
{ DISPATCH_SIMD(test_hal_intrin_int32); }
TEST(hal_intrin256, uint64x4)
{ DISPATCH_SIMD(test_hal_intrin_uint64); }
TEST(hal_intrin256, int64x4)
{ DISPATCH_SIMD(test_hal_intrin_int64); }
TEST(hal_intrin256, float32x8)
{ DISPATCH_SIMD(test_hal_intrin_float32); }
TEST(hal_intrin256, float64x4)
{ DISPATCH_SIMD(test_hal_intrin_float64); }
// 256-bit FP16 test: skipped up front when CV_CPU_HAS_SUPPORT_FP16 is false, otherwise
// dispatched through DISPATCH_SIMD like the other hal_intrin256 tests.
TEST(hal_intrin256, float16x16)
{
// FP16 support is checked separately, before the SIMD dispatch.
if (!CV_CPU_HAS_SUPPORT_FP16)
throw SkipTestException("Unsupported hardware: FP16 is not available");
DISPATCH_SIMD(test_hal_intrin_float16);
}
}} // namespace
\ No newline at end of file
......@@ -9,7 +9,7 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
void test_hal_intrin_float16()
{
TheTest<v_float16x8>()
TheTest<v_float16>()
.test_loadstore_fp16()
.test_float_cvt_fp16()
;
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "test_intrin_utils.hpp"
namespace opencv_test { namespace hal {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// Forward declarations of the per-type test routines. They are always emitted; the definitions
// below are compiled only when CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY is not defined.
// NOTE(review): test_hal_intrin_float16 is defined further down but is not declared in this
// list - confirm that its declaration is provided elsewhere for declarations-only includes.
void test_hal_intrin_uint8();
void test_hal_intrin_int8();
void test_hal_intrin_uint16();
void test_hal_intrin_int16();
void test_hal_intrin_uint32();
void test_hal_intrin_int32();
void test_hal_intrin_uint64();
void test_hal_intrin_int64();
void test_hal_intrin_float32();
void test_hal_intrin_float64();
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
//============= 8-bit integer =====================================================================
// Runs the chained TheTest<v_uint8> checks for unsigned 8-bit vectors.
// TheTest is not defined in this chunk (presumably it comes from test_intrin_utils.hpp, included above).
void test_hal_intrin_uint8()
{
TheTest<v_uint8>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_expand_q()
.test_addsub()
.test_addsub_wrap()
.test_cmp()
.test_logic()
.test_min_max()
.test_absdiff()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
;
#if CV_SIMD256
// Extra pack/extract/rotate cases with larger template arguments (up to 31),
// compiled only when CV_SIMD256 is set.
TheTest<v_uint8>()
.test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>()
.test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>()
.test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>()
.test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>()
;
#endif
}
// Runs the chained TheTest<v_int8> checks for signed 8-bit vectors.
// Compared with the unsigned variant it adds test_abs() and has no test_pack_u cases
// or CV_SIMD256-only section.
void test_hal_intrin_int8()
{
TheTest<v_int8>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_expand_q()
.test_addsub()
.test_addsub_wrap()
.test_cmp()
.test_logic()
.test_min_max()
.test_absdiff()
.test_abs()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
;
}
//============= 16-bit integer =====================================================================
// Runs the chained TheTest<v_uint16> checks for unsigned 16-bit vectors
// (load/store, arithmetic, shifts, reductions, packing, extract/rotate).
void test_hal_intrin_uint16()
{
TheTest<v_uint16>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_addsub_wrap()
.test_mul()
.test_mul_expand()
.test_cmp()
.test_shift<1>()
.test_shift<8>()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
;
}
// Runs the chained TheTest<v_int16> checks for signed 16-bit vectors.
// Compared with the unsigned variant it adds test_dot_prod() and test_abs()
// and has no test_pack_u cases.
void test_hal_intrin_int16()
{
TheTest<v_int16>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_addsub_wrap()
.test_mul()
.test_mul_expand()
.test_cmp()
.test_shift<1>()
.test_shift<8>()
.test_dot_prod()
.test_logic()
.test_min_max()
.test_absdiff()
.test_abs()
.test_reduce()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
;
}
//============= 32-bit integer =====================================================================
// Runs the chained TheTest<v_uint32> checks for unsigned 32-bit vectors,
// including test_transpose().
void test_hal_intrin_uint32()
{
TheTest<v_uint32>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_mul()
.test_mul_expand()
.test_cmp()
.test_shift<1>()
.test_shift<8>()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
.test_transpose()
;
}
// Runs the chained TheTest<v_int32> checks for signed 32-bit vectors.
// In addition to the integer checks it runs test_float_cvt32() and test_float_cvt64().
void test_hal_intrin_int32()
{
TheTest<v_int32>()
.test_loadstore()
.test_interleave()
.test_expand()
.test_addsub()
.test_mul()
.test_abs()
.test_cmp()
.test_popcount()
.test_shift<1>().test_shift<8>()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_mask()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
.test_float_cvt32()
.test_float_cvt64()
.test_transpose()
;
}
//============= 64-bit integer =====================================================================
// Runs the chained TheTest<v_uint64> checks for unsigned 64-bit vectors:
// load/store, add/sub, shifts, logic, extract and rotate.
void test_hal_intrin_uint64()
{
TheTest<v_uint64>()
.test_loadstore()
.test_addsub()
.test_shift<1>().test_shift<8>()
.test_logic()
.test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>()
;
}
// Runs the chained TheTest<v_int64> checks for signed 64-bit vectors;
// the list of checks is the same as for the unsigned 64-bit variant.
void test_hal_intrin_int64()
{
TheTest<v_int64>()
.test_loadstore()
.test_addsub()
.test_shift<1>().test_shift<8>()
.test_logic()
.test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>()
;
}
//============= Floating point =====================================================================
// Runs the chained TheTest<v_float32> checks for single-precision vectors
// (arithmetic, math, conversions, matmul, transpose, reductions, extract/rotate).
void test_hal_intrin_float32()
{
TheTest<v_float32>()
.test_loadstore()
.test_interleave()
.test_interleave_2channel()
.test_addsub()
.test_mul()
.test_div()
.test_cmp()
.test_sqrt_abs()
.test_min_max()
.test_float_absdiff()
.test_reduce()
.test_mask()
.test_unpack()
.test_float_math()
.test_float_cvt64()
.test_matmul()
.test_transpose()
.test_reduce_sum4()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
;
#if CV_SIMD256
// Extra extract/rotate cases with arguments 4..7, compiled only when CV_SIMD256 is set.
TheTest<v_float32>()
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
;
#endif
}
// Runs the chained TheTest<v_float64> checks for double-precision vectors.
// The whole body is compiled only when CV_SIMD_64F is set; otherwise the function is a no-op.
void test_hal_intrin_float64()
{
#if CV_SIMD_64F
TheTest<v_float64>()
.test_loadstore()
.test_addsub()
.test_mul()
.test_div()
.test_cmp()
.test_sqrt_abs()
.test_min_max()
.test_float_absdiff()
.test_mask()
.test_unpack()
.test_float_math()
.test_float_cvt32()
.test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>()
;
#if CV_SIMD256
// Extra extract/rotate cases with arguments 2..3, compiled only when CV_SIMD256 is set.
TheTest<v_float64>()
.test_extract<2>().test_extract<3>()
.test_rotate<2>().test_rotate<3>()
;
#endif //CV_SIMD256
#endif
}
// FP16 checks (load/store and float conversion) on v_float16.
// Defined only when CV_FP16 is set and CV_SIMD_WIDTH is greater than 16.
#if CV_FP16 && CV_SIMD_WIDTH > 16
void test_hal_intrin_float16()
{
TheTest<v_float16>()
.test_loadstore_fp16()
.test_float_cvt_fp16()
;
}
#endif
#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END
}} //namespace
\ No newline at end of file
This diff is collapsed.
......@@ -173,7 +173,6 @@ void Core_RandTest::run( int )
dsz = slice+1 < maxSlice ? (int)(cvtest::randInt(rng) % (SZ - sz) + 1) : SZ - sz;
Mat aslice = arr[k].colRange(sz, sz + dsz);
tested_rng.fill(aslice, dist_type, A, B);
//printf("%d - %d\n", sz, sz + dsz);
}
}
......
......@@ -85,12 +85,6 @@ else()
set(sources_options EXCLUDE_OPENCL)
endif()
if(HAVE_INF_ENGINE)
add_definitions(-DHAVE_INF_ENGINE=1)
list(APPEND include_dirs ${INF_ENGINE_INCLUDE_DIRS})
list(APPEND libs ${INF_ENGINE_LIBRARIES})
endif()
ocv_module_include_directories(${include_dirs})
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC
......@@ -98,9 +92,9 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override") # Clang
endif()
ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs})
ocv_create_module(${libs})
ocv_create_module(${libs} ${INF_ENGINE_TARGET})
ocv_add_samples()
ocv_add_accuracy_tests()
ocv_add_accuracy_tests(${INF_ENGINE_TARGET})
ocv_add_perf_tests()
ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
......@@ -120,9 +114,3 @@ if(BUILD_PERF_TESTS)
endif()
endif()
endif()
# Test Intel's Inference Engine models
if(HAVE_INF_ENGINE AND TARGET opencv_test_dnn)
ocv_target_include_directories(opencv_test_dnn PRIVATE ${INF_ENGINE_INCLUDE_DIRS})
ocv_target_link_libraries(opencv_test_dnn LINK_PRIVATE ${INF_ENGINE_LIBRARIES})
endif()
......@@ -201,7 +201,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
* @param[out] outputs allocated output blobs, which will store results of the computation.
* @param[out] internals allocated internal blobs
*/
virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0;
virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
/** @brief Given the @p input blobs, computes the output @p blobs.
* @param[in] inputs the input blobs.
......
......@@ -44,7 +44,9 @@
#include <opencv2/core.hpp>
#include <opencv2/core/types_c.h>
#include <iostream>
#include <ostream>
#include <sstream>
namespace cv {
namespace dnn {
......@@ -178,13 +180,25 @@ static inline MatShape concat(const MatShape& a, const MatShape& b)
return c;
}
inline void print(const MatShape& shape, const String& name = "")
static inline std::string toString(const MatShape& shape, const String& name = "")
{
printf("%s: [", name.c_str());
size_t i, n = shape.size();
for( i = 0; i < n; i++ )
printf(" %d", shape[i]);
printf(" ]\n");
std::ostringstream ss;
if (!name.empty())
ss << name << ' ';
ss << '[';
for(size_t i = 0, n = shape.size(); i < n; ++i)
ss << ' ' << shape[i];
ss << " ]";
return ss.str();
}
static inline void print(const MatShape& shape, const String& name = "")
{
std::cout << toString(shape, name) << std::endl;
}
static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape)
{
out << toString(shape);
return out;
}
inline int clamp(int ax, int dims)
......
......@@ -74,6 +74,10 @@ static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSize
#endif
);
// Additional diagnostic checks of layer blobs (they slow down execution!).
// Each flag is read once via utils::getConfigurationParameterBool() and defaults to false
// (presumably the parameter is taken from an environment variable of that name - confirm):
//  - OPENCV_DNN_CHECK_NAN_INF: enables the range check of layer outputs after forward();
//  - OPENCV_DNN_CHECK_NAN_INF_DUMP: on failure, additionally prints the blob contents;
//  - OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR: on failure, triggers CV_Assert instead of only logging.
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
using std::vector;
using std::map;
......@@ -2053,10 +2057,75 @@ struct Net::Impl
{
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
{
std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
layer->forward(umat_inputBlobs,
umat_outputBlobs,
OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers));
umat_internalBlobs);
if (DNN_CHECK_NAN_INF)
{
    // Optional diagnostics for the OpenCL path: validate every output blob of the
    // layer that has just been executed and, on failure, dump its inputs, outputs
    // and internal blobs. CV_16S blobs are treated as FP16 storage and converted
    // with convertFp16() before being inspected; other blobs are mapped read-only.
    bool fail = false;
    for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
    {
        UMat& u = umat_outputBlobs[i];
        Mat m;
        if (u.depth() == CV_16S) // FP16
            convertFp16(u, m);
        else
            m = u.getMat(ACCESS_READ);
        if (!checkRange(m))
        {
            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
            fail = true;
        }
        // Second pass with explicit bounds: anything rejected for -1e6..1e6 is reported as "Inf".
        else if (!checkRange(m, true, NULL, -1e6, 1e6))
        {
            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
            std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
            fail = true;
        }
    }
    if (fail)
    {
        for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
        {
            UMat& u = umat_inputBlobs[i];
            Mat m;
            if (u.depth() == CV_16S) // FP16
                convertFp16(u, m);
            else
                m = u.getMat(ACCESS_READ);
            std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
        }
        for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
        {
            UMat& u = umat_outputBlobs[i];
            Mat m;
            if (u.depth() == CV_16S) // FP16
                convertFp16(u, m);
            else
                m = u.getMat(ACCESS_READ);
            std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
        }
        for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
        {
            UMat& u = umat_internalBlobs[i];
            Mat m;
            if (u.depth() == CV_16S) // FP16
                convertFp16(u, m);
            else
                m = u.getMat(ACCESS_READ);
            // Print the element type on the header line, exactly like the INPUT/OUTPUT
            // dumps above, so it is visible even when the data dump is disabled.
            std::cout << "INTERNAL " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
            if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
        }
        // `fail` is true here, so this assertion fires whenever raising errors is enabled.
        if (DNN_CHECK_NAN_INF_RAISE_ERROR)
            CV_Assert(!fail);
    }
}
OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
}
else
......@@ -2069,6 +2138,56 @@ struct Net::Impl
layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
if (DNN_CHECK_NAN_INF)
{
    // Optional diagnostics for the non-OpenCL path: scan the outputs of the layer
    // that has just run; if any of them is rejected, list every input, output and
    // internal blob (type + shape, plus contents when the dump flag is set).
    bool fail = false;
    const size_t numOutputs = ld.outputBlobs.size();
    for (size_t outIdx = 0; outIdx != numOutputs; ++outIdx)
    {
        const Mat& blob = ld.outputBlobs[outIdx];
        if (!checkRange(blob))
        {
            std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
            std::cerr << "output id=" << outIdx << " output shape=" << shape(blob) << std::endl;
            fail = true;
            continue;
        }
        // Bounded re-check: a blob rejected for -1e6..1e6 is reported as "Inf".
        if (!checkRange(blob, true, NULL, -1e6, 1e6))
        {
            std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
            std::cerr << "output id=" << outIdx << " output shape=" << shape(blob) << std::endl;
            fail = true;
        }
    }

    if (fail)
    {
        const size_t numInputs = ld.inputBlobs.size();
        for (size_t inIdx = 0; inIdx != numInputs; ++inIdx)
        {
            const Mat* inputPtr = ld.inputBlobs[inIdx];
            if (inputPtr == NULL)
            {
                std::cout << "INPUT " << inIdx << " is NULL" << std::endl;
                continue;
            }
            std::cout << "INPUT " << inIdx << " " << cv::typeToString(inputPtr->type()) << " " << shape(*inputPtr) << std::endl;
            if (DNN_CHECK_NAN_INF_DUMP)
                std::cout << inputPtr->reshape(1, 1) << std::endl;
        }

        for (size_t outIdx = 0; outIdx != numOutputs; ++outIdx)
        {
            const Mat& blob = ld.outputBlobs[outIdx];
            std::cout << "OUTPUT " << outIdx << " " << cv::typeToString(blob.type()) << " " << shape(blob) << std::endl;
            if (DNN_CHECK_NAN_INF_DUMP)
                std::cout << blob.reshape(1, 1) << std::endl;
        }

        const size_t numInternals = ld.internals.size();
        for (size_t intIdx = 0; intIdx != numInternals; ++intIdx)
        {
            const Mat& blob = ld.internals[intIdx];
            std::cout << "INTERNAL " << intIdx << " " << cv::typeToString(blob.type()) << " " << shape(blob) << std::endl;
            if (DNN_CHECK_NAN_INF_DUMP)
                std::cout << blob.reshape(1, 1) << std::endl;
        }

        // `fail` is known to be true at this point, so the assertion always triggers when enabled.
        if (DNN_CHECK_NAN_INF_RAISE_ERROR)
            CV_Assert(!fail);
    }
}
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
{
if (!ld.outputBlobsWrappers[i].empty())
......@@ -3071,6 +3190,14 @@ std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
return outputs;
}
// Base-class implementation of the InputArrayOfArrays/OutputArrayOfArrays forward overload:
// registers the call (and the layer name) with the tracing macros and then delegates
// the actual work to Layer::forward_fallback().
void Layer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs, outputs, internals);
}
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
......
......@@ -196,7 +196,7 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_locPredTransposed;
backendId == DNN_BACKEND_INFERENCE_ENGINE && !_locPredTransposed && _bboxesNormalized;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
......@@ -411,9 +411,12 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (_bboxesNormalized)
{
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
}
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
......@@ -916,6 +919,7 @@ public:
ieLayer->params["nms_threshold"] = format("%f", _nmsThreshold);
ieLayer->params["top_k"] = format("%d", _topK);
ieLayer->params["keep_top_k"] = format("%d", _keepTopK);
ieLayer->params["eta"] = "1.0";
ieLayer->params["confidence_threshold"] = format("%f", _confidenceThreshold);
ieLayer->params["variance_encoded_in_target"] = _varianceEncodedInTarget ? "1" : "0";
ieLayer->params["code_type"] = "caffe.PriorBoxParameter." + _codeType;
......
......@@ -135,10 +135,17 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
(type == MAX || type == AVE && !pad.width && !pad.height) ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && (type == MAX || type == AVE);
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
if (preferableTarget == DNN_TARGET_MYRIAD)
return type == MAX || type == AVE;
else
return type != STOCHASTIC;
}
else
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
(type == MAX || type == AVE && !pad.width && !pad.height);
}
#ifdef HAVE_OPENCL
......@@ -192,8 +199,11 @@ public:
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (type == MAX || type == AVE || type == STOCHASTIC)
{
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
}
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
......@@ -238,22 +248,41 @@ public:
#ifdef HAVE_INF_ENGINE
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Pooling";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::PoolingLayer> ieLayer(new InferenceEngine::PoolingLayer(lp));
ieLayer->_kernel_x = kernel.width;
ieLayer->_kernel_y = kernel.height;
ieLayer->_stride_x = stride.width;
ieLayer->_stride_y = stride.height;
ieLayer->_padding_x = pad.width;
ieLayer->_padding_y = pad.height;
ieLayer->_exclude_pad = type == AVE && padMode == "SAME";
ieLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
if (type == MAX)
ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::MAX;
else if (type == AVE)
ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::AVG;
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer;
if (type == MAX || type == AVE)
{
lp.type = "Pooling";
InferenceEngine::PoolingLayer* poolLayer = new InferenceEngine::PoolingLayer(lp);
poolLayer->_kernel_x = kernel.width;
poolLayer->_kernel_y = kernel.height;
poolLayer->_stride_x = stride.width;
poolLayer->_stride_y = stride.height;
poolLayer->_padding_x = pad.width;
poolLayer->_padding_y = pad.height;
poolLayer->_exclude_pad = type == AVE && padMode == "SAME";
poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX :
InferenceEngine::PoolingLayer::PoolType::AVG;
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(poolLayer);
}
else if (type == ROI)
{
lp.type = "ROIPooling";
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));
ieLayer->params["pooled_w"] = format("%d", pooledSize.width);
ieLayer->params["pooled_h"] = format("%d", pooledSize.height);
ieLayer->params["spatial_scale"] = format("%f", spatialScale);
}
else if (type == PSROI)
{
lp.type = "PSROIPooling";
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));
ieLayer->params["output_dim"] = format("%d", psRoiOutChannels);
ieLayer->params["group_size"] = format("%d", pooledSize.width);
ieLayer->params["spatial_scale"] = format("%f", spatialScale);
}
else
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
......
......@@ -6,6 +6,7 @@
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_inf_engine.hpp"
namespace cv { namespace dnn {
......@@ -16,14 +17,14 @@ public:
{
setParamsFrom(params);
uint32_t featStride = params.get<uint32_t>("feat_stride", 16);
uint32_t baseSize = params.get<uint32_t>("base_size", 16);
featStride = params.get<uint32_t>("feat_stride", 16);
baseSize = params.get<uint32_t>("base_size", 16);
// uint32_t minSize = params.get<uint32_t>("min_size", 16);
uint32_t keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
float nmsThreshold = params.get<float>("nms_thresh", 0.7);
DictValue ratios = params.get("ratio");
DictValue scales = params.get("scale");
nmsThreshold = params.get<float>("nms_thresh", 0.7);
ratios = params.get("ratio");
scales = params.get("scale");
{
LayerParams lp;
......@@ -83,6 +84,12 @@ public:
}
}
// Reports which backends can run this layer: the built-in OpenCV backend always,
// and the Inference Engine backend for every target except Myriad.
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
    if (backendId == DNN_BACKEND_OPENCV)
        return true;
    return backendId == DNN_BACKEND_INFERENCE_ENGINE && preferableTarget != DNN_TARGET_MYRIAD;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
......@@ -312,6 +319,38 @@ public:
outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0);
}
// Builds the Inference Engine counterpart of this layer: a generic CNNLayer of type
// "Proposal" whose string parameters mirror the members of this class.
// Returns an empty node when OpenCV is built without Inference Engine support.
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
    InferenceEngine::LayerParams lp;
    lp.name = name;
    lp.type = "Proposal";
    lp.precision = InferenceEngine::Precision::FP32;
    std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));

    // The size/count members are uint32_t, so they are formatted with %u:
    // %d would be a signed/unsigned mismatch and misprint values above INT_MAX.
    ieLayer->params["base_size"] = format("%u", baseSize);
    ieLayer->params["feat_stride"] = format("%u", featStride);
    // Fixed value: this class keeps no min_size member to forward.
    ieLayer->params["min_size"] = "16";
    ieLayer->params["nms_thresh"] = format("%f", nmsThreshold);
    ieLayer->params["post_nms_topn"] = format("%u", keepTopAfterNMS);
    ieLayer->params["pre_nms_topn"] = format("%u", keepTopBeforeNMS);

    // "ratio" and "scale" are passed as comma-separated lists; the key is left
    // unset when the corresponding list is empty.
    if (ratios.size())
    {
        ieLayer->params["ratio"] = format("%f", ratios.get<float>(0));
        for (int i = 1; i < ratios.size(); ++i)
            ieLayer->params["ratio"] += format(",%f", ratios.get<float>(i));
    }
    if (scales.size())
    {
        ieLayer->params["scale"] = format("%f", scales.get<float>(0));
        for (int i = 1; i < scales.size(); ++i)
            ieLayer->params["scale"] += format(",%f", scales.get<float>(i));
    }
    return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
    return Ptr<BackendNode>();
}
private:
// A first half of channels are background scores. We need only a second one.
static Mat getObjectScores(const Mat& m)
......@@ -342,8 +381,10 @@ private:
Ptr<PermuteLayer> deltasPermute;
Ptr<PermuteLayer> scoresPermute;
uint32_t keepTopAfterNMS;
uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;
Mat fakeImageBlob;
float nmsThreshold;
DictValue ratios, scales;
#ifdef HAVE_OPENCL
UMat umat_fakeImageBlob;
#endif
......
......@@ -183,8 +183,9 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
ocl::Kernel oclk_sto_pool_forward(
kname.c_str(),
ocl::dnn::ocl4dnn_pooling_oclsrc,
format("-D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
format(" -D Dtype=%s -D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
" -D STRIDE_W=%d -D STRIDE_H=%d",
(use_half) ? "half" : "float",
kernel_w_, kernel_h_,
stride_w_, stride_h_
));
......
......@@ -322,12 +322,32 @@ InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t) noex
return InferenceEngine::StatusCode::OK;
}
// setBatchSize overload taking a response descriptor: not implemented for this network
// wrapper. Both arguments are ignored and Error::StsNotImplemented is reported via CV_Error.
// NOTE(review): the function is declared noexcept; if CV_Error throws here (as it normally
// does), the exception cannot leave the function - confirm that terminating is intended.
InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(size_t size, InferenceEngine::ResponseDesc *responseDesc) noexcept
{
CV_Error(Error::StsNotImplemented, "");
// Presumably never reached; gives the non-void function a return value.
return InferenceEngine::StatusCode::OK;
}
// Batch size query: not implemented for this network wrapper; reports
// Error::StsNotImplemented via CV_Error.
// NOTE(review): the function is declared noexcept; if CV_Error throws here (as it normally
// does), the exception cannot leave the function - confirm that terminating is intended.
size_t InfEngineBackendNet::getBatchSize() const noexcept
{
CV_Error(Error::StsNotImplemented, "");
// Presumably never reached; gives the non-void function a return value.
return 0;
}
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2)
// Shape-inference extension hook, compiled only when INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2)
// holds. Not implemented: both arguments are ignored and Error::StsNotImplemented is reported.
// NOTE(review): the function is declared noexcept; if CV_Error throws here (as it normally
// does), the exception cannot leave the function - confirm that terminating is intended.
InferenceEngine::StatusCode InfEngineBackendNet::AddExtension(const InferenceEngine::IShapeInferExtensionPtr &extension, InferenceEngine::ResponseDesc *resp) noexcept
{
CV_Error(Error::StsNotImplemented, "");
// Presumably never reached; gives the non-void function a return value.
return InferenceEngine::StatusCode::OK;
}
// Network reshape hook, compiled only when INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2)
// holds. Not implemented: both arguments are ignored and Error::StsNotImplemented is reported.
// NOTE(review): the function is declared noexcept; if CV_Error throws here (as it normally
// does), the exception cannot leave the function - confirm that terminating is intended.
InferenceEngine::StatusCode InfEngineBackendNet::reshape(const InferenceEngine::ICNNNetwork::InputShapes &inputShapes, InferenceEngine::ResponseDesc *resp) noexcept
{
CV_Error(Error::StsNotImplemented, "");
// Presumably never reached; gives the non-void function a return value.
return InferenceEngine::StatusCode::OK;
}
#endif
void InfEngineBackendNet::init(int targetId)
{
if (inputs.empty())
......
......@@ -9,6 +9,8 @@
#define __OPENCV_DNN_OP_INF_ENGINE_HPP__
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
#include "opencv2/dnn.hpp"
#ifdef HAVE_INF_ENGINE
#if defined(__GNUC__) && __GNUC__ >= 5
......@@ -19,6 +21,17 @@
#if defined(__GNUC__) && __GNUC__ >= 5
//#pragma GCC diagnostic pop
#endif
// Numeric identifiers of Inference Engine releases: the year, a two-digit release
// number and four trailing zeros (2018R1 -> 2018010000, 2018R2 -> 2018020000).
#define INF_ENGINE_RELEASE_2018R1 2018010000
#define INF_ENGINE_RELEASE_2018R2 2018020000
// INF_ENGINE_RELEASE is expected to be defined externally (the warning text below refers
// to the command line); when it is missing, 2018R2 is assumed and a warning is emitted.
#ifndef INF_ENGINE_RELEASE
#warning("IE version have not been provided via command-line. Using 2018R2 by default")
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2018R2
#endif
// Evaluates to true when the configured release is newer than `ver`. Both values are
// divided by 10000 first, so the four trailing digits do not take part in the comparison.
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
#endif // HAVE_INF_ENGINE
namespace cv { namespace dnn {
......@@ -86,8 +99,15 @@ public:
virtual InferenceEngine::StatusCode setBatchSize(const size_t size) noexcept CV_OVERRIDE;
virtual InferenceEngine::StatusCode setBatchSize(size_t size, InferenceEngine::ResponseDesc* responseDesc) noexcept;
virtual size_t getBatchSize() const noexcept CV_OVERRIDE;
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2)
virtual InferenceEngine::StatusCode AddExtension(const InferenceEngine::IShapeInferExtensionPtr& extension, InferenceEngine::ResponseDesc* resp) noexcept;
virtual InferenceEngine::StatusCode reshape(const InputShapes& inputShapes, InferenceEngine::ResponseDesc* resp) noexcept;
#endif
void init(int targetId);
void addBlobs(const std::vector<Ptr<BackendWrapper> >& wrappers);
......
......@@ -104,7 +104,7 @@ __kernel void
#elif defined KERNEL_AVE_POOL
__kernel void TEMPLATE(ave_pool_forward, Dtype)(
const int nthreads, __global const Dtype* const bottom_data,
const int nthreads, __global const Dtype* bottom_data,
const int channels, const int height, const int width,
const int pooled_height, const int pooled_width,
__global Dtype* top_data)
......@@ -150,7 +150,7 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
#elif defined KERNEL_STO_POOL
__kernel void TEMPLATE(sto_pool_forward_test,Dtype)(
const int nthreads, __global const Dtype* const bottom_data,
const int nthreads, __global const Dtype* bottom_data,
const int channels, const int height, const int width,
const int pooled_height, const int pooled_width,
__global Dtype* top_data)
......
......@@ -1293,7 +1293,13 @@ void TFImporter::populateNet(Net dstNet)
if (!next_layers.empty())
{
int maximumLayerIdx = next_layers[0].second;
ExcludeLayer(net, maximumLayerIdx, 0, false);
CV_Assert(net.node(maximumLayerIdx).input_size() == 2);
// The input from the Mul layer can also be at index 1.
int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1;
ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false);
layers_to_ignore.insert(next_layers[0].first);
layerParams.set("negative_slope", scaleMat.at<float>(0));
......
......@@ -938,6 +938,16 @@ struct TorchImporter
layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
curModule->modules.push_back(newModule);
}
else if (nnName == "SpatialUpSamplingNearest")
{
// Torch's nearest-neighbour upsampling module is imported as a "Resize" layer
// with "nearest" interpolation and an integer zoom factor.
readTorchTable(scalarParams, tensorParams);
// The serialized module must carry "scale_factor"; it becomes the layer's zoom factor.
CV_Assert(scalarParams.has("scale_factor"));
int scale_factor = scalarParams.get<int>("scale_factor");
newModule->apiType = "Resize";
layerParams.set("interpolation", "nearest");
layerParams.set("zoom_factor", scale_factor);
curModule->modules.push_back(newModule);
}
else
{
// Importer does not know how to map Torch's layer type to an OpenCV's one.
......
......@@ -175,7 +175,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0;
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
inp, "detection_out", "", l1, lInf, 0.25);
}
......@@ -233,11 +233,8 @@ TEST_P(DNNTestNetwork, opencv_face_detector)
{
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Size inpSize;
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
inpSize = Size(300, 300);
Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
Mat inp = blobFromImage(img, 1.0, inpSize, Scalar(104.0, 177.0, 123.0), false, false);
Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
inp, "detection_out");
}
......@@ -249,7 +246,7 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.008 : 0.0;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.07 : 0.0;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0731 : 0.0;
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
inp, "detection_out", "", l1, lInf);
}
......
......@@ -51,6 +51,33 @@ static std::string _tf(TString filename)
return (getOpenCVExtraDir() + "/dnn/") + filename;
}
class Test_Caffe_nets : public DNNTestLayer
{
public:
void testFaster(const std::string& proto, const std::string& model, const Mat& ref,
double scoreDiff = 0.0, double iouDiff = 0.0)
{
checkBackend();
Net net = readNetFromCaffe(findDataFile("dnn/" + proto, false),
findDataFile("dnn/" + model, false));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat img = imread(findDataFile("dnn/dog416.png", false));
resize(img, img, Size(800, 600));
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false);
Mat imInfo = (Mat_<float>(1, 3) << img.rows, img.cols, 1.6f);
net.setInput(blob, "data");
net.setInput(imInfo, "im_info");
// Output has shape 1x1xNx7 where N - number of detections.
// An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
Mat out = net.forward();
scoreDiff = scoreDiff ? scoreDiff : default_l1;
iouDiff = iouDiff ? iouDiff : default_lInf;
normAssertDetections(ref, out, ("model name: " + model).c_str(), 0.8, scoreDiff, iouDiff);
}
};
TEST(Test_Caffe, memory_read)
{
const string proto = findDataFile("dnn/bvlc_googlenet.prototxt", false);
......@@ -344,9 +371,15 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
}
// https://github.com/richzhang/colorization
TEST(Reproducibility_Colorization, Accuracy)
TEST_P(Test_Caffe_nets, Colorization)
{
const float l1 = 3e-5;
checkBackend();
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
const float l1 = 4e-4;
const float lInf = 3e-3;
Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
......@@ -356,7 +389,8 @@ TEST(Reproducibility_Colorization, Accuracy)
const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false);
const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false);
Net net = readNetFromCaffe(proto, model);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel);
net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606));
......@@ -447,39 +481,40 @@ INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector,
)
);
TEST(Test_Caffe, FasterRCNN_and_RFCN)
TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
{
std::string models[] = {"VGG16_faster_rcnn_final.caffemodel", "ZF_faster_rcnn_final.caffemodel",
"resnet50_rfcn_final.caffemodel"};
std::string protos[] = {"faster_rcnn_vgg16.prototxt", "faster_rcnn_zf.prototxt",
"rfcn_pascal_voc_resnet50.prototxt"};
Mat refs[] = {(Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166),
(Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176),
(Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16)};
for (int i = 0; i < 3; ++i)
{
std::string proto = findDataFile("dnn/" + protos[i], false);
std::string model = findDataFile("dnn/" + models[i], false);
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref);
}
Net net = readNetFromCaffe(proto, model);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
Mat img = imread(findDataFile("dnn/dog416.png", false));
resize(img, img, Size(800, 600));
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false);
Mat imInfo = (Mat_<float>(1, 3) << img.rows, img.cols, 1.6f);
// Faster R-CNN (ZF) detection test. Skipped for Inference Engine with the
// OpenCL FP16 or Myriad target, and for the OpenCV backend with OpenCL FP16.
TEST_P(Test_Caffe_nets, FasterRCNN_zf)
{
    const bool isFp16Target = (target == DNN_TARGET_OPENCL_FP16);
    const bool skipOnIE = backend == DNN_BACKEND_INFERENCE_ENGINE &&
                          (isFp16Target || target == DNN_TARGET_MYRIAD);
    const bool skipOnOCV = backend == DNN_BACKEND_OPENCV && isFp16Target;
    if (skipOnIE || skipOnOCV)
        throw SkipTestException("");

    // Reference detections, one row per detection.
    static Mat ref = (Mat_<float>(3, 7) <<
        0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
        0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
        0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
    testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
}
net.setInput(blob, "data");
net.setInput(imInfo, "im_info");
// Output has shape 1x1xNx7 where N - number of detections.
// An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
Mat out = net.forward();
normAssertDetections(refs[i], out, ("model name: " + models[i]).c_str(), 0.8);
}
// R-FCN (ResNet-50) detection test. Skipped for Inference Engine with the
// OpenCL FP16 or Myriad target, and for the OpenCV backend with OpenCL FP16.
TEST_P(Test_Caffe_nets, RFCN)
{
    const bool isFp16Target = (target == DNN_TARGET_OPENCL_FP16);
    const bool skipOnIE = backend == DNN_BACKEND_INFERENCE_ENGINE &&
                          (isFp16Target || target == DNN_TARGET_MYRIAD);
    const bool skipOnOCV = backend == DNN_BACKEND_OPENCV && isFp16Target;
    if (skipOnIE || skipOnOCV)
        throw SkipTestException("");

    // Reference detections, one row per detection.
    static Mat ref = (Mat_<float>(2, 7) <<
        0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
        0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
    testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Caffe_nets, dnnBackendsAndTargets());
}} // namespace
......@@ -16,7 +16,7 @@ using namespace cv;
using namespace cv::dnn;
using namespace testing;
static void test(Mat& input, Net& net, int backendId, int targetId)
static void test(Mat& input, Net& net, Backend backendId, Target targetId, bool skipCheck = false)
{
DNNTestLayer::checkBackend(backendId, targetId);
randu(input, -1.0f, 1.0f);
......@@ -29,16 +29,19 @@ static void test(Mat& input, Net& net, int backendId, int targetId)
net.setPreferableTarget(targetId);
Mat outputHalide = net.forward().clone();
if (skipCheck)
return;
double l1, lInf;
DNNTestLayer::getDefaultThresholds(backendId, targetId, &l1, &lInf);
normAssert(outputDefault, outputHalide, "", l1, lInf);
}
static void test(LayerParams& params, Mat& input, int backendId, int targetId)
static void test(LayerParams& params, Mat& input, Backend backendId, Target targetId, bool skipCheck = false)
{
Net net;
net.addLayerToPrev(params.name, params.type, params);
test(input, net, backendId, targetId);
test(input, net, backendId, targetId, skipCheck);
}
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
......@@ -101,16 +104,17 @@ TEST_P(Convolution, Accuracy)
Size pad = get<4>(GetParam());
Size dilation = get<5>(GetParam());
bool hasBias = get<6>(GetParam());
int backendId = get<0>(get<7>(GetParam()));
int targetId = get<1>(get<7>(GetParam()));
Backend backendId = get<0>(get<7>(GetParam()));
Target targetId = get<1>(get<7>(GetParam()));
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
throw SkipTestException("");
bool skipCheck = false;
if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV &&
(targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1))
throw SkipTestException("Skip unstable test");
skipCheck = true;
int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
Mat weights(4, &sz[0], CV_32F);
......@@ -139,7 +143,9 @@ TEST_P(Convolution, Accuracy)
}
int inpSz[] = {1, inChannels, inSize.height, inSize.width};
Mat input(4, &inpSz[0], CV_32F);
test(lp, input, backendId, targetId);
test(lp, input, backendId, targetId, skipCheck);
if (skipCheck)
throw SkipTestException("Skip checks in unstable test");
}
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
......@@ -171,8 +177,8 @@ TEST_P(Deconvolution, Accuracy)
Size stride = Size(get<5>(GetParam())[0], get<5>(GetParam())[1]);
Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
bool hasBias = get<6>(GetParam());
int backendId = get<0>(get<7>(GetParam()));
int targetId = get<1>(get<7>(GetParam()));
Backend backendId = get<0>(get<7>(GetParam()));
Target targetId = get<1>(get<7>(GetParam()));
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
dilation.width == 2 && dilation.height == 2)
throw SkipTestException("");
......@@ -235,8 +241,8 @@ TEST_P(LRN, Accuracy)
float bias = get<2>(GetParam())[2];
bool normBySize = get<3>(GetParam());
std::string nrmType = get<4>(GetParam());
int backendId = get<0>(get<5>(GetParam()));
int targetId = get<1>(get<5>(GetParam()));
Backend backendId = get<0>(get<5>(GetParam()));
Target targetId = get<1>(get<5>(GetParam()));
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
throw SkipTestException("");
......@@ -276,8 +282,8 @@ TEST_P(AvePooling, Accuracy)
Size outSize = get<1>(GetParam());; // Input size will be computed from parameters.
Size kernel = get<2>(GetParam());
Size stride = get<3>(GetParam());
int backendId = get<0>(get<4>(GetParam()));
int targetId = get<1>(get<4>(GetParam()));
Backend backendId = get<0>(get<4>(GetParam()));
Target targetId = get<1>(get<4>(GetParam()));
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
throw SkipTestException("");
......@@ -317,8 +323,8 @@ TEST_P(MaxPooling, Accuracy)
Size kernel = get<2>(GetParam());
Size stride = get<3>(GetParam());
Size pad = get<4>(GetParam());
int backendId = get<0>(get<5>(GetParam()));
int targetId = get<1>(get<5>(GetParam()));
Backend backendId = get<0>(get<5>(GetParam()));
Target targetId = get<1>(get<5>(GetParam()));
LayerParams lp;
lp.set("pool", "max");
......@@ -355,8 +361,8 @@ TEST_P(FullyConnected, Accuracy)
Size inSize = get<1>(GetParam());
int outChannels = get<2>(GetParam());
bool hasBias = get<3>(GetParam());
int backendId = get<0>(get<4>(GetParam()));
int targetId = get<1>(get<4>(GetParam()));
Backend backendId = get<0>(get<4>(GetParam()));
Target targetId = get<1>(get<4>(GetParam()));
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
throw SkipTestException("");
......@@ -394,8 +400,8 @@ typedef TestWithParam<tuple<int, tuple<Backend, Target> > > SoftMax;
TEST_P(SoftMax, Accuracy)
{
int inChannels = get<0>(GetParam());
int backendId = get<0>(get<1>(GetParam()));
int targetId = get<1>(get<1>(GetParam()));
Backend backendId = get<0>(get<1>(GetParam()));
Target targetId = get<1>(get<1>(GetParam()));
LayerParams lp;
lp.type = "SoftMax";
lp.name = "testLayer";
......@@ -457,7 +463,7 @@ TEST_P(Test_Halide_layers, MaxPoolUnpool)
////////////////////////////////////////////////////////////////////////////////
static const int kNumChannels = 3;
void testInPlaceActivation(LayerParams& lp, int backendId, int targetId)
void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId)
{
EXPECT_FALSE(lp.name.empty());
......@@ -485,8 +491,8 @@ TEST_P(BatchNorm, Accuracy)
bool hasWeights = get<0>(GetParam());
bool hasBias = get<1>(GetParam());
float epsilon = get<2>(GetParam());
int backendId = get<0>(get<3>(GetParam()));
int targetId = get<1>(get<3>(GetParam()));
Backend backendId = get<0>(get<3>(GetParam()));
Target targetId = get<1>(get<3>(GetParam()));
LayerParams lp;
lp.set("has_weight", hasWeights);
......@@ -518,8 +524,8 @@ typedef TestWithParam<tuple<float, tuple<Backend, Target> > > ReLU;
TEST_P(ReLU, Accuracy)
{
float negativeSlope = get<0>(GetParam());
int backendId = get<0>(get<1>(GetParam()));
int targetId = get<1>(get<1>(GetParam()));
Backend backendId = get<0>(get<1>(GetParam()));
Target targetId = get<1>(get<1>(GetParam()));
LayerParams lp;
lp.set("negative_slope", negativeSlope);
......@@ -536,8 +542,8 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Combine(
typedef TestWithParam<tuple<std::string, tuple<Backend, Target> > > NoParamActivation;
TEST_P(NoParamActivation, Accuracy)
{
int backendId = get<0>(get<1>(GetParam()));
int targetId = get<1>(get<1>(GetParam()));
Backend backendId = get<0>(get<1>(GetParam()));
Target targetId = get<1>(get<1>(GetParam()));
LayerParams lp;
lp.type = get<0>(GetParam());
......@@ -555,8 +561,8 @@ TEST_P(Power, Accuracy)
float power = get<0>(GetParam())[0];
float scale = get<0>(GetParam())[1];
float shift = get<0>(GetParam())[2];
int backendId = get<0>(get<1>(GetParam()));
int targetId = get<1>(get<1>(GetParam()));
Backend backendId = get<0>(get<1>(GetParam()));
Target targetId = get<1>(get<1>(GetParam()));
LayerParams lp;
lp.set("power", power);
......@@ -589,8 +595,8 @@ typedef TestWithParam<tuple<bool, tuple<Backend, Target> > > Scale;
TEST_P(Scale, Accuracy)
{
bool hasBias = get<0>(GetParam());
int backendId = get<0>(get<1>(GetParam()));
int targetId = get<1>(get<1>(GetParam()));
Backend backendId = get<0>(get<1>(GetParam()));
Target targetId = get<1>(get<1>(GetParam()));
LayerParams lp;
lp.set("bias_term", hasBias);
......@@ -624,8 +630,8 @@ TEST_P(Concat, Accuracy)
{
Vec3i inSize = get<0>(GetParam());
Vec3i numChannels = get<1>(GetParam());
int backendId = get<0>(get<2>(GetParam()));
int targetId = get<1>(get<2>(GetParam()));
Backend backendId = get<0>(get<2>(GetParam()));
Target targetId = get<1>(get<2>(GetParam()));
Net net;
......@@ -692,8 +698,8 @@ TEST_P(Eltwise, Accuracy)
std::string op = get<1>(GetParam());
int numConv = get<2>(GetParam());
bool weighted = get<3>(GetParam());
int backendId = get<0>(get<4>(GetParam()));
int targetId = get<1>(get<4>(GetParam()));
Backend backendId = get<0>(get<4>(GetParam()));
Target targetId = get<1>(get<4>(GetParam()));
Net net;
......
......@@ -1205,14 +1205,6 @@ public:
}
}
void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs, outputs, internals);
}
private:
int outWidth, outHeight, zoomFactor;
};
......@@ -1225,7 +1217,7 @@ TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (availa
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
// Test a cusom layer.
// Test a custom layer.
CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer);
try
{
......
......@@ -230,6 +230,13 @@ TEST_P(Test_TensorFlow_layers, flatten)
runTensorFlowNet("unfused_flatten_unknown_batch");
}
TEST_P(Test_TensorFlow_layers, leaky_relu)
{
runTensorFlowNet("leaky_relu_order1");
runTensorFlowNet("leaky_relu_order2");
runTensorFlowNet("leaky_relu_order3");
}
TEST_P(Test_TensorFlow_layers, l2_normalize)
{
runTensorFlowNet("l2_normalize");
......
......@@ -69,100 +69,119 @@ TEST(Torch_Importer, simple_read)
ASSERT_FALSE(net.empty());
}
static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String outLayerName = "",
bool check2ndBlob = false, bool isBinary = false)
class Test_Torch_layers : public DNNTestLayer
{
String suffix = (isBinary) ? ".dat" : ".txt";
public:
void runTorchNet(const String& prefix, String outLayerName = "",
bool check2ndBlob = false, bool isBinary = false,
double l1 = 0.0, double lInf = 0.0)
{
String suffix = (isBinary) ? ".dat" : ".txt";
Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary);
ASSERT_FALSE(net.empty());
Mat inp, outRef;
ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(targetId);
checkBackend(backend, target, &inp, &outRef);
Mat inp, outRef;
ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary);
ASSERT_FALSE(net.empty());
if (outLayerName.empty())
outLayerName = net.getLayerNames().back();
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(inp);
std::vector<Mat> outBlobs;
net.forward(outBlobs, outLayerName);
normAssert(outRef, outBlobs[0]);
if (outLayerName.empty())
outLayerName = net.getLayerNames().back();
if (check2ndBlob)
{
Mat out2 = outBlobs[1];
Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
normAssert(out2, ref2);
}
}
net.setInput(inp);
std::vector<Mat> outBlobs;
net.forward(outBlobs, outLayerName);
l1 = l1 ? l1 : default_l1;
lInf = lInf ? lInf : default_lInf;
normAssert(outRef, outBlobs[0], "", l1, lInf);
typedef testing::TestWithParam<Target> Test_Torch_layers;
if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE)
{
Mat out2 = outBlobs[1];
Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
normAssert(out2, ref2, "", l1, lInf);
}
}
};
TEST_P(Test_Torch_layers, run_convolution)
{
runTorchNet("net_conv", GetParam(), "", false, true);
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
runTorchNet("net_conv", "", false, true);
}
TEST_P(Test_Torch_layers, run_pool_max)
{
runTorchNet("net_pool_max", GetParam(), "", true);
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("");
runTorchNet("net_pool_max", "", true);
}
TEST_P(Test_Torch_layers, run_pool_ave)
{
runTorchNet("net_pool_ave", GetParam());
runTorchNet("net_pool_ave");
}
TEST_P(Test_Torch_layers, run_reshape)
{
int targetId = GetParam();
runTorchNet("net_reshape", targetId);
runTorchNet("net_reshape_batch", targetId);
runTorchNet("net_reshape_single_sample", targetId);
runTorchNet("net_reshape_channels", targetId, "", false, true);
runTorchNet("net_reshape");
runTorchNet("net_reshape_batch");
runTorchNet("net_reshape_channels", "", false, true);
}
TEST_P(Test_Torch_layers, run_reshape_single_sample)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("");
runTorchNet("net_reshape_single_sample", "", false, false,
(target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.0052 : 0.0);
}
TEST_P(Test_Torch_layers, run_linear)
{
runTorchNet("net_linear_2d", GetParam());
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("");
runTorchNet("net_linear_2d");
}
TEST_P(Test_Torch_layers, run_concat)
{
int targetId = GetParam();
runTorchNet("net_concat", targetId, "l5_torchMerge");
runTorchNet("net_depth_concat", targetId, "", false, true);
runTorchNet("net_concat", "l5_torchMerge");
runTorchNet("net_depth_concat", "", false, true, 0.0,
target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0);
}
TEST_P(Test_Torch_layers, run_deconv)
{
runTorchNet("net_deconv", GetParam());
runTorchNet("net_deconv");
}
TEST_P(Test_Torch_layers, run_batch_norm)
{
runTorchNet("net_batch_norm", GetParam(), "", false, true);
runTorchNet("net_batch_norm", "", false, true);
}
TEST_P(Test_Torch_layers, net_prelu)
{
runTorchNet("net_prelu", GetParam());
runTorchNet("net_prelu");
}
TEST_P(Test_Torch_layers, net_cadd_table)
{
runTorchNet("net_cadd_table", GetParam());
runTorchNet("net_cadd_table");
}
TEST_P(Test_Torch_layers, net_softmax)
{
int targetId = GetParam();
runTorchNet("net_softmax", targetId);
runTorchNet("net_softmax_spatial", targetId);
runTorchNet("net_softmax");
runTorchNet("net_softmax_spatial");
}
TEST_P(Test_Torch_layers, net_logsoftmax)
......@@ -173,40 +192,55 @@ TEST_P(Test_Torch_layers, net_logsoftmax)
TEST_P(Test_Torch_layers, net_lp_pooling)
{
int targetId = GetParam();
runTorchNet("net_lp_pooling_square", targetId, "", false, true);
runTorchNet("net_lp_pooling_power", targetId, "", false, true);
runTorchNet("net_lp_pooling_square", "", false, true);
runTorchNet("net_lp_pooling_power", "", false, true);
}
TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
{
runTorchNet("net_conv_gemm_lrn", GetParam(), "", false, true);
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
runTorchNet("net_conv_gemm_lrn", "", false, true,
target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,
target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0);
}
TEST_P(Test_Torch_layers, net_inception_block)
{
runTorchNet("net_inception_block", GetParam(), "", false, true);
runTorchNet("net_inception_block", "", false, true);
}
TEST_P(Test_Torch_layers, net_normalize)
{
runTorchNet("net_normalize", GetParam(), "", false, true);
runTorchNet("net_normalize", "", false, true);
}
TEST_P(Test_Torch_layers, net_padding)
{
int targetId = GetParam();
runTorchNet("net_padding", targetId, "", false, true);
runTorchNet("net_spatial_zero_padding", targetId, "", false, true);
runTorchNet("net_spatial_reflection_padding", targetId, "", false, true);
runTorchNet("net_padding", "", false, true);
runTorchNet("net_spatial_zero_padding", "", false, true);
runTorchNet("net_spatial_reflection_padding", "", false, true);
}
TEST_P(Test_Torch_layers, net_non_spatial)
{
runTorchNet("net_non_spatial", GetParam(), "", false, true);
if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
(target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
runTorchNet("net_non_spatial", "", false, true);
}
TEST_P(Test_Torch_layers, run_paralel)
{
if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU)
throw SkipTestException("");
runTorchNet("net_parallel", "l5_torchMerge");
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, availableDnnTargets());
TEST_P(Test_Torch_layers, net_residual)
{
runTorchNet("net_residual", "", false, true);
}
typedef testing::TestWithParam<Target> Test_Torch_nets;
......@@ -313,21 +347,6 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets());
// TODO: fix OpenCL and add to the rest of tests
TEST(Torch_Importer, run_paralel)
{
runTorchNet("net_parallel", DNN_TARGET_CPU, "l5_torchMerge");
}
TEST(Torch_Importer, DISABLED_run_paralel)
{
runTorchNet("net_parallel", DNN_TARGET_OPENCL, "l5_torchMerge");
}
TEST(Torch_Importer, net_residual)
{
runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true);
}
// Test a custom layer
// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
......@@ -374,17 +393,29 @@ public:
}
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
private:
int scale;
};
TEST(Torch_Importer, upsampling_nearest)
TEST_P(Test_Torch_layers, upsampling_nearest)
{
// Test a custom layer.
CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
runTorchNet("net_spatial_upsampling_nearest", DNN_TARGET_CPU, "", false, true);
try
{
runTorchNet("net_spatial_upsampling_nearest", "", false, true);
}
catch (...)
{
LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
throw;
}
LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
// Test an implemented layer.
runTorchNet("net_spatial_upsampling_nearest", "", false, true);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets());
}
......@@ -307,8 +307,8 @@ icvLoadWindowPos( const char* name, CvRect& rect )
{
HKEY hkey;
char szKey[1024];
strcpy( szKey, icvWindowPosRootKey );
strcat( szKey, name );
strcpy_s( szKey, 1024, icvWindowPosRootKey );
strcat_s( szKey, 1024, name );
rect.x = rect.y = CW_USEDEFAULT;
rect.width = rect.height = 320;
......@@ -368,8 +368,8 @@ icvSaveWindowPos( const char* name, CvRect rect )
HKEY hkey;
char szKey[1024];
char rootKey[1024];
strcpy( szKey, icvWindowPosRootKey );
strcat( szKey, name );
strcpy_s( szKey, 1024, icvWindowPosRootKey );
strcat_s( szKey, 1024, name );
if( RegOpenKeyEx( HKEY_CURRENT_USER,szKey,0,KEY_READ,&hkey) != ERROR_SUCCESS )
{
......@@ -379,7 +379,7 @@ icvSaveWindowPos( const char* name, CvRect rect )
char oldestKey[1024];
char currentKey[1024];
strcpy( rootKey, icvWindowPosRootKey );
strcpy_s( rootKey, 1024, icvWindowPosRootKey );
rootKey[strlen(rootKey)-1] = '\0';
if( RegCreateKeyEx(HKEY_CURRENT_USER, rootKey, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_READ+KEY_WRITE, 0, &hroot, NULL) != ERROR_SUCCESS )
//RegOpenKeyEx( HKEY_CURRENT_USER,rootKey,0,KEY_READ,&hroot) != ERROR_SUCCESS )
......@@ -398,7 +398,7 @@ icvSaveWindowPos( const char* name, CvRect rect )
oldestTime.dwLowDateTime > accesstime.dwLowDateTime) )
{
oldestTime = accesstime;
strcpy( oldestKey, currentKey );
strcpy_s( oldestKey, 1024, currentKey );
}
}
......@@ -1500,6 +1500,8 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam )
rgn = CreateRectRgn(0, 0, wrc.right, wrc.bottom);
rgn1 = CreateRectRgn(cr.left, cr.top, cr.right, cr.bottom);
rgn2 = CreateRectRgn(tr.left, tr.top, tr.right, tr.bottom);
CV_Assert(rgn != 0, rgn1 != 0, rgn2 != 0);
ret = CombineRgn(rgn, rgn, rgn1, RGN_DIFF);
ret = CombineRgn(rgn, rgn, rgn2, RGN_DIFF);
......
......@@ -1771,7 +1771,7 @@ Corners in the image can be found as the local maxima of this response map.
size as src .
@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ).
@param ksize Aperture parameter for the Sobel operator.
@param k Harris detector free parameter. See the formula below.
@param k Harris detector free parameter. See the formula above.
@param borderType Pixel extrapolation method. See #BorderTypes.
*/
CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize,
......
......@@ -20,8 +20,12 @@ endforeach()
set(opencv_hdrs "")
set(opencv_userdef_hdrs "")
foreach(m ${OPENCV_PYTHON_MODULES})
ocv_list_filter(OPENCV_MODULE_${m}_HEADERS "${OPENCV_MODULE_${m}_LOCATION}/include" __hdrs)
list(APPEND opencv_hdrs ${__hdrs})
foreach (hdr ${OPENCV_MODULE_${m}_HEADERS})
ocv_is_subdir(is_sub "${OPENCV_MODULE_${m}_LOCATION}/include" "${hdr}")
if(is_sub)
list(APPEND opencv_hdrs "${hdr}")
endif()
endforeach()
file(GLOB userdef_hdrs ${OPENCV_MODULE_${m}_LOCATION}/misc/python/pyopencv*.hpp)
list(APPEND opencv_userdef_hdrs ${userdef_hdrs})
endforeach(m)
......
......@@ -379,10 +379,9 @@ struct TSParams
class TS
{
public:
// constructor(s) and destructor
TS();
virtual ~TS();
public:
enum
{
......@@ -484,9 +483,6 @@ public:
SKIPPED=1
};
// get file storage
CvFileStorage* get_file_storage();
// get RNG to generate random input data for a test
RNG& get_rng() { return rng; }
......@@ -629,9 +625,6 @@ struct DefaultRngAuto
void fillGradient(Mat& img, int delta = 5);
void smoothBorder(Mat& img, const Scalar& color, int delta = 3);
void printVersionInfo(bool useStdOut = true);
// Utility functions
void addDataSearchPath(const std::string& path);
......@@ -660,6 +653,13 @@ std::string findDataFile(const std::string& relative_path, bool required = true)
*/
std::string findDataDirectory(const std::string& relative_path, bool required = true);
// Test definitions
class SystemInfoCollector : public testing::EmptyTestEventListener
{
private:
virtual void OnTestProgramStart(const testing::UnitTest&);
};
#ifndef __CV_TEST_EXEC_ARGS
#if defined(_MSC_VER) && (_MSC_VER <= 1400)
......@@ -671,15 +671,6 @@ std::string findDataDirectory(const std::string& relative_path, bool required =
#endif
#endif
#ifdef HAVE_OPENCL
namespace ocl {
void dumpOpenCLDevice();
}
#define TEST_DUMP_OCL_INFO cvtest::ocl::dumpOpenCLDevice();
#else
#define TEST_DUMP_OCL_INFO
#endif
void parseCustomOptions(int argc, char **argv);
#define CV_TEST_INIT0_NOOP (void)0
......@@ -696,8 +687,7 @@ int main(int argc, char **argv) \
ts->init(resourcesubdir); \
__CV_TEST_EXEC_ARGS(CV_TEST_INIT0_ ## INIT0) \
::testing::InitGoogleTest(&argc, argv); \
cvtest::printVersionInfo(); \
TEST_DUMP_OCL_INFO \
::testing::UnitTest::GetInstance()->listeners().Append(new SystemInfoCollector); \
__CV_TEST_EXEC_ARGS(__VA_ARGS__) \
parseCustomOptions(argc, argv); \
} \
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -905,7 +905,7 @@ public:
/** @brief Writes the next video frame
@param image The written frame
@param image The written frame. In general, color images are expected in BGR format.
The function/method writes the specified image to video file. It must have the same size as has
been specified when opening the video writer.
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -377,8 +377,8 @@ LRESULT PASCAL CvCaptureCAM_VFW::frameCallback( HWND hWnd, VIDEOHDR* hdr )
if (!hWnd) return FALSE;
capture = (CvCaptureCAM_VFW*)capGetUserData(hWnd);
if (!capture) return (LRESULT)FALSE;
capture->hdr = hdr;
return (LRESULT)TRUE;
}
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment