Commit 0c16d8f6 authored by Alexander Alekhin

Merge remote-tracking branch 'upstream/3.4' into merge-3.4

parents 384ac634 3903174f
@@ -115,7 +115,7 @@ if(CUDA_FOUND)
     string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
     if(NOT _nvcc_res EQUAL 0)
       message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
-      set(__cuda_arch_bin "5.3 6.2 7.0 7.5")
+      set(__cuda_arch_bin "5.3 6.2 7.2")
     else()
       set(__cuda_arch_bin "${_nvcc_out}")
       string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
...
@@ -508,7 +508,7 @@ macro(ocv_warnings_disable)
       foreach(var ${_flag_vars})
         foreach(warning ${_gxx_warnings})
           if(NOT warning MATCHES "^-Wno-")
-            string(REGEX REPLACE "${warning}(=[^ ]*)?" "" ${var} "${${var}}")
+            string(REGEX REPLACE "(^|[ ]+)${warning}(=[^ ]*)?([ ]+|$)" " " ${var} "${${var}}")
             string(REPLACE "-W" "-Wno-" warning "${warning}")
           endif()
           ocv_check_flag_support(${var} "${warning}" _varname "")
...
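Note on the fix above: the old pattern was unanchored, so stripping a flag such as `-Wunused` could also eat the prefix of `-Wunused-function` and leave a stray `-function` token behind. A minimal sketch of the boundary issue, written with C++ std::regex rather than CMake's REGEX REPLACE (the syntaxes differ in details, but the whole-token anchoring idea is the same):

```cpp
// Minimal sketch of the substring-match hazard fixed above; hypothetical
// flag strings, any C++11 compiler.
#include <iostream>
#include <regex>

int main()
{
    std::string flags = "-O2 -Wunused-function -Wunused -g";

    // Old, unanchored pattern: also strips the prefix of -Wunused-function.
    std::cout << std::regex_replace(flags, std::regex("-Wunused(=[^ ]*)?"), "") << "\n";
    // -> "-O2 -function  -g"

    // New, boundary-anchored pattern: removes only the whole-token flag.
    std::cout << std::regex_replace(flags,
                                    std::regex("(^|[ ]+)-Wunused(=[^ ]*)?([ ]+|$)"), " ")
              << "\n";
    // -> "-O2 -Wunused-function -g"
    return 0;
}
```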
@@ -1125,6 +1125,12 @@ inline float v_reduce_sum(const v_float32x8& a)
     return _mm_cvtss_f32(s1);
 }
 
+inline double v_reduce_sum(const v_float64x4& a)
+{
+    __m256d s0 = _mm256_hadd_pd(a.val, a.val);
+    return _mm_cvtsd_f64(_mm_add_pd(_v256_extract_low(s0), _v256_extract_high(s0)));
+}
+
 inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
                                  const v_float32x8& c, const v_float32x8& d)
 {
...
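For context on the new AVX overload: `_mm256_hadd_pd` produces pairwise sums within each 128-bit lane, so one cross-lane add finishes the reduction. A standalone sketch of the same pattern, assuming an AVX-capable compiler (e.g. `g++ -mavx`); `_v256_extract_low`/`_v256_extract_high` are OpenCV-internal helpers, emulated here with the equivalent cast/extract intrinsics:

```cpp
#include <immintrin.h>
#include <cstdio>

static double reduce_sum_f64x4(__m256d v)
{
    __m256d s0 = _mm256_hadd_pd(v, v);          // {a0+a1, a0+a1, a2+a3, a2+a3}
    __m128d lo = _mm256_castpd256_pd128(s0);    // low 128-bit lane
    __m128d hi = _mm256_extractf128_pd(s0, 1);  // high 128-bit lane
    return _mm_cvtsd_f64(_mm_add_pd(lo, hi));   // (a0+a1) + (a2+a3)
}

int main()
{
    __m256d v = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);  // elements {1, 2, 3, 4}
    std::printf("%f\n", reduce_sum_f64x4(v));       // expect 10.0
    return 0;
}
```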
@@ -984,6 +984,13 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
 
+#if CV_SIMD128_64F
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    return vgetq_lane_f64(a.val, 0) + vgetq_lane_f64(a.val, 1);
+}
+#endif
+
 inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                  const v_float32x4& c, const v_float32x4& d)
 {
...
@@ -1456,6 +1456,13 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32)
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32)
 
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    double CV_DECL_ALIGNED(32) idx[2];
+    v_store_aligned(idx, a);
+    return idx[0] + idx[1];
+}
+
 inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                  const v_float32x4& c, const v_float32x4& d)
 {
...
@@ -716,6 +716,11 @@ OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
 OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
 OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
 
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    return vec_extract(vec_add(a.val, vec_sld(a.val, a.val, 8)), 0);
+}
+
 #define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
 inline scalartype v_reduce_##suffix(const _Tpvec& a) \
 { \
...
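With the AVX, NEON, SSE, and VSX variants above, `v_reduce_sum` now covers double-precision vectors on every backend that defines CV_SIMD128_64F. A hedged usage sketch against the public universal-intrinsics header:

```cpp
// Tiny cross-platform check of the new double-precision reduction; a sketch,
// assuming an OpenCV build with CV_SIMD128_64F enabled (SSE2, AArch64 NEON, VSX).
#include <opencv2/core/hal/intrin.hpp>
#include <cstdio>

int main()
{
#if CV_SIMD128_64F
    double buf[2] = { 1.5, 2.5 };
    cv::v_float64x2 v = cv::v_load(buf);
    std::printf("%f\n", cv::v_reduce_sum(v));  // expect 4.0
#else
    std::puts("double-precision SIMD not available in this build");
#endif
    return 0;
}
```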
@@ -6,6 +6,7 @@
 // Third party copyrights are property of their respective owners.
 
 #include "../precomp.hpp"
+#include <opencv2/dnn/shape_utils.hpp>
 
 #ifdef HAVE_PROTOBUF
@@ -134,9 +135,38 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
     else
         CV_Error(Error::StsUnsupportedFormat, "Unsupported data type: " +
                         opencv_onnx::TensorProto_DataType_Name(datatype));
+    if (tensor_proto.dims_size() == 0)
+        blob.dims = 1;  // To force 1-dimensional cv::Mat for scalars.
     return blob;
 }
 
+void runLayer(Ptr<Layer> layer, const std::vector<Mat>& inputs,
+              std::vector<Mat>& outputs)
+{
+    std::vector<MatShape> inpShapes(inputs.size());
+    int ddepth = CV_32F;
+    for (size_t i = 0; i < inputs.size(); ++i)
+    {
+        inpShapes[i] = shape(inputs[i]);
+        if (i > 0 && ddepth != inputs[i].depth())
+            CV_Error(Error::StsNotImplemented, "Mixed input data types.");
+        ddepth = inputs[i].depth();
+    }
+
+    std::vector<MatShape> outShapes, internalShapes;
+    layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes);
+
+    std::vector<Mat> internals(internalShapes.size());
+    outputs.resize(outShapes.size());
+    for (size_t i = 0; i < outShapes.size(); ++i)
+        outputs[i].create(outShapes[i], ddepth);
+    for (size_t i = 0; i < internalShapes.size(); ++i)
+        internals[i].create(internalShapes[i], ddepth);
+
+    layer->finalize(inputs, outputs);
+    layer->forward(inputs, outputs, internals);
+}
+
 std::map<std::string, Mat> ONNXImporter::getGraphTensors(
                                         const opencv_onnx::GraphProto& graph_proto)
 {
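The `runLayer` helper above executes a single layer eagerly (shape query, allocation, finalize, forward), which is what lets the importer fold constant subgraphs; the Concat handler further down is its first user. A hedged standalone sketch of the same call sequence using the public cv::dnn::Layer API, folding a Concat of two constant blobs:

```cpp
// A sketch mirroring runLayer above: fold a Concat of two constant inputs.
// Assumes the same Layer API the importer uses (OpenCV 3.4-era cv::dnn).
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <iostream>
using namespace cv;
using namespace cv::dnn;

int main()
{
    LayerParams lp;
    lp.set("axis", 1);
    Ptr<Layer> concat = ConcatLayer::create(lp);

    std::vector<Mat> inputs(2);
    inputs[0] = Mat::ones(1, 3, CV_32F);
    inputs[1] = Mat::zeros(1, 2, CV_32F);

    // Ask the layer what its outputs will look like, then allocate them.
    std::vector<MatShape> inShapes, outShapes, internalShapes;
    for (const Mat& m : inputs) inShapes.push_back(shape(m));
    concat->getMemoryShapes(inShapes, 0, outShapes, internalShapes);

    std::vector<Mat> outputs(outShapes.size()), internals(internalShapes.size());
    for (size_t i = 0; i < outputs.size(); ++i) outputs[i].create(outShapes[i], CV_32F);
    for (size_t i = 0; i < internals.size(); ++i) internals[i].create(internalShapes[i], CV_32F);

    concat->finalize(inputs, outputs);
    concat->forward(inputs, outputs, internals);
    std::cout << outputs[0] << std::endl;  // 1x5 row: 1 1 1 0 0
    return 0;
}
```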
@@ -292,6 +322,26 @@ void ONNXImporter::populateNet(Net dstNet)
     CV_Assert(model_proto.has_graph());
     opencv_onnx::GraphProto graph_proto = model_proto.graph();
     std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
+    // Shapes of the network's internal blobs.
+    std::map<std::string, MatShape> outShapes;
+    // Add all input shapes: both constant blobs and the network's inputs.
+    for (int i = 0; i < graph_proto.input_size(); ++i)
+    {
+        opencv_onnx::ValueInfoProto valueInfoProto = graph_proto.input(i);
+        CV_Assert(valueInfoProto.has_type());
+        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
+        CV_Assert(typeProto.has_tensor_type());
+        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
+        CV_Assert(tensor.has_shape());
+        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();
+
+        MatShape inpShape(tensorShape.dim_size());
+        for (int j = 0; j < inpShape.size(); ++j)
+        {
+            inpShape[j] = tensorShape.dim(j).dim_value();
+        }
+        outShapes[valueInfoProto.name()] = inpShape;
+    }
 
     std::string framework_name;
     if (model_proto.has_producer_name()) {
@@ -301,6 +351,7 @@ void ONNXImporter::populateNet(Net dstNet)
     // create map with network inputs (without const blobs)
     std::map<std::string, LayerInfo> layer_id;
     std::map<std::string, LayerInfo>::iterator layerId;
+    std::map<std::string, MatShape>::iterator shapeIt;
     // fill map: push layer name, layer id and output id
     std::vector<String> netInputs;
     for (int j = 0; j < graph_proto.input_size(); j++)
@@ -317,9 +368,9 @@ void ONNXImporter::populateNet(Net dstNet)
     LayerParams layerParams;
     opencv_onnx::NodeProto node_proto;
 
-    for(int i = 0; i < layersSize; i++)
+    for(int li = 0; li < layersSize; li++)
     {
-        node_proto = graph_proto.node(i);
+        node_proto = graph_proto.node(li);
         layerParams = getLayerParams(node_proto);
 
         CV_Assert(node_proto.output_size() >= 1);
         layerParams.name = node_proto.output(0);
@@ -598,6 +649,65 @@ void ONNXImporter::populateNet(Net dstNet)
         {
             layerParams.type = "Padding";
         }
+        else if (layer_type == "Shape")
+        {
+            CV_Assert(node_proto.input_size() == 1);
+            shapeIt = outShapes.find(node_proto.input(0));
+            CV_Assert(shapeIt != outShapes.end());
+            MatShape inpShape = shapeIt->second;
+
+            Mat shapeMat(inpShape.size(), 1, CV_32S);
+            for (int j = 0; j < inpShape.size(); ++j)
+                shapeMat.at<int>(j) = inpShape[j];
+            shapeMat.dims = 1;
+
+            constBlobs.insert(std::make_pair(layerParams.name, shapeMat));
+            continue;
+        }
+        else if (layer_type == "Gather")
+        {
+            CV_Assert(node_proto.input_size() == 2);
+            CV_Assert(layerParams.has("axis"));
+            Mat input = getBlob(node_proto, constBlobs, 0);
+            Mat indexMat = getBlob(node_proto, constBlobs, 1);
+            CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
+            int index = indexMat.at<int>(0);
+            int axis = layerParams.get<int>("axis");
+
+            std::vector<cv::Range> ranges(input.dims, Range::all());
+            ranges[axis] = Range(index, index + 1);
+
+            Mat out = input(ranges);
+            constBlobs.insert(std::make_pair(layerParams.name, out));
+            continue;
+        }
+        else if (layer_type == "Concat")
+        {
+            bool hasVariableInps = false;
+            for (int i = 0; i < node_proto.input_size(); ++i)
+            {
+                if (layer_id.find(node_proto.input(i)) != layer_id.end())
+                {
+                    hasVariableInps = true;
+                    break;
+                }
+            }
+
+            if (!hasVariableInps)
+            {
+                std::vector<Mat> inputs(node_proto.input_size()), concatenated;
+                for (size_t i = 0; i < inputs.size(); ++i)
+                {
+                    inputs[i] = getBlob(node_proto, constBlobs, i);
+                }
+                Ptr<Layer> concat = ConcatLayer::create(layerParams);
+                runLayer(concat, inputs, concatenated);
+
+                CV_Assert(concatenated.size() == 1);
+                constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
+                continue;
+            }
+        }
         else
         {
             for (int j = 0; j < node_proto.input_size(); j++) {
@@ -609,12 +719,24 @@ void ONNXImporter::populateNet(Net dstNet)
 
         int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
         layer_id.insert(std::make_pair(layerParams.name, LayerInfo(id, 0)));
 
+        std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
         for (int j = 0; j < node_proto.input_size(); j++) {
             layerId = layer_id.find(node_proto.input(j));
             if (layerId != layer_id.end()) {
                 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
+                // Collect input shapes.
+                shapeIt = outShapes.find(node_proto.input(j));
+                CV_Assert(shapeIt != outShapes.end());
+                layerInpShapes.push_back(shapeIt->second);
             }
         }
+
+        // Compute shape of output blob for this layer.
+        Ptr<Layer> layer = dstNet.getLayer(id);
+        layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
+        CV_Assert(!layerOutShapes.empty());
+        outShapes[layerParams.name] = layerOutShapes[0];
     }
 }
...
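Taken together, the Shape, Gather, and Concat handlers let the importer reduce shape-computing subgraphs (e.g. Shape -> Gather -> Concat feeding a Reshape) to constants before the network is built. A hedged end-to-end sketch; `dynamic_reshape.onnx` is a placeholder name, and the input dimensions must match the graph's declared input:

```cpp
// A sketch of loading a model whose Reshape target is computed from the
// input's shape; assumes a suitable ONNX file, named hypothetically here.
#include <opencv2/dnn.hpp>
#include <opencv2/core.hpp>

int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromONNX("dynamic_reshape.onnx");
    int sz[] = {1, 3, 4, 5};          // must match the graph's declared input shape
    cv::Mat blob(4, sz, CV_32F);
    cv::randu(blob, 0.0f, 1.0f);
    net.setInput(blob);
    cv::Mat out = net.forward();      // the shape subgraph was folded at import time
    return 0;
}
```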
@@ -152,6 +152,7 @@ InfEngineBackendNet::InfEngineBackendNet()
 {
     targetDevice = InferenceEngine::TargetDevice::eCPU;
     precision = InferenceEngine::Precision::FP32;
+    hasNetOwner = false;
 }
 
 InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
@@ -162,6 +163,7 @@ InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
     outputs = net.getOutputsInfo();
     layers.resize(net.layerCount());  // A hack to execute InfEngineBackendNet::layerCount correctly.
     netOwner = net;
+    hasNetOwner = true;
 }
 
 void InfEngineBackendNet::Release() CV_NOEXCEPT
@@ -178,12 +180,12 @@ void InfEngineBackendNet::setPrecision(InferenceEngine::Precision p) CV_NOEXCEPT
 
 InferenceEngine::Precision InfEngineBackendNet::getPrecision() CV_NOEXCEPT
 {
-    return precision;
+    return hasNetOwner ? netOwner.getPrecision() : precision;
 }
 
 InferenceEngine::Precision InfEngineBackendNet::getPrecision() const CV_NOEXCEPT
 {
-    return precision;
+    return hasNetOwner ? netOwner.getPrecision() : precision;
 }
 
 // Assume that outputs of network is unconnected blobs.
...
@@ -136,6 +136,9 @@ private:
     InferenceEngine::InferRequest infRequest;
     // In case of models from Model Optimizer we need to manage their lifetime.
     InferenceEngine::CNNNetwork netOwner;
+    // There is no way to check if netOwner is initialized or not, so we use
+    // a separate flag to determine if the model has been loaded from IR.
+    bool hasNetOwner;
 
     std::string name;
...
@@ -471,6 +471,7 @@ TEST(Test_Caffe, shared_weights)
     net.setInput(blob_1, "input_1");
     net.setInput(blob_2, "input_2");
 
+    net.setPreferableBackend(DNN_BACKEND_OPENCV);
     Mat sum = net.forward();
...
@@ -306,7 +306,7 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc)
     // batch size 1
     testDarknetModel(config_file, weights_file, ref.rowRange(0, 2), scoreDiff, iouDiff);
 
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_MYRIAD)
 #endif
     // batch size 2
...
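The same `==` to `>=` change recurs in several test files below: INF_ENGINE_RELEASE encodes the OpenVINO release as a single number (2018040000 corresponds to 2018 R4), so `>=` keeps a guard active for later releases instead of silently lapsing when the version bumps. A minimal sketch of the gate (the macro normally comes from the build system; the fallback define exists only to make the sketch self-contained):

```cpp
#include <cstdio>

#ifndef INF_ENGINE_RELEASE
#define INF_ENGINE_RELEASE 2018040000  // stand-in for "2018 R4" in this sketch
#endif

int main()
{
#if INF_ENGINE_RELEASE >= 2018040000
    std::puts("guard applies to 2018 R4 and every later release");
#else
    std::puts("guard inactive: pre-2018 R4 release");
#endif
    return 0;
}
```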
@@ -166,7 +166,7 @@ TEST_P(Deconvolution, Accuracy)
     if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
         dilation.width == 2 && dilation.height == 2)
         throw SkipTestException("");
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
     if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
         hasBias && group != 1)
         throw SkipTestException("Test is disabled for OpenVINO 2018R4");
...
@@ -137,7 +137,7 @@ TEST_P(Test_Caffe_layers, Convolution)
 TEST_P(Test_Caffe_layers, DeConvolution)
 {
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_CPU)
         throw SkipTestException("Test is disabled for OpenVINO 2018R4");
 #endif
@@ -918,8 +918,11 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_DWconv_Prelu, Combine(Values(3, 6), Val
 // Using Intel's Model Optimizer generate .xml and .bin files:
 // ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \
 //                  -p FP32 -i -b ${batch_size} -o /path/to/output/folder
-TEST(Layer_Test_Convolution_DLDT, Accuracy)
+typedef testing::TestWithParam<Target> Layer_Test_Convolution_DLDT;
+TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
 {
+    Target targetId = GetParam();
+
     Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
     Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
@@ -930,6 +933,10 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
     Mat outDefault = netDefault.forward();
 
     net.setInput(inp);
+    net.setPreferableTarget(targetId);
+
+    if (targetId != DNN_TARGET_MYRIAD)
+    {
     Mat out = net.forward();
 
     normAssert(outDefault, out);
@@ -937,10 +944,18 @@ TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
     std::vector<int> outLayers = net.getUnconnectedOutLayers();
     ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
     ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
+    }
+    else
+    {
+        // An assertion is expected because the model is in FP32 format but
+        // Myriad plugin supports only FP16 models.
+        ASSERT_ANY_THROW(net.forward());
+    }
 }
 
-TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
+TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
 {
+    Target targetId = GetParam();
+
     Mat inp = blobFromNPY(_tf("blob.npy"));
 
     Mat inputs[] = {Mat(inp.dims, inp.size, CV_8U), Mat()};
@@ -951,12 +966,25 @@ TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
     for (int i = 0; i < 2; ++i)
     {
         Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
+        net.setPreferableTarget(targetId);
         net.setInput(inputs[i]);
+        if (targetId != DNN_TARGET_MYRIAD)
+        {
         outs[i] = net.forward();
         ASSERT_EQ(outs[i].type(), CV_32F);
         }
+        else
+        {
+            // An assertion is expected because the model is in FP32 format but
+            // Myriad plugin supports only FP16 models.
+            ASSERT_ANY_THROW(net.forward());
+        }
+    }
+    if (targetId != DNN_TARGET_MYRIAD)
         normAssert(outs[0], outs[1]);
 }
+
+INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Convolution_DLDT,
+    testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)));
 
 // 1. Create a .prototxt file with the following network:
 // layer {
@@ -980,14 +1008,17 @@ TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
 //   net.save('/path/to/caffemodel')
 //
 // 3. Convert using ModelOptimizer.
-typedef testing::TestWithParam<tuple<int, int> > Test_DLDT_two_inputs;
+typedef testing::TestWithParam<tuple<int, int, Target> > Test_DLDT_two_inputs;
 TEST_P(Test_DLDT_two_inputs, as_IR)
 {
     int firstInpType = get<0>(GetParam());
     int secondInpType = get<1>(GetParam());
-    // TODO: It looks like a bug in Inference Engine.
+    Target targetId = get<2>(GetParam());
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018040000
     if (secondInpType == CV_8U)
-        throw SkipTestException("");
+        throw SkipTestException("Test is enabled starting from OpenVINO 2018R4");
+#endif
 
     Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
     int inpSize[] = {1, 2, 3};
@@ -998,11 +1029,21 @@ TEST_P(Test_DLDT_two_inputs, as_IR)
 
     net.setInput(firstInp, "data");
     net.setInput(secondInp, "second_input");
+    net.setPreferableTarget(targetId);
 
+    if (targetId != DNN_TARGET_MYRIAD)
+    {
     Mat out = net.forward();
 
     Mat ref;
     cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
     normAssert(out, ref);
+    }
+    else
+    {
+        // An assertion is expected because the model is in FP32 format but
+        // Myriad plugin supports only FP16 models.
+        ASSERT_ANY_THROW(net.forward());
+    }
 }
 
 TEST_P(Test_DLDT_two_inputs, as_backend)
@@ -1010,6 +1051,8 @@ TEST_P(Test_DLDT_two_inputs, as_backend)
     static const float kScale = 0.5f;
     static const float kScaleInv = 1.0f / kScale;
 
+    Target targetId = get<2>(GetParam());
+
     Net net;
     LayerParams lp;
     lp.type = "Eltwise";
@@ -1018,9 +1061,9 @@ TEST_P(Test_DLDT_two_inputs, as_backend)
     int eltwiseId = net.addLayerToPrev(lp.name, lp.type, lp);  // connect to a first input
     net.connect(0, 1, eltwiseId, 1);  // connect to a second input
 
-    int inpSize[] = {1, 2, 3};
-    Mat firstInp(3, &inpSize[0], get<0>(GetParam()));
-    Mat secondInp(3, &inpSize[0], get<1>(GetParam()));
+    int inpSize[] = {1, 2, 3, 4};
+    Mat firstInp(4, &inpSize[0], get<0>(GetParam()));
+    Mat secondInp(4, &inpSize[0], get<1>(GetParam()));
     randu(firstInp, 0, 255);
     randu(secondInp, 0, 255);
 
@@ -1028,15 +1071,20 @@ TEST_P(Test_DLDT_two_inputs, as_backend)
     net.setInput(firstInp, "data", kScale);
     net.setInput(secondInp, "second_input", kScaleInv);
     net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
+    net.setPreferableTarget(targetId);
 
     Mat out = net.forward();
 
     Mat ref;
     addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
-    normAssert(out, ref);
+    // Output values are in range [0, 637.5].
+    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
+    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
+    normAssert(out, ref, "", l1, lInf);
 }
 
 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs, Combine(
-  Values(CV_8U, CV_32F), Values(CV_8U, CV_32F)
+  Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
+  testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE))
 ));
 
 class UnsupportedLayer : public Layer
...
@@ -162,6 +162,10 @@ TEST_P(Test_ONNX_layers, MultyInputs)
     normAssert(ref, out, "", default_l1, default_lInf);
 }
 
+TEST_P(Test_ONNX_layers, DynamicReshape)
+{
+    testONNXModels("dynamic_reshape");
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
...
@@ -136,7 +136,7 @@ TEST_P(Test_Torch_layers, run_reshape_change_batch_size)
 TEST_P(Test_Torch_layers, run_reshape)
 {
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
         throw SkipTestException("Test is disabled for OpenVINO 2018R4");
 #endif
@@ -172,7 +172,7 @@ TEST_P(Test_Torch_layers, run_depth_concat)
 TEST_P(Test_Torch_layers, run_deconv)
 {
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
     if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
         throw SkipTestException("Test is disabled for OpenVINO 2018R4");
 #endif
...
@@ -116,6 +116,31 @@ PERF_TEST_P(MatSize, equalizeHist,
 }
 #undef MatSize
 
+typedef TestBaseWithParam< tuple<int, int> > Dim_Cmpmethod;
+
+PERF_TEST_P(Dim_Cmpmethod, compareHist,
+            testing::Combine(testing::Values(1, 3),
+                             testing::Values(HISTCMP_CORREL, HISTCMP_CHISQR, HISTCMP_INTERSECT, HISTCMP_BHATTACHARYYA, HISTCMP_CHISQR_ALT, HISTCMP_KL_DIV))
+            )
+{
+    int dims = get<0>(GetParam());
+    int method = get<1>(GetParam());
+    int histSize[] = { 2048, 128, 64 };
+
+    Mat hist1(dims, histSize, CV_32FC1);
+    Mat hist2(dims, histSize, CV_32FC1);
+    randu(hist1, 0, 256);
+    randu(hist2, 0, 256);
+
+    declare.in(hist1.reshape(1, 256), hist2.reshape(1, 256));
+
+    TEST_CYCLE()
+    {
+        compareHist(hist1, hist2, method);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
 typedef tuple<Size, double> Sz_ClipLimit_t;
 typedef TestBaseWithParam<Sz_ClipLimit_t> Sz_ClipLimit;
...
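For context, the new benchmark drives the same public entry point users call directly; a minimal `cv::compareHist` usage sketch on synthetic histograms:

```cpp
// Minimal compareHist usage mirroring the benchmark above; synthetic data.
#include <opencv2/imgproc.hpp>
#include <cstdio>

int main()
{
    int histSize[] = { 256 };
    cv::Mat h1(1, histSize, CV_32FC1), h2(1, histSize, CV_32FC1);
    cv::randu(h1, 0, 256);
    cv::randu(h2, 0, 256);

    std::printf("correlation:   %f\n", cv::compareHist(h1, h2, cv::HISTCMP_CORREL));
    std::printf("intersection:  %f\n", cv::compareHist(h1, h2, cv::HISTCMP_INTERSECT));
    std::printf("bhattacharyya: %f\n", cv::compareHist(h1, h2, cv::HISTCMP_BHATTACHARYYA));
    return 0;
}
```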
@@ -41,6 +41,7 @@
 
 #include "precomp.hpp"
 #include "opencl_kernels_imgproc.hpp"
+#include "opencv2/core/hal/intrin.hpp"
 #include "opencv2/core/openvx/ovx_defs.hpp"
@@ -1938,10 +1939,6 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
 
     CV_Assert( it.planes[0].isContinuous() && it.planes[1].isContinuous() );
 
-#if CV_SSE2
-    bool haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
-#endif
-
     for( size_t i = 0; i < it.nplanes; i++, ++it )
     {
         const float* h1 = it.planes[0].ptr<float>();
@@ -1961,50 +1958,63 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
         }
         else if( method == CV_COMP_CORREL )
         {
-#if CV_SSE2
-            if (haveSIMD)
-            {
-                __m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1;
-                __m128d v_s11 = v_s1, v_s22 = v_s1, v_s12 = v_s1;
-                for ( ; j <= len - 4; j += 4)
-                {
-                    __m128 v_a = _mm_loadu_ps(h1 + j);
-                    __m128 v_b = _mm_loadu_ps(h2 + j);
-                    // 0-1
-                    __m128d v_ad = _mm_cvtps_pd(v_a);
-                    __m128d v_bd = _mm_cvtps_pd(v_b);
-                    v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd));
-                    v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad));
-                    v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd));
-                    v_s1 = _mm_add_pd(v_s1, v_ad);
-                    v_s2 = _mm_add_pd(v_s2, v_bd);
-                    // 2-3
-                    v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8)));
-                    v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8)));
-                    v_s12 = _mm_add_pd(v_s12, _mm_mul_pd(v_ad, v_bd));
-                    v_s11 = _mm_add_pd(v_s11, _mm_mul_pd(v_ad, v_ad));
-                    v_s22 = _mm_add_pd(v_s22, _mm_mul_pd(v_bd, v_bd));
-                    v_s1 = _mm_add_pd(v_s1, v_ad);
-                    v_s2 = _mm_add_pd(v_s2, v_bd);
-                }
-                double CV_DECL_ALIGNED(16) ar[10];
-                _mm_store_pd(ar, v_s12);
-                _mm_store_pd(ar + 2, v_s11);
-                _mm_store_pd(ar + 4, v_s22);
-                _mm_store_pd(ar + 6, v_s1);
-                _mm_store_pd(ar + 8, v_s2);
-                s12 += ar[0] + ar[1];
-                s11 += ar[2] + ar[3];
-                s22 += ar[4] + ar[5];
-                s1 += ar[6] + ar[7];
-                s2 += ar[8] + ar[9];
-            }
-#endif
+#if CV_SIMD_64F
+            v_float64 v_s1 = vx_setzero_f64();
+            v_float64 v_s2 = vx_setzero_f64();
+            v_float64 v_s11 = vx_setzero_f64();
+            v_float64 v_s12 = vx_setzero_f64();
+            v_float64 v_s22 = vx_setzero_f64();
+            for ( ; j <= len - v_float32::nlanes; j += v_float32::nlanes)
+            {
+                v_float32 v_a = vx_load(h1 + j);
+                v_float32 v_b = vx_load(h2 + j);
+
+                // 0-1
+                v_float64 v_ad = v_cvt_f64(v_a);
+                v_float64 v_bd = v_cvt_f64(v_b);
+                v_s12 = v_muladd(v_ad, v_bd, v_s12);
+                v_s11 = v_muladd(v_ad, v_ad, v_s11);
+                v_s22 = v_muladd(v_bd, v_bd, v_s22);
+                v_s1 += v_ad;
+                v_s2 += v_bd;
+
+                // 2-3
+                v_ad = v_cvt_f64_high(v_a);
+                v_bd = v_cvt_f64_high(v_b);
+                v_s12 = v_muladd(v_ad, v_bd, v_s12);
+                v_s11 = v_muladd(v_ad, v_ad, v_s11);
+                v_s22 = v_muladd(v_bd, v_bd, v_s22);
+                v_s1 += v_ad;
+                v_s2 += v_bd;
+            }
+            s12 += v_reduce_sum(v_s12);
+            s11 += v_reduce_sum(v_s11);
+            s22 += v_reduce_sum(v_s22);
+            s1 += v_reduce_sum(v_s1);
+            s2 += v_reduce_sum(v_s2);
+#elif CV_SIMD && 0 // Disable vectorization for CV_COMP_CORREL if f64 is unsupported due to low precision
+            v_float32 v_s1 = vx_setzero_f32();
+            v_float32 v_s2 = vx_setzero_f32();
+            v_float32 v_s11 = vx_setzero_f32();
+            v_float32 v_s12 = vx_setzero_f32();
+            v_float32 v_s22 = vx_setzero_f32();
+            for (; j <= len - v_float32::nlanes; j += v_float32::nlanes)
+            {
+                v_float32 v_a = vx_load(h1 + j);
+                v_float32 v_b = vx_load(h2 + j);
+                v_s12 = v_muladd(v_a, v_b, v_s12);
+                v_s11 = v_muladd(v_a, v_a, v_s11);
+                v_s22 = v_muladd(v_b, v_b, v_s22);
+                v_s1 += v_a;
+                v_s2 += v_b;
+            }
+            s12 += v_reduce_sum(v_s12);
+            s11 += v_reduce_sum(v_s11);
+            s22 += v_reduce_sum(v_s22);
+            s1 += v_reduce_sum(v_s1);
+            s2 += v_reduce_sum(v_s2);
+#endif
             for( ; j < len; j++ )
             {
                 double a = h1[j];
@@ -2019,67 +2029,68 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
         }
         else if( method == CV_COMP_INTERSECT )
         {
-#if CV_NEON
-            float32x4_t v_result = vdupq_n_f32(0.0f);
-            for( ; j <= len - 4; j += 4 )
-                v_result = vaddq_f32(v_result, vminq_f32(vld1q_f32(h1 + j), vld1q_f32(h2 + j)));
-            float CV_DECL_ALIGNED(16) ar[4];
-            vst1q_f32(ar, v_result);
-            result += ar[0] + ar[1] + ar[2] + ar[3];
-#elif CV_SSE2
-            if (haveSIMD)
-            {
-                __m128d v_result = _mm_setzero_pd();
-                for ( ; j <= len - 4; j += 4)
-                {
-                    __m128 v_src = _mm_min_ps(_mm_loadu_ps(h1 + j),
-                                              _mm_loadu_ps(h2 + j));
-                    v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src));
-                    v_src = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8));
-                    v_result = _mm_add_pd(v_result, _mm_cvtps_pd(v_src));
-                }
-                double CV_DECL_ALIGNED(16) ar[2];
-                _mm_store_pd(ar, v_result);
-                result += ar[0] + ar[1];
-            }
-#endif
+#if CV_SIMD_64F
+            v_float64 v_result = vx_setzero_f64();
+            for ( ; j <= len - v_float32::nlanes; j += v_float32::nlanes)
+            {
+                v_float32 v_src = v_min(vx_load(h1 + j), vx_load(h2 + j));
+                v_result += v_cvt_f64(v_src) + v_cvt_f64_high(v_src);
+            }
+            result += v_reduce_sum(v_result);
+#elif CV_SIMD
+            v_float32 v_result = vx_setzero_f32();
+            for (; j <= len - v_float32::nlanes; j += v_float32::nlanes)
+            {
+                v_float32 v_src = v_min(vx_load(h1 + j), vx_load(h2 + j));
+                v_result += v_src;
+            }
+            result += v_reduce_sum(v_result);
+#endif
             for( ; j < len; j++ )
                 result += std::min(h1[j], h2[j]);
         }
         else if( method == CV_COMP_BHATTACHARYYA )
         {
-#if CV_SSE2
-            if (haveSIMD)
-            {
-                __m128d v_s1 = _mm_setzero_pd(), v_s2 = v_s1, v_result = v_s1;
-                for ( ; j <= len - 4; j += 4)
-                {
-                    __m128 v_a = _mm_loadu_ps(h1 + j);
-                    __m128 v_b = _mm_loadu_ps(h2 + j);
-                    __m128d v_ad = _mm_cvtps_pd(v_a);
-                    __m128d v_bd = _mm_cvtps_pd(v_b);
-                    v_s1 = _mm_add_pd(v_s1, v_ad);
-                    v_s2 = _mm_add_pd(v_s2, v_bd);
-                    v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd)));
-                    v_ad = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_a), 8)));
-                    v_bd = _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_b), 8)));
-                    v_s1 = _mm_add_pd(v_s1, v_ad);
-                    v_s2 = _mm_add_pd(v_s2, v_bd);
-                    v_result = _mm_add_pd(v_result, _mm_sqrt_pd(_mm_mul_pd(v_ad, v_bd)));
-                }
-                double CV_DECL_ALIGNED(16) ar[6];
-                _mm_store_pd(ar, v_s1);
-                _mm_store_pd(ar + 2, v_s2);
-                _mm_store_pd(ar + 4, v_result);
-                s1 += ar[0] + ar[1];
-                s2 += ar[2] + ar[3];
-                result += ar[4] + ar[5];
-            }
-#endif
+#if CV_SIMD_64F
+            v_float64 v_s1 = vx_setzero_f64();
+            v_float64 v_s2 = vx_setzero_f64();
+            v_float64 v_result = vx_setzero_f64();
+            for ( ; j <= len - v_float32::nlanes; j += v_float32::nlanes)
+            {
+                v_float32 v_a = vx_load(h1 + j);
+                v_float32 v_b = vx_load(h2 + j);
+                v_float64 v_ad = v_cvt_f64(v_a);
+                v_float64 v_bd = v_cvt_f64(v_b);
+                v_s1 += v_ad;
+                v_s2 += v_bd;
+                v_result += v_sqrt(v_ad * v_bd);
+                v_ad = v_cvt_f64_high(v_a);
+                v_bd = v_cvt_f64_high(v_b);
+                v_s1 += v_ad;
+                v_s2 += v_bd;
+                v_result += v_sqrt(v_ad * v_bd);
+            }
+            s1 += v_reduce_sum(v_s1);
+            s2 += v_reduce_sum(v_s2);
+            result += v_reduce_sum(v_result);
+#elif CV_SIMD && 0 // Disable vectorization for CV_COMP_BHATTACHARYYA if f64 is unsupported due to low precision
+            v_float32 v_s1 = vx_setzero_f32();
+            v_float32 v_s2 = vx_setzero_f32();
+            v_float32 v_result = vx_setzero_f32();
+            for (; j <= len - v_float32::nlanes; j += v_float32::nlanes)
+            {
+                v_float32 v_a = vx_load(h1 + j);
+                v_float32 v_b = vx_load(h2 + j);
+                v_s1 += v_a;
+                v_s2 += v_b;
+                v_result += v_sqrt(v_a * v_b);
+            }
+            s1 += v_reduce_sum(v_s1);
+            s2 += v_reduce_sum(v_s2);
+            result += v_reduce_sum(v_result);
+#endif
             for( ; j < len; j++ )
             {
                 double a = h1[j];
...
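The rewrite above replaces per-ISA SSE and NEON blocks with a single universal-intrinsics body: floats are loaded one register at a time, widened to double in two halves with `v_cvt_f64`/`v_cvt_f64_high`, accumulated via `v_muladd`, and collapsed once at the end with the newly added `v_reduce_sum` for doubles. A standalone sketch of that widening-accumulate pattern, assuming an OpenCV build where CV_SIMD_64F is set:

```cpp
// Dot product of float data with double accumulators, via OpenCV universal
// intrinsics; a sketch of the pattern used in the compareHist rewrite.
#include <opencv2/core/hal/intrin.hpp>
#include <cstdio>
using namespace cv;

int main()
{
    float a[] = {1, 2, 3, 4, 5, 6, 7, 8};
    float b[] = {8, 7, 6, 5, 4, 3, 2, 1};
    int len = 8, j = 0;
    double dot = 0;
#if CV_SIMD_64F
    v_float64 v_dot = vx_setzero_f64();
    for (; j <= len - v_float32::nlanes; j += v_float32::nlanes)
    {
        v_float32 va = vx_load(a + j), vb = vx_load(b + j);
        v_dot = v_muladd(v_cvt_f64(va), v_cvt_f64(vb), v_dot);            // low half
        v_dot = v_muladd(v_cvt_f64_high(va), v_cvt_f64_high(vb), v_dot);  // high half
    }
    dot += v_reduce_sum(v_dot);
#endif
    for (; j < len; j++)        // scalar tail, same as in compareHist
        dot += (double)a[j] * b[j];
    std::printf("%f\n", dot);   // expect 120
    return 0;
}
```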
@@ -99,9 +99,7 @@ static void init_MFCreateDXGIDeviceManager()
     pMFCreateDXGIDeviceManager_initialized = true;
 }
 #endif
-#if (WINVER >= 0x0602) // Available since Win 8
-#pragma comment(lib, "MinCore_Downlevel")
-#endif
+#pragma comment(lib, "Shlwapi.lib")
 #endif
 
 #include <mferror.h>
...
@@ -49,7 +49,7 @@ def getXCodeMajor():
         raise Exception("Failed to parse Xcode version")
 
 class Builder:
-    def __init__(self, opencv, contrib, dynamic, bitcodedisabled, exclude, targets):
+    def __init__(self, opencv, contrib, dynamic, bitcodedisabled, exclude, enablenonfree, targets):
         self.opencv = os.path.abspath(opencv)
         self.contrib = None
         if contrib:
@@ -61,6 +61,7 @@ class Builder:
         self.dynamic = dynamic
         self.bitcodedisabled = bitcodedisabled
         self.exclude = exclude
+        self.enablenonfree = enablenonfree
         self.targets = targets
 
     def getBD(self, parent, t):
@@ -138,7 +139,9 @@ class Builder:
                 "-DBUILD_SHARED_LIBS=ON",
                 "-DCMAKE_MACOSX_BUNDLE=ON",
                 "-DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO",
-            ] if self.dynamic else [])
+            ] if self.dynamic else []) + ([
+                "-DOPENCV_ENABLE_NONFREE=ON"
+            ] if self.enablenonfree else [])
 
         if len(self.exclude) > 0:
             args += ["-DBUILD_opencv_world=OFF"] if not self.dynamic else []
@@ -286,6 +289,7 @@ if __name__ == "__main__":
     parser.add_argument('--iphoneos_deployment_target', default=os.environ.get('IPHONEOS_DEPLOYMENT_TARGET', IPHONEOS_DEPLOYMENT_TARGET), help='specify IPHONEOS_DEPLOYMENT_TARGET')
     parser.add_argument('--iphoneos_archs', default='armv7,armv7s,arm64', help='select iPhoneOS target ARCHS')
     parser.add_argument('--iphonesimulator_archs', default='i386,x86_64', help='select iPhoneSimulator target ARCHS')
+    parser.add_argument('--enable_nonfree', default=False, dest='enablenonfree', action='store_true', help='enable non-free modules (disabled by default)')
     args = parser.parse_args()
 
     os.environ['IPHONEOS_DEPLOYMENT_TARGET'] = args.iphoneos_deployment_target
@@ -295,7 +299,7 @@ if __name__ == "__main__":
     iphonesimulator_archs = args.iphonesimulator_archs.split(',')
     print('Using iPhoneSimulator ARCHS=' + str(iphonesimulator_archs))
 
-    b = iOSBuilder(args.opencv, args.contrib, args.dynamic, args.bitcodedisabled, args.without,
+    b = iOSBuilder(args.opencv, args.contrib, args.dynamic, args.bitcodedisabled, args.without, args.enablenonfree,
         [
             (iphoneos_archs, "iPhoneOS"),
         ] if os.environ.get('BUILD_PRECOMMIT', None) else
...
@@ -38,9 +38,10 @@ if __name__ == "__main__":
     parser.add_argument('--opencv', metavar='DIR', default=folder, help='folder with opencv repository (default is "../.." relative to script location)')
     parser.add_argument('--contrib', metavar='DIR', default=None, help='folder with opencv_contrib repository (default is "None" - build only main framework)')
    parser.add_argument('--without', metavar='MODULE', default=[], action='append', help='OpenCV modules to exclude from the framework')
+    parser.add_argument('--enable_nonfree', default=False, dest='enablenonfree', action='store_true', help='enable non-free modules (disabled by default)')
     args = parser.parse_args()
 
-    b = OSXBuilder(args.opencv, args.contrib, False, False, args.without,
+    b = OSXBuilder(args.opencv, args.contrib, False, False, args.without, args.enablenonfree,
         [
             (["x86_64"], "MacOSX")
         ])
...
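With these changes, non-free modules can be requested from the framework build scripts directly, e.g. `python build_framework.py <out_dir> --enable_nonfree` (the output-directory argument follows the scripts' existing convention). Omitting the flag keeps the previous default of excluding non-free code, since the new argparse option defaults to False.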