Merge remote-tracking branch 'upstream/3.4' into merge-3.4

51e54305 · Alexander Alekhin · 000a13b6 · 9e3b6a22 · 51e54305 · 51e54305
Commit 51e54305 authored May 10, 2018 by Alexander Alekhin
20 changed files
--- a/doc/tutorials/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.markdown
+++ b/doc/tutorials/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.markdown
@@ -69,7 +69,7 @@ CvMat* mI        =  &I.operator CvMat();
 @endcode
 One of the biggest complaints of the C interface is that it leaves all the memory management to you.
 You need to figure out when it is safe to release your unused objects and make sure you do so before
-the program finishes or you could have troublesome memory leeks. To work around this issue in OpenCV
+the program finishes or you could have troublesome memory leaks. To work around this issue in OpenCV
 there is introduced a sort of smart pointer. This will automatically release the object when it's no
 longer in use. To use this declare the pointers as a specialization of the *Ptr* :
 @code{.cpp}

--- a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
+++ b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
@@ -29,7 +29,7 @@ Execute in webcam:
 @code{.bash}
-$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392
+$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --rgb
 @endcode
@@ -37,7 +37,7 @@ Execute with image or video file:
 @code{.bash}
-$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --input=[PATH-TO-IMAGE-OR-VIDEO-FILE]
+$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --input=[PATH-TO-IMAGE-OR-VIDEO-FILE] --rgb
 @endcode

--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -786,10 +786,14 @@ template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
 { return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \
 template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
 { return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
+{ return a; } \
 template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
 { return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \
 template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
-{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); }
+{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
+{ CV_UNUSED(b); return a; }
 OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8)
 OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8)

--- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
@@ -589,7 +589,7 @@ inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
    return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT));
 }
-#define OPENCV_IMPL_VSX_ROTATE_64(_Tpvec, suffix, rg1, rg2)       \
+#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2)   \
 template<int imm>                                                 \
 inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
 {                                                                 \
@@ -598,11 +598,13 @@ inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
    return imm ? b : a;                                           \
 }
-OPENCV_IMPL_VSX_ROTATE_64(v_int64x2,  right, a, b)
+#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec)    \
-OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
+OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left,  b, a)  \
+OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b)  // one invocation emits both the left and the right two-register rotate for _Tpvec
-OPENCV_IMPL_VSX_ROTATE_64(v_int64x2,  left, b, a)
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2)  // newly covered type: the removed lines instantiated only v_int64x2 and v_uint64x2
-OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2)
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
 /* Extract */
 template<int s, typename _Tpvec>
@@ -716,26 +718,33 @@ inline int v_signmask(const v_uint64x2& a)
 inline int v_signmask(const v_float64x2& a)
 { return v_signmask(v_reinterpret_as_s64(a)); }
 template<typename _Tpvec>
 inline bool v_check_all(const _Tpvec& a)
-{ return vec_all_lt(a.val, _Tpvec().val);}
+{ return vec_all_lt(a.val, _Tpvec().val); }  // generic case: vec_all_lt of the lanes against a default-constructed _Tpvec (presumably all zeros -- TODO confirm)
-inline bool v_check_all(const v_uint8x16 &a)
+inline bool v_check_all(const v_uint8x16& a)  // unsigned overloads forward to the same-width signed vector type
 { return v_check_all(v_reinterpret_as_s8(a)); }
-inline bool v_check_all(const v_uint16x8 &a)
+inline bool v_check_all(const v_uint16x8& a)
 { return v_check_all(v_reinterpret_as_s16(a)); }
-inline bool v_check_all(const v_uint32x4 &a)
+inline bool v_check_all(const v_uint32x4& a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_float32x4& a)  // new overload: float lanes are checked through v_reinterpret_as_s32
 { return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_float64x2& a)  // new overload: double lanes are checked through v_reinterpret_as_s64
+{ return v_check_all(v_reinterpret_as_s64(a)); }
 template<typename _Tpvec>
 inline bool v_check_any(const _Tpvec& a)
-{ return vec_any_lt(a.val, _Tpvec().val);}
+{ return vec_any_lt(a.val, _Tpvec().val); }  // generic case: vec_any_lt of the lanes against a default-constructed _Tpvec (presumably all zeros -- TODO confirm)
-inline bool v_check_any(const v_uint8x16 &a)
+inline bool v_check_any(const v_uint8x16& a)  // unsigned overloads forward to the same-width signed vector type
 { return v_check_any(v_reinterpret_as_s8(a)); }
-inline bool v_check_any(const v_uint16x8 &a)
+inline bool v_check_any(const v_uint16x8& a)
 { return v_check_any(v_reinterpret_as_s16(a)); }
-inline bool v_check_any(const v_uint32x4 &a)
+inline bool v_check_any(const v_uint32x4& a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_float32x4& a)  // new overload: float lanes are checked through v_reinterpret_as_s32
 { return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_float64x2& a)  // new overload: double lanes are checked through v_reinterpret_as_s64
+{ return v_check_any(v_reinterpret_as_s64(a)); }
 ////////// Other math /////////

--- a/modules/core/include/opencv2/core/operations.hpp
+++ b/modules/core/include/opencv2/core/operations.hpp
@@ -194,8 +194,8 @@ Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
 {
    Matx<_Tp, n, m> b;
    bool ok;
-    if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
+    if( m == n && (method == DECOMP_LU || method == DECOMP_CHOLESKY) )
-        ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*this, b, method);
+        ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*reinterpret_cast<const Matx<_Tp, m, m>*>(this), reinterpret_cast<Matx<_Tp, m, m>&>(b), method);
    else
    {
        Mat A(*this, false), B(b, false);

--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -837,17 +837,28 @@ template<typename R> struct TheTest
        Data<R> resC = v_rotate_right<s>(a);
        Data<R> resD = v_rotate_right<s>(a, b);
+        Data<R> resE = v_rotate_left<s>(a);
+        Data<R> resF = v_rotate_left<s>(a, b);
        for (int i = 0; i < R::nlanes; ++i)
        {
            if (i + s >= R::nlanes)
            {
                EXPECT_EQ((LaneType)0, resC[i]);
                EXPECT_EQ(dataB[i - R::nlanes + s], resD[i]);
+                EXPECT_EQ((LaneType)0, resE[i - R::nlanes + s]);
+                EXPECT_EQ(dataB[i], resF[i - R::nlanes + s]);
            }
            else
+            {
                EXPECT_EQ(dataA[i + s], resC[i]);
-        }
+                EXPECT_EQ(dataA[i + s], resD[i]);
+                EXPECT_EQ(dataA[i], resE[i + s]);
+                EXPECT_EQ(dataA[i], resF[i + s]);
+            }
+        }
        return *this;
    }

--- a/modules/cudafilters/src/cuda/column_filter.hpp
+++ b/modules/cudafilters/src/cuda/column_filter.hpp
@@ -52,10 +52,8 @@ namespace column_filter
 {
    #define MAX_KERNEL_SIZE 32
-    __constant__ float c_kernel[MAX_KERNEL_SIZE];
    template <int KSIZE, typename T, typename D, typename B>
-    __global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
+    __global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const float* kernel, const int anchor, const B brd)
    {
        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            const int BLOCK_DIM_X = 16;
@@ -135,7 +133,7 @@ namespace column_filter
                #pragma unroll
                for (int k = 0; k < KSIZE; ++k)
-                    sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
+                    sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * kernel[k];
                dst(y, x) = saturate_cast<D>(sum);
            }
@@ -143,7 +141,7 @@ namespace column_filter
    }
    template <int KSIZE, typename T, typename D, template<typename> class B>
-    void caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
+    void caller(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream)
    {
        int BLOCK_DIM_X;
        int BLOCK_DIM_Y;
@@ -167,7 +165,7 @@ namespace column_filter
        B<T> brd(src.rows);
-        linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
+        linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, kernel, anchor, brd);
        cudaSafeCall( cudaGetLastError() );
@@ -181,7 +179,7 @@ namespace filter
    template <typename T, typename D>
    void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
    {
-        typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
+        typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream);
        static const caller_t callers[5][33] =
        {
@@ -362,11 +360,6 @@ namespace filter
            }
        };
-        if (stream == 0)
+        callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, kernel, anchor, cc, stream);
-            cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
-        else
-            cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
-        callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
    }
 }
--- a/modules/cudafilters/src/cuda/row_filter.hpp
+++ b/modules/cudafilters/src/cuda/row_filter.hpp
@@ -52,10 +52,8 @@ namespace row_filter
 {
    #define MAX_KERNEL_SIZE 32
-    __constant__ float c_kernel[MAX_KERNEL_SIZE];
    template <int KSIZE, typename T, typename D, typename B>
-    __global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
+    __global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const float* kernel, const int anchor, const B brd)
    {
        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            const int BLOCK_DIM_X = 32;
@@ -135,7 +133,7 @@ namespace row_filter
                #pragma unroll
                for (int k = 0; k < KSIZE; ++k)
-                    sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
+                    sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * kernel[k];
                dst(y, x) = saturate_cast<D>(sum);
            }
@@ -143,7 +141,7 @@ namespace row_filter
    }
    template <int KSIZE, typename T, typename D, template<typename> class B>
-    void caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
+    void caller(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream)
    {
        int BLOCK_DIM_X;
        int BLOCK_DIM_Y;
@@ -167,7 +165,7 @@ namespace row_filter
        B<T> brd(src.cols);
-        linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
+        linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, kernel, anchor, brd);
        cudaSafeCall( cudaGetLastError() );
        if (stream == 0)
@@ -180,7 +178,7 @@ namespace filter
    template <typename T, typename D>
    void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
    {
-        typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
+        typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream);
        static const caller_t callers[5][33] =
        {
@@ -361,11 +359,6 @@ namespace filter
            }
        };
-        if (stream == 0)
+        callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, kernel, anchor, cc, stream);
-            cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
-        else
-            cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
-        callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
    }
 }
--- a/modules/cudaimgproc/src/canny.cpp
+++ b/modules/cudaimgproc/src/canny.cpp
@@ -58,9 +58,9 @@ namespace canny
    void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream);
-    void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream);
+    void edgesHysteresisLocal(PtrStepSzi map, short2* st1, int* d_counter, cudaStream_t stream);
-    void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream);
+    void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, int* d_counter, cudaStream_t stream);
    void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream);
 }
@@ -127,6 +127,8 @@ namespace
        Ptr<Filter> filterDX_, filterDY_;
 #endif
        int old_apperture_size_;
+        int *d_counter;
    };
    void CannyImpl::detect(InputArray _image, OutputArray _edges, Stream& stream)
@@ -218,12 +220,17 @@ namespace
    void CannyImpl::CannyCaller(GpuMat& edges, Stream& stream)
    {
-        map_.setTo(Scalar::all(0));
+        map_.setTo(Scalar::all(0), stream);  // the clear of map_ now receives the caller's stream, like the calls below
        canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_), StreamAccessor::getStream(stream));
-        canny::edgesHysteresisLocal(map_, st1_.ptr<short2>(), StreamAccessor::getStream(stream));
+        cudaSafeCall( cudaMalloc(&d_counter, sizeof(int)) );  // one-int device buffer, allocated on every call and handed to both hysteresis passes
+        canny::edgesHysteresisLocal(map_, st1_.ptr<short2>(), d_counter, StreamAccessor::getStream(stream));
+        canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>(), d_counter, StreamAccessor::getStream(stream));
-        canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>(), StreamAccessor::getStream(stream));
+        cudaSafeCall( cudaFree(d_counter) );  // NOTE(review): skipped if a hysteresis call above throws, which would leak d_counter -- confirm whether they can throw
        canny::getEdges(map_, edges, StreamAccessor::getStream(stream));
    }

--- a/modules/cudaimgproc/src/cuda/canny.cu
+++ b/modules/cudaimgproc/src/cuda/canny.cu
--- a/modules/cudaimgproc/test/test_canny.cpp
+++ b/modules/cudaimgproc/test/test_canny.cpp
@@ -92,9 +92,66 @@ CUDA_TEST_P(Canny, Accuracy)
    EXPECT_MAT_SIMILAR(edges_gold, edges, 2e-2);
 }
+// Loop body for cv::parallel_for_: for every index i in the given range it
+// creates its own cv::cuda::Stream and its own Canny detector, runs detect()
+// on the shared input image into edges[i], and waits for that stream to finish.
+class CannyAsyncParallelLoopBody : public cv::ParallelLoopBody
+{
+public:
+    // d_img_ is kept by reference and edges_ by pointer, so both must outlive this object;
+    // edges_ must point to at least as many GpuMat elements as the range passed to operator().
+    CannyAsyncParallelLoopBody(const cv::cuda::GpuMat& d_img_, cv::cuda::GpuMat* edges_, double low_thresh_, double high_thresh_, int apperture_size_, bool useL2gradient_)
+        : d_img(d_img_), edges(edges_), low_thresh(low_thresh_), high_thresh(high_thresh_), apperture_size(apperture_size_), useL2gradient(useL2gradient_) {}
+    ~CannyAsyncParallelLoopBody() {}
+    // Processes indices [r.start, r.end); each iteration is self-contained.
+    void operator()(const cv::Range& r) const CV_OVERRIDE
+    {
+        for (int i = r.start; i < r.end; i++) {
+            cv::cuda::Stream stream;
+            cv::Ptr<cv::cuda::CannyEdgeDetector> canny = cv::cuda::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
+            canny->detect(d_img, edges[i], stream);
+            // Block until this iteration's stream has completed before moving on.
+            stream.waitForCompletion();
+        }
+    }
+protected:
+    const cv::cuda::GpuMat& d_img;  // shared input image (not owned)
+    cv::cuda::GpuMat* edges;        // output array, one element per loop index (not owned)
+    double low_thresh;
+    double high_thresh;
+    int apperture_size;
+    bool useL2gradient;
+};
+#define NUM_STREAMS 64
+CUDA_TEST_P(Canny, Async)
+{
+    // Guard clause: skip on devices without FEATURE_SET_COMPUTE_30 (see the skip message).
+    if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_30))
+        throw SkipTestException("CUDA device doesn't support texture objects");
+    const double low_thresh = 50.0;
+    const double high_thresh = 100.0;
+    const cv::Mat img = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
+    const cv::cuda::GpuMat d_src = loadMat(img, useRoi);
+    // Reference result: a single detect() call without an explicit stream.
+    cv::Ptr<cv::cuda::CannyEdgeDetector> reference_detector = cv::cuda::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
+    cv::cuda::GpuMat edges_gold;
+    reference_detector->detect(d_src, edges_gold);
+    // Candidate results: NUM_STREAMS detections dispatched through cv::parallel_for_.
+    cv::cuda::GpuMat edges[NUM_STREAMS];
+    const CannyAsyncParallelLoopBody async_body(d_src, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
+    cv::parallel_for_(cv::Range(0, NUM_STREAMS), async_body);
+    // Each streamed result has to match the reference with zero tolerance.
+    for (int i = 0; i < NUM_STREAMS; i++)
+        EXPECT_MAT_NEAR(edges_gold, edges[i], 0.0);
+}
 INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, Canny, testing::Combine(
    ALL_DEVICES,
-    testing::Values(AppertureSize(3), AppertureSize(5)),
+    testing::Values(AppertureSize(3), AppertureSize(5), AppertureSize(7)),  // aperture size 7 is added to the previously tested 3 and 5
    testing::Values(L2gradient(false), L2gradient(true)),
    WHOLE_SUBMAT));

--- a/modules/imgproc/src/color_hsv.cpp
+++ b/modules/imgproc/src/color_hsv.cpp
--- a/modules/photo/test/test_hdr.cpp
+++ b/modules/photo/test/test_hdr.cpp
@@ -213,7 +213,7 @@ TEST(Photo_MergeRobertson, regression)
    loadImage(test_path + "merge/robertson.hdr", expected);
    merge->process(images, result, times);
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__PPC64__)
    const float eps = 6.f;
 #else
    const float eps = 5.f;

--- a/modules/stitching/include/opencv2/stitching/detail/blenders.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/blenders.hpp
@@ -48,6 +48,7 @@
 #endif
 #include "opencv2/core.hpp"
+#include "opencv2/core/cuda.hpp"
 namespace cv {
 namespace detail {

--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -870,7 +870,12 @@ bool CvCapture_FFMPEG::open( const char* _filename )
            int enc_width = enc->width;
            int enc_height = enc->height;
-            AVCodec *codec = avcodec_find_decoder(enc->codec_id);
+            AVCodec *codec;
+            if(av_dict_get(dict, "video_codec", NULL, 0) == NULL) {
+                codec = avcodec_find_decoder(enc->codec_id);
+            } else {
+                codec = avcodec_find_decoder_by_name(av_dict_get(dict, "video_codec", NULL, 0)->value);
+            }
            if (!codec ||
 #if LIBAVCODEC_VERSION_INT >= ((53<<16)+(8<<8)+0)
                avcodec_open2(enc, codec, NULL)

--- a/modules/videoio/src/cap_qt.cpp
+++ b/modules/videoio/src/cap_qt.cpp
--- a/samples/cpp/tutorial_code/dnn/custom_layers.cpp
+++ b/samples/cpp/tutorial_code/dnn/custom_layers.cpp
@@ -16,18 +16,18 @@ public:
    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
-                                 std::vector<std::vector<int> > &internals) const;
+                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE;
    //! [MyLayer::getMemoryShapes]
    //! [MyLayer::forward]
-    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals);
+    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE;
    //! [MyLayer::forward]
    //! [MyLayer::finalize]
-    virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs);
+    virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs) CV_OVERRIDE;
    //! [MyLayer::finalize]
-    virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals);
+    virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE;
 };
 //! [A custom layer interface]
@@ -49,7 +49,7 @@ public:
    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
-                                 std::vector<std::vector<int> > &internals) const
+                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
    {
        CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
        std::vector<int> outShape(4);
@@ -62,7 +62,7 @@ public:
    }
    // Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
-    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
+    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE
    {
        CV_UNUSED(internals);
        cv::Mat& inp = *inputs[0];
@@ -105,7 +105,7 @@ public:
        }
    }
-    virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
+    virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) CV_OVERRIDE {}
 private:
    int outWidth, outHeight;
@@ -132,7 +132,7 @@ public:
    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int requiredOutputs,
                                 std::vector<std::vector<int> > &outputs,
-                                 std::vector<std::vector<int> > &internals) const
+                                 std::vector<std::vector<int> > &internals) const CV_OVERRIDE
    {
        CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
        std::vector<int> outShape(4);
@@ -146,7 +146,7 @@ public:
    // This implementation is based on a reference implementation from
    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
-    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
+    virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE
    {
        CV_UNUSED(internals);
        cv::Mat& inp = *inputs[0];
@@ -187,7 +187,7 @@ public:
        }
    }
-    virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
+    virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) CV_OVERRIDE {}
 private:
    static inline int offset(const cv::MatSize& size, int c, int x, int y, int b)

--- a/samples/dnn/object_detection.cpp
+++ b/samples/dnn/object_detection.cpp
@@ -7,12 +7,13 @@
 const char* keys =
    "{ help  h     | | Print help message. }"
-    "{ input i     | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
+    "{ device      |  0 | camera device number. }"
+    "{ input i     | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
    "{ model m     | | Path to a binary file of model contains trained weights. "
                      "It could be a file with extensions .caffemodel (Caffe), "
-                      ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
+                      ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet).}"
    "{ config c    | | Path to a text file of model contains network configuration. "
-                      "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
+                      "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet).}"
    "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
    "{ classes     | | Optional path to a text file with names of classes to label detected objects. }"
    "{ mean        | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
@@ -91,7 +92,7 @@ int main(int argc, char** argv)
    if (parser.has("input"))
        cap.open(parser.get<String>("input"));
    else
-        cap.open(0);
+        cap.open(parser.get<int>("device"));
    // Process frames.
    Mat frame, blob;

--- a/samples/dnn/openpose.cpp
+++ b/samples/dnn/openpose.cpp
@@ -61,12 +61,16 @@ int main(int argc, char **argv)
        "{ p proto          |       | (required) model configuration, e.g. hand/pose.prototxt }"
        "{ m model          |       | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
        "{ i image          |       | (required) path to image file (containing a single person, or hand) }"
+        "{ width            |  368  | Preprocess input image by resizing to a specific width. }"
+        "{ height           |  368  | Preprocess input image by resizing to a specific height. }"
        "{ t threshold      |  0.1  | threshold or confidence value for the heatmap }"
    );
    String modelTxt = parser.get<string>("proto");
    String modelBin = parser.get<string>("model");
    String imageFile = parser.get<String>("image");
+    int W_in = parser.get<int>("width");
+    int H_in = parser.get<int>("height");
    float thresh = parser.get<float>("threshold");
    if (parser.get<bool>("help") || modelTxt.empty() || modelBin.empty() || imageFile.empty())
    {
@@ -75,10 +79,6 @@ int main(int argc, char **argv)
        return 0;
    }
-    // fixed input size for the pretrained network
-    int W_in = 368;
-    int H_in = 368;
    // read the network model
    Net net = readNetFromCaffe(modelTxt, modelBin);

--- a/samples/dnn/segmentation.cpp
+++ b/samples/dnn/segmentation.cpp
@@ -7,12 +7,13 @@
 const char* keys =
    "{ help  h     | | Print help message. }"
-    "{ input i     | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
+    "{ device      |  0 | camera device number. }"
+    "{ input i     | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
    "{ model m     | | Path to a binary file of model contains trained weights. "
                      "It could be a file with extensions .caffemodel (Caffe), "
-                      ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
+                      ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet). }"
    "{ config c    | | Path to a text file of model contains network configuration. "
-                      "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
+                      "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet). }"
    "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
    "{ classes     | | Optional path to a text file with names of classes. }"
    "{ colors      | | Optional path to a text file with colors for an every class. "
@@ -111,7 +112,7 @@ int main(int argc, char** argv)
    if (parser.has("input"))
        cap.open(parser.get<String>("input"));
    else
-        cap.open(0);
+        cap.open(parser.get<int>("device"));
    //! [Open a video file or an image file or a camera stream]
    // Process frames.