Commit 61969dc1 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #16171 from YashasSamaga:cuda4dnn-tensor-cores

parents 8f910d2d cf93df41
......@@ -224,6 +224,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu
);
}
CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionGroupCount(descriptor, group_count));
if (std::is_same<T, half>::value)
CUDA4DNN_CHECK_CUDNN(cudnnSetConvolutionMathType(descriptor, CUDNN_TENSOR_OP_MATH));
} catch (...) {
/* cudnnDestroyConvolutionDescriptor will not fail for a valid desriptor object */
CUDA4DNN_CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(descriptor));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment