Commit b505cf84 authored by Alexander Alekhin

Merge pull request #16096 from YashasSamaga:cuda4dnn-region-optimize

parents 476a0273 dd3f517f
This diff is collapsed.
...@@ -13,19 +13,12 @@ ...@@ -13,19 +13,12 @@
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T> template <class T>
void sigmoid_strided(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, std::size_t n, std::size_t stride, std::size_t offset); void region(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<T> bias,
template <class T>
void softmax_strided(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, std::size_t n, std::size_t stride, std::size_t offset);
template <class T>
void region_finalize(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<T> bias,
T object_prob_cutoff, T class_prob_cutoff, T object_prob_cutoff, T class_prob_cutoff,
std::size_t height_norm, std::size_t width_norm, std::size_t boxes_per_cell, std::size_t box_size,
std::size_t rows, std::size_t cols, std::size_t rows, std::size_t cols,
std::size_t boxes_per_cell, std::size_t height_norm, std::size_t width_norm,
std::size_t box_size, bool if_true_sigmoid_else_softmax);
std::size_t classes);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */ }}}} /* namespace cv::dnn::cuda4dnn::kernels */
......
...@@ -102,21 +102,21 @@ namespace cv { namespace dnn { namespace cuda4dnn { ...@@ -102,21 +102,21 @@ namespace cv { namespace dnn { namespace cuda4dnn {
auto output_wrapper = outputs[0].dynamicCast<wrapper_type>(); auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
auto output = output_wrapper->getSpan(); auto output = output_wrapper->getSpan();
csl::memcpy<T>(output.get(), input.get(), output.size(), stream);
auto rows = input.get_axis_size(1); auto rows = input.get_axis_size(1);
auto cols = input.get_axis_size(2); auto cols = input.get_axis_size(2);
auto cell_box_size = classes + 4 + 1; auto cell_box_size = classes + 4 + 1;
/* we squash class scores into probabilities using softmax or sigmoid */ /* we squash class scores into probabilities using softmax or sigmoid */
if (squash_type == SquashMethod::SOFTMAX) bool if_true_sigmoid_else_softmax = (squash_type == SquashMethod::SIGMOID);
kernels::softmax_strided<T>(stream, output, input, classes, cell_box_size, 5);
else if (squash_type == SquashMethod::SIGMOID) kernels::region<T>(stream, output, input, biasTensor,
kernels::sigmoid_strided<T>(stream, output, input, classes, cell_box_size, 5); object_prob_cutoff, class_prob_cutoff,
boxes_per_cell, cell_box_size,
kernels::region_finalize<T>(stream, output, input, biasTensor, object_prob_cutoff, class_prob_cutoff, rows, cols,
height_norm, width_norm, rows, cols, boxes_per_cell, cell_box_size, classes); height_norm, width_norm,
if_true_sigmoid_else_softmax
);
if (nms_iou_threshold > 0) { if (nms_iou_threshold > 0) {
auto output_mat = output_wrapper->getMutableHostMat(); auto output_mat = output_wrapper->getMutableHostMat();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment