Commit b505cf84 authored by Alexander Alekhin

Merge pull request #16096 from YashasSamaga:cuda4dnn-region-optimize

parents 476a0273 dd3f517f
This diff is collapsed.
......@@ -13,19 +13,12 @@
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T>
void sigmoid_strided(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, std::size_t n, std::size_t stride, std::size_t offset);
template <class T>
void softmax_strided(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, std::size_t n, std::size_t stride, std::size_t offset);
template <class T>
void region_finalize(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<T> bias,
void region(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<T> bias,
T object_prob_cutoff, T class_prob_cutoff,
std::size_t height_norm, std::size_t width_norm,
std::size_t boxes_per_cell, std::size_t box_size,
std::size_t rows, std::size_t cols,
std::size_t boxes_per_cell,
std::size_t box_size,
std::size_t classes);
std::size_t height_norm, std::size_t width_norm,
bool if_true_sigmoid_else_softmax);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
......
......@@ -102,21 +102,21 @@ namespace cv { namespace dnn { namespace cuda4dnn {
auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
auto output = output_wrapper->getSpan();
csl::memcpy<T>(output.get(), input.get(), output.size(), stream);
auto rows = input.get_axis_size(1);
auto cols = input.get_axis_size(2);
auto cell_box_size = classes + 4 + 1;
/* we squash class scores into probabilities using softmax or sigmoid */
if (squash_type == SquashMethod::SOFTMAX)
kernels::softmax_strided<T>(stream, output, input, classes, cell_box_size, 5);
else if (squash_type == SquashMethod::SIGMOID)
kernels::sigmoid_strided<T>(stream, output, input, classes, cell_box_size, 5);
kernels::region_finalize<T>(stream, output, input, biasTensor, object_prob_cutoff, class_prob_cutoff,
height_norm, width_norm, rows, cols, boxes_per_cell, cell_box_size, classes);
bool if_true_sigmoid_else_softmax = (squash_type == SquashMethod::SIGMOID);
kernels::region<T>(stream, output, input, biasTensor,
object_prob_cutoff, class_prob_cutoff,
boxes_per_cell, cell_box_size,
rows, cols,
height_norm, width_norm,
if_true_sigmoid_else_softmax
);
if (nms_iou_threshold > 0) {
auto output_mat = output_wrapper->getMutableHostMat();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment