Commit babd21c7 authored by Vadim Pisarevsky

Merge pull request #9823 from alalek:dnn_halide_bypass_tbb_threads

parents 1ea1ff19 3935e136
@@ -589,33 +589,7 @@ struct Net::Impl
         return wrapper;
     }
 
-    class HalideCompiler : public ParallelLoopBody
-    {
-    public:
-        HalideCompiler(const MapIdToLayerData& layers_, int preferableTarget_)
-            : layers(&layers_), preferableTarget(preferableTarget_) {}
-
-        void operator()(const Range& r) const
-        {
-            MapIdToLayerData::const_iterator it = layers->begin();
-            for (int i = 0; i < r.start && it != layers->end(); ++i, ++it) {}
-            for (int i = r.start; i < r.end && it != layers->end(); ++i, ++it)
-            {
-                const LayerData &ld = it->second;
-                Ptr<Layer> layer = ld.layerInstance;
-                bool skip = ld.skipFlags.find(DNN_BACKEND_HALIDE)->second;
-                if (layer->supportBackend(DNN_BACKEND_HALIDE) && !skip)
-                {
-                    Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
-                    dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
-                }
-            }
-        }
-
-    private:
-        const MapIdToLayerData* layers;
-        int preferableTarget;
-    };
+#ifdef HAVE_HALIDE
     void compileHalide()
     {
         CV_TRACE_FUNCTION();
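
The deleted HalideCompiler is an instance of OpenCV's parallel_for_ idiom: a ParallelLoopBody subclass whose operator() handles a sub-range, dispatched by whichever parallel framework OpenCV was built with (TBB among them, which the branch name dnn_halide_bypass_tbb_threads suggests was the problem here). A minimal standalone sketch of that idiom, with a hypothetical SquareBody task for comparison with the replacement further down:

#include <opencv2/core/utility.hpp>
#include <vector>

// Hypothetical body: squares each element of its assigned sub-range in place.
class SquareBody : public cv::ParallelLoopBody
{
public:
    explicit SquareBody(std::vector<int>& data) : data_(&data) {}
    void operator()(const cv::Range& r) const
    {
        for (int i = r.start; i < r.end; ++i)
            (*data_)[i] *= (*data_)[i];
    }
private:
    std::vector<int>* data_;
};

int main()
{
    std::vector<int> v(1000, 3);
    // OpenCV splits [0, v.size()) into chunks and runs them on its
    // configured parallel backend (TBB, OpenMP, pthreads, ...).
    cv::parallel_for_(cv::Range(0, (int)v.size()), SquareBody(v));
}

Note also that because std::map has no random access, HalideCompiler had to walk the layer map from begin() just to reach r.start of each chunk; the replacement sidesteps that by collecting the work items into a vector first.
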
@@ -623,8 +597,8 @@ struct Net::Impl
         CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
 
         HalideScheduler scheduler(halideConfigFile);
-        MapIdToLayerData::iterator it;
-        for (it = layers.begin(); it != layers.end(); ++it)
+        std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
         {
             LayerData &ld = it->second;
             Ptr<Layer> layer = ld.layerInstance;
@@ -639,10 +613,30 @@ struct Net::Impl
                                             ld.inputBlobs, ld.outputBlobs,
                                             preferableTarget);
                 }
+                compileList.emplace_back(ld);
             }
         }
-        parallel_for_(Range(0, layers.size()), HalideCompiler(layers, preferableTarget));
+        std::atomic<int> progress(0);
+        auto fn = ([&] () -> void
+        {
+            for (;;)
+            {
+                int id = progress.fetch_add(1);
+                if ((size_t)id >= compileList.size())
+                    return;
+                const LayerData& ld = compileList[id].get();
+                Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
+                dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
+            }
+        });
+        size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
+        num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
+        std::vector<std::thread> threads(num_threads - 1);
+        for (auto& t: threads) t = std::thread(fn);
+        fn(); // process own tasks
+        for (auto& t: threads) t.join();
     }
+#endif
 
     void clear()
     {
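
The replacement distributes Halide compilation over plain std::thread workers: every worker, including the calling thread, claims task indices from a shared std::atomic counter until compileList is exhausted, so no TBB (or other OpenCV parallel backend) threads are involved. A self-contained sketch of the same pattern, with a hypothetical doubling task standing in for dnn::compileHalide():

#include <algorithm>
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main()
{
    std::vector<int> tasks(100, 21);            // stand-in for compileList
    std::atomic<int> progress(0);

    auto fn = [&]()
    {
        for (;;)
        {
            int id = progress.fetch_add(1);     // claim the next unclaimed index
            if ((size_t)id >= tasks.size())
                return;                         // list exhausted
            tasks[id] *= 2;                     // stand-in for dnn::compileHalide(...)
        }
    };

    size_t num_threads = std::min(tasks.size(), (size_t)std::thread::hardware_concurrency());
    num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));  // clamp to [1, 8]

    std::vector<std::thread> threads(num_threads - 1);  // the caller is worker #0
    for (auto& t : threads) t = std::thread(fn);
    fn();                                       // process tasks on this thread too
    for (auto& t : threads) t.join();

    std::printf("tasks[0] = %d\n", tasks[0]);   // prints 42
}

Two design choices are visible in the patch: the calling thread runs fn() itself, so only num_threads - 1 extra threads are spawned (none at all when the clamp yields 1, e.g. for an empty compileList), and the pool is capped at 8, presumably because each Halide compilation is coarse-grained enough that more threads would buy little.
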
@@ -692,10 +686,12 @@ struct Net::Impl
         if (!netWasAllocated )
         {
-            // If user didn't call compileHalide() between
-            // setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
+#ifdef HAVE_HALIDE
             if (preferableBackend == DNN_BACKEND_HALIDE)
                 compileHalide();
+#else
+            CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
+#endif
         }
 
         netWasAllocated = true;
...
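
For context, the path touched by the last hunk runs on the first forward() after the Halide backend is selected: allocation notices that compileHalide() was never called explicitly and invokes it lazily, or, in a build without Halide, the new CV_Assert rejects the backend up front. A hedged usage sketch of that API (model and image file names are placeholders):

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // Placeholder files; any Caffe model loadable by cv::dnn works here.
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("deploy.prototxt", "weights.caffemodel");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_HALIDE);

    cv::Mat img = cv::imread("input.jpg");
    net.setInput(cv::dnn::blobFromImage(img));

    // First forward() triggers network allocation, which now compiles the
    // Halide kernels on the std::thread pool shown above (or asserts when
    // OpenCV was built without HAVE_HALIDE).
    cv::Mat out = net.forward();
}
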