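Drop the `HalideCompiler` ParallelLoopBody helper from `Net::Impl` and compile Halide backend nodes on plain `std::thread` workers instead: `compileHalide()` first collects the eligible layers into `compileList`, then a small pool of threads (clamped by `hardware_concurrency()` and the list size) pulls indices from a shared `std::atomic<int>` cursor and invokes `dnn::compileHalide()` for each entry. Builds without Halide now assert that the Halide backend was not requested.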
@@ -589,33 +589,7 @@ struct Net::Impl
         return wrapper;
     }
 
-    class HalideCompiler : public ParallelLoopBody
-    {
-    public:
-        HalideCompiler(const MapIdToLayerData& layers_, int preferableTarget_)
-            : layers(&layers_), preferableTarget(preferableTarget_) {}
-
-        void operator()(const Range& r) const
-        {
-            MapIdToLayerData::const_iterator it = layers->begin();
-            for (int i = 0; i < r.start && it != layers->end(); ++i, ++it) {}
-            for (int i = r.start; i < r.end && it != layers->end(); ++i, ++it)
-            {
-                const LayerData &ld = it->second;
-                Ptr<Layer> layer = ld.layerInstance;
-                bool skip = ld.skipFlags.find(DNN_BACKEND_HALIDE)->second;
-                if (layer->supportBackend(DNN_BACKEND_HALIDE) && !skip)
-                {
-                    Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
-                    dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
-                }
-            }
-        }
-    private:
-        const MapIdToLayerData* layers;
-        int preferableTarget;
-    };
 #ifdef HAVE_HALIDE
     void compileHalide()
     {
         CV_TRACE_FUNCTION();
@@ -623,8 +597,8 @@ struct Net::Impl
         CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
 
         HalideScheduler scheduler(halideConfigFile);
-        MapIdToLayerData::iterator it;
-        for (it = layers.begin(); it != layers.end(); ++it)
+        std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
         {
             LayerData &ld = it->second;
             Ptr<Layer> layer = ld.layerInstance;
@@ -639,10 +613,30 @@ struct Net::Impl
                                             ld.inputBlobs, ld.outputBlobs,
                                             preferableTarget);
                 }
+                compileList.emplace_back(ld);
             }
         }
-        parallel_for_(Range(0, layers.size()), HalideCompiler(layers, preferableTarget));
+        std::atomic<int> progress(0);
+        auto fn = ([&] () -> void
+        {
+            for (;;)
+            {
+                int id = progress.fetch_add(1);
+                if ((size_t)id >= compileList.size())
+                    return;
+                const LayerData& ld = compileList[id].get();
+                Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
+                dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
+            }
+        });
+        size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
+        num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
+        std::vector<std::thread> threads(num_threads - 1);
+        for (auto& t: threads) t = std::thread(fn);
+        fn(); // process own tasks
+        for (auto& t: threads) t.join();
     }
 #endif
 
     void clear()
     {
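The scheduling pattern introduced above is compact enough to show in isolation: every worker runs the same loop, and `fetch_add` on a shared counter hands each task index to exactly one thread, so no partitioning or locking is needed. Below is a minimal, self-contained sketch of that pattern; `compileOne()` and `numTasks` are hypothetical stand-ins for the per-layer `dnn::compileHalide()` call and `compileList.size()`.

```cpp
#include <algorithm>
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// Hypothetical stand-in for the per-layer dnn::compileHalide() call.
static void compileOne(int id) { std::printf("compiled task %d\n", id); }

int main()
{
    const size_t numTasks = 20;
    std::atomic<int> progress(0);
    auto fn = [&]()
    {
        for (;;)
        {
            int id = progress.fetch_add(1);   // atomically claim the next task index
            if ((size_t)id >= numTasks)
                return;                       // counter ran past the end: no work left
            compileOne(id);
        }
    };
    size_t num_threads = std::min(numTasks, (size_t)std::thread::hardware_concurrency());
    num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads)); // clamp to [1, 8]
    std::vector<std::thread> threads(num_threads - 1); // the calling thread is worker #0
    for (auto& t : threads) t = std::thread(fn);
    fn();                                     // process our own share of the tasks
    for (auto& t : threads) t.join();
    return 0;
}
```

Spawning `num_threads - 1` workers and then calling `fn()` directly mirrors the patch's `fn(); // process own tasks` line: the calling thread contributes work instead of idling in `join()`, and the clamp keeps the thread count bounded regardless of core count.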
@@ -692,10 +686,12 @@ struct Net::Impl
 
         if (!netWasAllocated )
         {
             // If user didn't call compileHalide() between
             // setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
 #ifdef HAVE_HALIDE
             if (preferableBackend == DNN_BACKEND_HALIDE)
                 compileHalide();
+#else
+            CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
+#endif
         }
 
         netWasAllocated = true;
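For context, the guarded branch above runs during lazy network allocation, and the new `CV_Assert` makes a Halide-less build fail loudly if the Halide backend was requested. A hedged sketch of the user-side sequence that reaches this path (the model files and input blob are placeholders, not part of this patch):

```cpp
#include <opencv2/dnn.hpp>

// Hedged sketch; the model files are hypothetical placeholders.
cv::Mat runWithHalide(const cv::Mat& inputBlob)
{
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("deploy.prototxt", "weights.caffemodel");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_HALIDE);
    net.setInput(inputBlob);
    // No explicit compile step: the first forward() allocates the net and,
    // under HAVE_HALIDE, compiles the Halide kernels on demand. In a build
    // without Halide, the CV_Assert added above fires here instead.
    return net.forward();
}
```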