Commit 64a9e923 authored by Dmitry Kurtaev, committed by Alexander Alekhin

Merge pull request #10466 from dkurt:reduce_umat_try_2

* UMat blobs are wrapped

* Replace getUMat and getMat at OpenCLBackendWrapper
parent b6075e11
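At a high level, this patch removes the per-layer umat_inputBlobs/umat_outputBlobs/umat_internals vectors and instead pairs each host Mat with a device UMat inside a new OpenCLBackendWrapper, synchronizing the two lazily through a hostDirty flag. A minimal sketch of that synchronization pattern, assuming only core OpenCV types (the SyncedBlob name is illustrative, not part of the patch):

#include <opencv2/core.hpp>
using namespace cv;

// Illustrative only: mirrors the copyToDevice/copyToHost/setHostDirty
// lifecycle of the OpenCLBackendWrapper added below.
class SyncedBlob
{
public:
    explicit SyncedBlob(Mat& m) : host(&m), hostDirty(false)
    {
        m.copyTo(umat);                           // initial upload to the device
    }
    void setHostDirty() { hostDirty = true; }     // host Mat was modified on the CPU
    void copyToDevice()                           // upload lazily, only when stale
    {
        if (hostDirty)
        {
            host->copyTo(umat);
            hostDirty = false;
        }
    }
    void copyToHost() { umat.copyTo(*host); }     // download the device result

    UMat umat;
private:
    Mat* host;
    bool hostDirty;
};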
@@ -180,6 +180,96 @@ Mat blobFromImages(const std::vector<Mat>& images_, double scalefactor, Size siz
return blob;
}
class OpenCLBackendWrapper : public BackendWrapper
{
public:
OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
{
m.copyTo(umat);
host = &m;
hostDirty = false;
}
OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
: BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
{
Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
CV_Assert(!base.empty());
host = &m;
int shape[] = {1, (int)base->umat.total()};
umat = base->umat.reshape(1, 2, &shape[0])
.colRange(0, host->total())
.reshape(1, host->dims, &host->size[0]);
hostDirty = false;
}
static Ptr<BackendWrapper> create(Mat& m)
{
return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
}
static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
{
return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
}
static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper> >& wrappers)
{
const int numWrappers = wrappers.size();
std::vector<UMat> mats(wrappers.size());
for (int i = 0; i < numWrappers; ++i)
{
Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
CV_Assert(!umatWrapper.empty());
umatWrapper->copyToDevice();
mats[i] = umatWrapper->umat;
}
return mats;
}
// Replaces all UMats in the wrappers with the given ones.
static void update(const std::vector<Ptr<BackendWrapper> >& wrappers,
const std::vector<UMat>& umats)
{
CV_Assert(wrappers.size() == umats.size());
for (int i = 0, n = umats.size(); i < n; ++i)
{
Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
CV_Assert(!umatWrapper.empty());
umatWrapper->umat = umats[i];
}
}
~OpenCLBackendWrapper() {}
// Copies data from device to host memory.
virtual void copyToHost()
{
umat.copyTo(*host);
}
virtual void setHostDirty()
{
hostDirty = true;
}
void copyToDevice()
{
if (hostDirty)
{
host->copyTo(umat);
hostDirty = false;
}
}
private:
UMat umat;
Mat* host;
bool hostDirty;
};
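For orientation, the forward pass later in this diff drives these wrappers roughly as follows. This is a condensed sketch, with ld and layer standing in for the surrounding Net::Impl context:

// Condensed sketch of the wrapper usage in the forward-pass hunk below;
// ld and layer come from the enclosing Net::Impl code.
std::vector<UMat> inputs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);    // uploads any dirty host Mats
std::vector<UMat> outputs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
std::vector<UMat> internals = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);

layer->forward(inputs, outputs, internals);    // runs the layer's OpenCL path

// A layer may swap its output UMats, so write them back into the wrappers
// to keep the host/device bookkeeping consistent.
OpenCLBackendWrapper::update(ld.outputBlobsWrappers, outputs);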
struct LayerPin
{
int lid;
@@ -233,14 +323,12 @@ struct LayerData
std::vector<LayerPin> consumers;
std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
std::vector<Ptr<BackendWrapper> > internalBlobsWrappers;
Ptr<Layer> layerInstance;
std::vector<Mat> outputBlobs;
std::vector<Mat*> inputBlobs;
std::vector<Mat> internals;
std::vector<UMat> umat_outputBlobs;
std::vector<UMat> umat_inputBlobs;
std::vector<UMat> umat_internals;
// Computation nodes of implemented backends (except DEFAULT).
std::map<int, Ptr<BackendNode> > backendNodes;
// Flag to skip layer computation for a specific backend.
@@ -418,77 +506,21 @@ public:
}
}
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
{
if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
{
UMat bestBlob;
LayerPin bestBlobPin;
std::map<LayerPin, UMat>::iterator hostIt;
std::map<LayerPin, int>::iterator refIt;
const int targetTotal = total(shape);
int bestBlobTotal = INT_MAX;
for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
{
refIt = refCounter.find(hostIt->first);
// Use only blobs that had references before: a blob that was
// never referenced might be in use as a network output.
if (refIt != refCounter.end() && refIt->second == 0)
{
UMat& unusedBlob = hostIt->second;
if (unusedBlob.total() >= targetTotal &&
unusedBlob.total() < bestBlobTotal)
{
bestBlobPin = hostIt->first;
bestBlob = unusedBlob;
bestBlobTotal = unusedBlob.total();
}
}
}
if (!bestBlob.empty())
{
reuse(bestBlobPin, lp);
umat_dst.create(shape, CV_32F);
return;
}
}
{
// If dst has already been allocated with total(shape) elements,
// it won't be recreated, and the dst.data pointer stays the same.
umat_dst.create(shape, CV_32F);
addHost(lp, umat_dst);
}
}
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
std::vector<LayerPin>& pinsForInternalBlobs)
{
CV_TRACE_FUNCTION();
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
preferableTarget == DNN_TARGET_OPENCL);
pinsForInternalBlobs.clear();
std::vector<Mat>& outputBlobs = ld.outputBlobs,
&internalBlobs = ld.internals;
std::vector<UMat>& umat_outputBlobs = ld.umat_outputBlobs,
&umat_internalBlobs = ld.umat_internals;
const ShapesVec& outShapes = layerShapes.out,
internalShapes = layerShapes.internal;
outputBlobs.resize(std::max((size_t)1, outShapes.size())); // a layer produces at least one output blob
internalBlobs.resize(internalShapes.size());
if (use_umat)
{
umat_outputBlobs.resize(std::max((size_t)1, outShapes.size()));
umat_internalBlobs.resize(internalShapes.size());
}
CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
@@ -508,19 +540,14 @@ public:
ShapesVec shapes(outShapes);
shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
std::vector<Mat*> blobs;
std::vector<UMat*> umat_blobs;
for(int i = 0; i < outputBlobs.size(); i++)
{
blobs.push_back(&outputBlobs[i]);
if (use_umat)
umat_blobs.push_back(&umat_outputBlobs[i]);
}
for(int i = 0; i < internalBlobs.size(); i++)
{
blobs.push_back(&internalBlobs[i]);
if (use_umat)
umat_blobs.push_back(&umat_internalBlobs[i]);
if (total(internalShapes[i]))
{
pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
@@ -546,27 +573,12 @@ public:
LayerPin blobPin(ld.id, index);
if (index < outShapes.size() && inPlace)
{
if (use_umat)
{
CV_Assert(ld.umat_inputBlobs[0].total() == total(shapes[index]));
ld.umat_outputBlobs[index] =
ld.umat_inputBlobs[0].reshape(1, shapes[index].size(),
&shapes[index][0]);
}
else
{
CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
}
CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
reuse(ld.inputBlobsId[0], blobPin);
}
else
{
if (use_umat)
reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
else
reuseOrCreate(shapes[index], blobPin, *blobs[index]);
}
reuseOrCreate(shapes[index], blobPin, *blobs[index]);
}
}
}
@@ -580,19 +592,6 @@ public:
refCounter.clear();
reuseMap.clear();
memHosts.clear();
umat_memHosts.clear();
preferableTarget = DNN_TARGET_CPU;
preferableBackend = DNN_BACKEND_DEFAULT;
}
void setPreferableTarget(int targetId)
{
preferableTarget = targetId;
}
void setPreferableBackend(int backendId)
{
preferableBackend = backendId;
}
private:
@@ -604,28 +603,23 @@ private:
memHosts[lp] = mat;
}
void addHost(const LayerPin& lp, const UMat& umat)
{
CV_Assert(umat_memHosts.find(lp) == umat_memHosts.end());
reuseMap[lp] = lp;
umat_memHosts[lp] = umat;
}
std::map<LayerPin, int> refCounter;
// Maps a pin to the origin blob (for which memory was allocated first).
// For origin blobs, key == value.
std::map<LayerPin, LayerPin> reuseMap;
std::map<LayerPin, Mat> memHosts;
std::map<LayerPin, UMat> umat_memHosts;
int preferableTarget;
int preferableBackend;
};
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
if (backendId == DNN_BACKEND_DEFAULT)
{
return Ptr<BackendWrapper>();
if (targetId == DNN_TARGET_CPU)
return Ptr<BackendWrapper>();
else if (targetId == DNN_TARGET_OPENCL)
return OpenCLBackendWrapper::create(m);
else
CV_Error(Error::StsNotImplemented, "Unknown target identifier");
}
else if (backendId == DNN_BACKEND_HALIDE)
{
@@ -660,8 +654,6 @@ struct Net::Impl
fusion = true;
preferableBackend = DNN_BACKEND_DEFAULT;
preferableTarget = DNN_TARGET_CPU;
blobManager.setPreferableBackend(DNN_BACKEND_DEFAULT);
blobManager.setPreferableTarget(DNN_TARGET_CPU);
}
Ptr<DataLayer> netInputLayer;
@@ -682,9 +674,9 @@ struct Net::Impl
bool fusion;
std::vector<int64> layersTimings;
Ptr<BackendWrapper> wrap(const Mat& host)
Ptr<BackendWrapper> wrap(Mat& host)
{
if (preferableBackend == DNN_BACKEND_DEFAULT)
if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU)
return Ptr<BackendWrapper>();
MatShape shape(host.dims);
@@ -695,7 +687,12 @@ struct Net::Impl
if (backendWrappers.find(data) != backendWrappers.end())
{
Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
if (preferableBackend == DNN_BACKEND_HALIDE)
if (preferableBackend == DNN_BACKEND_DEFAULT)
{
CV_Assert(preferableTarget == DNN_TARGET_OPENCL);
return OpenCLBackendWrapper::create(baseBuffer, host);
}
else if (preferableBackend == DNN_BACKEND_HALIDE)
{
CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
@@ -771,9 +768,6 @@ struct Net::Impl
it->second.inputBlobs.clear();
it->second.outputBlobs.clear();
it->second.internals.clear();
it->second.umat_inputBlobs.clear();
it->second.umat_outputBlobs.clear();
it->second.umat_internals.clear();
}
it->second.skipFlags.clear();
//it->second.consumers.clear();
@@ -1094,11 +1088,7 @@ struct Net::Impl
allocateLayer(*i, layersShapes);
//bind inputs
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
preferableTarget == DNN_TARGET_OPENCL);
ld.inputBlobs.resize(ninputs);
if (use_umat)
ld.umat_inputBlobs.resize(ninputs);
ld.inputBlobsWrappers.resize(ninputs);
for (size_t i = 0; i < ninputs; i++)
{
@@ -1106,8 +1096,6 @@ struct Net::Impl
CV_Assert(from.valid());
CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
if (use_umat)
ld.umat_inputBlobs[i] = layers[from.lid].umat_outputBlobs[from.oid];
ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
}
@@ -1122,29 +1110,15 @@ struct Net::Impl
{
ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
}
ld.internalBlobsWrappers.resize(ld.internals.size());
for (int i = 0; i < ld.internals.size(); ++i)
{
ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
}
Ptr<Layer> layerPtr = ld.getLayerInstance();
{
if (use_umat)
{
std::vector<Mat> input_mats(ld.umat_inputBlobs.size());
std::vector<Mat*> inputs(ld.umat_inputBlobs.size());
std::vector<Mat> outputs(ld.umat_outputBlobs.size());
for (int i = 0; i < inputs.size(); i++)
{
input_mats[i] = ld.umat_inputBlobs[i].getMat(ACCESS_READ);
inputs[i] = &input_mats[i];
}
for (int i = 0; i < outputs.size(); i++)
{
outputs[i] = ld.umat_outputBlobs[i].getMat(ACCESS_READ);
}
layerPtr->finalize(inputs, outputs);
}
else
{
layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
}
layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
layerPtr->preferableTarget = preferableTarget;
#if 0
std::cout << "\toutputs:";
@@ -1221,10 +1195,8 @@ struct Net::Impl
{
printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
bnormData->skipFlags[DNN_BACKEND_DEFAULT] = true;
if ( preferableTarget == DNN_TARGET_OPENCL )
ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
else
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
if( bnormData->consumers.size() == 1 )
{
nextData = &layers[bnormData->consumers[0].lid];
@@ -1244,10 +1216,8 @@ struct Net::Impl
{
printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
scaleData->skipFlags[DNN_BACKEND_DEFAULT] = true;
if ( preferableTarget == DNN_TARGET_OPENCL )
ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
else
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
if( scaleData->consumers.size() == 1 )
{
nextData = &layers[scaleData->consumers[0].lid];
@@ -1276,10 +1246,8 @@ struct Net::Impl
LayerData *activData = nextData;
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
if ( preferableTarget == DNN_TARGET_OPENCL )
ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
else
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
if ( preferableTarget == DNN_TARGET_OPENCL )
{
@@ -1329,6 +1297,7 @@ struct Net::Impl
// fuse eltwise + activation layer
LayerData *firstConvLayerData = downLayerData;
{
CV_Assert(eltwiseData->consumers.size() == 1);
nextData = &layers[eltwiseData->consumers[0].lid];
lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
Ptr<ActivationLayer> nextActivLayer;
@@ -1341,13 +1310,50 @@ struct Net::Impl
!nextData->type.compare("Power")) &&
currLayer->setActivation(nextActivLayer) )
{
CV_Assert(firstConvLayerData->umat_outputBlobs.size() == 1 && ld.umat_inputBlobs.size() == 1);
ld.umat_inputBlobs.push_back(firstConvLayerData->umat_outputBlobs[0]);
CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true;
nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
// This optimization is for cases like
// some_layer conv
// | |
// +-- eltwise --+
// |
// activ
// This way all the element-wise computations
// (i.e. some_layer+conv or some_layer*conv)
// would be done at the [conv] layer. So we need to
// replace [conv]'s output blob with [eltwise]'s one,
// considering that [activ] is an in-place layer.
// Also we need to move all the consumers' references.
// To prevent memory collisions (i.e. when input of
// [conv] and output of [eltwise] is the same blob)
// we allocate a new blob.
CV_Assert(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
ld.outputBlobs[0] = ld.outputBlobs[0].clone();
ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);
eltwiseData->outputBlobs = ld.outputBlobs;
nextData->outputBlobs = ld.outputBlobs;
eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
nextData->outputBlobsWrappers = ld.outputBlobsWrappers;
// Move references of [activ] layer consumers to the newly allocated blob.
for (int i = 0; i < nextData->consumers.size(); ++i)
{
LayerData& consumer = layers[nextData->consumers[i].lid];
for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
{
if (consumer.inputBlobsId[j].lid == lpNext.lid)
{
consumer.inputBlobs[j] = &ld.outputBlobs[0];
consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
break;
}
}
}
}
}
}
@@ -1469,8 +1475,6 @@ struct Net::Impl
getLayersShapes(inputShapes, layersShapes);
blobManager.reset();
blobManager.setPreferableTarget(preferableTarget);
blobManager.setPreferableBackend(preferableBackend);
backendWrappers.clear();
// Fake references to input blobs.
for (int i = 0; i < layers[0].outputBlobs.size(); ++i)
@@ -1510,19 +1514,29 @@ struct Net::Impl
{
if( !ld.skipFlags[DNN_BACKEND_DEFAULT] )
{
for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
{
if (!ld.inputBlobsWrappers[i].empty())
ld.inputBlobsWrappers[i]->copyToHost();
}
if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
layer->forward(ld.umat_inputBlobs, ld.umat_outputBlobs, ld.umat_internals);
else
layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
{
if (!ld.outputBlobsWrappers[i].empty())
ld.outputBlobsWrappers[i]->setHostDirty();
}
if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
{
std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
umat_outputBlobs,
OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers));
OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
}
else
{
for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
{
if (!ld.inputBlobsWrappers[i].empty())
ld.inputBlobsWrappers[i]->copyToHost();
}
layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
{
if (!ld.outputBlobsWrappers[i].empty())
ld.outputBlobsWrappers[i]->setHostDirty();
}
}
}
}
else
@@ -1654,51 +1668,19 @@ struct Net::Impl
CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produces only " + toString(ld.outputBlobs.size()) +
" outputs, the #" + toString(pin.oid) + " was requested");
}
if (preferableBackend != DNN_BACKEND_DEFAULT)
if (preferableTarget != DNN_TARGET_CPU)
{
CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty());
// Transfer data to CPU if it's required.
ld.outputBlobsWrappers[pin.oid]->copyToHost();
}
else
{
CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
}
if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
ld.umat_outputBlobs[pin.oid].copyTo(ld.outputBlobs[pin.oid]);
return ld.outputBlobs[pin.oid];
}
void getBlob(UMat& umat, const LayerPin& pin)
{
CV_TRACE_FUNCTION();
if (!pin.valid())
CV_Error(Error::StsObjectNotFound, "Requested blob not found");
LayerData &ld = layers[pin.lid];
if ((size_t)pin.oid >= ld.outputBlobs.size())
{
CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produces only " + toString(ld.outputBlobs.size()) +
" outputs, the #" + toString(pin.oid) + " was requested");
}
if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
umat = ld.umat_outputBlobs[pin.oid];
else
umat = UMat();
}
Mat getBlob(String outputName)
{
return getBlob(getPinByAlias(outputName));
}
void getBlob(UMat& umat, String outputName)
{
getBlob(umat, getPinByAlias(outputName));
}
};
Net::Net() : impl(new Net::Impl)
@@ -1794,12 +1776,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
if (outputBlobs.isUMat())
{
if (ld.umat_outputBlobs.size() > 0)
{
UMat umat;
impl->getBlob(umat, layerName);
outputBlobs.assign(umat);
}
outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW));
}
else if (outputBlobs.isMat())
{
@@ -1807,20 +1784,31 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
}
else if (outputBlobs.isMatVector())
{
if (ld.umat_outputBlobs.size() > 0)
{
for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
}
if (impl->preferableTarget != DNN_TARGET_CPU)
{
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
{
CV_Assert(!ld.outputBlobsWrappers[i].empty());
ld.outputBlobsWrappers[i]->copyToHost();
}
}
std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
outputvec = ld.outputBlobs;
}
else if (outputBlobs.isUMatVector())
{
if (ld.umat_outputBlobs.size() > 0)
{
std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
outputvec = ld.umat_outputBlobs;
}
std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
impl->preferableTarget == DNN_TARGET_OPENCL)
{
outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
}
else
{
outputvec.resize(ld.outputBlobs.size());
for (int i = 0; i < outputvec.size(); ++i)
outputvec[i] = ld.outputBlobs[i].getUMat(ACCESS_RW);
}
}
}
@@ -1889,7 +1877,6 @@ void Net::setPreferableBackend(int backendId)
if( impl->preferableBackend != backendId )
{
impl->preferableBackend = backendId;
impl->blobManager.setPreferableBackend(backendId);
impl->netWasAllocated = false;
impl->clear();
}
@@ -1903,7 +1890,6 @@ void Net::setPreferableTarget(int targetId)
if( impl->preferableTarget != targetId )
{
impl->preferableTarget = targetId;
impl->blobManager.setPreferableTarget(targetId);
impl->netWasAllocated = false;
impl->clear();
}
@@ -1930,10 +1916,6 @@ void Net::setInput(InputArray blob, const String& name)
LayerData &ld = impl->layers[pin.lid];
ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
bool use_umat = (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
impl->preferableTarget == DNN_TARGET_OPENCL);
if (use_umat)
ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
Mat blob_ = blob.getMat();
@@ -1941,14 +1923,10 @@ void Net::setInput(InputArray blob, const String& name)
if (oldShape)
{
blob_.copyTo(ld.outputBlobs[pin.oid]);
if (use_umat)
blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
}
else
{
ld.outputBlobs[pin.oid] = blob_.clone();
if (use_umat)
blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
}
if (!ld.outputBlobsWrappers[pin.oid].empty())
...
@@ -709,6 +709,10 @@ public:
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
CV_Assert(outputs.size() == 1);
for (int i = 0; i < inputs.size(); ++i)
CV_Assert(inputs[i].u != outputs[0].u);
int group = inputs[0].size[1] / umat_blobs[0].size[1];
if (convolutionOp.empty())
@@ -913,7 +917,9 @@ public:
name.c_str(), inputs[0]->size[0], inputs[0]->size[1], inputs[0]->size[2], inputs[0]->size[3],
kernel.width, kernel.height, pad.width, pad.height,
stride.width, stride.height, dilation.width, dilation.height);*/
CV_Assert(inputs.size() == (size_t)1 && inputs[0]->size[1] % blobs[0].size[1] == 0);
CV_Assert(inputs.size() == (size_t)1, inputs[0]->size[1] % blobs[0].size[1] == 0,
outputs.size() == 1, inputs[0]->data != outputs[0].data);
int ngroups = inputs[0]->size[1]/blobs[0].size[1];
CV_Assert(outputs[0].size[1] % ngroups == 0);
int k, outCn = blobs[0].size[0];
...
@@ -303,6 +303,14 @@ OCL_TEST(Reproducibility_ResNet50, Accuracy)
Mat ref = blobFromNPY(_tf("resnet50_prob.npy"));
normAssert(ref, out);
UMat out_umat;
net.forward(out_umat);
normAssert(ref, out_umat, "out_umat");
std::vector<UMat> out_umats;
net.forward(out_umats);
normAssert(ref, out_umats[0], "out_umat_vector");
}
TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
@@ -331,9 +339,14 @@ OCL_TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false);
ASSERT_TRUE(!input.empty());
net.setInput(input);
// First, set a wrong input blob and run the model to get a wrong output.
net.setInput(input * 2.0f);
Mat out = net.forward();
// Then set the correct input blob to check that CPU->GPU synchronization works well.
net.setInput(input);
out = net.forward();
Mat ref = blobFromNPY(_tf("squeezenet_v1.1_prob.npy"));
normAssert(ref, out);
}
...
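For API users the net effect is that, with the default backend and the OpenCL target, intermediate blobs now stay on the device and outputs can be fetched as UMats without an extra round trip. A hedged usage sketch (file names are placeholders, not from this patch):

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>
using namespace cv;
using namespace cv::dnn;

int main()
{
    // Placeholders: any Caffe model and input image work the same way.
    Net net = readNetFromCaffe("model.prototxt", "model.caffemodel");
    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(DNN_TARGET_OPENCL);  // blobs are kept in UMats on the device

    net.setInput(blobFromImage(imread("image.png"), 1.0, Size(227, 227)));

    std::vector<UMat> out;                       // outputs retrieved as UMats,
    net.forward(out);                            // avoiding a device->host copy
    return 0;
}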