Commit 329abb5b authored by Li Peng

dnn fp16 support

Signed-off-by: Li Peng <peng.li@intel.com>
parent bb8ff2c4
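
Usage sketch (not part of this commit): with this patch the half-precision path is selected through the existing public API. The model and image paths below are placeholders.

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    int main()
    {
        using namespace cv;
        using namespace cv::dnn;

        // Placeholder model and image files.
        Net net = readNetFromCaffe("deploy.prototxt", "weights.caffemodel");
        net.setPreferableBackend(DNN_BACKEND_DEFAULT);
        net.setPreferableTarget(DNN_TARGET_OPENCL_FP16); // downgraded to DNN_TARGET_OPENCL if cl_khr_fp16 is missing

        Mat img = imread("input.jpg");
        net.setInput(blobFromImage(img)); // input blob is stored as CV_16S while the FP16 target is active
        Mat prob = net.forward();         // the returned blob is converted back to CV_32F
        return 0;
    }
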
@@ -499,7 +499,7 @@ public:
         }
     }
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate, bool use_half)
     {
         if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate)
         {
@@ -540,14 +540,14 @@ public:
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
-            dst.create(shape, CV_32F);
+            dst.create(shape, use_half ? CV_16S : CV_32F);
             addHost(lp, dst);
         }
     }
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                                std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool forceCreate = false)
+                               bool forceCreate = false, bool use_half = false)
     {
         CV_TRACE_FUNCTION();
@@ -618,7 +618,7 @@ public:
                     reuse(ld.inputBlobsId[0], blobPin);
                 }
                 else
-                    reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate);
+                    reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate, use_half);
             }
         }
     }
@@ -656,7 +656,7 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
     {
         if (targetId == DNN_TARGET_CPU)
             return Ptr<BackendWrapper>();
-        else if (targetId == DNN_TARGET_OPENCL)
+        else if (IS_DNN_OPENCL_TARGET(targetId))
             return OpenCLBackendWrapper::create(m);
         else
             CV_Error(Error::StsNotImplemented, "Unknown target identifier");
@@ -721,6 +721,7 @@ struct Net::Impl
     bool netWasAllocated;
     bool fusion;
     std::vector<int64> layersTimings;
+    Mat output_blob;
     Ptr<BackendWrapper> wrap(Mat& host)
     {
@@ -737,7 +738,7 @@ struct Net::Impl
             Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
             if (preferableBackend == DNN_BACKEND_DEFAULT)
             {
-                CV_Assert(preferableTarget == DNN_TARGET_OPENCL);
+                CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
                 return OpenCLBackendWrapper::create(baseBuffer, host);
             }
             else if (preferableBackend == DNN_BACKEND_HALIDE)
@@ -849,7 +850,7 @@ struct Net::Impl
         if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
         {
-            if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+            if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
 #ifndef HAVE_OPENCL
             {
                 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
@@ -1034,7 +1035,7 @@ struct Net::Impl
     {
         CV_TRACE_FUNCTION();
         if (preferableBackend == DNN_BACKEND_DEFAULT)
-            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
+            CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
         else if (preferableBackend == DNN_BACKEND_HALIDE)
             initHalideBackend();
         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
@@ -1369,7 +1370,9 @@ struct Net::Impl
         std::vector<LayerPin> pinsForInternalBlobs;
         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
-                                          preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
+                                          preferableBackend == DNN_BACKEND_INFERENCE_ENGINE,
+                                          preferableBackend == DNN_BACKEND_DEFAULT &&
+                                          preferableTarget == DNN_TARGET_OPENCL_FP16);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1439,7 +1442,7 @@ struct Net::Impl
             // some other layers.
             // TODO: OpenCL target support more fusion styles.
-            if ( preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL &&
+            if ( preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget) &&
                  (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
                                             ld.layerInstance->type != "MVN")) )
                 continue;
@@ -1478,8 +1481,8 @@ struct Net::Impl
                 continue; // Go to the next layer.
             // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
-            if ( preferableTarget != DNN_TARGET_OPENCL ||
-                 (preferableTarget == DNN_TARGET_OPENCL &&
+            if ( !IS_DNN_OPENCL_TARGET(preferableTarget) ||
+                 (IS_DNN_OPENCL_TARGET(preferableTarget) &&
                   nextData &&
                   ((nextData->type == "ReLU") ||
                    (nextData->type == "ChannelsPReLU") ||
@@ -1502,7 +1505,7 @@ struct Net::Impl
                 ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                 ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
-                if ( preferableTarget == DNN_TARGET_OPENCL )
+                if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
                 {
                     if ( !activData->consumers.empty() )
                     {
@@ -1514,7 +1517,7 @@ struct Net::Impl
             }
             // fuse convlution layer followed by eltwise + relu
-            if ( preferableTarget == DNN_TARGET_OPENCL )
+            if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
             {
                 Ptr<EltwiseLayer> nextEltwiseLayer;
                 if( nextData )
@@ -1727,6 +1730,13 @@ struct Net::Impl
         for(int i = 0; i < layers[0].outputBlobs.size(); i++)
         {
             CV_Assert(layers[0].outputBlobs[i].total());
+            if (layers[0].outputBlobs[i].depth() == CV_32F &&
+                preferableBackend == DNN_BACKEND_DEFAULT &&
+                preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                Mat mat = layers[0].outputBlobs[i].clone();
+                convertFp16(mat, layers[0].outputBlobs[i]);
+            }
             inputShapes.push_back(shape(layers[0].outputBlobs[i]));
         }
         LayersShapesMap layersShapes;
@@ -1772,7 +1782,7 @@ struct Net::Impl
     {
         if( !ld.skip )
         {
-            if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+            if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
             {
                 std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                 layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
@@ -1937,6 +1947,13 @@ struct Net::Impl
             // Transfer data to CPU if it's require.
             ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
+        if (ld.outputBlobs[pin.oid].depth() == CV_16S)
+        {
+            convertFp16(ld.outputBlobs[pin.oid], output_blob);
+            return output_blob;
+        }
+        else
             return ld.outputBlobs[pin.oid];
     }
@@ -2080,7 +2097,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
     if (outputBlobs.isUMat())
     {
-        outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW));
+        outputBlobs.assign(impl->getBlob(layerName).getUMat(ACCESS_RW));
     }
     else if (outputBlobs.isMat())
     {
@@ -2096,17 +2113,33 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
                 ld.outputBlobsWrappers[i]->copyToHost();
             }
         }
+        if (ld.outputBlobs[0].depth() == CV_32F)
+        {
             std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
             outputvec = ld.outputBlobs;
+        } else {
+            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+            outputvec.resize(ld.outputBlobs.size());
+            for (int i = 0; i < outputvec.size(); i++)
+                convertFp16(ld.outputBlobs[i], outputvec[i]);
+        }
     }
     else if (outputBlobs.isUMatVector())
     {
         std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
         if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
-            impl->preferableTarget == DNN_TARGET_OPENCL)
+            IS_DNN_OPENCL_TARGET(impl->preferableTarget))
         {
+            if (impl->preferableTarget == DNN_TARGET_OPENCL)
                 outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+            else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+                outputvec.resize(out_vec.size());
+                for (int i = 0; i < out_vec.size(); i++)
+                    convertFp16(out_vec[i], outputvec[i]);
+            }
         }
         else
         {
@@ -2194,6 +2227,16 @@ void Net::setPreferableTarget(int targetId)
     if( impl->preferableTarget != targetId )
     {
         impl->preferableTarget = targetId;
+        if (IS_DNN_OPENCL_TARGET(targetId))
+        {
+#ifndef HAVE_OPENCL
+            impl->preferableTarget = DNN_TARGET_CPU;
+#else
+            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
+            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
+                impl->preferableTarget = DNN_TARGET_OPENCL;
+#endif
+        }
         impl->netWasAllocated = false;
         impl->clear();
     }
@@ -2222,7 +2265,17 @@ void Net::setInput(InputArray blob, const String& name)
     ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
     ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
-    Mat blob_ = blob.getMat();
+    Mat blob_;
+    if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
+        impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+    {
+        Mat blob_mat = blob.getMat();
+        convertFp16(blob_mat, blob_);
+    }
+    else
+    {
+        blob_ = blob.getMat();
+    }
     bool oldShape = prevShape == shape(blob_);
     if (oldShape)
     {
@@ -2747,6 +2800,43 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+        std::vector<UMat> orig_inputs;
+        std::vector<UMat> orig_outputs;
+        std::vector<UMat> orig_internals;
+        inputs_arr.getUMatVector(orig_inputs);
+        outputs_arr.getUMatVector(orig_outputs);
+        internals_arr.getUMatVector(orig_internals);
+        inputs.resize(orig_inputs.size());
+        for (size_t i = 0; i < orig_inputs.size(); i++)
+            convertFp16(orig_inputs[i], inputs[i]);
+        outputs.resize(orig_outputs.size());
+        for (size_t i = 0; i < orig_outputs.size(); i++)
+            outputs[i].create(shape(orig_outputs[i]), CV_32F);
+        internals.resize(orig_internals.size());
+        for (size_t i = 0; i < orig_internals.size(); i++)
+            internals[i].create(shape(orig_internals[i]), CV_32F);
+        forward(inputs, outputs, internals);
+        for (size_t i = 0; i < outputs.size(); i++)
+            convertFp16(outputs[i], orig_outputs[i]);
+        // sync results back
+        outputs_arr.assign(orig_outputs);
+        internals_arr.assign(orig_internals);
+        return;
+    }
     std::vector<Mat> inpvec;
     std::vector<Mat> outputs;
     std::vector<Mat> internals;
...
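
For reference, a minimal stand-alone sketch (not part of the patch) of the cv::convertFp16 round trip the hunks above rely on: half-precision values travel in CV_16S matrices and are converted back to CV_32F at the boundaries.

    #include <opencv2/core.hpp>

    int main()
    {
        cv::Mat f32(1, 8, CV_32F), f16, back;
        cv::randu(f32, -1.f, 1.f);
        cv::convertFp16(f32, f16);   // f16 has depth CV_16S; each element stores IEEE-754 half bits
        cv::convertFp16(f16, back);  // converted back to CV_32F
        return 0;
    }
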
@@ -64,6 +64,7 @@
 namespace cv { namespace dnn {
 CV__DNN_EXPERIMENTAL_NS_BEGIN
+#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
 Mutex& getInitializationMutex();
 void initializeLayerFactory();
 CV__DNN_EXPERIMENTAL_NS_END
...
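
A hedged application-side sketch (not from this commit) that mirrors the fallback logic added in Net::setPreferableTarget, using the same cv::ocl API the patch uses; pickOpenCLTarget is a hypothetical helper name.

    #include <opencv2/core/ocl.hpp>
    #include <opencv2/dnn.hpp>

    // Pick the strongest DNN target the current OpenCL device can run.
    static int pickOpenCLTarget()
    {
        if (!cv::ocl::useOpenCL())
            return cv::dnn::DNN_TARGET_CPU;
        bool fp16 = cv::ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
        return fp16 ? cv::dnn::DNN_TARGET_OPENCL_FP16 : cv::dnn::DNN_TARGET_OPENCL;
    }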