Commit 8f990837 authored by Li Peng

Add new layer forward interface

Add layer forward interface with InputArrayOfArrays and
OutputArrayOfArrays parameters, it allows UMat buffer to be
processed and transferred in the layers.
Signed-off-by: Li Peng <peng.li@intel.com>
parent 6e4f9433
...@@ -187,16 +187,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN ...@@ -187,16 +187,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/ */
virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0; virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0;
/** @brief Given the @p input blobs, computes the output @p blobs.
 * Array-based variant: inputs/outputs may carry UMat buffers so OpenCL-capable
 * layers can process data without downloading it to host memory.
 * @param[in] inputs the input blobs.
 * @param[out] outputs allocated output blobs, which will store results of the computation.
 * @param[out] internals allocated internal blobs
 */
virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0;
/** @brief Fallback helper for the array-based forward() above.
 * Implementations that have no (or a failed) UMat/OpenCL path call this to
 * perform the computation — presumably by converting the arrays to Mat and
 * dispatching to the Mat-based forward() overload (confirm in the .cpp).
 * @param[in] inputs the input blobs.
 * @param[out] outputs allocated output blobs, which will store results of the computation.
 * @param[out] internals allocated internal blobs
 */
void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
/** @brief @overload */ /** @brief @overload */
CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs); CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
/** @brief @overload */ /** @brief @overload */
CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs); CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs);
/** @brief @overload */
CV_WRAP void forward(const std::vector<Mat> &inputs, CV_IN_OUT std::vector<Mat> &outputs,
CV_IN_OUT std::vector<Mat> &internals);
/** @brief Allocates layer and computes output. */ /** @brief Allocates layer and computes output. */
CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs, CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
CV_IN_OUT std::vector<Mat> &internals); CV_IN_OUT std::vector<Mat> &internals);
......
...@@ -132,6 +132,11 @@ static inline MatShape shape(const Mat& mat) ...@@ -132,6 +132,11 @@ static inline MatShape shape(const Mat& mat)
return shape(mat.size.p, mat.dims); return shape(mat.size.p, mat.dims);
} }
// Returns the dimensions of a UMat as a MatShape, mirroring the Mat overload
// above so shape utilities work uniformly on both buffer types.
static inline MatShape shape(const UMat& mat)
{
return shape(mat.size.p, mat.dims);
}
namespace {inline bool is_neg(int i) { return i < 0; }} namespace {inline bool is_neg(int i) { return i < 0; }}
static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1) static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
...@@ -151,7 +156,7 @@ static inline int total(const MatShape& shape, int start = -1, int end = -1) ...@@ -151,7 +156,7 @@ static inline int total(const MatShape& shape, int start = -1, int end = -1)
return 0; return 0;
int elems = 1; int elems = 1;
CV_Assert(start < (int)shape.size() && end <= (int)shape.size() && CV_Assert(start <= (int)shape.size() && end <= (int)shape.size() &&
start <= end); start <= end);
for(int i = start; i < end; i++) for(int i = start; i < end; i++)
{ {
......
This diff is collapsed.
...@@ -102,6 +102,14 @@ public: ...@@ -102,6 +102,14 @@ public:
backendId == DNN_BACKEND_HALIDE && haveHalide(); backendId == DNN_BACKEND_HALIDE && haveHalide();
} }
// Array-based forward: this layer has no OpenCL path, so after tracing it
// delegates straight to the Mat-based implementation via forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -62,6 +62,25 @@ public: ...@@ -62,6 +62,25 @@ public:
return true; return true;
} }
#ifdef HAVE_OPENCL
// OpenCL branch: performs no computation and reports success, which makes
// CV_OCL_RUN in forward() skip the CPU fallback entirely.
// NOTE(review): returns true without touching outputs — presumably the
// outputs already alias/contain the expected data for this layer; confirm.
bool forward_ocl(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
{
return true;
}
#endif
// Array-based forward: first tries the OpenCL path (Intel OpenCL target only);
// CV_OCL_RUN returns from this function when forward_ocl() succeeds.
// Otherwise execution falls through to the Mat-based fallback.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -176,36 +176,38 @@ public: ...@@ -176,36 +176,38 @@ public:
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
CV_TRACE_FUNCTION(); std::vector<UMat> inputs;
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); std::vector<UMat> outputs;
int cAxis = clamp(axis, inputs[0]->dims); inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int cAxis = clamp(axis, inputs[0].dims);
if (!(cAxis == 1 && outputs[0].dims == 4 && !padding)) if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
return false; return false;
int bottom_concat_axis; int bottom_concat_axis;
int concat_size = inputs[0]->size[2] * inputs[0]->size[3]; int concat_size = inputs[0].size[2] * inputs[0].size[3];
int top_concat_axis = outputs[0].size[1]; int top_concat_axis = outputs[0].size[1];
int offset_concat_axis = 0; int offset_concat_axis = 0;
UMat inpMat, outMat; UMat& outMat = outputs[0];
outMat = outputs[0].getUMat(ACCESS_WRITE); String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
ocl::Kernel kernel;
String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0]->type()) + String(" ");
if (!kernel.create("concat", ocl::dnn::concat_oclsrc, buildopt))
return false;
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
inpMat = inputs[i]->getUMat(ACCESS_READ); ocl::Kernel kernel("concat", ocl::dnn::concat_oclsrc, buildopt);
bottom_concat_axis = inputs[i]->size[1]; if (kernel.empty())
size_t nthreads = inputs[i]->total(); return false;
UMat& inpMat = inputs[i];
bottom_concat_axis = inputs[i].size[1];
size_t nthreads = inputs[i].total();
kernel.set(0, (int)nthreads); kernel.set(0, (int)nthreads);
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat)); kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
kernel.set(2, (int)inputs[i]->size[0]); kernel.set(2, (int)inputs[i].size[0]);
kernel.set(3, (int)concat_size); kernel.set(3, (int)concat_size);
kernel.set(4, (int)top_concat_axis); kernel.set(4, (int)top_concat_axis);
kernel.set(5, (int)bottom_concat_axis); kernel.set(5, (int)bottom_concat_axis);
...@@ -222,14 +224,22 @@ public: ...@@ -222,14 +224,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
int cAxis = clamp(axis, inputs[0]->dims); int cAxis = clamp(axis, inputs[0]->dims);
Mat& outMat = outputs[0]; Mat& outMat = outputs[0];
......
...@@ -671,14 +671,20 @@ public: ...@@ -671,14 +671,20 @@ public:
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
int group = inputs[0]->size[1] / umat_blobs[0].size[1]; std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int group = inputs[0].size[1] / umat_blobs[0].size[1];
if (convolutionOp.empty()) if (convolutionOp.empty())
{ {
OCL4DNNConvConfig config; OCL4DNNConvConfig config;
config.in_shape = shape(*inputs[0]); config.in_shape = shape(inputs[0]);
config.out_shape = shape(outputs[0]); config.out_shape = shape(outputs[0]);
config.kernel = kernel; config.kernel = kernel;
config.pad = pad; config.pad = pad;
...@@ -690,6 +696,112 @@ public: ...@@ -690,6 +696,112 @@ public:
convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config)); convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
} }
int k, outCn = umat_blobs[0].size[0];
if( weightsMat.empty() )
{
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing
Mat wm = blobs[0].reshape(1, outCn).clone();
if( wm.step1() % VEC_ALIGN != 0 )
{
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
Mat wm_buffer = Mat(outCn, newcols, wm.type());
Mat wm_padding = wm_buffer.colRange(wm.cols, newcols);
wm_padding.setTo(Scalar::all(0.));
Mat wm_aligned = wm_buffer.colRange(0, wm.cols);
wm.copyTo(wm_aligned);
wm = wm_aligned;
}
weightsMat = wm;
Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat();
biasvec.resize(outCn+2);
if( biasMat.empty() )
{
for( k = 0; k < outCn; k++ )
biasvec[k] = 0.f;
}
else
{
for( k = 0; k < outCn; k++ )
biasvec[k] = biasMat.at<float>(k);
}
if( !bnorm.empty() || !scaleLayer.empty() )
{
Mat scale, shift, scale2, shift2;
const float *scaleptr = 0, *shiftptr = 0;
const float *scaleptr2 = 0, *shiftptr2 = 0;
if( !bnorm.empty() )
{
bnorm->getScaleShift(scale, shift);
CV_Assert( scale.isContinuous() && shift.isContinuous() &&
scale.type() == CV_32F && shift.type() == CV_32F &&
scale.total() == (size_t)outCn &&
shift.total() == (size_t)outCn );
scaleptr = scale.ptr<float>();
shiftptr = shift.ptr<float>();
}
if( !scaleLayer.empty() )
{
scale2 = scaleLayer->blobs[0];
CV_Assert( scale2.isContinuous() && scale2.type() == CV_32F &&
scale2.total() == (size_t)outCn );
scaleptr2 = scale2.ptr<float>();
if( scaleLayer->hasBias )
{
shift2 = scaleLayer->blobs[1];
CV_Assert( shift2.isContinuous() && shift2.type() == CV_32F &&
shift2.total() == (size_t)outCn );
shiftptr2 = shift2.ptr<float>();
}
}
if (shiftptr || shiftptr2)
fusedBias = true;
for( int i = 0; i < outCn; i++ )
{
float s1 = scaleptr ? scaleptr[i] : 1.f;
float delta1 = shiftptr ? shiftptr[i] : 0.f;
float s2 = scaleptr2 ? scaleptr2[i] : 1.f;
float delta2 = shiftptr2 ? shiftptr2[i] : 0.f;
float* w_i = weightsMat.ptr<float>(i);
int j, wcols = weightsMat.cols;
for( j = 0; j < wcols; j++ )
w_i[j] *= (s1*s2);
biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
}
}
biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
}
reluslope.clear();
if( activ )
{
Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
if( !activ_relu.empty() )
{
reluslope.assign(outCn+2, activ_relu->negativeSlope);
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
}
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
if( !activ_chprelu.empty() )
{
const Mat& m = activ_chprelu->blobs[0];
CV_Assert(m.isContinuous() && m.type() == CV_32F && (int)m.total() == outCn);
const float* mdata = m.ptr<float>();
reluslope.resize(outCn+2);
std::copy(mdata, mdata + outCn, reluslope.begin());
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
}
}
if ( newWeightAndBias ) if ( newWeightAndBias )
{ {
weightsMat.copyTo(umat_blobs[0]); weightsMat.copyTo(umat_blobs[0]);
...@@ -723,9 +835,8 @@ public: ...@@ -723,9 +835,8 @@ public:
newActiv = false; newActiv = false;
} }
UMat inpMat, outMat; UMat& inpMat = inputs[0];
inpMat = inputs[0]->getUMat(ACCESS_READ); UMat& outMat = outputs[0];
outMat = outputs[0].getUMat(ACCESS_WRITE);
int batch_size = inpMat.size[0]; int batch_size = inpMat.size[0];
return convolutionOp->Forward(inpMat, return convolutionOp->Forward(inpMat,
...@@ -736,6 +847,18 @@ public: ...@@ -736,6 +847,18 @@ public:
} }
#endif #endif
// Array-based forward for the convolution layer: attempts the OpenCL
// implementation on Intel OpenCL targets (CV_OCL_RUN returns on success),
// otherwise falls back to the Mat-based forward().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
...@@ -811,11 +934,6 @@ public: ...@@ -811,11 +934,6 @@ public:
} }
} }
#ifdef HAVE_OPENCL
if (shiftptr || shiftptr2)
fusedBias = true;
#endif
for( int i = 0; i < outCn; i++ ) for( int i = 0; i < outCn; i++ )
{ {
float s1 = scaleptr ? scaleptr[i] : 1.f; float s1 = scaleptr ? scaleptr[i] : 1.f;
...@@ -841,9 +959,6 @@ public: ...@@ -841,9 +959,6 @@ public:
if( !activ_relu.empty() ) if( !activ_relu.empty() )
{ {
reluslope.assign(outCn+2, activ_relu->negativeSlope); reluslope.assign(outCn+2, activ_relu->negativeSlope);
#ifdef HAVE_OPENCL
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
#endif
} }
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>(); Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
...@@ -855,16 +970,9 @@ public: ...@@ -855,16 +970,9 @@ public:
reluslope.resize(outCn+2); reluslope.resize(outCn+2);
std::copy(mdata, mdata + outCn, reluslope.begin()); std::copy(mdata, mdata + outCn, reluslope.begin());
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1]; reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
#ifdef HAVE_OPENCL
activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
#endif
} }
} }
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals))
int nstripes = std::max(getNumThreads(), 1); int nstripes = std::max(getNumThreads(), 1);
ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope, ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope,
...@@ -1173,6 +1281,14 @@ public: ...@@ -1173,6 +1281,14 @@ public:
} }
}; };
// Array-based forward: no OpenCL implementation for this layer, so after
// tracing it routes directly to the Mat-based code via forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -133,6 +133,14 @@ public: ...@@ -133,6 +133,14 @@ public:
} }
} }
// Array-based forward: this layer provides no OpenCL path; delegate to the
// Mat-based implementation through forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -194,6 +194,95 @@ public: ...@@ -194,6 +194,95 @@ public:
return false; return false;
} }
#ifdef HAVE_OPENCL
// "OpenCL" path of the detection-output layer. The detection post-processing
// (decoding, NMS, output assembly) is actually done on the CPU with Mats;
// only the final result is exposed as a UMat so downstream UMat consumers
// avoid an explicit download/upload round trip.
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
std::vector<Mat> inpvec;
std::vector<Mat> outputs;
inputs_arr.getMatVector(inpvec);
outputs_arr.getMatVector(outputs);
// Build a Mat* view to match the pointer-based helper signatures below.
std::vector<Mat*> inputs(inpvec.size());
for (size_t i = 0; i < inpvec.size(); i++)
inputs[i] = &inpvec[i];
std::vector<LabelBBox> allDecodedBBoxes;
std::vector<std::vector<std::vector<float> > > allConfidenceScores;
int num = inputs[0]->size[0];
// extract predictions from input layers
{
// inputs[2] holds prior boxes as groups of 4 coordinates.
int numPriors = inputs[2]->size[2] / 4;
const float* locationData = inputs[0]->ptr<float>();
const float* confidenceData = inputs[1]->ptr<float>();
const float* priorData = inputs[2]->ptr<float>();
// Retrieve all location predictions
std::vector<LabelBBox> allLocationPredictions;
GetLocPredictions(locationData, num, numPriors, _numLocClasses,
_shareLocation, _locPredTransposed, allLocationPredictions);
// Retrieve all confidences
GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
// Retrieve all prior bboxes
std::vector<caffe::NormalizedBBox> priorBBoxes;
std::vector<std::vector<float> > priorVariances;
GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances);
// Decode all loc predictions to bboxes
DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num,
_shareLocation, _numLocClasses, _backgroundLabelId,
_codeType, _varianceEncodedInTarget, false, allDecodedBBoxes);
}
// Per-image filtering/NMS; numKept is the total number of surviving boxes.
size_t numKept = 0;
std::vector<std::map<int, std::vector<int> > > allIndices;
for (int i = 0; i < num; ++i)
{
numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
}
if (numKept == 0)
{
// Set confidences to zeros.
Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
outputs[0](ranges).setTo(0);
return true;
}
// Assemble all detections into a single 1x1xNx7 CPU buffer.
int outputShape[] = {1, 1, (int)numKept, 7};
Mat mat(4, outputShape, CV_32F);
float* outputsData = mat.ptr<float>();
size_t count = 0;
for (int i = 0; i < num; ++i)
{
count += outputDetections_(i, &outputsData[count * 7],
allDecodedBBoxes[i], allConfidenceScores[i],
allIndices[i]);
}
// Publish the CPU result as the layer's output UMat.
// NOTE(review): 'mat' is function-local; relies on getUMat() keeping the
// underlying data alive after this scope — verify this lifetime is safe.
UMat& output = outputs_arr.getUMatRef(0);
output = mat.getUMat(ACCESS_READ);
CV_Assert(count == numKept);
return true;
}
#endif
// Array-based forward: tries forward_ocl() on Intel OpenCL targets
// (CV_OCL_RUN returns from this function on success), else delegates to the
// Mat-based implementation via forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -156,13 +156,20 @@ public: ...@@ -156,13 +156,20 @@ public:
return true; return true;
} }
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
func.applyOCL(inputs, outputs, internals)) func.applyOCL(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
...@@ -258,25 +265,29 @@ struct ReLUFunctor ...@@ -258,25 +265,29 @@ struct ReLUFunctor
return true; return true;
} }
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize(); size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
UMat src, dst; UMat& src = inputs[i];
inputs[i]->copyTo(src); UMat& dst = outputs[i];
dst = outputs[i].getUMat(ACCESS_WRITE);
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset); CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
ocl::Kernel ker; ocl::Kernel kernel;
CV_Assert(initKernel(ker, src)); CV_Assert(initKernel(kernel, src));
ker.set(0, (int)src.total()); kernel.set(0, (int)src.total());
ker.set(1, ocl::KernelArg::PtrReadOnly(src)); kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
ker.set(2, ocl::KernelArg::PtrWriteOnly(dst)); kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
size_t gSize = src.total(); size_t gSize = src.total();
CV_Assert(ker.run(1, &gSize, &wgSize, false)); CV_Assert(kernel.run(1, &gSize, &wgSize, false));
} }
return true; return true;
...@@ -347,7 +358,7 @@ struct ReLU6Functor ...@@ -347,7 +358,7 @@ struct ReLU6Functor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -382,7 +393,7 @@ struct TanHFunctor ...@@ -382,7 +393,7 @@ struct TanHFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -417,7 +428,7 @@ struct SigmoidFunctor ...@@ -417,7 +428,7 @@ struct SigmoidFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -454,7 +465,7 @@ struct ELUFunctor ...@@ -454,7 +465,7 @@ struct ELUFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -489,7 +500,7 @@ struct AbsValFunctor ...@@ -489,7 +500,7 @@ struct AbsValFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -524,7 +535,7 @@ struct BNLLFunctor ...@@ -524,7 +535,7 @@ struct BNLLFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -581,7 +592,7 @@ struct PowerFunctor ...@@ -581,7 +592,7 @@ struct PowerFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -656,7 +667,7 @@ struct ChannelsPReLUFunctor ...@@ -656,7 +667,7 @@ struct ChannelsPReLUFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
......
...@@ -254,6 +254,14 @@ public: ...@@ -254,6 +254,14 @@ public:
} }
}; };
// Array-based forward: no OpenCL path is implemented for this layer, so the
// call is traced and routed to the Mat-based forward via forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -104,6 +104,43 @@ public: ...@@ -104,6 +104,43 @@ public:
return true; return true;
} }
#ifdef HAVE_OPENCL
// OpenCL path of the reshape layer: no data is copied — each output slot is
// rebound to a reshaped *view* of the corresponding input UMat. The target
// shape is taken from the pre-allocated output blob.
// Returns true so CV_OCL_RUN in forward() skips the CPU fallback.
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inputs_arr.getUMatVector(inputs);
outputs_arr.getUMatVector(outputs);
// Iterate directly over the UMat vector; the original pointer-vector
// indirection (and its signed loop counter) was unnecessary.
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape outShape = shape(outputs[i]);
UMat& output = outputs_arr.getUMatRef(i);
output = inputs[i].reshape(1, (int)outShape.size(), &outShape[0]);
}
return true;
}
#endif
// Array-based forward: uses the zero-copy OpenCL reshape only when the caller
// actually passed UMat outputs (isUMatVector) on an Intel OpenCL target;
// CV_OCL_RUN returns on success, otherwise the Mat-based fallback runs.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
outputs_arr.isUMatVector() &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -258,12 +258,18 @@ public: ...@@ -258,12 +258,18 @@ public:
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &input, std::vector<Mat> &output) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
{ {
int axisCan = clamp(axis, input[0]->dims); std::vector<UMat> inputs;
int numOutput = blobs[0].size[0]; std::vector<UMat> outputs;
int innerSize = blobs[0].size[1];
int outerSize = input[0]->total(0, axisCan); inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int axisCan = clamp(axis, inputs[0].dims);
int numOutput = umat_blobs[0].size[0];
int innerSize = umat_blobs[0].size[1];
int outerSize = total(shape(inputs[0]), 0, axisCan);
bool ret = true; bool ret = true;
if (innerProductOp.empty()) if (innerProductOp.empty())
...@@ -278,11 +284,10 @@ public: ...@@ -278,11 +284,10 @@ public:
} }
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type()); UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
for (size_t i = 0; i < input.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
UMat srcMat, dstMat; UMat& srcMat = inputs[i];
srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ); UMat& dstMat = outputs[i];
dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
dstMat.setTo(0.0f); dstMat.setTo(0.0f);
if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat)) if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
...@@ -301,11 +306,15 @@ public: ...@@ -301,11 +306,15 @@ public:
if (ret) return true; if (ret) return true;
UMat& weights = umat_blobs[0]; UMat& weights = umat_blobs[0];
for (size_t i = 0; i < input.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
MatShape inshape, outshape;
inshape = shape(outerSize, innerSize);
outshape = shape(outerSize, numOutput);
UMat srcMat, dstMat; UMat srcMat, dstMat;
srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ); srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE); dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T); cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
...@@ -320,14 +329,22 @@ public: ...@@ -320,14 +329,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(input, output)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
int axisCan = clamp(axis, input[0]->dims); int axisCan = clamp(axis, input[0]->dims);
int outerSize = input[0]->total(0, axisCan); int outerSize = input[0]->total(0, axisCan);
......
...@@ -94,8 +94,14 @@ public: ...@@ -94,8 +94,14 @@ public:
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (lrnOp.empty()) if (lrnOp.empty())
{ {
OCL4DNNLRNConfig config; OCL4DNNLRNConfig config;
...@@ -108,38 +114,44 @@ public: ...@@ -108,38 +114,44 @@ public:
config.alpha = alpha; config.alpha = alpha;
config.beta = beta; config.beta = beta;
config.k = bias; config.k = bias;
CHECK_EQ(4, inputs[0]->dims) << "Input must have 4 axes, " CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)"; << "corresponding to (num, channels, height, width)";
config.batch_size = inputs[0]->size[0]; config.batch_size = inputs[0].size[0];
config.channels = inputs[0]->size[1]; config.channels = inputs[0].size[1];
config.height = inputs[0]->size[2]; config.height = inputs[0].size[2];
config.width = inputs[0]->size[3]; config.width = inputs[0].size[3];
config.norm_by_size = normBySize; config.norm_by_size = normBySize;
lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config)); lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
} }
UMat inpMat, outMat; if (!lrnOp->Forward(inputs[0], outputs[0]))
inpMat = inputs[0]->getUMat(ACCESS_READ);
outMat = outputs[0].getUMat(ACCESS_WRITE);
if (!lrnOp->Forward(inpMat, outMat))
return false; return false;
return true; return true;
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_Assert(inputs.size() == outputs.size()); CV_Assert(inputs_arr.total() == outputs_arr.total());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_Assert(inputs.size() == outputs.size());
for (int i = 0; i < inputs.size(); i++) for (int i = 0; i < inputs.size(); i++)
{ {
......
...@@ -55,6 +55,14 @@ public: ...@@ -55,6 +55,14 @@ public:
return false; return false;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -60,6 +60,14 @@ public: ...@@ -60,6 +60,14 @@ public:
eps = params.get<double>("eps", 1e-9); eps = params.get<double>("eps", 1e-9);
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -69,6 +69,14 @@ public: ...@@ -69,6 +69,14 @@ public:
return true; return true;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -91,6 +91,14 @@ public: ...@@ -91,6 +91,14 @@ public:
backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4; backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -247,6 +247,14 @@ public: ...@@ -247,6 +247,14 @@ public:
} }
}; };
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -113,18 +113,24 @@ public: ...@@ -113,18 +113,24 @@ public:
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
{ {
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (poolOp.empty()) if (poolOp.empty())
{ {
OCL4DNNPoolConfig config; OCL4DNNPoolConfig config;
config.in_shape = shape(*inputs[0]); config.in_shape = shape(inputs[0]);
config.out_shape = shape(outputs[0]); config.out_shape = shape(outputs[0]);
config.kernel = kernel; config.kernel = kernel;
config.pad = pad; config.pad = pad;
config.stride = stride; config.stride = stride;
config.channels = inputs[0]->size[1]; config.channels = inputs[0].size[1];
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX : config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
(type == AVE ? LIBDNN_POOLING_METHOD_AVE : (type == AVE ? LIBDNN_POOLING_METHOD_AVE :
LIBDNN_POOLING_METHOD_STO); LIBDNN_POOLING_METHOD_STO);
...@@ -133,18 +139,10 @@ public: ...@@ -133,18 +139,10 @@ public:
for (size_t ii = 0; ii < inputs.size(); ii++) for (size_t ii = 0; ii < inputs.size(); ii++)
{ {
UMat inpMat, outMat, maskMat; UMat& inpMat = inputs[ii];
int out_index = (type == MAX) ? 2 : 1;
inpMat = inputs[ii]->getUMat(ACCESS_READ); UMat& outMat = outputs[out_index * ii];
UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
if (type == MAX)
{
outMat = outputs[2 * ii].getUMat(ACCESS_WRITE);
maskMat = outputs[2 * ii + 1].getUMat(ACCESS_WRITE);
} else {
outMat = outputs[ii].getUMat(ACCESS_WRITE);
maskMat = UMat();
}
CV_Assert(inpMat.offset == 0 && outMat.offset == 0); CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
...@@ -156,14 +154,22 @@ public: ...@@ -156,14 +154,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
for (size_t ii = 0; ii < inputs.size(); ii++) for (size_t ii = 0; ii < inputs.size(); ii++)
{ {
......
...@@ -249,6 +249,14 @@ public: ...@@ -249,6 +249,14 @@ public:
return false; return false;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -252,6 +252,14 @@ public: ...@@ -252,6 +252,14 @@ public:
allocated = true; allocated = true;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
...@@ -465,6 +473,14 @@ public: ...@@ -465,6 +473,14 @@ public:
} }
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -114,6 +114,14 @@ public: ...@@ -114,6 +114,14 @@ public:
} }
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -85,6 +85,15 @@ public: ...@@ -85,6 +85,15 @@ public:
{ {
return backendId == DNN_BACKEND_DEFAULT; return backendId == DNN_BACKEND_DEFAULT;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -182,6 +182,14 @@ public: ...@@ -182,6 +182,14 @@ public:
return true; return true;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -37,6 +37,14 @@ public: ...@@ -37,6 +37,14 @@ public:
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]); return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -44,6 +44,14 @@ public: ...@@ -44,6 +44,14 @@ public:
backendId == DNN_BACKEND_HALIDE && haveHalide(); backendId == DNN_BACKEND_HALIDE && haveHalide();
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -36,6 +36,14 @@ public: ...@@ -36,6 +36,14 @@ public:
return true; return true;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -171,6 +171,14 @@ public: ...@@ -171,6 +171,14 @@ public:
} }
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -91,35 +91,42 @@ public: ...@@ -91,35 +91,42 @@ public:
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays itns)
{ {
std::vector<UMat> inputs;
std::vector<UMat> outputs;
std::vector<UMat> internals;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
itns.getUMatVector(internals);
if (softmaxOp.empty()) if (softmaxOp.empty())
{ {
OCL4DNNSoftmaxConfig config; OCL4DNNSoftmaxConfig config;
config.in_shape = shape(*inputs[0]); config.in_shape = shape(inputs[0]);
config.axis = axisRaw; config.axis = axisRaw;
config.channels = inputs[0]->size[axisRaw]; config.channels = inputs[0].size[axisRaw];
config.logsoftmax = logSoftMax; config.logsoftmax = logSoftMax;
softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config)); softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
} }
UMat srcMat, dstMat; UMat& src = inputs[0];
srcMat = inputs[0]->getUMat(ACCESS_READ); UMat& dstMat = outputs[0];
dstMat = outputs[0].getUMat(ACCESS_WRITE);
if (softmaxOp->Forward(srcMat, dstMat)) if (softmaxOp->Forward(src, dstMat))
return true; return true;
const Mat &src = *inputs[0]; UMat& bufMat = internals[0];
UMat bufMat = internals[0].getUMat(ACCESS_WRITE); src.copyTo(dstMat);
srcMat.copyTo(dstMat);
int axis = clamp(axisRaw, src.dims); int axis = clamp(axisRaw, src.dims);
size_t outerSize = src.total(0, axis); MatShape s = shape(src);
size_t outerSize = total(s, 0, axis);
size_t channels = src.size[axis]; size_t channels = src.size[axis];
size_t innerSize = src.total(axis + 1); size_t innerSize = total(s, axis + 1);
String buildOpts = String("-DT=") + ocl::typeToStr(src.type()); String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
ocl::Kernel kmax, ksub, ksum, kdiv; ocl::Kernel kmax, ksub, ksum, kdiv;
...@@ -175,14 +182,22 @@ public: ...@@ -175,14 +182,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
const Mat &src = *inputs[0]; const Mat &src = *inputs[0];
Mat &dst = outputs[0]; Mat &dst = outputs[0];
......
...@@ -78,6 +78,14 @@ public: ...@@ -78,6 +78,14 @@ public:
return false; return false;
} }
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment