Commit 8a3a75cc authored by Alexander Alekhin

Merge pull request #9882 from pengli:ocl4dnn

parents e89ae986 8f990837
...@@ -187,16 +187,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN ...@@ -187,16 +187,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/ */
virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0; virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0;
/** @brief Given the @p inputs blobs, computes the @p outputs blobs.
* @param[in] inputs the input blobs.
* @param[out] outputs allocated output blobs, which will store results of the computation.
* @param[out] internals allocated internal blobs
*
* Array-based overload: blobs are passed through the InputArrayOfArrays /
* OutputArrayOfArrays proxy types instead of std::vector<Mat>. It is pure
* virtual, so every concrete layer has to provide an implementation.
*/
virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0;
/** @brief Given the @p inputs blobs, computes the @p outputs blobs.
* @param[in] inputs the input blobs.
* @param[out] outputs allocated output blobs, which will store results of the computation.
* @param[out] internals allocated internal blobs
*
* Non-virtual helper with the same parameter list as the array-based forward().
* NOTE(review): the implementation is not part of this declaration; presumably it
* unpacks the arrays into Mat vectors and dispatches to the std::vector<Mat*>
* forward() overload -- confirm against the definition.
*/
void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
/** @brief @overload */ /** @brief @overload */
CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs); CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
/** @brief @overload */ /** @brief @overload */
CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs); CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs);
/** @brief @overload
* Takes the inputs as a vector of Mat objects; @p outputs and @p internals are
* annotated CV_IN_OUT, i.e. they are supplied by the caller and also written to.
*/
CV_WRAP void forward(const std::vector<Mat> &inputs, CV_IN_OUT std::vector<Mat> &outputs,
CV_IN_OUT std::vector<Mat> &internals);
/** @brief Allocates layer and computes output. */ /** @brief Allocates layer and computes output. */
CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs, CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
CV_IN_OUT std::vector<Mat> &internals); CV_IN_OUT std::vector<Mat> &internals);
......
...@@ -132,6 +132,11 @@ static inline MatShape shape(const Mat& mat) ...@@ -132,6 +132,11 @@ static inline MatShape shape(const Mat& mat)
return shape(mat.size.p, mat.dims); return shape(mat.size.p, mat.dims);
} }
/** Builds the MatShape of a UMat from its per-dimension size array and its
 *  dimension count, by delegating to the pointer/count shape() overload. */
static inline MatShape shape(const UMat& mat)
{
    const int dimCount = mat.dims;
    return shape(mat.size.p, dimCount);
}
namespace {inline bool is_neg(int i) { return i < 0; }} namespace {inline bool is_neg(int i) { return i < 0; }}
static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1) static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
...@@ -151,7 +156,7 @@ static inline int total(const MatShape& shape, int start = -1, int end = -1) ...@@ -151,7 +156,7 @@ static inline int total(const MatShape& shape, int start = -1, int end = -1)
return 0; return 0;
int elems = 1; int elems = 1;
CV_Assert(start < (int)shape.size() && end <= (int)shape.size() && CV_Assert(start <= (int)shape.size() && end <= (int)shape.size() &&
start <= end); start <= end);
for(int i = start; i < end; i++) for(int i = start; i < end; i++)
{ {
......
This diff is collapsed.
...@@ -102,6 +102,14 @@ public: ...@@ -102,6 +102,14 @@ public:
backendId == DNN_BACKEND_HALIDE && haveHalide(); backendId == DNN_BACKEND_HALIDE && haveHalide();
} }
// Array-based forward() entry point. No OpenCL branch is attempted in this
// override: after the tracing macros, the call is handed directly to
// Layer::forward_fallback() with the same three array arguments.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -62,6 +62,25 @@ public: ...@@ -62,6 +62,25 @@ public:
return true; return true;
} }
#ifdef HAVE_OPENCL
// OpenCL path for this layer. It does not read or write any of its arguments
// and always reports success.
// NOTE(review): presumably the layer has nothing to compute on the OpenCL path
// (e.g. output already shares the input data) -- confirm for the enclosing layer.
bool forward_ocl(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
{
return true;
}
#endif
// Array-based forward() entry point.
// CV_OCL_RUN is given forward_ocl() to run when the preferable target is
// DNN_TARGET_OPENCL and the OCL_PERFORMANCE_CHECK on "default device is Intel"
// holds. The statement after the macro hands the call to the generic fallback.
// NOTE(review): whether the fallback is skipped after a successful forward_ocl()
// depends on the CV_OCL_RUN macro definition, which is not shown here.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -176,36 +176,38 @@ public: ...@@ -176,36 +176,38 @@ public:
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
CV_TRACE_FUNCTION(); std::vector<UMat> inputs;
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); std::vector<UMat> outputs;
int cAxis = clamp(axis, inputs[0]->dims); inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int cAxis = clamp(axis, inputs[0].dims);
if (!(cAxis == 1 && outputs[0].dims == 4 && !padding)) if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
return false; return false;
int bottom_concat_axis; int bottom_concat_axis;
int concat_size = inputs[0]->size[2] * inputs[0]->size[3]; int concat_size = inputs[0].size[2] * inputs[0].size[3];
int top_concat_axis = outputs[0].size[1]; int top_concat_axis = outputs[0].size[1];
int offset_concat_axis = 0; int offset_concat_axis = 0;
UMat inpMat, outMat; UMat& outMat = outputs[0];
outMat = outputs[0].getUMat(ACCESS_WRITE); String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
ocl::Kernel kernel;
String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0]->type()) + String(" ");
if (!kernel.create("concat", ocl::dnn::concat_oclsrc, buildopt))
return false;
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
inpMat = inputs[i]->getUMat(ACCESS_READ); ocl::Kernel kernel("concat", ocl::dnn::concat_oclsrc, buildopt);
bottom_concat_axis = inputs[i]->size[1]; if (kernel.empty())
size_t nthreads = inputs[i]->total(); return false;
UMat& inpMat = inputs[i];
bottom_concat_axis = inputs[i].size[1];
size_t nthreads = inputs[i].total();
kernel.set(0, (int)nthreads); kernel.set(0, (int)nthreads);
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat)); kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
kernel.set(2, (int)inputs[i]->size[0]); kernel.set(2, (int)inputs[i].size[0]);
kernel.set(3, (int)concat_size); kernel.set(3, (int)concat_size);
kernel.set(4, (int)top_concat_axis); kernel.set(4, (int)top_concat_axis);
kernel.set(5, (int)bottom_concat_axis); kernel.set(5, (int)bottom_concat_axis);
...@@ -222,14 +224,22 @@ public: ...@@ -222,14 +224,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
int cAxis = clamp(axis, inputs[0]->dims); int cAxis = clamp(axis, inputs[0]->dims);
Mat& outMat = outputs[0]; Mat& outMat = outputs[0];
......
...@@ -671,14 +671,20 @@ public: ...@@ -671,14 +671,20 @@ public:
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
int group = inputs[0]->size[1] / umat_blobs[0].size[1]; std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int group = inputs[0].size[1] / umat_blobs[0].size[1];
if (convolutionOp.empty()) if (convolutionOp.empty())
{ {
OCL4DNNConvConfig config; OCL4DNNConvConfig config;
config.in_shape = shape(*inputs[0]); config.in_shape = shape(inputs[0]);
config.out_shape = shape(outputs[0]); config.out_shape = shape(outputs[0]);
config.kernel = kernel; config.kernel = kernel;
config.pad = pad; config.pad = pad;
...@@ -690,6 +696,112 @@ public: ...@@ -690,6 +696,112 @@ public:
convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config)); convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
} }
int k, outCn = umat_blobs[0].size[0];
if( weightsMat.empty() )
{
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing
Mat wm = blobs[0].reshape(1, outCn).clone();
if( wm.step1() % VEC_ALIGN != 0 )
{
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
Mat wm_buffer = Mat(outCn, newcols, wm.type());
Mat wm_padding = wm_buffer.colRange(wm.cols, newcols);
wm_padding.setTo(Scalar::all(0.));
Mat wm_aligned = wm_buffer.colRange(0, wm.cols);
wm.copyTo(wm_aligned);
wm = wm_aligned;
}
weightsMat = wm;
Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat();
biasvec.resize(outCn+2);
if( biasMat.empty() )
{
for( k = 0; k < outCn; k++ )
biasvec[k] = 0.f;
}
else
{
for( k = 0; k < outCn; k++ )
biasvec[k] = biasMat.at<float>(k);
}
if( !bnorm.empty() || !scaleLayer.empty() )
{
Mat scale, shift, scale2, shift2;
const float *scaleptr = 0, *shiftptr = 0;
const float *scaleptr2 = 0, *shiftptr2 = 0;
if( !bnorm.empty() )
{
bnorm->getScaleShift(scale, shift);
CV_Assert( scale.isContinuous() && shift.isContinuous() &&
scale.type() == CV_32F && shift.type() == CV_32F &&
scale.total() == (size_t)outCn &&
shift.total() == (size_t)outCn );
scaleptr = scale.ptr<float>();
shiftptr = shift.ptr<float>();
}
if( !scaleLayer.empty() )
{
scale2 = scaleLayer->blobs[0];
CV_Assert( scale2.isContinuous() && scale2.type() == CV_32F &&
scale2.total() == (size_t)outCn );
scaleptr2 = scale2.ptr<float>();
if( scaleLayer->hasBias )
{
shift2 = scaleLayer->blobs[1];
CV_Assert( shift2.isContinuous() && shift2.type() == CV_32F &&
shift2.total() == (size_t)outCn );
shiftptr2 = shift2.ptr<float>();
}
}
if (shiftptr || shiftptr2)
fusedBias = true;
for( int i = 0; i < outCn; i++ )
{
float s1 = scaleptr ? scaleptr[i] : 1.f;
float delta1 = shiftptr ? shiftptr[i] : 0.f;
float s2 = scaleptr2 ? scaleptr2[i] : 1.f;
float delta2 = shiftptr2 ? shiftptr2[i] : 0.f;
float* w_i = weightsMat.ptr<float>(i);
int j, wcols = weightsMat.cols;
for( j = 0; j < wcols; j++ )
w_i[j] *= (s1*s2);
biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
}
}
biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
}
reluslope.clear();
if( activ )
{
Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
if( !activ_relu.empty() )
{
reluslope.assign(outCn+2, activ_relu->negativeSlope);
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
}
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
if( !activ_chprelu.empty() )
{
const Mat& m = activ_chprelu->blobs[0];
CV_Assert(m.isContinuous() && m.type() == CV_32F && (int)m.total() == outCn);
const float* mdata = m.ptr<float>();
reluslope.resize(outCn+2);
std::copy(mdata, mdata + outCn, reluslope.begin());
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
}
}
if ( newWeightAndBias ) if ( newWeightAndBias )
{ {
weightsMat.copyTo(umat_blobs[0]); weightsMat.copyTo(umat_blobs[0]);
...@@ -723,9 +835,8 @@ public: ...@@ -723,9 +835,8 @@ public:
newActiv = false; newActiv = false;
} }
UMat inpMat, outMat; UMat& inpMat = inputs[0];
inpMat = inputs[0]->getUMat(ACCESS_READ); UMat& outMat = outputs[0];
outMat = outputs[0].getUMat(ACCESS_WRITE);
int batch_size = inpMat.size[0]; int batch_size = inpMat.size[0];
return convolutionOp->Forward(inpMat, return convolutionOp->Forward(inpMat,
...@@ -736,6 +847,18 @@ public: ...@@ -736,6 +847,18 @@ public:
} }
#endif #endif
// Array-based forward() entry point.
// Offers forward_ocl() to CV_OCL_RUN under two conditions: the preferable
// target is DNN_TARGET_OPENCL, and the OCL_PERFORMANCE_CHECK on the default
// device being Intel passes. The line after the macro delegates to
// Layer::forward_fallback().
// NOTE(review): the early-exit behaviour on OpenCL success comes from the
// CV_OCL_RUN macro, whose definition is outside this view.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
...@@ -811,11 +934,6 @@ public: ...@@ -811,11 +934,6 @@ public:
} }
} }
#ifdef HAVE_OPENCL
if (shiftptr || shiftptr2)
fusedBias = true;
#endif
for( int i = 0; i < outCn; i++ ) for( int i = 0; i < outCn; i++ )
{ {
float s1 = scaleptr ? scaleptr[i] : 1.f; float s1 = scaleptr ? scaleptr[i] : 1.f;
...@@ -841,9 +959,6 @@ public: ...@@ -841,9 +959,6 @@ public:
if( !activ_relu.empty() ) if( !activ_relu.empty() )
{ {
reluslope.assign(outCn+2, activ_relu->negativeSlope); reluslope.assign(outCn+2, activ_relu->negativeSlope);
#ifdef HAVE_OPENCL
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
#endif
} }
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>(); Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
...@@ -855,16 +970,9 @@ public: ...@@ -855,16 +970,9 @@ public:
reluslope.resize(outCn+2); reluslope.resize(outCn+2);
std::copy(mdata, mdata + outCn, reluslope.begin()); std::copy(mdata, mdata + outCn, reluslope.begin());
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1]; reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
#ifdef HAVE_OPENCL
activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
#endif
} }
} }
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals))
int nstripes = std::max(getNumThreads(), 1); int nstripes = std::max(getNumThreads(), 1);
ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope, ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope,
...@@ -1173,6 +1281,14 @@ public: ...@@ -1173,6 +1281,14 @@ public:
} }
}; };
// Array-based forward() entry point without an OpenCL branch: emits the trace
// macros and then forwards all three array arguments unchanged to
// Layer::forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -133,6 +133,14 @@ public: ...@@ -133,6 +133,14 @@ public:
} }
} }
// Array-based forward() entry point. This override contains no OpenCL
// dispatch; it only traces the call and delegates to Layer::forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -194,6 +194,95 @@ public: ...@@ -194,6 +194,95 @@ public:
return false; return false;
} }
#ifdef HAVE_OPENCL
// OpenCL-path entry for the detection-output computation.
// All arithmetic below runs on Mat data obtained through getMatVector(); the
// only UMat interaction is at the end, where the result Mat is converted with
// getUMat() and stored into output slot 0 of outputs_arr.
// Returns true on both exit paths. internals_arr is not used.
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
std::vector<Mat> inpvec;
std::vector<Mat> outputs;
inputs_arr.getMatVector(inpvec);
outputs_arr.getMatVector(outputs);
// Build a vector of pointers into inpvec so the code below can use the same
// pointer-style access (inputs[k]->...) throughout.
std::vector<Mat*> inputs(inpvec.size());
for (size_t i = 0; i < inpvec.size(); i++)
inputs[i] = &inpvec[i];
std::vector<LabelBBox> allDecodedBBoxes;
std::vector<std::vector<std::vector<float> > > allConfidenceScores;
// First dimension of input 0 is used as the number of items to process.
int num = inputs[0]->size[0];
// extract predictions from input layers
{
// NOTE(review): the division by 4 presumably reflects 4 values per prior box -- confirm.
int numPriors = inputs[2]->size[2] / 4;
// Input 0 = location data, input 1 = confidence data, input 2 = prior data.
const float* locationData = inputs[0]->ptr<float>();
const float* confidenceData = inputs[1]->ptr<float>();
const float* priorData = inputs[2]->ptr<float>();
// Retrieve all location predictions
std::vector<LabelBBox> allLocationPredictions;
GetLocPredictions(locationData, num, numPriors, _numLocClasses,
_shareLocation, _locPredTransposed, allLocationPredictions);
// Retrieve all confidences
GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
// Retrieve all prior bboxes
std::vector<caffe::NormalizedBBox> priorBBoxes;
std::vector<std::vector<float> > priorVariances;
GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances);
// Decode all loc predictions to bboxes
DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num,
_shareLocation, _numLocClasses, _backgroundLabelId,
_codeType, _varianceEncodedInTarget, false, allDecodedBBoxes);
}
// Accumulate the number of kept detections over all items; processDetections_
// also receives allIndices, which is later indexed per item.
size_t numKept = 0;
std::vector<std::map<int, std::vector<int> > > allIndices;
for (int i = 0; i < num; ++i)
{
numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
}
if (numKept == 0)
{
// Set confidences to zeros.
// Selects index 2 along the last axis of the 4-D output and zeroes it.
// NOTE(review): this writes through the Mat obtained from getMatVector();
// verify that the write reaches the caller's buffer when outputs_arr holds UMats.
Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
outputs[0](ranges).setTo(0);
return true;
}
// Result blob: 1 x 1 x numKept x 7 floats, i.e. 7 values per kept detection.
int outputShape[] = {1, 1, (int)numKept, 7};
Mat mat(4, outputShape, CV_32F);
float* outputsData = mat.ptr<float>();
size_t count = 0;
for (int i = 0; i < num; ++i)
{
// Each call writes starting at record index `count` and returns how many
// records it produced, which advances the write position.
count += outputDetections_(i, &outputsData[count * 7],
allDecodedBBoxes[i], allConfidenceScores[i],
allIndices[i]);
}
// Replace output slot 0 with a UMat obtained from the freshly filled Mat.
UMat& output = outputs_arr.getUMatRef(0);
output = mat.getUMat(ACCESS_READ);
// NOTE(review): this consistency check runs after the output has already been assigned.
CV_Assert(count == numKept);
return true;
}
#endif
// Array-based forward() entry point.
// Passes forward_ocl() to CV_OCL_RUN, guarded by preferableTarget ==
// DNN_TARGET_OPENCL and the Intel-device OCL_PERFORMANCE_CHECK; the following
// statement delegates to Layer::forward_fallback().
// NOTE(review): skipping the fallback after a successful OpenCL run is decided
// inside CV_OCL_RUN, which is defined elsewhere.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -156,13 +156,20 @@ public: ...@@ -156,13 +156,20 @@ public:
return true; return true;
} }
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
func.applyOCL(inputs, outputs, internals)) func.applyOCL(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
...@@ -258,25 +265,29 @@ struct ReLUFunctor ...@@ -258,25 +265,29 @@ struct ReLUFunctor
return true; return true;
} }
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize(); size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
UMat src, dst; UMat& src = inputs[i];
inputs[i]->copyTo(src); UMat& dst = outputs[i];
dst = outputs[i].getUMat(ACCESS_WRITE);
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset); CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
ocl::Kernel ker; ocl::Kernel kernel;
CV_Assert(initKernel(ker, src)); CV_Assert(initKernel(kernel, src));
ker.set(0, (int)src.total()); kernel.set(0, (int)src.total());
ker.set(1, ocl::KernelArg::PtrReadOnly(src)); kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
ker.set(2, ocl::KernelArg::PtrWriteOnly(dst)); kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
size_t gSize = src.total(); size_t gSize = src.total();
CV_Assert(ker.run(1, &gSize, &wgSize, false)); CV_Assert(kernel.run(1, &gSize, &wgSize, false));
} }
return true; return true;
...@@ -347,7 +358,7 @@ struct ReLU6Functor ...@@ -347,7 +358,7 @@ struct ReLU6Functor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -382,7 +393,7 @@ struct TanHFunctor ...@@ -382,7 +393,7 @@ struct TanHFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -417,7 +428,7 @@ struct SigmoidFunctor ...@@ -417,7 +428,7 @@ struct SigmoidFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -454,7 +465,7 @@ struct ELUFunctor ...@@ -454,7 +465,7 @@ struct ELUFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -489,7 +500,7 @@ struct AbsValFunctor ...@@ -489,7 +500,7 @@ struct AbsValFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -524,7 +535,7 @@ struct BNLLFunctor ...@@ -524,7 +535,7 @@ struct BNLLFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -581,7 +592,7 @@ struct PowerFunctor ...@@ -581,7 +592,7 @@ struct PowerFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
...@@ -656,7 +667,7 @@ struct ChannelsPReLUFunctor ...@@ -656,7 +667,7 @@ struct ChannelsPReLUFunctor
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
// TODO: implement OCL version // TODO: implement OCL version
return false; return false;
......
...@@ -254,6 +254,14 @@ public: ...@@ -254,6 +254,14 @@ public:
} }
}; };
// Array-based forward() entry point with no OpenCL attempt: traces the call,
// then passes inputs, outputs and internals on to Layer::forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -104,6 +104,43 @@ public: ...@@ -104,6 +104,43 @@ public:
return true; return true;
} }
#ifdef HAVE_OPENCL
// OpenCL path of the reshape: for every input, output slot i of outputs_arr is
// reassigned to the input UMat reshaped (single channel) to the shape that
// output i currently has. No kernel is launched here.
// NOTE(review): UMat::reshape presumably returns a header sharing the input's
// data rather than a copy -- confirm against the OpenCV documentation.
// Always returns true. internals_arr is not used.
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    inputs_arr.getUMatVector(inputs);
    outputs_arr.getUMatVector(outputs);

    // Index with size_t to match vector::size() (the original compared a signed
    // int against it) and access the UMats directly instead of going through an
    // intermediate vector of pointers.
    for (size_t i = 0; i < inputs.size(); i++)
    {
        // Target shape is taken from the already-allocated output blob.
        MatShape outShape = shape(outputs[i]);
        UMat& output = outputs_arr.getUMatRef(i);
        output = inputs[i].reshape(1, (int)outShape.size(), &outShape[0]);
    }

    return true;
}
#endif
// Array-based forward() entry point.
// forward_ocl() is offered to CV_OCL_RUN only when all three conditions hold:
// the preferable target is DNN_TARGET_OPENCL, outputs_arr is a UMat vector
// (forward_ocl() reassigns outputs through getUMatRef), and the Intel-device
// OCL_PERFORMANCE_CHECK passes. The statement after the macro delegates to
// Layer::forward_fallback().
// NOTE(review): the early return on OpenCL success is part of CV_OCL_RUN,
// which is not visible here.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
outputs_arr.isUMatVector() &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -258,12 +258,18 @@ public: ...@@ -258,12 +258,18 @@ public:
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &input, std::vector<Mat> &output) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
{ {
int axisCan = clamp(axis, input[0]->dims); std::vector<UMat> inputs;
int numOutput = blobs[0].size[0]; std::vector<UMat> outputs;
int innerSize = blobs[0].size[1];
int outerSize = input[0]->total(0, axisCan); inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int axisCan = clamp(axis, inputs[0].dims);
int numOutput = umat_blobs[0].size[0];
int innerSize = umat_blobs[0].size[1];
int outerSize = total(shape(inputs[0]), 0, axisCan);
bool ret = true; bool ret = true;
if (innerProductOp.empty()) if (innerProductOp.empty())
...@@ -278,11 +284,10 @@ public: ...@@ -278,11 +284,10 @@ public:
} }
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type()); UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
for (size_t i = 0; i < input.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
UMat srcMat, dstMat; UMat& srcMat = inputs[i];
srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ); UMat& dstMat = outputs[i];
dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
dstMat.setTo(0.0f); dstMat.setTo(0.0f);
if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat)) if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
...@@ -301,11 +306,15 @@ public: ...@@ -301,11 +306,15 @@ public:
if (ret) return true; if (ret) return true;
UMat& weights = umat_blobs[0]; UMat& weights = umat_blobs[0];
for (size_t i = 0; i < input.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
MatShape inshape, outshape;
inshape = shape(outerSize, innerSize);
outshape = shape(outerSize, numOutput);
UMat srcMat, dstMat; UMat srcMat, dstMat;
srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ); srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE); dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T); cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
...@@ -320,14 +329,22 @@ public: ...@@ -320,14 +329,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(input, output)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
int axisCan = clamp(axis, input[0]->dims); int axisCan = clamp(axis, input[0]->dims);
int outerSize = input[0]->total(0, axisCan); int outerSize = input[0]->total(0, axisCan);
......
...@@ -94,8 +94,14 @@ public: ...@@ -94,8 +94,14 @@ public:
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{ {
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (lrnOp.empty()) if (lrnOp.empty())
{ {
OCL4DNNLRNConfig config; OCL4DNNLRNConfig config;
...@@ -108,38 +114,44 @@ public: ...@@ -108,38 +114,44 @@ public:
config.alpha = alpha; config.alpha = alpha;
config.beta = beta; config.beta = beta;
config.k = bias; config.k = bias;
CHECK_EQ(4, inputs[0]->dims) << "Input must have 4 axes, " CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)"; << "corresponding to (num, channels, height, width)";
config.batch_size = inputs[0]->size[0]; config.batch_size = inputs[0].size[0];
config.channels = inputs[0]->size[1]; config.channels = inputs[0].size[1];
config.height = inputs[0]->size[2]; config.height = inputs[0].size[2];
config.width = inputs[0]->size[3]; config.width = inputs[0].size[3];
config.norm_by_size = normBySize; config.norm_by_size = normBySize;
lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config)); lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
} }
UMat inpMat, outMat; if (!lrnOp->Forward(inputs[0], outputs[0]))
inpMat = inputs[0]->getUMat(ACCESS_READ);
outMat = outputs[0].getUMat(ACCESS_WRITE);
if (!lrnOp->Forward(inpMat, outMat))
return false; return false;
return true; return true;
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_Assert(inputs.size() == outputs.size()); CV_Assert(inputs_arr.total() == outputs_arr.total());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_Assert(inputs.size() == outputs.size());
for (int i = 0; i < inputs.size(); i++) for (int i = 0; i < inputs.size(); i++)
{ {
......
...@@ -55,6 +55,14 @@ public: ...@@ -55,6 +55,14 @@ public:
return false; return false;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -60,6 +60,14 @@ public: ...@@ -60,6 +60,14 @@ public:
eps = params.get<double>("eps", 1e-9); eps = params.get<double>("eps", 1e-9);
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat *> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -69,6 +69,14 @@ public: ...@@ -69,6 +69,14 @@ public:
return true; return true;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -91,6 +91,14 @@ public: ...@@ -91,6 +91,14 @@ public:
backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4; backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -247,6 +247,14 @@ public: ...@@ -247,6 +247,14 @@ public:
} }
}; };
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -113,18 +113,24 @@ public: ...@@ -113,18 +113,24 @@ public:
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
{ {
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (poolOp.empty()) if (poolOp.empty())
{ {
OCL4DNNPoolConfig config; OCL4DNNPoolConfig config;
config.in_shape = shape(*inputs[0]); config.in_shape = shape(inputs[0]);
config.out_shape = shape(outputs[0]); config.out_shape = shape(outputs[0]);
config.kernel = kernel; config.kernel = kernel;
config.pad = pad; config.pad = pad;
config.stride = stride; config.stride = stride;
config.channels = inputs[0]->size[1]; config.channels = inputs[0].size[1];
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX : config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
(type == AVE ? LIBDNN_POOLING_METHOD_AVE : (type == AVE ? LIBDNN_POOLING_METHOD_AVE :
LIBDNN_POOLING_METHOD_STO); LIBDNN_POOLING_METHOD_STO);
...@@ -133,18 +139,10 @@ public: ...@@ -133,18 +139,10 @@ public:
for (size_t ii = 0; ii < inputs.size(); ii++) for (size_t ii = 0; ii < inputs.size(); ii++)
{ {
UMat inpMat, outMat, maskMat; UMat& inpMat = inputs[ii];
int out_index = (type == MAX) ? 2 : 1;
inpMat = inputs[ii]->getUMat(ACCESS_READ); UMat& outMat = outputs[out_index * ii];
UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
if (type == MAX)
{
outMat = outputs[2 * ii].getUMat(ACCESS_WRITE);
maskMat = outputs[2 * ii + 1].getUMat(ACCESS_WRITE);
} else {
outMat = outputs[ii].getUMat(ACCESS_WRITE);
maskMat = UMat();
}
CV_Assert(inpMat.offset == 0 && outMat.offset == 0); CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
...@@ -156,14 +154,22 @@ public: ...@@ -156,14 +154,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
for (size_t ii = 0; ii < inputs.size(); ii++) for (size_t ii = 0; ii < inputs.size(); ii++)
{ {
......
...@@ -249,6 +249,14 @@ public: ...@@ -249,6 +249,14 @@ public:
return false; return false;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -252,6 +252,14 @@ public: ...@@ -252,6 +252,14 @@ public:
allocated = true; allocated = true;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
...@@ -465,6 +473,14 @@ public: ...@@ -465,6 +473,14 @@ public:
} }
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -114,6 +114,14 @@ public: ...@@ -114,6 +114,14 @@ public:
} }
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -85,6 +85,15 @@ public: ...@@ -85,6 +85,15 @@ public:
{ {
return backendId == DNN_BACKEND_DEFAULT; return backendId == DNN_BACKEND_DEFAULT;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -182,6 +182,14 @@ public: ...@@ -182,6 +182,14 @@ public:
return true; return true;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -37,6 +37,14 @@ public: ...@@ -37,6 +37,14 @@ public:
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]); return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -44,6 +44,14 @@ public: ...@@ -44,6 +44,14 @@ public:
backendId == DNN_BACKEND_HALIDE && haveHalide(); backendId == DNN_BACKEND_HALIDE && haveHalide();
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -36,6 +36,14 @@ public: ...@@ -36,6 +36,14 @@ public:
return true; return true;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the virtual std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -171,6 +171,14 @@ public: ...@@ -171,6 +171,14 @@ public:
} }
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
...@@ -91,35 +91,42 @@ public: ...@@ -91,35 +91,42 @@ public:
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays itns)
{ {
std::vector<UMat> inputs;
std::vector<UMat> outputs;
std::vector<UMat> internals;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
itns.getUMatVector(internals);
if (softmaxOp.empty()) if (softmaxOp.empty())
{ {
OCL4DNNSoftmaxConfig config; OCL4DNNSoftmaxConfig config;
config.in_shape = shape(*inputs[0]); config.in_shape = shape(inputs[0]);
config.axis = axisRaw; config.axis = axisRaw;
config.channels = inputs[0]->size[axisRaw]; config.channels = inputs[0].size[axisRaw];
config.logsoftmax = logSoftMax; config.logsoftmax = logSoftMax;
softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config)); softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
} }
UMat srcMat, dstMat; UMat& src = inputs[0];
srcMat = inputs[0]->getUMat(ACCESS_READ); UMat& dstMat = outputs[0];
dstMat = outputs[0].getUMat(ACCESS_WRITE);
if (softmaxOp->Forward(srcMat, dstMat)) if (softmaxOp->Forward(src, dstMat))
return true; return true;
const Mat &src = *inputs[0]; UMat& bufMat = internals[0];
UMat bufMat = internals[0].getUMat(ACCESS_WRITE); src.copyTo(dstMat);
srcMat.copyTo(dstMat);
int axis = clamp(axisRaw, src.dims); int axis = clamp(axisRaw, src.dims);
size_t outerSize = src.total(0, axis); MatShape s = shape(src);
size_t outerSize = total(s, 0, axis);
size_t channels = src.size[axis]; size_t channels = src.size[axis];
size_t innerSize = src.total(axis + 1); size_t innerSize = total(s, axis + 1);
String buildOpts = String("-DT=") + ocl::typeToStr(src.type()); String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
ocl::Kernel kmax, ksub, ksum, kdiv; ocl::Kernel kmax, ksub, ksum, kdiv;
...@@ -175,14 +182,22 @@ public: ...@@ -175,14 +182,22 @@ public:
} }
#endif #endif
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) && CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs, outputs, internals)) forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
const Mat &src = *inputs[0]; const Mat &src = *inputs[0];
Mat &dst = outputs[0]; Mat &dst = outputs[0];
......
...@@ -78,6 +78,14 @@ public: ...@@ -78,6 +78,14 @@ public:
return false; return false;
} }
// Array-based forward() entry point (InputArrayOfArrays / OutputArrayOfArrays overload).
// The layer does no computation of its own here: after the two tracing macros it passes
// inputs_arr, outputs_arr and internals_arr, unchanged, to Layer::forward_fallback.
// NOTE(review): forward_fallback's body is not visible in this chunk; presumably it unpacks
// the arrays and dispatches to the std::vector<Mat*> overload that follows - confirm.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
// Tracing macros run before the delegated call.
CV_TRACE_FUNCTION();
// Attaches the enclosing layer's `name` to the trace under the key "name".
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Explicitly qualified base-class helper; this overload attempts no CV_OCL_RUN branch.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment