Commit bacc96f4 authored by Alexander Alekhin

dnn(ocl): fix softmax global/local size consistency

parent 942672ad
......@@ -141,26 +141,34 @@ public:
size_t bufSize = internals[0].total();
size_t totalSize =;
// adjust local/global size
size_t internal_localSize[1] = { (bufSize == 1) ? 1 : wgSize };
size_t internal_globalSize[1] = { divUp(bufSize, (unsigned int)internal_localSize[0]) * internal_localSize[0] };
// adjust local/global size (total)
size_t total_localSize[1] = { (totalSize == 1) ? 1 : wgSize };
size_t total_globalSize[1] = { divUp(totalSize, (unsigned int)total_localSize[0]) * total_localSize[0] };
kmax.args((int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
if (!, &bufSize, &wgSize, false))
if (!, internal_globalSize, internal_localSize, false))
return false;
ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
if (!, &totalSize, &wgSize, false))
if (!, total_globalSize, total_localSize, false))
return false;
cv::exp(dstMat, dstMat);
ksum.args((int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
if (!, &bufSize, &wgSize, false))
if (!, internal_globalSize, internal_localSize, false))
return false;
kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
if (!, &totalSize, &wgSize, false))
if (!, total_globalSize, total_localSize, false))
return false;
return true;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment