Commit 9d25bd58 authored by Alexander Alekhin

Merge pull request #10754 from dkurt:dnn_ocl_gemv_min_globalsize

parents 85af8735 65a6674c
......@@ -451,23 +451,27 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
uint row_size = M;
uint col_size = N;
size_t localsize[] = { 128 };
size_t globalsize[] = { row_size / 4 * localsize[0] };
uint argId = 0;
k.set(argId++, ocl::KernelArg::PtrReadOnly(A));
k.set(argId++, offA);
k.set(argId++, cl_uint(col_size));
k.set(argId++, cl_uint(col_size%4));
k.set(argId++, ocl::KernelArg::PtrReadOnly(x));
k.set(argId++, offx);
k.set(argId++, alpha);
k.set(argId++, beta);
k.set(argId++, ocl::KernelArg::PtrWriteOnly(y));
k.set(argId++, offy);
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
ret = k.run(1, globalsize, localsize, false);
if (row_size >= 4)
{
size_t localsize[] = { 128 };
size_t globalsize[] = { row_size / 4 * localsize[0] };
uint argId = 0;
k.set(argId++, ocl::KernelArg::PtrReadOnly(A));
k.set(argId++, offA);
k.set(argId++, cl_uint(col_size));
k.set(argId++, cl_uint(col_size%4));
k.set(argId++, ocl::KernelArg::PtrReadOnly(x));
k.set(argId++, offx);
k.set(argId++, alpha);
k.set(argId++, beta);
k.set(argId++, ocl::KernelArg::PtrWriteOnly(y));
k.set(argId++, offy);
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
ret = k.run(1, globalsize, localsize, false);
}
if ((row_size % 4) != 0 && ret)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment