Commit 65a6674c authored by Dmitry Kurtaev

ocl4dnnGEMV in case of row_size < 4

parent a75840d1
...@@ -451,6 +451,9 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA, ...@@ -451,6 +451,9 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
uint row_size = M; uint row_size = M;
uint col_size = N; uint col_size = N;
if (row_size >= 4)
{
size_t localsize[] = { 128 }; size_t localsize[] = { 128 };
size_t globalsize[] = { row_size / 4 * localsize[0] }; size_t globalsize[] = { row_size / 4 * localsize[0] };
...@@ -468,6 +471,7 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA, ...@@ -468,6 +471,7 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4)); k.set(argId++, NULL, localsize[0] * sizeof(cl_float4));
ret = k.run(1, globalsize, localsize, false); ret = k.run(1, globalsize, localsize, false);
}
if ((row_size % 4) != 0 && ret) if ((row_size % 4) != 0 && ret)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment