|
|
@ -451,6 +451,9 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA, |
|
|
|
|
|
|
|
|
|
|
|
uint row_size = M; |
|
|
|
uint row_size = M; |
|
|
|
uint col_size = N; |
|
|
|
uint col_size = N; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (row_size >= 4) |
|
|
|
|
|
|
|
{ |
|
|
|
size_t localsize[] = { 128 }; |
|
|
|
size_t localsize[] = { 128 }; |
|
|
|
size_t globalsize[] = { row_size / 4 * localsize[0] }; |
|
|
|
size_t globalsize[] = { row_size / 4 * localsize[0] }; |
|
|
|
|
|
|
|
|
|
|
@ -468,6 +471,7 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA, |
|
|
|
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4)); |
|
|
|
k.set(argId++, NULL, localsize[0] * sizeof(cl_float4)); |
|
|
|
|
|
|
|
|
|
|
|
ret = k.run(1, globalsize, localsize, false); |
|
|
|
ret = k.run(1, globalsize, localsize, false); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if ((row_size % 4) != 0 && ret) |
|
|
|
if ((row_size % 4) != 0 && ret) |
|
|
|
{ |
|
|
|
{ |
|
|
|