Merge pull request #12985 from wzw-intel:vkcom_refine

pull/13088/head
Alexander Alekhin 6 years ago
commit 997ad12730
  1. 50
      modules/dnn/src/dnn.cpp
  2. 6
      modules/dnn/src/vkcom/shader/softmax.comp
  3. 121
      modules/dnn/src/vkcom/shader/softmax_spv.cpp
  4. 2
      modules/dnn/src/vkcom/shader/spv_shader.hpp
  5. 3
      modules/dnn/src/vkcom/src/common.hpp
  6. 3
      modules/dnn/src/vkcom/src/op_softmax.cpp
  7. 0
      modules/dnn/src/vkcom/vulkan/function_list.inl.hpp
  8. 2
      modules/dnn/src/vkcom/vulkan/vk_functions.cpp
  9. 2
      modules/dnn/src/vkcom/vulkan/vk_functions.hpp
  10. 4
      modules/dnn/src/vkcom/vulkan/vk_loader.cpp

@ -1411,9 +1411,44 @@ struct Net::Impl
continue;
}
if (ld.type == "Convolution")
{
std::vector<MatShape> in_shapes;
std::vector<MatShape> out_shapes;
CV_Assert(ld.inputBlobs.size() == ld.outputBlobs.size());
for (int i = 0; i < ld.inputBlobs.size(); i++)
{
in_shapes.push_back(shape(*ld.inputBlobs[i]));
out_shapes.push_back(shape(ld.outputBlobs[i]));
}
int64 flops = layer->getFLOPS(in_shapes, out_shapes);
// FIXME
//
// This is a workaround for a GPU hang on heavy convolution workloads ( > 10 GFLOPS).
// For such a long-running task, vkWaitForFences() returns without error, but the next call to
// vkQueueSubmit() returns -4, i.e. "VK_ERROR_DEVICE_LOST", and the driver reports a GPU hang.
//
// The root cause of the GPU hang needs more investigation, and the convolution shader needs
// to be optimized to reduce processing time.
if (flops > CV_BIG_INT(10) * 1000 * 1000 * 1000)
{
continue;
}
}
ld.skip = false;
ld.backendNodes[DNN_BACKEND_VKCOM] =
layer->initVkCom(ld.inputBlobsWrappers);
try
{
ld.backendNodes[DNN_BACKEND_VKCOM] =
layer->initVkCom(ld.inputBlobsWrappers);
}
catch (const cv::Exception& e)
{
CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
}
}
#endif
}
@ -2318,7 +2353,16 @@ struct Net::Impl
}
else if (preferableBackend == DNN_BACKEND_VKCOM)
{
forwardVkCom(ld.outputBlobsWrappers, node);
try
{
forwardVkCom(ld.outputBlobsWrappers, node);
}
catch (const cv::Exception& e)
{
CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
it->second = Ptr<BackendNode>();
forwardLayer(ld);
}
}
else
{

@ -18,6 +18,7 @@ layout(push_constant) uniform pushBlock {
int channel_size;
int outer_size;
int channels;
int logsoftmax;
} p;
layout(local_size_x = LOCAL_SZ_X, local_size_y = 1, local_size_z = 1) in;
@ -68,9 +69,8 @@ void main()
for (int i = 0; i < p.channel_size; ++i)
{
float v = output_buffer[index] / sum_buffer[reduced_buffer_off + i];
#ifdef LOG_SOFTMAX
v = log(v);
#endif
if (p.logsoftmax == 1)
v = log(v);
output_buffer[index] = v;
index++;
}

@ -9,8 +9,8 @@
namespace cv { namespace dnn { namespace vkcom {
extern const unsigned int softmax_spv[1440] = {
0x07230203,0x00010000,0x00080001,0x000000ec,0x00000000,0x00020011,0x00000001,0x0006000b,
extern const unsigned int softmax_spv[1496] = {
0x07230203,0x00010000,0x00080001,0x000000f4,0x00000000,0x00020011,0x00000001,0x0006000b,
0x00000001,0x4c534c47,0x6474732e,0x3035342e,0x00000000,0x0003000e,0x00000000,0x00000001,
0x0006000f,0x00000005,0x00000004,0x6e69616d,0x00000000,0x0000000c,0x00060010,0x00000004,
0x00000011,0x00000100,0x00000001,0x00000001,0x00030003,0x00000002,0x000001c2,0x00040005,
@ -18,53 +18,55 @@ extern const unsigned int softmax_spv[1440] = {
0x475f6c67,0x61626f6c,0x766e496c,0x7461636f,0x496e6f69,0x00000044,0x00050005,0x00000013,
0x68737570,0x636f6c42,0x0000006b,0x00070006,0x00000013,0x00000000,0x6e616863,0x5f6c656e,
0x657a6973,0x00000000,0x00060006,0x00000013,0x00000001,0x6574756f,0x69735f72,0x0000657a,
0x00060006,0x00000013,0x00000002,0x6e616863,0x736c656e,0x00000000,0x00030005,0x00000015,
0x00000070,0x00050005,0x0000001f,0x626f6c67,0x6f5f6c61,0x00006666,0x00070005,0x00000029,
0x75646572,0x5f646563,0x66667562,0x6f5f7265,0x00006666,0x00040005,0x0000002e,0x65646e69,
0x00000078,0x00030005,0x00000030,0x00000069,0x00040005,0x0000003c,0x31667562,0x00000000,
0x00060006,0x0000003c,0x00000000,0x5f78616d,0x66667562,0x00007265,0x00030005,0x0000003e,
0x00000000,0x00040005,0x00000043,0x30667562,0x00000000,0x00070006,0x00000043,0x00000000,
0x75706e69,0x75625f74,0x72656666,0x00000000,0x00030005,0x00000045,0x00000000,0x00030005,
0x0000004f,0x00000063,0x00030005,0x00000059,0x00000069,0x00030005,0x00000076,0x00000069,
0x00040005,0x00000081,0x32667562,0x00000000,0x00060006,0x00000081,0x00000000,0x5f6d7573,
0x66667562,0x00007265,0x00030005,0x00000083,0x00000000,0x00030005,0x0000008c,0x00000063,
0x00030005,0x00000096,0x00000069,0x00040005,0x000000a1,0x5f707865,0x006c6176,0x00040005,
0x000000ad,0x33667562,0x00000000,0x00070006,0x000000ad,0x00000000,0x7074756f,0x625f7475,
0x65666675,0x00000072,0x00030005,0x000000af,0x00000000,0x00030005,0x000000c2,0x00000063,
0x00030005,0x000000cc,0x00000069,0x00030005,0x000000d6,0x00000076,0x00040047,0x0000000c,
0x0000000b,0x0000001c,0x00050048,0x00000013,0x00000000,0x00000023,0x00000000,0x00050048,
0x00000013,0x00000001,0x00000023,0x00000004,0x00050048,0x00000013,0x00000002,0x00000023,
0x00000008,0x00030047,0x00000013,0x00000002,0x00040047,0x0000003b,0x00000006,0x00000004,
0x00050048,0x0000003c,0x00000000,0x00000023,0x00000000,0x00030047,0x0000003c,0x00000003,
0x00040047,0x0000003e,0x00000022,0x00000000,0x00040047,0x0000003e,0x00000021,0x00000001,
0x00040047,0x00000042,0x00000006,0x00000004,0x00040048,0x00000043,0x00000000,0x00000018,
0x00050048,0x00000043,0x00000000,0x00000023,0x00000000,0x00030047,0x00000043,0x00000003,
0x00040047,0x00000045,0x00000022,0x00000000,0x00040047,0x00000045,0x00000021,0x00000000,
0x00040047,0x00000080,0x00000006,0x00000004,0x00050048,0x00000081,0x00000000,0x00000023,
0x00000000,0x00030047,0x00000081,0x00000003,0x00040047,0x00000083,0x00000022,0x00000000,
0x00040047,0x00000083,0x00000021,0x00000002,0x00040047,0x000000ac,0x00000006,0x00000004,
0x00050048,0x000000ad,0x00000000,0x00000023,0x00000000,0x00030047,0x000000ad,0x00000003,
0x00040047,0x000000af,0x00000022,0x00000000,0x00040047,0x000000af,0x00000021,0x00000003,
0x00040047,0x000000eb,0x0000000b,0x00000019,0x00020013,0x00000002,0x00030021,0x00000003,
0x00000002,0x00040015,0x00000006,0x00000020,0x00000001,0x00040020,0x00000007,0x00000007,
0x00000006,0x00040015,0x00000009,0x00000020,0x00000000,0x00040017,0x0000000a,0x00000009,
0x00000003,0x00040020,0x0000000b,0x00000001,0x0000000a,0x0004003b,0x0000000b,0x0000000c,
0x00000001,0x0004002b,0x00000009,0x0000000d,0x00000000,0x00040020,0x0000000e,0x00000001,
0x00000009,0x0005001e,0x00000013,0x00000006,0x00000006,0x00000006,0x00040020,0x00000014,
0x00000009,0x00000013,0x0004003b,0x00000014,0x00000015,0x00000009,0x0004002b,0x00000006,
0x00000016,0x00000001,0x00040020,0x00000017,0x00000009,0x00000006,0x00020014,0x0000001a,
0x0004002b,0x00000006,0x00000021,0x00000002,0x0004002b,0x00000006,0x00000025,0x00000000,
0x00030016,0x0000003a,0x00000020,0x0003001d,0x0000003b,0x0000003a,0x0003001e,0x0000003c,
0x0000003b,0x00040020,0x0000003d,0x00000002,0x0000003c,0x0004003b,0x0000003d,0x0000003e,
0x00000002,0x0003001d,0x00000042,0x0000003a,0x0003001e,0x00000043,0x00000042,0x00040020,
0x00000044,0x00000002,0x00000043,0x0004003b,0x00000044,0x00000045,0x00000002,0x00040020,
0x00000047,0x00000002,0x0000003a,0x0003001d,0x00000080,0x0000003a,0x0003001e,0x00000081,
0x00000080,0x00040020,0x00000082,0x00000002,0x00000081,0x0004003b,0x00000082,0x00000083,
0x00000002,0x0004002b,0x0000003a,0x00000087,0x00000000,0x00040020,0x000000a0,0x00000007,
0x0000003a,0x0003001d,0x000000ac,0x0000003a,0x0003001e,0x000000ad,0x000000ac,0x00040020,
0x000000ae,0x00000002,0x000000ad,0x0004003b,0x000000ae,0x000000af,0x00000002,0x0004002b,
0x00000009,0x000000e9,0x00000100,0x0004002b,0x00000009,0x000000ea,0x00000001,0x0006002c,
0x0000000a,0x000000eb,0x000000e9,0x000000ea,0x000000ea,0x00050036,0x00000002,0x00000004,
0x00060006,0x00000013,0x00000002,0x6e616863,0x736c656e,0x00000000,0x00060006,0x00000013,
0x00000003,0x73676f6c,0x6d74666f,0x00007861,0x00030005,0x00000015,0x00000070,0x00050005,
0x0000001f,0x626f6c67,0x6f5f6c61,0x00006666,0x00070005,0x00000029,0x75646572,0x5f646563,
0x66667562,0x6f5f7265,0x00006666,0x00040005,0x0000002e,0x65646e69,0x00000078,0x00030005,
0x00000030,0x00000069,0x00040005,0x0000003c,0x31667562,0x00000000,0x00060006,0x0000003c,
0x00000000,0x5f78616d,0x66667562,0x00007265,0x00030005,0x0000003e,0x00000000,0x00040005,
0x00000043,0x30667562,0x00000000,0x00070006,0x00000043,0x00000000,0x75706e69,0x75625f74,
0x72656666,0x00000000,0x00030005,0x00000045,0x00000000,0x00030005,0x0000004f,0x00000063,
0x00030005,0x00000059,0x00000069,0x00030005,0x00000076,0x00000069,0x00040005,0x00000081,
0x32667562,0x00000000,0x00060006,0x00000081,0x00000000,0x5f6d7573,0x66667562,0x00007265,
0x00030005,0x00000083,0x00000000,0x00030005,0x0000008c,0x00000063,0x00030005,0x00000096,
0x00000069,0x00040005,0x000000a1,0x5f707865,0x006c6176,0x00040005,0x000000ad,0x33667562,
0x00000000,0x00070006,0x000000ad,0x00000000,0x7074756f,0x625f7475,0x65666675,0x00000072,
0x00030005,0x000000af,0x00000000,0x00030005,0x000000c2,0x00000063,0x00030005,0x000000cc,
0x00000069,0x00030005,0x000000d6,0x00000076,0x00040047,0x0000000c,0x0000000b,0x0000001c,
0x00050048,0x00000013,0x00000000,0x00000023,0x00000000,0x00050048,0x00000013,0x00000001,
0x00000023,0x00000004,0x00050048,0x00000013,0x00000002,0x00000023,0x00000008,0x00050048,
0x00000013,0x00000003,0x00000023,0x0000000c,0x00030047,0x00000013,0x00000002,0x00040047,
0x0000003b,0x00000006,0x00000004,0x00050048,0x0000003c,0x00000000,0x00000023,0x00000000,
0x00030047,0x0000003c,0x00000003,0x00040047,0x0000003e,0x00000022,0x00000000,0x00040047,
0x0000003e,0x00000021,0x00000001,0x00040047,0x00000042,0x00000006,0x00000004,0x00040048,
0x00000043,0x00000000,0x00000018,0x00050048,0x00000043,0x00000000,0x00000023,0x00000000,
0x00030047,0x00000043,0x00000003,0x00040047,0x00000045,0x00000022,0x00000000,0x00040047,
0x00000045,0x00000021,0x00000000,0x00040047,0x00000080,0x00000006,0x00000004,0x00050048,
0x00000081,0x00000000,0x00000023,0x00000000,0x00030047,0x00000081,0x00000003,0x00040047,
0x00000083,0x00000022,0x00000000,0x00040047,0x00000083,0x00000021,0x00000002,0x00040047,
0x000000ac,0x00000006,0x00000004,0x00050048,0x000000ad,0x00000000,0x00000023,0x00000000,
0x00030047,0x000000ad,0x00000003,0x00040047,0x000000af,0x00000022,0x00000000,0x00040047,
0x000000af,0x00000021,0x00000003,0x00040047,0x000000f3,0x0000000b,0x00000019,0x00020013,
0x00000002,0x00030021,0x00000003,0x00000002,0x00040015,0x00000006,0x00000020,0x00000001,
0x00040020,0x00000007,0x00000007,0x00000006,0x00040015,0x00000009,0x00000020,0x00000000,
0x00040017,0x0000000a,0x00000009,0x00000003,0x00040020,0x0000000b,0x00000001,0x0000000a,
0x0004003b,0x0000000b,0x0000000c,0x00000001,0x0004002b,0x00000009,0x0000000d,0x00000000,
0x00040020,0x0000000e,0x00000001,0x00000009,0x0006001e,0x00000013,0x00000006,0x00000006,
0x00000006,0x00000006,0x00040020,0x00000014,0x00000009,0x00000013,0x0004003b,0x00000014,
0x00000015,0x00000009,0x0004002b,0x00000006,0x00000016,0x00000001,0x00040020,0x00000017,
0x00000009,0x00000006,0x00020014,0x0000001a,0x0004002b,0x00000006,0x00000021,0x00000002,
0x0004002b,0x00000006,0x00000025,0x00000000,0x00030016,0x0000003a,0x00000020,0x0003001d,
0x0000003b,0x0000003a,0x0003001e,0x0000003c,0x0000003b,0x00040020,0x0000003d,0x00000002,
0x0000003c,0x0004003b,0x0000003d,0x0000003e,0x00000002,0x0003001d,0x00000042,0x0000003a,
0x0003001e,0x00000043,0x00000042,0x00040020,0x00000044,0x00000002,0x00000043,0x0004003b,
0x00000044,0x00000045,0x00000002,0x00040020,0x00000047,0x00000002,0x0000003a,0x0003001d,
0x00000080,0x0000003a,0x0003001e,0x00000081,0x00000080,0x00040020,0x00000082,0x00000002,
0x00000081,0x0004003b,0x00000082,0x00000083,0x00000002,0x0004002b,0x0000003a,0x00000087,
0x00000000,0x00040020,0x000000a0,0x00000007,0x0000003a,0x0003001d,0x000000ac,0x0000003a,
0x0003001e,0x000000ad,0x000000ac,0x00040020,0x000000ae,0x00000002,0x000000ad,0x0004003b,
0x000000ae,0x000000af,0x00000002,0x0004002b,0x00000006,0x000000e0,0x00000003,0x0004002b,
0x00000009,0x000000f1,0x00000100,0x0004002b,0x00000009,0x000000f2,0x00000001,0x0006002c,
0x0000000a,0x000000f3,0x000000f1,0x000000f2,0x000000f2,0x00050036,0x00000002,0x00000004,
0x00000000,0x00000003,0x000200f8,0x00000005,0x0004003b,0x00000007,0x00000008,0x00000007,
0x0004003b,0x00000007,0x0000001f,0x00000007,0x0004003b,0x00000007,0x00000029,0x00000007,
0x0004003b,0x00000007,0x0000002e,0x00000007,0x0004003b,0x00000007,0x00000030,0x00000007,
@ -181,15 +183,20 @@ extern const unsigned int softmax_spv[1440] = {
0x000000db,0x000000cc,0x00050080,0x00000006,0x000000dc,0x000000da,0x000000db,0x00060041,
0x00000047,0x000000dd,0x00000083,0x00000025,0x000000dc,0x0004003d,0x0000003a,0x000000de,
0x000000dd,0x00050088,0x0000003a,0x000000df,0x000000d9,0x000000de,0x0003003e,0x000000d6,
0x000000df,0x0004003d,0x00000006,0x000000e0,0x0000002e,0x0004003d,0x0000003a,0x000000e1,
0x000000d6,0x00060041,0x00000047,0x000000e2,0x000000af,0x00000025,0x000000e0,0x0003003e,
0x000000e2,0x000000e1,0x0004003d,0x00000006,0x000000e3,0x0000002e,0x00050080,0x00000006,
0x000000e4,0x000000e3,0x00000016,0x0003003e,0x0000002e,0x000000e4,0x000200f9,0x000000d0,
0x000200f8,0x000000d0,0x0004003d,0x00000006,0x000000e5,0x000000cc,0x00050080,0x00000006,
0x000000e6,0x000000e5,0x00000016,0x0003003e,0x000000cc,0x000000e6,0x000200f9,0x000000cd,
0x000000df,0x00050041,0x00000017,0x000000e1,0x00000015,0x000000e0,0x0004003d,0x00000006,
0x000000e2,0x000000e1,0x000500aa,0x0000001a,0x000000e3,0x000000e2,0x00000016,0x000300f7,
0x000000e5,0x00000000,0x000400fa,0x000000e3,0x000000e4,0x000000e5,0x000200f8,0x000000e4,
0x0004003d,0x0000003a,0x000000e6,0x000000d6,0x0006000c,0x0000003a,0x000000e7,0x00000001,
0x0000001c,0x000000e6,0x0003003e,0x000000d6,0x000000e7,0x000200f9,0x000000e5,0x000200f8,
0x000000e5,0x0004003d,0x00000006,0x000000e8,0x0000002e,0x0004003d,0x0000003a,0x000000e9,
0x000000d6,0x00060041,0x00000047,0x000000ea,0x000000af,0x00000025,0x000000e8,0x0003003e,
0x000000ea,0x000000e9,0x0004003d,0x00000006,0x000000eb,0x0000002e,0x00050080,0x00000006,
0x000000ec,0x000000eb,0x00000016,0x0003003e,0x0000002e,0x000000ec,0x000200f9,0x000000d0,
0x000200f8,0x000000d0,0x0004003d,0x00000006,0x000000ed,0x000000cc,0x00050080,0x00000006,
0x000000ee,0x000000ed,0x00000016,0x0003003e,0x000000cc,0x000000ee,0x000200f9,0x000000cd,
0x000200f8,0x000000cf,0x000200f9,0x000000c6,0x000200f8,0x000000c6,0x0004003d,0x00000006,
0x000000e7,0x000000c2,0x00050080,0x00000006,0x000000e8,0x000000e7,0x00000016,0x0003003e,
0x000000c2,0x000000e8,0x000200f9,0x000000c3,0x000200f8,0x000000c5,0x000100fd,0x00010038
0x000000ef,0x000000c2,0x00050080,0x00000006,0x000000f0,0x000000ef,0x00000016,0x0003003e,
0x000000c2,0x000000f0,0x000200f9,0x000000c3,0x000200f8,0x000000c5,0x000100fd,0x00010038
};
}}} // namespace cv::dnn::vkcom

@ -16,7 +16,7 @@ extern const unsigned int permute_spv[765];
extern const unsigned int lrn_spv[1845];
extern const unsigned int concat_spv[541];
extern const unsigned int avg_pool_spv[1538];
extern const unsigned int softmax_spv[1440];
extern const unsigned int softmax_spv[1496];
extern const unsigned int prior_box_spv[1480];
extern const unsigned int max_pool_spv[1449];
extern const unsigned int relu_spv[502];

@ -42,7 +42,8 @@ enum ShapeIdx
{ \
if (f != VK_SUCCESS) \
{ \
CV_LOG_WARNING(NULL, "Vulkan check failed"); \
CV_LOG_ERROR(NULL, "Vulkan check failed, result = " << f); \
CV_Error(Error::StsError, "Vulkan check failed"); \
} \
}

@ -22,6 +22,7 @@ struct SoftmaxParam {
int channel_size;
int outer_size;
int channels;
int logsoftmax;
};
OpSoftmax::OpSoftmax(const int axis, const bool log_softmax)
@ -90,7 +91,7 @@ bool OpSoftmax::forward(Tensor& in, Tensor& out)
bindTensor(device_, *max_tensor_, 1, descriptor_set_);
bindTensor(device_, *sum_tensor_, 2, descriptor_set_);
bindTensor(device_, out, 3, descriptor_set_);
SoftmaxParam param = {channel_size_, outer_size_, channels_};
SoftmaxParam param = {channel_size_, outer_size_, channels_, log_softmax_ == true ? 1 : 0};
recordCommandBuffer((void *)&param, sizeof(SoftmaxParam));
runCommandBuffer();
return true;

@ -17,7 +17,7 @@
namespace cv { namespace dnn { namespace vkcom {
#include "function_list.inl"
#include "function_list.inl.hpp"
}}} // namespace cv::dnn::vkcom
#endif // HAVE_VULKAN

@ -20,7 +20,7 @@
namespace cv { namespace dnn { namespace vkcom {
#include "function_list.inl"
#include "function_list.inl.hpp"
}}} // namespace cv::dnn::vkcom
#endif // HAVE_VULKAN

@ -57,7 +57,7 @@ bool loadVulkanFunctions(VkInstance& instance)
return false; \
}
#include "function_list.inl"
#include "function_list.inl.hpp"
return true;
}
@ -74,7 +74,7 @@ bool loadVulkanGlobalFunctions()
return false; \
}
#include "function_list.inl"
#include "function_list.inl.hpp"
return true;
}

Loading…
Cancel
Save