From 7fff245f87a49e5f65f499e9f71b44cae1888deb Mon Sep 17 00:00:00 2001 From: Wu Zhiwen Date: Thu, 18 Oct 2018 20:22:42 +0800 Subject: [PATCH 1/4] dnn/Vulkan: Rename function_list.inl Signed-off-by: Wu Zhiwen --- .../vkcom/vulkan/{function_list.inl => function_list.inl.hpp} | 0 modules/dnn/src/vkcom/vulkan/vk_functions.cpp | 2 +- modules/dnn/src/vkcom/vulkan/vk_functions.hpp | 2 +- modules/dnn/src/vkcom/vulkan/vk_loader.cpp | 4 ++-- 4 files changed, 4 insertions(+), 4 deletions(-) rename modules/dnn/src/vkcom/vulkan/{function_list.inl => function_list.inl.hpp} (100%) diff --git a/modules/dnn/src/vkcom/vulkan/function_list.inl b/modules/dnn/src/vkcom/vulkan/function_list.inl.hpp similarity index 100% rename from modules/dnn/src/vkcom/vulkan/function_list.inl rename to modules/dnn/src/vkcom/vulkan/function_list.inl.hpp diff --git a/modules/dnn/src/vkcom/vulkan/vk_functions.cpp b/modules/dnn/src/vkcom/vulkan/vk_functions.cpp index d43090ee1f..3408a08f44 100644 --- a/modules/dnn/src/vkcom/vulkan/vk_functions.cpp +++ b/modules/dnn/src/vkcom/vulkan/vk_functions.cpp @@ -17,7 +17,7 @@ namespace cv { namespace dnn { namespace vkcom { -#include "function_list.inl" +#include "function_list.inl.hpp" }}} // namespace cv::dnn::vkcom #endif // HAVE_VULKAN diff --git a/modules/dnn/src/vkcom/vulkan/vk_functions.hpp b/modules/dnn/src/vkcom/vulkan/vk_functions.hpp index 02e3bd69ac..a6049c9e59 100644 --- a/modules/dnn/src/vkcom/vulkan/vk_functions.hpp +++ b/modules/dnn/src/vkcom/vulkan/vk_functions.hpp @@ -20,7 +20,7 @@ namespace cv { namespace dnn { namespace vkcom { -#include "function_list.inl" +#include "function_list.inl.hpp" }}} // namespace cv::dnn::vkcom #endif // HAVE_VULKAN diff --git a/modules/dnn/src/vkcom/vulkan/vk_loader.cpp b/modules/dnn/src/vkcom/vulkan/vk_loader.cpp index da7f865adb..9b1cdceab8 100644 --- a/modules/dnn/src/vkcom/vulkan/vk_loader.cpp +++ b/modules/dnn/src/vkcom/vulkan/vk_loader.cpp @@ -57,7 +57,7 @@ bool loadVulkanFunctions(VkInstance& instance) return 
false; \ } -#include "function_list.inl" +#include "function_list.inl.hpp" return true; } @@ -74,7 +74,7 @@ bool loadVulkanGlobalFunctions() return false; \ } -#include "function_list.inl" +#include "function_list.inl.hpp" return true; } From 3914c17b0de0a2eeb3e551158c17d7a7b507d6f2 Mon Sep 17 00:00:00 2001 From: Wu Zhiwen Date: Mon, 29 Oct 2018 14:32:09 +0800 Subject: [PATCH 2/4] dnn/Vulkan: Refine error handle mechanism Fallback to OPENCV backend and CPU target if catch exception from vkcom backend. Signed-off-by: Wu Zhiwen --- modules/dnn/src/dnn.cpp | 24 +++++++++++++++++++++--- modules/dnn/src/vkcom/src/common.hpp | 3 ++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index bec49f4e70..cda864980f 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1412,8 +1412,17 @@ struct Net::Impl } ld.skip = false; - ld.backendNodes[DNN_BACKEND_VKCOM] = - layer->initVkCom(ld.inputBlobsWrappers); + + try + { + ld.backendNodes[DNN_BACKEND_VKCOM] = + layer->initVkCom(ld.inputBlobsWrappers); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what()); + ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>(); + } } #endif } @@ -2318,7 +2327,16 @@ struct Net::Impl } else if (preferableBackend == DNN_BACKEND_VKCOM) { - forwardVkCom(ld.outputBlobsWrappers, node); + try + { + forwardVkCom(ld.outputBlobsWrappers, node); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. 
" << e.what()); + it->second = Ptr<BackendNode>(); + forwardLayer(ld); + } } else { diff --git a/modules/dnn/src/vkcom/src/common.hpp b/modules/dnn/src/vkcom/src/common.hpp index 5465a1a83a..c976f87f7e 100644 --- a/modules/dnn/src/vkcom/src/common.hpp +++ b/modules/dnn/src/vkcom/src/common.hpp @@ -42,7 +42,8 @@ enum ShapeIdx { \ if (f != VK_SUCCESS) \ { \ - CV_LOG_WARNING(NULL, "Vulkan check failed"); \ + CV_LOG_ERROR(NULL, "Vulkan check failed, result = " << f); \ + CV_Error(Error::StsError, "Vulkan check failed"); \ } \ } From 34e9d1eb3cd6c68140e138eef6d0407269326780 Mon Sep 17 00:00:00 2001 From: Wu Zhiwen Date: Mon, 29 Oct 2018 15:58:54 +0800 Subject: [PATCH 3/4] dnn/Vulkan: support log softmax Signed-off-by: Wu Zhiwen --- modules/dnn/src/vkcom/shader/softmax.comp | 6 +- modules/dnn/src/vkcom/shader/softmax_spv.cpp | 121 ++++++++++--------- modules/dnn/src/vkcom/shader/spv_shader.hpp | 2 +- modules/dnn/src/vkcom/src/op_softmax.cpp | 3 +- 4 files changed, 70 insertions(+), 62 deletions(-) diff --git a/modules/dnn/src/vkcom/shader/softmax.comp b/modules/dnn/src/vkcom/shader/softmax.comp index f1f83df087..f73ddb3ffb 100644 --- a/modules/dnn/src/vkcom/shader/softmax.comp +++ b/modules/dnn/src/vkcom/shader/softmax.comp @@ -18,6 +18,7 @@ layout(push_constant) uniform pushBlock { int channel_size; int outer_size; int channels; + int logsoftmax; } p; layout(local_size_x = LOCAL_SZ_X, local_size_y = 1, local_size_z = 1) in; @@ -68,9 +69,8 @@ void main() for (int i = 0; i < p.channel_size; ++i) { float v = output_buffer[index] / sum_buffer[reduced_buffer_off + i]; -#ifdef LOG_SOFTMAX - v = log(v); -#endif + if (p.logsoftmax == 1) + v = log(v); output_buffer[index] = v; index++; } diff --git a/modules/dnn/src/vkcom/shader/softmax_spv.cpp b/modules/dnn/src/vkcom/shader/softmax_spv.cpp index d95a451288..f51dce5788 100644 --- a/modules/dnn/src/vkcom/shader/softmax_spv.cpp +++ b/modules/dnn/src/vkcom/shader/softmax_spv.cpp @@ -9,8 +9,8 @@ namespace cv { namespace dnn { namespace vkcom { 
-extern const unsigned int softmax_spv[1440] = { - 0x07230203,0x00010000,0x00080001,0x000000ec,0x00000000,0x00020011,0x00000001,0x0006000b, +extern const unsigned int softmax_spv[1496] = { + 0x07230203,0x00010000,0x00080001,0x000000f4,0x00000000,0x00020011,0x00000001,0x0006000b, 0x00000001,0x4c534c47,0x6474732e,0x3035342e,0x00000000,0x0003000e,0x00000000,0x00000001, 0x0006000f,0x00000005,0x00000004,0x6e69616d,0x00000000,0x0000000c,0x00060010,0x00000004, 0x00000011,0x00000100,0x00000001,0x00000001,0x00030003,0x00000002,0x000001c2,0x00040005, @@ -18,53 +18,55 @@ extern const unsigned int softmax_spv[1440] = { 0x475f6c67,0x61626f6c,0x766e496c,0x7461636f,0x496e6f69,0x00000044,0x00050005,0x00000013, 0x68737570,0x636f6c42,0x0000006b,0x00070006,0x00000013,0x00000000,0x6e616863,0x5f6c656e, 0x657a6973,0x00000000,0x00060006,0x00000013,0x00000001,0x6574756f,0x69735f72,0x0000657a, - 0x00060006,0x00000013,0x00000002,0x6e616863,0x736c656e,0x00000000,0x00030005,0x00000015, - 0x00000070,0x00050005,0x0000001f,0x626f6c67,0x6f5f6c61,0x00006666,0x00070005,0x00000029, - 0x75646572,0x5f646563,0x66667562,0x6f5f7265,0x00006666,0x00040005,0x0000002e,0x65646e69, - 0x00000078,0x00030005,0x00000030,0x00000069,0x00040005,0x0000003c,0x31667562,0x00000000, - 0x00060006,0x0000003c,0x00000000,0x5f78616d,0x66667562,0x00007265,0x00030005,0x0000003e, - 0x00000000,0x00040005,0x00000043,0x30667562,0x00000000,0x00070006,0x00000043,0x00000000, - 0x75706e69,0x75625f74,0x72656666,0x00000000,0x00030005,0x00000045,0x00000000,0x00030005, - 0x0000004f,0x00000063,0x00030005,0x00000059,0x00000069,0x00030005,0x00000076,0x00000069, - 0x00040005,0x00000081,0x32667562,0x00000000,0x00060006,0x00000081,0x00000000,0x5f6d7573, - 0x66667562,0x00007265,0x00030005,0x00000083,0x00000000,0x00030005,0x0000008c,0x00000063, - 0x00030005,0x00000096,0x00000069,0x00040005,0x000000a1,0x5f707865,0x006c6176,0x00040005, - 0x000000ad,0x33667562,0x00000000,0x00070006,0x000000ad,0x00000000,0x7074756f,0x625f7475, - 
0x65666675,0x00000072,0x00030005,0x000000af,0x00000000,0x00030005,0x000000c2,0x00000063, - 0x00030005,0x000000cc,0x00000069,0x00030005,0x000000d6,0x00000076,0x00040047,0x0000000c, - 0x0000000b,0x0000001c,0x00050048,0x00000013,0x00000000,0x00000023,0x00000000,0x00050048, - 0x00000013,0x00000001,0x00000023,0x00000004,0x00050048,0x00000013,0x00000002,0x00000023, - 0x00000008,0x00030047,0x00000013,0x00000002,0x00040047,0x0000003b,0x00000006,0x00000004, - 0x00050048,0x0000003c,0x00000000,0x00000023,0x00000000,0x00030047,0x0000003c,0x00000003, - 0x00040047,0x0000003e,0x00000022,0x00000000,0x00040047,0x0000003e,0x00000021,0x00000001, - 0x00040047,0x00000042,0x00000006,0x00000004,0x00040048,0x00000043,0x00000000,0x00000018, - 0x00050048,0x00000043,0x00000000,0x00000023,0x00000000,0x00030047,0x00000043,0x00000003, - 0x00040047,0x00000045,0x00000022,0x00000000,0x00040047,0x00000045,0x00000021,0x00000000, - 0x00040047,0x00000080,0x00000006,0x00000004,0x00050048,0x00000081,0x00000000,0x00000023, - 0x00000000,0x00030047,0x00000081,0x00000003,0x00040047,0x00000083,0x00000022,0x00000000, - 0x00040047,0x00000083,0x00000021,0x00000002,0x00040047,0x000000ac,0x00000006,0x00000004, - 0x00050048,0x000000ad,0x00000000,0x00000023,0x00000000,0x00030047,0x000000ad,0x00000003, - 0x00040047,0x000000af,0x00000022,0x00000000,0x00040047,0x000000af,0x00000021,0x00000003, - 0x00040047,0x000000eb,0x0000000b,0x00000019,0x00020013,0x00000002,0x00030021,0x00000003, - 0x00000002,0x00040015,0x00000006,0x00000020,0x00000001,0x00040020,0x00000007,0x00000007, - 0x00000006,0x00040015,0x00000009,0x00000020,0x00000000,0x00040017,0x0000000a,0x00000009, - 0x00000003,0x00040020,0x0000000b,0x00000001,0x0000000a,0x0004003b,0x0000000b,0x0000000c, - 0x00000001,0x0004002b,0x00000009,0x0000000d,0x00000000,0x00040020,0x0000000e,0x00000001, - 0x00000009,0x0005001e,0x00000013,0x00000006,0x00000006,0x00000006,0x00040020,0x00000014, - 0x00000009,0x00000013,0x0004003b,0x00000014,0x00000015,0x00000009,0x0004002b,0x00000006, 
- 0x00000016,0x00000001,0x00040020,0x00000017,0x00000009,0x00000006,0x00020014,0x0000001a, - 0x0004002b,0x00000006,0x00000021,0x00000002,0x0004002b,0x00000006,0x00000025,0x00000000, - 0x00030016,0x0000003a,0x00000020,0x0003001d,0x0000003b,0x0000003a,0x0003001e,0x0000003c, - 0x0000003b,0x00040020,0x0000003d,0x00000002,0x0000003c,0x0004003b,0x0000003d,0x0000003e, - 0x00000002,0x0003001d,0x00000042,0x0000003a,0x0003001e,0x00000043,0x00000042,0x00040020, - 0x00000044,0x00000002,0x00000043,0x0004003b,0x00000044,0x00000045,0x00000002,0x00040020, - 0x00000047,0x00000002,0x0000003a,0x0003001d,0x00000080,0x0000003a,0x0003001e,0x00000081, - 0x00000080,0x00040020,0x00000082,0x00000002,0x00000081,0x0004003b,0x00000082,0x00000083, - 0x00000002,0x0004002b,0x0000003a,0x00000087,0x00000000,0x00040020,0x000000a0,0x00000007, - 0x0000003a,0x0003001d,0x000000ac,0x0000003a,0x0003001e,0x000000ad,0x000000ac,0x00040020, - 0x000000ae,0x00000002,0x000000ad,0x0004003b,0x000000ae,0x000000af,0x00000002,0x0004002b, - 0x00000009,0x000000e9,0x00000100,0x0004002b,0x00000009,0x000000ea,0x00000001,0x0006002c, - 0x0000000a,0x000000eb,0x000000e9,0x000000ea,0x000000ea,0x00050036,0x00000002,0x00000004, + 0x00060006,0x00000013,0x00000002,0x6e616863,0x736c656e,0x00000000,0x00060006,0x00000013, + 0x00000003,0x73676f6c,0x6d74666f,0x00007861,0x00030005,0x00000015,0x00000070,0x00050005, + 0x0000001f,0x626f6c67,0x6f5f6c61,0x00006666,0x00070005,0x00000029,0x75646572,0x5f646563, + 0x66667562,0x6f5f7265,0x00006666,0x00040005,0x0000002e,0x65646e69,0x00000078,0x00030005, + 0x00000030,0x00000069,0x00040005,0x0000003c,0x31667562,0x00000000,0x00060006,0x0000003c, + 0x00000000,0x5f78616d,0x66667562,0x00007265,0x00030005,0x0000003e,0x00000000,0x00040005, + 0x00000043,0x30667562,0x00000000,0x00070006,0x00000043,0x00000000,0x75706e69,0x75625f74, + 0x72656666,0x00000000,0x00030005,0x00000045,0x00000000,0x00030005,0x0000004f,0x00000063, + 
0x00030005,0x00000059,0x00000069,0x00030005,0x00000076,0x00000069,0x00040005,0x00000081, + 0x32667562,0x00000000,0x00060006,0x00000081,0x00000000,0x5f6d7573,0x66667562,0x00007265, + 0x00030005,0x00000083,0x00000000,0x00030005,0x0000008c,0x00000063,0x00030005,0x00000096, + 0x00000069,0x00040005,0x000000a1,0x5f707865,0x006c6176,0x00040005,0x000000ad,0x33667562, + 0x00000000,0x00070006,0x000000ad,0x00000000,0x7074756f,0x625f7475,0x65666675,0x00000072, + 0x00030005,0x000000af,0x00000000,0x00030005,0x000000c2,0x00000063,0x00030005,0x000000cc, + 0x00000069,0x00030005,0x000000d6,0x00000076,0x00040047,0x0000000c,0x0000000b,0x0000001c, + 0x00050048,0x00000013,0x00000000,0x00000023,0x00000000,0x00050048,0x00000013,0x00000001, + 0x00000023,0x00000004,0x00050048,0x00000013,0x00000002,0x00000023,0x00000008,0x00050048, + 0x00000013,0x00000003,0x00000023,0x0000000c,0x00030047,0x00000013,0x00000002,0x00040047, + 0x0000003b,0x00000006,0x00000004,0x00050048,0x0000003c,0x00000000,0x00000023,0x00000000, + 0x00030047,0x0000003c,0x00000003,0x00040047,0x0000003e,0x00000022,0x00000000,0x00040047, + 0x0000003e,0x00000021,0x00000001,0x00040047,0x00000042,0x00000006,0x00000004,0x00040048, + 0x00000043,0x00000000,0x00000018,0x00050048,0x00000043,0x00000000,0x00000023,0x00000000, + 0x00030047,0x00000043,0x00000003,0x00040047,0x00000045,0x00000022,0x00000000,0x00040047, + 0x00000045,0x00000021,0x00000000,0x00040047,0x00000080,0x00000006,0x00000004,0x00050048, + 0x00000081,0x00000000,0x00000023,0x00000000,0x00030047,0x00000081,0x00000003,0x00040047, + 0x00000083,0x00000022,0x00000000,0x00040047,0x00000083,0x00000021,0x00000002,0x00040047, + 0x000000ac,0x00000006,0x00000004,0x00050048,0x000000ad,0x00000000,0x00000023,0x00000000, + 0x00030047,0x000000ad,0x00000003,0x00040047,0x000000af,0x00000022,0x00000000,0x00040047, + 0x000000af,0x00000021,0x00000003,0x00040047,0x000000f3,0x0000000b,0x00000019,0x00020013, + 0x00000002,0x00030021,0x00000003,0x00000002,0x00040015,0x00000006,0x00000020,0x00000001, 
+ 0x00040020,0x00000007,0x00000007,0x00000006,0x00040015,0x00000009,0x00000020,0x00000000, + 0x00040017,0x0000000a,0x00000009,0x00000003,0x00040020,0x0000000b,0x00000001,0x0000000a, + 0x0004003b,0x0000000b,0x0000000c,0x00000001,0x0004002b,0x00000009,0x0000000d,0x00000000, + 0x00040020,0x0000000e,0x00000001,0x00000009,0x0006001e,0x00000013,0x00000006,0x00000006, + 0x00000006,0x00000006,0x00040020,0x00000014,0x00000009,0x00000013,0x0004003b,0x00000014, + 0x00000015,0x00000009,0x0004002b,0x00000006,0x00000016,0x00000001,0x00040020,0x00000017, + 0x00000009,0x00000006,0x00020014,0x0000001a,0x0004002b,0x00000006,0x00000021,0x00000002, + 0x0004002b,0x00000006,0x00000025,0x00000000,0x00030016,0x0000003a,0x00000020,0x0003001d, + 0x0000003b,0x0000003a,0x0003001e,0x0000003c,0x0000003b,0x00040020,0x0000003d,0x00000002, + 0x0000003c,0x0004003b,0x0000003d,0x0000003e,0x00000002,0x0003001d,0x00000042,0x0000003a, + 0x0003001e,0x00000043,0x00000042,0x00040020,0x00000044,0x00000002,0x00000043,0x0004003b, + 0x00000044,0x00000045,0x00000002,0x00040020,0x00000047,0x00000002,0x0000003a,0x0003001d, + 0x00000080,0x0000003a,0x0003001e,0x00000081,0x00000080,0x00040020,0x00000082,0x00000002, + 0x00000081,0x0004003b,0x00000082,0x00000083,0x00000002,0x0004002b,0x0000003a,0x00000087, + 0x00000000,0x00040020,0x000000a0,0x00000007,0x0000003a,0x0003001d,0x000000ac,0x0000003a, + 0x0003001e,0x000000ad,0x000000ac,0x00040020,0x000000ae,0x00000002,0x000000ad,0x0004003b, + 0x000000ae,0x000000af,0x00000002,0x0004002b,0x00000006,0x000000e0,0x00000003,0x0004002b, + 0x00000009,0x000000f1,0x00000100,0x0004002b,0x00000009,0x000000f2,0x00000001,0x0006002c, + 0x0000000a,0x000000f3,0x000000f1,0x000000f2,0x000000f2,0x00050036,0x00000002,0x00000004, 0x00000000,0x00000003,0x000200f8,0x00000005,0x0004003b,0x00000007,0x00000008,0x00000007, 0x0004003b,0x00000007,0x0000001f,0x00000007,0x0004003b,0x00000007,0x00000029,0x00000007, 0x0004003b,0x00000007,0x0000002e,0x00000007,0x0004003b,0x00000007,0x00000030,0x00000007, @@ 
-181,15 +183,20 @@ extern const unsigned int softmax_spv[1440] = { 0x000000db,0x000000cc,0x00050080,0x00000006,0x000000dc,0x000000da,0x000000db,0x00060041, 0x00000047,0x000000dd,0x00000083,0x00000025,0x000000dc,0x0004003d,0x0000003a,0x000000de, 0x000000dd,0x00050088,0x0000003a,0x000000df,0x000000d9,0x000000de,0x0003003e,0x000000d6, - 0x000000df,0x0004003d,0x00000006,0x000000e0,0x0000002e,0x0004003d,0x0000003a,0x000000e1, - 0x000000d6,0x00060041,0x00000047,0x000000e2,0x000000af,0x00000025,0x000000e0,0x0003003e, - 0x000000e2,0x000000e1,0x0004003d,0x00000006,0x000000e3,0x0000002e,0x00050080,0x00000006, - 0x000000e4,0x000000e3,0x00000016,0x0003003e,0x0000002e,0x000000e4,0x000200f9,0x000000d0, - 0x000200f8,0x000000d0,0x0004003d,0x00000006,0x000000e5,0x000000cc,0x00050080,0x00000006, - 0x000000e6,0x000000e5,0x00000016,0x0003003e,0x000000cc,0x000000e6,0x000200f9,0x000000cd, + 0x000000df,0x00050041,0x00000017,0x000000e1,0x00000015,0x000000e0,0x0004003d,0x00000006, + 0x000000e2,0x000000e1,0x000500aa,0x0000001a,0x000000e3,0x000000e2,0x00000016,0x000300f7, + 0x000000e5,0x00000000,0x000400fa,0x000000e3,0x000000e4,0x000000e5,0x000200f8,0x000000e4, + 0x0004003d,0x0000003a,0x000000e6,0x000000d6,0x0006000c,0x0000003a,0x000000e7,0x00000001, + 0x0000001c,0x000000e6,0x0003003e,0x000000d6,0x000000e7,0x000200f9,0x000000e5,0x000200f8, + 0x000000e5,0x0004003d,0x00000006,0x000000e8,0x0000002e,0x0004003d,0x0000003a,0x000000e9, + 0x000000d6,0x00060041,0x00000047,0x000000ea,0x000000af,0x00000025,0x000000e8,0x0003003e, + 0x000000ea,0x000000e9,0x0004003d,0x00000006,0x000000eb,0x0000002e,0x00050080,0x00000006, + 0x000000ec,0x000000eb,0x00000016,0x0003003e,0x0000002e,0x000000ec,0x000200f9,0x000000d0, + 0x000200f8,0x000000d0,0x0004003d,0x00000006,0x000000ed,0x000000cc,0x00050080,0x00000006, + 0x000000ee,0x000000ed,0x00000016,0x0003003e,0x000000cc,0x000000ee,0x000200f9,0x000000cd, 0x000200f8,0x000000cf,0x000200f9,0x000000c6,0x000200f8,0x000000c6,0x0004003d,0x00000006, - 
0x000000e7,0x000000c2,0x00050080,0x00000006,0x000000e8,0x000000e7,0x00000016,0x0003003e, - 0x000000c2,0x000000e8,0x000200f9,0x000000c3,0x000200f8,0x000000c5,0x000100fd,0x00010038 + 0x000000ef,0x000000c2,0x00050080,0x00000006,0x000000f0,0x000000ef,0x00000016,0x0003003e, + 0x000000c2,0x000000f0,0x000200f9,0x000000c3,0x000200f8,0x000000c5,0x000100fd,0x00010038 }; }}} // namespace cv::dnn::vkcom diff --git a/modules/dnn/src/vkcom/shader/spv_shader.hpp b/modules/dnn/src/vkcom/shader/spv_shader.hpp index cdc96ce6e2..2a45aac787 100644 --- a/modules/dnn/src/vkcom/shader/spv_shader.hpp +++ b/modules/dnn/src/vkcom/shader/spv_shader.hpp @@ -16,7 +16,7 @@ extern const unsigned int permute_spv[765]; extern const unsigned int lrn_spv[1845]; extern const unsigned int concat_spv[541]; extern const unsigned int avg_pool_spv[1538]; -extern const unsigned int softmax_spv[1440]; +extern const unsigned int softmax_spv[1496]; extern const unsigned int prior_box_spv[1480]; extern const unsigned int max_pool_spv[1449]; extern const unsigned int relu_spv[502]; diff --git a/modules/dnn/src/vkcom/src/op_softmax.cpp b/modules/dnn/src/vkcom/src/op_softmax.cpp index f0b20f8a64..aca0aca5ac 100644 --- a/modules/dnn/src/vkcom/src/op_softmax.cpp +++ b/modules/dnn/src/vkcom/src/op_softmax.cpp @@ -22,6 +22,7 @@ struct SoftmaxParam { int channel_size; int outer_size; int channels; + int logsoftmax; }; OpSoftmax::OpSoftmax(const int axis, const bool log_softmax) @@ -90,7 +91,7 @@ bool OpSoftmax::forward(Tensor& in, Tensor& out) bindTensor(device_, *max_tensor_, 1, descriptor_set_); bindTensor(device_, *sum_tensor_, 2, descriptor_set_); bindTensor(device_, out, 3, descriptor_set_); - SoftmaxParam param = {channel_size_, outer_size_, channels_}; + SoftmaxParam param = {channel_size_, outer_size_, channels_, log_softmax_ == true ? 
1 : 0}; recordCommandBuffer((void *)&param, sizeof(SoftmaxParam)); runCommandBuffer(); return true; From 33c9d57c6f04ae8d07876039d7da787e5fb048d0 Mon Sep 17 00:00:00 2001 From: Wu Zhiwen Date: Tue, 6 Nov 2018 20:24:00 +0800 Subject: [PATCH 4/4] dnn/Vulkan: skip heavy convolution task This is a workaround for GPU hang on heavy convolution workload (> 10 GFLOPS). e.g. ResNet101_DUC_HDC For the long time task, vkWaitForFences() return without error but next call on vkQueueSubmit() return -4, i.e. "VK_ERROR_DEVICE_LOST" and driver reports GPU hang. Need more investigation on root cause of GPU hang and need to optimize convolution shader to reduce process time. --- modules/dnn/src/dnn.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index cda864980f..d0dc9dfb28 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1411,6 +1411,32 @@ struct Net::Impl continue; } + if (ld.type == "Convolution") + { + std::vector<MatShape> in_shapes; + std::vector<MatShape> out_shapes; + CV_Assert(ld.inputBlobs.size() == ld.outputBlobs.size()); + + for (int i = 0; i < ld.inputBlobs.size(); i++) + { + in_shapes.push_back(shape(*ld.inputBlobs[i])); + out_shapes.push_back(shape(ld.outputBlobs[i])); + } + int64 flops = layer->getFLOPS(in_shapes, out_shapes); + // FIXME + // + // This is a workaround for GPU hang on heavy convolution workload ( > 10 GFLOPS). + // For the long time task, vkWaitForFences() return without error but next call on + // vkQueueSubmit() return -4, i.e. "VK_ERROR_DEVICE_LOST" and driver reports GPU hang. + // + // Need more investigation on root cause of GPU hang and need to optimize convolution shader + // to reduce process time. + if (flops > CV_BIG_INT(10) * 1000 * 1000 * 1000) + { + continue; + } + } + ld.skip = false; try