mirror of https://github.com/opencv/opencv.git
Merge pull request #24768 from Haosonn:pre-pr-2
Vulkan backend for NaryEltwiseLayer in DNN module #24768 We improve Vulkan backend for ``NaryEltwiseLayer`` in DNN module by: - add a basic framework for Vulkan backend in ``NaryEltwiseLayer`` - add a compute shader for binary forwarding (an imitation of what has been done in native OpenCV backend including broadcasting and eltwise-operation) - typo fixed: - Wrong info output in ``context.cpp`` Currently, our implementation (or all layers supporting Vulkan backend) runs pretty slow on discrete GPUs basically due to IO cost in function ``copyToHost``, and we are going to fix that by - find out the best ``VkMemoryProperty`` for various discrete GPUs - prevent ``copyToHost`` in middle layers during forwarding, (i.e keep data in GPU memory) ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake Co-authored-by: IskXCr <IskXCr@outlook.com>pull/24927/head
parent
03994163b5
commit
87f749277d
9 changed files with 824 additions and 232 deletions
@ -0,0 +1,87 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_OP_NARY_HPP |
||||
#define OPENCV_OP_NARY_HPP |
||||
|
||||
#include "vkcom.hpp" |
||||
#include "op_base.hpp" |
||||
|
||||
namespace cv { namespace dnn { namespace vkcom { |
||||
|
||||
#ifdef HAVE_VULKAN |
||||
|
||||
/// Identifies which family of compute shader an OpNary instance dispatches.
enum NaryShaderType
{
    kNaryShaderTypeBinary  = 0, // shader taking exactly two inputs
    kNaryShaderTypeTrinary = 1, // shader taking exactly three inputs
    kNaryShaderTypeNary    = 2, // shader taking a variable number of inputs
    kNaryShaderTest        = 3, // NOTE(review): no user visible in this header - confirm it is still needed
};
||||
|
||||
/// Work-group (local) size of a shader: the number of invocations per work
/// group along each axis. Plain aggregate; members carry no default value.
struct NaryShaderConfig
{
    int local_size_x; // invocations per work group along x
    int local_size_y; // invocations per work group along y
    int local_size_z; // invocations per work group along z
};
||||
|
||||
|
||||
class OpNary : public OpBase |
||||
{ |
||||
public: |
||||
// Copied from nary_eltwise_layers.cpp
|
||||
enum class OPERATION |
||||
{ |
||||
AND = 0, |
||||
EQUAL, |
||||
GREATER, |
||||
GREATER_EQUAL, |
||||
LESS, |
||||
LESS_EQUAL, |
||||
OR, |
||||
POW, |
||||
XOR, |
||||
BITSHIFT, |
||||
MAX, |
||||
MEAN, |
||||
MIN, |
||||
MOD, |
||||
PROD, |
||||
SUB, |
||||
SUM, |
||||
ADD, |
||||
DIV, |
||||
WHERE, |
||||
}; |
||||
|
||||
OpNary(const OPERATION naryOpType, int ninputs, int max_ndims, const std::vector<std::vector<int>> shapes, const std::vector<std::vector<size_t>> steps); |
||||
|
||||
void firstForward(); // Execute only in the first forward.
|
||||
virtual bool forward(std::vector<Tensor>& ins, std::vector<Tensor>& outs) CV_OVERRIDE; |
||||
Ptr<Tensor> weightTensorPtr; |
||||
private: |
||||
bool computeGroupCount(); |
||||
bool binaryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs); |
||||
bool trinaryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs); |
||||
bool naryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs); |
||||
|
||||
const OPERATION naryOpType; |
||||
NaryShaderType shaderType; |
||||
NaryShaderConfig config; |
||||
int ninputs; |
||||
int max_ndims; |
||||
AutoBuffer<int32_t> shapesBuf; |
||||
AutoBuffer<int32_t> stepsBuf; |
||||
int nplanes; // number of planes computations are to be performed on
|
||||
int N2; // value of shape[ndims - 2]
|
||||
int N1; // value of shape[ndims - 1]
|
||||
|
||||
bool firstForwardFinsh = false; |
||||
}; |
||||
|
||||
#endif // HAVE_VULKAN
|
||||
|
||||
}}} // namespace cv::dnn::vkcom
|
||||
#endif // OPENCV_OP_NARY_HPP
|
@ -0,0 +1,116 @@ |
||||
#version 450
// #extension GL_EXT_debug_printf : enable

// Number of invocations in one work group (see the local_size_x layout below).
#define ALL_THREAD 1024
// #define ALL_THREAD 128 // Experimental batched operation
#define STEP_SIZE 65536

// binding 0: first operand
layout(binding = 0) readonly buffer Input1 {
    float matA[];
};

// binding 1: second operand
layout(binding = 1) readonly buffer Input2 {
    float matB[];
};

// binding 2: result
layout(binding = 2) writeonly buffer Output {
    float matOut[];
};

// binding 3: scalar parameters of the operation
layout(binding = 3) uniform Params {
    int opType; // one of the operation constants declared in this shader
    int ndims;  // number of dimensions of every shape / step vector
} params;

// binding 4: shape vectors, `ndims` entries per array, output array first
layout(binding = 4) readonly buffer Shape {
    int shape[];
};

// binding 5: step vectors in elements, `ndims` entries per array, ordered
// output, first operand, second operand
layout(binding = 5) readonly buffer Step {
    int matStep[];
};

/* local_size_x, local_size_y and local_size_z define the number of invocations
   of this compute shader in one work group.
   TODO: check whether this choice makes sense.
   TODO: check whether PhysicalDeviceLimits must be fetched from Context.
   TODO: for now maxGroupInvocation is assumed to be 1024. */
layout(local_size_x = ALL_THREAD, local_size_y = 1, local_size_z = 1) in;
||||
|
||||
// Operation codes compared against params.opType.
// The host uploads its operation enumerator as a plain integer, so these
// values must mirror the host-side enumeration one-to-one.
const int AND           = 0;
const int EQUAL         = 1;
const int GREATER       = 2;
const int GREATER_EQUAL = 3;
const int LESS          = 4;
const int LESS_EQUAL    = 5;
const int OR            = 6;
const int POW           = 7;
const int XOR           = 8;
const int BITSHIFT      = 9;
const int MAX           = 10;
const int MEAN          = 11;
const int MIN           = 12;
const int MOD           = 13;
const int FMOD          = 14;
const int PROD          = 15;
const int SUB           = 16;
const int SUM           = 17;
const int ADD           = 18;
const int DIV           = 19;
const int WHERE         = 20;
||||
|
||||
// Element-wise binary operation with broadcasting.
//
// Work distribution: gl_WorkGroupID.x selects a plane (all dimensions except
// the last two, flattened), gl_WorkGroupID.y selects the index along dimension
// ndims - 2, and the invocations of the work group stride over the last
// dimension in steps of ALL_THREAD.
//
// shape[] / matStep[] hold `ndims` entries per array; entries [0, ndims)
// describe the output, [ndims, 2*ndims) the first operand and
// [2*ndims, 3*ndims) the second operand.
void binary_forward()
{
    int ndims = params.ndims;

    // Steps along the innermost dimension and its extent.
    int stepOut = matStep[ndims - 1];
    int stepA   = matStep[2 * ndims - 1];
    int stepB   = matStep[3 * ndims - 1];
    int n1      = shape[ndims - 1];

    int offA   = 0;
    int offB   = 0;
    int offOut = 0;

    // Unravel the flat plane index into one coordinate per leading dimension
    // (innermost leading dimension first) and accumulate the three offsets.
    int rest = int(gl_WorkGroupID.x);
    for (int axis = ndims - 3; axis >= 0; --axis) {
        int extent = shape[axis];
        int coord  = rest % extent;
        rest /= extent;

        offA   += coord * matStep[ndims + axis];
        offB   += coord * matStep[2 * ndims + axis];
        offOut += coord * matStep[axis];
    }

    // Move to the row handled by this work group.
    int row = int(gl_WorkGroupID.y);
    offA   += row * matStep[2 * ndims - 2];
    offB   += row * matStep[3 * ndims - 2];
    offOut += row * matStep[ndims - 2];

    for (int col = int(gl_LocalInvocationID.x); col < n1; col += ALL_THREAD) {
        int idxA   = offA + col * stepA;
        int idxB   = offB + col * stepB;
        int idxOut = offOut + col * stepOut;

        // Operation codes without a case below leave the output untouched.
        switch (params.opType) {
        case ADD:
            matOut[idxOut] = matA[idxA] + matB[idxB];
            break;
        case SUB:
            matOut[idxOut] = matA[idxA] - matB[idxB];
            break;
        case PROD:
            matOut[idxOut] = matA[idxA] * matB[idxB];
            break;
        case DIV:
            matOut[idxOut] = matA[idxA] / matB[idxB];
            break;
        }
    }
}
||||
|
||||
|
||||
// Shader entry point: delegates all work to binary_forward().
void main()
{
    // debugPrintfEXT("nary_eltwise_binary_forward.comp loaded\n");
    binary_forward();
}
@ -0,0 +1,232 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../../precomp.hpp" |
||||
|
||||
namespace cv { namespace dnn { namespace vkcom { |
||||
|
||||
extern const unsigned int nary_eltwise_binary_forward_spv[1757] = { |
||||
0x07230203,0x00010000,0x0008000b,0x00000131,0x00000000,0x00020011,0x00000001,0x0006000b, |
||||
0x00000001,0x4c534c47,0x6474732e,0x3035342e,0x00000000,0x0003000e,0x00000000,0x00000001, |
||||
0x0007000f,0x00000005,0x00000004,0x6e69616d,0x00000000,0x0000003c,0x00000083,0x00060010, |
||||
0x00000004,0x00000011,0x00000400,0x00000001,0x00000001,0x00030003,0x00000002,0x000001c2, |
||||
0x00040005,0x00000004,0x6e69616d,0x00000000,0x00060005,0x00000006,0x616e6962,0x665f7972, |
||||
0x6177726f,0x00286472,0x00040005,0x0000000a,0x6d69646e,0x00000073,0x00040005,0x0000000b, |
||||
0x61726150,0x0000736d,0x00050006,0x0000000b,0x00000000,0x7954706f,0x00006570,0x00050006, |
||||
0x0000000b,0x00000001,0x6d69646e,0x00000073,0x00040005,0x0000000d,0x61726170,0x0000736d, |
||||
0x00030005,0x00000012,0x00317064,0x00040005,0x00000014,0x70657453,0x00000000,0x00050006, |
||||
0x00000014,0x00000000,0x5374616d,0x00706574,0x00030005,0x00000016,0x00000000,0x00030005, |
||||
0x0000001e,0x00327064,0x00030005,0x00000025,0x00007064,0x00030005,0x0000002a,0x0000316e, |
||||
0x00040005,0x0000002c,0x70616853,0x00000065,0x00050006,0x0000002c,0x00000000,0x70616873, |
||||
0x00000065,0x00030005,0x0000002e,0x00000000,0x00030005,0x00000033,0x0000326e,0x00050005, |
||||
0x00000038,0x6e616c70,0x64695f65,0x00000078,0x00060005,0x0000003c,0x575f6c67,0x476b726f, |
||||
0x70756f72,0x00004449,0x00040005,0x00000042,0x31727470,0x00000000,0x00040005,0x00000043, |
||||
0x32727470,0x00000000,0x00030005,0x00000044,0x00727470,0x00030005,0x00000045,0x00786469, |
||||
0x00030005,0x00000047,0x0000006b,0x00050005,0x00000052,0x7478656e,0x7864695f,0x00000000, |
||||
0x00030005,0x00000058,0x006b5f69,0x00050005,0x0000007d,0x6f5f3269,0x65736666,0x00000074, |
||||
0x00050005,0x00000082,0x6f5f3169,0x65736666,0x00000074,0x00080005,0x00000083,0x4c5f6c67, |
||||
0x6c61636f,0x6f766e49,0x69746163,0x44496e6f,0x00000000,0x00030005,0x000000a1,0x00003169, |
||||
0x00040005,0x000000b4,0x7074754f,0x00007475,0x00050006,0x000000b4,0x00000000,0x4f74616d, |
||||
0x00007475,0x00030005,0x000000b6,0x00000000,0x00040005,0x000000bd,0x75706e49,0x00003174, |
||||
0x00050006,0x000000bd,0x00000000,0x4174616d,0x00000000,0x00030005,0x000000bf,0x00000000, |
||||
0x00040005,0x000000c9,0x75706e49,0x00003274,0x00050006,0x000000c9,0x00000000,0x4274616d, |
||||
0x00000000,0x00030005,0x000000cb,0x00000000,0x00050048,0x0000000b,0x00000000,0x00000023, |
||||
0x00000000,0x00050048,0x0000000b,0x00000001,0x00000023,0x00000004,0x00030047,0x0000000b, |
||||
0x00000002,0x00040047,0x0000000d,0x00000022,0x00000000,0x00040047,0x0000000d,0x00000021, |
||||
0x00000003,0x00040047,0x00000013,0x00000006,0x00000004,0x00040048,0x00000014,0x00000000, |
||||
0x00000018,0x00050048,0x00000014,0x00000000,0x00000023,0x00000000,0x00030047,0x00000014, |
||||
0x00000003,0x00040047,0x00000016,0x00000022,0x00000000,0x00040047,0x00000016,0x00000021, |
||||
0x00000005,0x00040047,0x0000002b,0x00000006,0x00000004,0x00040048,0x0000002c,0x00000000, |
||||
0x00000018,0x00050048,0x0000002c,0x00000000,0x00000023,0x00000000,0x00030047,0x0000002c, |
||||
0x00000003,0x00040047,0x0000002e,0x00000022,0x00000000,0x00040047,0x0000002e,0x00000021, |
||||
0x00000004,0x00040047,0x0000003c,0x0000000b,0x0000001a,0x00040047,0x00000083,0x0000000b, |
||||
0x0000001b,0x00040047,0x000000b3,0x00000006,0x00000004,0x00040048,0x000000b4,0x00000000, |
||||
0x00000019,0x00050048,0x000000b4,0x00000000,0x00000023,0x00000000,0x00030047,0x000000b4, |
||||
0x00000003,0x00040047,0x000000b6,0x00000022,0x00000000,0x00040047,0x000000b6,0x00000021, |
||||
0x00000002,0x00040047,0x000000bc,0x00000006,0x00000004,0x00040048,0x000000bd,0x00000000, |
||||
0x00000018,0x00050048,0x000000bd,0x00000000,0x00000023,0x00000000,0x00030047,0x000000bd, |
||||
0x00000003,0x00040047,0x000000bf,0x00000022,0x00000000,0x00040047,0x000000bf,0x00000021, |
||||
0x00000000,0x00040047,0x000000c8,0x00000006,0x00000004,0x00040048,0x000000c9,0x00000000, |
||||
0x00000018,0x00050048,0x000000c9,0x00000000,0x00000023,0x00000000,0x00030047,0x000000c9, |
||||
0x00000003,0x00040047,0x000000cb,0x00000022,0x00000000,0x00040047,0x000000cb,0x00000021, |
||||
0x00000001,0x00040047,0x0000011f,0x0000000b,0x00000019,0x00020013,0x00000002,0x00030021, |
||||
0x00000003,0x00000002,0x00040015,0x00000008,0x00000020,0x00000001,0x00040020,0x00000009, |
||||
0x00000007,0x00000008,0x0004001e,0x0000000b,0x00000008,0x00000008,0x00040020,0x0000000c, |
||||
0x00000002,0x0000000b,0x0004003b,0x0000000c,0x0000000d,0x00000002,0x0004002b,0x00000008, |
||||
0x0000000e,0x00000001,0x00040020,0x0000000f,0x00000002,0x00000008,0x0003001d,0x00000013, |
||||
0x00000008,0x0003001e,0x00000014,0x00000013,0x00040020,0x00000015,0x00000002,0x00000014, |
||||
0x0004003b,0x00000015,0x00000016,0x00000002,0x0004002b,0x00000008,0x00000017,0x00000000, |
||||
0x0004002b,0x00000008,0x00000018,0x00000002,0x0004002b,0x00000008,0x0000001f,0x00000003, |
||||
0x0003001d,0x0000002b,0x00000008,0x0003001e,0x0000002c,0x0000002b,0x00040020,0x0000002d, |
||||
0x00000002,0x0000002c,0x0004003b,0x0000002d,0x0000002e,0x00000002,0x00040015,0x00000039, |
||||
0x00000020,0x00000000,0x00040017,0x0000003a,0x00000039,0x00000003,0x00040020,0x0000003b, |
||||
0x00000001,0x0000003a,0x0004003b,0x0000003b,0x0000003c,0x00000001,0x0004002b,0x00000039, |
||||
0x0000003d,0x00000000,0x00040020,0x0000003e,0x00000001,0x00000039,0x00020014,0x00000050, |
||||
0x0004002b,0x00000039,0x0000007e,0x00000001,0x0004003b,0x0000003b,0x00000083,0x00000001, |
||||
0x00030016,0x000000b2,0x00000020,0x0003001d,0x000000b3,0x000000b2,0x0003001e,0x000000b4, |
||||
0x000000b3,0x00040020,0x000000b5,0x00000002,0x000000b4,0x0004003b,0x000000b5,0x000000b6, |
||||
0x00000002,0x0003001d,0x000000bc,0x000000b2,0x0003001e,0x000000bd,0x000000bc,0x00040020, |
||||
0x000000be,0x00000002,0x000000bd,0x0004003b,0x000000be,0x000000bf,0x00000002,0x00040020, |
||||
0x000000c5,0x00000002,0x000000b2,0x0003001d,0x000000c8,0x000000b2,0x0003001e,0x000000c9, |
||||
0x000000c8,0x00040020,0x000000ca,0x00000002,0x000000c9,0x0004003b,0x000000ca,0x000000cb, |
||||
0x00000002,0x0004002b,0x00000008,0x00000119,0x00000400,0x0004002b,0x00000039,0x0000011e, |
||||
0x00000400,0x0006002c,0x0000003a,0x0000011f,0x0000011e,0x0000007e,0x0000007e,0x0004002b, |
||||
0x00000008,0x00000120,0x00000004,0x0004002b,0x00000008,0x00000121,0x00000005,0x0004002b, |
||||
0x00000008,0x00000122,0x00000006,0x0004002b,0x00000008,0x00000123,0x00000007,0x0004002b, |
||||
0x00000008,0x00000124,0x00000008,0x0004002b,0x00000008,0x00000125,0x00000009,0x0004002b, |
||||
0x00000008,0x00000126,0x0000000a,0x0004002b,0x00000008,0x00000127,0x0000000b,0x0004002b, |
||||
0x00000008,0x00000128,0x0000000c,0x0004002b,0x00000008,0x00000129,0x0000000d,0x0004002b, |
||||
0x00000008,0x0000012a,0x0000000e,0x0004002b,0x00000008,0x0000012b,0x0000000f,0x0004002b, |
||||
0x00000008,0x0000012c,0x00000010,0x0004002b,0x00000008,0x0000012d,0x00000011,0x0004002b, |
||||
0x00000008,0x0000012e,0x00000012,0x0004002b,0x00000008,0x0000012f,0x00000013,0x0004002b, |
||||
0x00000008,0x00000130,0x00000014,0x00050036,0x00000002,0x00000004,0x00000000,0x00000003, |
||||
0x000200f8,0x00000005,0x00040039,0x00000002,0x0000011c,0x00000006,0x000100fd,0x00010038, |
||||
0x00050036,0x00000002,0x00000006,0x00000000,0x00000003,0x000200f8,0x00000007,0x0004003b, |
||||
0x00000009,0x0000000a,0x00000007,0x0004003b,0x00000009,0x00000012,0x00000007,0x0004003b, |
||||
0x00000009,0x0000001e,0x00000007,0x0004003b,0x00000009,0x00000025,0x00000007,0x0004003b, |
||||
0x00000009,0x0000002a,0x00000007,0x0004003b,0x00000009,0x00000033,0x00000007,0x0004003b, |
||||
0x00000009,0x00000038,0x00000007,0x0004003b,0x00000009,0x00000042,0x00000007,0x0004003b, |
||||
0x00000009,0x00000043,0x00000007,0x0004003b,0x00000009,0x00000044,0x00000007,0x0004003b, |
||||
0x00000009,0x00000045,0x00000007,0x0004003b,0x00000009,0x00000047,0x00000007,0x0004003b, |
||||
0x00000009,0x00000052,0x00000007,0x0004003b,0x00000009,0x00000058,0x00000007,0x0004003b, |
||||
0x00000009,0x0000007d,0x00000007,0x0004003b,0x00000009,0x00000082,0x00000007,0x0004003b, |
||||
0x00000009,0x000000a1,0x00000007,0x00050041,0x0000000f,0x00000010,0x0000000d,0x0000000e, |
||||
0x0004003d,0x00000008,0x00000011,0x00000010,0x0003003e,0x0000000a,0x00000011,0x0004003d, |
||||
0x00000008,0x00000019,0x0000000a,0x00050084,0x00000008,0x0000001a,0x00000018,0x00000019, |
||||
0x00050082,0x00000008,0x0000001b,0x0000001a,0x0000000e,0x00060041,0x0000000f,0x0000001c, |
||||
0x00000016,0x00000017,0x0000001b,0x0004003d,0x00000008,0x0000001d,0x0000001c,0x0003003e, |
||||
0x00000012,0x0000001d,0x0004003d,0x00000008,0x00000020,0x0000000a,0x00050084,0x00000008, |
||||
0x00000021,0x0000001f,0x00000020,0x00050082,0x00000008,0x00000022,0x00000021,0x0000000e, |
||||
0x00060041,0x0000000f,0x00000023,0x00000016,0x00000017,0x00000022,0x0004003d,0x00000008, |
||||
0x00000024,0x00000023,0x0003003e,0x0000001e,0x00000024,0x0004003d,0x00000008,0x00000026, |
||||
0x0000000a,0x00050082,0x00000008,0x00000027,0x00000026,0x0000000e,0x00060041,0x0000000f, |
||||
0x00000028,0x00000016,0x00000017,0x00000027,0x0004003d,0x00000008,0x00000029,0x00000028, |
||||
0x0003003e,0x00000025,0x00000029,0x0004003d,0x00000008,0x0000002f,0x0000000a,0x00050082, |
||||
0x00000008,0x00000030,0x0000002f,0x0000000e,0x00060041,0x0000000f,0x00000031,0x0000002e, |
||||
0x00000017,0x00000030,0x0004003d,0x00000008,0x00000032,0x00000031,0x0003003e,0x0000002a, |
||||
0x00000032,0x0004003d,0x00000008,0x00000034,0x0000000a,0x00050082,0x00000008,0x00000035, |
||||
0x00000034,0x00000018,0x00060041,0x0000000f,0x00000036,0x0000002e,0x00000017,0x00000035, |
||||
0x0004003d,0x00000008,0x00000037,0x00000036,0x0003003e,0x00000033,0x00000037,0x00050041, |
||||
0x0000003e,0x0000003f,0x0000003c,0x0000003d,0x0004003d,0x00000039,0x00000040,0x0000003f, |
||||
0x0004007c,0x00000008,0x00000041,0x00000040,0x0003003e,0x00000038,0x00000041,0x0003003e, |
||||
0x00000042,0x00000017,0x0003003e,0x00000043,0x00000017,0x0003003e,0x00000044,0x00000017, |
||||
0x0004003d,0x00000008,0x00000046,0x00000038,0x0003003e,0x00000045,0x00000046,0x0004003d, |
||||
0x00000008,0x00000048,0x0000000a,0x00050082,0x00000008,0x00000049,0x00000048,0x0000001f, |
||||
0x0003003e,0x00000047,0x00000049,0x000200f9,0x0000004a,0x000200f8,0x0000004a,0x000400f6, |
||||
0x0000004c,0x0000004d,0x00000000,0x000200f9,0x0000004e,0x000200f8,0x0000004e,0x0004003d, |
||||
0x00000008,0x0000004f,0x00000047,0x000500af,0x00000050,0x00000051,0x0000004f,0x00000017, |
||||
0x000400fa,0x00000051,0x0000004b,0x0000004c,0x000200f8,0x0000004b,0x0004003d,0x00000008, |
||||
0x00000053,0x00000045,0x0004003d,0x00000008,0x00000054,0x00000047,0x00060041,0x0000000f, |
||||
0x00000055,0x0000002e,0x00000017,0x00000054,0x0004003d,0x00000008,0x00000056,0x00000055, |
||||
0x00050087,0x00000008,0x00000057,0x00000053,0x00000056,0x0003003e,0x00000052,0x00000057, |
||||
0x0004003d,0x00000008,0x00000059,0x00000045,0x0004003d,0x00000008,0x0000005a,0x00000052, |
||||
0x0004003d,0x00000008,0x0000005b,0x00000047,0x00060041,0x0000000f,0x0000005c,0x0000002e, |
||||
0x00000017,0x0000005b,0x0004003d,0x00000008,0x0000005d,0x0000005c,0x00050084,0x00000008, |
||||
0x0000005e,0x0000005a,0x0000005d,0x00050082,0x00000008,0x0000005f,0x00000059,0x0000005e, |
||||
0x0003003e,0x00000058,0x0000005f,0x0004003d,0x00000008,0x00000060,0x00000058,0x0004003d, |
||||
0x00000008,0x00000061,0x0000000a,0x0004003d,0x00000008,0x00000062,0x00000047,0x00050080, |
||||
0x00000008,0x00000063,0x00000061,0x00000062,0x00060041,0x0000000f,0x00000064,0x00000016, |
||||
0x00000017,0x00000063,0x0004003d,0x00000008,0x00000065,0x00000064,0x00050084,0x00000008, |
||||
0x00000066,0x00000060,0x00000065,0x0004003d,0x00000008,0x00000067,0x00000042,0x00050080, |
||||
0x00000008,0x00000068,0x00000067,0x00000066,0x0003003e,0x00000042,0x00000068,0x0004003d, |
||||
0x00000008,0x00000069,0x00000058,0x0004003d,0x00000008,0x0000006a,0x0000000a,0x00050084, |
||||
0x00000008,0x0000006b,0x00000018,0x0000006a,0x0004003d,0x00000008,0x0000006c,0x00000047, |
||||
0x00050080,0x00000008,0x0000006d,0x0000006b,0x0000006c,0x00060041,0x0000000f,0x0000006e, |
||||
0x00000016,0x00000017,0x0000006d,0x0004003d,0x00000008,0x0000006f,0x0000006e,0x00050084, |
||||
0x00000008,0x00000070,0x00000069,0x0000006f,0x0004003d,0x00000008,0x00000071,0x00000043, |
||||
0x00050080,0x00000008,0x00000072,0x00000071,0x00000070,0x0003003e,0x00000043,0x00000072, |
||||
0x0004003d,0x00000008,0x00000073,0x00000058,0x0004003d,0x00000008,0x00000074,0x00000047, |
||||
0x00060041,0x0000000f,0x00000075,0x00000016,0x00000017,0x00000074,0x0004003d,0x00000008, |
||||
0x00000076,0x00000075,0x00050084,0x00000008,0x00000077,0x00000073,0x00000076,0x0004003d, |
||||
0x00000008,0x00000078,0x00000044,0x00050080,0x00000008,0x00000079,0x00000078,0x00000077, |
||||
0x0003003e,0x00000044,0x00000079,0x0004003d,0x00000008,0x0000007a,0x00000052,0x0003003e, |
||||
0x00000045,0x0000007a,0x000200f9,0x0000004d,0x000200f8,0x0000004d,0x0004003d,0x00000008, |
||||
0x0000007b,0x00000047,0x00050082,0x00000008,0x0000007c,0x0000007b,0x0000000e,0x0003003e, |
||||
0x00000047,0x0000007c,0x000200f9,0x0000004a,0x000200f8,0x0000004c,0x00050041,0x0000003e, |
||||
0x0000007f,0x0000003c,0x0000007e,0x0004003d,0x00000039,0x00000080,0x0000007f,0x0004007c, |
||||
0x00000008,0x00000081,0x00000080,0x0003003e,0x0000007d,0x00000081,0x00050041,0x0000003e, |
||||
0x00000084,0x00000083,0x0000003d,0x0004003d,0x00000039,0x00000085,0x00000084,0x0004007c, |
||||
0x00000008,0x00000086,0x00000085,0x0003003e,0x00000082,0x00000086,0x0004003d,0x00000008, |
||||
0x00000087,0x0000007d,0x0004003d,0x00000008,0x00000088,0x0000000a,0x00050084,0x00000008, |
||||
0x00000089,0x00000018,0x00000088,0x00050082,0x00000008,0x0000008a,0x00000089,0x00000018, |
||||
0x00060041,0x0000000f,0x0000008b,0x00000016,0x00000017,0x0000008a,0x0004003d,0x00000008, |
||||
0x0000008c,0x0000008b,0x00050084,0x00000008,0x0000008d,0x00000087,0x0000008c,0x0004003d, |
||||
0x00000008,0x0000008e,0x00000042,0x00050080,0x00000008,0x0000008f,0x0000008e,0x0000008d, |
||||
0x0003003e,0x00000042,0x0000008f,0x0004003d,0x00000008,0x00000090,0x0000007d,0x0004003d, |
||||
0x00000008,0x00000091,0x0000000a,0x00050084,0x00000008,0x00000092,0x0000001f,0x00000091, |
||||
0x00050082,0x00000008,0x00000093,0x00000092,0x00000018,0x00060041,0x0000000f,0x00000094, |
||||
0x00000016,0x00000017,0x00000093,0x0004003d,0x00000008,0x00000095,0x00000094,0x00050084, |
||||
0x00000008,0x00000096,0x00000090,0x00000095,0x0004003d,0x00000008,0x00000097,0x00000043, |
||||
0x00050080,0x00000008,0x00000098,0x00000097,0x00000096,0x0003003e,0x00000043,0x00000098, |
||||
0x0004003d,0x00000008,0x00000099,0x0000007d,0x0004003d,0x00000008,0x0000009a,0x0000000a, |
||||
0x00050082,0x00000008,0x0000009b,0x0000009a,0x00000018,0x00060041,0x0000000f,0x0000009c, |
||||
0x00000016,0x00000017,0x0000009b,0x0004003d,0x00000008,0x0000009d,0x0000009c,0x00050084, |
||||
0x00000008,0x0000009e,0x00000099,0x0000009d,0x0004003d,0x00000008,0x0000009f,0x00000044, |
||||
0x00050080,0x00000008,0x000000a0,0x0000009f,0x0000009e,0x0003003e,0x00000044,0x000000a0, |
||||
0x0004003d,0x00000008,0x000000a2,0x00000082,0x0003003e,0x000000a1,0x000000a2,0x000200f9, |
||||
0x000000a3,0x000200f8,0x000000a3,0x000400f6,0x000000a5,0x000000a6,0x00000000,0x000200f9, |
||||
0x000000a7,0x000200f8,0x000000a7,0x0004003d,0x00000008,0x000000a8,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000a9,0x0000002a,0x000500b1,0x00000050,0x000000aa,0x000000a8,0x000000a9, |
||||
0x000400fa,0x000000aa,0x000000a4,0x000000a5,0x000200f8,0x000000a4,0x00050041,0x0000000f, |
||||
0x000000ab,0x0000000d,0x00000017,0x0004003d,0x00000008,0x000000ac,0x000000ab,0x000300f7, |
||||
0x000000b1,0x00000000,0x000b00fb,0x000000ac,0x000000b1,0x00000012,0x000000ad,0x00000010, |
||||
0x000000ae,0x0000000f,0x000000af,0x00000013,0x000000b0,0x000200f8,0x000000ad,0x0004003d, |
||||
0x00000008,0x000000b7,0x00000044,0x0004003d,0x00000008,0x000000b8,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000b9,0x00000025,0x00050084,0x00000008,0x000000ba,0x000000b8,0x000000b9, |
||||
0x00050080,0x00000008,0x000000bb,0x000000b7,0x000000ba,0x0004003d,0x00000008,0x000000c0, |
||||
0x00000042,0x0004003d,0x00000008,0x000000c1,0x000000a1,0x0004003d,0x00000008,0x000000c2, |
||||
0x00000012,0x00050084,0x00000008,0x000000c3,0x000000c1,0x000000c2,0x00050080,0x00000008, |
||||
0x000000c4,0x000000c0,0x000000c3,0x00060041,0x000000c5,0x000000c6,0x000000bf,0x00000017, |
||||
0x000000c4,0x0004003d,0x000000b2,0x000000c7,0x000000c6,0x0004003d,0x00000008,0x000000cc, |
||||
0x00000043,0x0004003d,0x00000008,0x000000cd,0x000000a1,0x0004003d,0x00000008,0x000000ce, |
||||
0x0000001e,0x00050084,0x00000008,0x000000cf,0x000000cd,0x000000ce,0x00050080,0x00000008, |
||||
0x000000d0,0x000000cc,0x000000cf,0x00060041,0x000000c5,0x000000d1,0x000000cb,0x00000017, |
||||
0x000000d0,0x0004003d,0x000000b2,0x000000d2,0x000000d1,0x00050081,0x000000b2,0x000000d3, |
||||
0x000000c7,0x000000d2,0x00060041,0x000000c5,0x000000d4,0x000000b6,0x00000017,0x000000bb, |
||||
0x0003003e,0x000000d4,0x000000d3,0x000200f9,0x000000b1,0x000200f8,0x000000ae,0x0004003d, |
||||
0x00000008,0x000000d6,0x00000044,0x0004003d,0x00000008,0x000000d7,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000d8,0x00000025,0x00050084,0x00000008,0x000000d9,0x000000d7,0x000000d8, |
||||
0x00050080,0x00000008,0x000000da,0x000000d6,0x000000d9,0x0004003d,0x00000008,0x000000db, |
||||
0x00000042,0x0004003d,0x00000008,0x000000dc,0x000000a1,0x0004003d,0x00000008,0x000000dd, |
||||
0x00000012,0x00050084,0x00000008,0x000000de,0x000000dc,0x000000dd,0x00050080,0x00000008, |
||||
0x000000df,0x000000db,0x000000de,0x00060041,0x000000c5,0x000000e0,0x000000bf,0x00000017, |
||||
0x000000df,0x0004003d,0x000000b2,0x000000e1,0x000000e0,0x0004003d,0x00000008,0x000000e2, |
||||
0x00000043,0x0004003d,0x00000008,0x000000e3,0x000000a1,0x0004003d,0x00000008,0x000000e4, |
||||
0x0000001e,0x00050084,0x00000008,0x000000e5,0x000000e3,0x000000e4,0x00050080,0x00000008, |
||||
0x000000e6,0x000000e2,0x000000e5,0x00060041,0x000000c5,0x000000e7,0x000000cb,0x00000017, |
||||
0x000000e6,0x0004003d,0x000000b2,0x000000e8,0x000000e7,0x00050083,0x000000b2,0x000000e9, |
||||
0x000000e1,0x000000e8,0x00060041,0x000000c5,0x000000ea,0x000000b6,0x00000017,0x000000da, |
||||
0x0003003e,0x000000ea,0x000000e9,0x000200f9,0x000000b1,0x000200f8,0x000000af,0x0004003d, |
||||
0x00000008,0x000000ec,0x00000044,0x0004003d,0x00000008,0x000000ed,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000ee,0x00000025,0x00050084,0x00000008,0x000000ef,0x000000ed,0x000000ee, |
||||
0x00050080,0x00000008,0x000000f0,0x000000ec,0x000000ef,0x0004003d,0x00000008,0x000000f1, |
||||
0x00000042,0x0004003d,0x00000008,0x000000f2,0x000000a1,0x0004003d,0x00000008,0x000000f3, |
||||
0x00000012,0x00050084,0x00000008,0x000000f4,0x000000f2,0x000000f3,0x00050080,0x00000008, |
||||
0x000000f5,0x000000f1,0x000000f4,0x00060041,0x000000c5,0x000000f6,0x000000bf,0x00000017, |
||||
0x000000f5,0x0004003d,0x000000b2,0x000000f7,0x000000f6,0x0004003d,0x00000008,0x000000f8, |
||||
0x00000043,0x0004003d,0x00000008,0x000000f9,0x000000a1,0x0004003d,0x00000008,0x000000fa, |
||||
0x0000001e,0x00050084,0x00000008,0x000000fb,0x000000f9,0x000000fa,0x00050080,0x00000008, |
||||
0x000000fc,0x000000f8,0x000000fb,0x00060041,0x000000c5,0x000000fd,0x000000cb,0x00000017, |
||||
0x000000fc,0x0004003d,0x000000b2,0x000000fe,0x000000fd,0x00050085,0x000000b2,0x000000ff, |
||||
0x000000f7,0x000000fe,0x00060041,0x000000c5,0x00000100,0x000000b6,0x00000017,0x000000f0, |
||||
0x0003003e,0x00000100,0x000000ff,0x000200f9,0x000000b1,0x000200f8,0x000000b0,0x0004003d, |
||||
0x00000008,0x00000102,0x00000044,0x0004003d,0x00000008,0x00000103,0x000000a1,0x0004003d, |
||||
0x00000008,0x00000104,0x00000025,0x00050084,0x00000008,0x00000105,0x00000103,0x00000104, |
||||
0x00050080,0x00000008,0x00000106,0x00000102,0x00000105,0x0004003d,0x00000008,0x00000107, |
||||
0x00000042,0x0004003d,0x00000008,0x00000108,0x000000a1,0x0004003d,0x00000008,0x00000109, |
||||
0x00000012,0x00050084,0x00000008,0x0000010a,0x00000108,0x00000109,0x00050080,0x00000008, |
||||
0x0000010b,0x00000107,0x0000010a,0x00060041,0x000000c5,0x0000010c,0x000000bf,0x00000017, |
||||
0x0000010b,0x0004003d,0x000000b2,0x0000010d,0x0000010c,0x0004003d,0x00000008,0x0000010e, |
||||
0x00000043,0x0004003d,0x00000008,0x0000010f,0x000000a1,0x0004003d,0x00000008,0x00000110, |
||||
0x0000001e,0x00050084,0x00000008,0x00000111,0x0000010f,0x00000110,0x00050080,0x00000008, |
||||
0x00000112,0x0000010e,0x00000111,0x00060041,0x000000c5,0x00000113,0x000000cb,0x00000017, |
||||
0x00000112,0x0004003d,0x000000b2,0x00000114,0x00000113,0x00050088,0x000000b2,0x00000115, |
||||
0x0000010d,0x00000114,0x00060041,0x000000c5,0x00000116,0x000000b6,0x00000017,0x00000106, |
||||
0x0003003e,0x00000116,0x00000115,0x000200f9,0x000000b1,0x000200f8,0x000000b1,0x000200f9, |
||||
0x000000a6,0x000200f8,0x000000a6,0x0004003d,0x00000008,0x0000011a,0x000000a1,0x00050080, |
||||
0x00000008,0x0000011b,0x0000011a,0x00000119,0x0003003e,0x000000a1,0x0000011b,0x000200f9, |
||||
0x000000a3,0x000200f8,0x000000a5,0x000100fd,0x00010038 |
||||
}; |
||||
|
||||
}}} // namespace cv::dnn::vkcom
|
@ -0,0 +1,197 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../../precomp.hpp" |
||||
#include "internal.hpp" |
||||
#include "../include/op_naryeltwise.hpp" |
||||
|
||||
namespace cv { namespace dnn { namespace vkcom { |
||||
|
||||
#ifdef HAVE_VULKAN |
||||
|
||||
// NOTE(review): STEP_SIZE has no user in the visible part of this file - confirm it is needed.
#define STEP_SIZE 65536

// Upper bound on the number of work groups per dispatch axis.
// NOTE(review): 65535 looks like the minimum that Vulkan guarantees for
// maxComputeWorkGroupCount; confirm whether the device limit should be queried instead.
#define MAX_GROUP_COUNT_X 65535
#define MAX_GROUP_COUNT_Y 65535
#define MAX_GROUP_COUNT_Z 65535
||||
|
||||
/**
 * Builds the Vulkan n-ary element-wise operator.
 *
 * The shapes and steps of the output (index 0) and of every input (indices
 * 1..ninputs) are flattened into shapesBuf / stepsBuf, max_ndims entries per
 * array. The shader family and shader name are then chosen from the operation.
 *
 * @param _naryOpType operation to perform
 * @param _ninputs    number of inputs; must be greater than 1
 * @param _max_ndims  number of dimensions per shape/step vector; must be at least 2
 * @param shapes      at least _ninputs + 1 vectors of at most _max_ndims entries each
 * @param steps       same layout as @p shapes (presumably byte steps - binaryForward
 *                    divides them by 4 for fp32; TODO confirm)
 *
 * Raises a cv::Exception when a precondition fails or when the operation is
 * not supported by this backend.
 */
OpNary::OpNary(const OpNary::OPERATION _naryOpType, int _ninputs, int _max_ndims,
               const std::vector<std::vector<int>> shapes, const std::vector<std::vector<size_t>> steps)
    : naryOpType(_naryOpType), ninputs(_ninputs), max_ndims(_max_ndims)
{
    CV_Assert(ninputs > 1);
    // Checked up front: max_ndims sizes both buffers and is used as
    // "max_ndims - 2" below, so smaller values must never get that far.
    CV_Assert(max_ndims >= 2);

    const size_t narrays = static_cast<size_t>(ninputs) + 1; // output + inputs
    const size_t ndims = static_cast<size_t>(max_ndims);

    // Every array indexed below has to be present ...
    CV_Assert(shapes.size() >= narrays);
    CV_Assert(steps.size() >= narrays);

    shapesBuf.resize(narrays * ndims);
    stepsBuf.resize(narrays * ndims);
    for (size_t i = 0; i < narrays; i++)
    {
        // ... and must fit into its max_ndims-wide slot, otherwise the copy
        // would run into the next slot or past the end of the buffer.
        CV_Assert(shapes[i].size() <= ndims);
        CV_Assert(steps[i].size() <= ndims);
        std::copy(shapes[i].begin(), shapes[i].end(), shapesBuf.data() + i * ndims);
        std::copy(steps[i].begin(), steps[i].end(), stepsBuf.data() + i * ndims);
    }

    // TODO(VK): support more types of operation
    switch(naryOpType) {
        // case OPERATION::EQUAL:
        // case OPERATION::GREATER:
        // case OPERATION::GREATER_EQUAL:
        // case OPERATION::LESS:
        // case OPERATION::LESS_EQUAL:
        // case OPERATION::POW:
        // case OPERATION::BITSHIFT:
        // case OPERATION::MOD:
        case OPERATION::PROD:
        case OPERATION::SUB:
        case OPERATION::ADD:
        case OPERATION::DIV:
        // case OPERATION::AND:
        // case OPERATION::OR:
        // case OPERATION::XOR:
        {
            CV_Assert(ninputs == 2);
            shaderType = kNaryShaderTypeBinary;
            shader_name = "nary_eltwise_binary_forward_spv";

            // TODO(VK): confirm if this makes any sense
            // Product of all output dimensions except the last two.
            nplanes = std::accumulate(shapesBuf.data(), shapesBuf.data() + max_ndims - 2, 1, [](int32_t a, int32_t b) { return a * b; } );
            N2 = shapesBuf.data()[max_ndims - 2];
            N1 = shapesBuf.data()[max_ndims - 1];
            CV_LOG_DEBUG(NULL, "max_ndims="<<max_ndims<<", nplanes="<<nplanes<<", N2="<<N2<<", N1="<<N1);
            break;
        }
        case OPERATION::WHERE:
        {
            CV_Assert(ninputs == 3);
            shaderType = kNaryShaderTypeTrinary;
            shader_name = "nary_eltwise_trinary_forward_spv";
            break;
        }
        // case OPERATION::MAX:
        // case OPERATION::MEAN:
        // case OPERATION::MIN:
        case OPERATION::SUM:
        {
            shaderType = kNaryShaderTypeNary;
            shader_name = "nary_eltwise_nary_forward_spv";
            break;
        }
        //TODO(VK) add other cases
        default:
            CV_Error(Error::StsNotImplemented, "Unsupported nary operation type");
    }
    // TODO(VK): initialize OpNary class
}
||||
|
||||
void OpNary::firstForward() |
||||
{ |
||||
if (!firstForwardFinsh) |
||||
{ |
||||
config.local_size_x = 1; // TODO(vk) determine local_size_y if necessary
|
||||
config.local_size_y = 1; // TODO(vk) determine local_size_y if necessary
|
||||
config.local_size_z = 1; // TODO(vk) determine local_size_z if necessary
|
||||
computeGroupCount(); |
||||
firstForwardFinsh = true; |
||||
} |
||||
else |
||||
return; |
||||
} |
||||
|
||||
bool OpNary::binaryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs) |
||||
{ |
||||
std::vector<int32_t> param = {(int32_t)naryOpType, max_ndims}; |
||||
std::vector<int32_t> paramSize = {(int32_t)param.size()}; |
||||
std::vector<int32_t> dimSizes = {(ninputs + 1) * max_ndims}; |
||||
std::vector<int32_t> actualSteps; |
||||
|
||||
// TODO(VK): compute step for different dtype. Currently this is for kFormatFp32.
|
||||
actualSteps.resize(stepsBuf.size()); |
||||
std::transform(stepsBuf.data(), stepsBuf.data() + dimSizes[0], actualSteps.begin(), [](int32_t sz){ return sz / 4; }); |
||||
|
||||
Tensor paramTensor = Tensor(reinterpret_cast<const char *>(param.data()), paramSize, kFormatInt32, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); |
||||
Tensor shapeTensor = Tensor(reinterpret_cast<const char *>(shapesBuf.data()), dimSizes, kFormatInt32, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); |
||||
Tensor stepTensor = Tensor(reinterpret_cast<const char *>(actualSteps.data()), dimSizes, kFormatInt32, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); |
||||
|
||||
destTypes = { |
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // input1
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // input2
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // out
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, // param
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // shape
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // step
|
||||
}; |
||||
|
||||
|
||||
Ptr<Pipeline> pipeline = pipelineFactoryPtr->getPipeline(shader_name, destTypes); |
||||
Ptr<CommandBuffer> cmdBuffer = cmdPoolPtr->allocBuffer(); |
||||
Ptr<Descriptor> desSet = pipeline->createSet(); |
||||
VkCommandBuffer cmdBufferReal = cmdBuffer->get(); |
||||
|
||||
desSet->writeTensor(ins[0], 0); |
||||
desSet->writeTensor(ins[1], 1); |
||||
desSet->writeTensor(outs[0], 2); |
||||
desSet->writeTensor(paramTensor, 3); |
||||
desSet->writeTensor(shapeTensor, 4); |
||||
desSet->writeTensor(stepTensor, 5); |
||||
|
||||
cmdBuffer->beginRecord(); |
||||
pipeline->bind(cmdBufferReal, desSet->get()); |
||||
vkCmdDispatch(cmdBufferReal, group_x_, group_y_, group_z_); |
||||
cmdBuffer->endRecord(); |
||||
cmdPoolPtr->submitAndWait(cmdBufferReal); |
||||
|
||||
return true; |
||||
} |
||||
|
||||
bool OpNary::forward(std::vector<Tensor>& ins, std::vector<Tensor>& outs) |
||||
{ |
||||
|
||||
firstForward(); |
||||
|
||||
// TODO(VK): Support more dtypes. Currently only kFormatFp32 is supported.
|
||||
for (auto &tensor: ins) |
||||
{ |
||||
CV_Assert(tensor.getFormat() == kFormatFp32); |
||||
} |
||||
for (auto &tensor: outs) |
||||
{ |
||||
CV_Assert(tensor.getFormat() == kFormatFp32); |
||||
} |
||||
|
||||
switch(shaderType) { |
||||
case kNaryShaderTypeBinary: { |
||||
return binaryForward(ins, outs); |
||||
break; |
||||
} |
||||
default: |
||||
CV_Error(Error::StsNotImplemented, "Unsupported shader type invoked."); |
||||
} |
||||
|
||||
return true; |
||||
} |
||||
|
||||
bool OpNary::computeGroupCount() |
||||
{ |
||||
if (shaderType == kNaryShaderTypeBinary) |
||||
{ |
||||
group_x_ = nplanes; // parallelism at plane level
|
||||
group_y_ = N2; |
||||
group_z_ = 1; |
||||
} |
||||
else |
||||
{ |
||||
CV_Error(CV_StsNotImplemented, "shader type is not supported at compute GroupCount."); |
||||
} |
||||
|
||||
CV_Assert(group_x_ <= MAX_GROUP_COUNT_X); |
||||
CV_Assert(group_y_ <= MAX_GROUP_COUNT_Y); |
||||
CV_Assert(group_z_ <= MAX_GROUP_COUNT_Z); |
||||
|
||||
return true; |
||||
} |
||||
|
||||
#endif // HAVE_VULKAN
|
||||
|
||||
}}} // namespace cv::dnn::vkcom
|
Loading…
Reference in new issue