commit
3a55f50133
131 changed files with 3444 additions and 862 deletions
@ -0,0 +1,87 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_OP_NARY_HPP |
||||
#define OPENCV_OP_NARY_HPP |
||||
|
||||
#include "vkcom.hpp" |
||||
#include "op_base.hpp" |
||||
|
||||
namespace cv { namespace dnn { namespace vkcom { |
||||
|
||||
#ifdef HAVE_VULKAN |
||||
|
||||
// Selects which compute-shader variant an OpNary instance dispatches.
enum NaryShaderType
{
    kNaryShaderTypeBinary  = 0, // two-input shader
    kNaryShaderTypeTrinary = 1, // three-input shader
    kNaryShaderTypeNary    = 2, // shader for an arbitrary number of inputs
    kNaryShaderTest        = 3,
};
||||
|
||||
// Work-group dimensions used by the nary element-wise shaders, one extent per axis.
struct NaryShaderConfig
{
    int local_size_x; // extent along x
    int local_size_y; // extent along y
    int local_size_z; // extent along z
};
||||
|
||||
|
||||
class OpNary : public OpBase |
||||
{ |
||||
public: |
||||
// Copied from nary_eltwise_layers.cpp
|
||||
enum class OPERATION |
||||
{ |
||||
AND = 0, |
||||
EQUAL, |
||||
GREATER, |
||||
GREATER_EQUAL, |
||||
LESS, |
||||
LESS_EQUAL, |
||||
OR, |
||||
POW, |
||||
XOR, |
||||
BITSHIFT, |
||||
MAX, |
||||
MEAN, |
||||
MIN, |
||||
MOD, |
||||
PROD, |
||||
SUB, |
||||
SUM, |
||||
ADD, |
||||
DIV, |
||||
WHERE, |
||||
}; |
||||
|
||||
OpNary(const OPERATION naryOpType, int ninputs, int max_ndims, const std::vector<std::vector<int>> shapes, const std::vector<std::vector<size_t>> steps); |
||||
|
||||
void firstForward(); // Execute only in the first forward.
|
||||
virtual bool forward(std::vector<Tensor>& ins, std::vector<Tensor>& outs) CV_OVERRIDE; |
||||
Ptr<Tensor> weightTensorPtr; |
||||
private: |
||||
bool computeGroupCount(); |
||||
bool binaryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs); |
||||
bool trinaryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs); |
||||
bool naryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs); |
||||
|
||||
const OPERATION naryOpType; |
||||
NaryShaderType shaderType; |
||||
NaryShaderConfig config; |
||||
int ninputs; |
||||
int max_ndims; |
||||
AutoBuffer<int32_t> shapesBuf; |
||||
AutoBuffer<int32_t> stepsBuf; |
||||
int nplanes; // number of planes computations are to be performed on
|
||||
int N2; // value of shape[ndims - 2]
|
||||
int N1; // value of shape[ndims - 1]
|
||||
|
||||
bool firstForwardFinsh = false; |
||||
}; |
||||
|
||||
#endif // HAVE_VULKAN
|
||||
|
||||
}}} // namespace cv::dnn::vkcom
|
||||
#endif // OPENCV_OP_NARY_HPP
|
@ -0,0 +1,116 @@ |
||||
#version 450 |
||||
// #extension GL_EXT_debug_printf : enable |
||||
// Number of invocations per work group along x; also the stride of the
// per-row loop in binary_forward().
#define ALL_THREAD 1024
// #define ALL_THREAD 128 // Experimental batched operation
// Not referenced anywhere in this shader.
#define STEP_SIZE 65536

// First operand.
layout(binding = 0) readonly buffer Input1 {
    float matA[];
};

// Second operand.
layout(binding = 1) readonly buffer Input2 {
    float matB[];
};

// Result.
layout(binding = 2) writeonly buffer Output {
    float matOut[];
};

// Scalar parameters: operation code and number of dimensions per tensor.
layout(binding = 3) uniform Params {
    int opType;
    int ndims;
} params;

// Flattened shapes; binary_forward() reads the first `ndims` entries.
layout(binding = 4) readonly buffer Shape {
    int shape[];
};

// Flattened element steps, `ndims` entries per tensor, indexed as:
// [0, ndims) -> matOut, [ndims, 2*ndims) -> matA, [2*ndims, 3*ndims) -> matB.
layout(binding = 5) readonly buffer Step {
    int matStep[];
};

/* local_size_x, local_size_y and local_size_z define the number of invocations
   of this compute shader in one work group. */
// TODO: Check whether the limits have to be fetched from the device
//       (PhysicalDeviceLimits) through the Context.
// TODO: For now it is assumed that the device allows 1024 invocations per group.
layout(local_size_x = ALL_THREAD, local_size_y = 1, local_size_z = 1) in;
||||
|
||||
// Operation codes compared against params.opType.
// Only ADD, SUB, PROD and DIV are handled by binary_forward(); any other code
// leaves the output untouched.

// Logical / comparison
const int AND           = 0;
const int EQUAL         = 1;
const int GREATER       = 2;
const int GREATER_EQUAL = 3;
const int LESS          = 4;
const int LESS_EQUAL    = 5;
const int OR            = 6;
const int POW           = 7;
const int XOR           = 8;
const int BITSHIFT      = 9;

// Reductions / modulo
const int MAX           = 10;
const int MEAN          = 11;
const int MIN           = 12;
const int MOD           = 13;
const int FMOD          = 14;

// Arithmetic / selection
const int PROD          = 15;
const int SUB           = 16;
const int SUM           = 17;
const int ADD           = 18;
const int DIV           = 19;
const int WHERE         = 20;
||||
|
||||
// Applies the binary operation selected by params.opType.
//
// Work decomposition:
//   gl_WorkGroupID.x        -> linear index over all axes except the last two
//   gl_WorkGroupID.y        -> index along axis (ndims - 2)
//   gl_LocalInvocationID.x  -> starting index along axis (ndims - 1); each
//                              invocation then advances by ALL_THREAD.
void binary_forward()
{
    const int rank = params.ndims;

    // Steps along the innermost axis for output, first and second operand.
    const int out_inner = matStep[rank - 1];
    const int a_inner   = matStep[2 * rank - 1];
    const int b_inner   = matStep[3 * rank - 1];
    const int row_len   = shape[rank - 1];

    int out_base = 0;
    int a_base = 0;
    int b_base = 0;

    // Unravel the plane index into per-axis coordinates, innermost outer axis
    // first, accumulating each tensor's base offset along the way.
    int remaining = int(gl_WorkGroupID.x);
    for (int axis = rank - 3; axis >= 0; --axis) {
        const int extent = shape[axis];
        const int quotient = remaining / extent;
        const int coord = remaining - quotient * extent; // remaining % extent

        a_base   += coord * matStep[rank + axis];
        b_base   += coord * matStep[2 * rank + axis];
        out_base += coord * matStep[axis];

        remaining = quotient;
    }

    // Offset along the second-to-last axis.
    const int row = int(gl_WorkGroupID.y);
    a_base   += row * matStep[2 * rank - 2];
    b_base   += row * matStep[3 * rank - 2];
    out_base += row * matStep[rank - 2];

    for (int col = int(gl_LocalInvocationID.x); col < row_len; col += ALL_THREAD) {
        const int out_idx = out_base + col * out_inner;
        const int a_idx   = a_base + col * a_inner;
        const int b_idx   = b_base + col * b_inner;

        switch (params.opType) {
            case ADD:
                matOut[out_idx] = matA[a_idx] + matB[b_idx];
                break;
            case SUB:
                matOut[out_idx] = matA[a_idx] - matB[b_idx];
                break;
            case PROD:
                matOut[out_idx] = matA[a_idx] * matB[b_idx];
                break;
            case DIV:
                matOut[out_idx] = matA[a_idx] / matB[b_idx];
                break;
        }
    }
}
||||
|
||||
|
||||
// Shader entry point: every invocation runs the binary element-wise kernel.
void main()
{
    // debugPrintfEXT("nary_eltwise_binary_forward.comp loaded\n");
    binary_forward();
}
@ -0,0 +1,232 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../../precomp.hpp" |
||||
|
||||
namespace cv { namespace dnn { namespace vkcom { |
||||
|
||||
extern const unsigned int nary_eltwise_binary_forward_spv[1757] = { |
||||
0x07230203,0x00010000,0x0008000b,0x00000131,0x00000000,0x00020011,0x00000001,0x0006000b, |
||||
0x00000001,0x4c534c47,0x6474732e,0x3035342e,0x00000000,0x0003000e,0x00000000,0x00000001, |
||||
0x0007000f,0x00000005,0x00000004,0x6e69616d,0x00000000,0x0000003c,0x00000083,0x00060010, |
||||
0x00000004,0x00000011,0x00000400,0x00000001,0x00000001,0x00030003,0x00000002,0x000001c2, |
||||
0x00040005,0x00000004,0x6e69616d,0x00000000,0x00060005,0x00000006,0x616e6962,0x665f7972, |
||||
0x6177726f,0x00286472,0x00040005,0x0000000a,0x6d69646e,0x00000073,0x00040005,0x0000000b, |
||||
0x61726150,0x0000736d,0x00050006,0x0000000b,0x00000000,0x7954706f,0x00006570,0x00050006, |
||||
0x0000000b,0x00000001,0x6d69646e,0x00000073,0x00040005,0x0000000d,0x61726170,0x0000736d, |
||||
0x00030005,0x00000012,0x00317064,0x00040005,0x00000014,0x70657453,0x00000000,0x00050006, |
||||
0x00000014,0x00000000,0x5374616d,0x00706574,0x00030005,0x00000016,0x00000000,0x00030005, |
||||
0x0000001e,0x00327064,0x00030005,0x00000025,0x00007064,0x00030005,0x0000002a,0x0000316e, |
||||
0x00040005,0x0000002c,0x70616853,0x00000065,0x00050006,0x0000002c,0x00000000,0x70616873, |
||||
0x00000065,0x00030005,0x0000002e,0x00000000,0x00030005,0x00000033,0x0000326e,0x00050005, |
||||
0x00000038,0x6e616c70,0x64695f65,0x00000078,0x00060005,0x0000003c,0x575f6c67,0x476b726f, |
||||
0x70756f72,0x00004449,0x00040005,0x00000042,0x31727470,0x00000000,0x00040005,0x00000043, |
||||
0x32727470,0x00000000,0x00030005,0x00000044,0x00727470,0x00030005,0x00000045,0x00786469, |
||||
0x00030005,0x00000047,0x0000006b,0x00050005,0x00000052,0x7478656e,0x7864695f,0x00000000, |
||||
0x00030005,0x00000058,0x006b5f69,0x00050005,0x0000007d,0x6f5f3269,0x65736666,0x00000074, |
||||
0x00050005,0x00000082,0x6f5f3169,0x65736666,0x00000074,0x00080005,0x00000083,0x4c5f6c67, |
||||
0x6c61636f,0x6f766e49,0x69746163,0x44496e6f,0x00000000,0x00030005,0x000000a1,0x00003169, |
||||
0x00040005,0x000000b4,0x7074754f,0x00007475,0x00050006,0x000000b4,0x00000000,0x4f74616d, |
||||
0x00007475,0x00030005,0x000000b6,0x00000000,0x00040005,0x000000bd,0x75706e49,0x00003174, |
||||
0x00050006,0x000000bd,0x00000000,0x4174616d,0x00000000,0x00030005,0x000000bf,0x00000000, |
||||
0x00040005,0x000000c9,0x75706e49,0x00003274,0x00050006,0x000000c9,0x00000000,0x4274616d, |
||||
0x00000000,0x00030005,0x000000cb,0x00000000,0x00050048,0x0000000b,0x00000000,0x00000023, |
||||
0x00000000,0x00050048,0x0000000b,0x00000001,0x00000023,0x00000004,0x00030047,0x0000000b, |
||||
0x00000002,0x00040047,0x0000000d,0x00000022,0x00000000,0x00040047,0x0000000d,0x00000021, |
||||
0x00000003,0x00040047,0x00000013,0x00000006,0x00000004,0x00040048,0x00000014,0x00000000, |
||||
0x00000018,0x00050048,0x00000014,0x00000000,0x00000023,0x00000000,0x00030047,0x00000014, |
||||
0x00000003,0x00040047,0x00000016,0x00000022,0x00000000,0x00040047,0x00000016,0x00000021, |
||||
0x00000005,0x00040047,0x0000002b,0x00000006,0x00000004,0x00040048,0x0000002c,0x00000000, |
||||
0x00000018,0x00050048,0x0000002c,0x00000000,0x00000023,0x00000000,0x00030047,0x0000002c, |
||||
0x00000003,0x00040047,0x0000002e,0x00000022,0x00000000,0x00040047,0x0000002e,0x00000021, |
||||
0x00000004,0x00040047,0x0000003c,0x0000000b,0x0000001a,0x00040047,0x00000083,0x0000000b, |
||||
0x0000001b,0x00040047,0x000000b3,0x00000006,0x00000004,0x00040048,0x000000b4,0x00000000, |
||||
0x00000019,0x00050048,0x000000b4,0x00000000,0x00000023,0x00000000,0x00030047,0x000000b4, |
||||
0x00000003,0x00040047,0x000000b6,0x00000022,0x00000000,0x00040047,0x000000b6,0x00000021, |
||||
0x00000002,0x00040047,0x000000bc,0x00000006,0x00000004,0x00040048,0x000000bd,0x00000000, |
||||
0x00000018,0x00050048,0x000000bd,0x00000000,0x00000023,0x00000000,0x00030047,0x000000bd, |
||||
0x00000003,0x00040047,0x000000bf,0x00000022,0x00000000,0x00040047,0x000000bf,0x00000021, |
||||
0x00000000,0x00040047,0x000000c8,0x00000006,0x00000004,0x00040048,0x000000c9,0x00000000, |
||||
0x00000018,0x00050048,0x000000c9,0x00000000,0x00000023,0x00000000,0x00030047,0x000000c9, |
||||
0x00000003,0x00040047,0x000000cb,0x00000022,0x00000000,0x00040047,0x000000cb,0x00000021, |
||||
0x00000001,0x00040047,0x0000011f,0x0000000b,0x00000019,0x00020013,0x00000002,0x00030021, |
||||
0x00000003,0x00000002,0x00040015,0x00000008,0x00000020,0x00000001,0x00040020,0x00000009, |
||||
0x00000007,0x00000008,0x0004001e,0x0000000b,0x00000008,0x00000008,0x00040020,0x0000000c, |
||||
0x00000002,0x0000000b,0x0004003b,0x0000000c,0x0000000d,0x00000002,0x0004002b,0x00000008, |
||||
0x0000000e,0x00000001,0x00040020,0x0000000f,0x00000002,0x00000008,0x0003001d,0x00000013, |
||||
0x00000008,0x0003001e,0x00000014,0x00000013,0x00040020,0x00000015,0x00000002,0x00000014, |
||||
0x0004003b,0x00000015,0x00000016,0x00000002,0x0004002b,0x00000008,0x00000017,0x00000000, |
||||
0x0004002b,0x00000008,0x00000018,0x00000002,0x0004002b,0x00000008,0x0000001f,0x00000003, |
||||
0x0003001d,0x0000002b,0x00000008,0x0003001e,0x0000002c,0x0000002b,0x00040020,0x0000002d, |
||||
0x00000002,0x0000002c,0x0004003b,0x0000002d,0x0000002e,0x00000002,0x00040015,0x00000039, |
||||
0x00000020,0x00000000,0x00040017,0x0000003a,0x00000039,0x00000003,0x00040020,0x0000003b, |
||||
0x00000001,0x0000003a,0x0004003b,0x0000003b,0x0000003c,0x00000001,0x0004002b,0x00000039, |
||||
0x0000003d,0x00000000,0x00040020,0x0000003e,0x00000001,0x00000039,0x00020014,0x00000050, |
||||
0x0004002b,0x00000039,0x0000007e,0x00000001,0x0004003b,0x0000003b,0x00000083,0x00000001, |
||||
0x00030016,0x000000b2,0x00000020,0x0003001d,0x000000b3,0x000000b2,0x0003001e,0x000000b4, |
||||
0x000000b3,0x00040020,0x000000b5,0x00000002,0x000000b4,0x0004003b,0x000000b5,0x000000b6, |
||||
0x00000002,0x0003001d,0x000000bc,0x000000b2,0x0003001e,0x000000bd,0x000000bc,0x00040020, |
||||
0x000000be,0x00000002,0x000000bd,0x0004003b,0x000000be,0x000000bf,0x00000002,0x00040020, |
||||
0x000000c5,0x00000002,0x000000b2,0x0003001d,0x000000c8,0x000000b2,0x0003001e,0x000000c9, |
||||
0x000000c8,0x00040020,0x000000ca,0x00000002,0x000000c9,0x0004003b,0x000000ca,0x000000cb, |
||||
0x00000002,0x0004002b,0x00000008,0x00000119,0x00000400,0x0004002b,0x00000039,0x0000011e, |
||||
0x00000400,0x0006002c,0x0000003a,0x0000011f,0x0000011e,0x0000007e,0x0000007e,0x0004002b, |
||||
0x00000008,0x00000120,0x00000004,0x0004002b,0x00000008,0x00000121,0x00000005,0x0004002b, |
||||
0x00000008,0x00000122,0x00000006,0x0004002b,0x00000008,0x00000123,0x00000007,0x0004002b, |
||||
0x00000008,0x00000124,0x00000008,0x0004002b,0x00000008,0x00000125,0x00000009,0x0004002b, |
||||
0x00000008,0x00000126,0x0000000a,0x0004002b,0x00000008,0x00000127,0x0000000b,0x0004002b, |
||||
0x00000008,0x00000128,0x0000000c,0x0004002b,0x00000008,0x00000129,0x0000000d,0x0004002b, |
||||
0x00000008,0x0000012a,0x0000000e,0x0004002b,0x00000008,0x0000012b,0x0000000f,0x0004002b, |
||||
0x00000008,0x0000012c,0x00000010,0x0004002b,0x00000008,0x0000012d,0x00000011,0x0004002b, |
||||
0x00000008,0x0000012e,0x00000012,0x0004002b,0x00000008,0x0000012f,0x00000013,0x0004002b, |
||||
0x00000008,0x00000130,0x00000014,0x00050036,0x00000002,0x00000004,0x00000000,0x00000003, |
||||
0x000200f8,0x00000005,0x00040039,0x00000002,0x0000011c,0x00000006,0x000100fd,0x00010038, |
||||
0x00050036,0x00000002,0x00000006,0x00000000,0x00000003,0x000200f8,0x00000007,0x0004003b, |
||||
0x00000009,0x0000000a,0x00000007,0x0004003b,0x00000009,0x00000012,0x00000007,0x0004003b, |
||||
0x00000009,0x0000001e,0x00000007,0x0004003b,0x00000009,0x00000025,0x00000007,0x0004003b, |
||||
0x00000009,0x0000002a,0x00000007,0x0004003b,0x00000009,0x00000033,0x00000007,0x0004003b, |
||||
0x00000009,0x00000038,0x00000007,0x0004003b,0x00000009,0x00000042,0x00000007,0x0004003b, |
||||
0x00000009,0x00000043,0x00000007,0x0004003b,0x00000009,0x00000044,0x00000007,0x0004003b, |
||||
0x00000009,0x00000045,0x00000007,0x0004003b,0x00000009,0x00000047,0x00000007,0x0004003b, |
||||
0x00000009,0x00000052,0x00000007,0x0004003b,0x00000009,0x00000058,0x00000007,0x0004003b, |
||||
0x00000009,0x0000007d,0x00000007,0x0004003b,0x00000009,0x00000082,0x00000007,0x0004003b, |
||||
0x00000009,0x000000a1,0x00000007,0x00050041,0x0000000f,0x00000010,0x0000000d,0x0000000e, |
||||
0x0004003d,0x00000008,0x00000011,0x00000010,0x0003003e,0x0000000a,0x00000011,0x0004003d, |
||||
0x00000008,0x00000019,0x0000000a,0x00050084,0x00000008,0x0000001a,0x00000018,0x00000019, |
||||
0x00050082,0x00000008,0x0000001b,0x0000001a,0x0000000e,0x00060041,0x0000000f,0x0000001c, |
||||
0x00000016,0x00000017,0x0000001b,0x0004003d,0x00000008,0x0000001d,0x0000001c,0x0003003e, |
||||
0x00000012,0x0000001d,0x0004003d,0x00000008,0x00000020,0x0000000a,0x00050084,0x00000008, |
||||
0x00000021,0x0000001f,0x00000020,0x00050082,0x00000008,0x00000022,0x00000021,0x0000000e, |
||||
0x00060041,0x0000000f,0x00000023,0x00000016,0x00000017,0x00000022,0x0004003d,0x00000008, |
||||
0x00000024,0x00000023,0x0003003e,0x0000001e,0x00000024,0x0004003d,0x00000008,0x00000026, |
||||
0x0000000a,0x00050082,0x00000008,0x00000027,0x00000026,0x0000000e,0x00060041,0x0000000f, |
||||
0x00000028,0x00000016,0x00000017,0x00000027,0x0004003d,0x00000008,0x00000029,0x00000028, |
||||
0x0003003e,0x00000025,0x00000029,0x0004003d,0x00000008,0x0000002f,0x0000000a,0x00050082, |
||||
0x00000008,0x00000030,0x0000002f,0x0000000e,0x00060041,0x0000000f,0x00000031,0x0000002e, |
||||
0x00000017,0x00000030,0x0004003d,0x00000008,0x00000032,0x00000031,0x0003003e,0x0000002a, |
||||
0x00000032,0x0004003d,0x00000008,0x00000034,0x0000000a,0x00050082,0x00000008,0x00000035, |
||||
0x00000034,0x00000018,0x00060041,0x0000000f,0x00000036,0x0000002e,0x00000017,0x00000035, |
||||
0x0004003d,0x00000008,0x00000037,0x00000036,0x0003003e,0x00000033,0x00000037,0x00050041, |
||||
0x0000003e,0x0000003f,0x0000003c,0x0000003d,0x0004003d,0x00000039,0x00000040,0x0000003f, |
||||
0x0004007c,0x00000008,0x00000041,0x00000040,0x0003003e,0x00000038,0x00000041,0x0003003e, |
||||
0x00000042,0x00000017,0x0003003e,0x00000043,0x00000017,0x0003003e,0x00000044,0x00000017, |
||||
0x0004003d,0x00000008,0x00000046,0x00000038,0x0003003e,0x00000045,0x00000046,0x0004003d, |
||||
0x00000008,0x00000048,0x0000000a,0x00050082,0x00000008,0x00000049,0x00000048,0x0000001f, |
||||
0x0003003e,0x00000047,0x00000049,0x000200f9,0x0000004a,0x000200f8,0x0000004a,0x000400f6, |
||||
0x0000004c,0x0000004d,0x00000000,0x000200f9,0x0000004e,0x000200f8,0x0000004e,0x0004003d, |
||||
0x00000008,0x0000004f,0x00000047,0x000500af,0x00000050,0x00000051,0x0000004f,0x00000017, |
||||
0x000400fa,0x00000051,0x0000004b,0x0000004c,0x000200f8,0x0000004b,0x0004003d,0x00000008, |
||||
0x00000053,0x00000045,0x0004003d,0x00000008,0x00000054,0x00000047,0x00060041,0x0000000f, |
||||
0x00000055,0x0000002e,0x00000017,0x00000054,0x0004003d,0x00000008,0x00000056,0x00000055, |
||||
0x00050087,0x00000008,0x00000057,0x00000053,0x00000056,0x0003003e,0x00000052,0x00000057, |
||||
0x0004003d,0x00000008,0x00000059,0x00000045,0x0004003d,0x00000008,0x0000005a,0x00000052, |
||||
0x0004003d,0x00000008,0x0000005b,0x00000047,0x00060041,0x0000000f,0x0000005c,0x0000002e, |
||||
0x00000017,0x0000005b,0x0004003d,0x00000008,0x0000005d,0x0000005c,0x00050084,0x00000008, |
||||
0x0000005e,0x0000005a,0x0000005d,0x00050082,0x00000008,0x0000005f,0x00000059,0x0000005e, |
||||
0x0003003e,0x00000058,0x0000005f,0x0004003d,0x00000008,0x00000060,0x00000058,0x0004003d, |
||||
0x00000008,0x00000061,0x0000000a,0x0004003d,0x00000008,0x00000062,0x00000047,0x00050080, |
||||
0x00000008,0x00000063,0x00000061,0x00000062,0x00060041,0x0000000f,0x00000064,0x00000016, |
||||
0x00000017,0x00000063,0x0004003d,0x00000008,0x00000065,0x00000064,0x00050084,0x00000008, |
||||
0x00000066,0x00000060,0x00000065,0x0004003d,0x00000008,0x00000067,0x00000042,0x00050080, |
||||
0x00000008,0x00000068,0x00000067,0x00000066,0x0003003e,0x00000042,0x00000068,0x0004003d, |
||||
0x00000008,0x00000069,0x00000058,0x0004003d,0x00000008,0x0000006a,0x0000000a,0x00050084, |
||||
0x00000008,0x0000006b,0x00000018,0x0000006a,0x0004003d,0x00000008,0x0000006c,0x00000047, |
||||
0x00050080,0x00000008,0x0000006d,0x0000006b,0x0000006c,0x00060041,0x0000000f,0x0000006e, |
||||
0x00000016,0x00000017,0x0000006d,0x0004003d,0x00000008,0x0000006f,0x0000006e,0x00050084, |
||||
0x00000008,0x00000070,0x00000069,0x0000006f,0x0004003d,0x00000008,0x00000071,0x00000043, |
||||
0x00050080,0x00000008,0x00000072,0x00000071,0x00000070,0x0003003e,0x00000043,0x00000072, |
||||
0x0004003d,0x00000008,0x00000073,0x00000058,0x0004003d,0x00000008,0x00000074,0x00000047, |
||||
0x00060041,0x0000000f,0x00000075,0x00000016,0x00000017,0x00000074,0x0004003d,0x00000008, |
||||
0x00000076,0x00000075,0x00050084,0x00000008,0x00000077,0x00000073,0x00000076,0x0004003d, |
||||
0x00000008,0x00000078,0x00000044,0x00050080,0x00000008,0x00000079,0x00000078,0x00000077, |
||||
0x0003003e,0x00000044,0x00000079,0x0004003d,0x00000008,0x0000007a,0x00000052,0x0003003e, |
||||
0x00000045,0x0000007a,0x000200f9,0x0000004d,0x000200f8,0x0000004d,0x0004003d,0x00000008, |
||||
0x0000007b,0x00000047,0x00050082,0x00000008,0x0000007c,0x0000007b,0x0000000e,0x0003003e, |
||||
0x00000047,0x0000007c,0x000200f9,0x0000004a,0x000200f8,0x0000004c,0x00050041,0x0000003e, |
||||
0x0000007f,0x0000003c,0x0000007e,0x0004003d,0x00000039,0x00000080,0x0000007f,0x0004007c, |
||||
0x00000008,0x00000081,0x00000080,0x0003003e,0x0000007d,0x00000081,0x00050041,0x0000003e, |
||||
0x00000084,0x00000083,0x0000003d,0x0004003d,0x00000039,0x00000085,0x00000084,0x0004007c, |
||||
0x00000008,0x00000086,0x00000085,0x0003003e,0x00000082,0x00000086,0x0004003d,0x00000008, |
||||
0x00000087,0x0000007d,0x0004003d,0x00000008,0x00000088,0x0000000a,0x00050084,0x00000008, |
||||
0x00000089,0x00000018,0x00000088,0x00050082,0x00000008,0x0000008a,0x00000089,0x00000018, |
||||
0x00060041,0x0000000f,0x0000008b,0x00000016,0x00000017,0x0000008a,0x0004003d,0x00000008, |
||||
0x0000008c,0x0000008b,0x00050084,0x00000008,0x0000008d,0x00000087,0x0000008c,0x0004003d, |
||||
0x00000008,0x0000008e,0x00000042,0x00050080,0x00000008,0x0000008f,0x0000008e,0x0000008d, |
||||
0x0003003e,0x00000042,0x0000008f,0x0004003d,0x00000008,0x00000090,0x0000007d,0x0004003d, |
||||
0x00000008,0x00000091,0x0000000a,0x00050084,0x00000008,0x00000092,0x0000001f,0x00000091, |
||||
0x00050082,0x00000008,0x00000093,0x00000092,0x00000018,0x00060041,0x0000000f,0x00000094, |
||||
0x00000016,0x00000017,0x00000093,0x0004003d,0x00000008,0x00000095,0x00000094,0x00050084, |
||||
0x00000008,0x00000096,0x00000090,0x00000095,0x0004003d,0x00000008,0x00000097,0x00000043, |
||||
0x00050080,0x00000008,0x00000098,0x00000097,0x00000096,0x0003003e,0x00000043,0x00000098, |
||||
0x0004003d,0x00000008,0x00000099,0x0000007d,0x0004003d,0x00000008,0x0000009a,0x0000000a, |
||||
0x00050082,0x00000008,0x0000009b,0x0000009a,0x00000018,0x00060041,0x0000000f,0x0000009c, |
||||
0x00000016,0x00000017,0x0000009b,0x0004003d,0x00000008,0x0000009d,0x0000009c,0x00050084, |
||||
0x00000008,0x0000009e,0x00000099,0x0000009d,0x0004003d,0x00000008,0x0000009f,0x00000044, |
||||
0x00050080,0x00000008,0x000000a0,0x0000009f,0x0000009e,0x0003003e,0x00000044,0x000000a0, |
||||
0x0004003d,0x00000008,0x000000a2,0x00000082,0x0003003e,0x000000a1,0x000000a2,0x000200f9, |
||||
0x000000a3,0x000200f8,0x000000a3,0x000400f6,0x000000a5,0x000000a6,0x00000000,0x000200f9, |
||||
0x000000a7,0x000200f8,0x000000a7,0x0004003d,0x00000008,0x000000a8,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000a9,0x0000002a,0x000500b1,0x00000050,0x000000aa,0x000000a8,0x000000a9, |
||||
0x000400fa,0x000000aa,0x000000a4,0x000000a5,0x000200f8,0x000000a4,0x00050041,0x0000000f, |
||||
0x000000ab,0x0000000d,0x00000017,0x0004003d,0x00000008,0x000000ac,0x000000ab,0x000300f7, |
||||
0x000000b1,0x00000000,0x000b00fb,0x000000ac,0x000000b1,0x00000012,0x000000ad,0x00000010, |
||||
0x000000ae,0x0000000f,0x000000af,0x00000013,0x000000b0,0x000200f8,0x000000ad,0x0004003d, |
||||
0x00000008,0x000000b7,0x00000044,0x0004003d,0x00000008,0x000000b8,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000b9,0x00000025,0x00050084,0x00000008,0x000000ba,0x000000b8,0x000000b9, |
||||
0x00050080,0x00000008,0x000000bb,0x000000b7,0x000000ba,0x0004003d,0x00000008,0x000000c0, |
||||
0x00000042,0x0004003d,0x00000008,0x000000c1,0x000000a1,0x0004003d,0x00000008,0x000000c2, |
||||
0x00000012,0x00050084,0x00000008,0x000000c3,0x000000c1,0x000000c2,0x00050080,0x00000008, |
||||
0x000000c4,0x000000c0,0x000000c3,0x00060041,0x000000c5,0x000000c6,0x000000bf,0x00000017, |
||||
0x000000c4,0x0004003d,0x000000b2,0x000000c7,0x000000c6,0x0004003d,0x00000008,0x000000cc, |
||||
0x00000043,0x0004003d,0x00000008,0x000000cd,0x000000a1,0x0004003d,0x00000008,0x000000ce, |
||||
0x0000001e,0x00050084,0x00000008,0x000000cf,0x000000cd,0x000000ce,0x00050080,0x00000008, |
||||
0x000000d0,0x000000cc,0x000000cf,0x00060041,0x000000c5,0x000000d1,0x000000cb,0x00000017, |
||||
0x000000d0,0x0004003d,0x000000b2,0x000000d2,0x000000d1,0x00050081,0x000000b2,0x000000d3, |
||||
0x000000c7,0x000000d2,0x00060041,0x000000c5,0x000000d4,0x000000b6,0x00000017,0x000000bb, |
||||
0x0003003e,0x000000d4,0x000000d3,0x000200f9,0x000000b1,0x000200f8,0x000000ae,0x0004003d, |
||||
0x00000008,0x000000d6,0x00000044,0x0004003d,0x00000008,0x000000d7,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000d8,0x00000025,0x00050084,0x00000008,0x000000d9,0x000000d7,0x000000d8, |
||||
0x00050080,0x00000008,0x000000da,0x000000d6,0x000000d9,0x0004003d,0x00000008,0x000000db, |
||||
0x00000042,0x0004003d,0x00000008,0x000000dc,0x000000a1,0x0004003d,0x00000008,0x000000dd, |
||||
0x00000012,0x00050084,0x00000008,0x000000de,0x000000dc,0x000000dd,0x00050080,0x00000008, |
||||
0x000000df,0x000000db,0x000000de,0x00060041,0x000000c5,0x000000e0,0x000000bf,0x00000017, |
||||
0x000000df,0x0004003d,0x000000b2,0x000000e1,0x000000e0,0x0004003d,0x00000008,0x000000e2, |
||||
0x00000043,0x0004003d,0x00000008,0x000000e3,0x000000a1,0x0004003d,0x00000008,0x000000e4, |
||||
0x0000001e,0x00050084,0x00000008,0x000000e5,0x000000e3,0x000000e4,0x00050080,0x00000008, |
||||
0x000000e6,0x000000e2,0x000000e5,0x00060041,0x000000c5,0x000000e7,0x000000cb,0x00000017, |
||||
0x000000e6,0x0004003d,0x000000b2,0x000000e8,0x000000e7,0x00050083,0x000000b2,0x000000e9, |
||||
0x000000e1,0x000000e8,0x00060041,0x000000c5,0x000000ea,0x000000b6,0x00000017,0x000000da, |
||||
0x0003003e,0x000000ea,0x000000e9,0x000200f9,0x000000b1,0x000200f8,0x000000af,0x0004003d, |
||||
0x00000008,0x000000ec,0x00000044,0x0004003d,0x00000008,0x000000ed,0x000000a1,0x0004003d, |
||||
0x00000008,0x000000ee,0x00000025,0x00050084,0x00000008,0x000000ef,0x000000ed,0x000000ee, |
||||
0x00050080,0x00000008,0x000000f0,0x000000ec,0x000000ef,0x0004003d,0x00000008,0x000000f1, |
||||
0x00000042,0x0004003d,0x00000008,0x000000f2,0x000000a1,0x0004003d,0x00000008,0x000000f3, |
||||
0x00000012,0x00050084,0x00000008,0x000000f4,0x000000f2,0x000000f3,0x00050080,0x00000008, |
||||
0x000000f5,0x000000f1,0x000000f4,0x00060041,0x000000c5,0x000000f6,0x000000bf,0x00000017, |
||||
0x000000f5,0x0004003d,0x000000b2,0x000000f7,0x000000f6,0x0004003d,0x00000008,0x000000f8, |
||||
0x00000043,0x0004003d,0x00000008,0x000000f9,0x000000a1,0x0004003d,0x00000008,0x000000fa, |
||||
0x0000001e,0x00050084,0x00000008,0x000000fb,0x000000f9,0x000000fa,0x00050080,0x00000008, |
||||
0x000000fc,0x000000f8,0x000000fb,0x00060041,0x000000c5,0x000000fd,0x000000cb,0x00000017, |
||||
0x000000fc,0x0004003d,0x000000b2,0x000000fe,0x000000fd,0x00050085,0x000000b2,0x000000ff, |
||||
0x000000f7,0x000000fe,0x00060041,0x000000c5,0x00000100,0x000000b6,0x00000017,0x000000f0, |
||||
0x0003003e,0x00000100,0x000000ff,0x000200f9,0x000000b1,0x000200f8,0x000000b0,0x0004003d, |
||||
0x00000008,0x00000102,0x00000044,0x0004003d,0x00000008,0x00000103,0x000000a1,0x0004003d, |
||||
0x00000008,0x00000104,0x00000025,0x00050084,0x00000008,0x00000105,0x00000103,0x00000104, |
||||
0x00050080,0x00000008,0x00000106,0x00000102,0x00000105,0x0004003d,0x00000008,0x00000107, |
||||
0x00000042,0x0004003d,0x00000008,0x00000108,0x000000a1,0x0004003d,0x00000008,0x00000109, |
||||
0x00000012,0x00050084,0x00000008,0x0000010a,0x00000108,0x00000109,0x00050080,0x00000008, |
||||
0x0000010b,0x00000107,0x0000010a,0x00060041,0x000000c5,0x0000010c,0x000000bf,0x00000017, |
||||
0x0000010b,0x0004003d,0x000000b2,0x0000010d,0x0000010c,0x0004003d,0x00000008,0x0000010e, |
||||
0x00000043,0x0004003d,0x00000008,0x0000010f,0x000000a1,0x0004003d,0x00000008,0x00000110, |
||||
0x0000001e,0x00050084,0x00000008,0x00000111,0x0000010f,0x00000110,0x00050080,0x00000008, |
||||
0x00000112,0x0000010e,0x00000111,0x00060041,0x000000c5,0x00000113,0x000000cb,0x00000017, |
||||
0x00000112,0x0004003d,0x000000b2,0x00000114,0x00000113,0x00050088,0x000000b2,0x00000115, |
||||
0x0000010d,0x00000114,0x00060041,0x000000c5,0x00000116,0x000000b6,0x00000017,0x00000106, |
||||
0x0003003e,0x00000116,0x00000115,0x000200f9,0x000000b1,0x000200f8,0x000000b1,0x000200f9, |
||||
0x000000a6,0x000200f8,0x000000a6,0x0004003d,0x00000008,0x0000011a,0x000000a1,0x00050080, |
||||
0x00000008,0x0000011b,0x0000011a,0x00000119,0x0003003e,0x000000a1,0x0000011b,0x000200f9, |
||||
0x000000a3,0x000200f8,0x000000a5,0x000100fd,0x00010038 |
||||
}; |
||||
|
||||
}}} // namespace cv::dnn::vkcom
|
@ -0,0 +1,197 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../../precomp.hpp" |
||||
#include "internal.hpp" |
||||
#include "../include/op_naryeltwise.hpp" |
||||
|
||||
namespace cv { namespace dnn { namespace vkcom { |
||||
|
||||
#ifdef HAVE_VULKAN |
||||
|
||||
#define STEP_SIZE 65536 |
||||
|
||||
#define MAX_GROUP_COUNT_X 65535 |
||||
#define MAX_GROUP_COUNT_Y 65535 |
||||
#define MAX_GROUP_COUNT_Z 65535 |
||||
|
||||
// Builds the operator: flattens the per-tensor shapes and steps into
// contiguous int32 buffers and selects the shader variant for the operation.
//
// _naryOpType: operation to perform; unsupported values raise StsNotImplemented.
// _ninputs:    number of inputs, must be > 1 (exactly 2 for binary ops, 3 for WHERE).
// _max_ndims:  number of entries reserved per shape/step vector, must be >= 2
//              for every supported operation.
// shapes/steps: at least _ninputs + 1 vectors each, none longer than _max_ndims.
//               Entry 0 is the one the plane geometry is computed from.
OpNary::OpNary(const OpNary::OPERATION _naryOpType, int _ninputs, int _max_ndims,
               const std::vector<std::vector<int>> shapes, const std::vector<std::vector<size_t>> steps)
    : naryOpType(_naryOpType), config(), ninputs(_ninputs), max_ndims(_max_ndims),
      nplanes(0), N2(0), N1(0) // deterministic values for the paths that do not compute them
{
    CV_Assert(ninputs > 1);
    CV_Assert(max_ndims >= 0);
    // The loop below reads entries [0, ninputs] of both vectors.
    CV_Assert(shapes.size() > (size_t)ninputs);
    CV_Assert(steps.size() > (size_t)ninputs);

    shapesBuf.resize((ninputs + 1) * max_ndims);
    stepsBuf.resize((ninputs + 1) * max_ndims);
    for (int i = 0; i <= ninputs; i++)
    {
        // Each entry owns exactly max_ndims slots in the flattened buffers;
        // a longer vector would write past its slot (and past the buffer end).
        CV_Assert(shapes[i].size() <= (size_t)max_ndims);
        CV_Assert(steps[i].size() <= (size_t)max_ndims);

        std::copy(shapes[i].begin(), shapes[i].end(), shapesBuf.data() + i * max_ndims);
        // Steps arrive as size_t; the shader-side buffers are int32.
        std::transform(steps[i].begin(), steps[i].end(), stepsBuf.data() + i * max_ndims,
                       [](size_t step) { return static_cast<int32_t>(step); });
    }

    // TODO(VK): support more types of operation
    // Not handled yet: EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, POW,
    // BITSHIFT, MOD, AND, OR, XOR, MAX, MEAN, MIN.
    switch(naryOpType) {
        case OPERATION::PROD:
        case OPERATION::SUB:
        case OPERATION::ADD:
        case OPERATION::DIV:
        {
            CV_Assert(ninputs == 2);
            CV_Assert(max_ndims >= 2);
            shaderType = kNaryShaderTypeBinary;
            shader_name = "nary_eltwise_binary_forward_spv";

            // TODO(VK): confirm if this makes any sense
            // Product of all dimensions of entry 0 except the last two.
            nplanes = std::accumulate(shapesBuf.data(), shapesBuf.data() + max_ndims - 2, 1, [](int32_t a, int32_t b) { return a * b; } );
            N2 = shapesBuf.data()[max_ndims - 2];
            N1 = shapesBuf.data()[max_ndims - 1];
            CV_LOG_DEBUG(NULL, "max_ndims="<<max_ndims<<", nplanes="<<nplanes<<", N2="<<N2<<", N1="<<N1);
            break;
        }
        case OPERATION::WHERE:
        {
            CV_Assert(ninputs == 3);
            CV_Assert(max_ndims >= 2);
            shaderType = kNaryShaderTypeTrinary;
            shader_name = "nary_eltwise_trinary_forward_spv";
            break;
        }
        case OPERATION::SUM:
        {
            CV_Assert(max_ndims >= 2);
            shaderType = kNaryShaderTypeNary;
            shader_name = "nary_eltwise_nary_forward_spv";
            break;
        }
        //TODO(VK) add other cases
        default:
            CV_Error(Error::StsNotImplemented, "Unsupported nary operation type");
    }
    // TODO(VK): initialize OpNary class
}
||||
|
||||
void OpNary::firstForward() |
||||
{ |
||||
if (!firstForwardFinsh) |
||||
{ |
||||
config.local_size_x = 1; // TODO(vk) determine local_size_y if necessary
|
||||
config.local_size_y = 1; // TODO(vk) determine local_size_y if necessary
|
||||
config.local_size_z = 1; // TODO(vk) determine local_size_z if necessary
|
||||
computeGroupCount(); |
||||
firstForwardFinsh = true; |
||||
} |
||||
else |
||||
return; |
||||
} |
||||
|
||||
bool OpNary::binaryForward(std::vector<Tensor>& ins, std::vector<Tensor>& outs) |
||||
{ |
||||
std::vector<int32_t> param = {(int32_t)naryOpType, max_ndims}; |
||||
std::vector<int32_t> paramSize = {(int32_t)param.size()}; |
||||
std::vector<int32_t> dimSizes = {(ninputs + 1) * max_ndims}; |
||||
std::vector<int32_t> actualSteps; |
||||
|
||||
// TODO(VK): compute step for different dtype. Currently this is for kFormatFp32.
|
||||
actualSteps.resize(stepsBuf.size()); |
||||
std::transform(stepsBuf.data(), stepsBuf.data() + dimSizes[0], actualSteps.begin(), [](int32_t sz){ return sz / 4; }); |
||||
|
||||
Tensor paramTensor = Tensor(reinterpret_cast<const char *>(param.data()), paramSize, kFormatInt32, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); |
||||
Tensor shapeTensor = Tensor(reinterpret_cast<const char *>(shapesBuf.data()), dimSizes, kFormatInt32, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); |
||||
Tensor stepTensor = Tensor(reinterpret_cast<const char *>(actualSteps.data()), dimSizes, kFormatInt32, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); |
||||
|
||||
destTypes = { |
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // input1
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // input2
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // out
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, // param
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // shape
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // step
|
||||
}; |
||||
|
||||
|
||||
Ptr<Pipeline> pipeline = pipelineFactoryPtr->getPipeline(shader_name, destTypes); |
||||
Ptr<CommandBuffer> cmdBuffer = cmdPoolPtr->allocBuffer(); |
||||
Ptr<Descriptor> desSet = pipeline->createSet(); |
||||
VkCommandBuffer cmdBufferReal = cmdBuffer->get(); |
||||
|
||||
desSet->writeTensor(ins[0], 0); |
||||
desSet->writeTensor(ins[1], 1); |
||||
desSet->writeTensor(outs[0], 2); |
||||
desSet->writeTensor(paramTensor, 3); |
||||
desSet->writeTensor(shapeTensor, 4); |
||||
desSet->writeTensor(stepTensor, 5); |
||||
|
||||
cmdBuffer->beginRecord(); |
||||
pipeline->bind(cmdBufferReal, desSet->get()); |
||||
vkCmdDispatch(cmdBufferReal, group_x_, group_y_, group_z_); |
||||
cmdBuffer->endRecord(); |
||||
cmdPoolPtr->submitAndWait(cmdBufferReal); |
||||
|
||||
return true; |
||||
} |
||||
|
||||
bool OpNary::forward(std::vector<Tensor>& ins, std::vector<Tensor>& outs) |
||||
{ |
||||
|
||||
firstForward(); |
||||
|
||||
// TODO(VK): Support more dtypes. Currently only kFormatFp32 is supported.
|
||||
for (auto &tensor: ins) |
||||
{ |
||||
CV_Assert(tensor.getFormat() == kFormatFp32); |
||||
} |
||||
for (auto &tensor: outs) |
||||
{ |
||||
CV_Assert(tensor.getFormat() == kFormatFp32); |
||||
} |
||||
|
||||
switch(shaderType) { |
||||
case kNaryShaderTypeBinary: { |
||||
return binaryForward(ins, outs); |
||||
break; |
||||
} |
||||
default: |
||||
CV_Error(Error::StsNotImplemented, "Unsupported shader type invoked."); |
||||
} |
||||
|
||||
return true; |
||||
} |
||||
|
||||
bool OpNary::computeGroupCount() |
||||
{ |
||||
if (shaderType == kNaryShaderTypeBinary) |
||||
{ |
||||
group_x_ = nplanes; // parallelism at plane level
|
||||
group_y_ = N2; |
||||
group_z_ = 1; |
||||
} |
||||
else |
||||
{ |
||||
CV_Error(CV_StsNotImplemented, "shader type is not supported at compute GroupCount."); |
||||
} |
||||
|
||||
CV_Assert(group_x_ <= MAX_GROUP_COUNT_X); |
||||
CV_Assert(group_y_ <= MAX_GROUP_COUNT_Y); |
||||
CV_Assert(group_z_ <= MAX_GROUP_COUNT_Z); |
||||
|
||||
return true; |
||||
} |
||||
|
||||
#endif // HAVE_VULKAN
|
||||
|
||||
}}} // namespace cv::dnn::vkcom
|
@ -0,0 +1,18 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#include <opencv2/gapi/gcommon.hpp> |
||||
#include <opencv2/core/utility.hpp> |
||||
|
||||
// Default configuration: take the worker count from cv::getNumThreads().
cv::use_threaded_executor::use_threaded_executor()
    : num_threads(cv::getNumThreads())
{
}
||||
|
||||
// Explicit configuration: store the requested number of worker threads as is.
cv::use_threaded_executor::use_threaded_executor(const uint32_t nthreads)
    : num_threads(nthreads)
{
}
@ -0,0 +1,511 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#include <ade/util/zip_range.hpp> |
||||
|
||||
#include <opencv2/gapi/opencv_includes.hpp> |
||||
|
||||
#include "api/gproto_priv.hpp" // ptr(GRunArgP) |
||||
#include "executor/gthreadedexecutor.hpp" |
||||
#include "compiler/passes/passes.hpp" |
||||
|
||||
namespace cv { |
||||
namespace gimpl { |
||||
namespace magazine { |
||||
namespace { |
||||
|
||||
// Binds an input runtime argument into the magazine. Non-GMat resources use the
// generic bindInArg(); a GMat is always stored in the cv::RMat slot, wrapping a
// plain cv::Mat into an RMatOnMat adapter when needed.
void bindInArgExec(Mag& mag, const RcDesc &rc, const GRunArg &arg) {
    if (rc.shape != GShape::GMAT) {
        bindInArg(mag, rc, arg);
        return;
    }

    auto& rmat_slot = mag.slot<cv::RMat>()[rc.id];
    const auto held = arg.index();
    if (held == GRunArg::index_of<Mat>()) {
        rmat_slot = make_rmat<RMatOnMat>(util::get<Mat>(arg));
    } else if (held == GRunArg::index_of<cv::RMat>()) {
        rmat_slot = util::get<cv::RMat>(arg);
    } else {
        util::throw_error(std::logic_error("content type of the runtime argument does not match to resource description ?"));
    }

    // FIXME: has to take extra care about meta here for this particular
    // case, just because this function exists at all
    mag.meta<cv::RMat>()[rc.id] = arg.meta;
}
||||
|
||||
// Binds an output runtime argument into the magazine. Non-GMat resources use the
// generic bindOutArg(); a GMat output is stored in the cv::RMat slot, either by
// wrapping the user's cv::Mat or by taking the user's cv::RMat.
void bindOutArgExec(Mag& mag, const RcDesc &rc, const GRunArgP &arg) {
    if (rc.shape != GShape::GMAT) {
        bindOutArg(mag, rc, arg);
        return;
    }

    auto& rmat_slot = mag.slot<cv::RMat>()[rc.id];
    const auto held = arg.index();
    if (held == GRunArgP::index_of<Mat*>()) {
        rmat_slot = make_rmat<RMatOnMat>(*util::get<Mat*>(arg));
    } else if (held == GRunArgP::index_of<cv::RMat*>()) {
        rmat_slot = *util::get<cv::RMat*>(arg);
    } else {
        util::throw_error(std::logic_error("content type of the runtime argument does not match to resource description ?"));
    }
}
||||
|
||||
// Returns a pointer wrapper to the magazine object described by rc. A GMat is
// served from the cv::RMat slot; all other shapes use the generic getObjPtr().
cv::GRunArgP getObjPtrExec(Mag& mag, const RcDesc &rc) {
    if (rc.shape == GShape::GMAT) {
        return GRunArgP(&mag.slot<cv::RMat>()[rc.id]);
    }
    return getObjPtr(mag, rc);
}
||||
|
||||
void writeBackExec(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) { |
||||
if (rc.shape != GShape::GMAT) { |
||||
writeBack(mag, rc, g_arg); |
||||
return; |
||||
} |
||||
|
||||
switch (g_arg.index()) { |
||||
case GRunArgP::index_of<cv::Mat*>() : { |
||||
// If there is a copy intrinsic at the end of the graph
|
||||
// we need to actually copy the data to the user buffer
|
||||
// since output runarg was optimized to simply point
|
||||
// to the input of the copy kernel
|
||||
// FIXME:
|
||||
// Rework, find a better way to check if there should be
|
||||
// a real copy (add a pass to StreamingBackend?)
|
||||
// NB: In case RMat adapter not equal to "RMatOnMat" need to
|
||||
// copy data back to the host as well.
|
||||
auto& out_mat = *util::get<cv::Mat*>(g_arg); |
||||
const auto& rmat = mag.template slot<cv::RMat>().at(rc.id); |
||||
auto* adapter = rmat.get<RMatOnMat>(); |
||||
if ((adapter != nullptr && out_mat.data != adapter->data()) || |
||||
(adapter == nullptr)) { |
||||
auto view = rmat.access(RMat::Access::R); |
||||
asMat(view).copyTo(out_mat); |
||||
} |
||||
break; |
||||
} |
||||
case GRunArgP::index_of<cv::RMat*>() : /* do nothing */ break; |
||||
default: util::throw_error(std::logic_error("content type of the runtime argument does not match to resource description ?")); |
||||
} |
||||
} |
||||
|
||||
// Stores the runtime meta for the resource rc in the magazine. For GMat the same
// meta is written for every Mat-like storage type (Mat, RMat and, in
// non-standalone builds, UMat).
void assignMetaStubExec(Mag& mag, const RcDesc &rc, const cv::GRunArg::Meta &meta) {
    switch (rc.shape) {
    case GShape::GARRAY:
        mag.meta<cv::detail::VectorRef>()[rc.id] = meta;
        break;
    case GShape::GOPAQUE:
        mag.meta<cv::detail::OpaqueRef>()[rc.id] = meta;
        break;
    case GShape::GSCALAR:
        mag.meta<cv::Scalar>()[rc.id] = meta;
        break;
    case GShape::GFRAME:
        mag.meta<cv::MediaFrame>()[rc.id] = meta;
        break;
    case GShape::GMAT:
        mag.meta<cv::Mat>() [rc.id] = meta;
        mag.meta<cv::RMat>()[rc.id] = meta;
#if !defined(GAPI_STANDALONE)
        mag.meta<cv::UMat>()[rc.id] = meta;
#endif
        break;
    default:
        util::throw_error(std::logic_error("Unsupported GShape type"));
        break;
    }
}
||||
|
||||
} // anonymous namespace
|
||||
}}} // namespace cv::gimpl::magazine
|
||||
|
||||
// Collects the current values of all described inputs from the shared magazine
// (under the graph state mutex) and returns them as a single stream message.
cv::gimpl::StreamMsg cv::gimpl::GThreadedExecutor::Input::get() {
    std::lock_guard<std::mutex> guard{m_state.m};

    cv::GRunArgs in_args;
    for (const auto &rc : desc()) {
        in_args.emplace_back(magazine::getArg(m_state.mag, rc));
    }
    return cv::gimpl::StreamMsg{std::move(in_args)};
}
||||
|
||||
// Remembers the shared graph state and registers the input resource descriptors.
cv::gimpl::GThreadedExecutor::Input::Input(cv::gimpl::GraphState &state,
                                           const std::vector<RcDesc> &rcs)
    : m_state(state)
{
    set(rcs);
}
||||
|
||||
// Returns a pointer wrapper to the idx-th output object stored in the shared
// magazine (accessed under the graph state mutex).
cv::GRunArgP cv::gimpl::GThreadedExecutor::Output::get(int idx) {
    std::lock_guard<std::mutex> guard{m_state.m};

    auto out_ptr = magazine::getObjPtrExec(m_state.mag, desc()[idx]);
    // Remember the output port for this output object; meta() looks it up later
    m_out_idx[cv::gimpl::proto::ptr(out_ptr)] = idx;
    return out_ptr;
}
||||
|
||||
// Called when an output has been produced. The output object itself needs no
// handling here; only a non-null exception is recorded so verify() can rethrow it.
void cv::gimpl::GThreadedExecutor::Output::post(cv::GRunArgP&&, const std::exception_ptr& e) {
    if (!e) {
        return;
    }
    m_eptr = e;
}
||||
|
||||
// Records the exception carried by the message so that verify() can rethrow it.
void cv::gimpl::GThreadedExecutor::Output::post(Exception&& ex)
{
    m_eptr = std::move(ex.eptr);
}
||||
|
||||
// Attaches runtime meta to an output previously obtained through get(). The
// output index is found by the object's address (at() throws if it is unknown);
// the magazine is then updated under the graph state mutex.
void cv::gimpl::GThreadedExecutor::Output::meta(const GRunArgP &out, const GRunArg::Meta &m) {
    const auto port = m_out_idx.at(cv::gimpl::proto::ptr(out));

    std::lock_guard<std::mutex> guard{m_state.m};
    magazine::assignMetaStubExec(m_state.mag, desc()[port], m);
}
||||
|
||||
// Remembers the shared graph state and registers the output resource descriptors.
cv::gimpl::GThreadedExecutor::Output::Output(cv::gimpl::GraphState &state,
                                             const std::vector<RcDesc> &rcs)
    : m_state(state)
{
    set(rcs);
}
||||
|
||||
void cv::gimpl::GThreadedExecutor::Output::verify() { |
||||
if (m_eptr) { |
||||
std::rethrow_exception(m_eptr); |
||||
} |
||||
} |
||||
|
||||
// Prepares the magazine entry for one data slot. Only INTERNAL and CONST_VAL
// data are handled; everything else is bound externally and left untouched.
//   nh      - slot node in the island graph
//   orig_nh - corresponding data node in the original graph
void cv::gimpl::GThreadedExecutor::initResource(const ade::NodeHandle &nh, const ade::NodeHandle &orig_nh) {
    const Data &d = m_gm.metadata(orig_nh).get<Data>();

    const bool is_internal = (d.storage == Data::Storage::INTERNAL);
    const bool is_const    = (d.storage == Data::Storage::CONST_VAL);
    if (!is_internal && !is_const) {
        return;
    }

    // INTERNALS+CONST only! no need to allocate/reset output objects
    // to as it is bound externally (e.g. already in the m_state.mag)

    switch (d.shape) {
    case GShape::GMAT: {
        // Let island allocate its outputs if it can,
        // allocate cv::Mat and wrap it with RMat otherwise
        GAPI_Assert(!nh->inNodes().empty());
        const auto desc = util::get<cv::GMatDesc>(d.meta);
        auto& exec = m_gim.metadata(nh->inNodes().front()).get<IslandExec>().object;
        auto& rmat = m_state.mag.slot<cv::RMat>()[d.rc];
        if (exec->allocatesOutputs()) {
            rmat = exec->allocate(desc);
        } else {
            Mat mat;
            createMat(desc, mat);
            rmat = make_rmat<RMatOnMat>(mat);
        }
        break;
    }

    // Scalars and arrays only need binding when they hold a constant value
    case GShape::GSCALAR:
    case GShape::GARRAY:
        if (is_const) {
            auto rc = RcDesc{d.rc, d.shape, d.ctor};
            magazine::bindInArg(m_state.mag, rc, m_gm.metadata(orig_nh).get<ConstValue>().arg);
        }
        break;

    case GShape::GOPAQUE:
        // Constructed on Reset, do nothing here
        break;

    case GShape::GFRAME:
        // Should be defined by backend, do nothing here
        break;

    default:
        GAPI_Error("InternalError");
    }
}
||||
|
||||
// Couples an island executable with the input/output views it will use on the
// shared graph state.
cv::gimpl::IslandActor::IslandActor(const std::vector<RcDesc> &in_objects,
                                    const std::vector<RcDesc> &out_objects,
                                    std::shared_ptr<GIslandExecutable> isl_exec,
                                    cv::gimpl::GraphState &state)
    : m_isl_exec(isl_exec)
    , m_inputs(state, in_objects)
    , m_outputs(state, out_objects)
{
}
||||
|
||||
// Executes the island once on this actor's inputs and outputs.
void cv::gimpl::IslandActor::run()
{
    m_isl_exec->run(m_inputs, m_outputs);
}
||||
|
||||
// Rethrows an exception recorded by the outputs during run(), if there is one.
void cv::gimpl::IslandActor::verify()
{
    m_outputs.verify();
}
||||
|
||||
// A unit of work in the dependency graph run by TaskManager. A task is
// scheduled once all of its producers have completed; when it completes it
// notifies its consumers.
class cv::gimpl::Task {
    friend class TaskManager;

public:
    using Ptr = std::shared_ptr<Task>;

    // Shared context of one graph execution
    struct ExecutionState {
        cv::gapi::own::ThreadPool& tp;     // pool that runs ready consumers
        cv::gapi::own::Latch&      latch;  // counted down by every last task
    };

    Task(TaskManager::F&& f, std::vector<Task::Ptr> &&producers);

    // Runs the payload, then notifies consumers / the latch
    void run(ExecutionState& state);
    // A task without consumers is a "last" task
    bool isLast() const { return m_consumers.empty(); }
    // Forgets producer completions counted so far
    void reset() { m_ready_producers.store(0u); }

private:
    TaskManager::F        m_f;
    const uint32_t        m_num_producers;
    std::atomic<uint32_t> m_ready_producers;
    std::vector<Task*>    m_consumers;
};
||||
|
||||
// Creates a task running f once all producers have completed, and registers
// the new task as a consumer of each producer.
//
// The completion counter is initialized explicitly: a default-constructed
// std::atomic holds an indeterminate value before C++20, so the task would
// otherwise be valid only after reset() has been called.
cv::gimpl::Task::Task(TaskManager::F &&f,
                      std::vector<Task::Ptr> &&producers)
    : m_f(std::move(f)),
      m_num_producers(static_cast<uint32_t>(producers.size())),
      m_ready_producers(0u) {
    for (const auto& producer : producers) {
        producer->m_consumers.push_back(this);
    }
}
||||
|
||||
// Executes the task payload, then notifies every consumer that one of its
// dependencies is done. The producer that completes a consumer's last
// dependency schedules that consumer on the thread pool. A task without
// consumers counts down the latch instead.
void cv::gimpl::Task::run(ExecutionState& state) {
    // Execute the task
    m_f();

    // Notify every consumer about completion one of its dependencies
    for (auto* consumer : m_consumers) {
        // acq_rel (not relaxed): the release half publishes everything this
        // producer wrote in m_f(); the acquire half makes the writes of all
        // producers that finished earlier visible to the producer which
        // schedules the consumer, and thus to the consumer itself.
        const auto num_ready =
            consumer->m_ready_producers.fetch_add(1, std::memory_order_acq_rel) + 1;
        // The last completed producer schedule the consumer for execution
        if (num_ready == consumer->m_num_producers) {
            state.tp.schedule([&state, consumer](){
                consumer->run(state);
            });
        }
    }

    // If tasks has no consumers this is the last task
    // Execution lasts until all last tasks are completed
    // Decrement the latch to notify about completion
    if (isLast()) {
        state.latch.count_down();
    }
}
||||
|
||||
std::shared_ptr<cv::gimpl::Task> |
||||
cv::gimpl::TaskManager::createTask(cv::gimpl::TaskManager::F &&f, |
||||
std::vector<std::shared_ptr<cv::gimpl::Task>> &&producers) { |
||||
const bool is_initial = producers.empty(); |
||||
auto task = std::make_shared<cv::gimpl::Task>(std::move(f), |
||||
std::move(producers)); |
||||
m_all_tasks.emplace_back(task); |
||||
if (is_initial) { |
||||
m_initial_tasks.emplace_back(task); |
||||
} |
||||
return task; |
||||
} |
||||
|
||||
// Runs the whole task graph once on the given thread pool and blocks until
// every task without consumers has finished.
void cv::gimpl::TaskManager::scheduleAndWait(cv::gapi::own::ThreadPool& tp) {
    // Reset the number of ready dependencies for all tasks
    for (auto& task : m_all_tasks) {
        task->reset();
    }

    // Count the number of last tasks
    const auto kNumLastsTasks =
        std::count_if(m_all_tasks.begin(), m_all_tasks.end(),
                      [](const std::shared_ptr<Task>& task) { return task->isLast(); });

    // Initialize the latch, schedule initial tasks
    // and wait until all lasts tasks are done
    cv::gapi::own::Latch latch(kNumLastsTasks);
    Task::ExecutionState state{tp, latch};
    for (auto initial : m_initial_tasks) {
        // state lives on this stack frame; it stays valid because we block below
        state.tp.schedule([&state, initial](){ initial->run(state); });
    }
    latch.wait();
}
||||
|
||||
// Builds the executor: walks the island graph in topological order, creates an
// IslandActor plus a task for every island (depending on the tasks of the
// islands producing its inputs) and initializes the resource of every data slot.
cv::gimpl::GThreadedExecutor::GThreadedExecutor(const uint32_t num_threads,
                                                std::unique_ptr<ade::Graph> &&g_model)
    : GAbstractExecutor(std::move(g_model)),
      m_thread_pool(num_threads) {
    using NodeHasher = ade::HandleHasher<ade::Node>;

    auto sorted = m_gim.metadata().get<ade::passes::TopologicalSortData>();

    // Island node -> its task; producers are always inserted before their
    // consumers because the nodes are visited in topological order
    std::unordered_map<ade::NodeHandle, std::shared_ptr<Task>, NodeHasher> island_tasks;

    for (auto nh : sorted.nodes())
    {
        switch (m_gim.metadata(nh).get<NodeKind>().k)
        {
        case NodeKind::ISLAND:
        {
            // Resource descriptors of the island's inputs and outputs
            std::vector<RcDesc> input_rcs;
            std::vector<RcDesc> output_rcs;
            input_rcs.reserve(nh->inNodes().size());
            output_rcs.reserve(nh->outNodes().size());

            auto xtract = [&](ade::NodeHandle slot_nh, std::vector<RcDesc> &vec) {
                const auto orig_data_nh
                    = m_gim.metadata(slot_nh).get<DataSlot>().original_data_node;
                const auto &orig_data_info
                    = m_gm.metadata(orig_data_nh).get<Data>();
                vec.emplace_back(RcDesc{ orig_data_info.rc
                                       , orig_data_info.shape
                                       , orig_data_info.ctor});
            };
            for (auto in_slot_nh : nh->inNodes()) {
                xtract(in_slot_nh, input_rcs);
            }
            for (auto out_slot_nh : nh->outNodes()) {
                xtract(out_slot_nh, output_rcs);
            }

            auto actor = std::make_shared<IslandActor>(std::move(input_rcs),
                                                       std::move(output_rcs),
                                                       m_gim.metadata(nh).get<IslandExec>().object,
                                                       m_state);
            m_actors.push_back(actor);

            // Unique set of islands which write this island's input slots
            std::unordered_set<ade::NodeHandle, NodeHasher> producer_nhs;
            for (auto slot_nh : nh->inNodes()) {
                for (auto island_nh : slot_nh->inNodes()) {
                    GAPI_Assert(m_gim.metadata(island_nh).get<NodeKind>().k == NodeKind::ISLAND);
                    producer_nhs.emplace(island_nh);
                }
            }

            std::vector<std::shared_ptr<Task>> producers;
            producers.reserve(producer_nhs.size());
            for (auto producer_nh : producer_nhs) {
                producers.push_back(island_tasks.at(producer_nh));
            }

            auto task = m_task_manager.createTask(
                    [actor](){actor->run();}, std::move(producers));
            island_tasks.emplace(nh, task);
            break;
        }

        case NodeKind::SLOT:
        {
            const auto orig_data_nh
                = m_gim.metadata(nh).get<DataSlot>().original_data_node;
            initResource(nh, orig_data_nh);
            m_slots.emplace_back(DataDesc{nh, orig_data_nh});
            break;
        }

        default:
            GAPI_Error("InternalError");
            break;
        } // switch(kind)
    } // for(gim nodes)

    prepareForNewStream();
}
||||
|
||||
// Executes the compiled graph once: validates the argument counts, prepares the
// output Mats, binds the user objects into the magazine, resets internal data,
// runs all island tasks on the thread pool, rethrows island exceptions and
// finally writes results back to the user's output objects.
void cv::gimpl::GThreadedExecutor::run(cv::gimpl::GRuntimeArgs &&args) {
    const auto proto = m_gm.metadata().get<Protocol>();

    // Basic check if input/output arguments are correct
    // FIXME: Move to GCompiled (do once for all GExecutors)
    // TODO: Also check types
    if (proto.inputs.size() != args.inObjs.size()) {
        util::throw_error(std::logic_error
                          ("Computation's input protocol doesn\'t "
                           "match actual arguments!"));
    }
    // TODO: Also check types
    if (proto.outputs.size() != args.outObjs.size()) {
        util::throw_error(std::logic_error
                          ("Computation's output protocol doesn\'t "
                           "match actual arguments!"));
    }

    // ensure that output Mat parameters are correctly allocated
    // FIXME: avoid copy of NodeHandle and GRunRsltComp ?
    for (auto index : ade::util::iota(proto.out_nhs.size())) {
        const Data &d = m_gm.metadata(proto.out_nhs.at(index)).get<Data>();
        if (d.shape != GShape::GMAT) {
            continue;
        }

        using cv::util::get;
        const auto desc = get<cv::GMatDesc>(d.meta);

        if (cv::util::holds_alternative<cv::Mat*>(args.outObjs.at(index))) {
            auto& out_mat = *get<cv::Mat*>(args.outObjs.at(index));
#if !defined(GAPI_STANDALONE)
            // Building as part of OpenCV - follow OpenCV behavior In
            // the case of cv::Mat if output buffer is not enough to
            // hold the result, reallocate it
            createMat(desc, out_mat);
#else
            // Building standalone - output buffer should always exist,
            // and _exact_ match our inferred metadata
            GAPI_Assert(out_mat.data != nullptr &&
                        desc.canDescribe(out_mat));
#endif // !defined(GAPI_STANDALONE)
        } else {
            // In the case of RMat check to fit required meta
            auto& out_mat = *get<cv::RMat*>(args.outObjs.at(index));
            GAPI_Assert(desc.canDescribe(out_mat));
        }
    }

    // Update storage with user-passed objects
    for (auto it : ade::util::zip(ade::util::toRange(proto.inputs),
                                  ade::util::toRange(args.inObjs))) {
        magazine::bindInArgExec(m_state.mag, std::get<0>(it), std::get<1>(it));
    }
    for (auto it : ade::util::zip(ade::util::toRange(proto.outputs),
                                  ade::util::toRange(args.outObjs))) {
        magazine::bindOutArgExec(m_state.mag, std::get<0>(it), std::get<1>(it));
    }

    // Reset internal data
    for (auto &sd : m_slots) {
        magazine::resetInternalData(m_state.mag, m_gm.metadata(sd.data_nh).get<Data>());
    }

    // Run every island, then surface the first recorded island exception
    m_task_manager.scheduleAndWait(m_thread_pool);
    for (const auto& actor : m_actors) {
        actor->verify();
    }

    for (auto it : ade::util::zip(ade::util::toRange(proto.outputs),
                                  ade::util::toRange(args.outObjs))) {
        magazine::writeBackExec(m_state.mag, std::get<0>(it), std::get<1>(it));
    }
}
||||
|
||||
bool cv::gimpl::GThreadedExecutor::canReshape() const { |
||||
for (auto actor : m_actors) { |
||||
if (actor->exec()->canReshape()) { |
||||
return false; |
||||
} |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
void cv::gimpl::GThreadedExecutor::reshape(const GMetaArgs& inMetas, const GCompileArgs& args) { |
||||
GAPI_Assert(canReshape()); |
||||
auto& g = *m_orig_graph.get(); |
||||
ade::passes::PassContext ctx{g}; |
||||
passes::initMeta(ctx, inMetas); |
||||
passes::inferMeta(ctx, true); |
||||
|
||||
// NB: Before reshape islands need to re-init resources for every slot.
|
||||
for (auto slot : m_slots) { |
||||
initResource(slot.slot_nh, slot.data_nh); |
||||
} |
||||
|
||||
for (auto actor : m_actors) { |
||||
actor->exec()->reshape(g, args); |
||||
} |
||||
} |
||||
|
||||
void cv::gimpl::GThreadedExecutor::prepareForNewStream() { |
||||
for (auto actor : m_actors) { |
||||
actor->exec()->handleNewStream(); |
||||
} |
||||
} |
@ -0,0 +1,123 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
|
||||
|
||||
#ifndef OPENCV_GAPI_GTHREADEDEXECUTOR_HPP |
||||
#define OPENCV_GAPI_GTHREADEDEXECUTOR_HPP |
||||
|
||||
#include <utility> // tuple, required by magazine |
||||
#include <unordered_map> // required by magazine |
||||
|
||||
#include "executor/gabstractexecutor.hpp" |
||||
#include "executor/thread_pool.hpp" |
||||
|
||||
namespace cv { |
||||
namespace gimpl { |
||||
|
||||
class Task; |
||||
class TaskManager { |
||||
public: |
||||
using F = std::function<void()>; |
||||
|
||||
std::shared_ptr<Task> createTask(F &&f, std::vector<std::shared_ptr<Task>> &&producers); |
||||
void scheduleAndWait(cv::gapi::own::ThreadPool& tp); |
||||
|
||||
private: |
||||
std::vector<std::shared_ptr<Task>> m_all_tasks; |
||||
std::vector<std::shared_ptr<Task>> m_initial_tasks; |
||||
}; |
||||
|
||||
struct GraphState { |
||||
Mag mag; |
||||
std::mutex m; |
||||
}; |
||||
|
||||
class IslandActor; |
||||
class GThreadedExecutor final: public GAbstractExecutor { |
||||
public: |
||||
class Input; |
||||
class Output; |
||||
|
||||
explicit GThreadedExecutor(const uint32_t num_threads, |
||||
std::unique_ptr<ade::Graph> &&g_model); |
||||
void run(cv::gimpl::GRuntimeArgs &&args) override; |
||||
|
||||
bool canReshape() const override; |
||||
void reshape(const GMetaArgs& inMetas, const GCompileArgs& args) override; |
||||
|
||||
void prepareForNewStream() override; |
||||
|
||||
private: |
||||
struct DataDesc |
||||
{ |
||||
ade::NodeHandle slot_nh; |
||||
ade::NodeHandle data_nh; |
||||
}; |
||||
|
||||
void initResource(const ade::NodeHandle &nh, const ade::NodeHandle &orig_nh); |
||||
|
||||
GraphState m_state; |
||||
std::vector<DataDesc> m_slots; |
||||
cv::gapi::own::ThreadPool m_thread_pool; |
||||
TaskManager m_task_manager; |
||||
std::vector<std::shared_ptr<IslandActor>> m_actors; |
||||
}; |
||||
|
||||
// Island input view which reads its arguments from the shared graph state.
class GThreadedExecutor::Input final: public GIslandExecutable::IInput
{
public:
    Input(GraphState& state, const std::vector<RcDesc> &rcs);

private:
    // GIslandExecutable::IInput interface
    StreamMsg get() override;
    // Non-blocking variant; simply forwards to get()
    StreamMsg try_get() override { return get(); }

    GraphState& m_state;
};
||||
|
||||
// Island output view which hands out objects from the shared graph state and
// records an exception reported by the island.
class GThreadedExecutor::Output final: public GIslandExecutable::IOutput
{
public:
    Output(GraphState &state, const std::vector<RcDesc> &rcs);

    // Rethrows the recorded exception, if any
    void verify();

private:
    // GIslandExecutable::IOutput interface
    GRunArgP get(int idx) override;
    void post(cv::GRunArgP&&, const std::exception_ptr& e) override;
    void post(Exception&& ex) override;
    void post(EndOfStream&&) override {}
    void meta(const GRunArgP &out, const GRunArg::Meta &m) override;

    GraphState&                          m_state;
    std::unordered_map<const void*, int> m_out_idx;  // output object address -> output index
    std::exception_ptr                   m_eptr;     // exception reported through post()
};
||||
|
||||
class IslandActor { |
||||
public: |
||||
using Ptr = std::shared_ptr<IslandActor>; |
||||
IslandActor(const std::vector<RcDesc> &in_objects, |
||||
const std::vector<RcDesc> &out_objects, |
||||
std::shared_ptr<GIslandExecutable> isl_exec, |
||||
GraphState &state); |
||||
|
||||
void run(); |
||||
void verify(); |
||||
std::shared_ptr<GIslandExecutable> exec() { return m_isl_exec; } |
||||
|
||||
private: |
||||
std::shared_ptr<GIslandExecutable> m_isl_exec; |
||||
GThreadedExecutor::Input m_inputs; |
||||
GThreadedExecutor::Output m_outputs; |
||||
}; |
||||
|
||||
|
||||
} // namespace gimpl
|
||||
} // namespace cv
|
||||
|
||||
#endif // OPENCV_GAPI_GTHREADEDEXECUTOR_HPP
|
@ -0,0 +1,67 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
|
||||
|
||||
#include "thread_pool.hpp" |
||||
|
||||
#include <opencv2/gapi/util/throw.hpp> |
||||
|
||||
// Creates a latch that opens after `expected` calls to count_down().
cv::gapi::own::Latch::Latch(const uint64_t expected)
    : m_expected(expected)
{
}
||||
|
||||
void cv::gapi::own::Latch::count_down() { |
||||
std::lock_guard<std::mutex> lk{m_mutex}; |
||||
--m_expected; |
||||
if (m_expected == 0) { |
||||
m_all_done.notify_all(); |
||||
} |
||||
} |
||||
|
||||
void cv::gapi::own::Latch::wait() { |
||||
std::unique_lock<std::mutex> lk{m_mutex}; |
||||
while (m_expected != 0u) { |
||||
m_all_done.wait(lk); |
||||
} |
||||
} |
||||
|
||||
// Starts num_workers threads, each running worker() on the shared task queue.
//
// If starting a thread throws (e.g. the system is out of resources), the
// threads started so far are stopped and joined before the exception is
// propagated. The destructor does not run for a partially constructed object,
// and destroying a joinable std::thread would terminate the process.
cv::gapi::own::ThreadPool::ThreadPool(const uint32_t num_workers) {
    m_workers.reserve(num_workers);
    try {
        for (uint32_t i = 0; i < num_workers; ++i) {
            m_workers.emplace_back(
                    cv::gapi::own::ThreadPool::worker, std::ref(m_queue));
        }
    } catch (...) {
        shutdown();
        throw;
    }
}
||||
|
||||
// Thread body: pops tasks from the queue and runs them until an empty task,
// which is the stop signal, is received.
void cv::gapi::own::ThreadPool::worker(QueueClass<Task>& queue) {
    for (;;) {
        cv::gapi::own::ThreadPool::Task next;
        queue.pop(next);
        if (next) {
            next();
        } else {
            return;
        }
    }
}
||||
|
||||
// Enqueues a task for execution by one of the worker threads.
void cv::gapi::own::ThreadPool::schedule(cv::gapi::own::ThreadPool::Task&& task)
{
    m_queue.push(std::move(task));
}
||||
|
||||
void cv::gapi::own::ThreadPool::shutdown() { |
||||
for (size_t i = 0; i < m_workers.size(); ++i) { |
||||
// NB: Empty task - is an indicator for workers to stop their loops
|
||||
m_queue.push({}); |
||||
} |
||||
for (auto& worker : m_workers) { |
||||
worker.join(); |
||||
} |
||||
m_workers.clear(); |
||||
} |
||||
|
||||
// Stops and joins all worker threads.
cv::gapi::own::ThreadPool::~ThreadPool()
{
    shutdown();
}
@ -0,0 +1,71 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
|
||||
#ifndef OPENCV_GAPI_THREAD_POOL_HPP |
||||
#define OPENCV_GAPI_THREAD_POOL_HPP |
||||
|
||||
#include <functional> |
||||
#include <vector> |
||||
#include <thread> |
||||
#include <mutex> |
||||
#include <atomic> |
||||
#include <condition_variable> |
||||
|
||||
#include <opencv2/gapi/own/exports.hpp> // GAPI_EXPORTS |
||||
|
||||
#if defined(HAVE_TBB) |
||||
# include <tbb/concurrent_queue.h> // FIXME: drop it from here!
|
||||
template<typename T> using QueueClass = tbb::concurrent_bounded_queue<T>; |
||||
#else |
||||
# include "executor/conc_queue.hpp" |
||||
template<typename T> using QueueClass = cv::gapi::own::concurrent_bounded_queue<T>; |
||||
#endif // TBB
|
||||
|
||||
namespace cv { |
||||
namespace gapi { |
||||
namespace own { |
||||
|
||||
// NB: Only for tests
|
||||
class GAPI_EXPORTS Latch { |
||||
public: |
||||
explicit Latch(const uint64_t expected); |
||||
|
||||
Latch(const Latch&) = delete; |
||||
Latch& operator=(const Latch&) = delete; |
||||
|
||||
void count_down(); |
||||
void wait(); |
||||
|
||||
private: |
||||
uint64_t m_expected; |
||||
std::mutex m_mutex; |
||||
std::condition_variable m_all_done; |
||||
}; |
||||
|
||||
// NB: Only for tests
|
||||
class GAPI_EXPORTS ThreadPool { |
||||
public: |
||||
using Task = std::function<void()>; |
||||
explicit ThreadPool(const uint32_t num_workers); |
||||
|
||||
ThreadPool(const ThreadPool&) = delete; |
||||
ThreadPool& operator=(const ThreadPool&) = delete; |
||||
|
||||
void schedule(Task&& task); |
||||
~ThreadPool(); |
||||
|
||||
private: |
||||
static void worker(QueueClass<Task>& queue); |
||||
void shutdown(); |
||||
|
||||
private: |
||||
std::vector<std::thread> m_workers; |
||||
QueueClass<Task> m_queue; |
||||
}; |
||||
|
||||
}}} // namespace cv::gapi::own
|
||||
|
||||
#endif // OPENCV_GAPI_THREAD_POOL_HPP
|
@ -0,0 +1,124 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
|
||||
#include "../test_precomp.hpp" |
||||
|
||||
#include <chrono> |
||||
#include <thread> |
||||
|
||||
#include "executor/thread_pool.hpp" |
||||
|
||||
namespace opencv_test |
||||
{ |
||||
|
||||
using namespace cv::gapi; |
||||
|
||||
// schedule() must return immediately: the counter is still untouched right
// after scheduling a slow task and is updated once the task has completed.
TEST(ThreadPool, ScheduleNotBlock)
{
    const std::chrono::milliseconds kTaskDuration{500u};

    own::Latch latch(1u);
    std::atomic<uint32_t> counter{0u};
    own::ThreadPool tp(4u);

    tp.schedule([&]() {
        std::this_thread::sleep_for(kTaskDuration);
        counter++;
        latch.count_down();
    });

    // The task is still sleeping at this point
    EXPECT_EQ(0u, counter);
    latch.wait();
    EXPECT_EQ(1u, counter);
}
||||
|
||||
// Every one of many scheduled tasks must be executed exactly once.
TEST(ThreadPool, MultipleTasks)
{
    const uint32_t kNumTasks = 100u;

    std::atomic<uint32_t> completed{0u};
    own::Latch latch(kNumTasks);
    own::ThreadPool tp(4u);

    auto job = [&]() {
        ++completed;
        latch.count_down();
    };
    for (uint32_t i = 0; i < kNumTasks; ++i) {
        tp.schedule(job);
    }
    latch.wait();

    EXPECT_EQ(kNumTasks, completed.load());
}
||||
|
||||
struct ExecutionState { |
||||
ExecutionState(const uint32_t num_threads, |
||||
const uint32_t num_tasks) |
||||
: guard(0u), |
||||
critical(0u), |
||||
limit(num_tasks), |
||||
latch(num_threads), |
||||
tp(num_threads) { |
||||
} |
||||
|
||||
std::atomic<uint32_t> guard; |
||||
std::atomic<uint32_t> critical; |
||||
const uint32_t limit; |
||||
own::Latch latch; |
||||
own::ThreadPool tp; |
||||
}; |
||||
|
||||
// Runs one step of the recursive workload: performs the simulated critical
// section and re-schedules itself on the pool, or counts the latch down once
// the entry counter has reached the limit.
static void doRecursive(ExecutionState& state) {
    // NB: The entry counter guarantees that the body below is executed
    // no more than "limit" times in total.
    const uint32_t entries_before = state.guard.fetch_add(1u);
    const bool limit_reached = entries_before >= state.limit;
    if (limit_reached) {
        state.latch.count_down();
        return;
    }

    // NB: Simulate the critical section.
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    state.critical.fetch_add(1u);

    // NB: Schedule the next step recursively.
    state.tp.schedule([&state]() {
        doRecursive(state);
    });
}
||||
|
||||
// Checks that tasks may schedule further tasks on the same pool from inside
// a worker thread: kNumThreads initial tasks keep re-scheduling themselves
// until the critical section has been executed exactly kNumTasks times.
TEST(ThreadPool, ScheduleRecursively)
{
    // NB: Unsigned on purpose - the value is compared with a uint32_t loop
    // index and passed to ExecutionState, which takes uint32_t.
    const uint32_t kNumThreads = 5u;
    const uint32_t kNumTasks = 100u;

    ExecutionState state(kNumThreads, kNumTasks);
    for (uint32_t i = 0; i < kNumThreads; ++i) {
        state.tp.schedule([&](){
            doRecursive(state);
        });
    }
    // Each of the kNumThreads task chains counts the latch down once,
    // when it observes that the limit has been reached.
    state.latch.wait();

    EXPECT_EQ(kNumTasks, state.critical.load());
}
||||
|
||||
// Checks that the pool really runs tasks concurrently: kNumThreads tasks of
// 800 ms each must all complete within 1000 ms of wall time, which is only
// possible if they overlap instead of running one after another.
TEST(ThreadPool, ExecutionIsParallel)
{
    const uint32_t kNumThreads = 4u;
    std::atomic<uint32_t> counter{0};
    own::Latch latch{kNumThreads};

    own::ThreadPool tp(kNumThreads);
    // NB: steady_clock is monotonic, so the measured interval cannot be
    // distorted by system clock adjustments.
    const auto start = std::chrono::steady_clock::now();
    for (uint32_t i = 0; i < kNumThreads; ++i) {
        tp.schedule([&]() {
            std::this_thread::sleep_for(std::chrono::milliseconds{800u});
            ++counter;
            latch.count_down();
        });
    }
    latch.wait();

    const auto end = std::chrono::steady_clock::now();
    const auto elapsed_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();

    // NB: count() is a signed value, so compare it with a signed bound.
    EXPECT_LE(elapsed_ms, 1000);
    EXPECT_EQ(kNumThreads, counter.load());
}
||||
|
||||
} // namespace opencv_test
|
@ -1,9 +0,0 @@ |
||||
<?xml version="1.0" encoding="UTF-8"?> |
||||
<classpath> |
||||
<classpathentry kind="src" path="src"/> |
||||
<classpathentry kind="src" path="gen"/> |
||||
<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/> |
||||
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/> |
||||
<classpathentry kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/> |
||||
<classpathentry kind="output" path="bin/classes"/> |
||||
</classpath> |
@ -1,33 +0,0 @@ |
||||
<?xml version="1.0" encoding="UTF-8"?> |
||||
<projectDescription> |
||||
<name>OpenCV_JavaAPI_Tests</name> |
||||
<comment></comment> |
||||
<projects> |
||||
</projects> |
||||
<buildSpec> |
||||
<buildCommand> |
||||
<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name> |
||||
<arguments> |
||||
</arguments> |
||||
</buildCommand> |
||||
<buildCommand> |
||||
<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name> |
||||
<arguments> |
||||
</arguments> |
||||
</buildCommand> |
||||
<buildCommand> |
||||
<name>org.eclipse.jdt.core.javabuilder</name> |
||||
<arguments> |
||||
</arguments> |
||||
</buildCommand> |
||||
<buildCommand> |
||||
<name>com.android.ide.eclipse.adt.ApkBuilder</name> |
||||
<arguments> |
||||
</arguments> |
||||
</buildCommand> |
||||
</buildSpec> |
||||
<natures> |
||||
<nature>com.android.ide.eclipse.adt.AndroidNature</nature> |
||||
<nature>org.eclipse.jdt.core.javanature</nature> |
||||
</natures> |
||||
</projectDescription> |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue