Merge pull request #24509 from Abdurrahheem:ash/dev_einsum_fast_gemm

Fast gemm for einsum #24509

## This PR adds performance tests for the Einsum layer with FastGemm. See below the results of the performance tests on different inputs.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
pull/24562/head
Abduragim Shtanchaev 1 year ago committed by GitHub
parent 83d70b0f36
commit 8c10545d3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 64
      modules/dnn/perf/perf_einsum.cpp
  2. 222
      modules/dnn/src/layers/einsum_layer.cpp

@ -11,19 +11,16 @@ struct EinsumParams {
int outputSize;
std::string equation;
std::vector<MatShape> einsumInpShapes;
EinsumParams(std::string equation_, int inputSize_, int outputSize_, std::vector<MatShape> einsumInpShapes_ = std::vector<MatShape>())
EinsumParams(std::string equation_, std::vector<MatShape> einsumInpShapes_ = std::vector<MatShape>())
{
inputSize = inputSize_;
outputSize = outputSize_;
inputSize = einsumInpShapes_.size();
equation = equation_;
einsumInpShapes = einsumInpShapes_;
}
};
static inline void PrintTo(const EinsumParams& params, ::std::ostream* os) {
(*os) << "Eqiation=" << params.equation << ", "
<< "InputSize=" << params.inputSize << ", "
<< "OutputSize=" << params.outputSize << ", ";
(*os) << "Equation=" << params.equation << " ";
(*os) << "InputShape={";
for(int i = 0; i < params.einsumInpShapes.size(); i++)
@ -41,22 +38,22 @@ static inline void PrintTo(const EinsumParams& params, ::std::ostream* os) {
// test cases
static const EinsumParams testEinsumConfigs[] = {
// TODO: Add tests with one input after ellips merge
{"ij, jk -> ik", 2, 1, {{2, 3}, {3, 2}}},
{"ij, jk -> ik", 2, 1, {{20, 30}, {30, 20}}},
{"ij, jk -> ik", 2, 1, {{113, 127}, {127, 113}}},
{"ij, jk -> ik", {{2, 3}, {3, 2}}},
{"ij, jk -> ik", {{20, 30}, {30, 20}}},
{"ij, jk -> ik", {{113, 127}, {127, 113}}},
{"imkj, injs -> imnks", 2, 1, {{1, 4, 7, 9}, {1, 5, 9, 8}}},
{"imkj, injs -> imnks", 2, 1, {{1, 4, 70, 90}, {1, 5, 90, 80}}},
{"imkj, injs -> imnks", 2, 1, {{1, 4, 73, 91}, {1, 5, 91, 57}}},
{"imkj, injs -> imnks", {{1, 4, 7, 9}, {1, 5, 9, 8}}},
{"imkj, injs -> imnks", {{1, 4, 70, 90}, {1, 5, 90, 80}}},
{"imkj, injs -> imnks", {{1, 4, 73, 91}, {1, 5, 91, 57}}},
{"ij -> i", 1, 1, {{30, 40}}},
{"ij -> i", 1, 1, {{113, 374}}},
{"ij -> i", {{30, 40}}},
{"ij -> i", {{113, 374}}},
{"...ij -> ...i", 1, 1, {{30, 40}}},
{"...ij -> ...i", 1, 1, {{113, 374}}},
{"...ij -> ...i", {{30, 40}}},
{"...ij -> ...i", {{113, 374}}},
{"...ij, ...jk -> ...ik", 2, 1, {{40, 50}, {50, 80}}},
{"...ij, ...jk -> ...ik", 2, 1, {{47, 51}, {51, 83}}},
{"...ij, ...jk -> ...ik", {{40, 50}, {50, 80}}},
{"...ij, ...jk -> ...ik", {{47, 51}, {51, 83}}},
};
class Layer_Einsum: public TestBaseWithParam<EinsumParams> {};
@ -68,7 +65,7 @@ PERF_TEST_P_(Layer_Einsum, einsum) {
lp.name = "testEinsum";
lp.set("equation", params.equation);
lp.set("inputSize", params.inputSize);
lp.set("outputSize", params.outputSize);
lp.set("outputSize", 1);
CV_CheckFalse(params.einsumInpShapes.empty(), "ERROR no inputs shapes provided");
@ -79,38 +76,27 @@ PERF_TEST_P_(Layer_Einsum, einsum) {
Net net;
std::vector<Mat> inputs;
std::vector<std::string> input_names;
if (params.inputSize == 1){
int id = net.addLayer(lp.name, lp.type, lp);
for (int i = 0; i < params.inputSize; ++i) {
// create inputs
inputs.emplace_back(Mat(params.einsumInpShapes[0].size(), params.einsumInpShapes[0].data(), CV_32FC1));
inputs.emplace_back(Mat(params.einsumInpShapes[i].size(), params.einsumInpShapes[i].data(), CV_32FC1));
int id = net.addLayerToPrev(lp.name, lp.type, lp);
net.connect(0, 0, id, 0);
// connect each input to the layer
net.connect(0, i, id, i);
input_names.emplace_back("input1");
} else {
// create inputs
inputs.emplace_back(Mat(params.einsumInpShapes[0].size(), params.einsumInpShapes[0].data(), CV_32FC1));
inputs.emplace_back(Mat(params.einsumInpShapes[1].size(), params.einsumInpShapes[1].data(), CV_32FC1));
int id = net.addLayerToPrev(lp.name, lp.type, lp);
net.connect(0, 0, id, 0);
net.connect(0, 1, id, 1);
input_names.emplace_back("input1");
input_names.emplace_back("input2");
// create input names dynamically, assuming input naming follows a consistent pattern
input_names.emplace_back("input" + std::to_string(i + 1));
}
//warm up
std::vector<Mat> outputs;
net.setInputsNames(input_names);
for (int i = 0; i < input_names.size(); i++){
net.setInput(inputs[i], input_names[i]);
}
Mat out = net.forward();
net.forward(outputs, "testEinsum");
std::vector<Mat> outputs;
TEST_CYCLE()
{
net.forward(outputs, "testEinsum");

@ -6,6 +6,7 @@
#include <opencv2/dnn/shape_utils.hpp>
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "cpu_kernels/fast_gemm.hpp"
namespace cv
{
@ -32,111 +33,6 @@ static bool IsTransposeReshapeForEinsum(const std::vector<size_t>& perm,
return true;
}
// Batched matrix multiplication using cv::gemm.
// The inputs are viewed through the 3D shape overrides [B, M, K] and
// [B, K, N]; each of the B slices is multiplied independently and the
// results are stacked into a [B, M, N] CV_32F output.
static Mat batchwiseMatMul(
    const Mat& input1,
    const MatShape& input1ShapeOverride,
    const Mat& input2,
    const MatShape& input2ShapeOverride)
{
    // Sanity checks before the actual MatMul
    CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
    CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ(input2ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ((size_t) input1ShapeOverride[0], (size_t) input2ShapeOverride[0], "Batch dimension should match for MatMul;");
    CV_CheckEQ((size_t) input1ShapeOverride[2], (size_t) input2ShapeOverride[1], "Incompatible matrix dimensions for matMul");

    // Logical dimensions taken from the shape overrides (not from the Mats
    // themselves, which may carry a different physical shape).
    size_t batches = input1ShapeOverride[0];
    size_t M = input1ShapeOverride[1];
    size_t K = input1ShapeOverride[2];
    size_t N = input2ShapeOverride[2];

    std::vector<Mat> output;  // per-batch [1, M, N] partial results
    if (batches > 1)
    {
        Mat reshapedInput1 = input1;
        Mat reshapedInput2 = input2;

        // input1 should be of logical size BxMxK; reshape the view if needed.
        if (input1.size[0] != M || input1.size[1] != K)
        {
            int shape[] = {static_cast<int>(batches), static_cast<int>(M), static_cast<int>(K)};
            reshapedInput1 = input1.reshape(1, 3, shape);
        }

        // input2 should be of logical size BxKxN; reshape the view if needed.
        if (input2.size[0] != K || input2.size[1] != N)
        {
            int shape[] = {static_cast<int>(batches), static_cast<int>(K), static_cast<int>(N)};
            reshapedInput2 = input2.reshape(1, 3, shape);
        }

        // Multiply each batch slice independently with cv::gemm.
        for (size_t i=0; i < batches; i++)
        {
            // Slice out batch i of input1 and view it as an MxK matrix.
            std::vector<Range> ranges1 = {cv::Range(i, i+1)};
            for (int j = 1; j < reshapedInput1.dims; j++)
                ranges1.emplace_back(cv::Range::all());
            Mat part1 = reshapedInput1(ranges1);
            int shape[] = {static_cast<int>(M), static_cast<int>(K)};
            part1 = part1.reshape(1, sizeof(shape)/sizeof(shape[0]), shape);

            // Slice out batch i of input2 and view it as a KxN matrix.
            std::vector<Range> ranges2 = {cv::Range(i, i+1)};
            for (int j = 1; j < reshapedInput2.dims; j++)
                ranges2.emplace_back(cv::Range::all());
            Mat part2 = reshapedInput2(ranges2);
            int shape2[] = {static_cast<int>(K), static_cast<int>(N)};
            part2 = part2.reshape(1, sizeof(shape2)/sizeof(shape2[0]), shape2);

            Mat tmp_output;
            cv::gemm(part1, part2, 1.0, cv::Mat(), 1.0, tmp_output);
            // Re-add the leading batch dimension before collecting the slice.
            int newShape[] = {1, static_cast<int>(M), static_cast<int>(N)};
            tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
            output.emplace_back(tmp_output);
        }
    } else {
        // Single-batch case: one plain MxK x KxN multiplication.
        Mat reshapedInput1 = input1;
        Mat reshapedInput2 = input2;

        // input1 should be of size MxK; reshape the view if needed.
        if (input1.dims > 2 || input1.size[0] != M || input1.size[1] != K)
        {
            int shape[] = {static_cast<int>(M), static_cast<int>(K)};
            reshapedInput1 = input1.reshape(1, 2, shape);
        }

        // input2 should be of size KxN; reshape the view if needed.
        if (input2.dims > 2 || input2.size[0] != K || input2.size[1] != N)
        {
            int shape2[] = {static_cast<int>(K), static_cast<int>(N)};
            reshapedInput2 = input2.reshape(1, 2, shape2);
        }

        Mat tmp_output;
        cv::gemm(reshapedInput1, reshapedInput2, 1.0, cv::Mat(), 1.0, tmp_output);
        // Re-add the leading batch dimension for a uniform [1, M, N] result.
        int newShape[] = {1, static_cast<int>(M), static_cast<int>(N)};
        tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
        output.emplace_back(tmp_output);
    }

    // Stack the per-batch slices into a single [B, M, N] buffer.
    int outputDim[] = {static_cast<int>(output.size()), static_cast<int>(M), static_cast<int>(N)};
    Mat output_buffer = Mat::zeros(3, outputDim, CV_32F);
    for (size_t i = 0; i < output.size(); i++) {
        Mat output_slice = output_buffer.row(i);
        output[i].copyTo(output_slice);
    }
    return output_buffer;
};
static Mat Transpose(
const Mat& input,
@ -452,6 +348,8 @@ public:
// The number of dimensions that are encompassed by an "ellipsis" - "...".
size_t numOfEllipsisDims = 0;
// Backend for fastgemm
FastGemmOpt opt;
void parseEquation(String equation);
void processEquation(const std::vector<MatShape>& inputs);
@ -469,7 +367,12 @@ public:
const MatShape& reduceDims,
bool isFinalPair
);
Mat batchwiseMatMul(
const Mat& input1,
const MatShape& input1ShapeOverride,
const Mat& input2,
const MatShape& input2ShapeOverride
);
// constructor
LayerEinsumImpl(const LayerParams& params)
@ -491,6 +394,7 @@ public:
einsumInpShapes.emplace_back(shape);
}
opt.init();
// Maintains a mapping between input indices and their corresponding subscript labels for each input
inputSubscriptIndices.reserve(numInputs);
@ -1389,6 +1293,112 @@ Mat LayerEinsumImpl::pairwiseOperandProcess(
return output;
};
// Batched matrix multiplication using fastGemm.
// The inputs are viewed through the 3D shape overrides [B, M, K] and
// [B, K, N] (not through their physical Mat shapes); each of the B slices
// is multiplied independently and the results are stacked into a
// [B, M, N] CV_32F output.
Mat LayerEinsumImpl::batchwiseMatMul(
    const Mat& input1,
    const MatShape& input1ShapeOverride,
    const Mat& input2,
    const MatShape& input2ShapeOverride)
{
    // Sanity checks before the actual MatMul
    CV_CheckType(input1.type(), input2.type(), "Data types of the inputs must match for MatMul");
    CV_CheckEQ(input1ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ(input2ShapeOverride.size(), (size_t) 3, "Only 1 batch dimension is allowed for MatMul");
    CV_CheckEQ((size_t) input1ShapeOverride[0], (size_t) input2ShapeOverride[0], "Batch dimension should match for MatMul;");
    CV_CheckEQ((size_t) input1ShapeOverride[2], (size_t) input2ShapeOverride[1], "Incompatible matrix dimensions for matMul");

    // Logical dimensions taken from the shape overrides.
    int batches = input1ShapeOverride[0];
    int M = input1ShapeOverride[1];
    int K = input1ShapeOverride[2];
    int N = input2ShapeOverride[2];

    std::vector<Mat> output;  // per-batch [1, M, N] partial results
    if (batches > 1)
    {
        Mat reshapedInput1 = input1;
        Mat reshapedInput2 = input2;

        // input1 should be of logical size BxMxK; reshape the view if needed.
        if (input1.size[0] != M || input1.size[1] != K)
        {
            int shape[] = {batches, M, K};
            reshapedInput1 = input1.reshape(1, 3, shape);
        }

        // input2 should be of logical size BxKxN; reshape the view if needed.
        if (input2.size[0] != K || input2.size[1] != N)
        {
            int shape[] = {batches, K, N};
            reshapedInput2 = input2.reshape(1, 3, shape);
        }

        // Use int for the loop index: `batches` is int and cv::Range takes
        // int, so a size_t index caused signed/unsigned comparison and
        // implicit narrowing.
        for (int i = 0; i < batches; ++i)
        {
            // Slice out batch i of input1 and view it as an MxK matrix.
            std::vector<Range> ranges1 = {cv::Range(i, i + 1)};
            for (int j = 1; j < reshapedInput1.dims; j++)
                ranges1.emplace_back(cv::Range::all());
            Mat part1 = reshapedInput1(ranges1);
            int shape[] = {M, K};
            part1 = part1.reshape(1, sizeof(shape)/sizeof(shape[0]), shape);

            // Slice out batch i of input2 and view it as a KxN matrix.
            std::vector<Range> ranges2 = {cv::Range(i, i + 1)};
            for (int j = 1; j < reshapedInput2.dims; j++)
                ranges2.emplace_back(cv::Range::all());
            Mat part2 = reshapedInput2(ranges2);
            int shape2[] = {K, N};
            part2 = part2.reshape(1, sizeof(shape2)/sizeof(shape2[0]), shape2);

            Mat tmp_output(M, N, part1.type());
            fastGemm(false, false, 1.0, part1, part2, 0.0, tmp_output, opt);
            // Re-add the leading batch dimension before collecting the slice.
            int newShape[] = {1, M, N};
            tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
            output.emplace_back(tmp_output);
        }
    } else {
        // Single-batch case: one plain MxK x KxN multiplication.
        Mat reshapedInput1 = input1;
        Mat reshapedInput2 = input2;

        // input1 should be of size MxK; reshape the view if needed.
        if (input1.dims > 2 || input1.size[0] != M || input1.size[1] != K)
        {
            int shape[] = {M, K};
            reshapedInput1 = input1.reshape(1, 2, shape);
        }

        // input2 should be of size KxN; reshape the view if needed.
        if (input2.dims > 2 || input2.size[0] != K || input2.size[1] != N)
        {
            int shape2[] = {K, N};
            reshapedInput2 = input2.reshape(1, 2, shape2);
        }

        Mat tmp_output(M, N, reshapedInput1.type());
        fastGemm(false, false, 1.0, reshapedInput1, reshapedInput2, 0.0, tmp_output, opt);
        // Re-add the leading batch dimension for a uniform [1, M, N] result.
        int newShape[] = {1, M, N};
        tmp_output = tmp_output.reshape(1, sizeof(newShape)/sizeof(newShape[0]), newShape);
        output.emplace_back(tmp_output);
    }

    // Stack the per-batch slices into a single [B, M, N] buffer. Every row
    // is overwritten by copyTo below, so zero-initialization is unnecessary.
    int outputDim[] = {static_cast<int>(output.size()), M, N};
    Mat output_buffer(3, outputDim, CV_32F);
    for (int i = 0; i < static_cast<int>(output.size()); ++i) {
        Mat output_slice = output_buffer.row(i);
        output[i].copyTo(output_slice);
    }
    return output_buffer;
}
Ptr<EinsumLayer> EinsumLayer::create(const LayerParams& params)
{
return makePtr<LayerEinsumImpl>(params);

Loading…
Cancel
Save