Merge pull request #23047 from fengyuentau:layer_norm
dnn: add layer normalization for vision transformers

* add layer norm onnx parser, impl and tests
* add onnx graph simplifier for layer norm expanded
* handle the case when constants are of type Initializer
* add test case for layer norm expanded with initializers
* use CV_Assert & CV_CheckType in place of CV_Assert_N; use forward_fallback for OCL_FP16
* use const ref / ref in parameters of invoker::run; extract inner const if from nested loop; use size_t in place of ull
* template hasBias
* remove trailing whitespace
* use pointer parameter with null check; move normSize division & mean_square division outside of loop; use std::max to ensure positive value before std::sqrt
* refactor implementation, optimize parallel_for
* disable layer norm expanded
* remove the removal of layer norm optional outputs
parent 52855a39ad
commit 4d918ba40b
7 changed files with 636 additions and 0 deletions
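For context (not stated in the commit message; reconstructed from the kernel in the diff below): with D = `normSize` elements per normalized slice, scale s, optional bias b, the layer computes

$$\mu = \frac{1}{D}\sum_{i=1}^{D} x_i, \qquad \sigma = \sqrt{\max\Big(0,\; \frac{1}{D}\sum_{i=1}^{D} x_i^2 - \mu^2\Big) + \epsilon}, \qquad y_i = \frac{x_i - \mu}{\sigma}\, s_i \;(+\; b_i)$$

i.e. standard LayerNormalization over the trailing dimensions starting at `axis`, with the variance computed in a single pass as E[x²] − E[x]² and clamped at zero before the square root.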
@@ -0,0 +1,176 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "../precomp.hpp" |
||||||
|
#include "layers_common.hpp" |
||||||
|
|
||||||
|
namespace cv { namespace dnn { |
||||||
|
|
||||||
|
class LayerNormLayerImpl CV_FINAL : public LayerNormLayer
{
public:
    LayerNormLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);

        // standard ONNX attributes
        axis = params.get<int>("axis", 0);
        epsilon = params.get<float>("epsilon", 1e-5f);

        // OpenCV-specific attribute
        hasBias = params.get<bool>("hasBias", false);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV;
    }

    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        // Check the shapes of weight and bias if they exist:
        // inputs >= 2 (x and weight are required, bias is optional)
        CV_Check(inputs.size(), inputs.size() >= 2 && inputs.size() <= 3, "LayerNorm: require two (x, weight) or three (x, weight, bias) inputs");

        auto x_shape = inputs[0];
        int x_ndims = static_cast<int>(x_shape.size());

        auto w_shape = inputs[1];
        // if axis == last_dim, scale and bias are both 1d tensors (represented as a 2d Mat of shape n x 1)
        int w_ndims = static_cast<int>(w_shape.size());
        w_ndims = (axis == x_ndims - 1 && w_ndims == 2) ? w_ndims - 1 : w_ndims;
        CV_CheckEQ(x_ndims - axis, w_ndims, "LayerNorm: shape of weight does not match given axis and shape of input");
        for (int i = 0; i < w_ndims; ++i)
            CV_CheckEQ(x_shape[axis + i], w_shape[i], "LayerNorm: weight dimensions do not match input dimensions");
        if (hasBias)
        {
            CV_CheckEQ(inputs.size(), (size_t)3, "LayerNorm: bias is expected as the third input when hasBias is true");
            auto b_shape = inputs[2];
            CV_CheckEQ(w_shape.size(), b_shape.size(), "LayerNorm: shape of weight does not match shape of bias");
            for (size_t i = 0; i < w_shape.size(); ++i)
                CV_CheckEQ(w_shape[i], b_shape[i], "LayerNorm: bias dimensions do not match weight dimensions");
        }

        // Only one output is needed: the optional Mean and InvStdDev outputs are
        // not used in inference and should be omitted in the ONNX importer.
        outputs.assign(1, inputs[0]);
        return false;
    }

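    // LayerNormInvoker treats the input as a [total x normSize] matrix: `total`
    // slices over the dimensions before `axis`, each normalized independently
    // over its `normSize` trailing elements. `hasBias` is a template parameter
    // so the bias branch is resolved at compile time rather than per element.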
template<bool hasBias> |
||||||
|
class LayerNormInvoker : public ParallelLoopBody |
||||||
|
{ |
||||||
|
public: |
||||||
|
const Mat& src; |
||||||
|
const float* scaleData; |
||||||
|
const float* biasData; |
||||||
|
Mat& dst; |
||||||
|
|
||||||
|
float epsilon; |
||||||
|
|
||||||
|
int total; |
||||||
|
int normSize; |
||||||
|
float invNormSize; |
||||||
|
|
||||||
|
LayerNormInvoker(const Mat& src_, const Mat& scale, const Mat* b, Mat& dst_, int axis, float epsilon_) |
||||||
|
: src(src_), scaleData(scale.ptr<float>()), biasData(nullptr), dst(dst_), epsilon(epsilon_) |
||||||
|
{ |
||||||
|
if (hasBias) |
||||||
|
{ |
||||||
|
CV_Assert(b != nullptr); |
||||||
|
CV_Assert(b->isContinuous()); |
||||||
|
biasData = (const float*)b->ptr<float>(); |
||||||
|
} |
||||||
|
|
||||||
|
auto dstShape = shape(dst); |
||||||
|
total = std::accumulate(dstShape.begin(), dstShape.begin() + axis, 1, std::multiplies<int>()); |
||||||
|
normSize = std::accumulate(dstShape.begin() + axis, dstShape.end(), 1, std::multiplies<int>()); |
||||||
|
invNormSize = 1.0f / normSize; |
||||||
|
} |
||||||
|
|
||||||
|
static void run(const Mat& src, const Mat& scale, const Mat* b, Mat& dst, int axis, float epsilon) |
||||||
|
{ |
||||||
|
CV_Assert(src.isContinuous()); |
||||||
|
CV_Assert(dst.isContinuous()); |
||||||
|
CV_CheckTypeEQ(src.type(), CV_32F, "DNN/LayerNorm: only support float32"); |
||||||
|
CV_CheckTypeEQ(src.type(), dst.type(), ""); |
||||||
|
CV_Assert(scale.isContinuous()); |
||||||
|
|
||||||
|
CV_CheckGE(epsilon, 0.0f, ""); |
||||||
|
|
||||||
|
LayerNormInvoker p(src, scale, b, dst, axis, epsilon); |
||||||
|
|
||||||
|
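            // Granularity hint for parallel_for_: aim for roughly one stripe
            // per 1024 processed elements (total * normSize is the full work).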
            double nstripes = ((size_t)p.total * p.normSize) * (1 / 1024.0);
            parallel_for_(Range(0, p.total), p, nstripes);
        }

        void operator()(const Range& r) const CV_OVERRIDE
        {
            int stripeStart = r.start;
            int stripeEnd = r.end;

            const float* srcData = src.ptr<float>();
            float* dstData = dst.ptr<float>();

            for (int ofs = stripeStart; ofs < stripeEnd; ++ofs)
            {
                const float* first = srcData + ofs * normSize;
                float* dstFirst = dstData + ofs * normSize;

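                // Single pass over the slice: accumulate the sum and the sum of
                // squares, then Var(x) = E[x^2] - E[x]^2; std::max clamps tiny
                // negative values caused by floating-point rounding before sqrt.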
                float mean = 0;
                float meanSquare = 0;
                for (int h = 0; h < normSize; ++h)
                {
                    float v = first[h];
                    mean += v;
                    meanSquare += v * v;
                }
                mean *= invNormSize;
                meanSquare = std::sqrt(std::max(0.f, meanSquare * invNormSize - mean * mean) + epsilon);
                float invMeanSquare = 1.0f / meanSquare;
                for (int h = 0; h < normSize; ++h)
                {
                    float v = (first[h] - mean) * invMeanSquare * scaleData[h];
                    if (hasBias)
                    {
                        v = v + biasData[h];
                    }
                    dstFirst[h] = v;
                }
            }
        }
    };

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

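        // OpenCV DNN stores FP16 blobs as CV_16S; this layer only has an FP32
        // kernel, so let the generic fallback convert and re-invoke forward().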
        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        if (hasBias)
        {
            LayerNormInvoker<true>::run(inputs[0], inputs[1], &inputs[2], outputs[0], axis, epsilon);
        }
        else
        {
            LayerNormInvoker<false>::run(inputs[0], inputs[1], nullptr, outputs[0], axis, epsilon);
        }
    }
};

Ptr<LayerNormLayer> LayerNormLayer::create(const LayerParams& params) |
||||||
|
{ |
||||||
|
return makePtr<LayerNormLayerImpl>(params); |
||||||
|
} |
||||||
|
|
||||||
|
}} // cv::dnn
|
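For readers who want to exercise the layer standalone, here is a minimal sketch (illustrative only, not part of this PR). It assumes the layer type is registered as "LayerNormalization" in init.cpp (one of the other changed files, not shown in this hunk) and follows the shape rules enforced by getMemoryShapes above:

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <vector>

int main()
{
    using namespace cv;
    using namespace cv::dnn;

    LayerParams params;
    params.type = "LayerNormalization";   // assumed registration name (see init.cpp)
    params.name = "ln";
    params.set("axis", 2);                // normalize over the last dimension
    params.set("epsilon", 1e-5f);
    params.set("hasBias", true);

    // x: 1x4x8; scale/bias over the last axis, given as 2d 8x1 Mats
    // (the axis == last_dim special case in getMemoryShapes).
    Mat x2d(4, 8, CV_32F);
    randn(x2d, 0.0, 1.0);
    Mat x = x2d.reshape(1, std::vector<int>{1, 4, 8});
    Mat scale(8, 1, CV_32F, Scalar(1)), bias(8, 1, CV_32F, Scalar(0));

    Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);

    // Allocate outputs from the shapes the layer reports, then run it.
    std::vector<Mat> inputs = {x, scale, bias}, outputs, internals;
    std::vector<MatShape> inShapes = {shape(x), shape(scale), shape(bias)};
    std::vector<MatShape> outShapes, intShapes;
    layer->getMemoryShapes(inShapes, 1, outShapes, intShapes);
    for (const MatShape& s : outShapes)
        outputs.push_back(Mat(s, CV_32F));

    layer->finalize(inputs, outputs);
    layer->forward(inputs, outputs, internals);
    return 0;
}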