mirror of https://github.com/opencv/opencv.git
Merge pull request #24409 from fengyuentau:norm_kernel
dnn: add shared fastNorm kernel for MVN, instance norm and layer norm #24409. Relates to https://github.com/opencv/opencv/pull/24378#issuecomment-1756906570. TODO: - [x] add fastNorm - [x] refactor layer norm with fastNorm - [x] refactor MVN with fastNorm - [ ] add ONNX MVN in importer (in a new PR?) - [ ] refactor instance norm with fastNorm (in another PR, https://github.com/opencv/opencv/pull/24378, which needs this one merged first). ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under the Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV. - [x] The PR is proposed to the proper branch. - [x] There is a reference to the original bug report and related work. - [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake. (pull/24477/head)
parent
e202116b56
commit
c91af16fa7
7 changed files with 220 additions and 166 deletions
@ -0,0 +1,160 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../../precomp.hpp" |
||||
#include "fast_norm.hpp" |
||||
|
||||
namespace cv { namespace dnn { |
||||
|
||||
void fastNorm(const Mat &input, Mat &output, float epsilon, size_t normalized_axis, bool normalize_variance) { |
||||
const auto input_shape = shape(input); |
||||
CV_CheckLT(normalized_axis, input_shape.size(), "fastNorm: axis out of range"); |
||||
|
||||
size_t loops = static_cast<size_t>(total(input_shape, 0, static_cast<int>(normalized_axis))), |
||||
norm_size = static_cast<size_t>(total(input_shape, static_cast<int>(normalized_axis))); |
||||
float inv_norm_size = 1.0 / norm_size; |
||||
|
||||
auto fn = [&](const Range &r) { |
||||
const auto *input_data = input.ptr<const float>(); |
||||
auto *output_data = output.ptr<float>(); |
||||
for (int i = r.start; i < r.end; i++) { |
||||
const auto *x = input_data + norm_size * i; |
||||
auto *y = output_data + norm_size * i; |
||||
|
||||
float mean = 0.f, mean_square = 0.f; |
||||
for (int j = 0; j < norm_size; j++) { |
||||
float v = x[j]; |
||||
mean += v; |
||||
mean_square += v * v; |
||||
} |
||||
|
||||
mean *= inv_norm_size; |
||||
mean_square = std::sqrt(std::max(0.f, mean_square * inv_norm_size - mean * mean) + epsilon); |
||||
float inv_stdev = normalize_variance ? 1.f / mean_square : 1.f; |
||||
|
||||
for (size_t j = 0; j < norm_size; j++) { |
||||
y[j] = (x[j] - mean) * inv_stdev; |
||||
} |
||||
} |
||||
}; |
||||
double nstripes = loops * norm_size * (1 / 1024.0); |
||||
parallel_for_(Range(0, loops), fn, nstripes); |
||||
} |
||||
|
||||
void fastNorm(const Mat &input, const Mat &scale, Mat &output, float epsilon, size_t normalized_axis) { |
||||
const auto input_shape = shape(input); |
||||
CV_CheckLT(normalized_axis, input_shape.size(), "fastNorm: axis out of range"); |
||||
|
||||
size_t loops = static_cast<size_t>(total(input_shape, 0, static_cast<int>(normalized_axis))), |
||||
norm_size = static_cast<size_t>(total(input_shape, static_cast<int>(normalized_axis))); |
||||
float inv_norm_size = 1.0 / norm_size; |
||||
|
||||
auto fn = [&](const Range &r) { |
||||
const auto *input_data = input.ptr<const float>(); |
||||
const auto *scale_data = scale.ptr<const float>(); |
||||
auto *output_data = output.ptr<float>(); |
||||
for (int i = r.start; i < r.end; i++) { |
||||
const auto *x = input_data + norm_size * i; |
||||
auto *y = output_data + norm_size * i; |
||||
|
||||
float mean = 0.f, mean_square = 0.f; |
||||
for (int j = 0; j < norm_size; j++) { |
||||
float v = x[j]; |
||||
mean += v; |
||||
mean_square += v * v; |
||||
} |
||||
|
||||
mean *= inv_norm_size; |
||||
mean_square = std::sqrt(std::max(0.f, mean_square * inv_norm_size - mean * mean) + epsilon); |
||||
float inv_stdev = 1.f / mean_square; |
||||
|
||||
for (size_t j = 0; j < norm_size; j++) { |
||||
y[j] = scale_data[j] * (x[j] - mean) * inv_stdev; |
||||
} |
||||
} |
||||
}; |
||||
double nstripes = loops * norm_size * (1 / 1024.0); |
||||
parallel_for_(Range(0, loops), fn, nstripes); |
||||
} |
||||
|
||||
void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t normalized_axis) { |
||||
const auto input_shape = shape(input); |
||||
CV_CheckLT(normalized_axis, input_shape.size(), "fastNorm: axis out of range"); |
||||
CV_CheckEQ(scale.total(), bias.total(), "fastNorm: scale and bias should have the same shape"); |
||||
|
||||
size_t loops = static_cast<size_t>(total(input_shape, 0, static_cast<int>(normalized_axis))), |
||||
norm_size = static_cast<size_t>(total(input_shape, static_cast<int>(normalized_axis))); |
||||
float inv_norm_size = 1.0 / norm_size; |
||||
|
||||
auto fn = [&](const Range &r) { |
||||
const auto *input_data = input.ptr<const float>(); |
||||
const auto *scale_data = scale.ptr<const float>(); |
||||
const auto *bias_data = bias.ptr<const float>(); |
||||
auto *output_data = output.ptr<float>(); |
||||
for (int i = r.start; i < r.end; i++) { |
||||
const auto *x = input_data + norm_size * i; |
||||
auto *y = output_data + norm_size * i; |
||||
|
||||
float mean = 0.f, mean_square = 0.f; |
||||
for (int j = 0; j < norm_size; j++) { |
||||
float v = x[j]; |
||||
mean += v; |
||||
mean_square += v * v; |
||||
} |
||||
|
||||
mean *= inv_norm_size; |
||||
mean_square = std::sqrt(std::max(0.f, mean_square * inv_norm_size - mean * mean) + epsilon); |
||||
float inv_stdev = 1.f / mean_square; |
||||
|
||||
for (size_t j = 0; j < norm_size; j++) { |
||||
y[j] = scale_data[j] * (x[j] - mean) * inv_stdev + bias_data[j]; |
||||
} |
||||
} |
||||
}; |
||||
double nstripes = loops * norm_size * (1 / 1024.0); |
||||
parallel_for_(Range(0, loops), fn, nstripes); |
||||
} |
||||
|
||||
void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon) { |
||||
const auto input_shape = shape(input); |
||||
CV_CheckEQ(scale.total(), bias.total(), "fastNormChannel: scale and bias should have the same shape"); |
||||
CV_CheckGE(input.dims, 3, "fastNormChannel: input dimension >= 3"); |
||||
|
||||
size_t N = input_shape[0], C = input_shape[1]; |
||||
size_t loops = N * C, |
||||
norm_size = static_cast<size_t>(total(input_shape, 2)); |
||||
float inv_norm_size = 1.0 / norm_size; |
||||
|
||||
auto fn = [&](const Range &r) { |
||||
const auto *input_data = input.ptr<const float>(); |
||||
const auto *scale_data = scale.ptr<const float>(); |
||||
const auto *bias_data = bias.ptr<const float>(); |
||||
auto *output_data = output.ptr<float>(); |
||||
for (int i = r.start; i < r.end; i++) { |
||||
const auto *x = input_data + norm_size * i; |
||||
auto *y = output_data + norm_size * i; |
||||
|
||||
float mean = 0.f, mean_square = 0.f; |
||||
for (int j = 0; j < norm_size; j++) { |
||||
float v = x[j]; |
||||
mean += v; |
||||
mean_square += v * v; |
||||
} |
||||
|
||||
mean *= inv_norm_size; |
||||
mean_square = std::sqrt(std::max(0.f, mean_square * inv_norm_size - mean * mean) + epsilon); |
||||
float inv_stdev = 1.f / mean_square; |
||||
|
||||
size_t c = i % C; |
||||
float s = scale_data[c], b = bias_data[c]; |
||||
for (size_t j = 0; j < norm_size; j++) { |
||||
y[j] = s * (x[j] - mean) * inv_stdev + b; |
||||
} |
||||
} |
||||
}; |
||||
double nstripes = loops * norm_size * (1 / 1024.0); |
||||
parallel_for_(Range(0, loops), fn, nstripes); |
||||
} |
||||
|
||||
}} // cv::dnn
|
@ -0,0 +1,26 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_FAST_NORM_HPP
#define OPENCV_DNN_FAST_NORM_HPP

#include <opencv2/dnn/shape_utils.hpp>

namespace cv { namespace dnn {

// Multi-threaded normalization over the trailing axes starting at `normalized_axis`.
// Mainly for the Caffe MVN layer, which exposes the `normalize_variance` switch
// (subtract mean only when false; divide by stdev as well when true).
void fastNorm(const Mat &input, Mat &output, float epsilon, size_t normalized_axis = 0, bool normalize_variance = true);

// Multi-threaded normalization with an elementwise scale and no bias.
// Mainly for LayerNormalization with an absent B input.
void fastNorm(const Mat &input, const Mat &scale, Mat &output, float epsilon, size_t normalized_axis = 0);

// Multi-threaded normalization with elementwise scale and bias.
// Mainly for full LayerNormalization.
void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon, size_t normalized_axis = 0);

// Multi-threaded channel-wise normalization (InstanceNormalization-style).
// Scale and bias should both have shape (C). Input should have dimension >= 3,
// laid out as [N, C, spatial...].
void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon);

}} // cv::dnn

#endif // OPENCV_DNN_FAST_NORM_HPP
|
Loading…
Reference in new issue