Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
328 lines
11 KiB
328 lines
11 KiB
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
|
|
|
#include "../precomp.hpp" |
|
#include "layers_common.hpp" |
|
#include "../op_timvx.hpp" |
|
|
|
#include <opencv2/dnn/shape_utils.hpp> |
|
#include <iostream> |
|
|
|
namespace cv |
|
{ |
|
namespace dnn |
|
{ |
|
|
|
/**
 * @brief Implementation of the int8 activation layer.
 *
 * On the OpenCV backend the activation is applied as a table lookup: each int8 input value v is
 * replaced by activationLUT[v + 128]. If the layer carries no blob (empty LUT), forward() copies
 * the input to the output unchanged. When built with HAVE_TIMVX the layer can additionally be
 * lowered to a TIM-VX activation operation (see initTimVX()).
 */
class ActivationLayerInt8Impl CV_FINAL : public ActivationLayerInt8
{
public:
    int input_zp, output_zp;    // read from "input_zeropoint" / "zeropoints"
    float input_sc, output_sc;  // read from "input_scale" / "scales"
    float slope = 0.0f;         // optional "slope" parameter; stays 0 when absent

#ifdef HAVE_TIMVX
    tvActivationType tvActType;
#endif

    /**
     * @brief Reads the quantization parameters and the optional lookup table from @p params.
     *
     * The first blob (if any) becomes the activation lookup table.
     */
    ActivationLayerInt8Impl(const LayerParams &params)
    {
        setParamsFrom(params);
        activationLUT = !blobs.empty() ? blobs[0] : Mat();

        input_zp = params.get<int>("input_zeropoint");
        input_sc = params.get<float>("input_scale");
        output_zp = params.get<int>("zeropoints");
        output_sc = params.get<float>("scales");

        if (params.has("slope"))
        {
            slope = params.get<float>("slope");
        }

#ifdef HAVE_TIMVX
        tvActType = getTimVXActType(type);
#endif
    }

    /**
     * @brief Reports whether this layer can run on @p backendId.
     *
     * The OpenCV backend is always accepted. With HAVE_TIMVX, the TIM-VX backend is accepted for
     * every activation type other than tvActNotSupported, except ReLU with a non-zero slope.
     */
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
#ifdef HAVE_TIMVX
        if (backendId == DNN_BACKEND_TIMVX)
        {
            // TODO!: Leaky ReLU will be supported in future.
            if (tvActType == tvActReLU && slope != 0.f)
                return false;
            return tvActType != tvActNotSupported;
        }
#endif
        return backendId == DNN_BACKEND_OPENCV;
    }

    /**
     * @brief Delegates shape inference to Layer::getMemoryShapes() and always returns true.
     */
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        return true;
    }

    /**
     * @brief Parallel loop body that applies the lookup table to an int8 blob.
     *
     * Dimensions from index 2 onwards are flattened into one "plane"; the plane is split into
     * nstripes stripes and each invocation handles its stripes for every sample and channel.
     */
    class Activation : public cv::ParallelLoopBody
    {
    public:
        const Mat* src;
        const Mat* lut;
        Mat* dst;
        int nstripes;

        Activation() : src(0), lut(0), dst(0), nstripes(0){}

        /** @brief Runs the lookup over @p src into @p dst using @p nstripes parallel stripes. */
        static void run(const Mat& src, const Mat& lut, Mat& dst, int nstripes)
        {
            Activation p;

            p.src = &src;
            p.lut = &lut;
            p.dst = &dst;
            p.nstripes = nstripes;

            parallel_for_(Range(0, nstripes), p, nstripes);
        }

        /** @brief Processes stripes [r.start, r.end) of every plane. */
        void operator()(const Range &r) const CV_OVERRIDE
        {
            const int8_t* table = lut->ptr<int8_t>();
            int nsamples = 1, outCn = 1;
            size_t planeSize = 1;

            if (src->dims > 1)
            {
                nsamples = src->size[0];
                outCn = src->size[1];
            }
            else
                outCn = src->size[0];

            for (int d = 2; d < src->dims; ++d)
                planeSize *= src->size[d];

            const size_t stripeSize = (planeSize + nstripes - 1)/nstripes;
            // Clamp both ends: with more stripes than plane elements the raw start offset can
            // exceed planeSize, which previously made the unsigned difference wrap around.
            const size_t stripeStart = std::min(r.start*stripeSize, planeSize);
            const size_t stripeEnd = std::min(r.end*stripeSize, planeSize);
            const int len = (int)(stripeEnd - stripeStart);
            if (len <= 0)
                return; // nothing assigned to this range

            for( int sample = 0; sample < nsamples; sample++ )
            {
                const int8_t* srcptr = src->ptr<int8_t>(sample) + stripeStart;
                int8_t* dstptr = dst->ptr<int8_t>(sample) + stripeStart;
                for( int cn = 0; cn < outCn; cn++, srcptr += planeSize, dstptr += planeSize )
                {
                    int j = 0;
#if CV_SIMD128
                    // Gather 16 table entries at a time; "+ 128" maps int8 values to table indices.
                    for( ; j <= len - 16; j += 16 )
                    {
                        v_int8x16 out(table[srcptr[j] + 128], table[srcptr[j+1] + 128], table[srcptr[j+2] + 128], table[srcptr[j+3] + 128],
                                      table[srcptr[j+4] + 128], table[srcptr[j+5] + 128], table[srcptr[j+6] + 128], table[srcptr[j+7] + 128],
                                      table[srcptr[j+8] + 128], table[srcptr[j+9] + 128], table[srcptr[j+10] + 128], table[srcptr[j+11] + 128],
                                      table[srcptr[j+12] + 128], table[srcptr[j+13] + 128], table[srcptr[j+14] + 128], table[srcptr[j+15] + 128]);
                        v_store(dstptr + j, out);
                    }
#endif
                    for( ; j < len; j++ )
                    {
                        dstptr[j] = table[srcptr[j] + 128];
                    }
                }
            }
        }
    };

    /**
     * @brief Builds the TIM-VX node for this activation.
     *
     * Expects exactly one input wrapper and one output wrapper. The input tensor is reused when
     * it is already registered in the graph, otherwise a new INPUT tensor is created with the
     * layer's input quantization. The output tensor is created as OUTPUT when @p isLast is true
     * and TRANSIENT otherwise. Without HAVE_TIMVX an empty node pointer is returned.
     */
    virtual Ptr<BackendNode> initTimVX(void* timVXInfo_,
                                       const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
                                       const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
                                       bool isLast) CV_OVERRIDE
    {
#ifdef HAVE_TIMVX
        // tvGraph Initialization.
        auto timVxInfo = reinterpret_cast<TimVXInfo *>(timVXInfo_);
        CV_Assert(timVxInfo);
        Ptr<TimVXGraph> tvGraph = timVxInfo->getGraph();
        CV_Assert(tvGraph);
        Ptr<tim::vx::Graph> graph = tvGraph->graph;

        std::vector<int> inputsIndex, outputsIndex;
        int input_index = -1, output_index = -1;
        CV_Assert(inputsWrapper.size() == 1);

        // input Tensor
        Ptr<TimVXBackendWrapper> inputWrapper = inputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
        CV_Assert(inputWrapper); // the cast yields null for a non-TIM-VX wrapper

        if (inputWrapper->isTensor())
        {
            input_index = tvGraph->getTensorIndex(inputWrapper->getTensor());
            if(input_index == -1)
            {
                // Copy To New inputWrapper
                Mat tmp = inputWrapper->getMat();
                inputWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tmp));
            }
        }

        if (!inputWrapper->isTensor())
        {
            Ptr<tim::vx::Quantization> tvInputQuant = Ptr<tim::vx::Quantization>(
                new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp));
            inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant);
            input_index = tvGraph->addWrapper(inputWrapper);
        }

        inputsIndex.push_back(input_index);

        // output tensor
        CV_Assert(outputsWrapper.size() == 1);
        Ptr<TimVXBackendWrapper> outputWrapper = outputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
        CV_Assert(outputWrapper);
        Ptr<tim::vx::Quantization> outputQuant = Ptr<tim::vx::Quantization>(
            new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp));

        Ptr<tim::vx::Tensor> outputTensor;

        if (isLast)
        {
            auto shapeType = getShapeTypeFromMat(outputWrapper->getMat());

            // For Graph Output tensor, we need to set tensor shape before createTensor().
            outputWrapper->setTensorShape(shapeType);
            outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant);
        }
        else
        {
            outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant);
        }
        output_index = tvGraph->addWrapper(outputWrapper);
        outputsIndex.push_back(output_index);

        std::shared_ptr<tim::vx::Operation> tvAct;

        switch(tvActType) {
            case tvActReLU:
            {
                if (slope != 0.f)
                    tvAct = graph->CreateOperation<tim::vx::ops::LeakyRelu>(slope);
                else
                    tvAct = graph->CreateOperation<tim::vx::ops::Relu>();
                break;
            }
            case tvActReLU6:
                tvAct = graph->CreateOperation<tim::vx::ops::Relu6>();
                break;
            case tvActTanH:
                tvAct = graph->CreateOperation<tim::vx::ops::Tanh>();
                break;
            case tvActSwish:
                tvAct = graph->CreateOperation<tim::vx::ops::Swish>();
                break;
            case tvActMish:
                tvAct = graph->CreateOperation<tim::vx::ops::Mish>();
                break;
            case tvActSigmoid:
                tvAct = graph->CreateOperation<tim::vx::ops::Sigmoid>();
                break;
            case tvActELU:
                tvAct = graph->CreateOperation<tim::vx::ops::Elu>();
                break;
            default:
                // TODO! check the default function.
                tvAct = graph->CreateOperation<tim::vx::ops::Relu>();
                break;
        }

        Ptr<TimVXBackendNode> tvBackendNode = new TimVXBackendNode(tvGraph, tvAct, inputsIndex, outputsIndex);

        return tvBackendNode;
#endif  // HAVE_TIMVX
        return Ptr<BackendNode>();
    }

    /**
     * @brief Runs the layer on the OpenCV backend.
     *
     * For each input: applies the lookup table in parallel when one is present (the input and
     * output must then be continuous CV_8S blobs of identical size), otherwise copies the input
     * to the matching output.
     */
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        // outputs[i] is indexed for every input below.
        CV_Assert(inputs.size() <= outputs.size());

        for (size_t i = 0; i < inputs.size(); i++)
        {
            const Mat &src = inputs[i];
            if (!activationLUT.empty())
            {
                const int nstripes = getNumThreads();
                Mat &dst = outputs[i];
                CV_Assert(src.size == dst.size && src.type() == dst.type() &&
                          src.isContinuous() && dst.isContinuous() && src.type() == CV_8S);

                Activation::run(src, activationLUT, dst, nstripes);
            }
            else
            {
                src.copyTo(outputs[i]);
            }
        }
    }

    /**
     * @brief Applies @p lut to @p len int8 elements of each channel in [cn0, cn1).
     *
     * @p src and @p dst advance by @p planeSize elements per channel; each output element is
     * lut[src[i] + 128].
     */
    void forwardSlice(const int8_t* src, const int8_t* lut, int8_t* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
    {
        for( int cn = cn0; cn < cn1; cn++, src += planeSize, dst += planeSize )
        {
            int i = 0;
#if CV_SIMD128
            for( ; i <= len - 16; i += 16 )
            {
                v_int8x16 out(lut[src[i] + 128], lut[src[i+1] + 128], lut[src[i+2] + 128], lut[src[i+3] + 128],
                              lut[src[i+4] + 128], lut[src[i+5] + 128], lut[src[i+6] + 128], lut[src[i+7] + 128],
                              lut[src[i+8] + 128], lut[src[i+9] + 128], lut[src[i+10] + 128], lut[src[i+11] + 128],
                              lut[src[i+12] + 128], lut[src[i+13] + 128], lut[src[i+14] + 128], lut[src[i+15] + 128]);
                v_store(dst + i, out);
            }
#endif
            for( ; i < len; i++ )
                dst[i] = lut[src[i] + 128];
        }
    }

    /**
     * @brief 32-bit integer variant of forwardSlice(): dst[i] = lut[src[i] + 128].
     *
     * NOTE(review): nothing here range-checks src[i]; presumably callers guarantee that
     * src[i] + 128 is a valid index into lut — confirm against the callers.
     */
    void forwardSlice(const int* src, const int* lut, int* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
    {
        for( int cn = cn0; cn < cn1; cn++, src += planeSize, dst += planeSize )
        {
            int i = 0;
#if CV_SIMD128
            // 16 elements per iteration, stored as four 4-lane vectors.
            for( ; i <= len - 16; i += 16 )
            {
                v_int32x4 out0(lut[src[i] + 128], lut[src[i+1] + 128], lut[src[i+2] + 128], lut[src[i+3] + 128]);
                v_int32x4 out1(lut[src[i+4] + 128], lut[src[i+5] + 128], lut[src[i+6] + 128], lut[src[i+7] + 128]);
                v_int32x4 out2(lut[src[i+8] + 128], lut[src[i+9] + 128], lut[src[i+10] + 128], lut[src[i+11] + 128]);
                v_int32x4 out3(lut[src[i+12] + 128], lut[src[i+13] + 128], lut[src[i+14] + 128], lut[src[i+15] + 128]);

                v_store(dst + i, out0);
                v_store(dst + i + 4, out1);
                v_store(dst + i + 8, out2);
                v_store(dst + i + 12, out3);
            }
#endif
            for( ; i < len; i++ )
                dst[i] = lut[src[i] + 128];
        }
    }

    Mat activationLUT;  // lookup table taken from blobs[0]; empty when the layer has no blobs
};
|
|
|
// Factory: constructs the int8 activation layer implementation from the given layer parameters.
Ptr<ActivationLayerInt8> ActivationLayerInt8::create(const LayerParams& params)
{
    Ptr<ActivationLayerInt8> layer(new ActivationLayerInt8Impl(params));
    return layer;
}
|
|
|
} |
|
}
|
|
|