|
|
|
@ -64,6 +64,7 @@ public: |
|
|
|
|
MAX = 2, |
|
|
|
|
} op; |
|
|
|
|
std::vector<float> coeffs; |
|
|
|
|
bool variableChannels; |
|
|
|
|
|
|
|
|
|
EltwiseLayerImpl(const LayerParams& params) |
|
|
|
|
{ |
|
|
|
@ -98,7 +99,7 @@ public: |
|
|
|
|
{ |
|
|
|
|
return backendId == DNN_BACKEND_OPENCV || |
|
|
|
|
backendId == DNN_BACKEND_HALIDE || |
|
|
|
|
(backendId == DNN_BACKEND_INFERENCE_ENGINE && |
|
|
|
|
(backendId == DNN_BACKEND_INFERENCE_ENGINE && !variableChannels && |
|
|
|
|
(preferableTarget != DNN_TARGET_OPENCL || coeffs.empty())); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -108,33 +109,57 @@ public: |
|
|
|
|
std::vector<MatShape> &internals) const CV_OVERRIDE |
|
|
|
|
{ |
|
|
|
|
CV_Assert(inputs.size() >= 2); |
|
|
|
|
CV_Assert(inputs[0].size() >= 2); |
|
|
|
|
CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size()); |
|
|
|
|
CV_Assert(op == SUM || coeffs.size() == 0); |
|
|
|
|
|
|
|
|
|
int dims = inputs[0].size(); |
|
|
|
|
int numChannels = inputs[0][1]; |
|
|
|
|
for (int i = 1; i < inputs.size(); i++) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(inputs[0] == inputs[i]); |
|
|
|
|
CV_Assert(inputs[0][0] == inputs[i][0]); |
|
|
|
|
numChannels = std::max(numChannels, inputs[i][1]); |
|
|
|
|
|
|
|
|
|
// It's allowed for channels axis to be different.
|
|
|
|
|
for (int j = 2; j < dims; j++) |
|
|
|
|
CV_Assert(inputs[0][j] == inputs[i][j]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
outputs.assign(1, inputs[0]); |
|
|
|
|
|
|
|
|
|
outputs[0][1] = numChannels; |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE |
|
|
|
|
{ |
|
|
|
|
std::vector<Mat> inputs; |
|
|
|
|
inputs_arr.getMatVector(inputs); |
|
|
|
|
variableChannels = false; |
|
|
|
|
for (int i = 1; i < inputs.size(); ++i) |
|
|
|
|
{ |
|
|
|
|
if (inputs[i].size[1] != inputs[0].size[1]) |
|
|
|
|
{ |
|
|
|
|
variableChannels = true; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EltwiseInvoker : public ParallelLoopBody |
|
|
|
|
{ |
|
|
|
|
public: |
|
|
|
|
const Mat* srcs; |
|
|
|
|
std::vector<const Mat*> srcs; |
|
|
|
|
int nsrcs; |
|
|
|
|
Mat* dst; |
|
|
|
|
const std::vector<float>* coeffs; |
|
|
|
|
std::vector<float> coeffs; |
|
|
|
|
EltwiseOp op; |
|
|
|
|
int nstripes; |
|
|
|
|
const ActivationLayer* activ; |
|
|
|
|
int channels; |
|
|
|
|
size_t planeSize; |
|
|
|
|
|
|
|
|
|
EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {} |
|
|
|
|
EltwiseInvoker() : nsrcs(0), dst(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {} |
|
|
|
|
|
|
|
|
|
static void run(const Mat* srcs, int nsrcs, Mat& dst, |
|
|
|
|
const std::vector<float>& coeffs, EltwiseOp op, |
|
|
|
@ -143,15 +168,23 @@ public: |
|
|
|
|
CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous()); |
|
|
|
|
CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs); |
|
|
|
|
|
|
|
|
|
EltwiseInvoker p; |
|
|
|
|
p.srcs.resize(nsrcs); |
|
|
|
|
p.coeffs = coeffs; |
|
|
|
|
for( int i = 0; i < nsrcs; i++ ) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(srcs[i].size == dst.size && |
|
|
|
|
srcs[i].type() == dst.type() && |
|
|
|
|
p.srcs[i] = srcs + i; |
|
|
|
|
CV_Assert(srcs[i].type() == dst.type() && |
|
|
|
|
srcs[i].isContinuous()); |
|
|
|
|
// Sort srcs and coefficients in the order by number of channels
|
|
|
|
|
for( int j = i - 1; j >= 1 && p.srcs[j - 1]->size[1] < p.srcs[j]->size[1]; j++ ) |
|
|
|
|
{ |
|
|
|
|
std::swap(p.srcs[j - 1], p.srcs[j]); |
|
|
|
|
if (!p.coeffs.empty()) |
|
|
|
|
std::swap(p.coeffs[j - 1], p.coeffs[j]); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
EltwiseInvoker p; |
|
|
|
|
p.srcs = srcs; |
|
|
|
|
p.nsrcs = nsrcs; |
|
|
|
|
p.dst = &dst; |
|
|
|
|
p.op = op; |
|
|
|
@ -173,7 +206,8 @@ public: |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
p.coeffs = simpleCoeffs ? 0 : &coeffs; |
|
|
|
|
if (simpleCoeffs) |
|
|
|
|
p.coeffs.clear(); |
|
|
|
|
p.activ = activ; |
|
|
|
|
|
|
|
|
|
parallel_for_(Range(0, nstripes), p, nstripes); |
|
|
|
@ -185,8 +219,8 @@ public: |
|
|
|
|
size_t stripeSize = (total + nstripes - 1)/nstripes; |
|
|
|
|
size_t stripeStart = r.start*stripeSize; |
|
|
|
|
size_t stripeEnd = std::min(r.end*stripeSize, total); |
|
|
|
|
int c, j, k, n = nsrcs; |
|
|
|
|
const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0; |
|
|
|
|
int c, j, k, n; |
|
|
|
|
const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0; |
|
|
|
|
float* dstptr0 = dst->ptr<float>(); |
|
|
|
|
int blockSize0 = 1 << 12, blockSize; |
|
|
|
|
|
|
|
|
@ -201,14 +235,35 @@ public: |
|
|
|
|
for( c = 0; c < channels; c++ ) |
|
|
|
|
{ |
|
|
|
|
size_t globalDelta = delta + (sampleIdx*channels + c)*planeSize; |
|
|
|
|
const float* srcptr0 = srcs[0].ptr<float>() + globalDelta; |
|
|
|
|
const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta; |
|
|
|
|
float* dstptr = dstptr0 + globalDelta; |
|
|
|
|
|
|
|
|
|
if( op == PROD ) |
|
|
|
|
// This code assumes that srcs are sorted in descending order by channels.
|
|
|
|
|
for (n = 1; n < nsrcs && c < srcs[n]->size[1]; ++n) {} |
|
|
|
|
|
|
|
|
|
if (n == 1) |
|
|
|
|
{ |
|
|
|
|
if( !coeffsptr ) |
|
|
|
|
{ |
|
|
|
|
for( j = 0; j < blockSize; j++ ) |
|
|
|
|
{ |
|
|
|
|
dstptr[j] = srcptr0[j]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
float c0 = coeffsptr[0]; |
|
|
|
|
for( j = 0; j < blockSize; j++ ) |
|
|
|
|
{ |
|
|
|
|
dstptr[j] = c0*srcptr0[j]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else if( op == PROD ) |
|
|
|
|
{ |
|
|
|
|
for( k = 1; k < n; k++ ) |
|
|
|
|
{ |
|
|
|
|
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; |
|
|
|
|
const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta; |
|
|
|
|
for( j = 0; j < blockSize; j++ ) |
|
|
|
|
{ |
|
|
|
|
dstptr[j] = srcptr0[j]*srcptr1[j]; |
|
|
|
@ -220,7 +275,7 @@ public: |
|
|
|
|
{ |
|
|
|
|
for( k = 1; k < n; k++ ) |
|
|
|
|
{ |
|
|
|
|
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; |
|
|
|
|
const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta; |
|
|
|
|
for( j = 0; j < blockSize; j++ ) |
|
|
|
|
{ |
|
|
|
|
dstptr[j] = std::max(srcptr0[j], srcptr1[j]); |
|
|
|
@ -232,7 +287,7 @@ public: |
|
|
|
|
{ |
|
|
|
|
for( k = 1; k < n; k++ ) |
|
|
|
|
{ |
|
|
|
|
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; |
|
|
|
|
const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta; |
|
|
|
|
for( j = 0; j < blockSize; j++ ) |
|
|
|
|
{ |
|
|
|
|
dstptr[j] = srcptr0[j] + srcptr1[j]; |
|
|
|
@ -245,7 +300,7 @@ public: |
|
|
|
|
float c0 = coeffsptr[0]; |
|
|
|
|
for( k = 1; k < n; k++ ) |
|
|
|
|
{ |
|
|
|
|
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; |
|
|
|
|
const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta; |
|
|
|
|
float c1 = coeffsptr[k]; |
|
|
|
|
for( j = 0; j < blockSize; j++ ) |
|
|
|
|
{ |
|
|
|
@ -272,7 +327,7 @@ public: |
|
|
|
|
std::vector<UMat> inputs; |
|
|
|
|
std::vector<UMat> outputs; |
|
|
|
|
|
|
|
|
|
if (inputs_.depth() == CV_16S && op != SUM) |
|
|
|
|
if ((inputs_.depth() == CV_16S && op != SUM) || variableChannels) |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
inputs_.getUMatVector(inputs); |
|
|
|
|