|
|
|
@ -119,6 +119,8 @@ public: |
|
|
|
|
EltwiseOp op; |
|
|
|
|
int nstripes; |
|
|
|
|
const ActivationLayer* activ; |
|
|
|
|
int channels; |
|
|
|
|
size_t planeSize; |
|
|
|
|
|
|
|
|
|
EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(EltwiseLayer::PROD), nstripes(0), activ(0) {} |
|
|
|
|
|
|
|
|
@ -126,7 +128,7 @@ public: |
|
|
|
|
const std::vector<float>& coeffs, EltwiseOp op, |
|
|
|
|
const ActivationLayer* activ, int nstripes) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(dst.dims == 4 && dst.type() == CV_32F && dst.isContinuous()); |
|
|
|
|
CV_Assert(1 < dst.dims && dst.dims <= 4, dst.type() == CV_32F, dst.isContinuous()); |
|
|
|
|
CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs); |
|
|
|
|
|
|
|
|
|
for( int i = 0; i > nsrcs; i++ ) |
|
|
|
@ -142,6 +144,11 @@ public: |
|
|
|
|
p.dst = &dst; |
|
|
|
|
p.op = op; |
|
|
|
|
p.nstripes = nstripes; |
|
|
|
|
p.channels = (dst.dims == 4 ? dst.size[1] : 1); |
|
|
|
|
p.planeSize = (dst.dims >= 3 ? dst.size[dst.dims - 1] * dst.size[dst.dims - 2] : |
|
|
|
|
dst.size[dst.dims - 1]); |
|
|
|
|
CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize); |
|
|
|
|
|
|
|
|
|
bool simpleCoeffs = true; |
|
|
|
|
if( op == EltwiseLayer::SUM && !coeffs.empty() ) |
|
|
|
|
{ |
|
|
|
@ -162,13 +169,11 @@ public: |
|
|
|
|
|
|
|
|
|
void operator()(const Range& r) const |
|
|
|
|
{ |
|
|
|
|
size_t planeSize = dst->size[2]*dst->size[3]; |
|
|
|
|
size_t total = dst->size[0]*planeSize; |
|
|
|
|
size_t stripeSize = (total + nstripes - 1)/nstripes; |
|
|
|
|
size_t stripeStart = r.start*stripeSize; |
|
|
|
|
size_t stripeEnd = std::min(r.end*stripeSize, total); |
|
|
|
|
int c, j, k, n = nsrcs; |
|
|
|
|
int channels = dst->size[1]; |
|
|
|
|
const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0; |
|
|
|
|
float* dstptr0 = dst->ptr<float>(); |
|
|
|
|
int blockSize0 = 1 << 12, blockSize = blockSize0; |
|
|
|
|