@@ -18,14 +18,12 @@
 
 namespace cv { namespace dnn { namespace cuda4dnn {
 
-    template <class T>
-    class ReLUOp final : public CUDABackendNode {
-    public:
+    template <template<class> class Op, class T>
+    struct BaseOp : public CUDABackendNode
+    {
+    protected:
         using wrapper_type = GetCUDABackendWrapperType<T>;
 
-        ReLUOp(csl::Stream stream_, T slope_)
-            : stream(std::move(stream_)), slope{ slope_ } { }
-
         void forward(
             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
@@ -39,9 +37,21 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                 auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
                 auto output = output_wrapper->getSpan();
 
-                kernels::relu<T>(stream, output, input, slope);
+                static_cast<const Op<T>*>(this)->calculate(output, input);
             }
         }
+    };
+
+    template <class T>
+    class ReLUOp final : public BaseOp<ReLUOp, T> {
+    public:
+        ReLUOp(csl::Stream stream_, T slope_)
+            : stream(std::move(stream_)), slope{ slope_ } { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::relu<T>(stream, output, input, slope);
+        }
 
     private:
         csl::Stream stream;
@@ -49,28 +59,14 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ClippedReLUOp final : public CUDABackendNode {
+    class ClippedReLUOp final : public BaseOp<ClippedReLUOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ClippedReLUOp(csl::Stream stream_, T min_, T max_)
             : stream(std::move(stream_)), min{ min_ }, max{ max_ } { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::clipped_relu<T>(stream, output, input, min, max);
-            }
+            kernels::clipped_relu<T>(stream, output, input, min, max);
         }
 
     private:
@@ -79,35 +75,21 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ChannelwiseReLUOp final : public CUDABackendNode {
+    class ChannelwiseReLUOp final : public BaseOp<ChannelwiseReLUOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ChannelwiseReLUOp(csl::Stream stream_, const Mat& slope)
             : stream(std::move(stream_))
         {
             CV_Assert(!slope.empty());
             slopeTensor = csl::makeTensorHeader<T>(slope);
             csl::copyMatToTensor<T>(slope, slopeTensor, stream);
         }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                CV_Assert(input.get_axis_size(1) == slopeTensor.size());
-                std::size_t inner_size = input.size_range(2, input.rank());
-                kernels::axiswise_relu<T>(stream, output, input, inner_size, slopeTensor);
-            }
+            CV_Assert(input.get_axis_size(1) == slopeTensor.size());
+            std::size_t inner_size = input.size_range(2, input.rank());
+            kernels::axiswise_relu<T>(stream, output, input, inner_size, slopeTensor);
         }
 
     private:
@@ -116,27 +98,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class TanHOp final : public CUDABackendNode {
+    class TanHOp final : public BaseOp<TanHOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         TanHOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::tanh<T>(stream, output, input);
-            }
+            kernels::tanh<T>(stream, output, input);
         }
 
     private:
@@ -144,27 +112,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class SwishOp final : public CUDABackendNode {
+    class SwishOp final : public BaseOp<SwishOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         SwishOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::swish<T>(stream, output, input);
-            }
+            kernels::swish<T>(stream, output, input);
         }
 
     private:
@@ -172,27 +126,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class MishOp final : public CUDABackendNode {
+    class MishOp final : public BaseOp<MishOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         MishOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::mish<T>(stream, output, input);
-            }
+            kernels::mish<T>(stream, output, input);
         }
 
     private:
@@ -200,27 +140,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class SigmoidOp final : public CUDABackendNode {
+    class SigmoidOp final : public BaseOp<SigmoidOp, T> {
    public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         SigmoidOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::sigmoid<T>(stream, output, input);
-            }
+            kernels::sigmoid<T>(stream, output, input);
         }
 
     private:
@@ -228,27 +154,27 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ELUOp final : public CUDABackendNode {
+    class ELUOp final : public BaseOp<ELUOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ELUOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
+            kernels::elu<T>(stream, output, input);
+        }
 
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
+    private:
+        csl::Stream stream;
+    };
 
-                kernels::elu<T>(stream, output, input);
-            }
+    template <class T>
+    class AbsValOp final : public BaseOp<AbsValOp, T> {
+    public:
+        AbsValOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::abs<T>(stream, output, input);
         }
 
     private:
@@ -256,27 +182,41 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class AbsValOp final : public CUDABackendNode {
+    class BNLLOp final : public BaseOp<BNLLOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
+        BNLLOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        AbsValOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::bnll<T>(stream, output, input);
+        }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+    private:
+        csl::Stream stream;
+    };
+
+    template <class T>
+    class CeilOp final : public BaseOp<CeilOp, T> {
+    public:
+        CeilOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
+            kernels::ceil<T>(stream, output, input);
+        }
 
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
+    private:
+        csl::Stream stream;
+    };
 
-                kernels::abs<T>(stream, output, input);
-            }
+    template <class T>
+    class FloorOp final : public BaseOp<FloorOp, T> {
+    public:
+        FloorOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::floor<T>(stream, output, input);
         }
 
     private:
@@ -284,27 +224,41 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class BNLLOp final : public CUDABackendNode {
+    class LogOp final : public BaseOp<LogOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
+        LogOp(csl::Stream stream_) : stream(std::move(stream_)) { }
 
-        BNLLOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::log<T>(stream, output, input);
+        }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+    private:
+        csl::Stream stream;
+    };
+
+    template <class T>
+    class RoundOp final : public BaseOp<RoundOp, T> {
+    public:
+        RoundOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
+            kernels::rint<T>(stream, output, input);
+        }
 
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
+    private:
+        csl::Stream stream;
+    };
 
-                kernels::bnll<T>(stream, output, input);
-            }
+    template <class T>
+    class SqrtOp final : public BaseOp<SqrtOp, T> {
+    public:
+        SqrtOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::sqrt<T>(stream, output, input);
         }
 
     private:
@@ -312,28 +266,28 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class PowerOp final : public CUDABackendNode {
+    class NotOp final : public BaseOp<NotOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
+        NotOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+        {
+            kernels::not_k<T>(stream, output, input);
+        }
+
+    private:
+        csl::Stream stream;
+    };
+
+    template <class T>
+    class PowerOp final : public BaseOp<PowerOp, T> {
+    public:
         PowerOp(csl::Stream stream_, T exp_, T scale_, T shift_)
             : stream(std::move(stream_)), exp{ exp_ }, scale{ scale_ }, shift{ shift_ } { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::power<T>(stream, output, input, exp, scale, shift);
-            }
+            kernels::power<T>(stream, output, input, exp, scale, shift);
        }
 
     private:
@@ -342,28 +296,14 @@ namespace cv { namespace dnn { namespace cuda4dnn {
     };
 
     template <class T>
-    class ExpOp final : public CUDABackendNode {
+    class ExpOp final : public BaseOp<ExpOp, T> {
     public:
-        using wrapper_type = GetCUDABackendWrapperType<T>;
-
         ExpOp(csl::Stream stream_, T nScale_, T nShift_)
             : stream(std::move(stream_)), normScale{ nScale_ }, normShift{ nShift_ } { }
 
-        void forward(
-            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
-            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
-            csl::Workspace& workspace) override
+        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
         {
-            for (int i = 0; i < inputs.size(); i++)
-            {
-                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
-                auto input = input_wrapper->getView();
-
-                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
-                auto output = output_wrapper->getSpan();
-
-                kernels::exp<T>(stream, output, input, normScale, normShift);
-            }
+            kernels::exp<T>(stream, output, input, normScale, normShift);
        }
 
     private:
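
Note for reviewers unfamiliar with the pattern: the refactor above is plain CRTP. BaseOp takes the derived op as a template template parameter, keeps the wrapper/loop boilerplate in forward(), and hands each input/output pair to the derived class's calculate() through a static_cast, so the per-tensor dispatch is resolved at compile time and each op only supplies calculate(). The snippet below is a minimal standalone sketch of that dispatch, not OpenCV code: the CSL tensors, streams, and kernels are replaced by plain std::vector, and ReluOp/forward/calculate are illustrative names that merely mirror the diff.

// CRTP sketch: BaseOp owns the loop, the derived op owns the math.
#include <cstddef>
#include <cstdio>
#include <vector>

template <template <class> class Op, class T>
struct BaseOp {
    // Walks all elements once; the concrete op only provides calculate().
    void forward(const std::vector<T>& inputs, std::vector<T>& outputs) const {
        for (std::size_t i = 0; i < inputs.size(); i++)
            static_cast<const Op<T>*>(this)->calculate(outputs[i], inputs[i]);
    }
};

template <class T>
struct ReluOp : BaseOp<ReluOp, T> {
    explicit ReluOp(T slope) : slope(slope) { }

    // Leaky ReLU on a single element; called statically from BaseOp::forward.
    void calculate(T& out, const T& in) const { out = in > T(0) ? in : slope * in; }

private:
    T slope;
};

int main() {
    std::vector<float> in{ -1.f, 2.f, -3.f }, out(3);
    ReluOp<float>{ 0.1f }.forward(in, out);
    for (float v : out) std::printf("%g\n", v);  // prints -0.1, 2, -0.3
}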