From 23b244d3a336111c5205c1514b8ef263fac7d808 Mon Sep 17 00:00:00 2001
From: Yuantao Feng
Date: Fri, 19 Jul 2024 21:03:19 +0800
Subject: [PATCH] Merge pull request #25881 from fengyuentau:dnn/cpu/optimize_activations_with_v_exp

dnn: optimize activations with v_exp #25881

Merge with https://github.com/opencv/opencv_extra/pull/1191.

This PR optimizes the following activations:

- [x] Swish
- [x] Mish
- [x] Elu
- [x] Celu
- [x] Selu
- [x] HardSwish

### Performance (Updated on 2024-07-18)

#### AmLogic A311D2 (ARM Cortex-A73 + Cortex-A53)

```
Geometric mean (ms)

Name of Test                               activations    activations.patch    activations.patch vs activations (x-factor)
Celu::Layer_Elementwise::OCV/CPU               115.859               27.930                                           4.15
Elu::Layer_Elementwise::OCV/CPU                 27.846               27.003                                           1.03
Gelu::Layer_Elementwise::OCV/CPU                 0.657                0.602                                           1.09
HardSwish::Layer_Elementwise::OCV/CPU           31.885                6.781                                           4.70
Mish::Layer_Elementwise::OCV/CPU                35.729               32.089                                           1.11
Selu::Layer_Elementwise::OCV/CPU                61.955               27.850                                           2.22
Swish::Layer_Elementwise::OCV/CPU               30.819               26.688                                           1.15
```

#### Apple M1

```
Geometric mean (ms)

Name of Test                                    activations    activations.patch    activations.patch vs activations (x-factor)
Celu::Layer_Elementwise::OCV/CPU                     16.184                2.118                                           7.64
Celu::Layer_Elementwise::OCV/CPU_FP16                16.280                2.123                                           7.67
Elu::Layer_Elementwise::OCV/CPU                       9.123                1.878                                           4.86
Elu::Layer_Elementwise::OCV/CPU_FP16                  9.085                1.897                                           4.79
Gelu::Layer_Elementwise::OCV/CPU                      0.089                0.081                                           1.11
Gelu::Layer_Elementwise::OCV/CPU_FP16                 0.086                0.074                                           1.17
HardSwish::Layer_Elementwise::OCV/CPU                 1.560                1.555                                           1.00
HardSwish::Layer_Elementwise::OCV/CPU_FP16            1.536                1.523                                           1.01
Mish::Layer_Elementwise::OCV/CPU                      6.077                2.476                                           2.45
Mish::Layer_Elementwise::OCV/CPU_FP16                 5.990                2.496                                           2.40
Selu::Layer_Elementwise::OCV/CPU                     11.351                1.976                                           5.74
Selu::Layer_Elementwise::OCV/CPU_FP16                11.533                1.985                                           5.81
Swish::Layer_Elementwise::OCV/CPU                     4.687                1.890                                           2.48
Swish::Layer_Elementwise::OCV/CPU_FP16                4.715                1.873                                           2.52
```

#### Intel i7-12700K

```
Geometric mean (ms)

Name of Test                               activations    activations.patch    activations.patch vs activations (x-factor)
Celu::Layer_Elementwise::OCV/CPU                17.106                3.560                                           4.81
Elu::Layer_Elementwise::OCV/CPU                  5.064                3.478                                           1.46
Gelu::Layer_Elementwise::OCV/CPU                 0.036                0.035                                           1.04
HardSwish::Layer_Elementwise::OCV/CPU            2.914                2.893                                           1.01
Mish::Layer_Elementwise::OCV/CPU                 3.820                3.529                                           1.08
Selu::Layer_Elementwise::OCV/CPU                10.799                3.593                                           3.01
Swish::Layer_Elementwise::OCV/CPU                3.651                3.473                                           1.05
```

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable;
      the patch to opencv_extra has the same branch name
- [x] The feature is well documented and sample code can be built with the project CMake
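Implementation note: all six activations are vectorized with the same pattern. Each functor's `apply` walks the channel plane in steps of `VTraits<v_float32>::vlanes()`, evaluates the exponential vector-wide with `v_exp`, and finishes the tail (or the whole range when SIMD is unavailable) with the scalar `calculate`. Below is a minimal standalone sketch of that pattern for Swish, `x / (1 + exp(-x))`. It is an illustration written for this description — the helper name `swish_v_exp` is invented here, and it assumes an OpenCV build recent enough to provide `v_exp` in the universal intrinsics — not the exact code from `elementwise_layers.cpp`:

```
#include <cmath>
#include <opencv2/core.hpp>
#include <opencv2/core/hal/intrin.hpp>

// Hypothetical helper for illustration only; the real patch implements this
// inside each functor's apply() in modules/dnn/src/layers/elementwise_layers.cpp.
static void swish_v_exp(const float* srcptr, float* dstptr, int len)
{
    using namespace cv;
    int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
    const int vlanes = VTraits<v_float32>::vlanes();
    const v_float32 one = vx_setall_f32(1.0f);
    for (; i <= len - vlanes; i += vlanes)
    {
        v_float32 x = vx_load(srcptr + i);
        // The key change of this PR: the exponential is computed vector-wide
        // with v_exp instead of calling std::exp once per element.
        v_float32 t = v_exp(v_sub(vx_setzero_f32(), x)); // exp(-x)
        vx_store(dstptr + i, v_div(x, v_add(one, t)));
    }
#endif
    // Scalar tail: the remaining len % vlanes elements, or the whole range
    // when universal intrinsics are not available.
    for (; i < len; i++)
        dstptr[i] = srcptr[i] / (1.0f + std::exp(-srcptr[i]));
}
```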
---
 modules/dnn/perf/perf_layer.cpp               |  93 ++++---
 modules/dnn/src/layers/elementwise_layers.cpp | 243 ++++++++++++++++--
 modules/dnn/test/test_onnx_conformance.cpp    |   8 +
 ...conformance_layer_filter__openvino.inl.hpp |  16 ++
 ..._conformance_layer_parser_denylist.inl.hpp |   6 +
 5 files changed, 306 insertions(+), 60 deletions(-)

diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp
index ea1e70ae30..98adc56ffb 100644
--- a/modules/dnn/perf/perf_layer.cpp
+++ b/modules/dnn/perf/perf_layer.cpp
@@ -975,49 +975,72 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Softmax, Combine(
                            /* withCann= */ false) // only test on CPU
 ));
 
-using Layer_Elementwise = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
-PERF_TEST_P_(Layer_Elementwise, elementwise) {
-    std::vector<int> input_shape = get<0>(GetParam());
-    std::string op = get<1>(GetParam());
-    int backend_id = get<0>(get<2>(GetParam()));
-    int target_id = get<1>(get<2>(GetParam()));
+struct Layer_Elementwise : public TestBaseWithParam<tuple<Backend, Target>> {
+    void test_layer(const std::string &op_type, const std::vector<int> &input_shape) {
+        int backend_id = get<0>(GetParam());
+        int target_id = get<1>(GetParam());
 
-    Mat input(input_shape, CV_32F);
-    randn(input, 0.f, 1.f);
+        Mat input(input_shape, CV_32F);
+        randu(input, -10.0f, 10.f);
 
-    LayerParams lp;
-    lp.type = op;
-    lp.name = "TestLayer";
+        LayerParams lp;
+        lp.type = op_type;
+        lp.name = cv::format("PerfLayer/%s", op_type.c_str());
 
-    Net net;
-    net.addLayerToPrev(lp.name, lp.type, lp);
+        Net net;
+        net.addLayerToPrev(lp.name, lp.type, lp);
 
-    // Warmup
-    {
-        net.setInput(input);
-        net.setPreferableBackend(backend_id);
-        net.setPreferableTarget(target_id);
-        Mat out = net.forward();
-    }
+        // Warmup
+        {
+            net.setInput(input);
+            net.setPreferableBackend(backend_id);
+            net.setPreferableTarget(target_id);
+            net.forward();
+        }
 
-    TEST_CYCLE() {
-        net.forward();
+        TEST_CYCLE() {
+            net.forward();
+        }
+
+        SANITY_CHECK_NOTHING();
     }
 
-    SANITY_CHECK_NOTHING();
+    int N = 2;
+    int C = 32;
+    int H = 416;
+    int W = 416;
+};
+
+PERF_TEST_P_(Layer_Elementwise, Gelu) {
+    test_layer("Gelu", std::vector<int>{1, 50, 3072});
+}
+PERF_TEST_P_(Layer_Elementwise, Swish) {
+    test_layer("Swish", std::vector<int>{N, C, H, W});
+}
+PERF_TEST_P_(Layer_Elementwise, Mish) {
+    test_layer("Mish", std::vector<int>{N, C, H, W});
+}
+PERF_TEST_P_(Layer_Elementwise, Elu) {
+    test_layer("ELU", std::vector<int>{N, C, H, W});
+}
+PERF_TEST_P_(Layer_Elementwise, Celu) {
+    test_layer("Celu", std::vector<int>{N, C, H, W});
+}
+PERF_TEST_P_(Layer_Elementwise, Selu) {
+    test_layer("Selu", std::vector<int>{N, C, H, W});
+}
+PERF_TEST_P_(Layer_Elementwise, HardSwish) {
+    test_layer("HardSwish", std::vector<int>{N, C, H, W});
 }
 
-INSTANTIATE_TEST_CASE_P(/**/, Layer_Elementwise, testing::Combine(
-    testing::Values(std::vector<int>{1, 50, 3072}),
-    testing::Values(std::string{"Gelu"}),
-    dnnBackendsAndTargets(/* withInferenceEngine= */ true,
-                          /* withHalide= */ false,
-                          /* withCpuOCV= */ true,
-                          /* withVkCom= */ false,
-                          /* withCUDA= */ true,
-                          /* withNgraph= */ true,
-                          /* withWebnn= */ false,
-                          /* withCann= */ false) // only test on CPU
-));
+INSTANTIATE_TEST_CASE_P(/**/, Layer_Elementwise,
+                        dnnBackendsAndTargets(/* withInferenceEngine= */ true,
+                                              /* withHalide= */ false,
+                                              /* withCpuOCV= */ true,
+                                              /* withVkCom= */ false,
+                                              /* withCUDA= */ true,
+                                              /* withNgraph= */ true,
+                                              /* withWebnn= */ false,
+                                              /* withCann= */ false));
 
 } // namespace
 
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index 477aad88be..6c06554d5f 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -859,12 +859,6 @@ struct GeluFunctor : public BaseFunctor {
                       one = vx_setall_f32(1.0f),
                       reciprocal_sqrt2 = vx_setall_f32(M_SQRT1_2);
             for (; i <= len - vlanes; i += vlanes) {
-                if (i + vlanes > len) {
-                    if (i == 0 || i == len) {
-                        break;
-                    }
-                    i = len - vlanes;
-                }
                 v_float32 x0 = vx_load(srcptr + i);
 
                 // t = x * M_SQRT1_2
@@ -1048,7 +1042,17 @@ const char* const TanHFunctor::BaseDefaultFunctor<TanHFunctor>::ocl_kernel_name
 
 struct SwishFunctor : public BaseDefaultFunctor<SwishFunctor>
 {
-    typedef SwishLayer Layer;
+    using Layer = SwishLayer;
+
+    int vlanes;
+
+    explicit SwishFunctor() {
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+        vlanes = VTraits<v_float32>::vlanes();
+#else
+        vlanes = 1;
+#endif
+    }
 
     bool supportBackend(int backendId, int)
     {
@@ -1064,6 +1068,32 @@ struct SwishFunctor : public BaseDefaultFunctor<SwishFunctor>
         return x / (1.f + exp(-x));
     }
 
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
+        CV_UNUSED(stripeStart);
+        for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
+            int i = 0;
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+            // x / (1.f + exp(-x));
+            v_float32 one = vx_setall_f32(1.0f),
+                      zero = vx_setzero_f32();
+            for (; i <= len - vlanes; i += vlanes) {
+                v_float32 x = vx_load(srcptr + i);
+
+                v_float32 t = v_sub(zero, x);
+                t = v_exp(t);
+                t = v_add(one, t);
+                t = v_div(x, t);
+
+                vx_store(dstptr + i, t);
+            }
+#endif
+            // In case SIMD is not available or len < vlanes
+            for (; i < len; i++) {
+                dstptr[i] = calculate(srcptr[i]);
+            }
+        }
+    }
+
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
@@ -1116,9 +1146,27 @@ struct SwishFunctor : public BaseDefaultFunctor<SwishFunctor>
 template<>
 const char* const SwishFunctor::BaseDefaultFunctor<SwishFunctor>::ocl_kernel_name = "SwishForward";
 
+namespace {
+    constexpr float MISH_THRESHOLD = -36.73f;
+}
+
+/*
+    This implementation is derived from
+    https://github.com/vpisarev/ficus/blob/3c9a8b78f49e17489c5e1fd6dd5dd487348c99c2/lib/NN/OpElemwise.fx#L110
+*/
 struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
 {
-    typedef MishLayer Layer;
+    using Layer = MishLayer;
+
+    int vlanes;
+
+    explicit MishFunctor() {
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+        vlanes = VTraits<v_float32>::vlanes();
+#else
+        vlanes = 1;
+#endif
+    }
 
     bool supportBackend(int backendId, int)
     {
@@ -1131,15 +1179,34 @@ struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
 
     inline float calculate(float x) const
     {
-        // Use fast approximation introduced in https://github.com/opencv/opencv/pull/17200
-        if (x >= 8.f)
-        {
-            return x;
-        }
+        float y = x > MISH_THRESHOLD ? std::exp(-x) : 1.f;
+        x *= x > MISH_THRESHOLD ? 1.f : 0.f;
+        return x * (1 + 2 * y) / (1 + 2 * y + 2 * y * y);
+    }
 
-        float eX = exp(x);
-        float n = (eX + 2.f) * eX;
-        return (x * n) / (n + 2.f);
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
+        CV_UNUSED(stripeStart);
+        for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
+            int i = 0;
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+            v_float32 v_threshold = vx_setall_f32(MISH_THRESHOLD), one = vx_setall_f32(1.f), z = vx_setzero_f32();
+            for (; i <= len - vlanes; i += vlanes) {
+                v_float32 x = vx_load(srcptr + i);
+
+                x = v_select(v_le(x, v_threshold), z, x);
+                v_float32 y = v_exp(v_sub(z, x));
+                v_float32 _2y = v_add(y, y),
+                          _2ya1 = v_add(_2y, one);
+                x = v_div(v_mul(x, _2ya1), v_add(_2ya1, v_mul(_2y, y)));
+
+                vx_store(dstptr + i, x);
+            }
+#endif
+            // In case SIMD is not available or len < vlanes
+            for (; i < len; i++) {
+                dstptr[i] = calculate(srcptr[i]);
+            }
+        }
     }
 
 #ifdef HAVE_CUDA
@@ -1270,10 +1337,18 @@ const char* const SigmoidFunctor::BaseDefaultFunctor<SigmoidFunctor>::ocl_kernel
 
 struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
 {
-    typedef ELULayer Layer;
+    using Layer = ELULayer;
+
     float alpha;
+    int vlanes;
 
-    explicit ELUFunctor(float alpha_ = 1.f) : alpha(alpha_) {}
+    explicit ELUFunctor(float alpha_ = 1.f) : alpha(alpha_) {
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+        vlanes = VTraits<v_float32>::vlanes();
+#else
+        vlanes = 1;
+#endif
+    }
 
     bool supportBackend(int backendId, int)
     {
@@ -1292,6 +1367,28 @@ struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
         return x >= 0.f ? x : alpha * (exp(x) - 1.f);
     }
 
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
+        CV_UNUSED(stripeStart);
+        for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
+            int i = 0;
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+            v_float32 z = vx_setzero_f32(), v_alpha = vx_setall_f32(alpha), one = vx_setall_f32(1.0f);
+            for (; i <= len - vlanes; i += vlanes) {
+                v_float32 x = vx_load(srcptr + i);
+
+                v_float32 t = v_mul(v_alpha, v_sub(v_exp(x), one));
+                x = v_select(v_ge(x, z), x, t);
+
+                vx_store(dstptr + i, x);
+            }
+#endif
+            // In case SIMD is not available or len < vlanes
+            for (; i < len; i++) {
+                dstptr[i] = calculate(srcptr[i]);
+            }
+        }
+    }
+
     inline void setKernelParams(ocl::Kernel& kernel) const
     {
         kernel.set(3, alpha);
@@ -1991,7 +2088,16 @@ const char* const BaseDefaultFunctor<ErfFunctor>::ocl_kernel_name = "ErfForward"
 
 struct HardSwishFunctor : public BaseDefaultFunctor<HardSwishFunctor>
 {
-    typedef HardSwishLayer Layer;
+    using Layer = HardSwishLayer;
+    int vlanes;
+
+    explicit HardSwishFunctor() {
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+        vlanes = VTraits<v_float32>::vlanes();
+#else
+        vlanes = 1;
+#endif
+    }
 
     bool supportBackend(int backendId, int)
     {
@@ -2002,7 +2108,32 @@ struct HardSwishFunctor : public BaseDefaultFunctor<HardSwishFunctor>
 
     inline float calculate(float x) const
     {
-        return x * max(0.f, min(1.f, x / 6.f + 0.5f));
+        return x * std::max(0.f, std::min(1.f, x / 6.f + 0.5f));
+    }
+
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
+        CV_UNUSED(stripeStart);
+        for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
+            int i = 0;
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+            v_float32 zero = vx_setzero_f32(), one = vx_setall_f32(1.0f),
+                      half = vx_setall_f32(0.5f), sixth = vx_setall_f32(1 / 6.0f);
+            for (; i <= len - vlanes; i += vlanes) {
+                v_float32 x = vx_load(srcptr + i);
+
+                v_float32 t = v_add(v_mul(x, sixth), half);
+                t = v_min(one, t);
+                t = v_max(zero, t);
+                t = v_mul(x, t);
+
+                vx_store(dstptr + i, t);
+            }
+#endif
+            // In case SIMD is not available or len < vlanes
+            for (; i < len; i++) {
+                dstptr[i] = calculate(srcptr[i]);
+            }
+        }
     }
 
 #ifdef HAVE_CUDA
@@ -2176,11 +2307,18 @@ const char* const BaseDefaultFunctor<TanFunctor>::ocl_kernel_name = "TanForward"
 
 struct CeluFunctor : public BaseDefaultFunctor<CeluFunctor>
 {
-    typedef CeluLayer Layer;
+    using Layer = CeluLayer;
 
     float alpha;
+    int vlanes;
 
-    explicit CeluFunctor(float alpha_ = 1.f) : alpha(alpha_) {}
+    explicit CeluFunctor(float alpha_ = 1.f) : alpha(alpha_) {
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+        vlanes = VTraits<v_float32>::vlanes();
+#else
+        vlanes = 1;
+#endif
+    }
 
     bool supportBackend(int backendId, int)
     {
@@ -2189,7 +2327,30 @@ struct CeluFunctor : public BaseDefaultFunctor<CeluFunctor>
 
     inline float calculate(float x) const
     {
-        return max(0.f, x) + min(0.f, alpha * expm1(x / alpha));
+        return std::max(0.f, x) + std::min(0.f, alpha * expm1(x / alpha));
+    }
+
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
+        CV_UNUSED(stripeStart);
+        for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
+            int i = 0;
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+            v_float32 zero = vx_setzero_f32(), v_alpha = vx_setall_f32(alpha),
+                      one = vx_setall_f32(1.0f), v_ralpha = vx_setall_f32(1.0f / alpha);
+            for (; i <= len - vlanes; i += vlanes) {
+                v_float32 x = vx_load(srcptr + i);
+
+                v_float32 t = v_min(zero, v_mul(v_alpha, v_sub(v_exp(v_mul(x, v_ralpha)), one)));
+                t = v_add(v_max(zero, x), t);
+
+                vx_store(dstptr + i, t);
+            }
+#endif
+            // In case SIMD is not available or len < vlanes
+            for (; i < len; i++) {
+                dstptr[i] = calculate(srcptr[i]);
+            }
+        }
     }
 
     inline void setKernelParams(ocl::Kernel& kernel) const
@@ -2250,13 +2411,18 @@ const char* const BaseDefaultFunctor<HardSwishFunctor>::ocl_kernel_name = "Har
 
 struct SeluFunctor : public BaseDefaultFunctor<SeluFunctor>
 {
-    typedef SeluLayer Layer;
+    using Layer = SeluLayer;
 
     float alpha;
     float gamma;
+    int vlanes;
 
     explicit SeluFunctor(float alpha_ = 1.67326319217681884765625f,
-                         float gamma_ = 1.05070102214813232421875f) : alpha(alpha_), gamma(gamma_) {}
+                         float gamma_ = 1.05070102214813232421875f)
+        : alpha(alpha_), gamma(gamma_) {
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+        vlanes = VTraits<v_float32>::vlanes();
+#else
+        vlanes = 1;
+#endif
+    }
 
     bool supportBackend(int backendId, int)
     {
@@ -2268,6 +2437,30 @@ struct SeluFunctor : public BaseDefaultFunctor<SeluFunctor>
         return gamma * (x > 0.f ? x : alpha * expm1(x));
     }
 
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
+        CV_UNUSED(stripeStart);
+        for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
+            int i = 0;
+#if (CV_SIMD || CV_SIMD_SCALABLE)
+            v_float32 z = vx_setzero_f32(), one = vx_setall_f32(1.0f),
+                      v_alpha = vx_setall_f32(alpha), v_gamma = vx_setall_f32(gamma);
+            for (; i <= len - vlanes; i += vlanes) {
+                v_float32 x = vx_load(srcptr + i);
+
+                v_float32 t = v_mul(v_alpha, v_sub(v_exp(x), one));
+                x = v_select(v_le(x, z), t, x);
+                x = v_mul(v_gamma, x);
+
+                vx_store(dstptr + i, x);
+            }
+#endif
+            // In case SIMD is not available or len < vlanes
+            for (; i < len; i++) {
+                dstptr[i] = calculate(srcptr[i]);
+            }
+        }
+    }
+
     inline void setKernelParams(ocl::Kernel& kernel) const
     {
         kernel.set(3, alpha);
diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp
index 47e6f22fce..57969ced87 100644
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -250,7 +250,10 @@ static const TestCase testConformanceConfig[] = {
     {"test_einsum_transpose", 1, 1},
     {"test_elu", 1, 1},
     {"test_elu_default", 1, 1},
+    {"test_elu_default_expanded_ver18", 1, 1},
     {"test_elu_example", 1, 1},
+    {"test_elu_example_expanded_ver18", 1, 1},
+    {"test_elu_expanded_ver18", 1, 1},
     {"test_equal", 2, 1},
     {"test_equal_bcast", 2, 1},
     {"test_erf", 1, 1},
@@ -454,6 +457,8 @@ static const TestCase testConformanceConfig[] = {
     {"test_min_uint32", 2, 1},
     {"test_min_uint64", 2, 1},
     {"test_min_uint8", 2, 1},
+    {"test_mish", 1, 1},
+    {"test_mish_expanded", 1, 1},
     {"test_mod_broadcast", 2, 1},
     {"test_mod_int64_fmod", 2, 1},
     {"test_mod_mixed_sign_float16", 2, 1},
@@ -775,7 +780,10 @@ static const TestCase testConformanceConfig[] = {
     {"test_sce_sum_log_prob_expanded", 2, 2},
     {"test_selu", 1, 1},
     {"test_selu_default", 1, 1},
+    {"test_selu_default_expanded_ver18", 1, 1},
     {"test_selu_example", 1, 1},
+    {"test_selu_example_expanded_ver18", 1, 1},
+    {"test_selu_expanded_ver18", 1, 1},
     {"test_sequence_insert_at_back", 2, 1},
     {"test_sequence_insert_at_front", 3, 1},
     {"test_shape", 1, 1},
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
index cbbc349bda..9b2a2f4f2d 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
@@ -624,8 +624,14 @@ CASE(test_elu)
     // no filter
 CASE(test_elu_default)
     // no filter
+CASE(test_elu_default_expanded_ver18)
+    // no filter
 CASE(test_elu_example)
     // no filter
+CASE(test_elu_example_expanded_ver18)
+    // no filter
+CASE(test_elu_expanded_ver18)
+    // no filter
 CASE(test_equal)
     // no filter
 CASE(test_equal_bcast)
@@ -1098,6 +1104,10 @@ CASE(test_min_uint64)
     // no filter
 CASE(test_min_uint8)
     // no filter
+CASE(test_mish)
+    // no filter
+CASE(test_mish_expanded)
+    // no filter
 CASE(test_mod_broadcast)
     // no filter
 CASE(test_mod_int64_fmod)
@@ -1851,8 +1861,14 @@ CASE(test_selu)
     // no filter
 CASE(test_selu_default)
     // no filter
+CASE(test_selu_default_expanded_ver18)
+    // no filter
 CASE(test_selu_example)
     // no filter
+CASE(test_selu_example_expanded_ver18)
+    // no filter
+CASE(test_selu_expanded_ver18)
+    // no filter
 CASE(test_sequence_insert_at_back)
     // no filter
 CASE(test_sequence_insert_at_front)
diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
index 7253a64cef..78c26eeea2 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
@@ -103,6 +103,9 @@
 "test_dynamicquantizelinear_min_adjusted_expanded",
 "test_edge_pad",
 "test_einsum_inner_prod",
+"test_elu_default_expanded_ver18",
+"test_elu_example_expanded_ver18",
+"test_elu_expanded_ver18",
 "test_equal",
 "test_equal_bcast",
 "test_expand_dim_changed",
@@ -412,6 +415,9 @@
 "test_sce_sum_expanded",
 "test_sce_sum_log_prob",
 "test_sce_sum_log_prob_expanded",
+"test_selu_default_expanded_ver18",
+"test_selu_example_expanded_ver18",
+"test_selu_expanded_ver18",
 "test_sequence_insert_at_back",
 "test_sequence_insert_at_front",
 "test_shape",
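A note on the Mish rewrite above: with `y = exp(-x)`, the reference definition `mish(x) = x * tanh(ln(1 + exp(x)))` simplifies algebraically to `x * (1 + 2y) / (1 + 2y + 2y*y)`, since `tanh(ln(u)) = (u*u - 1) / (u*u + 1)` with `u = 1 + exp(x)`. This form needs a single exponential and stays finite for large positive `x`, where `exp(x)` in the naive form overflows. `MISH_THRESHOLD = -36.73f` marks the point below which `mish(x)` is vanishingly small in float32 (on the order of 1e-15), so the implementation outputs exact zero there instead of evaluating `exp(-x)`, which grows toward float overflow for very negative inputs. The snippet below is a quick standalone self-check of the identity against the naive formula; it is illustrative test code written for this description, not part of the patch:

```
#include <algorithm>
#include <cmath>
#include <cstdio>

// Naive reference: x * tanh(softplus(x)). Fine for moderate x; exp(x)
// overflows float32 for x > ~88, which is one reason for the rewrite.
static float mish_naive(float x)
{
    return x * std::tanh(std::log1p(std::exp(x)));
}

// The rewritten form used in the patch (scalar version of calculate()).
static float mish_fast(float x)
{
    const float MISH_THRESHOLD = -36.73f;
    float y = x > MISH_THRESHOLD ? std::exp(-x) : 1.f;
    x *= x > MISH_THRESHOLD ? 1.f : 0.f; // below the threshold, mish(x) rounds to 0
    return x * (1 + 2 * y) / (1 + 2 * y + 2 * y * y);
}

int main()
{
    for (float x = -20.f; x <= 20.f; x += 0.25f)
    {
        float ref = mish_naive(x), fast = mish_fast(x);
        if (std::fabs(ref - fast) > 1e-5f * std::max(1.f, std::fabs(ref)))
            std::printf("mismatch at x = %g: %g vs %g\n", x, ref, fast);
    }
    std::printf("check done\n");
    return 0;
}
```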