From f99a135eda1de0f893dafc25fef4058f251cfc5b Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Thu, 4 Jan 2018 02:21:04 +0800
Subject: [PATCH] add eltwise layer ocl implementation

Signed-off-by: Li Peng
---
 modules/dnn/src/layers/eltwise_layer.cpp | 52 ++++++++++++++++++++++++
 modules/dnn/test/test_layers.cpp         |  5 +++
 modules/dnn/test/test_tf_importer.cpp    | 40 ++++++++++--------
 3 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp
index 7e2214ea46..40375734d8 100644
--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -259,11 +259,63 @@ public:
         }
     };
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+
+        switch (op)
+        {
+            case SUM:
+                if (coeffs.empty())
+                {
+                    add(inputs[0], inputs[1], outputs[0]);
+                    for (int i = 2; i < inputs.size(); ++i)
+                        add(outputs[0], inputs[i], outputs[0]);
+                }
+                else
+                {
+                    UMat mul0, mul1;
+                    multiply(coeffs[0], inputs[0], mul0);
+                    multiply(coeffs[1], inputs[1], mul1);
+                    add(mul0, mul1, outputs[0]);
+                    for (int i = 2; i < inputs.size(); ++i)
+                    {
+                        multiply(coeffs[i], inputs[i], mul0);
+                        add(mul0, outputs[0], outputs[0]);
+                    }
+                }
+                break;
+            case PROD:
+                multiply(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    multiply(inputs[i], outputs[0], outputs[0]);
+                break;
+            case MAX:
+                max(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    max(inputs[i], outputs[0], outputs[0]);
+                break;
+            default:
+                return false;
+        }
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
     }
 
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index a966e84bd1..cd23541aed 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -320,6 +320,11 @@ TEST(Layer_Test_Eltwise, Accuracy)
     testLayerUsingCaffeModels("layer_eltwise");
 }
 
+OCL_TEST(Layer_Test_Eltwise, Accuracy)
+{
+    testLayerUsingCaffeModels("layer_eltwise", DNN_TARGET_OPENCL);
+}
+
 TEST(Layer_Test_PReLU, Accuracy)
 {
     testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_CPU, true);
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 2cad882b5a..04b4c9a35e 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -76,7 +76,7 @@ static std::string path(const std::string& file)
     return findDataFile("dnn/tensorflow/" + file, false);
 }
 
-static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
+static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
                              double l1 = 1e-5, double lInf = 1e-4,
                              bool memoryLoad = false)
 {
@@ -104,6 +104,9 @@ static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
 
     ASSERT_FALSE(net.empty());
 
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(targetId);
+
     cv::Mat input = blobFromNPY(inpPath);
     cv::Mat target = blobFromNPY(outPath);
 
@@ -132,6 +135,11 @@ TEST(Test_TensorFlow, eltwise_add_mul)
     runTensorFlowNet("eltwise_add_mul");
 }
 
+OCL_TEST(Test_TensorFlow, eltwise_add_mul)
+{
+    runTensorFlowNet("eltwise_add_mul", DNN_TARGET_OPENCL);
+}
+
 TEST(Test_TensorFlow, pad_and_concat)
 {
     runTensorFlowNet("pad_and_concat");
@@ -141,7 +149,7 @@ TEST(Test_TensorFlow, batch_norm)
 {
     runTensorFlowNet("batch_norm");
     runTensorFlowNet("fused_batch_norm");
-    runTensorFlowNet("batch_norm_text", true);
+    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
 }
 
 TEST(Test_TensorFlow, pooling)
@@ -179,15 +187,15 @@ TEST(Test_TensorFlow, fp16)
 {
     const float l1 = 1e-3;
     const float lInf = 1e-2;
-    runTensorFlowNet("fp16_single_conv", false, l1, lInf);
-    runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
-    runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
-    runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
-    runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
-    runTensorFlowNet("fp16_max_pool_even", false, l1, lInf);
-    runTensorFlowNet("fp16_padding_same", false, l1, lInf);
+    runTensorFlowNet("fp16_single_conv", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_deconvolution", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_same", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_padding_valid", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_eltwise_add_mul", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_odd_valid", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_pad_and_concat", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_max_pool_even", DNN_TARGET_CPU, false, l1, lInf);
+    runTensorFlowNet("fp16_padding_same", DNN_TARGET_CPU, false, l1, lInf);
 }
 
 TEST(Test_TensorFlow, quantized)
@@ -267,7 +275,7 @@ OCL_TEST(Test_TensorFlow, MobileNet_SSD)
 TEST(Test_TensorFlow, lstm)
 {
-    runTensorFlowNet("lstm", true);
+    runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
 }
 
 TEST(Test_TensorFlow, split)
 {
@@ -284,11 +292,11 @@ TEST(Test_TensorFlow, memory_read)
 {
     double l1 = 1e-5;
     double lInf = 1e-4;
-    runTensorFlowNet("lstm", true, l1, lInf, true);
+    runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);
 
-    runTensorFlowNet("batch_norm", false, l1, lInf, true);
-    runTensorFlowNet("fused_batch_norm", false, l1, lInf, true);
-    runTensorFlowNet("batch_norm_text", true, l1, lInf, true);
+    runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
+    runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
+    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
 }
 }
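
Usage sketch (not part of the patch): the new OpenCL path is opted into the
same way the updated tests do it, via setPreferableTarget(). A minimal
example, assuming a hypothetical frozen graph "model.pb" and input image
"input.png"; note the CV_OCL_RUN guard above, so on non-Intel OpenCL devices
the layer still runs through Layer::forward_fallback():

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    int main()
    {
        using namespace cv;
        using namespace cv::dnn;

        // Hypothetical frozen TensorFlow graph; any net containing an
        // eltwise (SUM/PROD/MAX) layer exercises the new code.
        Net net = readNetFromTensorflow("model.pb");

        // Same calls the updated runTensorFlowNet() helper makes: with
        // DNN_TARGET_OPENCL, eltwise layers dispatch to forward_ocl().
        net.setPreferableBackend(DNN_BACKEND_DEFAULT);
        net.setPreferableTarget(DNN_TARGET_OPENCL);

        Mat img = imread("input.png");   // hypothetical input image
        net.setInput(blobFromImage(img));
        Mat out = net.forward();
        return 0;
    }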