From 45b5b3c13a9f0f9f6dcbc4fab854b5a774708aee Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 20 Jul 2018 15:19:44 +0300 Subject: [PATCH 01/25] dnn: check layer output for NaN/Inf --- .../dnn/include/opencv2/dnn/shape_utils.hpp | 26 +++- modules/dnn/src/dnn.cpp | 123 +++++++++++++++++- 2 files changed, 141 insertions(+), 8 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/shape_utils.hpp b/modules/dnn/include/opencv2/dnn/shape_utils.hpp index 1e2332cf10..953a437dc2 100644 --- a/modules/dnn/include/opencv2/dnn/shape_utils.hpp +++ b/modules/dnn/include/opencv2/dnn/shape_utils.hpp @@ -44,7 +44,9 @@ #include #include +#include #include +#include namespace cv { namespace dnn { @@ -178,13 +180,25 @@ static inline MatShape concat(const MatShape& a, const MatShape& b) return c; } -inline void print(const MatShape& shape, const String& name = "") +static inline std::string toString(const MatShape& shape, const String& name = "") { - printf("%s: [", name.c_str()); - size_t i, n = shape.size(); - for( i = 0; i < n; i++ ) - printf(" %d", shape[i]); - printf(" ]\n"); + std::ostringstream ss; + if (!name.empty()) + ss << name << ' '; + ss << '['; + for(size_t i = 0, n = shape.size(); i < n; ++i) + ss << ' ' << shape[i]; + ss << " ]"; + return ss.str(); +} +static inline void print(const MatShape& shape, const String& name = "") +{ + std::cout << toString(shape, name) << std::endl; +} +static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape) +{ + out << toString(shape); + return out; } inline int clamp(int ax, int dims) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 5014365fdd..16ece7151e 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -74,6 +74,10 @@ static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSize #endif ); +// Additional checks (slowdowns execution!) 
+static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false); +static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); +static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); using std::vector; using std::map; @@ -2053,10 +2057,75 @@ struct Net::Impl { if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) { + std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers); std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers), + std::vector umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers); + layer->forward(umat_inputBlobs, umat_outputBlobs, - OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers)); + umat_internalBlobs); + if (DNN_CHECK_NAN_INF) + { + bool fail = false; + for (size_t i = 0; i < umat_outputBlobs.size(); ++i) + { + UMat& u = umat_outputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + if (!checkRange(m)) + { + std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + else if (!checkRange(m, true, NULL, -1e6, 1e6)) + { + std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + } + if (fail) + { + for (size_t i = 0; i < umat_inputBlobs.size(); ++i) + { + UMat& u = umat_inputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; + if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < umat_outputBlobs.size(); ++i) + { + UMat& u = umat_outputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; + if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < umat_internalBlobs.size(); ++i) + { + UMat& u = umat_internalBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "INTERNAL " << i << " " << shape(m) << std::endl; + if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl; + } + if (DNN_CHECK_NAN_INF_RAISE_ERROR) + CV_Assert(!fail); + } + } OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs); } else @@ -2069,6 +2138,56 @@ struct Net::Impl layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals); + if (DNN_CHECK_NAN_INF) + { + bool fail = false; + for (size_t i = 0; i < ld.outputBlobs.size(); ++i) + { + const Mat& m = ld.outputBlobs[i]; + if (!checkRange(m)) + { + std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + else if (!checkRange(m, true, NULL, 
-1e6, 1e6)) + { + std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + } + if (fail) + { + for (size_t i = 0; i < ld.inputBlobs.size(); ++i) + { + const Mat* pM = ld.inputBlobs[i]; + if (!pM) + { + std::cout << "INPUT " << i << " is NULL" << std::endl; + continue; + } + const Mat& m = *pM; + std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < ld.outputBlobs.size(); ++i) + { + const Mat& m = ld.outputBlobs[i]; + std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < ld.internals.size(); ++i) + { + const Mat& m = ld.internals[i]; + std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; + } + if (DNN_CHECK_NAN_INF_RAISE_ERROR) + CV_Assert(!fail); + } + } + for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i) { if (!ld.outputBlobsWrappers[i].empty()) From 74cf48b5d77bc98084b8e809ec95eb80a1e1b1c2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 25 Jul 2018 16:51:11 +0300 Subject: [PATCH 02/25] dnn(test): use Backend/Target enums instead of 'int' --- modules/dnn/test/test_halide_layers.cpp | 62 ++++++++++++------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index eda414551f..637b1f00b5 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -16,7 +16,7 @@ using namespace cv; using namespace cv::dnn; using namespace testing; -static void test(Mat& input, Net& net, int backendId, int targetId) +static void test(Mat& input, Net& net, Backend backendId, Target targetId) { DNNTestLayer::checkBackend(backendId, targetId); randu(input, -1.0f, 1.0f); @@ -34,7 +34,7 @@ static void test(Mat& input, Net& net, int backendId, int targetId) normAssert(outputDefault, outputHalide, "", l1, lInf); } -static void test(LayerParams& params, Mat& input, int backendId, int targetId) +static void test(LayerParams& params, Mat& input, Backend backendId, Target targetId) { Net net; net.addLayerToPrev(params.name, params.type, params); @@ -101,8 +101,8 @@ TEST_P(Convolution, Accuracy) Size pad = get<4>(GetParam()); Size dilation = get<5>(GetParam()); bool hasBias = get<6>(GetParam()); - int backendId = get<0>(get<7>(GetParam())); - int targetId = get<1>(get<7>(GetParam())); + Backend backendId = get<0>(get<7>(GetParam())); + Target targetId = get<1>(get<7>(GetParam())); if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD) throw SkipTestException(""); @@ -171,8 +171,8 @@ TEST_P(Deconvolution, Accuracy) Size stride = Size(get<5>(GetParam())[0], get<5>(GetParam())[1]); Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]); bool hasBias = get<6>(GetParam()); - int backendId = get<0>(get<7>(GetParam())); - int targetId = get<1>(get<7>(GetParam())); + Backend backendId = get<0>(get<7>(GetParam())); + Target targetId = get<1>(get<7>(GetParam())); if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU && dilation.width == 2 && dilation.height == 
2) throw SkipTestException(""); @@ -235,8 +235,8 @@ TEST_P(LRN, Accuracy) float bias = get<2>(GetParam())[2]; bool normBySize = get<3>(GetParam()); std::string nrmType = get<4>(GetParam()); - int backendId = get<0>(get<5>(GetParam())); - int targetId = get<1>(get<5>(GetParam())); + Backend backendId = get<0>(get<5>(GetParam())); + Target targetId = get<1>(get<5>(GetParam())); if (backendId == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); @@ -276,8 +276,8 @@ TEST_P(AvePooling, Accuracy) Size outSize = get<1>(GetParam());; // Input size will be computed from parameters. Size kernel = get<2>(GetParam()); Size stride = get<3>(GetParam()); - int backendId = get<0>(get<4>(GetParam())); - int targetId = get<1>(get<4>(GetParam())); + Backend backendId = get<0>(get<4>(GetParam())); + Target targetId = get<1>(get<4>(GetParam())); if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD) throw SkipTestException(""); @@ -317,8 +317,8 @@ TEST_P(MaxPooling, Accuracy) Size kernel = get<2>(GetParam()); Size stride = get<3>(GetParam()); Size pad = get<4>(GetParam()); - int backendId = get<0>(get<5>(GetParam())); - int targetId = get<1>(get<5>(GetParam())); + Backend backendId = get<0>(get<5>(GetParam())); + Target targetId = get<1>(get<5>(GetParam())); LayerParams lp; lp.set("pool", "max"); @@ -355,8 +355,8 @@ TEST_P(FullyConnected, Accuracy) Size inSize = get<1>(GetParam()); int outChannels = get<2>(GetParam()); bool hasBias = get<3>(GetParam()); - int backendId = get<0>(get<4>(GetParam())); - int targetId = get<1>(get<4>(GetParam())); + Backend backendId = get<0>(get<4>(GetParam())); + Target targetId = get<1>(get<4>(GetParam())); if (backendId == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException(""); @@ -394,8 +394,8 @@ typedef TestWithParam > > SoftMax; TEST_P(SoftMax, Accuracy) { int inChannels = get<0>(GetParam()); - int backendId = get<0>(get<1>(GetParam())); - int targetId = get<1>(get<1>(GetParam())); + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); LayerParams lp; lp.type = "SoftMax"; lp.name = "testLayer"; @@ -457,7 +457,7 @@ TEST_P(Test_Halide_layers, MaxPoolUnpool) //////////////////////////////////////////////////////////////////////////////// static const int kNumChannels = 3; -void testInPlaceActivation(LayerParams& lp, int backendId, int targetId) +void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId) { EXPECT_FALSE(lp.name.empty()); @@ -485,8 +485,8 @@ TEST_P(BatchNorm, Accuracy) bool hasWeights = get<0>(GetParam()); bool hasBias = get<1>(GetParam()); float epsilon = get<2>(GetParam()); - int backendId = get<0>(get<3>(GetParam())); - int targetId = get<1>(get<3>(GetParam())); + Backend backendId = get<0>(get<3>(GetParam())); + Target targetId = get<1>(get<3>(GetParam())); LayerParams lp; lp.set("has_weight", hasWeights); @@ -518,8 +518,8 @@ typedef TestWithParam > > ReLU; TEST_P(ReLU, Accuracy) { float negativeSlope = get<0>(GetParam()); - int backendId = get<0>(get<1>(GetParam())); - int targetId = get<1>(get<1>(GetParam())); + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); LayerParams lp; lp.set("negative_slope", negativeSlope); @@ -536,8 +536,8 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Combine( typedef TestWithParam > > NoParamActivation; TEST_P(NoParamActivation, Accuracy) { - int backendId = get<0>(get<1>(GetParam())); - int targetId = get<1>(get<1>(GetParam())); + Backend backendId = get<0>(get<1>(GetParam())); + 
Target targetId = get<1>(get<1>(GetParam())); LayerParams lp; lp.type = get<0>(GetParam()); @@ -555,8 +555,8 @@ TEST_P(Power, Accuracy) float power = get<0>(GetParam())[0]; float scale = get<0>(GetParam())[1]; float shift = get<0>(GetParam())[2]; - int backendId = get<0>(get<1>(GetParam())); - int targetId = get<1>(get<1>(GetParam())); + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); LayerParams lp; lp.set("power", power); @@ -589,8 +589,8 @@ typedef TestWithParam > > Scale; TEST_P(Scale, Accuracy) { bool hasBias = get<0>(GetParam()); - int backendId = get<0>(get<1>(GetParam())); - int targetId = get<1>(get<1>(GetParam())); + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); LayerParams lp; lp.set("bias_term", hasBias); @@ -624,8 +624,8 @@ TEST_P(Concat, Accuracy) { Vec3i inSize = get<0>(GetParam()); Vec3i numChannels = get<1>(GetParam()); - int backendId = get<0>(get<2>(GetParam())); - int targetId = get<1>(get<2>(GetParam())); + Backend backendId = get<0>(get<2>(GetParam())); + Target targetId = get<1>(get<2>(GetParam())); Net net; @@ -692,8 +692,8 @@ TEST_P(Eltwise, Accuracy) std::string op = get<1>(GetParam()); int numConv = get<2>(GetParam()); bool weighted = get<3>(GetParam()); - int backendId = get<0>(get<4>(GetParam())); - int targetId = get<1>(get<4>(GetParam())); + Backend backendId = get<0>(get<4>(GetParam())); + Target targetId = get<1>(get<4>(GetParam())); Net net; From dd8701c1a031a3db99d0f026ba19484e4256b3e2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 25 Jul 2018 16:53:37 +0300 Subject: [PATCH 03/25] dnn(test): skip checks only for unstable tests but execute tested functions in Layer_Test_Halide/Convolution.Accuracy --- modules/dnn/test/test_halide_layers.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 637b1f00b5..788f237b22 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -16,7 +16,7 @@ using namespace cv; using namespace cv::dnn; using namespace testing; -static void test(Mat& input, Net& net, Backend backendId, Target targetId) +static void test(Mat& input, Net& net, Backend backendId, Target targetId, bool skipCheck = false) { DNNTestLayer::checkBackend(backendId, targetId); randu(input, -1.0f, 1.0f); @@ -29,16 +29,19 @@ static void test(Mat& input, Net& net, Backend backendId, Target targetId) net.setPreferableTarget(targetId); Mat outputHalide = net.forward().clone(); + if (skipCheck) + return; + double l1, lInf; DNNTestLayer::getDefaultThresholds(backendId, targetId, &l1, &lInf); normAssert(outputDefault, outputHalide, "", l1, lInf); } -static void test(LayerParams& params, Mat& input, Backend backendId, Target targetId) +static void test(LayerParams& params, Mat& input, Backend backendId, Target targetId, bool skipCheck = false) { Net net; net.addLayerToPrev(params.name, params.type, params); - test(input, net, backendId, targetId); + test(input, net, backendId, targetId, skipCheck); } static testing::internal::ParamGenerator > dnnBackendsAndTargetsWithHalide() @@ -107,10 +110,11 @@ TEST_P(Convolution, Accuracy) if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD) throw SkipTestException(""); + bool skipCheck = false; if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) 
&& kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1)) - throw SkipTestException("Skip unstable test"); + skipCheck = true; int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width}; Mat weights(4, &sz[0], CV_32F); @@ -139,7 +143,9 @@ TEST_P(Convolution, Accuracy) } int inpSz[] = {1, inChannels, inSize.height, inSize.width}; Mat input(4, &inpSz[0], CV_32F); - test(lp, input, backendId, targetId); + test(lp, input, backendId, targetId, skipCheck); + if (skipCheck) + throw SkipTestException("Skip checks in unstable test"); } INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine( From faa6c4e1e16ebfcf1415168acb159f2fc723bc36 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 24 Jul 2018 19:12:58 +0300 Subject: [PATCH 04/25] Faster-RCNN and RFCN models on CPU using Intel's Inference Engine backend. Enable Torch layers tests with Intel's Inference Engine backend. --- modules/dnn/include/opencv2/dnn/dnn.hpp | 2 +- modules/dnn/src/dnn.cpp | 8 + .../dnn/src/layers/detection_output_layer.cpp | 11 +- modules/dnn/src/layers/pooling_layer.cpp | 71 +++++-- modules/dnn/src/layers/proposal_layer.cpp | 55 +++++- modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp | 3 +- modules/dnn/src/opencl/ocl4dnn_pooling.cl | 4 +- modules/dnn/src/torch/torch_importer.cpp | 10 + modules/dnn/test/test_caffe_importer.cpp | 101 ++++++---- modules/dnn/test/test_layers.cpp | 10 +- modules/dnn/test/test_torch_importer.cpp | 179 ++++++++++-------- samples/dnn/object_detection.py | 2 +- 12 files changed, 303 insertions(+), 153 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 0809891942..c737177128 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -201,7 +201,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @param[out] outputs allocated output blobs, which will store results of the computation. * @param[out] internals allocated internal blobs */ - virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0; + virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals); /** @brief Given the @p input blobs, computes the output @p blobs. * @param[in] inputs the input blobs.
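
With forward(InputArrayOfArrays, ...) no longer pure virtual (the default implementation added to dnn.cpp below simply delegates to forward_fallback()), a custom layer only needs to implement the Mat-based forward(); that is why the boilerplate overrides are deleted from the test layers later in this patch. Below is a minimal sketch of what such a layer can now look like, assuming the usual LayerFactory registration; the class name and its pass-through behaviour are illustrative only, not part of the patch:

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/layer.details.hpp>  // CV_DNN_REGISTER_LAYER_CLASS

// Hypothetical pass-through layer: only the Mat-based forward() is overridden.
class MyPassThroughLayer : public cv::dnn::Layer
{
public:
    MyPassThroughLayer(const cv::dnn::LayerParams &params) { setParamsFrom(params); }

    static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams &params)
    {
        return cv::Ptr<cv::dnn::Layer>(new MyPassThroughLayer(params));
    }

    bool getMemoryShapes(const std::vector<std::vector<int> > &inputs, const int,
                         std::vector<std::vector<int> > &outputs,
                         std::vector<std::vector<int> > &) const CV_OVERRIDE
    {
        outputs = inputs;  // output shapes match the input shapes
        return false;
    }

    void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs,
                 std::vector<cv::Mat> &) CV_OVERRIDE
    {
        for (size_t i = 0; i < outputs.size(); ++i)
            inputs[i]->copyTo(outputs[i]);
    }
    // No forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays)
    // override is required anymore; the base class falls back to the Mat version.
};

// Registration is unchanged, e.g.:
// CV_DNN_REGISTER_LAYER_CLASS(PassThrough, MyPassThroughLayer)

forward_fallback() converts the InputArray-based call into the Mat-based one above, so the same code path also serves the OpenCL targets of DNN_BACKEND_OPENCV.
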
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 5014365fdd..202be4d2c0 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -3071,6 +3071,14 @@ std::vector Layer::finalize(const std::vector &inputs) return outputs; } +void Layer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + Layer::forward_fallback(inputs, outputs, internals); +} + void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index f4d4d2b822..fdcaab02e3 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -196,7 +196,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_locPredTransposed; + backendId == DNN_BACKEND_INFERENCE_ENGINE && !_locPredTransposed && _bboxesNormalized; } bool getMemoryShapes(const std::vector &inputs, @@ -411,9 +411,12 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && - OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), - forward_ocl(inputs_arr, outputs_arr, internals_arr)) + if (_bboxesNormalized) + { + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && + OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + } Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 775a044b44..4e0fea21d8 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -135,10 +135,17 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_HALIDE && haveHalide() && - (type == MAX || type == AVE && !pad.width && !pad.height) || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && (type == MAX || type == AVE); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + { + if (preferableTarget == DNN_TARGET_MYRIAD) + return type == MAX || type == AVE; + else + return type != STOCHASTIC; + } + else + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_HALIDE && haveHalide() && + (type == MAX || type == AVE && !pad.width && !pad.height); } #ifdef HAVE_OPENCL @@ -192,8 +199,11 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), - forward_ocl(inputs_arr, outputs_arr, internals_arr)) + if (type == MAX || type == AVE || type == STOCHASTIC) + { + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + } Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); } @@ -238,22 +248,41 @@ public: #ifdef HAVE_INF_ENGINE InferenceEngine::LayerParams lp; lp.name = name; - lp.type = "Pooling"; lp.precision = InferenceEngine::Precision::FP32; - std::shared_ptr ieLayer(new InferenceEngine::PoolingLayer(lp)); - - ieLayer->_kernel_x = kernel.width; - ieLayer->_kernel_y = kernel.height; - ieLayer->_stride_x = stride.width; - 
ieLayer->_stride_y = stride.height; - ieLayer->_padding_x = pad.width; - ieLayer->_padding_y = pad.height; - ieLayer->_exclude_pad = type == AVE && padMode == "SAME"; - ieLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor"; - if (type == MAX) - ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::MAX; - else if (type == AVE) - ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::AVG; + + std::shared_ptr ieLayer; + if (type == MAX || type == AVE) + { + lp.type = "Pooling"; + InferenceEngine::PoolingLayer* poolLayer = new InferenceEngine::PoolingLayer(lp); + poolLayer->_kernel_x = kernel.width; + poolLayer->_kernel_y = kernel.height; + poolLayer->_stride_x = stride.width; + poolLayer->_stride_y = stride.height; + poolLayer->_padding_x = pad.width; + poolLayer->_padding_y = pad.height; + poolLayer->_exclude_pad = type == AVE && padMode == "SAME"; + poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor"; + poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX : + InferenceEngine::PoolingLayer::PoolType::AVG; + ieLayer = std::shared_ptr(poolLayer); + } + else if (type == ROI) + { + lp.type = "ROIPooling"; + ieLayer = std::shared_ptr(new InferenceEngine::CNNLayer(lp)); + ieLayer->params["pooled_w"] = format("%d", pooledSize.width); + ieLayer->params["pooled_h"] = format("%d", pooledSize.height); + ieLayer->params["spatial_scale"] = format("%f", spatialScale); + } + else if (type == PSROI) + { + lp.type = "PSROIPooling"; + ieLayer = std::shared_ptr(new InferenceEngine::CNNLayer(lp)); + ieLayer->params["output_dim"] = format("%d", psRoiOutChannels); + ieLayer->params["group_size"] = format("%d", pooledSize.width); + ieLayer->params["spatial_scale"] = format("%f", spatialScale); + } else CV_Error(Error::StsNotImplemented, "Unsupported pooling type"); diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index 44671268a7..cdc5e2250a 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -6,6 +6,7 @@ // Third party copyrights are property of their respective owners. 
#include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_inf_engine.hpp" namespace cv { namespace dnn { @@ -16,14 +17,14 @@ public: { setParamsFrom(params); - uint32_t featStride = params.get("feat_stride", 16); - uint32_t baseSize = params.get("base_size", 16); + featStride = params.get("feat_stride", 16); + baseSize = params.get("base_size", 16); // uint32_t minSize = params.get("min_size", 16); - uint32_t keepTopBeforeNMS = params.get("pre_nms_topn", 6000); + keepTopBeforeNMS = params.get("pre_nms_topn", 6000); keepTopAfterNMS = params.get("post_nms_topn", 300); - float nmsThreshold = params.get("nms_thresh", 0.7); - DictValue ratios = params.get("ratio"); - DictValue scales = params.get("scale"); + nmsThreshold = params.get("nms_thresh", 0.7); + ratios = params.get("ratio"); + scales = params.get("scale"); { LayerParams lp; @@ -83,6 +84,12 @@ public: } } + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_INFERENCE_ENGINE && preferableTarget != DNN_TARGET_MYRIAD; + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -312,6 +319,38 @@ public: outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0); } + virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE + { +#ifdef HAVE_INF_ENGINE + InferenceEngine::LayerParams lp; + lp.name = name; + lp.type = "Proposal"; + lp.precision = InferenceEngine::Precision::FP32; + std::shared_ptr ieLayer(new InferenceEngine::CNNLayer(lp)); + + ieLayer->params["base_size"] = format("%d", baseSize); + ieLayer->params["feat_stride"] = format("%d", featStride); + ieLayer->params["min_size"] = "16"; + ieLayer->params["nms_thresh"] = format("%f", nmsThreshold); + ieLayer->params["post_nms_topn"] = format("%d", keepTopAfterNMS); + ieLayer->params["pre_nms_topn"] = format("%d", keepTopBeforeNMS); + if (ratios.size()) + { + ieLayer->params["ratio"] = format("%f", ratios.get(0)); + for (int i = 1; i < ratios.size(); ++i) + ieLayer->params["ratio"] += format(",%f", ratios.get(i)); + } + if (scales.size()) + { + ieLayer->params["scale"] = format("%f", scales.get(0)); + for (int i = 1; i < scales.size(); ++i) + ieLayer->params["scale"] += format(",%f", scales.get(i)); + } + return Ptr(new InfEngineBackendNode(ieLayer)); +#endif // HAVE_INF_ENGINE + return Ptr(); + } + private: // A first half of channels are background scores. We need only a second one. static Mat getObjectScores(const Mat& m) @@ -342,8 +381,10 @@ private: Ptr deltasPermute; Ptr scoresPermute; - uint32_t keepTopAfterNMS; + uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize; Mat fakeImageBlob; + float nmsThreshold; + DictValue ratios, scales; #ifdef HAVE_OPENCL UMat umat_fakeImageBlob; #endif diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp index 8b74248b64..77cd3a6337 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp @@ -183,8 +183,9 @@ bool OCL4DNNPool::Forward(const UMat& bottom, ocl::Kernel oclk_sto_pool_forward( kname.c_str(), ocl::dnn::ocl4dnn_pooling_oclsrc, - format("-D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d" + format(" -D Dtype=%s -D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d" " -D STRIDE_W=%d -D STRIDE_H=%d", + (use_half) ? 
"half" : "float", kernel_w_, kernel_h_, stride_w_, stride_h_ )); diff --git a/modules/dnn/src/opencl/ocl4dnn_pooling.cl b/modules/dnn/src/opencl/ocl4dnn_pooling.cl index 501f5a5e87..77d2e5ba33 100644 --- a/modules/dnn/src/opencl/ocl4dnn_pooling.cl +++ b/modules/dnn/src/opencl/ocl4dnn_pooling.cl @@ -104,7 +104,7 @@ __kernel void #elif defined KERNEL_AVE_POOL __kernel void TEMPLATE(ave_pool_forward, Dtype)( - const int nthreads, __global const Dtype* const bottom_data, + const int nthreads, __global const Dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, __global Dtype* top_data) @@ -150,7 +150,7 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)( #elif defined KERNEL_STO_POOL __kernel void TEMPLATE(sto_pool_forward_test,Dtype)( - const int nthreads, __global const Dtype* const bottom_data, + const int nthreads, __global const Dtype* bottom_data, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, __global Dtype* top_data) diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 049c83f606..52bc0ce8a3 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -938,6 +938,16 @@ struct TorchImporter layerParams.set("end", DictValue::arrayInt(&ends[0], 4)); curModule->modules.push_back(newModule); } + else if (nnName == "SpatialUpSamplingNearest") + { + readTorchTable(scalarParams, tensorParams); + CV_Assert(scalarParams.has("scale_factor")); + int scale_factor = scalarParams.get("scale_factor"); + newModule->apiType = "Resize"; + layerParams.set("interpolation", "nearest"); + layerParams.set("zoom_factor", scale_factor); + curModule->modules.push_back(newModule); + } else { // Importer does not know how to map Torch's layer type to an OpenCV's one. diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 5365b2a435..b957b8caf4 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -51,6 +51,33 @@ static std::string _tf(TString filename) return (getOpenCVExtraDir() + "/dnn/") + filename; } +class Test_Caffe_nets : public DNNTestLayer +{ +public: + void testFaster(const std::string& proto, const std::string& model, const Mat& ref, + double scoreDiff = 0.0, double iouDiff = 0.0) + { + checkBackend(); + Net net = readNetFromCaffe(findDataFile("dnn/" + proto, false), + findDataFile("dnn/" + model, false)); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + Mat img = imread(findDataFile("dnn/dog416.png", false)); + resize(img, img, Size(800, 600)); + Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false); + Mat imInfo = (Mat_(1, 3) << img.rows, img.cols, 1.6f); + + net.setInput(blob, "data"); + net.setInput(imInfo, "im_info"); + // Output has shape 1x1xNx7 where N - number of detections. + // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom] + Mat out = net.forward(); + scoreDiff = scoreDiff ? scoreDiff : default_l1; + iouDiff = iouDiff ? 
iouDiff : default_lInf; + normAssertDetections(ref, out, ("model name: " + model).c_str(), 0.8, scoreDiff, iouDiff); + } +}; + TEST(Test_Caffe, memory_read) { const string proto = findDataFile("dnn/bvlc_googlenet.prototxt", false); @@ -344,9 +371,15 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy) } // https://github.com/richzhang/colorization -TEST(Reproducibility_Colorization, Accuracy) +TEST_P(Test_Caffe_nets, Colorization) { - const float l1 = 3e-5; + checkBackend(); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + + const float l1 = 4e-4; const float lInf = 3e-3; Mat inp = blobFromNPY(_tf("colorization_inp.npy")); @@ -356,7 +389,8 @@ TEST(Reproducibility_Colorization, Accuracy) const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false); const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false); Net net = readNetFromCaffe(proto, model); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel); net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606)); @@ -447,39 +481,40 @@ INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector, ) ); -TEST(Test_Caffe, FasterRCNN_and_RFCN) +TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) { - std::string models[] = {"VGG16_faster_rcnn_final.caffemodel", "ZF_faster_rcnn_final.caffemodel", - "resnet50_rfcn_final.caffemodel"}; - std::string protos[] = {"faster_rcnn_vgg16.prototxt", "faster_rcnn_zf.prototxt", - "rfcn_pascal_voc_resnet50.prototxt"}; - Mat refs[] = {(Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, - 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, - 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166), - (Mat_(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, - 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, - 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176), - (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, - 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16)}; - for (int i = 0; i < 3; ++i) - { - std::string proto = findDataFile("dnn/" + protos[i], false); - std::string model = findDataFile("dnn/" + models[i], false); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + static Mat ref = (Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, + 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, + 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); + testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref); +} - Net net = readNetFromCaffe(proto, model); - net.setPreferableBackend(DNN_BACKEND_OPENCV); - Mat img = imread(findDataFile("dnn/dog416.png", false)); - resize(img, img, Size(800, 600)); - Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false); - Mat imInfo = (Mat_(1, 3) << img.rows, img.cols, 1.6f); +TEST_P(Test_Caffe_nets, FasterRCNN_zf) +{ + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == 
DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + static Mat ref = (Mat_(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, + 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, + 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); + testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref); +} - net.setInput(blob, "data"); - net.setInput(imInfo, "im_info"); - // Output has shape 1x1xNx7 where N - number of detections. - // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom] - Mat out = net.forward(); - normAssertDetections(refs[i], out, ("model name: " + models[i]).c_str(), 0.8); - } +TEST_P(Test_Caffe_nets, RFCN) +{ + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + static Mat ref = (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, + 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); + testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref); } +INSTANTIATE_TEST_CASE_P(/**/, Test_Caffe_nets, dnnBackendsAndTargets()); + }} // namespace diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 3ebb4172d9..77a326417c 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1205,14 +1205,6 @@ public: } } - void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - Layer::forward_fallback(inputs, outputs, internals); - } - private: int outWidth, outHeight, zoomFactor; }; @@ -1225,7 +1217,7 @@ TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (availa { if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) throw SkipTestException(""); - // Test a cusom layer. + // Test a custom layer. CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer); try { diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 37966a1f93..c07c5b39d8 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -69,100 +69,119 @@ TEST(Torch_Importer, simple_read) ASSERT_FALSE(net.empty()); } -static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String outLayerName = "", - bool check2ndBlob = false, bool isBinary = false) +class Test_Torch_layers : public DNNTestLayer { - String suffix = (isBinary) ? ".dat" : ".txt"; +public: + void runTorchNet(const String& prefix, String outLayerName = "", + bool check2ndBlob = false, bool isBinary = false, + double l1 = 0.0, double lInf = 0.0) + { + String suffix = (isBinary) ? 
".dat" : ".txt"; - Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary); - ASSERT_FALSE(net.empty()); + Mat inp, outRef; + ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) ); + ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) ); - net.setPreferableBackend(DNN_BACKEND_OPENCV); - net.setPreferableTarget(targetId); + checkBackend(backend, target, &inp, &outRef); - Mat inp, outRef; - ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) ); - ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) ); + Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary); + ASSERT_FALSE(net.empty()); - if (outLayerName.empty()) - outLayerName = net.getLayerNames().back(); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); - net.setInput(inp); - std::vector outBlobs; - net.forward(outBlobs, outLayerName); - normAssert(outRef, outBlobs[0]); + if (outLayerName.empty()) + outLayerName = net.getLayerNames().back(); - if (check2ndBlob) - { - Mat out2 = outBlobs[1]; - Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary); - normAssert(out2, ref2); - } -} + net.setInput(inp); + std::vector outBlobs; + net.forward(outBlobs, outLayerName); + l1 = l1 ? l1 : default_l1; + lInf = lInf ? lInf : default_lInf; + normAssert(outRef, outBlobs[0], "", l1, lInf); -typedef testing::TestWithParam Test_Torch_layers; + if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE) + { + Mat out2 = outBlobs[1]; + Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary); + normAssert(out2, ref2, "", l1, lInf); + } + } +}; TEST_P(Test_Torch_layers, run_convolution) { - runTorchNet("net_conv", GetParam(), "", false, true); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + runTorchNet("net_conv", "", false, true); } TEST_P(Test_Torch_layers, run_pool_max) { - runTorchNet("net_pool_max", GetParam(), "", true); + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException(""); + runTorchNet("net_pool_max", "", true); } TEST_P(Test_Torch_layers, run_pool_ave) { - runTorchNet("net_pool_ave", GetParam()); + runTorchNet("net_pool_ave"); } TEST_P(Test_Torch_layers, run_reshape) { - int targetId = GetParam(); - runTorchNet("net_reshape", targetId); - runTorchNet("net_reshape_batch", targetId); - runTorchNet("net_reshape_single_sample", targetId); - runTorchNet("net_reshape_channels", targetId, "", false, true); + runTorchNet("net_reshape"); + runTorchNet("net_reshape_batch"); + runTorchNet("net_reshape_channels", "", false, true); +} + +TEST_P(Test_Torch_layers, run_reshape_single_sample) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException(""); + runTorchNet("net_reshape_single_sample", "", false, false, + (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 
0.0052 : 0.0); } TEST_P(Test_Torch_layers, run_linear) { - runTorchNet("net_linear_2d", GetParam()); + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException(""); + runTorchNet("net_linear_2d"); } TEST_P(Test_Torch_layers, run_concat) { - int targetId = GetParam(); - runTorchNet("net_concat", targetId, "l5_torchMerge"); - runTorchNet("net_depth_concat", targetId, "", false, true); + runTorchNet("net_concat", "l5_torchMerge"); + runTorchNet("net_depth_concat", "", false, true, 0.0, + target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0); } TEST_P(Test_Torch_layers, run_deconv) { - runTorchNet("net_deconv", GetParam()); + runTorchNet("net_deconv"); } TEST_P(Test_Torch_layers, run_batch_norm) { - runTorchNet("net_batch_norm", GetParam(), "", false, true); + runTorchNet("net_batch_norm", "", false, true); } TEST_P(Test_Torch_layers, net_prelu) { - runTorchNet("net_prelu", GetParam()); + runTorchNet("net_prelu"); } TEST_P(Test_Torch_layers, net_cadd_table) { - runTorchNet("net_cadd_table", GetParam()); + runTorchNet("net_cadd_table"); } TEST_P(Test_Torch_layers, net_softmax) { - int targetId = GetParam(); - runTorchNet("net_softmax", targetId); - runTorchNet("net_softmax_spatial", targetId); + runTorchNet("net_softmax"); + runTorchNet("net_softmax_spatial"); } TEST_P(Test_Torch_layers, net_logsoftmax) @@ -173,40 +192,55 @@ TEST_P(Test_Torch_layers, net_logsoftmax) TEST_P(Test_Torch_layers, net_lp_pooling) { - int targetId = GetParam(); - runTorchNet("net_lp_pooling_square", targetId, "", false, true); - runTorchNet("net_lp_pooling_power", targetId, "", false, true); + runTorchNet("net_lp_pooling_square", "", false, true); + runTorchNet("net_lp_pooling_power", "", false, true); } TEST_P(Test_Torch_layers, net_conv_gemm_lrn) { - runTorchNet("net_conv_gemm_lrn", GetParam(), "", false, true); + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) + throw SkipTestException(""); + runTorchNet("net_conv_gemm_lrn", "", false, true, + target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0, + target == DNN_TARGET_OPENCL_FP16 ? 
0.023 : 0.0); } TEST_P(Test_Torch_layers, net_inception_block) { - runTorchNet("net_inception_block", GetParam(), "", false, true); + runTorchNet("net_inception_block", "", false, true); } TEST_P(Test_Torch_layers, net_normalize) { - runTorchNet("net_normalize", GetParam(), "", false, true); + runTorchNet("net_normalize", "", false, true); } TEST_P(Test_Torch_layers, net_padding) { - int targetId = GetParam(); - runTorchNet("net_padding", targetId, "", false, true); - runTorchNet("net_spatial_zero_padding", targetId, "", false, true); - runTorchNet("net_spatial_reflection_padding", targetId, "", false, true); + runTorchNet("net_padding", "", false, true); + runTorchNet("net_spatial_zero_padding", "", false, true); + runTorchNet("net_spatial_reflection_padding", "", false, true); } TEST_P(Test_Torch_layers, net_non_spatial) { - runTorchNet("net_non_spatial", GetParam(), "", false, true); + if (backend == DNN_BACKEND_INFERENCE_ENGINE && + (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + runTorchNet("net_non_spatial", "", false, true); +} + +TEST_P(Test_Torch_layers, run_paralel) +{ + if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU) + throw SkipTestException(""); + runTorchNet("net_parallel", "l5_torchMerge"); } -INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, availableDnnTargets()); +TEST_P(Test_Torch_layers, net_residual) +{ + runTorchNet("net_residual", "", false, true); +} typedef testing::TestWithParam Test_Torch_nets; @@ -313,21 +347,6 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets()); -// TODO: fix OpenCL and add to the rest of tests -TEST(Torch_Importer, run_paralel) -{ - runTorchNet("net_parallel", DNN_TARGET_CPU, "l5_torchMerge"); -} - -TEST(Torch_Importer, DISABLED_run_paralel) -{ - runTorchNet("net_parallel", DNN_TARGET_OPENCL, "l5_torchMerge"); -} - -TEST(Torch_Importer, net_residual) -{ - runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true); -} // Test a custom layer // https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest @@ -374,17 +393,29 @@ public: } } - virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} - private: int scale; }; -TEST(Torch_Importer, upsampling_nearest) +TEST_P(Test_Torch_layers, upsampling_nearest) { + // Test a custom layer. CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer); - runTorchNet("net_spatial_upsampling_nearest", DNN_TARGET_CPU, "", false, true); + try + { + runTorchNet("net_spatial_upsampling_nearest", "", false, true); + } + catch (...) + { + LayerFactory::unregisterLayer("SpatialUpSamplingNearest"); + throw; + } LayerFactory::unregisterLayer("SpatialUpSamplingNearest"); + + // Test an implemented layer. 
+ runTorchNet("net_spatial_upsampling_nearest", "", false, true); } INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets()); } diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index 386e02890d..329c349e49 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -190,7 +190,7 @@ while cv.waitKey(1) < 0: net.setInput(blob) if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN frame = cv.resize(frame, (inpWidth, inpHeight)) - net.setInput(np.array([inpHeight, inpWidth, 1.6], dtype=np.float32), 'im_info') + net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info') outs = net.forward(getOutputsNames(net)) postprocess(frame, outs) From 43820d89b475dd32d11b441eaeef998dcd530752 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 26 Jul 2018 12:04:28 +0300 Subject: [PATCH 05/25] further improvements in split & merge; started using non-temporal store instructions (#12063) * 1. changed static const __m128/256 to const __m128/256 to avoid weird instructions and calls inserted by compiler. 2. added universal intrinsics that wrap MOVNTPS and other such (non-temporal or "no cache" store) instructions. v_store_interleave() and v_store() got respective flags/overloaded variants 3. rewrote split & merge to use the "no cache" store instructions. It resulted in dramatic performance improvement when processing big arrays * hopefully, fixed some test failures where 4-channel v_store_interleave() is used * added missing implementation of the new universal intrinsics (v_store_aligned_nocache() etc.) * fixed silly typo in the new intrinsics in intrin_vsx.hpp * still trying to fix VSX compiler errors * still trying to fix VSX compiler errors * still trying to fix VSX compiler errors * still trying to fix VSX compiler errors --- .../core/include/opencv2/core/hal/intrin.hpp | 11 + .../include/opencv2/core/hal/intrin_avx.hpp | 370 ++++++++++--- .../include/opencv2/core/hal/intrin_cpp.hpp | 23 +- .../include/opencv2/core/hal/intrin_neon.hpp | 22 +- .../include/opencv2/core/hal/intrin_sse.hpp | 513 +++++++++++++----- .../include/opencv2/core/hal/intrin_vsx.hpp | 13 +- modules/core/src/mathfuncs_core.simd.hpp | 58 +- modules/core/src/merge.cpp | 71 ++- modules/core/src/split.cpp | 69 ++- 9 files changed, 879 insertions(+), 271 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 031f8f3d02..9569e6127e 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -60,6 +60,17 @@ // access from within opencv code more accessible namespace cv { +namespace hal { + +enum StoreMode +{ + STORE_UNALIGNED = 0, + STORE_ALIGNED = 1, + STORE_ALIGNED_NOCACHE = 2 +}; + +} + template struct V_TypeTraits { }; diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 4ea66f5c0b..5c2d0b60c2 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -304,6 +304,17 @@ inline v_float16x16 v256_setall_f16(short val) { return v_float16x16(_mm256_set1 { _mm256_storeu_si256((__m256i*)ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { _mm256_store_si256((__m256i*)ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_si256((__m256i*)ptr, a.val); } \ + inline void
v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_si256((__m256i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_si256((__m256i*)ptr, a.val); \ + else \ + _mm256_store_si256((__m256i*)ptr, a.val); \ + } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(a.val)); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -338,6 +349,17 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int64x4, int64) { _mm256_storeu_##suffix(ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { _mm256_store_##suffix(ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_##suffix(ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_##suffix(ptr, a.val); \ + else \ + _mm256_store_##suffix(ptr, a.val); \ + } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { _mm_storeu_##suffix(ptr, _v256_extract_low(a.val)); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -1616,7 +1638,7 @@ inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); - static const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, + const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); __m256i p0 = _mm256_shuffle_epi8(ab0, sh); __m256i p1 = _mm256_shuffle_epi8(ab1, sh); @@ -1633,7 +1655,7 @@ inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); - static const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, + const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); __m256i p0 = _mm256_shuffle_epi8(ab0, sh); __m256i p1 = _mm256_shuffle_epi8(ab1, sh); @@ -1683,16 +1705,16 @@ inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g, __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); - static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); - static const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1); __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); - static const __m256i + const __m256i sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13), sh_g = _mm256_setr_epi8(1, 
4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, @@ -1717,18 +1739,18 @@ inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16& __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); - static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); - static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); - static const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, + const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); - static const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, + const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); - static const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, + const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); b0 = _mm256_shuffle_epi8(b0, sh_b); g0 = _mm256_shuffle_epi8(g0, sh_g); @@ -1785,7 +1807,7 @@ inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g, __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64)); __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96)); - static const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); @@ -1820,7 +1842,7 @@ inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16& __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48)); - static const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, + const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15); __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); __m256i p1 = _mm256_shuffle_epi8(bgr1, sh); @@ -1901,7 +1923,8 @@ inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& b, v_uint64x4& g ///////////////////////////// store interleave ///////////////////////////////////// -inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y ) +inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val); __m256i xy_h = 
_mm256_unpackhi_epi8(x.val, y.val); @@ -1909,11 +1932,25 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x3 __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, xy0); - _mm256_storeu_si256((__m256i*)(ptr + 32), xy1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 32), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 32), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 32), xy1); + } } -inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y ) +inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val); __m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val); @@ -1921,11 +1958,25 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint1 __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, xy0); - _mm256_storeu_si256((__m256i*)(ptr + 16), xy1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 16), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 16), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 16), xy1); + } } -inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y ) +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val); __m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val); @@ -1933,11 +1984,25 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, xy0); - _mm256_storeu_si256((__m256i*)(ptr + 8), xy1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 8), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 8), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 8), xy1); + } } -inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y ) +inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val); __m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val); @@ -1945,19 +2010,33 @@ inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64 __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, xy0); - _mm256_storeu_si256((__m256i*)(ptr + 4), xy1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + 
_mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 4), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 4), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 4), xy1); + } } -inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r ) +inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { - static const __m256i sh_b = _mm256_setr_epi8( + const __m256i sh_b = _mm256_setr_epi8( 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); - static const __m256i sh_g = _mm256_setr_epi8( + const __m256i sh_g = _mm256_setr_epi8( 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); - static const __m256i sh_r = _mm256_setr_epi8( + const __m256i sh_r = _mm256_setr_epi8( 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); @@ -1965,9 +2044,9 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x3 __m256i g0 = _mm256_shuffle_epi8(g.val, sh_g); __m256i r0 = _mm256_shuffle_epi8(r.val, sh_r); - static const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); - static const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); @@ -1978,20 +2057,36 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x3 __m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16); __m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgr0); - _mm256_storeu_si256((__m256i*)(ptr + 32), bgr1); - _mm256_storeu_si256((__m256i*)(ptr + 64), bgr2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 32), bgr1); + _mm256_store_si256((__m256i*)(ptr + 64), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgr2); + } } -inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g, const v_uint16x16& r ) +inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g, const v_uint16x16& r, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { - static const __m256i sh_b = _mm256_setr_epi8( + const __m256i sh_b = _mm256_setr_epi8( 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); - static const __m256i sh_g = _mm256_setr_epi8( + const __m256i sh_g = _mm256_setr_epi8( 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); - static 
const __m256i sh_r = _mm256_setr_epi8( + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); @@ -1999,9 +2094,9 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint1 __m256i g0 = _mm256_shuffle_epi8(g.val, sh_g); __m256i r0 = _mm256_shuffle_epi8(r.val, sh_r); - static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); - static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); @@ -2012,12 +2107,28 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint1 //__m256i bgr1 = p1; __m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgr0); - _mm256_storeu_si256((__m256i*)(ptr + 16), p1); - _mm256_storeu_si256((__m256i*)(ptr + 32), bgr2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 16), p1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 16), p1); + _mm256_store_si256((__m256i*)(ptr + 32), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 16), p1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr2); + } } -inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g, const v_uint32x8& r ) +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g, const v_uint32x8& r, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i b0 = _mm256_shuffle_epi32(b.val, 0x6c); __m256i g0 = _mm256_shuffle_epi32(g.val, 0xb1); @@ -2031,12 +2142,28 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint //__m256i bgr1 = p2; __m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgr0); - _mm256_storeu_si256((__m256i*)(ptr + 8), p2); - _mm256_storeu_si256((__m256i*)(ptr + 16), bgr2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 8), p2); + _mm256_stream_si256((__m256i*)(ptr + 16), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 8), p2); + _mm256_store_si256((__m256i*)(ptr + 16), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 8), p2); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgr2); + } } -inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g, const v_uint64x4& r ) +inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g, const v_uint64x4& r, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i s01 = _mm256_unpacklo_epi64(b.val, g.val); __m256i s12 = _mm256_unpackhi_epi64(g.val, r.val); @@ -2046,12 +2173,29 @@ inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64 __m256i bgr1 =
_mm256_blend_epi32(s01, s12, 0x0f); __m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgr0); - _mm256_storeu_si256((__m256i*)(ptr + 4), bgr1); - _mm256_storeu_si256((__m256i*)(ptr + 8), bgr2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 4), bgr1); + _mm256_store_si256((__m256i*)(ptr + 8), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgr2); + } } -inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r, const v_uint8x32& a ) +inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, + const v_uint8x32& r, const v_uint8x32& a, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i bg0 = _mm256_unpacklo_epi8(b.val, g.val); __m256i bg1 = _mm256_unpackhi_epi8(b.val, g.val); @@ -2068,14 +2212,32 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x3 __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgra0); - _mm256_storeu_si256((__m256i*)(ptr + 32), bgra1); - _mm256_storeu_si256((__m256i*)(ptr + 64), bgra2); - _mm256_storeu_si256((__m256i*)(ptr + 96), bgra3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 96), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 32), bgra1); + _mm256_store_si256((__m256i*)(ptr + 64), bgra2); + _mm256_store_si256((__m256i*)(ptr + 96), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 96), bgra3); + } } inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g, - const v_uint16x16& r, const v_uint16x16& a ) + const v_uint16x16& r, const v_uint16x16& a, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i bg0 = _mm256_unpacklo_epi16(b.val, g.val); __m256i bg1 = _mm256_unpackhi_epi16(b.val, g.val); @@ -2092,14 +2254,32 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint1 __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgra0); - _mm256_storeu_si256((__m256i*)(ptr + 16), bgra1); - _mm256_storeu_si256((__m256i*)(ptr + 32), bgra2); - _mm256_storeu_si256((__m256i*)(ptr + 48), bgra3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 48), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 16), bgra1); + 
_mm256_store_si256((__m256i*)(ptr + 32), bgra2); + _mm256_store_si256((__m256i*)(ptr + 48), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 48), bgra3); + } } inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g, - const v_uint32x8& r, const v_uint32x8& a ) + const v_uint32x8& r, const v_uint32x8& a, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i bg0 = _mm256_unpacklo_epi32(b.val, g.val); __m256i bg1 = _mm256_unpackhi_epi32(b.val, g.val); @@ -2116,14 +2296,32 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgra0); - _mm256_storeu_si256((__m256i*)(ptr + 8), bgra1); - _mm256_storeu_si256((__m256i*)(ptr + 16), bgra2); - _mm256_storeu_si256((__m256i*)(ptr + 24), bgra3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 24), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 8), bgra1); + _mm256_store_si256((__m256i*)(ptr + 16), bgra2); + _mm256_store_si256((__m256i*)(ptr + 24), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 24), bgra3); + } } inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g, - const v_uint64x4& r, const v_uint64x4& a ) + const v_uint64x4& r, const v_uint64x4& a, + hal::StoreMode mode=hal::STORE_UNALIGNED ) { __m256i bg0 = _mm256_unpacklo_epi64(b.val, g.val); __m256i bg1 = _mm256_unpackhi_epi64(b.val, g.val); @@ -2135,10 +2333,27 @@ inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64 __m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16); __m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16); - _mm256_storeu_si256((__m256i*)ptr, bgra0); - _mm256_storeu_si256((__m256i*)(ptr + 4), bgra1); - _mm256_storeu_si256((__m256i*)(ptr + 8), bgra2); - _mm256_storeu_si256((__m256i*)(ptr + 12), bgra3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 12), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 4), bgra1); + _mm256_store_si256((__m256i*)(ptr + 8), bgra2); + _mm256_store_si256((__m256i*)(ptr + 12), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 12), bgra3); + } } #define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ @@ -2166,27 +2381,30 @@ inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpv c0 = v_reinterpret_as_##suffix0(c1); \ d0 = v_reinterpret_as_##suffix0(d1); \ } \ 
-inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ { \ _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ - v_store_interleave((_Tp1*)ptr, a1, b1); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ } \ -inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ { \ _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ - v_store_interleave((_Tp1*)ptr, a1, b1, c1); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ } \ inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ - const _Tpvec0& c0, const _Tpvec0& d0 ) \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ { \ _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ - v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ } OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8) diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 1f5f53100a..61d58dbb06 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -1319,7 +1319,8 @@ Scheme: For all types except 64-bit. */ template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b) + const v_reg<_Tp, n>& b, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) { int i, i2; for( i = i2 = 0; i < n; i++, i2 += 2 ) @@ -1339,7 +1340,8 @@ Scheme: For all types except 64-bit. */ template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c) + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) { int i, i3; for( i = i3 = 0; i < n; i++, i3 += 3 ) @@ -1360,7 +1362,8 @@ Scheme: For all types except 64-bit. 
*/ template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - const v_reg<_Tp, n>& d) + const v_reg<_Tp, n>& d, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) { int i, i4; for( i = i4 = 0; i < n; i++, i4 += 4 ) @@ -1430,6 +1433,20 @@ inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) ptr[i] = a.s[i]; } +template +inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + /** @brief Combine vector from first elements of two vectors Scheme: diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index d8067306a5..b601e3e820 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -864,6 +864,10 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \ { vst1q_##suffix(ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { vst1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ vst1q_##suffix(ptr, a.val); } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -1292,14 +1296,16 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ c.val = v.val[2]; \ d.val = v.val[3]; \ } \ -inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { \ _Tpvec##x2_t v; \ v.val[0] = a.val; \ v.val[1] = b.val; \ vst2q_##suffix(ptr, v); \ } \ -inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { \ _Tpvec##x3_t v; \ v.val[0] = a.val; \ @@ -1308,7 +1314,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& vst3q_##suffix(ptr, v); \ } \ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ - const v_##_Tpvec& c, const v_##_Tpvec& d) \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ { \ _Tpvec##x4_t v; \ v.val[0] = a.val; \ @@ -1360,7 +1367,8 @@ inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \ d = v_##tp##x2(vcombine_##suffix(d0, d1)); \ } \ \ -inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b ) \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { \ vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ @@ -1369,7 +1377,8 @@ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& } \ \ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \ - const v_##tp##x2& b, const v_##tp##x2& c ) \ + const v_##tp##x2& b, const v_##tp##x2& c, \ + hal::StoreMode 
/*mode*/=hal::STORE_UNALIGNED) \ { \ vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ @@ -1380,7 +1389,8 @@ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \ } \ \ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ - const v_##tp##x2& c, const v_##tp##x2& d ) \ + const v_##tp##x2& c, const v_##tp##x2& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { \ vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 4971c777e4..6e07940042 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -788,7 +788,7 @@ inline v_float32x4 v_sqrt(const v_float32x4& x) inline v_float32x4 v_invsqrt(const v_float32x4& x) { - static const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f); + const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f); __m128 t = x.val; __m128 h = _mm_mul_ps(t, _0_5); t = _mm_rsqrt_ps(t); @@ -801,7 +801,7 @@ inline v_float64x2 v_sqrt(const v_float64x2& x) inline v_float64x2 v_invsqrt(const v_float64x2& x) { - static const __m128d v_1 = _mm_set1_pd(1.); + const __m128d v_1 = _mm_set1_pd(1.); return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val))); } @@ -1261,6 +1261,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \ { _mm_storeu_si128((__m128i*)ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { _mm_store_si128((__m128i*)ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ _mm_stream_si128((__m128i*)ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_si128((__m128i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_si128((__m128i*)ptr, a.val); \ + else \ + _mm_store_si128((__m128i*)ptr, a.val); \ +} \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { _mm_storel_epi64((__m128i*)ptr, a.val); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -1292,6 +1303,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \ { _mm_storeu_##suffix(ptr, a.val); } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { _mm_store_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ _mm_stream_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_##suffix(ptr, a.val); \ + else \ + _mm_store_##suffix(ptr, a.val); \ +} \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -1671,17 +1693,17 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b) inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) { #if CV_SSE4_1 - static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); - static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, 
-1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); __m128i s0 = _mm_loadu_si128((const __m128i*)ptr); __m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); __m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); __m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1); __m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1); __m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1); - static const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13); - static const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14); - static const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); + const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13); + const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14); + const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); a0 = _mm_shuffle_epi8(a0, sh_b); b0 = _mm_shuffle_epi8(b0, sh_g); c0 = _mm_shuffle_epi8(c0, sh_r); @@ -1689,9 +1711,9 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, b.val = b0; c.val = c0; #elif CV_SSSE3 - static const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14); - static const __m128i m1 = _mm_alignr_epi8(m0, m0, 11); - static const __m128i m2 = _mm_alignr_epi8(m0, m0, 6); + const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14); + const __m128i m1 = _mm_alignr_epi8(m0, m0, 11); + const __m128i m2 = _mm_alignr_epi8(m0, m0, 6); __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); @@ -1784,9 +1806,9 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, __m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24); __m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24); - static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); - static const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); - static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); + const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); a0 = _mm_shuffle_epi8(a0, sh_a); b0 = _mm_shuffle_epi8(b0, sh_b); c0 = _mm_shuffle_epi8(c0, sh_c); @@ -1955,55 +1977,61 @@ inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, // store interleave -inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b) +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) { __m128i v0 = _mm_unpacklo_epi8(a.val, b.val); __m128i v1 = _mm_unpackhi_epi8(a.val, b.val); - _mm_storeu_si128((__m128i*)(ptr), v0); - _mm_storeu_si128((__m128i*)(ptr + 16), v1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + } } inline void 
v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, - const v_uint8x16& c ) + const v_uint8x16& c, hal::StoreMode mode = hal::STORE_UNALIGNED) { #if CV_SSE4_1 - static const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); - static const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); - static const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); + const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); + const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); + const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); - static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); - static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); __m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0); __m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0); __m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0); - - _mm_storeu_si128((__m128i*)(ptr), v0); - _mm_storeu_si128((__m128i*)(ptr + 16), v1); - _mm_storeu_si128((__m128i*)(ptr + 32), v2); #elif CV_SSSE3 - static const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5); - static const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10); - static const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15); + const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5); + const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10); + const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15); __m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5); t0 = _mm_alignr_epi8(c.val, t0, 5); - __m128i s0 = _mm_shuffle_epi8(t0, m0); + __m128i v0 = _mm_shuffle_epi8(t0, m0); __m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6); t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5); - __m128i s1 = _mm_shuffle_epi8(t1, m1); + __m128i v1 = _mm_shuffle_epi8(t1, m1); __m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11); t2 = _mm_alignr_epi8(t2, a.val, 11); - __m128i s2 = _mm_shuffle_epi8(t2, m2); - - _mm_storeu_si128((__m128i*)ptr, s0); - _mm_storeu_si128((__m128i*)(ptr + 16), s1); - _mm_storeu_si128((__m128i*)(ptr + 32), s2); + __m128i v2 = _mm_shuffle_epi8(t2, m2); #else __m128i z = _mm_setzero_si128(); __m128i ab0 = _mm_unpacklo_epi8(a.val, b.val); @@ -2042,15 +2070,31 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1 __m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10)); __m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6)); __m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2)); - - _mm_storeu_si128((__m128i*)(ptr), v0); - _mm_storeu_si128((__m128i*)(ptr + 16), v1); - _mm_storeu_si128((__m128i*)(ptr + 32), v2); #endif + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + 
_mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + } } inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, - const v_uint8x16& c, const v_uint8x16& d) + const v_uint8x16& c, const v_uint8x16& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) { // a0 a1 a2 a3 .... // b0 b1 b2 b3 .... @@ -2062,33 +2106,64 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1 __m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ... __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ... - __m128i v1 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ... - __m128i v2 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ... + __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ... + __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ... __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ... - _mm_storeu_si128((__m128i*)ptr, v0); - _mm_storeu_si128((__m128i*)(ptr + 16), v2); - _mm_storeu_si128((__m128i*)(ptr + 32), v1); - _mm_storeu_si128((__m128i*)(ptr + 48), v3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + _mm_stream_si128((__m128i*)(ptr + 48), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + _mm_store_si128((__m128i*)(ptr + 48), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + _mm_storeu_si128((__m128i*)(ptr + 48), v3); + } } -inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b ) +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) { - __m128i t0, t1; - t0 = _mm_unpacklo_epi16(a.val, b.val); - t1 = _mm_unpackhi_epi16(a.val, b.val); - _mm_storeu_si128((__m128i*)(ptr), t0); - _mm_storeu_si128((__m128i*)(ptr + 8), t1); + __m128i v0 = _mm_unpacklo_epi16(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi16(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + } } inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, - const v_uint16x8& b, - const v_uint16x8& c ) + const v_uint16x8& b, const v_uint16x8& c, + hal::StoreMode mode = hal::STORE_UNALIGNED) { #if CV_SSE4_1 - static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); - static const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); - static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m128i 
sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); + const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); @@ -2096,10 +2171,6 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, __m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24); __m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24); __m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24); - - _mm_storeu_si128((__m128i*)ptr, v0); - _mm_storeu_si128((__m128i*)(ptr + 8), v1); - _mm_storeu_si128((__m128i*)(ptr + 16), v2); #else __m128i z = _mm_setzero_si128(); __m128i ab0 = _mm_unpacklo_epi16(a.val, b.val); @@ -2128,15 +2199,30 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, __m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10)); __m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6)); __m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2)); - - _mm_storeu_si128((__m128i*)(ptr), v0); - _mm_storeu_si128((__m128i*)(ptr + 8), v1); - _mm_storeu_si128((__m128i*)(ptr + 16), v2); #endif + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + } } inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, - const v_uint16x8& c, const v_uint16x8& d) + const v_uint16x8& c, const v_uint16x8& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) { // a0 a1 a2 a3 .... // b0 b1 b2 b3 .... @@ -2148,27 +2234,58 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16 __m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ... __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ... - __m128i v1 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ... - __m128i v2 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ... + __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ... + __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ... __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ... 
- _mm_storeu_si128((__m128i*)ptr, v0); - _mm_storeu_si128((__m128i*)(ptr + 8), v2); - _mm_storeu_si128((__m128i*)(ptr + 16), v1); - _mm_storeu_si128((__m128i*)(ptr + 24), v3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + _mm_stream_si128((__m128i*)(ptr + 24), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + _mm_store_si128((__m128i*)(ptr + 24), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + _mm_storeu_si128((__m128i*)(ptr + 24), v3); + } } -inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b ) +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) { - __m128i t0 = _mm_unpacklo_epi32(a.val, b.val); - __m128i t1 = _mm_unpackhi_epi32(a.val, b.val); + __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); - _mm_storeu_si128((__m128i*)ptr, t0); - _mm_storeu_si128((__m128i*)(ptr + 4), t1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + } } inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c ) + const v_uint32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) { v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3; v_transpose4x4(a, b, c, z, u0, u1, u2, u3); @@ -2177,35 +2294,82 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint __m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8)); __m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4)); - _mm_storeu_si128((__m128i*)ptr, v0); - _mm_storeu_si128((__m128i*)(ptr + 4), v1); - _mm_storeu_si128((__m128i*)(ptr + 8), v2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + _mm_stream_si128((__m128i*)(ptr + 8), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + _mm_store_si128((__m128i*)(ptr + 8), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + _mm_storeu_si128((__m128i*)(ptr + 8), v2); + } } inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) + const v_uint32x4& c, const v_uint32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) { - v_uint32x4 t0, t1, t2, t3; - v_transpose4x4(a, b, c, d, t0, t1, t2, t3); - v_store(ptr, t0); - v_store(ptr + 4, t1); - v_store(ptr + 8, t2); - v_store(ptr + 12, t3); + v_uint32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0.val); + _mm_stream_si128((__m128i*)(ptr + 4), v1.val); + _mm_stream_si128((__m128i*)(ptr + 8), v2.val); + 
_mm_stream_si128((__m128i*)(ptr + 12), v3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0.val); + _mm_store_si128((__m128i*)(ptr + 4), v1.val); + _mm_store_si128((__m128i*)(ptr + 8), v2.val); + _mm_store_si128((__m128i*)(ptr + 12), v3.val); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0.val); + _mm_storeu_si128((__m128i*)(ptr + 4), v1.val); + _mm_storeu_si128((__m128i*)(ptr + 8), v2.val); + _mm_storeu_si128((__m128i*)(ptr + 12), v3.val); + } } // 2-channel, float only -inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b) +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) { - // a0 a1 a2 a3 ... - // b0 b1 b2 b3 ... - __m128 u0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1 - __m128 u1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3 + __m128 v0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1 + __m128 v1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3 - _mm_storeu_ps(ptr, u0); - _mm_storeu_ps((ptr + 4), u1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + } } -inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) { __m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0)); __m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0)); @@ -2217,13 +2381,29 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32 __m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3)); __m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0)); - _mm_storeu_ps(ptr + 0, v0); - _mm_storeu_ps(ptr + 4, v1); - _mm_storeu_ps(ptr + 8, v2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + } } inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, - const v_float32x4& c, const v_float32x4& d) + const v_float32x4& c, const v_float32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) { __m128 u0 = _mm_unpacklo_ps(a.val, c.val); __m128 u1 = _mm_unpacklo_ps(b.val, d.val); @@ -2234,43 +2414,109 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32 __m128 v1 = _mm_unpackhi_ps(u0, u1); __m128 v3 = _mm_unpackhi_ps(u2, u3); - _mm_storeu_ps(ptr + 0, v0); - _mm_storeu_ps(ptr + 4, v1); - _mm_storeu_ps(ptr + 8, v2); - _mm_storeu_ps(ptr + 12, v3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + _mm_stream_ps(ptr + 12, v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + _mm_store_ps(ptr + 12, v3); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + _mm_storeu_ps(ptr + 12, v3); + } } -inline 
void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b) +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) { - __m128i t0 = _mm_unpacklo_epi64(a.val, b.val); - __m128i t1 = _mm_unpackhi_epi64(a.val, b.val); + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi64(a.val, b.val); - _mm_storeu_si128((__m128i*)ptr, t0); - _mm_storeu_si128((__m128i*)(ptr + 2), t1); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + } } -inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c) +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, hal::StoreMode mode = hal::STORE_UNALIGNED) { - __m128i t0 = _mm_unpacklo_epi64(a.val, b.val); - __m128i t1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val)); - __m128i t2 = _mm_unpackhi_epi64(b.val, c.val); + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val)); + __m128i v2 = _mm_unpackhi_epi64(b.val, c.val); - _mm_storeu_si128((__m128i*)ptr, t0); - _mm_storeu_si128((__m128i*)(ptr + 2), t1); - _mm_storeu_si128((__m128i*)(ptr + 4), t2); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + _mm_storeu_si128((__m128i*)(ptr + 4), v2); + } } -inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, const v_uint64x2& d) +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, const v_uint64x2& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) { - __m128i t0 = _mm_unpacklo_epi64(a.val, b.val); - __m128i t1 = _mm_unpacklo_epi64(c.val, d.val); - __m128i t2 = _mm_unpackhi_epi64(a.val, b.val); - __m128i t3 = _mm_unpackhi_epi64(c.val, d.val); + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, d.val); + __m128i v2 = _mm_unpackhi_epi64(a.val, b.val); + __m128i v3 = _mm_unpackhi_epi64(c.val, d.val); - _mm_storeu_si128((__m128i*)ptr, t0); - _mm_storeu_si128((__m128i*)(ptr + 2), t1); - _mm_storeu_si128((__m128i*)(ptr + 4), t2); - _mm_storeu_si128((__m128i*)(ptr + 6), t3); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + _mm_stream_si128((__m128i*)(ptr + 6), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + _mm_store_si128((__m128i*)(ptr + 6), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + 
_mm_storeu_si128((__m128i*)(ptr + 4), v2); + _mm_storeu_si128((__m128i*)(ptr + 6), v3); + } } #define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ @@ -2298,27 +2544,30 @@ inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpv c0 = v_reinterpret_as_##suffix0(c1); \ d0 = v_reinterpret_as_##suffix0(d1); \ } \ -inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ { \ _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ - v_store_interleave((_Tp1*)ptr, a1, b1); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ } \ -inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \ { \ _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ - v_store_interleave((_Tp1*)ptr, a1, b1, c1); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ } \ inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ - const _Tpvec0& c0, const _Tpvec0& d0 ) \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ { \ _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ - v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ } OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index 9ad8234895..52bc2cc0ba 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -249,6 +249,10 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \ { st(a.val, 0, ptr); } \ inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \ { st_a(a.val, 0, ptr); } \ +inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \ +{ st_a(a.val, 0, ptr); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { vec_st_l8(a.val, ptr); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ @@ -281,13 +285,16 @@ inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \ inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \ _Tpvec& c, _Tpvec& d) \ { vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \ -inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { vec_st_interleave(a.val, b.val, ptr); } \ inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \ - const _Tpvec& b, const _Tpvec& c) \ + const _Tpvec& b, const _Tpvec& c, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { vec_st_interleave(a.val, b.val, c.val, ptr); } \ inline void v_store_interleave(_Tp* ptr, const _Tpvec& 
a, const _Tpvec& b, \ - const _Tpvec& c, const _Tpvec& d) \ + const _Tpvec& c, const _Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { vec_st_interleave(a.val, b.val, c.val, d.val, ptr); } OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16) diff --git a/modules/core/src/mathfuncs_core.simd.hpp b/modules/core/src/mathfuncs_core.simd.hpp index 354cc00421..b15810317e 100644 --- a/modules/core/src/mathfuncs_core.simd.hpp +++ b/modules/core/src/mathfuncs_core.simd.hpp @@ -515,17 +515,17 @@ void exp32f( const float *_x, float *y, int n ) #if CV_SIMD const int VECSZ = v_float32::nlanes; - static const v_float32 vprescale = vx_setall_f32((float)exp_prescale); - static const v_float32 vpostscale = vx_setall_f32((float)exp_postscale); - static const v_float32 vminval = vx_setall_f32(minval); - static const v_float32 vmaxval = vx_setall_f32(maxval); + const v_float32 vprescale = vx_setall_f32((float)exp_prescale); + const v_float32 vpostscale = vx_setall_f32((float)exp_postscale); + const v_float32 vminval = vx_setall_f32(minval); + const v_float32 vmaxval = vx_setall_f32(maxval); - static const v_float32 vA1 = vx_setall_f32((float)A1); - static const v_float32 vA2 = vx_setall_f32((float)A2); - static const v_float32 vA3 = vx_setall_f32((float)A3); - static const v_float32 vA4 = vx_setall_f32((float)A4); + const v_float32 vA1 = vx_setall_f32((float)A1); + const v_float32 vA2 = vx_setall_f32((float)A2); + const v_float32 vA3 = vx_setall_f32((float)A3); + const v_float32 vA4 = vx_setall_f32((float)A4); - static const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK); + const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK); bool y_aligned = (size_t)(void*)y % 32 == 0; for( ; i < n; i += VECSZ*2 ) @@ -627,18 +627,18 @@ void exp64f( const double *_x, double *y, int n ) #if CV_SIMD_64F const int VECSZ = v_float64::nlanes; - static const v_float64 vprescale = vx_setall_f64(exp_prescale); - static const v_float64 vpostscale = vx_setall_f64(exp_postscale); - static const v_float64 vminval = vx_setall_f64(minval); - static const v_float64 vmaxval = vx_setall_f64(maxval); - - static const v_float64 vA1 = vx_setall_f64(A1); - static const v_float64 vA2 = vx_setall_f64(A2); - static const v_float64 vA3 = vx_setall_f64(A3); - static const v_float64 vA4 = vx_setall_f64(A4); - static const v_float64 vA5 = vx_setall_f64(A5); - - static const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK); + const v_float64 vprescale = vx_setall_f64(exp_prescale); + const v_float64 vpostscale = vx_setall_f64(exp_postscale); + const v_float64 vminval = vx_setall_f64(minval); + const v_float64 vmaxval = vx_setall_f64(maxval); + + const v_float64 vA1 = vx_setall_f64(A1); + const v_float64 vA2 = vx_setall_f64(A2); + const v_float64 vA3 = vx_setall_f64(A3); + const v_float64 vA4 = vx_setall_f64(A4); + const v_float64 vA5 = vx_setall_f64(A5); + + const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK); bool y_aligned = (size_t)(void*)y % 32 == 0; for( ; i < n; i += VECSZ*2 ) @@ -1024,13 +1024,13 @@ void log32f( const float *_x, float *y, int n ) #if CV_SIMD const int VECSZ = v_float32::nlanes; - static const v_float32 vln2 = vx_setall_f32((float)ln_2); - static const v_float32 v1 = vx_setall_f32(1.f); - static const v_float32 vshift = vx_setall_f32(-1.f/512); + const v_float32 vln2 = vx_setall_f32((float)ln_2); + const v_float32 v1 = vx_setall_f32(1.f); + const v_float32 vshift = vx_setall_f32(-1.f/512); - static const v_float32 vA0 = vx_setall_f32(A0); - static const v_float32 vA1 = vx_setall_f32(A1); - static const v_float32 vA2 = 
vx_setall_f32(A2); + const v_float32 vA0 = vx_setall_f32(A0); + const v_float32 vA1 = vx_setall_f32(A1); + const v_float32 vA2 = vx_setall_f32(A2); for( ; i < n; i += VECSZ ) { @@ -1097,9 +1097,9 @@ void log64f( const double *x, double *y, int n ) #if CV_SIMD_64F const int VECSZ = v_float64::nlanes; - static const v_float64 vln2 = vx_setall_f64(ln_2); + const v_float64 vln2 = vx_setall_f64(ln_2); - static const v_float64 + const v_float64 vA0 = vx_setall_f64(A0), vA1 = vx_setall_f64(A1), vA2 = vx_setall_f64(A2), vA3 = vx_setall_f64(A3), vA4 = vx_setall_f64(A4), vA5 = vx_setall_f64(A5), diff --git a/modules/core/src/merge.cpp b/modules/core/src/merge.cpp index a57d3bbb6e..9c52f0e20c 100644 --- a/modules/core/src/merge.cpp +++ b/modules/core/src/merge.cpp @@ -9,21 +9,58 @@ namespace cv { namespace hal { #if CV_SIMD +/* + The trick with STORE_UNALIGNED/STORE_ALIGNED_NOCACHE is the following: + on IA there are instructions movntps and such to which + v_store_interleave(...., STORE_ALIGNED_NOCACHE) is mapped. + Those instructions write directly into memory w/o touching the cache, + which results in dramatic speed improvements, especially on + large arrays (FullHD, 4K etc.). + + Those intrinsics require the destination address to be aligned + by 16/32 bytes (with SSE2 and AVX2, respectively). + So we potentially split the processing into 3 stages: + 1) the optional prefix part [0:i0), where we use simple unaligned stores. + 2) the optional main part [i0:len - VECSZ], where we use "nocache" mode. + But in some cases we have to use unaligned stores in this part. + 3) the optional suffix part (the tail) (len - VECSZ:len), where we switch back to "unaligned" mode + to process the last VECSZ elements. + In principle there can be very poorly aligned data where there is no main part. + For that we set i0=0 and use unaligned stores for the whole array.
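+ + To make the three stages concrete, here is an illustrative walk-through of the vecmerge_ loop below + (the numbers are hypothetical, not taken from a real run): suppose VECSZ = 8 and the destination is + misaligned in a compatible way (r != 0 but r % cn == 0), so that the prefix length comes out as i0 = 5. + The first iteration stores the whole first vector [0:8) with unaligned stores; since i < i0, the loop + then rewinds i to i0 - VECSZ and switches to STORE_ALIGNED_NOCACHE, so the next iteration restarts at + i = 5 and streams the main part (overwriting the small, already-written overlap [5:8)). Once i would run + past len - VECSZ, it is clamped to len - VECSZ and the mode switches back to STORE_UNALIGNED for the + last, possibly overlapping, vector.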
+*/ template<typename T, typename VecT> static void vecmerge_( const T** src, T* dst, int len, int cn ) { - int i; + const int VECSZ = VecT::nlanes; + int i, i0 = 0; const T* src0 = src[0]; const T* src1 = src[1]; - const int VECSZ = VecT::nlanes; + int r = (int)((size_t)(void*)dst % (VECSZ*sizeof(T))); + hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE; + if( r != 0 ) + { + mode = hal::STORE_UNALIGNED; + if( r % cn == 0 && len > VECSZ ) + i0 = VECSZ - (r / cn); + } + if( cn == 2 ) { for( i = 0; i < len; i += VECSZ ) { - i = std::min( len - VECSZ, i ); + if( i > len - VECSZ ) + { + i = len - VECSZ; + mode = hal::STORE_UNALIGNED; + } VecT a = vx_load(src0 + i), b = vx_load(src1 + i); - v_store_interleave(dst + i*cn, a, b); + v_store_interleave(dst + i*cn, a, b, mode); + if( i < i0 ) + { + i = i0 - VECSZ; + mode = hal::STORE_ALIGNED_NOCACHE; + } } } else if( cn == 3 ) @@ -31,9 +68,18 @@ vecmerge_( const T** src, T* dst, int len, int cn ) const T* src2 = src[2]; for( i = 0; i < len; i += VECSZ ) { - i = std::min( len - VECSZ, i ); + if( i > len - VECSZ ) + { + i = len - VECSZ; + mode = hal::STORE_UNALIGNED; + } VecT a = vx_load(src0 + i), b = vx_load(src1 + i), c = vx_load(src2 + i); - v_store_interleave(dst + i*cn, a, b, c); + v_store_interleave(dst + i*cn, a, b, c, mode); + if( i < i0 ) + { + i = i0 - VECSZ; + mode = hal::STORE_ALIGNED_NOCACHE; + } } } else @@ -43,10 +89,19 @@ vecmerge_( const T** src, T* dst, int len, int cn ) const T* src3 = src[3]; for( i = 0; i < len; i += VECSZ ) { - i = std::min( len - VECSZ, i ); + if( i > len - VECSZ ) + { + i = len - VECSZ; + mode = hal::STORE_UNALIGNED; + } VecT a = vx_load(src0 + i), b = vx_load(src1 + i); VecT c = vx_load(src2 + i), d = vx_load(src3 + i); - v_store_interleave(dst + i*cn, a, b, c, d); + v_store_interleave(dst + i*cn, a, b, c, d, mode); + if( i < i0 ) + { + i = i0 - VECSZ; + mode = hal::STORE_ALIGNED_NOCACHE; + } } } vx_cleanup(); diff --git a/modules/core/src/split.cpp b/modules/core/src/split.cpp index 6f7b61ac7e..78d8daadd0 100644 --- a/modules/core/src/split.cpp +++ b/modules/core/src/split.cpp @@ -9,23 +9,46 @@ namespace cv { namespace hal { #if CV_SIMD +// see the comments for vecmerge_ in merge.cpp template<typename T, typename VecT> static void vecsplit_( const T* src, T** dst, int len, int cn ) { - int i; + const int VECSZ = VecT::nlanes; + int i, i0 = 0; T* dst0 = dst[0]; T* dst1 = dst[1]; - const int VECSZ = VecT::nlanes; + int r0 = (int)((size_t)(void*)dst0 % (VECSZ*sizeof(T))); + int r1 = (int)((size_t)(void*)dst1 % (VECSZ*sizeof(T))); + int r2 = cn > 2 ? (int)((size_t)(void*)dst[2] % (VECSZ*sizeof(T))) : r0; + int r3 = cn > 3 ?
(int)((size_t)(void*)dst[3] % (VECSZ*sizeof(T))) : r0; + + hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE; + if( (r0|r1|r2|r3) != 0 ) + { + mode = hal::STORE_UNALIGNED; + if( r0 == r1 && r0 == r2 && r0 == r3 && r0 % cn == 0 && len > VECSZ ) + i0 = VECSZ - (r0 / cn); + } + if( cn == 2 ) { for( i = 0; i < len; i += VECSZ ) { - i = std::min( len - VECSZ, i ); + if( i > len - VECSZ ) + { + i = len - VECSZ; + mode = hal::STORE_UNALIGNED; + } VecT a, b; v_load_deinterleave(src + i*cn, a, b); - v_store(dst0 + i, a); - v_store(dst1 + i, b); + v_store(dst0 + i, a, mode); + v_store(dst1 + i, b, mode); + if( i < i0 ) + { + i = i0 - VECSZ; + mode = hal::STORE_ALIGNED_NOCACHE; + } } } else if( cn == 3 ) @@ -33,12 +56,21 @@ vecsplit_( const T* src, T** dst, int len, int cn ) T* dst2 = dst[2]; for( i = 0; i < len; i += VECSZ ) { - i = std::min( len - VECSZ, i ); + if( i > len - VECSZ ) + { + i = len - VECSZ; + mode = hal::STORE_UNALIGNED; + } VecT a, b, c; v_load_deinterleave(src + i*cn, a, b, c); - v_store(dst0 + i, a); - v_store(dst1 + i, b); - v_store(dst2 + i, c); + v_store(dst0 + i, a, mode); + v_store(dst1 + i, b, mode); + v_store(dst2 + i, c, mode); + if( i < i0 ) + { + i = i0 - VECSZ; + mode = hal::STORE_ALIGNED_NOCACHE; + } } } else @@ -48,13 +80,22 @@ vecsplit_( const T* src, T** dst, int len, int cn ) T* dst3 = dst[3]; for( i = 0; i < len; i += VECSZ ) { - i = std::min( len - VECSZ, i ); + if( i > len - VECSZ ) + { + i = len - VECSZ; + mode = hal::STORE_UNALIGNED; + } VecT a, b, c, d; v_load_deinterleave(src + i*cn, a, b, c, d); - v_store(dst0 + i, a); - v_store(dst1 + i, b); - v_store(dst2 + i, c); - v_store(dst3 + i, d); + v_store(dst0 + i, a, mode); + v_store(dst1 + i, b, mode); + v_store(dst2 + i, c, mode); + v_store(dst3 + i, d, mode); + if( i < i0 ) + { + i = i0 - VECSZ; + mode = hal::STORE_ALIGNED_NOCACHE; + } } } vx_cleanup(); From 9eb79926df99e52edc635f6023f15a28aafd3e22 Mon Sep 17 00:00:00 2001 From: Triplesalt <39653793+Triplesalt@users.noreply.github.com> Date: Fri, 20 Jul 2018 03:03:17 +0200 Subject: [PATCH 06/25] Allow a different input order for Mul+Maximum. Squashed : ReLU operand order tests. --- modules/dnn/src/tensorflow/tf_importer.cpp | 8 +++++++- modules/dnn/test/test_tf_importer.cpp | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 89732b45ad..57d4978593 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1260,7 +1260,13 @@ void TFImporter::populateNet(Net dstNet) if (!next_layers.empty()) { int maximumLayerIdx = next_layers[0].second; - ExcludeLayer(net, maximumLayerIdx, 0, false); + + CV_Assert(net.node(maximumLayerIdx).input_size() == 2); + + // The input from the Mul layer can also be at index 1. + int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 
0 : 1; + + ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false); layers_to_ignore.insert(next_layers[0].first); layerParams.set("negative_slope", scaleMat.at(0)); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 6ab0e41e18..bd5c63db97 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -230,6 +230,13 @@ TEST_P(Test_TensorFlow_layers, flatten) runTensorFlowNet("unfused_flatten_unknown_batch"); } +TEST_P(Test_TensorFlow_layers, leaky_relu) +{ + runTensorFlowNet("leaky_relu_order1"); + runTensorFlowNet("leaky_relu_order2"); + runTensorFlowNet("leaky_relu_order3"); +} + TEST_P(Test_TensorFlow_layers, l2_normalize) { runTensorFlowNet("l2_normalize"); From 597db69151ea052dbc7b5927043101a11ace8818 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 26 Jul 2018 16:27:47 +0300 Subject: [PATCH 07/25] ts: test case list is printed after cmd line parsing, refactored --- cmake/OpenCVCompilerOptimizations.cmake | 11 +- modules/core/include/opencv2/core/utility.hpp | 12 ++ modules/core/src/system.cpp | 21 +++ modules/ts/include/opencv2/ts.hpp | 28 ++-- modules/ts/include/opencv2/ts/ts_perf.hpp | 12 +- modules/ts/src/ocl_test.cpp | 26 ---- modules/ts/src/ts.cpp | 88 ++++++++++- modules/ts/src/ts_func.cpp | 147 ------------------ 8 files changed, 136 insertions(+), 209 deletions(-) diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 76f56ba422..8beabefe41 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -700,12 +700,21 @@ macro(ocv_compiler_optimization_fill_cpu_config) list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT}) endforeach() list(REMOVE_DUPLICATES __dispatch_modes) - set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "") foreach(OPT ${__dispatch_modes}) set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE} #define CV_CPU_DISPATCH_COMPILE_${OPT} 1") endforeach() + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE} +\n\n#define CV_CPU_DISPATCH_FEATURES 0 \\") + foreach(OPT ${__dispatch_modes}) + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE} + , CV_CPU_${OPT} \\") + endif() + endforeach() + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}\n") + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n") foreach(OPT ${CPU_ALL_OPTIMIZATIONS}) if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index 4888eae5cf..a15bbff096 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -457,6 +457,18 @@ Returns empty string if feature is not defined */ CV_EXPORTS_W String getHardwareFeatureName(int feature); +/** @brief Returns list of CPU features enabled during compilation. 
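+
+A minimal usage sketch, assuming <iostream> is included (the exact string depends on the
+compile-time options and on the host CPU):
+@code
+    std::cout << "CPU features: " << cv::getCPUFeaturesLine() << std::endl;
+@endcode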
+ +Returned value is a string containing space separated list of CPU features with following markers: + +- no markers - baseline features +- prefix `*` - features enabled in dispatcher +- suffix `?` - features enabled but not available in HW + +Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?` +*/ +CV_EXPORTS std::string getCPUFeaturesLine(); + /** @brief Returns the number of logical CPUs available for the process. */ CV_EXPORTS_W int getNumberOfCPUs(); diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 41a589ea3a..1ebd993a2f 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -654,6 +654,27 @@ String getHardwareFeatureName(int feature) return name ? String(name) : String(); } +std::string getCPUFeaturesLine() +{ + const int features[] = { CV_CPU_BASELINE_FEATURES, CV_CPU_DISPATCH_FEATURES }; + const int sz = sizeof(features) / sizeof(features[0]); + std::string result; + std::string prefix; + for (int i = 1; i < sz; ++i) + { + if (features[i] == 0) + { + prefix = "*"; + continue; + } + if (i != 1) result.append(" "); + result.append(prefix); + result.append(getHWFeatureNameSafe(features[i])); + if (!checkHardwareSupport(features[i])) result.append("?"); + } + return result; +} + volatile bool useOptimizedFlag = true; void setUseOptimized( bool flag ) diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index 3fbea894e8..4880b6cf2c 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -379,10 +379,9 @@ struct TSParams class TS { -public: - // constructor(s) and destructor TS(); virtual ~TS(); +public: enum { @@ -484,9 +483,6 @@ public: SKIPPED=1 }; - // get file storage - CvFileStorage* get_file_storage(); - // get RNG to generate random input data for a test RNG& get_rng() { return rng; } @@ -629,9 +625,6 @@ struct DefaultRngAuto void fillGradient(Mat& img, int delta = 5); void smoothBorder(Mat& img, const Scalar& color, int delta = 3); -void printVersionInfo(bool useStdOut = true); - - // Utility functions void addDataSearchPath(const std::string& path); @@ -660,6 +653,13 @@ std::string findDataFile(const std::string& relative_path, bool required = true) */ std::string findDataDirectory(const std::string& relative_path, bool required = true); +// Test definitions + +class SystemInfoCollector : public testing::EmptyTestEventListener +{ +private: + virtual void OnTestProgramStart(const testing::UnitTest&); +}; #ifndef __CV_TEST_EXEC_ARGS #if defined(_MSC_VER) && (_MSC_VER <= 1400) @@ -671,15 +671,6 @@ std::string findDataDirectory(const std::string& relative_path, bool required = #endif #endif -#ifdef HAVE_OPENCL -namespace ocl { -void dumpOpenCLDevice(); -} -#define TEST_DUMP_OCL_INFO cvtest::ocl::dumpOpenCLDevice(); -#else -#define TEST_DUMP_OCL_INFO -#endif - void parseCustomOptions(int argc, char **argv); #define CV_TEST_INIT0_NOOP (void)0 @@ -696,8 +687,7 @@ int main(int argc, char **argv) \ ts->init(resourcesubdir); \ __CV_TEST_EXEC_ARGS(CV_TEST_INIT0_ ## INIT0) \ ::testing::InitGoogleTest(&argc, argv); \ - cvtest::printVersionInfo(); \ - TEST_DUMP_OCL_INFO \ + ::testing::UnitTest::GetInstance()->listeners().Append(new SystemInfoCollector); \ __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ parseCustomOptions(argc, argv); \ } \ diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index e33850c267..3a5f6846d6 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ 
-637,15 +637,6 @@ void PrintTo(const Size& sz, ::std::ostream* os); #endif #endif -#ifdef HAVE_OPENCL -namespace cvtest { namespace ocl { -void dumpOpenCLDevice(); -}} -#define TEST_DUMP_OCL_INFO cvtest::ocl::dumpOpenCLDevice(); -#else -#define TEST_DUMP_OCL_INFO -#endif - #define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...) \ CV_TRACE_FUNCTION(); \ @@ -654,11 +645,10 @@ void dumpOpenCLDevice(); ::perf::TestBase::Init(std::vector(impls, impls + sizeof impls / sizeof *impls), \ argc, argv); \ ::testing::InitGoogleTest(&argc, argv); \ - cvtest::printVersionInfo(); \ + ::testing::UnitTest::GetInstance()->listeners().Append(new cvtest::SystemInfoCollector); \ ::testing::Test::RecordProperty("cv_module_name", #modulename); \ ::perf::TestBase::RecordRunParameters(); \ __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ - TEST_DUMP_OCL_INFO \ } \ return RUN_ALL_TESTS(); diff --git a/modules/ts/src/ocl_test.cpp b/modules/ts/src/ocl_test.cpp index 8eaa7e637d..6d5ff26d7f 100644 --- a/modules/ts/src/ocl_test.cpp +++ b/modules/ts/src/ocl_test.cpp @@ -43,25 +43,6 @@ #include "opencv2/ts/ocl_test.hpp" -#ifdef HAVE_OPENCL - -#define DUMP_CONFIG_PROPERTY(propertyName, propertyValue) \ - do { \ - std::stringstream ssName, ssValue;\ - ssName << propertyName;\ - ssValue << (propertyValue); \ - ::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \ - } while (false) - -#define DUMP_MESSAGE_STDOUT(msg) \ - do { \ - std::cout << msg << std::endl; \ - } while (false) - -#include - -#endif // HAVE_OPENCL - namespace cvtest { namespace ocl { @@ -69,13 +50,6 @@ using namespace cv; int test_loop_times = 1; // TODO Read from command line / environment -#ifdef HAVE_OPENCL -void dumpOpenCLDevice() -{ - cv::dumpOpenCLInformation(); -} -#endif // HAVE_OPENCL - Mat TestUtils::readImage(const String &fileName, int flags) { return cv::imread(cvtest::TS::ptr()->get_data_path() + fileName, flags); diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index b1ea96bb15..fabfa1b260 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -74,7 +74,26 @@ # include #endif +#ifdef HAVE_OPENCL +#define DUMP_CONFIG_PROPERTY(propertyName, propertyValue) \ + do { \ + std::stringstream ssName, ssValue;\ + ssName << propertyName;\ + ssValue << (propertyValue); \ + ::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \ + } while (false) + +#define DUMP_MESSAGE_STDOUT(msg) \ + do { \ + std::cout << msg << std::endl; \ + } while (false) + +#include "opencv2/core/opencl/opencl_info.hpp" + +#endif // HAVE_OPENCL + +#include "opencv2/core/utility.hpp" #include "opencv_tests_config.hpp" namespace opencv_test { @@ -230,7 +249,6 @@ bool BaseTest::can_do_fast_forward() void BaseTest::safe_run( int start_from ) { CV_TRACE_FUNCTION(); - read_params( ts->get_file_storage() ); ts->update_context( 0, -1, true ); ts->update_context( this, -1, true ); @@ -552,8 +570,6 @@ void TS::set_gtest_status() } -CvFileStorage* TS::get_file_storage() { return 0; } - void TS::update_context( BaseTest* test, int test_case_idx, bool update_ts_context ) { if( current_test_info.test != test ) @@ -614,8 +630,11 @@ void TS::printf( int streams, const char* fmt, ... 
) } -static TS ts; -TS* TS::ptr() { return &ts; } +TS* TS::ptr() +{ + static TS ts; + return &ts; +} void fillGradient(Mat& img, int delta) { @@ -866,6 +885,65 @@ std::string findDataDirectory(const std::string& relative_path, bool required) return findData(relative_path, required, true); } +inline static std::string getSnippetFromConfig(const std::string & start, const std::string & end) +{ + const std::string buildInfo = cv::getBuildInformation(); + size_t pos1 = buildInfo.find(start); + if (pos1 != std::string::npos) + { + pos1 += start.length(); + pos1 = buildInfo.find_first_not_of(" \t\n\r", pos1); + } + size_t pos2 = buildInfo.find(end, pos1); + if (pos2 != std::string::npos) + { + pos2 = buildInfo.find_last_not_of(" \t\n\r", pos2); + } + if (pos1 != std::string::npos && pos2 != std::string::npos && pos1 < pos2) + { + return buildInfo.substr(pos1, pos2 - pos1 + 1); + } + return std::string(); +} + +inline static void recordPropertyVerbose(const std::string & property, + const std::string & msg, + const std::string & value, + const std::string & build_value = std::string()) +{ + ::testing::Test::RecordProperty(property, value); + std::cout << msg << ": " << (value.empty() ? std::string("N/A") : value) << std::endl; + if (!build_value.empty()) + { + ::testing::Test::RecordProperty(property + "_build", build_value); + if (build_value != value) + std::cout << "WARNING: build value differs from runtime: " << build_value << endl; + } +} + +#ifdef _DEBUG +#define CV_TEST_BUILD_CONFIG "Debug" +#else +#define CV_TEST_BUILD_CONFIG "Release" +#endif + +void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&) +{ + std::cout << "CTEST_FULL_OUTPUT" << std::endl; // Tell CTest not to discard any output + recordPropertyVerbose("cv_version", "OpenCV version", cv::getVersionString(), CV_VERSION); + recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n")); + recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG); + recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n")); + recordPropertyVerbose("cv_parallel_framework", "Parallel framework", cv::currentParallelFramework()); + recordPropertyVerbose("cv_cpu_features", "CPU features", cv::getCPUFeaturesLine()); +#ifdef HAVE_IPP + recordPropertyVerbose("cv_ipp_version", "Intel(R) IPP version", cv::ipp::useIPP() ? cv::ipp::getIppVersion() : "disabled"); +#endif +#ifdef HAVE_OPENCL + cv::dumpOpenCLInformation(); +#endif +} + } //namespace cvtest /* End of file. */ diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index a5366bf6fd..60c88a7e65 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -3,10 +3,6 @@ #include #include "opencv2/imgproc/types_c.h" -#ifdef HAVE_TEGRA_OPTIMIZATION -#include "tegra.hpp" -#endif - using namespace cv; namespace cvtest @@ -2977,149 +2973,6 @@ MatComparator::operator()(const char* expr1, const char* expr2, << "- " << expr2 << ":\n" << MatPart(m2part, border > 0 ? 
&loc : 0) << ".\n"; } -void printVersionInfo(bool useStdOut) -{ - // Tell CTest not to discard any output - if(useStdOut) std::cout << "CTEST_FULL_OUTPUT" << std::endl; - - ::testing::Test::RecordProperty("cv_version", CV_VERSION); - if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl; - - std::string buildInfo( cv::getBuildInformation() ); - - size_t pos1 = buildInfo.find("Version control"); - size_t pos2 = buildInfo.find('\n', pos1); - if(pos1 != std::string::npos && pos2 != std::string::npos) - { - size_t value_start = buildInfo.rfind(' ', pos2) + 1; - std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); - ::testing::Test::RecordProperty("cv_vcs_version", ver); - if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl; - } - - pos1 = buildInfo.find("inner version"); - pos2 = buildInfo.find('\n', pos1); - if(pos1 != std::string::npos && pos2 != std::string::npos) - { - size_t value_start = buildInfo.rfind(' ', pos2) + 1; - std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); - ::testing::Test::RecordProperty("cv_inner_vcs_version", ver); - if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl; - } - - const char * build_type = -#ifdef _DEBUG - "debug"; -#else - "release"; -#endif - - ::testing::Test::RecordProperty("cv_build_type", build_type); - if (useStdOut) std::cout << "Build type: " << build_type << std::endl; - - const char* parallel_framework = currentParallelFramework(); - - if (parallel_framework) { - ::testing::Test::RecordProperty("cv_parallel_framework", parallel_framework); - if (useStdOut) std::cout << "Parallel framework: " << parallel_framework << std::endl; - } - - std::string cpu_features; - -#if CV_POPCNT - if (checkHardwareSupport(CV_CPU_POPCNT)) cpu_features += " popcnt"; -#endif -#if CV_MMX - if (checkHardwareSupport(CV_CPU_MMX)) cpu_features += " mmx"; -#endif -#if CV_SSE - if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse"; -#endif -#if CV_SSE2 - if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2"; -#endif -#if CV_SSE3 - if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3"; -#endif -#if CV_SSSE3 - if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3"; -#endif -#if CV_SSE4_1 - if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1"; -#endif -#if CV_SSE4_2 - if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2"; -#endif -#if CV_AVX - if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx"; -#endif -#if CV_AVX2 - if (checkHardwareSupport(CV_CPU_AVX2)) cpu_features += " avx2"; -#endif -#if CV_FMA3 - if (checkHardwareSupport(CV_CPU_FMA3)) cpu_features += " fma3"; -#endif -#if CV_AVX_512F - if (checkHardwareSupport(CV_CPU_AVX_512F)) cpu_features += " avx-512f"; -#endif -#if CV_AVX_512BW - if (checkHardwareSupport(CV_CPU_AVX_512BW)) cpu_features += " avx-512bw"; -#endif -#if CV_AVX_512CD - if (checkHardwareSupport(CV_CPU_AVX_512CD)) cpu_features += " avx-512cd"; -#endif -#if CV_AVX_512DQ - if (checkHardwareSupport(CV_CPU_AVX_512DQ)) cpu_features += " avx-512dq"; -#endif -#if CV_AVX_512ER - if (checkHardwareSupport(CV_CPU_AVX_512ER)) cpu_features += " avx-512er"; -#endif -#if CV_AVX_512IFMA512 - if (checkHardwareSupport(CV_CPU_AVX_512IFMA512)) cpu_features += " avx-512ifma512"; -#endif -#if CV_AVX_512PF - if (checkHardwareSupport(CV_CPU_AVX_512PF)) cpu_features += " avx-512pf"; -#endif -#if CV_AVX_512VBMI - if (checkHardwareSupport(CV_CPU_AVX_512VBMI)) cpu_features += " avx-512vbmi"; -#endif -#if CV_AVX_512VL - if 
(checkHardwareSupport(CV_CPU_AVX_512VL)) cpu_features += " avx-512vl"; -#endif -#if CV_NEON - if (checkHardwareSupport(CV_CPU_NEON)) cpu_features += " neon"; -#endif -#if CV_FP16 - if (checkHardwareSupport(CV_CPU_FP16)) cpu_features += " fp16"; -#endif -#if CV_VSX - if (checkHardwareSupport(CV_CPU_VSX)) cpu_features += " VSX"; -#endif - - cpu_features.erase(0, 1); // erase initial space - - ::testing::Test::RecordProperty("cv_cpu_features", cpu_features); - if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl; - -#ifdef HAVE_TEGRA_OPTIMIZATION - const char * tegra_optimization = tegra::useTegra() && tegra::isDeviceSupported() ? "enabled" : "disabled"; - ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization); - if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl; -#endif - -#ifdef HAVE_IPP - const char * ipp_optimization = cv::ipp::useIPP()? "enabled" : "disabled"; - ::testing::Test::RecordProperty("cv_ipp_optimization", ipp_optimization); - if (useStdOut) std::cout << "Intel(R) IPP optimization: " << ipp_optimization << std::endl; - - cv::String ippVer = cv::ipp::getIppVersion(); - ::testing::Test::RecordProperty("cv_ipp_version", ippVer); - if(useStdOut) std::cout << "Intel(R) IPP version: " << ippVer.c_str() << std::endl; -#endif -} - - - void threshold( const Mat& _src, Mat& _dst, double thresh, double maxval, int thresh_type ) { From 1165fdd0f504537784869309df83b1c8ca6af0e2 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Wed, 18 Jul 2018 15:24:58 +0300 Subject: [PATCH 08/25] Added more strict checks for empty inputs to compare, meanStdDev and RNG::fill --- modules/core/src/arithm.cpp | 3 +- modules/core/src/copy.cpp | 3 +- modules/core/src/mean.cpp | 4 +- modules/core/src/rand.cpp | 4 +- modules/core/test/test_arithm.cpp | 10 +- modules/core/test/test_concatenation.cpp | 119 +++++------------------ modules/core/test/test_rand.cpp | 1 - 7 files changed, 36 insertions(+), 108 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index e3ded859b6..dbfcc5c727 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1233,7 +1233,8 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ || op == CMP_NE || op == CMP_GE || op == CMP_GT ); - if(_src1.empty() || _src2.empty()) + CV_Assert(_src1.empty() == _src2.empty()); + if (_src1.empty() && _src2.empty()) { _dst.release(); return; diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 8775bff4aa..e89a17b323 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -411,7 +411,8 @@ Mat& Mat::operator = (const Scalar& s) { CV_INSTRUMENT_REGION() - if (empty()) return *this; + if (this->empty()) + return *this; const Mat* arrays[] = { this }; uchar* dptr; diff --git a/modules/core/src/mean.cpp b/modules/core/src/mean.cpp index d0029b3cbc..dcf1ae206c 100644 --- a/modules/core/src/mean.cpp +++ b/modules/core/src/mean.cpp @@ -766,11 +766,13 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input { CV_INSTRUMENT_REGION() + CV_Assert(!_src.empty()); + CV_Assert( _mask.empty() || _mask.type() == CV_8UC1 ); + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, ocl_meanStdDev(_src, _mean, _sdv, _mask)) Mat src = _src.getMat(), mask = _mask.getMat(); - CV_Assert( mask.empty() || mask.type() == CV_8UC1 ); CV_OVX_RUN(!ovx::skipSmallImages(src.cols, src.rows), 
openvx_meanStdDev(src, _mean, _sdv, mask)) diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index a456c72633..cc46345ecd 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -511,8 +511,8 @@ static RandnScaleFunc randnScaleTab[] = void RNG::fill( InputOutputArray _mat, int disttype, InputArray _param1arg, InputArray _param2arg, bool saturateRange ) { - if (_mat.empty()) - return; + CV_Assert(!_mat.empty()); + Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat(); int depth = mat.depth(), cn = mat.channels(); AutoBuffer _parambuf; diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index dd2ed9a86e..9ca48a0949 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -1967,11 +1967,9 @@ TEST(Subtract, scalarc4_matc4) TEST(Compare, empty) { cv::Mat temp, dst1, dst2; - cv::compare(temp, temp, dst1, cv::CMP_EQ); - dst2 = temp > 5; - + EXPECT_NO_THROW(cv::compare(temp, temp, dst1, cv::CMP_EQ)); EXPECT_TRUE(dst1.empty()); - EXPECT_TRUE(dst2.empty()); + EXPECT_THROW(dst2 = temp > 5, cv::Exception); } TEST(Compare, regression_8999) @@ -1979,9 +1977,7 @@ TEST(Compare, regression_8999) Mat_ A(4,1); A << 1, 3, 2, 4; Mat_ B(1,1); B << 2; Mat C; - ASSERT_ANY_THROW({ - cv::compare(A, B, C, CMP_LT); - }); + EXPECT_THROW(cv::compare(A, B, C, CMP_LT), cv::Exception); } diff --git a/modules/core/test/test_concatenation.cpp b/modules/core/test/test_concatenation.cpp index 1470094fd0..201bf0e9af 100644 --- a/modules/core/test/test_concatenation.cpp +++ b/modules/core/test/test_concatenation.cpp @@ -43,106 +43,35 @@ namespace opencv_test { namespace { -class Core_ConcatenationTest : public cvtest::BaseTest +TEST(Core_Concatenation, empty) { -public: - Core_ConcatenationTest(bool horizontal, bool firstEmpty, bool secondEmpty); -protected: - int prepare_test_case( int ); - void run_func(); - int validate_test_results( int ); + const Mat mat0x5(0,5, CV_8U, Scalar::all(1)); + const Mat mat10x5(10,5, CV_8U, Scalar::all(1)); + const Mat mat20x5(20,5, CV_8U, Scalar::all(1)); - Mat mat0x5; - Mat mat10x5; - Mat mat20x5; - - Mat mat5x0; - Mat mat5x10; - Mat mat5x20; + const Mat mat5x0(5,0, CV_8U, Scalar::all(1)); + const Mat mat5x10(5,10, CV_8U, Scalar::all(1)); + const Mat mat5x20(5,20, CV_8U, Scalar::all(1)); Mat result; - bool horizontal; - bool firstEmpty; - bool secondEmpty; - -private: - static bool areEqual(const Mat& m1, const Mat& m2); - -}; - -Core_ConcatenationTest::Core_ConcatenationTest(bool horizontal_, bool firstEmpty_, bool secondEmpty_) - : horizontal(horizontal_) - , firstEmpty(firstEmpty_) - , secondEmpty(secondEmpty_) -{ - test_case_count = 1; - - mat0x5 = Mat::ones(0,5, CV_8U); - mat10x5 = Mat::ones(10,5, CV_8U); - mat20x5 = Mat::ones(20,5, CV_8U); - - mat5x0 = Mat::ones(5,0, CV_8U); - mat5x10 = Mat::ones(5,10, CV_8U); - mat5x20 = Mat::ones(5,20, CV_8U); -} - -int Core_ConcatenationTest::prepare_test_case( int test_case_idx ) -{ - cvtest::BaseTest::prepare_test_case( test_case_idx ); - return 1; -} - -void Core_ConcatenationTest::run_func() -{ - if (horizontal) - { - cv::hconcat((firstEmpty ? mat5x0 : mat5x10), - (secondEmpty ? mat5x0 : mat5x10), - result); - } else { - cv::vconcat((firstEmpty ? mat0x5 : mat10x5), - (secondEmpty ? mat0x5 : mat10x5), - result); - } -} - -int Core_ConcatenationTest::validate_test_results( int ) -{ - Mat expected; - - if (firstEmpty && secondEmpty) - expected = (horizontal ? 
mat5x0 : mat0x5); - else if ((firstEmpty && !secondEmpty) || (!firstEmpty && secondEmpty)) - expected = (horizontal ? mat5x10 : mat10x5); - else - expected = (horizontal ? mat5x20 : mat20x5); - - if (areEqual(expected, result)) - { - return cvtest::TS::OK; - } else - { - ts->printf( cvtest::TS::LOG, "Concatenation failed"); - ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH ); - } - - return cvtest::TS::OK; -} - -bool Core_ConcatenationTest::areEqual(const Mat &m1, const Mat &m2) -{ - return m1.size() == m2.size() - && m1.type() == m2.type() - && countNonZero(m1 != m2) == 0; + cv::hconcat(mat5x0, mat5x0, result); + EXPECT_MAT_N_DIFF(result, mat5x0, 0); + cv::hconcat(mat5x0, mat5x10, result); + EXPECT_MAT_N_DIFF(result, mat5x10, 0); + cv::hconcat(mat5x10, mat5x0, result); + EXPECT_MAT_N_DIFF(result, mat5x10, 0); + cv::hconcat(mat5x10, mat5x10, result); + EXPECT_MAT_N_DIFF(result, mat5x20, 0); + + cv::vconcat(mat0x5, mat0x5, result); + EXPECT_MAT_N_DIFF(result, mat0x5, 0); + cv::vconcat(mat0x5, mat10x5, result); + EXPECT_MAT_N_DIFF(result, mat10x5, 0); + cv::vconcat(mat10x5, mat0x5, result); + EXPECT_MAT_N_DIFF(result, mat10x5, 0); + cv::vconcat(mat10x5, mat10x5, result); + EXPECT_MAT_N_DIFF(result, mat20x5, 0); } -TEST(Core_Concatenation, hconcat_empty_nonempty) { Core_ConcatenationTest test(true, true, false); test.safe_run(); } -TEST(Core_Concatenation, hconcat_nonempty_empty) { Core_ConcatenationTest test(true, false, true); test.safe_run(); } -TEST(Core_Concatenation, hconcat_empty_empty) { Core_ConcatenationTest test(true, true, true); test.safe_run(); } - -TEST(Core_Concatenation, vconcat_empty_nonempty) { Core_ConcatenationTest test(false, true, false); test.safe_run(); } -TEST(Core_Concatenation, vconcat_nonempty_empty) { Core_ConcatenationTest test(false, false, true); test.safe_run(); } -TEST(Core_Concatenation, vconcat_empty_empty) { Core_ConcatenationTest test(false, true, true); test.safe_run(); } - }} // namespace diff --git a/modules/core/test/test_rand.cpp b/modules/core/test/test_rand.cpp index 6504649842..49062ff7c7 100644 --- a/modules/core/test/test_rand.cpp +++ b/modules/core/test/test_rand.cpp @@ -173,7 +173,6 @@ void Core_RandTest::run( int ) dsz = slice+1 < maxSlice ? 
(int)(cvtest::randInt(rng) % (SZ - sz) + 1) : SZ - sz; Mat aslice = arr[k].colRange(sz, sz + dsz); tested_rng.fill(aslice, dist_type, A, B); - //printf("%d - %d\n", sz, sz + dsz); } } From a8e9a3a88df83ae118b54e975056c687ac3d30d2 Mon Sep 17 00:00:00 2001 From: Karpushin Vladislav Date: Tue, 24 Jul 2018 16:54:17 +0700 Subject: [PATCH 09/25] doc: add new tutorial "Out of focus deblur filter" In this tutorial you will learn: - what is a degradation image model - what is a PSF of an out-of-focus image - how to restore a blurred image - what is the Wiener filter --- doc/opencv.bib | 14 ++ .../images/original.jpg | Bin 0 -> 14447 bytes .../out_of_focus_deblur_filter/images/psf.png | Bin 0 -> 630 bytes .../images/recovered.jpg | Bin 0 -> 42725 bytes .../out_of_focus_deblur_filter.markdown | 112 +++++++++++++ .../imgproc/table_of_content_imgproc.markdown | 10 ++ .../out_of_focus_deblur_filter.cpp | 149 ++++++++++++++++++ 7 files changed, 285 insertions(+) create mode 100755 doc/tutorials/imgproc/out_of_focus_deblur_filter/images/original.jpg create mode 100755 doc/tutorials/imgproc/out_of_focus_deblur_filter/images/psf.png create mode 100755 doc/tutorials/imgproc/out_of_focus_deblur_filter/images/recovered.jpg create mode 100755 doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown create mode 100755 samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp diff --git a/doc/opencv.bib b/doc/opencv.bib index edb7033e8d..7c8303f7f4 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -1016,3 +1016,17 @@ year = {2017}, organization = {IEEE} } + +@ARTICLE{gonzalez, + title={Digital Image Fundamentals, Digital Imaging Processing}, + author={Gonzalez, Rafael C and others}, + year={1987}, + publisher={Addison Wesley Publishing Company} +} + +@ARTICLE{gruzman, + title={Цифровая обработка изображений в информационных системах}, + author={Грузман, И.С. and Киричук, В.С. and Косых, В.П. and Перетягин, Г.И. 
and Спектор, А.А.}, + year={2000}, + publisher={Изд-во НГТУ Новосибирск} +} diff --git a/doc/tutorials/imgproc/out_of_focus_deblur_filter/images/original.jpg b/doc/tutorials/imgproc/out_of_focus_deblur_filter/images/original.jpg new file mode 100755 index 0000000000000000000000000000000000000000..ecd23c89f947e994f5e3a84d34e0ffee3802508e GIT binary patch literal 14447 zcmbVy1z1$y)9~G;J0+K9>F!kN?hch&x|WhqP`Z(Bl$MZ`6qSahO9Vtk0RdeV5fP=n zyYLJD-}A=vymxo+oja%I%$Ye8XU~?-)&O#CbuD!O0tEmF_yNw|0k718-JAhHON$S{ z2LSL#3Bke?o&OjhIOk7q@SX85I0%G!{;C7^1BimXQGw5J@Fxbs)ZjB6{82&w?Z+(+ z`@Ek@5RbVu!5?Ofzn@AfswM~otC?qjo0ok6f>l%m5EBuVkrR=Y6O(2Y6_=Bcl@pf& zuz-6RPyicjC?+NbKp$LmdIzKe>o0f>)9YW?*?VG3Eftj;hQPdIoyA zf$$*+Q+pxo{Q&@*7=sT$IJ$zc2nZAVnHsBrumUIdlp4u8M_h+vRT08sJq4e@hx zaSdSQapYwcm6erY)kNI%L<9r~8rnO0*!wxLs`z;M+Ixopz{Q*~tpG|4Z&^VmONq)# zNePMxf$ab1`X3Absr5etn63-@I!cmLz#n8 z=H~4JF1NRnTY#I7H>;cXf49Q_$7X-ffB`S$8U&~+P66s0LIC+RHGuVb0>Gvs0!wR!4q!CTf@SMM?m-wVKfnLS4VnhNLjB!bSTSf7V^dbgKtIgR2VU5i8xBAS zkb!$0J-`fb06c&oAO=VS^1xL<4bTP*08_vUxDGf1u7DTd2iyd10e68|APKk!WCI1j zeV_uU0UCf7pabXu`ha0z9GC%K0Iz^GU=#QR>;ebC5d;FkgOEU|AoLIx2pqx>5rfD; zE<@BHx)5WC6~qqW0`Z0fLT*8#AW4u6NCBi2QUhs*bV2$cD99{i8S)OY1KET8hGId9 zq0~?&C>K;1Dg#x5>Of7Q*Pt#?Bs3Hn4NZmSLCc`^&~|7abR7Bux(59O{ecBw5n<6_ zv0({f$zZ8q8DLpsIb-=@g<~aRy8_Sn~YnG+lbqXJBz!H`vVUTj~xMC?SeL`FnzM0beth?W4P$uwFkXTSjFi5aY@SPBikgiaqP>0YLVNPLd z;dJ2<;ZqSw5l@i{kyoOWqS~U7qTQl3rB@hxN5=)X) zl6sPHlKqk=Qqod>QVmi&(wx$E(nZoQWvFEgWs+q^WpQMcWg}#JWPiy?$py%@$bFR; zl=qOYmETt2RzN6JD6A{8E7~iTDz05-y=-^+!R56pY**~BlwEm$mE)?@)vBvoO1w($ zO7%)#ltq;Nl-rbls>rF_R_Rs6Qq@pRRGm_zR5Md6P9w)UV7v5tvOfzEqfK3zZEZat`;wqB;*syuZHP2{VuWjCXjEwQ!C2Jzmhp%QmB|g0T9X4)Wz#g%S7tnBfoA>YWahT!HReAp zR4pU9(>rZcx z-LSvWYKLQIVOMK+Y;Ry+X8*%M%c00&*HOhW*YT6nRi`Yc9fTqx1F_|-=$zra?V{+C z>9XT`#Wlwj?WW>Z;P%a3)BV2tPY(l+YL7Ed3(rO`d@nn%E^kV25AQ)AW}jf6SzmtN zSl>0I95NfZ>!;&aDvwVo78DVqeC|$K8*|j(3ZnN)S)TPdH6sg$X#sbZ)~syeCmu3md+{BW>Fx~8TUR-0A_tqZK%skg14d8GQNvq7+- zw2`JUsR?KbZ2H)2*Sy%G)6(B6)7tPD{LpnpF!|KBW zBT6IBM-@kVP;#iQF`2QBajEh43CW4JNy*8!DXFRUY3b?C8QGccS%ukWb64j2=2hp1 z7qk|}U+BM>T{K%{*mlp3`p(Qpn~$5Hd_NtbV?R@TF8adr z;a}eJ*4&V1TEg7a0u-^5kc$tf?&W9isRtE7(gBSSo zqE4_zGP)6e%cE&!s4WR{x58Ncg$6ew5Wz5fU#(V=`r-bSwC&4RN{=IyzMbHW?6a`U zoL+fwcpA0-n+#I`f^mNr-1@i9IY{+l*@^Oh40O@*2FuJJ13mkf7#T&P zs2Sf-QZnVWeHe4Ha5aStwYZ~(a{0Gi7rqR!ED`6K42eX!fN$f9a_(bt_F84tv+ijs zYx|eUz#EJ?Dr)9X5fASH2BFlqEu%3*d%j@3KM1%Lq4Hl42{!nnj_*ZV z3<}fl`4~bMW8mSHVn{y6|4BYLNar6FZJDEY_WeG79Hd|GzjEN>7rErWOjUvu&44qg zF@>5fqut)kzQku#Z&kB=fk4L59S$VlvLA-oH<@b@xeE4>L_=HM`Xz|1OM>_ zON2zfCH;*X0$X9eLL%#=qF_(R^AcMS=Wv1O1e=4E90D^w$^4i(rvIL!gFwHL%PNoJ zeUl%J)*Um(FBnA6g^#pVUE}EPEL7)8g!81i9>mUmCMjQG)c0`d5Z<1uTPQCorfBm* z6@9su`ou)Nrv}=}o3||S1iq4iGJAc0r?$(k(*Ri;??qDjSPIh?^O#v-RB`p9`SZ~Z zA_c*+qzk;--w=qiQE@HOLdvv#I|B+YTbRJ!4AzcunWgj4ccya9$nD>6{^Ay4dec1R zeF`Nn+gdO)7FMqv!}j+!q?ba3zRWPvQqs6U%kekw*YSQ~al>F`|K{r` zY2vlvQLTpD-S~cYMYhv0iyQN;2K0ehL6tMRbi}w=kUPD0BL4saAmT3gPV@Hz3YH4s z;@s(N0&&pZ0(g1S_2(a^v!6!nS=C#-S541KPo5Q(y!*1^WcsGoJEj+fgR?ZTbacJ; z$D+kH@rUEvG{ug+ilFen4rZ7aIFyKyQ9}ksBD5p=ZUv?Rk3TV!*k32X#Y95j#R`T{ z_`p?G@Qw{8gUA!S*g|A}jGub`QZyMZO_e(3)E?lqbm7I;vkTmb@P9oQta01zbDHXzhT9^FUdK$aj&CFw?v+nueZ1kTYDqnCg*WM|KLoegR1t6bpG7iQBTUH zovkW#1C3terWG!37d`Ug$lu|79_u%cq8rOe-Xy*tDNNJ{I8~a^ z!tu9;v}>nTzTNCJ{^VP9ZbY~mGC#$T0Qh* zUYd(y-dxh!+SjnT1q!``MY1+?xMl5Pw=z03=Q_CJyo2^HZ7lk3Zx$h*lm(5m{D!~e zirxvCZgpRqHHmT^Hk0Bu=|yUGnniiq(@!E`?enz9vVv9ohsf8~GG2+Yp6V3fN>`oC ze%U_en5lom6<;9=GG4qz)YwB~X)LKD730Nb zMO=QF;+tr9w}Y7i$s7zvy~B>gg9(B4k+pChPG0yLQj6rVPO@eDU{0CvAcsuZcN(3Q z1i$!>L7Vpq8~JpWezv}Ed~N+K{RbEE%nN^NJSpEzQ*R9}V4oSjV<$r0e8Mftup)>` 
zkr>I#j!zO*>SoIt>1Ov&yjpe7+p2dpLzSOKkNW*gQ3k1I96OH0@P`bxmAo!WGscn>l8cYK2)m4bThqn;w8&=Be$Aoero#q zq1#vPGFE1}7m62c6tY>d&u3m;z5B(rl0HDHvWt4&b*Nvf)sokao8E3xUF8+qOL~`< zipp5!20hmGd+R2sx+r$ZHm>W#LejAsM0h&fF`05sx_midh+U_L#`BVa0f=4~1hgS_ zGMM~OCC2FzTR+@bx7tFr_TFJ8Eg>nVaU^^(QZ#;*iYi*Io&6R0Fn)SWoZtw~7S0Gf zPC@TMz|I)}YKbuZew{R^CDan&Zj<#chi9S5xCR=j)WN=#UY6*ZVBVTx<|gHauYrSP zlTjlTy)pYa^;XKm3XS!QxkTKu(VPX&_v1DMVcMw^Bs@_oQ2|PyHB-I`-|`jwM)J+U zoBcIC`8Vd_dX){OvDm5t@petag6mN8AnP0lNB*GEOK%45FOz;(b9P8Zc$hFeAN{2j z)GWVi-0HU-CspM}-$Nn$c+}sXpxwKN>YVpPQ;ezdq``$(cgjIQD%(xdZY{%TKh}^F zVx+8LO<)!4!k=7xs%^obZbk67G9#D#2Uqa)uCypl zl0}DRpW7RZ0Ezc=WDCWNC2neVPP6e-a_5s}mchLjZ3RX*ohe|a;9?8yTF^9n#reu+ z^ULn(3H_5M)Jkj8J}N^xh(T5w11L}#L(u&?S;bEZ?7px@+;8JwzZu4%_8n3aWK7BQd zDl*(kVtpGmzvLT7=G+vgs$a3xNnEXNWwx{H!O&zAtGlTUH*B$B(Dkm23^rG<)vU@9 z5t86G^>x}w)pIn~l;*j&2}|)pz^O@wG8TMQl5GUx)Pl2~)gAO~W9)A!YKBrk!ztmy zPwB9!9%LIsu$lXpN)V;W6SI3@RW)AB!g}((YoZDR=uhy%PeijEWFD4Pl#LLxzril@ zGdE@!VC)z3w_<3K;S zT1rJaqq$ChZ+j!B;zOF%=t>H8Bi#3>oQ349@*)NS3Eh8XN+hA4w4kT_)l9I)qZ?6w zZBWDx9$em!myoRQHIb3J9n^4Z@fQ5r^(b35uXqQJRNOg-K@|#q0)oy+q*Awlu~Bc9 zLmb8o(rk=8<7Hi6j~<1q9jFzVvi5ZHzJp(=8-i)Y z1+BkFnO#nU1ea&9yhib5VA|BQu zD#=BWoPk>zi69U~YOZuj!f={0ca@_P*VBb+>}-umd7+h?47I;G6279mpCxz72Mb-Y z<2Dba#qZQbm)BEWqf1|?+fFda%?QSdYFQ^1AT;Abfol}gAC-A3AWQP$Gz3*06X~e?w-e!Zm~L&Q&NmLA-8`%Z0v9UtZ5R z-}w0z8^0fOEYq#IsTIeECzl1X89A++Q;kE|v}%Gpy7H!sQM}`*kQ{=r z7TrqD#OO@{LSzoQbg?4I-A1>4RG-qRyO6JY)vDaMk0QE&swS!?*Oo!faSinS$??+8JN!nj31! z%)!=-Y*$+z8N*d%`B0N6<&CzRK%L!%-=?gWQ0j7VZM0p%;cWY$z4fN?;0H z>D&ux#ZWhIg+2p7@+L3H1HCa^U{S4^%pDIWQYJ@_+8j57zklPFe5~c z8SNsriUwH8?_A|#6Qie;MgMDye6Y$|m(GAI{ff1>ILf#lR!B&Q0Cd1;)0Wr9jZF2n z>bui2_*+VXQ=9O zMy8)&M^uS%h7&)hsned%oB5KktmIc2%F&fiTHEW_n#l35LLvBou4;=_QL=!{mkVnw zlJJ6vv)AeE(6Um0Ej@McI};&Wyb`oACDP1NeZN>>#=?7+34&`H4NeEY~!fq z|0dSL$wP~?T#Gqe2dKn3o7vxfbU5f(@{0eh+d0l-lDIcf*@0aRE9f73$IviyzSI{h z0FEy!q;Lef^`IT~v6{E~6mkDpu{**vpm&x!@hWBkh>17vB&kU?Id8Opk&3oF9=0o~?52ISGuwvjKOM?R4Pz=ML%gxwQ9l{V0|!>G*X946K&_(9k8J zP@x0=PGE;6o~g;fY2tzFt;6vql2WfwgXB^)iT+4uL)8?q#zN{^maWg&u=e=X}Q|m#+U<|081h zlzEe-R?Y$36;&X2fL>gd0s5h~nR43MsyE56?T1IMd5M?t^63%xt66x?R8X@IS@^CyEP@ z^)WsXw9k&FVM^a)%hWignQ!1Ux7PlPh#>bX6$9@Z_JqA#EhYpni)05Cm3+vaxR1 zy`nkFX0fb$yj-MOM4=2BR0YZ#${VUr47<$nv&S!M$8<4e_KR6GwpuQy>rYn5&nj#V zPb7lA({kR%)GL^!`fAIT@KDieqaZ{7Fsj?YZK_%zV!Lx`*SK&@a&^~{SE1#mXnm{H zn|002a(PnuR3gxjr4G=HBBntHf3@A+lRlzL-3zSx|qFtU9H%2PnY3#jHzXu=5v~uN;#vI9(I2BAE1C%X~Xuf>YVFUP9tXRMoT z;iX3v4wW3P9g%`Qiu6zWei01o#iFPNUrAI^s<%cVWe=?-8^|D@jFw{WJ!PyO1wQ8Y z@Ocmud@cIp>R`=);J~-O%yI!_9dXUeESFvwegmgs5;n4aL%-q)Lt-B(5xWO||3K<` zdnuDD89b(G4&znE`Yl{Ad8L)5nbj?;RD^)z9?hOtm@e+l*DCOvQ(A# zh?h1=LT39s7O^&W`Fd#S>pO~Y9Ss;|am_KTQracq2ocVs#KA%7Us=RndwgQVv%uwx7q0AFWoh9(>7sb%tv zU=I_)+b+$_$DaY@6B=2Bm|?Qrg~atzO|s*Mg*qrsBBGpp1c_q}w zj!r_#E^x-a6TpK#!LyASTsmAfd{kw+c974P_u4>~nja|?_<>8i2#*;riXS6JXF&CR zNP;Hh2(SVNyuA zDNvvz!&Non;LXJ+w!K)f=Q@vc7V4tnK|!NitZUVqqI&D25XxlO%jQ}s23#Lm60&y2 z@#kY*KW$G(%c$4%zHlSylhCfjs-b+4UbQgO&Y94UvQ!RyV3bJRzLwp=F{q?2pr=M8 zO0%8hhtjJjuV{@d5pm?L=(CPSt3Dy`a2m!4(D!N3PZ-AiQEXO{8g+2K1{m=f1{LA* zZ1ksb?&wNm7JR<+diz_-2I5LOL(WK5pDQH;Mw)NON?iL04+g~-dw+I zNQt#n!|)`x07vo4Ponh4>c6BCH*PbUz7p^DqW8{J?GQ*MwK@ZqXWtCq6n}V)mK99$ z+opEPgxA<7QEt!iR;gi}fHPqJ@2N;$Ohx)*Y9}}qj66YY=eV;8rMM=g!?}deI`l%c zPP60RP|1%5`152FZZf=3h-vLh6Bl~I=-le#7FB4%P`JwY)}g_x*f1m^rY8SXi_t7& zE0el1$Cj$MKFM_}i*tjb)&Pa9snD&<+}4jh&L#9EZMUxLl?js3Ag&qV>6^`6>okkC zUC(mt%T2FUiI&kx4$7aR$6v@&0sZoH%k5OjqU_Wrl}AoC6k?I^J}P(=(Vrrv_est} zk%(A`fg-p zp#?E(3%$7M1~|I=dUlkw@sxU<6`mzuhM+_@{9R7n&@YxHrxd| zrDy?G8ge@T^lN}7C^^2e0~K377~1|LAm^qvHts<21mr3xARtpkAot!!Q828;JtYcI 
z8snjUdAFuMVG`TRplo%Kz`V@G8!9bOz)z6f>GULK0?bzPPL~U5EQ>c+-iv93kNI%= zNwV#UT}ykAT(J>oRutrVZtE`W)PE50_ZCML3hJ1O$`SKH;SUX4Jqk@n(CD#55pgZ* zcG3N8_PNKal_t3MxUwk4C3ur&G}=17VNsWLZ$z#z6+cCfQJvWMQM&;VDfFE=z4x;i zh9=57emc0(ay6~$?$06;<7ZJZx1p6Bnj<*)*r0<8+`*X3Xy}EPbK&Qnn=}WEpF1;i zI`X{g#OUQk@)Q?v0j+sO14S(u1RSPoxkKnIan&sCIgn~~JK5`sz*Q6~Ao3J!1 z>V~62AD-kAWj(JdbAV?|Z1^wv*@pZ)ce@5xzzr0nXzOX^iP7>oMZ^H+PzDGgelSM- zVD;I6-x0|twDbsCmg4}Hene_{l2mZT;X~Sz{|MT^h^6^f-bvD?2HL<31UpYMejtyi z7KNJ?9gg6ij88Ma=PnCh&)vUYa(}-u=WVQTCflw2+esTbX#HQe^Ox(ET9(IMe$OVI zB;P-xID(%6P0vsf=!lKUGvIM+a-rF2^rPLW7(^OcKPU_v9pP~6*~I7ev6r!akIdS2 z2*h3M3Z`A0Q%%NDpEQ149Ryvv(Je55`TiA?;SvD@n1ZPjvM*u3Z@gei2elxQ4h)XK zx%Gc)hrJbFD(P4jwtY76VQYXamrc<$X3q&sb??CsbldbooDD##6CxhyoB?W)FTvC} z-KppEX6Jj|CP$LmBgYQDtvitoBgX_54ziz9YUs?rWbH1$B=n4!SJX3ita7O;Ln%m2 zqGxq{a{Na$Bwf(cItGjn?ph{3<}DNOG(e&#~% zd@Ooil{v4&Y=z?Etllina`Q{A-H%aZKpD<9mg>ik^vl-I^NL-DIke2iWC_1#mk>CV zb1S{gcx<`;1Ll@&HGh4f95_iMDNL+Uk0O>0`Fkp~Ob zvI0gXa&8&O?RJ!3|G2Er!g__jcXkO;o%Y6e4iArL){p~jq1$Zo98~3cx14rwB8QQ? z{x%g~c}AS-oL^j@ACPC0gwOnh{jMs~*RD6rf7;#o`F2s;YJJmw-3j^9Mbx_fSJc|i zwJXOjzkj3a>rRAe5XLJLy#xA-B6Q3yznR1&j4kChvdX4-;XPmGhde)_>aD!;wcqm6 zw{MIP+JrcEsgoiab>Weo6wk_UbC?*zNtuUN2ueA8d?DeQX(<=5YVMjrhd%7vrVL7}?w;l|tO+O|u%8(u{P?FX$R z9;azsQu|A8oWIB(%#H7AsE0}1ZkWir5(bAzYr*T~HD}m8-j|FQ}jY z#Y$q{^`}JOEt*V)6Z4p{+r=v+{6-pPZ@*ij1{{KP@W0INyl@K$iXg5)1cuz*e2YgD zvXc0-2Q=ix)q_3EC!(*gZExjf@;|6ZlRK2rO4A94#|FN>B(q8PLfap`9jL)f>ZYWA zjY%>*zUtrD>94@?kBOh#?BR1cw+nf`edtnHSuXCgyscp%SVd!$ALOMbCfO=_U3bwh z(1_{jb8ge?M#8A<7hj?jiq-VRZo}F&_?Pyxo|(;l-KiTS2|BRZp__C;O(O(@lCusp zw{y73aE%^ur8cxzrlu-7g{NE7{$fs*lL@@^!FeUHrG5H}Rk%yEPiY;0Kdrn8^U8Lq`?f->#~#S2(91cE#RS#1Srv9UKRaDK2oIKO%18o7>irS zi^AjPWs)#Eajum?3|sQ^rl7p~`sy0_x$_+ccLUw*S6pgLo0-Q}d!llbG!&a3S2Q8r z`3Cd0LsI7sg1j!x#Pi@##?_Q0yTDrID@RNeDvG1PvI7Cx<2=RGZ+34jXA1z#DtA@SPCa0a+fmO28RvN=HViJ#fB44?B}V%1vn;New0#cot*c0zLOxn zQu_j7i9+$^3K?j}B#ws@zk(q$JcHTE#7IYv;{N%g+>xR+dijm^14PtS;ZH*i92R5nN;nmHFJrwdbIl zxrxtCutnZh~uLId=)al6D@(Np3BLbuE8n#OXG zF4;AfO`qpZIYT+XjjYO#U0%8C=wqx`{?l{dz`J|<0EepMck|78bpSl%Kg zs9DMOh56kVlD_-v*>Q9iKfhOP}OJ(fQMrWP+NXdR^1tW+_Mx)-D95UN3))0^3c$yg@2*nXfIjP zumKrPMr7V;v*cLu?4{p$@a1x~hl4cw9iWd?T<{RIC%X&0I;dClAMmN4@Q!|OOQZCZ zkp8jp#biwOYwm^_<(@~UP4tFJI%RrHhOCzh$;4V$Q4V-oK>UUnJvaVcu~@R03E9+U zuRXgL$MZ`i>sIL)4E&aFk{WIvTsGX4`O)r_!jzZ3?RUgA9zlcjIiC869yHu^Eq6=l zj=rC+e25CNf0)d5KWe;%ujc9V*_ZwX?V${})fYs~hY-wU(|^<`*t zdgfSYyI}YW3de z(Nl$>P>GM8MM#1Cw>oWwJC7525ljk2x3Rj7cPBRa8E6XqN^BG@!DvZB(g)#JB)7c9 zeL2-wucSy|P}jS)scf2SHF5*K#BE4F*9tfwS*I%?bxE1ZYy}uvTLLPae|2Z|Uby>Gen_TMH{KqmdPPc9+34vQ?N?Dv< zR=K})s?nNj6n-P)@k`DW5a##!I%*aXt|xnvbbqP%bnxqe%Jb!-F9-h1Zj)Ydf*G(}s09pz$Hx6nz(3p9lxhz;$`f~mPr5G%x0XqBpV0D6XKH>F z=}Fr-VnH1soU5O+T`zym^_+W|#$Bu@PyJX2HP@yXf>(23FMqV>cD=e0u4n)*59;RD zZ<)7uk1^xtO`E*UWaO%*%FHmEuAY5y{n0wTbZYsP`5Wmk)6dtn=-_ScMz>G@p2wbI zy8KBZ?az(p=To_r=CF03^L$zC$#T{s(c)OK^sM|m(3@Nrnk=`sJy#1B9=|x@CX9lR z-mo7>%ijL$?LhbQXsI)OwRE%m)AaFW_b)wZg-cy`R|wH~(oLJ}=2TC3P^jT+4E;Y3 zTpJE7MEt`@9pK)|Qfto~5I(B>@)8z-8@Jw!W=F^4g80FY~ x*rWO$d-ndi9mgXdy621@7m~6k%hI3a5980W1vR#8gQ_V`%r@SSt@UjA{{hERhiL!+ literal 0 HcmV?d00001 diff --git a/doc/tutorials/imgproc/out_of_focus_deblur_filter/images/psf.png b/doc/tutorials/imgproc/out_of_focus_deblur_filter/images/psf.png new file mode 100755 index 0000000000000000000000000000000000000000..3835124db8501778e3ac6f8081db07966e6ff70f GIT binary patch literal 630 zcmeAS@N?(olHy`uVBq!ia0vp^zd)D+NHDBzXMM=Pz+~&`;uuoF`1GLRq#JAkM;spf z?~hx)YEr4Y+F^#flT;%IC;GH>^7|9!efB5BpD-VfPw{)!_g^)6Q`w*9oRil+GDl8Y z`Pkh^#rwE^ihEmM{K=j-GW;jHz{DAu{u3Q*WcW|Efr%cO{*x^TB1cC5L=#TJM`rzr 
z2Ar~B34~E#7Q&)4AbY_s024g2`zJ|8TGc698mdKI;Vst0Ls7legFUf literal 0 HcmV?d00001 diff --git a/doc/tutorials/imgproc/out_of_focus_deblur_filter/images/recovered.jpg b/doc/tutorials/imgproc/out_of_focus_deblur_filter/images/recovered.jpg new file mode 100755 index 0000000000000000000000000000000000000000..2794d427d750448b6bd947504907688e9bda9c92 GIT binary patch literal 42725 zcmbT71yo(Jx}f(F+}+`nLveR^m!b!0i%W6$0>z=Y7cUfdX^~QjyE_zUp&X<@acO}9 zL+^ca@10p|=B@R@%FefwKYz0SB>N;g;eP3U9l%#rR8a&#U;qF;`~mmhffWT`I~xE{ zQDFtp0pP(Cf*^mB{^2P>DE|nZAMUCCi@hJ1`M+cy^Z+~$+K3*)pa;+Uz{C$>@`EP= z|67k;6!af@N*?0>I^_rdw~YTC%E-yqj-GZ-=APCt9&Uh_n@3obTS$~w2*$%F zD!?tu%MU<+q!cg!eaI-n%Ljl9{;4$hp%#$;;`P5;|I6KPU>mB)$(d^DXez3xD*&K} zG;C#cJ7*U#J^(m5d%Ei=$-)ecp2E;305E_7paMJqU~cK*s-df>_g9IEf*j1_LFj*L z{AYN%@Q=C!vmDA=FxdZ@_TSzSSh~7*wQ{oh%Y6?V?d@g#z!3EZj`X&( z_IcpB2WD{eaeZA|k>tWosWtYfn!OEptl;b9XD4oQspIxw9Vt{PUUr z<^l-++7|X;WI-MgK|u~)&Ij}V%l&^7|0VVRhrez6Pl+?_f3z8hbnM@8|IYjGIG1+- zAbjy)o1}loS!4k~O9TLrFaJA^p#T8z!U3Rp@;{!3^lyK$_4ITV<>K=3@!_v39q1eke-+p_kb?zj$bFXDd5TI~Qk|o%8>zh5v`y{-X|m@t^(r0f>t4 z0Af>40DqDgfEY*A7JviH00+Pe2mxY%G@t;e0-AsxU<8-| zmVhnb1h@k}KmZT|L;|ru5|9pL18;$1paQ4`8i5v|9q0xIfKgxymM9m(S$sO*g)JNL68_o2IL*27SaY8gv>(L zAwMBkP*f-p6b9vhib0j32&g604H^WEgJwg^q0P`f=q&U*^cZ@Ff`dYh!iFM>qJmNF}6^#TnI zjRK7gO#)35%^b}WEfOsYts1QpZ3b-%?Ke6WIvqMMx+3~hbZ7KX^bGV$^bYhH^d0mY z41A2o7@`=O7*-g57>O9g7;PAn7+V-Om;{(im=c(JnD&?2SixACSoK&#Sl_X(unDnQvE{IzV|!sIV3%QcVSm9s!@fry`dowrlU>E}|ZzzM!F{QKE6B$)f3{`AJJkD@|)pn?~C~yGutz zCrM{Vmqyn~w+ADE$-o?8nXo?C@gwR-%8$Gr6+W7HbVJWXkDw2wucQA8$Ak;RZQyC} z9{4c>9fJl#07DhS@?(t0!jEkqzj-|P_!lD+qXA0gX zp;)DOtR$fHQmIQBMOjTbS$R=~M#WO4LgiRhNHtKkPYqj5S1ns@L!DLKUA;{Mtf8ur zsQ6owtJEPX8XixQ@mOj0BrtmEN+17J`=aJ7B zjG2smjfYJrO&m==o8p<8o4z+gF*7i$H2Y(&Y5vaq!a~I&*W%bx&hm}rzLm69n$@ng zgmsGbj*Wy(ip`G~5-(C;?Al7%X4w9;le5dVJF!=`e{28CLDQkk;oi}}vCawI$;|1a zGoiDCbDs;Xi?7SHE1PSi>$;n$Te{n^yPA8c2gt+Nqt%o6VKO!5#q1T~_1#<2JIDLC zkG{`)Ujkoe-!VT{zt?^{{)+x3FQG3jU-kq%3J3}K9w-x77z7Hk2grOthw`zy~XZzf+lzxFN7+t|0) z1$G5Xg-V6Z?-<`@6rmP*7i|~o7Y~*QmsFHemd2LemN}QLm+O@GRR~p7R8m(aR)MR$ zs&=cNS5MW**RV! z+p^wj)H?l9^!0ZU`xbO7uyy$Z6`q}-W8`)#pv(jtW zJJ+Y(H_@-wKQy2?&^stI*f}II)IKaa{BcBhq-9iaw0TTmtZ7_eylFykqIptivUN&i zs%=_)x?@Iarh8U?c3@6rZgl?1{PY51VQKOC;`)-+(vL5WUrv_2mv2^rS5a1DzT$t) z_(uKh-5SeU-MZlV=kN00$2N2~R*;s+{Y{U}KU?A3INKRJbUPJ4cz?9-D(+70J=@#b zciq4F8F@f(kbB5{_~A(UXzbYVcXt0J^{F}JI43D7T zm-l@SGX?+*`MXB>mwQ-DL7*t8_y8CTf`CyVU?>U%`XB-VLkIvU;UgXy6e4#_%SJNep2QV~7rx6%!X=b~JM6Y$ zH8my%4RhxMI2PIUew>=ro2Q0eND6w$o^`(`RV$ooL`y;)758? zNu*F0tzmb%LQvTRBqM#0q4j3_m?{8IVTbuE+g|_dF^$s-QcZvL|dlvHs0&2EVuTz-hv&q)IJ! 
z^pnwHYNif6hNGTiD!`nZguF;~{BnT%vOx}QchbMdvqqo0G^0QN3G=iFLoi#jTv?jn z0%ohJ;`+7p&@v0=;kr3~gAO<>RM?8Nru*)T_($!};&Jg1$R~?tqs=UMO*#6=26j!u zToA8XiFM%q>_MD4-(x~9wp)f|cRcbvAxtw#R;Fhs_ z^11$uvL6jk2C3dbvgg?9Qdj+o)4;8$;(~^m6X$l5xe>!O_cJJEPdx^aT@?k9DoX8l zU$Q6lpWmBj2oJ9|g|oZYP_`>zQ7x!g;K*Z}#TYoy23D~;vA-UnYMY|JT|x_uiANAp?MHBn*OnFH*uS{_G|lXW7Dh#{Fy87LbT$C z_S_QwGv&Ji2U-m(LO}SH9DNxRzHJHWFcQm-!8r#%n- zSSf%|UZp^I$lMy@BNxj$r(YB|R;3SE{72S0n|WrjzKx0e5L*}Z@uHF@SvbC8 zjA^3m$+q|`O}oG?nFsp1Zzei5AZo=nxRa0t4^|ZfOYRVO%2L=BlVzwC>R)iq1aY$i z*UxedrZca!edVnqM3H%fyvrfcZ8p!IA}Kuga9V^ZA9EGt*b$r&^*!VrK&luG}(Zk5!Z0ITIIWNzNZXryL9flF`a?d2B7Q znxc+kuc3+z%y9A_NH=T0t~M{-=6cDP_>kb$0Pz6pNaoVS2(t9jD3{ZiUqfwXlsTmo z;zDjHzU2DQu)U(>&G>%S#1#@fPcx8+l=@p^NQyKjex>A6vh9#qp6yFyTg-A8n+AW* zXW0g;NNrf{Tk=B&jAsh#_^lfKPlywJw_N?$aj^5%>iI#nt%Rs+=WeOz-s|ag11g?3 zRO#)6J(v}UKh{t4gcFs2qEU>XsK>B%*exhL5tcyJW=+ONcIXiwFn+QK@gp|sL20G1 zf0fC#CYo;w-w{HOO8Z!fE)$oUNG&L89SS(NUiQ>ufsgQZub6J)ECC&q!~wj=3N7Q< z@!$q6<=q!p1y(QwHjgv`N2mE3)Q!cW&s7v_&y_bV3EZsN;^;$=LN;n3*=5CC$xQED zt6ASus70Er)E`GhocE5ua1FbGJuHv~aXF9xSkUUd0$#qFVCRl~v^aaK(sgZ4{DHuw z0vVEAq|$0Qd(=X|+umG?DfyMBcqSMGnCaE2F(b)$k)BY*0{yPB;dxy%_iNvdH3}(J z$}Vt+BHc`}ry<@7gr0jNamj2MC@8q&E@uAK^(PJ$7O#r7ZZ7JwjXW}(MOp@~v6`!hAd8fnVm4_U3% zDsXdH+kT`W`ypL&Y(?lhB`r~HiW~!UC+yFyNby+^@#wb%hlRbqh(N%VPke?5*c~2` zd;Wq%?0am*jhW^d+8QVPckfYNK|$rsf=j$qWT!|i{~g9g>u5o|_cKgnDY<2f&=NB3 zjsx^c*hG{SkNScMOA#emDS??Hys1=Zg-Xgt)ZC~^>Lkv~%33mOY2SD4 zA`;&4bkJ@*=g_TaUSV5~wA8RBNr>b5_8S*A?ag>~%v)7cVVa`kE<#0(#p1L07Bg!O zuh}@)8=Quy`CtuQ4DK=VcD(K*$h zb>`$cDCo%05YEEya`2k!YeFZ38S}~@wdn+(qVi@C&iaxb1aY$<%HS*_wZ;}m_;tYd zce|w80DJTIOW_^?tBv?S1nCi8Ot$Nn?|7!G56snraM_bYJoLDhsC5fcEU)aM4Dub&5cdy~k!sos(jEH)2HgWI8A`;=-YM)J@)qqpDvw#{ z;s|@OU)b#&a*q~=nK>)mB=8_eimaKEPf~=ZwUP$zmLA0r8ZO~n_8We`)g&=l40X!6 z6+fFrZR8%K5dH-rwe6a%aOPU$-A}5E8Xcr^J{xSeE-p}#;QgwYV0gI(D;Lr2+Q&uC zoXFyr9mEucB@3HJ-2=LurqwU(lxBZVq?}>i%Pz)HG_s7WGJFLz)pS1Wm9ev z8IUR$IF*vWXPjB|AKok4YNOsr@S}I1GCd-s2{X2<;Qb~!lBJyk9_==03l8rDrj&_I z>pL&UD7l~Y^S3P?i4G3hKc}>1%vLEejNOyc0R_$>WOi@N7wz693p|YkM%p5tajXcm zq>>i(i{Du$@SC*^lVlN1wJrTAB>fGI>CF977%CyyDNq0OtgvOL!&owN6j}Qy?a`^9 z!`N(>ZeJ5{klv{xcN9zQb`Q+8i+tlAnVuyS+Zd#>Z@P{$L}Y4O~~F{N1DN$`8hWkXXl zR1|$n$4%3Nh;_s}I+zx4K^^+73!1Q`zZmB8?ZiiSN#O3voLR|Hmj;V%b`AXAd08Qj zJ$-LaEMOBhBsldaE=ConJ#c_#(_Qf9yq~be%5;TnEUDrAe1Nkr7Jk{EL4j8(coPTR zgFoA>L(+@5b-TocP*WK&(CtQ86*aANnq<*(YnCjTPChrD0kw=VB$}tU&xT^YC)qXm zEvo`s9otE08UM~zKgI4)-m}Ztg?LT71P`wlZ=!Q`9n_95tUhNm4m@p|$=bSbSynoT zH0DY}I`5%jXX|Igyr=uv@B3J}ht~GI-3{YZld%h#t4#kwqdUvc6B){_g!P&>y5o;k z0wZN8x{g8F3bX;j>{$XkMGG)CVuNiYw(h>Jrn{U@DmGQ0tR(rao6!Bzdy?ppIWw}8 zq6)#EPO<5)aF(X}bNST-{TVGo-~h^IE?e(b=LFt_kHxmY6-1Q#?e|#f?V)%@hwH85 znP8rj7Sdn4hCdbW0e9g>e@o25#XDXfpaQ;poVQ}jOzk5AVwPz_?%V?+m<=Xu`FFp& zoFxoG5NTKT%h6lOlcl#XH{aKVvwqs|ru5#77l^bo^?q2Z@YCLu4pgu7&sAw>Q!4jp z9ex?ITo&v;+%BcTz8v}HC7N_DZGpKd#w-Jl^DlZw8=D`JyhEN-7)2Qz_du^kY^!%S zZI*%FEZIk@*vf9kx0;;ALoyb+=Cy2CyC3==$=3Z5UDn%7?fuZW_ta&hipZ&{%KI5H zci)61?0pXGa=`Z6FK56a3`T5_n4HkX4en3)?Au+Rs3FhAJb zX6bH;!NwX}g7Ib@tixXUHmBOW(@ARw^XMtr><*a&&)0yLw7=(SdcE#}tyzdn;N)n4$uFtML_v;@9DO;Ci@& zrmI3W=y!E=mnh-_PCR6gW11j}RN}TTrGwmTGJtGZZ=>X57=Y^^c&-05zNPv;C^x@KgZ@`l5W`7o~+HP zyrn_`q`!latmb~jbseD5kB3af>|c(YcX?GMuGrN^+ymlPPsAWC z%{=h2k`7@8EBb0Ye_`cPl7}(X!bGm;@9l{Mo3@w+m=QN$(@}5QW)1%!zC_E-SDRWL zbXqg{A}D$faJ)J^biLAP43-A43Kz+Sye%e-f{9Un4V-54Khk74?eiSa{ahpMux@zw zk>u&VZL`o!BaP(cMwiJ_4G6KKW}51{|ihbpqlDGDFRq3gEZ16U#bZnMOSE5XdlKSa#~C6O{pboRne0m~1Fjz8;& z<>CS@`)H5AL85OjN4D_&HZj0M1z_~=9lSe+rkbD8iCqqk zqPaXsgyw=@n#Jn~w-Bmmsk{UAU?U3n5>FnqLg^k5pCk8i-na+OvOkt8C5}g-ZUxhy>6bm!V&hxLFI1cwS*GsGMf=>4eHZtda0@n~3onN3) 
zIz6}4vlM6(r0S>DiyJM^uw+T#9j6q^a4A3Xb6#-4boz-M1Cru*6~pixYq|jQ4UB(3 zU8(K;Xn1y%K3wGX!Rg0G-L9eBn=Na>P&cw#+IPo=>v3(IMPmL>PZtmJl$F3AcLWvW z-B!5bbPS%LJ@?ceVhB+xLy{vxUS0K&rD%2@gin?3o5nFLr~hzhaeO_;bwL4CiJQ}R zVOE`g(-jj}J)lI>63rK;keS=VAA}DS8V5JNr2SY@vNOh6eI{fkuR_3 zBAzZ2xi+k^%Fn~rDY&G;eRa|6`6?{Y#-y8Yc_?o(?!o1r*U1;;il~B3k zW58><_Id@Eet!JiGPBP%W3OUhr)*!<(Q*({wpN;;^Fd$ZQ{dIO?nF`-gOwQ;0=w#> zp*n<|<{oIEcxK;MuDBSV$!#EkL0)9M$(d(Jid#U4*BS8Xu8=&w#7feT4so%=cI!@9 zL+#U=B=dufYhE1|kD=8+F@?6mOnHI%PWcv3I>nEAfwR$$N`qPX787>?EmetQdgO4d zkr|HyJm~6AU8h}@#WYu!QeuV>3)u`yYkuduaYll|S@7r=-EF&1j#gXd2G18fb?I94 zyQMqIx0kfq^{OAyR`Rp5C=JiG(VJIT;E9$wAF3F(6$48&I1>KYV2FRQNK&DHb7pJ$ ztv;(sg{U&Vj=DGXdED}BYw*YO#^DcVbJW`%rG}vy$tv1KE7crdA67sAR0Vbk;Rb|4U$C( z3zPoT)b>_jx3?roxNZD0(vvf{y4cVp@_mej5i&d&IWap@OfVkmK9?z&`ecCVg{Z5M z_l?)cCaP|gt>?kkFu4U=Ih>{-b6`3suUEW$|I=FB`<|k}8B#o=pbrA6KHUxRxOoXx zn*C^z(p@))OuWNob01RQcem!TGNC5UPo9r9x$cQyB^TkDK!!4Jp-O~5HU-ypSh&&l%%O?DkkYE0 z$rpPeqy+)Q;)c0Oq9t(kkhbJ4YH>xOLA*acRv-MBJWg73+>@3NX(Z@5d7~)0Hee_W z(`v*!NFYAskc&%G28dpf_m({Y3R8n|eu?W721?JL-I$3FpkNC5_N#oGpI5+JVVTPi zwb7GR<5>@U&mXEdWoIzt{ypO4X6s?@k#T<1U>J->ZFBI=Bw0En1Ri0toB2xhn+in3 zN(OPNu{HlFUuR0FR6uVh*x}9%`ZSa#GCxG)vl7z|WY^0X-BymP7^MR^x*A2zFlT-R zbGG}H`l+7UCeNyRrkgC)attj+%p+?CV;<>&4jH#YUt3&(x{lNLSVeQoj>*_VQx^s7 z&ik4CDEUq8Z_JX&FVwmor`m^p)!5SdQHmg=d_?uy%(?p@J&&|%K{@WIR>4K1dWe)Q@IGHtc3?9fz|>Egd|qu!$gKRDyooR;C*&kbJ+>^?{5+pL zd58%+n}e?@?a?n(SIUn={L6kM(}loSx|f3(_<(YV56x%cj-{G+vtveMZjrF6@}sll z0@;`9>)^sQn5j&Wq+upK%Ti-mx z?@VVwU-y%4`Lhv@H(cT;Y-zk0XW1ruDzBTuuyHs@?xUEBWum|4L`Aj;ZE)7J9m(u3 zKR=b^9+AgycakUn86;-Stv+M-s|Z{WY22G6wL&T4Ia!envkODDwyu2wtiTOygox6h z9*Yz?Ryh=M3>Eexe#mBs2DP=7WVY|>xl0@(Pj8qyyeOlIF2Ga(_Z{o`7g(^rHSfSk zqxRS7v^AWg*CMM1^;%LC35JU!JGIyCJD>eSs3YJa?k6yTZi-YLj&Gk&KZ;;o^-G{~ zU!(sXHAmV_EJXbY$upK>aLakL&b<4kqTnSM&GCJTM*iY}P-oY}ZwuSp8q*q9q89Wd zq6>TvK+p?{KCc5#3Y`iHIJs+0x5ut<-vAnpLgE_1j)E7qX5GV{ zft_Wdt)-%zw29zSrctLmnwgjtZh>=6r3eJb41Sj4Tj%b2)hEGyVeCZH@q{atF8eWz zuYQ^Re2`bE#TLy8ar6C=f6i~R73N46{#J|@%SDq;ID4W)$m7EmpOc6$+Uzr-qSTcf z&x1S$P|~&3*619Yf016``mLWOwcB>3a2{uo^b$$q7PX+U5gq_2q^sW!xiheDoTCL4 z68MOtS?r`4rWm$ND^Z%4wVt7QVO!7jv8U1?vJbL0=P@3q!Eh*9+juXoD6D23XRoG* z^<{RNh4zYgSt;;Ie~P9i@>Q!Wj=rC}xjlKN12^5sHp%0mpNFf$d1q+>NaCx5*Fw=s z1W*=h1~$yyzyTx1T8SG-h7Q+B*u|8nu-L$8t%&_)e{A0)+d0KXEV70-b*+7gItP`q zDdO4Qt0)&x?{2zi=(6`m+yRM>KHMzFv$|cRcU(pi7q9?#C_2*<6{^$-PZ(%`Uc%$2 z!f38pjG1s9Ta)E7C6b*3to0UYB{gM?m9@?&GJH(DNKni(MYmjC?SWnH({fVAhs%L zkAIjoy&fy<6xXz7K?6I;!s%SGkZ5$Ma}Nv_MwHAxRjdDT*=A9?W|a@ zQ$#x0smPM5gKRQ8Ze*H=4sm5s7XuTw84LWCE7!Nn+0J9gBJN;U)JRn!C$v3VJ<|!Z zH-3t`^UrN)l-XE<2F`MX&oOJmPcn0_vrpe#H{vSoV4e;hyixSk&ijmmBf4XtG5dBu z`}-!B=7g;(EO9~@nHV-8TrlqC=}xOG@p02q0IOlPRJJVSEVBCPja#xFyJyo2N>*Ni z=MaOl%!}HQWW5|{iW6f4F0xws&=AAh8D3spVa1#s#rKI^r9hO{o%oJ1=~LBeXv80z zg&4sTgbl-$8)0X_PVpH388j`!_b2Jb_##EiG+ortL2DyW=GG>nSfowp7e}3rTTF#q zf!0w95%DqQT$l%z_1`oZ+dN`80xGQDkF%zl{Mm?1wy+c5aKw_cN(`9& zekzZ>SDd5~?7v_WD$U=+QS)pv)34DbZm!|zc=3$5#kj>L(kgW%aGM_%-OCK=Ffo48 zdi0{P$cbgwn5QY0YR|g$_h~=kf`(5-(llp_Z5HaGHaQ_!IGGZQjoVcrXprQ5s=}r` z)9$LWO(u&$gBCDdVYlIRx2EFs(%s4O;G?#AiD$<}~5()b94$ zK*>sB@$P!g@uoB1dO7h1ezgV@F2kUL-1m2HD`KQAZF~4g(Uu29ZA%fKN1jcb*|%x! 
zaJ^Mym>LL`E|fO&KCjJEx)t?KFXm%BVRm&0#k(A7$`;WtvG@yXC^3qk*B2Zn#WabQOZkRXqLy-E7_jPX+s4_zdKzK$8_4O^bu_CI=k` z5@m>n%l+_)c%i9Zn$M*IOlK5R(l+}!zi_p(#WJQo!J=b$I=YQT^J&49F&hIaH>^)T z2`}4WRo2xgdKOFqELD5ZrsP2_N3E&TuiY6~_e|&$ZYrv-(ku>SDpyg^P{IsY0~_-B z(W|y1Iy6c4h7eR9#78YZmDHu>2Exlr-3{-7k|=fA6CN-6FyLYF&*#kbArS%kJ(itO zU(1!TBXXA4AJvq*t+C4%IAA%4`SsVxT2k2>33va6^>A<8ft}Q?*UgRzCBfpkNS!(- z&up29r+CKx3Ok))tu3`r&G@8qOI#ODi_fcO#dV`~b1CI_Pf(yGVZ+oW1lEakjuOnM zsLN-Pb*i(K#ny(3T{)vq;)PI`O5;NUPlK{ZV+K0%yWCz-cn)DUL1PT6`@PRo$kGzl0Mh#YF0aVI2+c6Ku6<=pKKJ20&qM zSA9GwJ;4Hh=4YuJ3v9&ngvbfyt#t}*^U>j@g6l*{?5S&j```>&#JiV)^=xI8c|w>E zhfdH?#!2>*bFx=iui(w>slDi613hF#FZI-?jz!%4Imj%vYD~XOU8njxvlI=o%o`&W zww5(c8mA>%i7Lo7u*rPmJ!C{p!n9NJohpz)Y6z0R>%2t0BrrK{Er%eomuJXZ2W!&c zsk;y1hf3G#og=Bno+3Dx}R=kZV z3iJAU%a z|C&H1WuUAA+%HCp_vCr}*Y~RX<_5IZZ{kQej6XNvHj>{uY&-g(OCJQ{qAg^3wYfGE z>KwPiTdAq>_rkBWA1Pk!ZnBqM#cUhQ59QNHj*kR{XFYs(Io!|QnCJZ|M+TcW?ST8n zwn-j{alOG$^}1}UR=MEq6;C}tvl$pP_br zpWEa?=DTcP;_!HUHq`hifkB}h%j642HkM|?)u#zUxE_M<0_B$jl`1fl<#X{YZ}+ih zCzkb3w{#dT9$f)+`;n+wP5B>nvSiHA1eNZAG2i)YmJle(cIQ1%jXiP-=aS5>bM}DH zsbMl4jZyZ#*B!((5XoLUo&TI*l~dSWd=F%BGxEAI8ipl#v=r_<5%rq*K-L(4``(I9 z_0LP|6GPO)cB2Z}m>O5WCgP7!;X`+~^{&m}-;x1RNeww2_w)JcR&(bUpV z!JB1~+Ax*{d)y4(?-Y^8MThQLUX2vV-osSz2>L+=WNpL5IS(;U6O{y?_;OLXZBG@7 z5wY>Hi%{d+xxRk6DfVZZo@4os;A{g%nef9^{I>)Aq8U+|W#4>0Ju>6U?z>ei$ySgD zClR&IXSY*UJoa6t7hILq=sJrq@#LFDl~;hTZ@;g=D75=Qf+H^%z#oPCqwv+-M zAM4^!?{8CMvoLSB?_Q~5^ErB?BwB{AHTZ+M>?vCLwz0UG3_%QdPPGlBlpiLbNWM3n zkUMj%h8u+!naDL{^yqX6UIkX7KOO$;#Q7t*RdCVyAbxI`l6(*=65#N0$s9tmbJ z>Ly3P#X!83l{2wa3Ict%dn{$JGOGzYCg!Jh!^l^sIh%~X-G}2c>F~N1C*9Z&(E|@G(hno(zaxU<>MpeEa^?Bqgz)8 zPeCf(vKQ{sR5Cju25G)lgx#g5S(_M?*FLVc7k5{!y9*R0R>pnltXy?8yq}37nxzpm zD%@}Q19QmElclwb8fkgi^9LISQWZ*_C|*xRFV3))i=f7%eu1kxY~qE;L8basJ{=+t z(i8>9Laz|evqc{JIgG3~h0emi#zd!tSUQv_#^$LMXc8>nq&b#dDY7D&=KVYa6}{V~ z?tz6H>iykox4{XAs3cH_`?+`v$t*jKW02cTgM`CD%FnPRpMfCFRd?4G0E;7Z*N!!?fpRI z&z8^Tq2QPVPpa#F*GgGz`w~Vaq1<~9sLvd^TQd2GryAi!+0NTpB0p0h(T@*AEWR1j zz1)qCv;QMr7U_W$ttZ;~e~Q@w(?ZYW~d} z2*{kiLfq79Y@l#gHzFd~CAQihhSNpx{2thOI8~p|e$nB20GmlZLQK$CMm0IWE0bqW zUh^jpte6-#ragbWW%e#7;6VuO@wyMOhEr)NvEPTevbyWkI(eWk%Pw|aDE z{;gxF{&%?h>TA6*Y1)fa!|sV}ZjxR$VH@fS>aFCx4c{d`Xgyjt(+VNR`ENh-3R4LoKw1Z^FCy%c6z=x@lgVpWjv9`dSW%*qNTi0}FI zFz_IK4~-Fm(^$nJL5oPRH}-z3Ek5nE`pF^r;^np`MphH0tq+gePF}MJk|RgIVFcS@ zw@lmzUVb&Qz34Qn#GtKzD|n3lP3 z1b3P81ySiO*r+7*6Vk>~}7bFQG? 
zJp@Np`b!6%ybL-yH4VRaAaT>0N2xVeyWa+> zOGz7%e0$Kzsv-2FzsUaM5t`vQLdAkTvoafp=JOeDWK*2!M%e{tGH>%(pM1xt7d4Ek zUf2&@Z{!(#e?iS!ZrSMcijUM=V}b<`S#Bxx(_6 z0QpRL?U_76a(p@IOl10Q{{VbCGWu;YBr*An0JgdZEpIX7YD8AA^?!UwrJ5uRr5gD= ziRqTlKBI<17Q^sOXD7#eLRZrMohaA&rAGjJUu@-Re0Dt$3CZrHcF$V`@oThcs*G8# zgQz~)!Tn+>)WTs4ZNV$fb^I$(!$70T+d}Md{V_z^gp|s=q(gK;f=h~|!RoN6?l zG7L<`WfVc_o3s*YWK{-59-nUcW5DN743dhz1?7G-1Iea+kF;$+oaXpeoKvZhtO4rz z&dj<+@`v#VAba4B26%Xrf%DfnJ|`HORnvKdo96xVt)Yz}3ljeTAjCo&RPAEA4u5!}(U0P^0-pKR;uHTn82R%CGZ49mDssYCBrbP#(eX>J3J20VtY*9E# z?9un*9p#Q$cq1_Q;(?_Vd^hct_%i{ge_0~z^PLy+ozF}710^b;8`#%ex<$Vi9|*~RV3hzmT4Zg@yATX;w=O^%`AF@VA~bVEgp!@fKGvb(oZ-% zU%~XAEvF0_M1zpv=a<)L4}rwU^ks2j$9&^yV|`wtgRpbW=ezKTl3fBtJeMfpufA%O z!=v#KOX$s|}=)-#QS>3b|2MMb%|6>Y-Cgp!$v2 z`R6T`c!b1-93O1wc>M4@8gRpn_0u(Ib=npw3@oP2P|J7e;?y##fCn8jCyPlE>9=hH z$T@fFP)4pRisvM{hL=#on*Fj&RZ=o)!K_(kEfXlk3>F}7@^d<{DA$=}dUmddzF?V! z@to<+$WCOPt$@nkpv62KVf2i(4+k_t-3E4|mX&@pTF>bo*=o)L@Nv^n2RT1c&tAC4 zP%7^r)Y|=AV=kak8-T!?g$Jm>X97#L0~$?0BoVjogz6Vy1dGAvror^#6E4GBYa~*k zuu6l0W(dWBUNMtV8^}Q_Ml7=Jloc!2re^Vp-SoV6IbVd$Q8j=~jyevRyy~3T@{DU}$HsHx)VT3x{u+bH_|A;8NWeLgi0$u=`hd`Iz}U*PvKQ0R5E(}!Dl^899CA3= z&rAufGRZuhE~|WVD@J+?y32e|O{DRyCdMjdkLh8-&poK{$ME>fn}ex7lYEbpmBldC zO}cp**;j3h>gjdba^p)e+_gcve)+m-^cqOjT2!v1o^Zd7_OW25VK)K`du9*uxREtleL^)UTHyE240@N;$crNo zRCO7AGHW7xf@u%fla@}}-E76D-q+K2+dDoHp@%?=EpL&*2K#0y6I%5o28rj}EYv2Y zp+M&B8_Ix^NDMX8n!KJLr)2R6`YQ&`_VrytivzAyrZA_}2dJKY`DCvcmzNyZLzz5o zD5db^rUz^H%o-01E2>F6zyM%+=UQ?90D4B@?oH)BGfk#ZxDA!i^*LfZX{JKW&m3j4 zYGVYDN7LIW8isXJeek}aKw($M<0?9mzzeUoR`Tsr(=(X?yA_zFea+NnDWn#2G?ZHM zwl`S4PBAV8z#T-AYrQxq4>a1Dog>_@5>ehshSC?)AP$n29YPjE$;p z3qK2` zuWTB9SD9*9*zH|$7sL*hVXzwLLCQRBnZ$EPdrniQmOW0GLoW`zhP;=aerbp5e6^fIRS+vo| zNC!^CF==&LbeDSqc+Q5STcv1$;=Qm9WHM>}Ci5hEsJ!9a`(}*~iG`ixy1b?JO{Lcf z0pOgoUny$TN7t?IY$#MX!b}O(BdZH^9c!_^L@Y zCeQ~!24>W1^&VnTATh+%c%+(me7MQ;4}WZL3-MNwnRO09st2}T7QPbOM}EIKOxo@G z;3J{5b*!{%9i;@hyhnCcH0Q6rSuBOq>OS3bF0tnQI6~xta?KpfimkbiwVYw8cbuKv zpPWrx%;RqE0K8_Hv-AFN*|(rXea!Q8~;I-m^#FDm^>VnIA*a?Q;G zGKZ3du^FAt;!aY>)mWzi$=e$E!i`yKNI~n2Wjng#JjS}@^N3p~2%5)V-y+Uxf#(9u zRk=7$px+rh_9w0bJ6}95QRX&VUcbW@mP9Qxo~dd1Q@1SFg4_I>$w( zvy7USX0m{fbB1Zz=miVUVTfgx>dDI)m@Na(-xV5U7uOM`n1f>sT8H6I7Zbdk9x%)1 z3b&zw0sG>!LX+I1IqI7>y|{*8{%l7>3kh zk9-+2VHgKoVn{q>EOHhnz8&PQMPdd3NH>JL9xotrr>;XGyoJf{lX>^ZWMVlyVG@ul zjEgHNu~=G7$F2`GhXSyfi*>*l_8?-l-zOM~UBUUrQzVI2IK@Kuw{{OwNmtGp0R%Sg za>jL%E`F@8DN1@z(~TrRjmSaD{Ko>f9I;9;^*1=am1_p@yyd$L0!!hrG=%Ut7=M%M zug(Qxr*`ZnXFU!s_r9kOn)O?-tjs!+a2Xoz@)@N608z-!@(xYbL=3=ydSpg$cB>J2 z4(Olf8eq-*PkaEG6KOn*i2hqq$15C*=``6_tq&G4(@`RHIHXAIpx`nt+82== y0m;A|0!YZRka@uJ7B)D*S(~ZGB%;;e+)DMxgJ>rJVox>uU?!U(B;+|eXaCtUnwZD{ literal 0 HcmV?d00001 diff --git a/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown b/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown new file mode 100755 index 0000000000..abab071208 --- /dev/null +++ b/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown @@ -0,0 +1,112 @@ +Out-of-focus Deblur Filter {#tutorial_out_of_focus_deblur_filter} +========================== + +Goal +---- + +In this tutorial you will learn: + +- what is a degradation image model +- what is PSF of out-of-focus image +- how to restore a blurred image +- what is Wiener filter + +Theory +------ + +@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and an article [SmartDeblur]. +@note An out-of-focus image on this page is a real world image. 
+
+### What is a degradation image model?
+
+A mathematical model of the image degradation in the frequency domain representation is:
+
+\f[S = H\cdot U + N\f]
+
+where
+\f$S\f$ is the spectrum of the blurred (degraded) image,
+\f$U\f$ is the spectrum of the original true (undegraded) image,
+\f$H\f$ is the frequency response of the point spread function (PSF),
+\f$N\f$ is the spectrum of the additive noise.
+
+A circular PSF is a good approximation of out-of-focus distortion. Such a PSF is specified by only one parameter, the radius \f$R\f$. A circular PSF is used in this work.
+
+![Circular point spread function](psf.png)
+
+### How to restore a blurred image?
+
+The objective of restoration (deblurring) is to obtain an estimate of the original image. The restoration formula in the frequency domain is:
+
+\f[U' = H_w\cdot S\f]
+
+where
+\f$U'\f$ is the spectrum of the estimate of the original image \f$U\f$,
+\f$H_w\f$ is the restoration filter, for example, the Wiener filter.
+
+### What is the Wiener filter?
+
+The Wiener filter is a way to restore a blurred image. Let's suppose that the PSF is a real and symmetric signal, and that the power spectra of the original true image and of the noise are not known. Then the simplified Wiener formula is:
+
+\f[H_w = \frac{H}{|H|^2+\frac{1}{SNR}} \f]
+
+where
+\f$SNR\f$ is the signal-to-noise ratio.
+
+So, in order to recover an out-of-focus image with the Wiener filter, we need to know the \f$SNR\f$ and the radius \f$R\f$ of the circular PSF.
+
+
+Source code
+-----------
+
+You can find the source code in the `samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp` file of the OpenCV source code library.
+
+@include cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp
+
+Explanation
+-----------
+
+The out-of-focus image recovery algorithm consists of PSF generation, Wiener filter generation and filtering of the blurred image in the frequency domain:
+@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp main
+
+The function calcPSF() forms a circular PSF according to the input parameter radius \f$R\f$:
+@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcPSF
+
+The function calcWnrFilter() synthesizes the simplified Wiener filter \f$H_w\f$ according to the formula described above:
+@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcWnrFilter
+
+The function fftshift() rearranges the PSF. This code was copied from the tutorial @ref tutorial_discrete_fourier_transform "Discrete Fourier Transform":
+@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp fftshift
+
+The function filter2DFreq() filters the blurred image in the frequency domain:
+@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp filter2DFreq
+
+Result
+------
+
+Below you can see the real out-of-focus image:
+![Out-of-focus image](images/original.jpg)
+
+
+The result below was obtained with \f$R\f$ = 53 and \f$SNR\f$ = 5200:
+![The restored (deblurred) image](images/recovered.jpg)
+
+The Wiener filter was used, and the values of \f$R\f$ and \f$SNR\f$ were selected manually to give the best possible visual result.
+We can see that the result is not perfect, but it gives us a hint of the image content. With some difficulty, the text is readable.
+
+@note The parameter \f$R\f$ is the most important one, so you should adjust \f$R\f$ first and \f$SNR\f$ afterwards.
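+
+Since both parameters are chosen by eye, one possible way to tune them is to sweep over a few candidate values and inspect the results. The sketch below is not part of the sample; it assumes the helper functions calcPSF(), calcWnrFilter() and filter2DFreq() from the source code above, and the loop bounds, step size and output file names are only illustrative:
+
+@code{.cpp}
+// Try several radii with a fixed SNR and write each candidate to disk for visual inspection.
+Mat imgIn = imread("original.jpg", IMREAD_GRAYSCALE);
+Rect roi(0, 0, imgIn.cols & -2, imgIn.rows & -2);   // even-sized ROI, as in the sample
+const double snr = 5200.0;
+for (int R = 40; R <= 70; R += 5)
+{
+    Mat h, Hw, imgOut;
+    calcPSF(h, roi.size(), R);                      // circular PSF of radius R
+    calcWnrFilter(h, Hw, 1.0 / snr);                // simplified Wiener filter H_w
+    filter2DFreq(imgIn(roi), imgOut, Hw);           // restoration in the frequency domain
+    imgOut.convertTo(imgOut, CV_8U);
+    normalize(imgOut, imgOut, 0, 255, NORM_MINMAX);
+    imwrite(format("result_R%d.jpg", R), imgOut);   // pick the radius that looks sharpest
+}
+@endcode
+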
+@note Sometimes you can observe a ringing effect in the restored image. This effect can be reduced by several methods, for example by tapering the input image edges.
+
+You can also find a quick video demonstration of this on
+[YouTube](https://youtu.be/0bEcE4B0XP4).
+@youtube{0bEcE4B0XP4}
+
+References
+------
+- [Image Deblurring in Matlab] - Image Deblurring in Matlab
+- [SmartDeblur] - SmartDeblur site
+
+
+[Digital Image Processing]: http://web.ipac.caltech.edu/staff/fmasci/home/astro_refs/Digital_Image_Processing_2ndEd.pdf
+[Image Deblurring in Matlab]: https://www.mathworks.com/help/images/image-deblurring.html
+[SmartDeblur]: http://yuzhikov.com/articles/BlurredImagesRestoration1.htm
diff --git a/doc/tutorials/imgproc/table_of_content_imgproc.markdown b/doc/tutorials/imgproc/table_of_content_imgproc.markdown
index 59c985e1dd..3d82c0cf53 100644
--- a/doc/tutorials/imgproc/table_of_content_imgproc.markdown
+++ b/doc/tutorials/imgproc/table_of_content_imgproc.markdown
@@ -292,3 +292,13 @@ In this section you will learn about the image processing (manipulation) functio
     *Author:* Theodore Tsesmelis

     Where we learn to segment objects using Laplacian filtering, the Distance Transformation and the Watershed algorithm.
+
+- @subpage tutorial_out_of_focus_deblur_filter
+
+    *Languages:* C++
+
+    *Compatibility:* \> OpenCV 2.0
+
+    *Author:* Karpushin Vladislav
+
+    You will learn how to recover an out-of-focus image by the Wiener filter.
diff --git a/samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp b/samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp
new file mode 100755
index 0000000000..059df8bd55
--- /dev/null
+++ b/samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp
@@ -0,0 +1,149 @@
+/**
+* @brief You will learn how to recover an out-of-focus image by Wiener filter
+* @author Karpushin Vladislav, karpushin@ngs.ru, https://github.com/VladKarpushin
+*/
+#include <iostream>
+#include "opencv2/imgproc.hpp"
+#include "opencv2/imgcodecs.hpp"
+
+using namespace cv;
+using namespace std;
+
+void help();
+void calcPSF(Mat& outputImg, Size filterSize, int R);
+void fftshift(const Mat& inputImg, Mat& outputImg);
+void filter2DFreq(const Mat& inputImg, Mat& outputImg, const Mat& H);
+void calcWnrFilter(const Mat& input_h_PSF, Mat& output_G, double nsr);
+
+const String keys =
+"{help h usage ? |             | print this message   }"
+"{image          |original.JPG | input image name     }"
+"{R              |53           | radius               }"
+"{SNR            |5200         | signal to noise ratio}"
+;
+
+int main(int argc, char *argv[])
+{
+    help();
+    CommandLineParser parser(argc, argv, keys);
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    int R = parser.get<int>("R");
+    int snr = parser.get<int>("SNR");
+    string strInFileName = parser.get<String>("image");
+
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 0;
+    }
+
+    Mat imgIn;
+    imgIn = imread(strInFileName, IMREAD_GRAYSCALE);
+    if (imgIn.empty()) //check whether the image is loaded or not
+    {
+        cout << "ERROR : Image cannot be loaded..!!" << endl;
+        return -1;
+    }
+
+    Mat imgOut;
+
+//! [main]
+    // we need to process an even-sized image only
+    Rect roi = Rect(0, 0, imgIn.cols & -2, imgIn.rows & -2);
+
+    //Hw calculation (start)
+    Mat Hw, h;
+    calcPSF(h, roi.size(), R);
+    calcWnrFilter(h, Hw, 1.0 / double(snr));
+    //Hw calculation (stop)
+
+    // filtering (start)
+    filter2DFreq(imgIn(roi), imgOut, Hw);
+    // filtering (stop)
+//! [main]
+
+    imgOut.convertTo(imgOut, CV_8U);
+    normalize(imgOut, imgOut, 0, 255, NORM_MINMAX);
+    imwrite("result.jpg", imgOut);
+    return 0;
+}
+
+void help()
+{
+    cout << "2018-07-12" << endl;
+    cout << "DeBlur_v8" << endl;
+    cout << "You will learn how to recover an out-of-focus image by Wiener filter" << endl;
+}
+
+//! [calcPSF]
+void calcPSF(Mat& outputImg, Size filterSize, int R)
+{
+    Mat h(filterSize, CV_32F, Scalar(0));
+    Point point(filterSize.width / 2, filterSize.height / 2);
+    circle(h, point, R, 255, -1, 8);
+    Scalar summa = sum(h);
+    outputImg = h / summa[0];
+}
+//! [calcPSF]
+
+//! [fftshift]
+void fftshift(const Mat& inputImg, Mat& outputImg)
+{
+    outputImg = inputImg.clone();
+    int cx = outputImg.cols / 2;
+    int cy = outputImg.rows / 2;
+    Mat q0(outputImg, Rect(0, 0, cx, cy));
+    Mat q1(outputImg, Rect(cx, 0, cx, cy));
+    Mat q2(outputImg, Rect(0, cy, cx, cy));
+    Mat q3(outputImg, Rect(cx, cy, cx, cy));
+    Mat tmp;
+    q0.copyTo(tmp);
+    q3.copyTo(q0);
+    tmp.copyTo(q3);
+    q1.copyTo(tmp);
+    q2.copyTo(q1);
+    tmp.copyTo(q2);
+}
+//! [fftshift]
+
+//! [filter2DFreq]
+void filter2DFreq(const Mat& inputImg, Mat& outputImg, const Mat& H)
+{
+    Mat planes[2] = { Mat_<float>(inputImg.clone()), Mat::zeros(inputImg.size(), CV_32F) };
+    Mat complexI;
+    merge(planes, 2, complexI);
+    dft(complexI, complexI, DFT_SCALE);
+
+    Mat planesH[2] = { Mat_<float>(H.clone()), Mat::zeros(H.size(), CV_32F) };
+    Mat complexH;
+    merge(planesH, 2, complexH);
+    Mat complexIH;
+    mulSpectrums(complexI, complexH, complexIH, 0);
+
+    idft(complexIH, complexIH);
+    split(complexIH, planes);
+    outputImg = planes[0];
+}
+//! [filter2DFreq]
+
+//! [calcWnrFilter]
+void calcWnrFilter(const Mat& input_h_PSF, Mat& output_G, double nsr)
+{
+    Mat h_PSF_shifted;
+    fftshift(input_h_PSF, h_PSF_shifted);
+    Mat planes[2] = { Mat_<float>(h_PSF_shifted.clone()), Mat::zeros(h_PSF_shifted.size(), CV_32F) };
+    Mat complexI;
+    merge(planes, 2, complexI);
+    dft(complexI, complexI);
+    split(complexI, planes);
+    Mat denom;
+    // the PSF is assumed real and symmetric (see the tutorial), so |H|^2 is computed from the real part alone
+    pow(abs(planes[0]), 2, denom);
+    denom += nsr;
+    divide(planes[0], denom, output_G);
+}
+//! 
[calcWnrFilter] From 0aded5aae6090a2a3d95b3e5d6afc16ec21371f7 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Fri, 27 Jul 2018 14:29:01 +0300 Subject: [PATCH 10/25] cmake: fixed builds in directories containing plus sign --- modules/python/bindings/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/python/bindings/CMakeLists.txt b/modules/python/bindings/CMakeLists.txt index f7c86e0250..9d0346d5fc 100644 --- a/modules/python/bindings/CMakeLists.txt +++ b/modules/python/bindings/CMakeLists.txt @@ -20,8 +20,12 @@ endforeach() set(opencv_hdrs "") set(opencv_userdef_hdrs "") foreach(m ${OPENCV_PYTHON_MODULES}) - ocv_list_filter(OPENCV_MODULE_${m}_HEADERS "${OPENCV_MODULE_${m}_LOCATION}/include" __hdrs) - list(APPEND opencv_hdrs ${__hdrs}) + foreach (hdr ${OPENCV_MODULE_${m}_HEADERS}) + ocv_is_subdir(is_sub "${OPENCV_MODULE_${m}_LOCATION}/include" "${hdr}") + if(is_sub) + list(APPEND opencv_hdrs "${hdr}") + endif() + endforeach() file(GLOB userdef_hdrs ${OPENCV_MODULE_${m}_LOCATION}/misc/python/pyopencv*.hpp) list(APPEND opencv_userdef_hdrs ${userdef_hdrs}) endforeach(m) From e031bada7d0a474787411f05b6ce7a1b74d0b5e4 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Fri, 27 Jul 2018 18:25:55 +0300 Subject: [PATCH 11/25] Fixed several issues found by static analysis, Windows-specific --- modules/highgui/src/window_w32.cpp | 14 +++++----- modules/videoio/src/cap_dshow.cpp | 41 ++++++++---------------------- modules/videoio/src/cap_msmf.cpp | 16 +++++++++--- modules/videoio/src/cap_vfw.cpp | 2 +- 4 files changed, 33 insertions(+), 40 deletions(-) diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index ea7b461f90..945b2e6e78 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -307,8 +307,8 @@ icvLoadWindowPos( const char* name, CvRect& rect ) { HKEY hkey; char szKey[1024]; - strcpy( szKey, icvWindowPosRootKey ); - strcat( szKey, name ); + strcpy_s( szKey, 1024, icvWindowPosRootKey ); + strcat_s( szKey, 1024, name ); rect.x = rect.y = CW_USEDEFAULT; rect.width = rect.height = 320; @@ -368,8 +368,8 @@ icvSaveWindowPos( const char* name, CvRect rect ) HKEY hkey; char szKey[1024]; char rootKey[1024]; - strcpy( szKey, icvWindowPosRootKey ); - strcat( szKey, name ); + strcpy_s( szKey, 1024, icvWindowPosRootKey ); + strcat_s( szKey, 1024, name ); if( RegOpenKeyEx( HKEY_CURRENT_USER,szKey,0,KEY_READ,&hkey) != ERROR_SUCCESS ) { @@ -379,7 +379,7 @@ icvSaveWindowPos( const char* name, CvRect rect ) char oldestKey[1024]; char currentKey[1024]; - strcpy( rootKey, icvWindowPosRootKey ); + strcpy_s( rootKey, 1024, icvWindowPosRootKey ); rootKey[strlen(rootKey)-1] = '\0'; if( RegCreateKeyEx(HKEY_CURRENT_USER, rootKey, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_READ+KEY_WRITE, 0, &hroot, NULL) != ERROR_SUCCESS ) //RegOpenKeyEx( HKEY_CURRENT_USER,rootKey,0,KEY_READ,&hroot) != ERROR_SUCCESS ) @@ -398,7 +398,7 @@ icvSaveWindowPos( const char* name, CvRect rect ) oldestTime.dwLowDateTime > accesstime.dwLowDateTime) ) { oldestTime = accesstime; - strcpy( oldestKey, currentKey ); + strcpy_s( oldestKey, 1024, currentKey ); } } @@ -1500,6 +1500,8 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) rgn = CreateRectRgn(0, 0, wrc.right, wrc.bottom); rgn1 = CreateRectRgn(cr.left, cr.top, cr.right, cr.bottom); rgn2 = CreateRectRgn(tr.left, tr.top, tr.right, tr.bottom); + CV_Assert(rgn != 0, rgn1 != 0, rgn2 != 0); + ret = CombineRgn(rgn, rgn, rgn1, RGN_DIFF); ret = CombineRgn(rgn, rgn, 
rgn2, RGN_DIFF); diff --git a/modules/videoio/src/cap_dshow.cpp b/modules/videoio/src/cap_dshow.cpp index 3a92a81d49..03cb5a4f37 100644 --- a/modules/videoio/src/cap_dshow.cpp +++ b/modules/videoio/src/cap_dshow.cpp @@ -811,6 +811,8 @@ void videoDevice::NukeDownstream(IBaseFilter *pBF){ IEnumPins *pins = NULL; PIN_INFO pininfo; HRESULT hr = pBF->EnumPins(&pins); + if (hr != S_OK || !pins) + return; pins->Reset(); while (hr == NOERROR) { @@ -838,7 +840,7 @@ void videoDevice::NukeDownstream(IBaseFilter *pBF){ pP->Release(); } } - if (pins) pins->Release(); + pins->Release(); } @@ -999,17 +1001,6 @@ videoDevice::~videoDevice(){ (pGraph) = 0; } - //delete our pointers - delete pDestFilter; - delete pVideoInputFilter; - delete pGrabberF; - delete pGrabber; - delete pControl; - delete streamConf; - delete pMediaEvent; - delete pCaptureGraph; - delete pGraph; - DebugPrintOut("SETUP: Device %i disconnected and freed\n\n",myID); } @@ -1654,7 +1645,7 @@ bool videoInput::getVideoSettingFilter(int deviceID, long Property, long &min, l IAMVideoProcAmp *pAMVideoProcAmp = NULL; hr = VD->pVideoInputFilter->QueryInterface(IID_IAMVideoProcAmp, (void**)&pAMVideoProcAmp); - if(FAILED(hr)){ + if(FAILED(hr) || !pAMVideoProcAmp){ DebugPrintOut("setVideoSetting - QueryInterface Error\n"); #if 0 if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release(); @@ -1676,7 +1667,7 @@ bool videoInput::getVideoSettingFilter(int deviceID, long Property, long &min, l hr = pAMVideoProcAmp->Get(Property, ¤tValue, &flags); } - if(pAMVideoProcAmp)pAMVideoProcAmp->Release(); + pAMVideoProcAmp->Release(); #if 0 if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release(); if(VD->pVideoInputFilter)VD->pVideoInputFilter = NULL; @@ -1881,7 +1872,7 @@ bool videoInput::getVideoSettingCamera(int deviceID, long Property, long &min, l IAMCameraControl *pIAMCameraControl = NULL; hr = VD->pVideoInputFilter->QueryInterface(IID_IAMCameraControl, (void**)&pIAMCameraControl); - if(FAILED(hr)){ + if(FAILED(hr) || !pIAMCameraControl){ DebugPrintOut("setVideoSetting - QueryInterface Error\n"); #if 0 if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release(); @@ -1902,7 +1893,7 @@ bool videoInput::getVideoSettingCamera(int deviceID, long Property, long &min, l hr = pIAMCameraControl->Get(Property, ¤tValue, &flags); } - if(pIAMCameraControl)pIAMCameraControl->Release(); + pIAMCameraControl->Release(); #if 0 if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release(); if(VD->pVideoInputFilter)VD->pVideoInputFilter = NULL; @@ -2595,7 +2586,7 @@ int videoInput::start(int deviceID, videoDevice *VD){ //we do this because webcams don't have a preview mode hr = VD->pCaptureGraph->FindInterface(&CAPTURE_MODE, &MEDIATYPE_Video, VD->pVideoInputFilter, IID_IAMStreamConfig, (void **)&VD->streamConf); - if(FAILED(hr)){ + if(FAILED(hr) || !VD->streamConf){ DebugPrintOut("ERROR: Couldn't config the stream!\n"); stopDevice(deviceID); return hr; @@ -2737,14 +2728,8 @@ int videoInput::start(int deviceID, videoDevice *VD){ //lets try freeing our stream conf here too //this will fail if the device is already running - if(VD->streamConf){ - VD->streamConf->Release(); - VD->streamConf = NULL; - }else{ - DebugPrintOut("ERROR: connecting device - prehaps it is already being used?\n"); - stopDevice(deviceID); - return S_FALSE; - } + VD->streamConf->Release(); + VD->streamConf = NULL; //NULL RENDERER// @@ -3093,7 +3078,7 @@ HRESULT videoInput::routeCrossbar(ICaptureGraphBuilder2 **ppBuild, IBaseFilter * IAMCrossbar *pXBar1 = NULL; HRESULT hr = 
pBuild->FindInterface(&LOOK_UPSTREAM_ONLY, NULL, pVidFilter, IID_IAMCrossbar, (void**)&pXBar1); - if (SUCCEEDED(hr)) + if (SUCCEEDED(hr) && pXBar1) { bool foundDevice = false; @@ -3163,10 +3148,6 @@ HRESULT videoInput::routeCrossbar(ICaptureGraphBuilder2 **ppBuild, IBaseFilter * //we were getting a crash otherwise //if(Crossbar)Crossbar->Release(); //if(Crossbar)Crossbar = NULL; - - if(pXBar1)pXBar1->Release(); - if(pXBar1)pXBar1 = NULL; - }else{ DebugPrintOut("SETUP: You are a webcam or snazzy firewire cam! No Crossbar needed\n"); return hr; diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 35043ee535..863f46bc89 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -91,7 +91,7 @@ static bool pMFCreateDXGIDeviceManager_initialized = false; static FN_MFCreateDXGIDeviceManager pMFCreateDXGIDeviceManager = NULL; static void init_MFCreateDXGIDeviceManager() { - HMODULE h = LoadLibraryA("mfplat.dll"); + HMODULE h = LoadLibraryExA("mfplat.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (h) { pMFCreateDXGIDeviceManager = (FN_MFCreateDXGIDeviceManager)GetProcAddress(h, "MFCreateDXGIDeviceManager"); @@ -1720,7 +1720,7 @@ bool CvCapture_MSMF::setProperty( int property_id, double value ) return setTime(duration * value, true); break; case CV_CAP_PROP_POS_FRAMES: - if (getFramerate(nativeFormat) != 0) + if (std::fabs(getFramerate(nativeFormat)) > 0) return setTime(value * 1e7 / getFramerate(nativeFormat), false); break; case CV_CAP_PROP_POS_MSEC: @@ -1978,7 +1978,17 @@ private: CvVideoWriter_MSMF::CvVideoWriter_MSMF(): MF(Media_Foundation::getInstance()), - initiated(false) + videoWidth(0), + videoHeight(0), + fps(0), + bitRate(0), + frameSize(0), + encodingFormat(), + inputFormat(), + streamIndex(0), + initiated(false), + rtStart(0), + rtDuration(0) { } diff --git a/modules/videoio/src/cap_vfw.cpp b/modules/videoio/src/cap_vfw.cpp index 0d71a0c2a5..f62baf4e71 100644 --- a/modules/videoio/src/cap_vfw.cpp +++ b/modules/videoio/src/cap_vfw.cpp @@ -377,8 +377,8 @@ LRESULT PASCAL CvCaptureCAM_VFW::frameCallback( HWND hWnd, VIDEOHDR* hdr ) if (!hWnd) return FALSE; capture = (CvCaptureCAM_VFW*)capGetUserData(hWnd); + if (!capture) return (LRESULT)FALSE; capture->hdr = hdr; - return (LRESULT)TRUE; } From dd8e99045144b4828a79eba5a1e592fc489927c0 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Fri, 27 Jul 2018 18:41:39 +0300 Subject: [PATCH 12/25] Fixed several issues found by static analysis, GStreamer backend --- modules/videoio/src/cap_gstreamer.cpp | 37 ++++++++++----------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index c3100f56f3..8758b21dd9 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -1224,7 +1224,11 @@ Ptr cv::createGStreamerCapture(int index) class CvVideoWriter_GStreamer : public CvVideoWriter { public: - CvVideoWriter_GStreamer() { init(); } + CvVideoWriter_GStreamer() + : pipeline(0), source(0), encodebin(0), file(0), buffer(0), input_pix_fmt(0), + num_frames(0), framerate(0) + { + } virtual ~CvVideoWriter_GStreamer() CV_OVERRIDE { close(); } virtual bool open( const char* filename, int fourcc, @@ -1232,7 +1236,6 @@ public: virtual void close(); virtual bool writeFrame( const IplImage* image ) CV_OVERRIDE; protected: - void init(); const char* filenameToMimetype(const char* filename); GstElement* pipeline; GstElement* source; @@ -1245,22 +1248,6 @@ protected: double 
framerate; }; -/*! - * \brief CvVideoWriter_GStreamer::init - * initialise all variables - */ -void CvVideoWriter_GStreamer::init() -{ - pipeline = NULL; - source = NULL; - encodebin = NULL; - file = NULL; - buffer = NULL; - - num_frames = 0; - framerate = 0; -} - /*! * \brief CvVideoWriter_GStreamer::close * ends the pipeline by sending EOS and destroys the pipeline and all @@ -1282,17 +1269,19 @@ void CvVideoWriter_GStreamer::close() //wait for EOS to trickle down the pipeline. This will let all elements finish properly GstBus* bus = gst_element_get_bus(pipeline); GstMessage *msg = gst_bus_timed_pop_filtered(bus, GST_CLOCK_TIME_NONE, (GstMessageType)(GST_MESSAGE_ERROR | GST_MESSAGE_EOS)); - if (GST_MESSAGE_TYPE(msg) == GST_MESSAGE_ERROR) + if (!msg || GST_MESSAGE_TYPE(msg) == GST_MESSAGE_ERROR) { CV_WARN("Error during VideoWriter finalization\n"); + if(msg != NULL) + { + gst_message_unref(msg); + g_object_unref(G_OBJECT(bus)); + } return; } - if(msg != NULL) - { - gst_message_unref(msg); - g_object_unref(G_OBJECT(bus)); - } + gst_message_unref(msg); + g_object_unref(G_OBJECT(bus)); status = gst_element_set_state (pipeline, GST_STATE_NULL); if (status == GST_STATE_CHANGE_ASYNC) From fb1f12021b93e4fa554d2f0c9bc8410610b28c25 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Fri, 27 Jul 2018 19:56:35 +0300 Subject: [PATCH 13/25] Fixed build with latest IE version --- modules/dnn/src/op_inf_engine.cpp | 18 ++++++++++++++++++ modules/dnn/src/op_inf_engine.hpp | 8 ++++++++ 2 files changed, 26 insertions(+) diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index a7c13f3a13..eb409eebbc 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -322,12 +322,30 @@ InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t) noex return InferenceEngine::StatusCode::OK; } +InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(size_t size, InferenceEngine::ResponseDesc *responseDesc) noexcept +{ + CV_Error(Error::StsNotImplemented, ""); + return InferenceEngine::StatusCode::OK; +} + size_t InfEngineBackendNet::getBatchSize() const noexcept { CV_Error(Error::StsNotImplemented, ""); return 0; } +InferenceEngine::StatusCode InfEngineBackendNet::AddExtension(const InferenceEngine::IShapeInferExtensionPtr &extension, InferenceEngine::ResponseDesc *resp) noexcept +{ + CV_Error(Error::StsNotImplemented, ""); + return InferenceEngine::StatusCode::OK; +} + +InferenceEngine::StatusCode InfEngineBackendNet::reshape(const InferenceEngine::ICNNNetwork::InputShapes &inputShapes, InferenceEngine::ResponseDesc *resp) noexcept +{ + CV_Error(Error::StsNotImplemented, ""); + return InferenceEngine::StatusCode::OK; +} + void InfEngineBackendNet::init(int targetId) { if (inputs.empty()) diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index a33d93cb03..a5ad63fb5f 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -9,6 +9,8 @@ #define __OPENCV_DNN_OP_INF_ENGINE_HPP__ #include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" +#include "opencv2/dnn.hpp" #ifdef HAVE_INF_ENGINE #if defined(__GNUC__) && __GNUC__ >= 5 @@ -86,8 +88,14 @@ public: virtual InferenceEngine::StatusCode setBatchSize(const size_t size) noexcept CV_OVERRIDE; + virtual InferenceEngine::StatusCode setBatchSize(size_t size, InferenceEngine::ResponseDesc* responseDesc) noexcept; + virtual size_t getBatchSize() const noexcept CV_OVERRIDE; + virtual InferenceEngine::StatusCode 
AddExtension(const InferenceEngine::IShapeInferExtensionPtr& extension, InferenceEngine::ResponseDesc* resp) noexcept; + + virtual InferenceEngine::StatusCode reshape(const InputShapes& inputShapes, InferenceEngine::ResponseDesc* resp) noexcept; + void init(int targetId); void addBlobs(const std::vector >& wrappers); From 0bef42ba12227158409393ecf6ede7ba3a014e6e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 28 Jul 2018 09:08:09 +0000 Subject: [PATCH 14/25] videoio: add note about image BGR format VideoWriter::write() --- modules/videoio/include/opencv2/videoio.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index eef840b6ec..2628609040 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -905,7 +905,7 @@ public: /** @brief Writes the next video frame - @param image The written frame + @param image The written frame. In general, color images are expected in BGR format. The function/method writes the specified image to video file. It must have the same size as has been specified when opening the video writer. From 89528d7c3a9f2b5ea365f2ae4988390ab2ca71ab Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 28 Jul 2018 10:29:26 +0000 Subject: [PATCH 15/25] core(ocl): don't expose exceptions from OpenCL callback to avoid silent crashes of OpenCL worker threads. --- modules/core/src/ocl.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index cc6feacbbb..05f128baa7 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -2834,7 +2834,22 @@ extern "C" { static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p) { - ((cv::ocl::Kernel::Impl*)p)->finit(e); + try + { + ((cv::ocl::Kernel::Impl*)p)->finit(e); + } + catch (const cv::Exception& exc) + { + CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what()); + } + catch (const std::exception& exc) + { + CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what()); + } + catch (...) 
+ { + CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback"); + } } } From 47202b3349cad9df1e62978a8280fba266ae487d Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Sun, 29 Jul 2018 18:22:46 +0200 Subject: [PATCH 16/25] core:avx2 fix unaligned store for v_store_interleave v_uint32x8-3ch --- modules/core/include/opencv2/core/hal/intrin_avx.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 5c2d0b60c2..fc2fd7cee1 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -2156,9 +2156,9 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint } else { - _mm256_stream_si256((__m256i*)ptr, bgr0); - _mm256_stream_si256((__m256i*)(ptr + 8), p2); - _mm256_stream_si256((__m256i*)(ptr + 16), bgr2); + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 8), p2); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgr2); } } From 6499263b418d43cc06a51298236e46ea99ac6a64 Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Wed, 25 Jul 2018 01:01:19 +0200 Subject: [PATCH 17/25] core:test Expand hal_intrin tests to support SIMD256 --- .../core/include/opencv2/core/hal/intrin.hpp | 10 +- .../include/opencv2/core/hal/intrin_avx.hpp | 121 ++++--- .../include/opencv2/core/hal/intrin_neon.hpp | 8 + .../include/opencv2/core/hal/intrin_sse.hpp | 5 + modules/core/test/test_intrin.avx2.cpp | 5 + modules/core/test/test_intrin.cpp | 298 +++++------------- modules/core/test/test_intrin.simd.hpp | 296 +++++++++++++++++ modules/core/test/test_intrin_utils.hpp | 173 ++++++---- 8 files changed, 562 insertions(+), 354 deletions(-) create mode 100644 modules/core/test/test_intrin.avx2.cpp create mode 100644 modules/core/test/test_intrin.simd.hpp diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 031f8f3d02..4631e586ac 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -154,7 +154,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; // but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load(). // Correspondingly, the wide intrinsics (which are mapped to the "widest" // available instruction set) will get vx_ prefix -// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v245_load()) +// (and will be mapped to v256_ counterparts) (e.g. 
vx_load() => v256_load()) #if CV_AVX2 #include "opencv2/core/hal/intrin_avx.hpp" @@ -214,14 +214,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \ inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \ inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \ + inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \ + inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \ inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \ inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } #define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ -inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); } + inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); } #define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \ -inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); } + inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); } #define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \ CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ @@ -316,7 +318,7 @@ template struct V_RegTraits CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256) CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load) inline void vx_cleanup() { v256_cleanup(); } -#elif CV_SIMD128 +#elif CV_SIMD128 || CV_SIMD128_CPP typedef v_uint8x16 v_uint8; typedef v_int8x16 v_int8; typedef v_uint16x8 v_uint16; diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 4ea66f5c0b..90cfd4028a 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -407,6 +407,11 @@ inline v_float16x16 v256_load_f16(const short* ptr) inline v_float16x16 v256_load_f16_aligned(const short* ptr) { return v_float16x16(_mm256_load_si256((const __m256i*)ptr)); } +inline v_float16x16 v256_load_f16_low(const short* ptr) +{ return v_float16x16(v256_load_low(ptr).val); } +inline v_float16x16 v256_load_f16_halves(const short* ptr0, const short* ptr1) +{ return v_float16x16(v256_load_halves(ptr0, ptr1).val); } + inline void v_store(short* ptr, const v_float16x16& a) { _mm256_storeu_si256((__m256i*)ptr, a.val); } inline void v_store_aligned(short* ptr, const v_float16x16& a) @@ -819,94 +824,80 @@ OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd) template inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b) { - __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03); - - switch(imm) - { - case 0: return a; - case 32: return b; - case 16: return v_uint8x32(swap); - } + enum {IMM_R = (16 - imm) & 0xFF}; + enum {IMM_R2 = (32 - imm) & 0xFF}; - if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, 16 - imm)); - if (imm < 32) return v_uint8x32(_mm256_alignr_epi8(swap, b.val, 32 - imm)); + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); - return v_uint8x32(); + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, IMM_R)); + return v_uint8x32(_mm256_alignr_epi8(swap, 
b.val, IMM_R2)); // imm < 32 } template inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b) { - __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21); + enum {IMM_L = (imm - 16) & 0xFF}; - switch(imm) - { - case 0: return a; - case 32: return b; - case 16: return v_uint8x32(swap); - } + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); - if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm)); - if (imm < 32) return v_uint8x32(_mm256_alignr_epi8(b.val, swap, imm - 16)); - - return v_uint8x32(); + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm)); + return v_uint8x32(_mm256_alignr_epi8(b.val, swap, IMM_L)); } template inline v_uint8x32 v_rotate_left(const v_uint8x32& a) { - v_uint8x32 res; + enum {IMM_L = (imm - 16) & 0xFF}; + enum {IMM_R = (16 - imm) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + // ESAC control[3] ? [127:0] = 0 __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0)); - - if (imm == 0) - return a; - if (imm == 16) - res.val = swapz; - else if (imm < 16) - res.val = _mm256_alignr_epi8(a.val, swapz, 16 - imm); - else if (imm < 32) - res.val = _mm256_slli_si256(swapz, imm - 16); - else - return v_uint8x32(); - return res; + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swapz, IMM_R)); + return v_uint8x32(_mm256_slli_si256(swapz, IMM_L)); } template inline v_uint8x32 v_rotate_right(const v_uint8x32& a) { - v_uint8x32 res; + enum {IMM_L = (imm - 16) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + // ESAC control[3] ? 
[127:0] = 0 __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1)); - - if (imm == 0) - return a; - if (imm == 16) - res.val = swapz; - else if (imm < 16) - res.val = _mm256_alignr_epi8(swapz, a.val, imm); - else if (imm < 32) - res.val = _mm256_srli_si256(swapz, imm - 16); - else - return v_uint8x32(); - return res; -} - -#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \ - template \ - inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \ - { \ - const int w = sizeof(typename _Tpvec::lane_type); \ - v_uint8x32 ret = intrin(v_reinterpret_as_u8(a), \ - v_reinterpret_as_u8(b)); \ - return _Tpvec(cast(ret.val)); \ - } \ - template \ - inline _Tpvec intrin(const _Tpvec& a) \ - { \ - const int w = sizeof(typename _Tpvec::lane_type); \ - v_uint8x32 ret = intrin(v_reinterpret_as_u8(a)); \ - return _Tpvec(cast(ret.val)); \ + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swapz, a.val, imm)); + return v_uint8x32(_mm256_srli_si256(swapz, IMM_L)); +} + +#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \ + template \ + inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin(v_reinterpret_as_u8(a), \ + v_reinterpret_as_u8(b)); \ + return _Tpvec(cast(ret.val)); \ + } \ + template \ + inline _Tpvec intrin(const _Tpvec& a) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin(v_reinterpret_as_u8(a)); \ + return _Tpvec(cast(ret.val)); \ } #define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec) \ diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index d8067306a5..f6bfe94f9f 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -319,6 +319,9 @@ static inline void cv_vst1_f16(void* ptr, float16x4_t a) #endif } +#ifndef vdup_n_f16 + #define vdup_n_f16(v) (float16x4_t){v, v, v, v} +#endif struct v_float16x8 { @@ -889,6 +892,11 @@ inline v_float16x8 v_load_f16(const short* ptr) inline v_float16x8 v_load_f16_aligned(const short* ptr) { return v_float16x8(cv_vld1q_f16(ptr)); } +inline v_float16x8 v_load_f16_low(const short* ptr) +{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr), vdup_n_f16((float16_t)0))); } +inline v_float16x8 v_load_f16_halves(const short* ptr0, const short* ptr1) +{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr0), cv_vld1_f16(ptr1))); } + inline void v_store(short* ptr, const v_float16x8& a) { cv_vst1q_f16(ptr, a.val); } inline void v_store_aligned(short* ptr, const v_float16x8& a) diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 4971c777e4..943e86abf7 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -1308,6 +1308,11 @@ inline v_float16x8 v_load_f16(const short* ptr) inline v_float16x8 v_load_f16_aligned(const short* ptr) { return v_float16x8(_mm_load_si128((const __m128i*)ptr)); } +inline v_float16x8 v_load_f16_low(const short* ptr) +{ return v_float16x8(v_load_low(ptr).val); } +inline v_float16x8 v_load_f16_halves(const short* ptr0, const short* ptr1) +{ return v_float16x8(v_load_halves(ptr0, ptr1).val); } + inline void v_store(short* ptr, const v_float16x8& a) { _mm_storeu_si128((__m128i*)ptr, a.val); } inline void v_store_aligned(short* ptr, const v_float16x8& a) 
diff --git a/modules/core/test/test_intrin.avx2.cpp b/modules/core/test/test_intrin.avx2.cpp new file mode 100644 index 0000000000..9ebfcdf542 --- /dev/null +++ b/modules/core/test/test_intrin.avx2.cpp @@ -0,0 +1,5 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "test_precomp.hpp" +#include "test_intrin.simd.hpp" \ No newline at end of file diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index 9a1130fe96..6610e332de 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -2,249 +2,101 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" +#include "test_intrin.simd.hpp" -#include "test_intrin_utils.hpp" - -#define CV_CPU_SIMD_FILENAME "test_intrin_utils.hpp" +#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp" #define CV_CPU_DISPATCH_MODE FP16 #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" - -using namespace cv; +#define CV_CPU_DISPATCH_MODE AVX2 +#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" namespace opencv_test { namespace hal { using namespace CV_CPU_OPTIMIZATION_NAMESPACE; -//============= 8-bit integer ===================================================================== - -TEST(hal_intrin, uint8x16) { - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_expand_q() - .test_addsub() - .test_addsub_wrap() - .test_cmp() - .test_logic() - .test_min_max() - .test_absdiff() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() - .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() - .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() - ; -} +TEST(hal_intrin, uint8x16) +{ test_hal_intrin_uint8(); } -TEST(hal_intrin, int8x16) { - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_expand_q() - .test_addsub() - .test_addsub_wrap() - .test_cmp() - .test_logic() - .test_min_max() - .test_absdiff() - .test_abs() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() - .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() - ; -} +TEST(hal_intrin, int8x16) +{ test_hal_intrin_int8(); } -//============= 16-bit integer ===================================================================== - -TEST(hal_intrin, uint16x8) { - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_addsub_wrap() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() - .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() - .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() - ; -} +TEST(hal_intrin, uint16x8) +{ test_hal_intrin_uint16(); } -TEST(hal_intrin, int16x8) { - TheTest() - .test_loadstore() - 
.test_interleave() - .test_expand() - .test_addsub() - .test_addsub_wrap() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_dot_prod() - .test_logic() - .test_min_max() - .test_absdiff() - .test_abs() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() - .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() - ; -} +TEST(hal_intrin, int16x8) +{ test_hal_intrin_int16(); } -//============= 32-bit integer ===================================================================== - -TEST(hal_intrin, uint32x4) { - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - .test_transpose() - ; -} +TEST(hal_intrin, int32x4) +{ test_hal_intrin_int32(); } -TEST(hal_intrin, int32x4) { - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_mul() - .test_abs() - .test_cmp() - .test_popcount() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - .test_float_cvt32() - .test_float_cvt64() - .test_transpose() - ; -} +TEST(hal_intrin, uint32x4) +{ test_hal_intrin_uint32(); } -//============= 64-bit integer ===================================================================== - -TEST(hal_intrin, uint64x2) { - TheTest() - .test_loadstore() - .test_addsub() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; -} +TEST(hal_intrin, uint64x2) +{ test_hal_intrin_uint64(); } -TEST(hal_intrin, int64x2) { - TheTest() - .test_loadstore() - .test_addsub() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; -} +TEST(hal_intrin, int64x2) +{ test_hal_intrin_int64(); } -//============= Floating point ===================================================================== - -TEST(hal_intrin, float32x4) { - TheTest() - .test_loadstore() - .test_interleave() - .test_interleave_2channel() - .test_addsub() - .test_mul() - .test_div() - .test_cmp() - .test_sqrt_abs() - .test_min_max() - .test_float_absdiff() - .test_reduce() - .test_mask() - .test_unpack() - .test_float_math() - .test_float_cvt64() - .test_matmul() - .test_transpose() - .test_reduce_sum4() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - ; -} +TEST(hal_intrin, float32x4) +{ test_hal_intrin_float32(); } -#if CV_SIMD128_64F -TEST(hal_intrin, float64x2) { - TheTest() - .test_loadstore() - .test_addsub() - .test_mul() - .test_div() - .test_cmp() - .test_sqrt_abs() - .test_min_max() - .test_float_absdiff() - .test_mask() - .test_unpack() - 
.test_float_math() - .test_float_cvt32() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; -} -#endif +TEST(hal_intrin, float64x2) +{ test_hal_intrin_float64(); } -TEST(hal_intrin,float16) +TEST(hal_intrin, float16x8) { CV_CPU_CALL_FP16_(test_hal_intrin_float16, ()); throw SkipTestException("Unsupported hardware: FP16 is not available"); } -}} +#define DISPATCH_SIMD_MODES AVX2 +#define DISPATCH_SIMD_NAME "SIMD256" +#define DISPATCH_SIMD(fun) \ + do { \ + CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \ + throw SkipTestException( \ + "Unsupported hardware: " \ + DISPATCH_SIMD_NAME \ + " is not available" \ + ); \ + } while(0) + +TEST(hal_intrin256, uint8x32) +{ DISPATCH_SIMD(test_hal_intrin_uint8); } + +TEST(hal_intrin256, int8x32) +{ DISPATCH_SIMD(test_hal_intrin_int8); } + +TEST(hal_intrin256, uint16x16) +{ DISPATCH_SIMD(test_hal_intrin_uint16); } + +TEST(hal_intrin256, int16x16) +{ DISPATCH_SIMD(test_hal_intrin_int16); } + +TEST(hal_intrin256, uint32x8) +{ DISPATCH_SIMD(test_hal_intrin_uint32); } + +TEST(hal_intrin256, int32x8) +{ DISPATCH_SIMD(test_hal_intrin_int32); } + +TEST(hal_intrin256, uint64x4) +{ DISPATCH_SIMD(test_hal_intrin_uint64); } + +TEST(hal_intrin256, int64x4) +{ DISPATCH_SIMD(test_hal_intrin_int64); } + +TEST(hal_intrin256, float32x8) +{ DISPATCH_SIMD(test_hal_intrin_float32); } + +TEST(hal_intrin256, float64x4) +{ DISPATCH_SIMD(test_hal_intrin_float64); } + +TEST(hal_intrin256, float16x16) +{ + if (!CV_CPU_HAS_SUPPORT_FP16) + throw SkipTestException("Unsupported hardware: FP16 is not available"); + DISPATCH_SIMD(test_hal_intrin_float16); +} + +}} // namespace \ No newline at end of file diff --git a/modules/core/test/test_intrin.simd.hpp b/modules/core/test/test_intrin.simd.hpp new file mode 100644 index 0000000000..4e0d3a073f --- /dev/null +++ b/modules/core/test/test_intrin.simd.hpp @@ -0,0 +1,296 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+#include "test_precomp.hpp" +#include "test_intrin_utils.hpp" + +namespace opencv_test { namespace hal { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +void test_hal_intrin_uint8(); +void test_hal_intrin_int8(); +void test_hal_intrin_uint16(); +void test_hal_intrin_int16(); +void test_hal_intrin_uint32(); +void test_hal_intrin_int32(); +void test_hal_intrin_uint64(); +void test_hal_intrin_int64(); +void test_hal_intrin_float32(); +void test_hal_intrin_float64(); + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +//============= 8-bit integer ===================================================================== + +void test_hal_intrin_uint8() +{ + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_expand_q() + .test_addsub() + .test_addsub_wrap() + .test_cmp() + .test_logic() + .test_min_max() + .test_absdiff() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() + .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() + .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() + ; + +#if CV_SIMD256 + TheTest() + .test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>() + .test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>() + .test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>() + .test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>() + ; +#endif +} + +void test_hal_intrin_int8() +{ + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_expand_q() + .test_addsub() + .test_addsub_wrap() + .test_cmp() + .test_logic() + .test_min_max() + .test_absdiff() + .test_abs() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() + .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() + ; +} + +//============= 16-bit integer ===================================================================== + +void test_hal_intrin_uint16() +{ + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_addsub_wrap() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() + .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() + .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() + ; +} + +void test_hal_intrin_int16() +{ + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_addsub_wrap() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_dot_prod() + .test_logic() + .test_min_max() + .test_absdiff() + .test_abs() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() + .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() + ; +} + +//============= 32-bit integer ===================================================================== + +void test_hal_intrin_uint32() +{ + TheTest() + .test_loadstore() + 
.test_interleave() + .test_expand() + .test_addsub() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + .test_transpose() + ; +} + +void test_hal_intrin_int32() +{ + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_mul() + .test_abs() + .test_cmp() + .test_popcount() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + .test_float_cvt32() + .test_float_cvt64() + .test_transpose() + ; +} + +//============= 64-bit integer ===================================================================== + +void test_hal_intrin_uint64() +{ + TheTest() + .test_loadstore() + .test_addsub() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; +} + +void test_hal_intrin_int64() +{ + TheTest() + .test_loadstore() + .test_addsub() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; +} + +//============= Floating point ===================================================================== +void test_hal_intrin_float32() +{ + TheTest() + .test_loadstore() + .test_interleave() + .test_interleave_2channel() + .test_addsub() + .test_mul() + .test_div() + .test_cmp() + .test_sqrt_abs() + .test_min_max() + .test_float_absdiff() + .test_reduce() + .test_mask() + .test_unpack() + .test_float_math() + .test_float_cvt64() + .test_matmul() + .test_transpose() + .test_reduce_sum4() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + ; + +#if CV_SIMD256 + TheTest() + .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() + .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>() + ; +#endif +} + +void test_hal_intrin_float64() +{ +#if CV_SIMD_64F + TheTest() + .test_loadstore() + .test_addsub() + .test_mul() + .test_div() + .test_cmp() + .test_sqrt_abs() + .test_min_max() + .test_float_absdiff() + .test_mask() + .test_unpack() + .test_float_math() + .test_float_cvt32() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; + +#if CV_SIMD256 + TheTest() + .test_extract<2>().test_extract<3>() + .test_rotate<2>().test_rotate<3>() + ; +#endif //CV_SIMD256 + +#endif +} + +#if CV_FP16 && CV_SIMD_WIDTH > 16 +void test_hal_intrin_float16() +{ + TheTest() + .test_loadstore_fp16() + .test_float_cvt_fp16() + ; +} +#endif + +#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +CV_CPU_OPTIMIZATION_NAMESPACE_END + +}} //namespace \ No newline at end of file diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 2f8c1cf0b7..5f3175bc6c 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -13,6 +13,27 @@ void test_hal_intrin_float16(); template struct 
Data; template struct initializer; +template <> struct initializer<64> +{ + template static R init(const Data & d) + { + return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15], + d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31], + d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47], + d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[50], d[51], d[52], d[53], + d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]); + } +}; + +template <> struct initializer<32> +{ + template static R init(const Data & d) + { + return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15], + d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]); + } +}; + template <> struct initializer<16> { template static R init(const Data & d) @@ -125,6 +146,17 @@ template struct Data { return d + R::nlanes / 2; } + LaneType sum(int s, int c) + { + LaneType res = 0; + for (int i = s; i < s + c; ++i) + res += d[i]; + return res; + } + LaneType sum() + { + return sum(0, R::nlanes); + } bool operator==(const Data & other) const { for (int i = 0; i < R::nlanes; ++i) @@ -147,13 +179,12 @@ template struct Data return false; return true; } - LaneType d[R::nlanes]; }; template struct AlignedData { - Data CV_DECL_ALIGNED(16) a; // aligned + Data CV_DECL_ALIGNED(CV_SIMD_WIDTH) a; // aligned char dummy; Data u; // unaligned }; @@ -207,22 +238,22 @@ template struct TheTest AlignedData out; // check if addresses are aligned and unaligned respectively - EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); - EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); + EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH); + EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH); + EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH); + EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH); // check some initialization methods R r1 = data.a; - R r2 = v_load(data.u.d); - R r3 = v_load_aligned(data.a.d); + R r2 = vx_load(data.u.d); + R r3 = vx_load_aligned(data.a.d); R r4(r2); EXPECT_EQ(data.a[0], r1.get0()); EXPECT_EQ(data.u[0], r2.get0()); EXPECT_EQ(data.a[0], r3.get0()); EXPECT_EQ(data.u[0], r4.get0()); - R r_low = v_load_low((LaneType*)data.u.d); + R r_low = vx_load_low((LaneType*)data.u.d); EXPECT_EQ(data.u[0], r_low.get0()); v_store(out.u.d, r_low); for (int i = 0; i < R::nlanes/2; ++i) @@ -230,7 +261,7 @@ template struct TheTest EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]); } - R r_low_align8byte = v_load_low((LaneType*)((char*)data.u.d + 8)); + R r_low_align8byte = vx_load_low((LaneType*)((char*)data.u.d + (CV_SIMD_WIDTH / 2))); EXPECT_EQ(data.u[R::nlanes/2], r_low_align8byte.get0()); v_store(out.u.d, r_low_align8byte); for (int i = 0; i < R::nlanes/2; ++i) @@ -255,7 +286,7 @@ template struct TheTest // check halves load correctness res.clear(); - R r6 = v_load_halves(d.d, d.mid()); + R r6 = vx_load_halves(d.d, d.mid()); v_store(res.d, r6); EXPECT_EQ(d, res); @@ -270,17 +301,17 @@ template struct TheTest } // reinterpret_as - v_uint8x16 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a); - v_int8x16 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, 
vs8); EXPECT_EQ(data.a, out.a); - v_uint16x8 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a); - v_int16x8 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a); - v_uint32x4 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a); - v_int32x4 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a); - v_uint64x2 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a); - v_int64x2 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a); - v_float32x4 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a); -#if CV_SIMD128_64F - v_float64x2 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a); + v_uint8 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a); + v_int8 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a); + v_uint16 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a); + v_int16 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a); + v_uint32 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a); + v_int32 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a); + v_uint64 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a); + v_int64 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a); + v_float32 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a); +#if CV_SIMD_64F + v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a); #endif return *this; @@ -357,7 +388,7 @@ template struct TheTest Data dataA; R a = dataA; - Data resB = v_load_expand(dataA.d); + Data resB = vx_load_expand(dataA.d); Rx2 c, d; v_expand(a, c, d); @@ -378,7 +409,7 @@ template struct TheTest { typedef typename V_RegTraits::q_reg Rx4; Data data; - Data out = v_load_expand_q(data.d); + Data out = vx_load_expand_q(data.d); const int n = Rx4::nlanes; for (int i = 0; i < n; ++i) EXPECT_EQ(data[i], out[i]); @@ -610,7 +641,13 @@ template struct TheTest TheTest & test_popcount() { - static unsigned popcountTable[] = {0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33}; + static unsigned popcountTable[] = { + 0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33, + 35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81, + 83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123, + 128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172, + 176, 181, 186, 192, 193 + }; Data dataA; R a = dataA; @@ -918,7 +955,7 @@ template struct TheTest TheTest & test_float_cvt32() { - typedef v_float32x4 Rt; + typedef v_float32 Rt; Data dataA; dataA *= 1.1; R a = dataA; @@ -934,8 +971,8 @@ template struct TheTest TheTest & test_float_cvt64() { -#if CV_SIMD128_64F - typedef v_float64x2 Rt; +#if CV_SIMD_64F + typedef v_float64 Rt; Data dataA; dataA *= 1.1; R a = dataA; @@ -965,23 +1002,29 
@@ template struct TheTest R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD; Data res = v_matmul(v, a, b, c, d); - for (int i = 0; i < R::nlanes; ++i) + for (int i = 0; i < R::nlanes; i += 4) { - LaneType val = dataV[0] * dataA[i] - + dataV[1] * dataB[i] - + dataV[2] * dataC[i] - + dataV[3] * dataD[i]; - EXPECT_DOUBLE_EQ(val, res[i]); + for (int j = i; j < i + 4; ++j) + { + LaneType val = dataV[i] * dataA[j] + + dataV[i + 1] * dataB[j] + + dataV[i + 2] * dataC[j] + + dataV[i + 3] * dataD[j]; + EXPECT_COMPARE_EQ(val, res[j]); + } } Data resAdd = v_matmuladd(v, a, b, c, d); - for (int i = 0; i < R::nlanes; ++i) + for (int i = 0; i < R::nlanes; i += 4) { - LaneType val = dataV[0] * dataA[i] - + dataV[1] * dataB[i] - + dataV[2] * dataC[i] - + dataD[i]; - EXPECT_DOUBLE_EQ(val, resAdd[i]); + for (int j = i; j < i + 4; ++j) + { + LaneType val = dataV[i] * dataA[j] + + dataV[i + 1] * dataB[j] + + dataV[i + 2] * dataC[j] + + dataD[j]; + EXPECT_COMPARE_EQ(val, resAdd[j]); + } } return *this; } @@ -998,30 +1041,36 @@ template struct TheTest e, f, g, h); Data res[4] = {e, f, g, h}; - for (int i = 0; i < R::nlanes; ++i) + for (int i = 0; i < R::nlanes; i += 4) { - EXPECT_EQ(dataA[i], res[i][0]); - EXPECT_EQ(dataB[i], res[i][1]); - EXPECT_EQ(dataC[i], res[i][2]); - EXPECT_EQ(dataD[i], res[i][3]); + for (int j = 0; j < 4; ++j) + { + EXPECT_EQ(dataA[i + j], res[j][i]); + EXPECT_EQ(dataB[i + j], res[j][i + 1]); + EXPECT_EQ(dataC[i + j], res[j][i + 2]); + EXPECT_EQ(dataD[i + j], res[j][i + 3]); + } } return *this; } TheTest & test_reduce_sum4() { - R a(0.1f, 0.02f, 0.003f, 0.0004f); - R b(1, 20, 300, 4000); - R c(10, 2, 0.3f, 0.04f); - R d(1, 2, 3, 4); - - R sum = v_reduce_sum4(a, b, c, d); - - Data res = sum; - EXPECT_EQ(0.1234f, res[0]); - EXPECT_EQ(4321.0f, res[1]); - EXPECT_EQ(12.34f, res[2]); - EXPECT_EQ(10.0f, res[3]); + Data dataA, dataB, dataC, dataD; + dataB *= 0.01f; + dataC *= 0.001f; + dataD *= 0.002f; + + R a = dataA, b = dataB, c = dataC, d = dataD; + Data res = v_reduce_sum4(a, b, c, d); + + for (int i = 0; i < R::nlanes; i += 4) + { + EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]); + EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]); + EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]); + EXPECT_COMPARE_EQ(dataD.sum(i, 4), res[i + 3]); + } return *this; } @@ -1032,14 +1081,14 @@ template struct TheTest AlignedData out; // check if addresses are aligned and unaligned respectively - EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); - EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); + EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH); + EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH); + EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH); + EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH); // check some initialization methods R r1 = data.u; - R r2 = v_load_f16(data.a.d); + R r2 = vx_load_f16(data.a.d); R r3(r2); EXPECT_EQ(data.u[0], r1.get0()); EXPECT_EQ(data.a[0], r2.get0()); From 83039c8752961764a2f7ade885461bef307904de Mon Sep 17 00:00:00 2001 From: Kuang Fangjun Date: Mon, 30 Jul 2018 18:18:18 +0800 Subject: [PATCH 18/25] fix a typo. 
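For reference (paraphrasing the formula already given in the cornerHarris documentation block, so readers of this log do not have to open the header): the response map that the `k` parameter enters is the Harris measure

\f[ \texttt{dst}(x,y) = \mathrm{det}\, M^{(x,y)} - k \cdot \left( \mathrm{tr}\, M^{(x,y)} \right)^2 \f]

where M^(x,y) is the 2x2 gradient covariance matrix built over the blockSize neighborhood (see #cornerEigenValsAndVecs), so k only weights the squared-trace term. In the header this formula appears above the parameter list, not below it, hence the one-word change below.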
--- modules/imgproc/include/opencv2/imgproc.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index c760a54787..a831d0e92b 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1771,7 +1771,7 @@ Corners in the image can be found as the local maxima of this response map. size as src . @param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ). @param ksize Aperture parameter for the Sobel operator. -@param k Harris detector free parameter. See the formula below. +@param k Harris detector free parameter. See the formula above. @param borderType Pixel extrapolation method. See #BorderTypes. */ CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize, From 2988260107bf596e4e0d4d1de39d324172feeca1 Mon Sep 17 00:00:00 2001 From: miaow1988 Date: Mon, 30 Jul 2018 17:04:15 +0800 Subject: [PATCH 19/25] Fixed the int size overflow bug of cv::Mat.push_back(). Changed the type of variable *r* from int to size_t. This change makes sure that a valid result of std::max(r + delta, (r*3+1)/2) can be passed into the reserve function. --- modules/core/src/matrix.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 65ac200463..2da6ca6399 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -602,13 +602,13 @@ void Mat::pop_back(size_t nelems) void Mat::push_back_(const void* elem) { - int r = size.p[0]; + size_t r = size.p[0]; if( isSubmatrix() || dataend + step.p[0] > datalimit ) reserve( std::max(r + 1, (r*3+1)/2) ); size_t esz = elemSize(); memcpy(data + r*step.p[0], elem, esz); - size.p[0] = r + 1; + size.p[0] = int(r + 1); dataend += step.p[0]; uint64 tsz = size.p[0]; for( int i = 1; i < dims; i++ ) @@ -709,7 +709,8 @@ void Mat::resize(size_t nelems, const Scalar& s) void Mat::push_back(const Mat& elems) { - int r = size.p[0], delta = elems.size.p[0]; + size_t r = size.p[0]; + size_t delta = elems.size.p[0]; if( delta == 0 ) return; if( this == &elems ) @@ -726,7 +727,7 @@ void Mat::push_back(const Mat& elems) size.p[0] = elems.size.p[0]; bool eq = size == elems.size; - size.p[0] = r; + size.p[0] = int(r); if( !eq ) CV_Error(CV_StsUnmatchedSizes, "Pushed vector length is not equal to matrix row length"); if( type() != elems.type() ) @@ -735,7 +736,7 @@ void Mat::push_back(const Mat& elems) if( isSubmatrix() || dataend + step.p[0]*delta > datalimit ) reserve( std::max(r + delta, (r*3+1)/2) ); - size.p[0] += delta; + size.p[0] += int(delta); dataend += step.p[0]*delta; //updateContinuityFlag(*this); @@ -744,7 +745,7 @@ void Mat::push_back(const Mat& elems) memcpy(data + r*step.p[0], elems.data, elems.total()*elems.elemSize()); else { - Mat part = rowRange(r, r + delta); + Mat part = rowRange(int(r), int(r + delta)); elems.copyTo(part); } } From e90e398e7a64f48757c3a0e365256aef69f80dd2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Jul 2018 14:09:50 +0300 Subject: [PATCH 20/25] core(ocl): do not split refcount operations / compare - check result from CV_XADD() directly - decrease urefcount after unmap() call only --- modules/core/src/umatrix.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index f61126b3d9..151c4ac9cb 100644 --- a/modules/core/src/umatrix.cpp +++ 
b/modules/core/src/umatrix.cpp @@ -84,14 +84,11 @@ UMatData::~UMatData() allocatorFlags_ = 0; if (originalUMatData) { - UMatData* u = originalUMatData; - CV_XADD(&(u->urefcount), -1); - CV_XADD(&(u->refcount), -1); bool showWarn = false; - if (u->refcount == 0) + UMatData* u = originalUMatData; + bool zero_Ref = CV_XADD(&(u->refcount), -1) == 1; + if (zero_Ref) { - if (u->urefcount > 0) - showWarn = true; // simulate Mat::deallocate if (u->mapcount != 0) { @@ -102,7 +99,10 @@ UMatData::~UMatData() // we don't do "map", so we can't do "unmap" } } - if (u->refcount == 0 && u->urefcount == 0) // oops, we need to free resources + bool zero_URef = CV_XADD(&(u->urefcount), -1) == 1; + if (zero_Ref && !zero_URef) + showWarn = true; + if (zero_Ref && zero_URef) // oops, we need to free resources { showWarn = true; // simulate UMat::deallocate From db8585701d1f42b48b84743f86e26d6df72eb733 Mon Sep 17 00:00:00 2001 From: Suleyman TURKMEN Date: Sun, 22 Jul 2018 13:32:43 +0300 Subject: [PATCH 21/25] Update create_mask.cpp --- samples/cpp/create_mask.cpp | 126 ++++++++++++------------------------ 1 file changed, 42 insertions(+), 84 deletions(-) diff --git a/samples/cpp/create_mask.cpp b/samples/cpp/create_mask.cpp index b925cacba7..b90a7b70f2 100644 --- a/samples/cpp/create_mask.cpp +++ b/samples/cpp/create_mask.cpp @@ -12,26 +12,18 @@ #include "opencv2/imgproc.hpp" #include "opencv2/imgcodecs.hpp" #include "opencv2/highgui.hpp" -#include "opencv2/core.hpp" #include -#include using namespace std; using namespace cv; -Mat img0, img1, res1, final; +Mat src, img1, mask, final; Point point; +vector pts; int drag = 0; - -int numpts = 100; -Point* pts = new Point[100]; - int var = 0; int flag = 0; -int flag1 = 0; - -int minx,miny,maxx,maxy,lenx,leny; void mouseHandler(int, int, int, int, void*); @@ -40,16 +32,17 @@ void mouseHandler(int event, int x, int y, int, void*) if (event == EVENT_LBUTTONDOWN && !drag) { - if(flag1 == 0) + if (flag == 0) { - if(var==0) - img1 = img0.clone(); + if (var == 0) + img1 = src.clone(); point = Point(x, y); - circle(img1,point,2,Scalar(0, 0, 255),-1, 8, 0); - pts[var] = point; + circle(img1, point, 2, Scalar(0, 0, 255), -1, 8, 0); + pts.push_back(point); var++; drag = 1; - if(var>1) + + if (var > 1) line(img1,pts[var-2], point, Scalar(0, 0, 255), 2, 8, 0); imshow("Source", img1); @@ -59,103 +52,68 @@ void mouseHandler(int event, int x, int y, int, void*) if (event == EVENT_LBUTTONUP && drag) { imshow("Source", img1); - drag = 0; } + if (event == EVENT_RBUTTONDOWN) { - flag1 = 1; - img1 = img0.clone(); - for(int i = var; i < numpts ; i++) - pts[i] = point; + flag = 1; + img1 = src.clone(); - if(var!=0) + if (var != 0) { - const Point* pts3[1] = {&pts[0]}; - polylines( img1, pts3, &numpts,1, 1, Scalar(0,0,0), 2, 8, 0); + polylines( img1, pts, 1, Scalar(0,0,0), 2, 8, 0); } - for(int i=0;i > vpts; + vpts.push_back(pts); + fillPoly(mask, vpts, Scalar(255, 255, 255), 8, 0); + bitwise_and(src, src, final, mask); + imshow("Mask", mask); + imshow("Result", final); imshow("Source", img1); - } + if (event == EVENT_MBUTTONDOWN) { - for(int i = 0; i < numpts ; i++) - { - pts[i].x=0; - pts[i].y=0; - } + pts.clear(); var = 0; - flag1 = 0; - minx = INT_MAX; miny = INT_MAX; maxx = INT_MIN; maxy = INT_MIN; - imshow("Source", img0); drag = 0; + flag = 0; + imshow("Source", src); } } -static void help() -{ - cout << "\nThis program demonstrates using mouse events" - "\nCall:\n" - "./create_mask \n" - "\n" - "\tleft mouse button - set a point to create mask shape" - "\n" - "\tright mouse button - 
create mask from points\n" - "\tmiddle mouse button - reset\n" << endl; -} - int main(int argc, char **argv) { - cv::CommandLineParser parser(argc, argv, "{@input | ../data/lena.jpg | input image}"); - help(); - string input_image = parser.get("@input"); - if (input_image.empty()) - { - parser.printMessage(); - parser.printErrors(); - return 0; - } - - Mat src = imread(input_image); - - minx = INT_MAX; miny = INT_MAX; maxx = INT_MIN; maxy = INT_MIN; + CommandLineParser parser(argc, argv, "{@input | ../data/lena.jpg | input image}"); + parser.about("This program demonstrates using mouse events\n"); + parser.printMessage(); + cout << "\n\tleft mouse button - set a point to create mask shape\n" + "\tright mouse button - create mask from points\n" + "\tmiddle mouse button - reset\n"; + String input_image = parser.get("@input"); - img0 = src; + src = imread(input_image); - res1 = Mat::zeros(img0.size(),CV_8UC1); - final = Mat::zeros(img0.size(),CV_8UC3); - //////////// source image /////////////////// + if (src.empty()) + { + printf("Error opening image: %s\n", input_image.c_str()); + return 0; + } - namedWindow("Source", 1); + namedWindow("Source", WINDOW_AUTOSIZE); setMouseCallback("Source", mouseHandler, NULL); - imshow("Source", img0); + imshow("Source", src); waitKey(0); return 0; From bb82cdc9281d040b1a54ba654e60d2f8e47c746d Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Tue, 31 Jul 2018 09:47:11 +0200 Subject: [PATCH 22/25] core:test Fix fp16 build if AVX2 sets as baseline --- modules/core/test/test_intrin.fp16.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/test/test_intrin.fp16.cpp b/modules/core/test/test_intrin.fp16.cpp index 893c5f147a..9f6416bcf8 100644 --- a/modules/core/test/test_intrin.fp16.cpp +++ b/modules/core/test/test_intrin.fp16.cpp @@ -9,7 +9,7 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN void test_hal_intrin_float16() { - TheTest() + TheTest() .test_loadstore_fp16() .test_float_cvt_fp16() ; From ed0e79cb615e53122cb7e6c0512caf12d1083929 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 31 Jul 2018 11:37:45 +0300 Subject: [PATCH 23/25] Add missing parameter to DetectionOutput layer from Intel's Inference Engine --- modules/dnn/src/layers/detection_output_layer.cpp | 1 + modules/dnn/test/test_backends.cpp | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index fdcaab02e3..7473751707 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -919,6 +919,7 @@ public: ieLayer->params["nms_threshold"] = format("%f", _nmsThreshold); ieLayer->params["top_k"] = format("%d", _topK); ieLayer->params["keep_top_k"] = format("%d", _keepTopK); + ieLayer->params["eta"] = "1.0"; ieLayer->params["confidence_threshold"] = format("%f", _confidenceThreshold); ieLayer->params["variance_encoded_in_target"] = _varianceEncodedInTarget ? "1" : "0"; ieLayer->params["code_type"] = "caffe.PriorBoxParameter." 
+ _codeType; diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 49e1a2a983..60beca272b 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -175,7 +175,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow) Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0; + float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0; processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt", inp, "detection_out", "", l1, lInf, 0.25); } @@ -233,11 +233,8 @@ TEST_P(DNNTestNetwork, opencv_face_detector) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); - Size inpSize; - if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) - inpSize = Size(300, 300); Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false)); - Mat inp = blobFromImage(img, 1.0, inpSize, Scalar(104.0, 177.0, 123.0), false, false); + Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", inp, "detection_out"); } @@ -249,7 +246,7 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.008 : 0.0; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.07 : 0.0; + float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.0731 : 0.0; processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", inp, "detection_out", "", l1, lInf); } From 7cf52de47efae7d7b698e4d6e2a5ea0b78e22792 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 30 Jul 2018 18:21:17 +0300 Subject: [PATCH 24/25] dnn: modified IE search, R2 compatibility fixed --- CMakeLists.txt | 18 ++-- cmake/OpenCVDetectInferenceEngine.cmake | 130 ++++++++++++------------ modules/dnn/CMakeLists.txt | 16 +-- modules/dnn/src/op_inf_engine.cpp | 2 + modules/dnn/src/op_inf_engine.hpp | 14 ++- 5 files changed, 95 insertions(+), 85 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 898a837b77..f436fc2cea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1407,15 +1407,19 @@ if(WITH_HALIDE OR HAVE_HALIDE) status(" Halide:" HAVE_HALIDE THEN "YES (${HALIDE_LIBRARIES} ${HALIDE_INCLUDE_DIRS})" ELSE NO) endif() -if(WITH_INF_ENGINE OR HAVE_INF_ENGINE) - if(HAVE_INF_ENGINE) - set(__msg "YES") - if(DEFINED INF_ENGINE_VERSION) - set(__msg "YES (ver ${INF_ENGINE_VERSION})") +if(WITH_INF_ENGINE OR INF_ENGINE_TARGET) + if(INF_ENGINE_TARGET) + set(__msg "YES (${INF_ENGINE_RELEASE} / ${INF_ENGINE_VERSION})") + get_target_property(_lib ${INF_ENGINE_TARGET} IMPORTED_LOCATION) + if(NOT _lib) + get_target_property(_lib_rel ${INF_ENGINE_TARGET} IMPORTED_IMPLIB_RELEASE) + get_target_property(_lib_dbg ${INF_ENGINE_TARGET} IMPORTED_IMPLIB_DEBUG) + set(_lib "${_lib_rel} / ${_lib_dbg}") endif() + get_target_property(_inc ${INF_ENGINE_TARGET} INTERFACE_INCLUDE_DIRECTORIES) status(" Inference Engine:" "${__msg}") - status(" libs:" "${INF_ENGINE_LIBRARIES}") - status(" includes:" "${INF_ENGINE_INCLUDE_DIRS}") + status(" libs:" "${_lib}") + status(" includes:" "${_inc}") else() status(" Inference Engine:" "NO") endif() diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index 366509ed39..e5e64fc6db 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ b/cmake/OpenCVDetectInferenceEngine.cmake @@ -1,83 +1,87 @@ # The script detects Intel(R) Inference Engine installation # -# Parameters: -# INTEL_CVSDK_DIR - Path to Inference Engine root folder -# IE_PLUGINS_PATH - Path to folder with Inference Engine plugins +# Cache variables: +# INF_ENGINE_OMP_DIR - directory with OpenMP library to link with (needed by some versions of IE) +# INF_ENGINE_RELEASE - a number reflecting IE source interface (linked with OpenVINO release) # -# On return this will define: +# Detect parameters: +# 1. Native cmake IE package: +# - enironment variable InferenceEngine_DIR is set to location of cmake module +# 2. Custom location: +# - INF_ENGINE_INCLUDE_DIRS - headers search location +# - INF_ENGINE_LIB_DIRS - library search location +# 3. OpenVINO location: +# - environment variable INTEL_CVSDK_DIR is set to location of OpenVINO installation dir +# - INF_ENGINE_PLATFORM - part of name of library directory representing its platform (default ubuntu_16.04) # -# HAVE_INF_ENGINE - True if Intel Inference Engine was found -# INF_ENGINE_INCLUDE_DIRS - Inference Engine include folder -# INF_ENGINE_LIBRARIES - Inference Engine libraries and it's dependencies +# Result: +# INF_ENGINE_TARGET - set to name of imported library target representing InferenceEngine # -macro(ie_fail) - set(HAVE_INF_ENGINE FALSE) - return() -endmacro() if(NOT HAVE_CXX11) message(WARNING "DL Inference engine requires C++11. 
You can turn it on via ENABLE_CXX11=ON CMake flag.") - ie_fail() -endif() - -find_package(InferenceEngine QUIET) -if(InferenceEngine_FOUND) - set(INF_ENGINE_LIBRARIES "${InferenceEngine_LIBRARIES}") - set(INF_ENGINE_INCLUDE_DIRS "${InferenceEngine_INCLUDE_DIRS}") - set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}") - set(HAVE_INF_ENGINE TRUE) - return() + return() endif() -ocv_check_environment_variables(INTEL_CVSDK_DIR INF_ENGINE_ROOT_DIR IE_PLUGINS_PATH) +# ======================= -if(NOT INF_ENGINE_ROOT_DIR OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/include/inference_engine.hpp") - set(ie_root_paths "${INF_ENGINE_ROOT_DIR}") - if(DEFINED INTEL_CVSDK_DIR) - list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}/") - list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}/deployment_tools/inference_engine") - endif() +function(add_custom_ie_build _inc _lib _lib_rel _lib_dbg _msg) + if(NOT _inc OR NOT (_lib OR _lib_rel OR _lib_dbg)) + return() + endif() + add_library(inference_engine UNKNOWN IMPORTED) + set_target_properties(inference_engine PROPERTIES + IMPORTED_LOCATION "${_lib}" + IMPORTED_IMPLIB_RELEASE "${_lib_rel}" + IMPORTED_IMPLIB_DEBUG "${_lib_dbg}" + INTERFACE_INCLUDE_DIRECTORIES "${_inc}" + ) + find_library(omp_lib iomp5 PATHS "${INF_ENGINE_OMP_DIR}" NO_DEFAULT_PATH) + if(NOT omp_lib) + message(WARNING "OpenMP for IE have not been found. Set INF_ENGINE_OMP_DIR variable if you experience build errors.") + else() + set_target_properties(inference_engine PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${omp_lib}") + endif() + set(INF_ENGINE_VERSION "Unknown" CACHE STRING "") + set(INF_ENGINE_TARGET inference_engine PARENT_SCOPE) + message(STATUS "Detected InferenceEngine: ${_msg}") +endfunction() - if(NOT ie_root_paths) - list(APPEND ie_root_paths "/opt/intel/computer_vision_sdk/deployment_tools/inference_engine/") - endif() +# ====================== - find_path(INF_ENGINE_ROOT_DIR include/inference_engine.hpp PATHS ${ie_root_paths}) - if(INF_ENGINE_ROOT_DIR MATCHES "-NOTFOUND$") - unset(INF_ENGINE_ROOT_DIR CACHE) - endif() +find_package(InferenceEngine QUIET) +if(InferenceEngine_FOUND) + set(INF_ENGINE_TARGET IE::inference_engine) + set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}" CACHE STRING "") + message(STATUS "Detected InferenceEngine: cmake package") endif() -set(INF_ENGINE_INCLUDE_DIRS "${INF_ENGINE_ROOT_DIR}/include" CACHE PATH "Path to Inference Engine include directory") - -if(NOT INF_ENGINE_ROOT_DIR - OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}" - OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/include/inference_engine.hpp" -) - message(WARNING "DL IE: Can't detect INF_ENGINE_ROOT_DIR location.") - ie_fail() +if(NOT INF_ENGINE_TARGET AND INF_ENGINE_LIB_DIRS AND INF_ENGINE_INCLUDE_DIRS) + find_path(ie_custom_inc "inference_engine.hpp" PATHS "${INF_ENGINE_INCLUDE_DIRS}" NO_DEFAULT_PATH) + find_library(ie_custom_lib "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}" NO_DEFAULT_PATH) + find_library(ie_custom_lib_rel "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Release" NO_DEFAULT_PATH) + find_library(ie_custom_lib_dbg "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Debug" NO_DEFAULT_PATH) + add_custom_ie_build("${ie_custom_inc}" "${ie_custom_lib}" "${ie_custom_lib_rel}" "${ie_custom_lib_dbg}" "INF_ENGINE_{INCLUDE,LIB}_DIRS") endif() -set(INF_ENGINE_LIBRARIES "") - -set(ie_lib_list inference_engine) - -if(NOT IS_ABSOLUTE "${IE_PLUGINS_PATH}") - set(IE_PLUGINS_PATH "${INF_ENGINE_ROOT_DIR}/${IE_PLUGINS_PATH}") +set(_loc "$ENV{INTEL_CVSDK_DIR}") +if(NOT INF_ENGINE_TARGET AND _loc) + set(INF_ENGINE_PLATFORM 
"ubuntu_16.04" CACHE STRING "InferenceEngine platform (library dir)") + find_path(ie_custom_env_inc "inference_engine.hpp" PATHS "${_loc}/deployment_tools/inference_engine/include" NO_DEFAULT_PATH) + find_library(ie_custom_env_lib "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/${INF_ENGINE_PLATFORM}/intel64" NO_DEFAULT_PATH) + find_library(ie_custom_env_lib_rel "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Release" NO_DEFAULT_PATH) + find_library(ie_custom_env_lib_dbg "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Debug" NO_DEFAULT_PATH) + add_custom_ie_build("${ie_custom_env_inc}" "${ie_custom_env_lib}" "${ie_custom_env_lib_rel}" "${ie_custom_env_lib_dbg}" "OpenVINO (${_loc})") endif() -link_directories( - ${INF_ENGINE_ROOT_DIR}/external/mkltiny_lnx/lib - ${INF_ENGINE_ROOT_DIR}/external/cldnn/lib -) - -foreach(lib ${ie_lib_list}) - find_library(${lib} NAMES ${lib} HINTS ${IE_PLUGINS_PATH}) - if(NOT ${lib}) - message(WARNING "DL IE: Can't find library: '${lib}'") - ie_fail() - endif() - list(APPEND INF_ENGINE_LIBRARIES ${${lib}}) -endforeach() +# Add more features to the target -set(HAVE_INF_ENGINE TRUE) +if(INF_ENGINE_TARGET) + if(NOT INF_ENGINE_RELEASE) + message(WARNING "InferenceEngine version have not been set, 2018R2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") + endif() + set(INF_ENGINE_RELEASE "2018020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)") + set_target_properties(${INF_ENGINE_TARGET} PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" + ) +endif() diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index a4cdc18cf7..64fefb3509 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -85,12 +85,6 @@ else() set(sources_options EXCLUDE_OPENCL) endif() -if(HAVE_INF_ENGINE) - add_definitions(-DHAVE_INF_ENGINE=1) - list(APPEND include_dirs ${INF_ENGINE_INCLUDE_DIRS}) - list(APPEND libs ${INF_ENGINE_LIBRARIES}) -endif() - ocv_module_include_directories(${include_dirs}) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC @@ -98,9 +92,9 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override") # Clang endif() ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs}) -ocv_create_module(${libs}) +ocv_create_module(${libs} ${INF_ENGINE_TARGET}) ocv_add_samples() -ocv_add_accuracy_tests() +ocv_add_accuracy_tests(${INF_ENGINE_TARGET}) ocv_add_perf_tests() ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF) @@ -120,9 +114,3 @@ if(BUILD_PERF_TESTS) endif() endif() endif() - -# Test Intel's Inference Engine models -if(HAVE_INF_ENGINE AND TARGET opencv_test_dnn) - ocv_target_include_directories(opencv_test_dnn PRIVATE ${INF_ENGINE_INCLUDE_DIRS}) - ocv_target_link_libraries(opencv_test_dnn LINK_PRIVATE ${INF_ENGINE_LIBRARIES}) -endif() diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index eb409eebbc..7d94b9d54a 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -334,6 +334,7 @@ size_t InfEngineBackendNet::getBatchSize() const noexcept return 0; } +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2) InferenceEngine::StatusCode 
InfEngineBackendNet::AddExtension(const InferenceEngine::IShapeInferExtensionPtr &extension, InferenceEngine::ResponseDesc *resp) noexcept { CV_Error(Error::StsNotImplemented, ""); @@ -345,6 +346,7 @@ InferenceEngine::StatusCode InfEngineBackendNet::reshape(const InferenceEngine:: CV_Error(Error::StsNotImplemented, ""); return InferenceEngine::StatusCode::OK; } +#endif void InfEngineBackendNet::init(int targetId) { diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index a5ad63fb5f..a811f4eae7 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -21,6 +21,17 @@ #if defined(__GNUC__) && __GNUC__ >= 5 //#pragma GCC diagnostic pop #endif + +#define INF_ENGINE_RELEASE_2018R1 2018010000 +#define INF_ENGINE_RELEASE_2018R2 2018020000 + +#ifndef INF_ENGINE_RELEASE +#warning("IE version have not been provided via command-line. Using 2018R2 by default") +#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2018R2 +#endif + +#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000)) + #endif // HAVE_INF_ENGINE namespace cv { namespace dnn { @@ -92,9 +103,10 @@ public: virtual size_t getBatchSize() const noexcept CV_OVERRIDE; +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2) virtual InferenceEngine::StatusCode AddExtension(const InferenceEngine::IShapeInferExtensionPtr& extension, InferenceEngine::ResponseDesc* resp) noexcept; - virtual InferenceEngine::StatusCode reshape(const InputShapes& inputShapes, InferenceEngine::ResponseDesc* resp) noexcept; +#endif void init(int targetId); From 5aceee5a36c28f4e2710329d9adb2a156191d0db Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 31 Jul 2018 19:34:19 +0300 Subject: [PATCH 25/25] Restored tests dependencies processing --- cmake/OpenCVModule.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index db439b3981..00d15dc6d9 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -1132,7 +1132,7 @@ function(ocv_add_perf_tests) source_group("Src" FILES "${${the_target}_pch}") ocv_add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch}) ocv_target_include_modules(${the_target} ${perf_deps} "${perf_path}") - ocv_target_link_libraries(${the_target} LINK_PRIVATE ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS}) + ocv_target_link_libraries(${the_target} LINK_PRIVATE ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_PERF_${the_module}_DEPS}) add_dependencies(opencv_perf_tests ${the_target}) set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};PerfTest") @@ -1175,7 +1175,7 @@ function(ocv_add_perf_tests) endfunction() # this is a command for adding OpenCV accuracy/regression tests to the module -# ocv_add_accuracy_tests([FILES ] [DEPENDS_ON] ) +# ocv_add_accuracy_tests() function(ocv_add_accuracy_tests) ocv_debug_message("ocv_add_accuracy_tests(" ${ARGN} ")") @@ -1211,7 +1211,7 @@ function(ocv_add_accuracy_tests) source_group("Src" FILES "${${the_target}_pch}") ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch}) ocv_target_include_modules(${the_target} ${test_deps} "${test_path}") - ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS}) + ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} 
${OPENCV_LINKER_LIBS} ${OPENCV_TEST_${the_module}_DEPS}) add_dependencies(opencv_tests ${the_target}) set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};AccuracyTest")
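As a closing illustration of the version gating introduced in PATCH 24, here is a minimal standalone sketch (not part of any patch above; the macro values are copied from the op_inf_engine.hpp hunk, and the INF_ENGINE_RELEASE value 2018020002 is the 2018R2.0.2 example from the CMake hunk). It shows that INF_ENGINE_VER_MAJOR_GT() compares only the YYYYAA prefix of the YYYYAABBCC number, so a 2018R2.0.2 build is not treated as "greater than" 2018R2 and the extra ICNNNetwork overrides stay disabled:

#include <cstdio>

#define INF_ENGINE_RELEASE_2018R1 2018010000
#define INF_ENGINE_RELEASE_2018R2 2018020000

// Assumed build value for this sketch: 2018R2.0.2 encoded as YYYYAABBCC.
#define INF_ENGINE_RELEASE 2018020002

// Same comparison as in op_inf_engine.hpp: integer division by 10000
// discards the BBCC (minor/patch) digits before comparing.
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))

int main()
{
    // 2018020002 / 10000 == 201802, so only year + release are compared.
    std::printf("newer than 2018R1: %d\n", INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R1)); // prints 1
    std::printf("newer than 2018R2: %d\n", INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2)); // prints 0
    return 0;
}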