diff --git a/3rdparty/openjpeg/CMakeLists.txt b/3rdparty/openjpeg/CMakeLists.txt index 3a7ffaf002..d3db9e8c47 100644 --- a/3rdparty/openjpeg/CMakeLists.txt +++ b/3rdparty/openjpeg/CMakeLists.txt @@ -15,6 +15,7 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wimplicit-const-int-float-conversion # clang -Wunused-but-set-variable # clang15 -Wmissing-prototypes # clang, function opj_t1_ht_decode_cblk + -Wmissing-declarations # gcc, function opj_t1_ht_decode_cblk ) #----------------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 2deb0a2ad2..76eb4c493a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,6 +241,8 @@ OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O (iOS/Mac)" ON VISIBLE_IF APPLE VERIFY HAVE_AVFOUNDATION) +OCV_OPTION(WITH_AVIF "Enable AVIF support" OFF + VERIFY HAVE_AVIF) OCV_OPTION(WITH_CAP_IOS "Enable iOS video capture" ON VISIBLE_IF IOS VERIFY HAVE_CAP_IOS) @@ -1390,6 +1392,14 @@ if(WITH_WEBP OR HAVE_WEBP) status(" WEBP:" WEBP_FOUND THEN "${WEBP_LIBRARY} (ver ${WEBP_VERSION})" ELSE "build (ver ${WEBP_VERSION})") endif() +if(WITH_AVIF OR HAVE_AVIF) + if(AVIF_VERSION) + status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY} (ver ${AVIF_VERSION})" ELSE "NO") + else() + status(" AVIF:" AVIF_FOUND THEN "${AVIF_LIBRARY}" ELSE "NO") + endif() +endif() + if(WITH_PNG OR HAVE_PNG OR WITH_SPNG) if(WITH_SPNG) status(" PNG:" "build-${SPNG_LIBRARY} (ver ${SPNG_VERSION})") diff --git a/cmake/OpenCVFindAVIF.cmake b/cmake/OpenCVFindAVIF.cmake new file mode 100644 index 0000000000..26195a7769 --- /dev/null +++ b/cmake/OpenCVFindAVIF.cmake @@ -0,0 +1,46 @@ +#============================================================================= +# Find AVIF library +#============================================================================= +# Find the native AVIF headers and libraries. +# +# AVIF_INCLUDE_DIRS - where to find avif/avif.h, etc. +# AVIF_LIBRARIES - List of libraries when using AVIF. +# AVIF_FOUND - True if AVIF is found. +#============================================================================= + +# Look for the header file. + +unset(AVIF_FOUND) + +find_package(libavif QUIET) + +if(TARGET avif) + MARK_AS_ADVANCED(AVIF_INCLUDE_DIR) + MARK_AS_ADVANCED(AVIF_LIBRARY) + + SET(AVIF_FOUND TRUE) + GET_TARGET_PROPERTY(AVIF_LIBRARY avif LOCATION) + GET_TARGET_PROPERTY(AVIF_INCLUDE_DIR1 avif INCLUDE_DIRECTORIES) + GET_TARGET_PROPERTY(AVIF_INCLUDE_DIR2 avif INTERFACE_INCLUDE_DIRECTORIES) + set(AVIF_INCLUDE_DIR) + if(AVIF_INCLUDE_DIR1) + LIST(APPEND AVIF_INCLUDE_DIR ${AVIF_INCLUDE_DIR1}) + endif() + if(AVIF_INCLUDE_DIR2) + LIST(APPEND AVIF_INCLUDE_DIR ${AVIF_INCLUDE_DIR2}) + endif() +else() + FIND_PATH(AVIF_INCLUDE_DIR NAMES avif/avif.h) + + # Look for the library. 
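+  # (Fallback path: find_package(libavif) above did not provide an imported
+  #  'avif' target, so search for the library directly.)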
+  FIND_LIBRARY(AVIF_LIBRARY NAMES avif)
+  MARK_AS_ADVANCED(AVIF_LIBRARY)
+
+  # handle the QUIETLY and REQUIRED arguments and set AVIF_FOUND to TRUE if
+  # all listed variables are TRUE
+  INCLUDE(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)
+  FIND_PACKAGE_HANDLE_STANDARD_ARGS(AVIF DEFAULT_MSG AVIF_LIBRARY AVIF_INCLUDE_DIR)
+
+  SET(AVIF_LIBRARIES ${AVIF_LIBRARY})
+  SET(AVIF_INCLUDE_DIRS ${AVIF_INCLUDE_DIR})
+endif()
diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake
index 4e8a1de17a..e544f78eaa 100644
--- a/cmake/OpenCVFindLibsGrfmt.cmake
+++ b/cmake/OpenCVFindLibsGrfmt.cmake
@@ -37,6 +37,16 @@ if(NOT ZLIB_FOUND)
   ocv_parse_header2(ZLIB "${${ZLIB_LIBRARY}_SOURCE_DIR}/zlib.h" ZLIB_VERSION)
 endif()
 
+# --- libavif (optional) ---
+
+if(WITH_AVIF)
+  ocv_clear_internal_cache_vars(AVIF_LIBRARY AVIF_INCLUDE_DIR)
+  include(cmake/OpenCVFindAVIF.cmake)
+  if(AVIF_FOUND)
+    set(HAVE_AVIF 1)
+  endif()
+endif()
+
 # --- libjpeg (optional) ---
 if(WITH_JPEG)
   if(BUILD_JPEG)
diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in
index db2d8792d6..b1e27ce59a 100644
--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@@ -72,6 +72,9 @@
 #cmakedefine HAVE_OPENJPEG
 #cmakedefine HAVE_JASPER
 
+/* AVIF codec */
+#cmakedefine HAVE_AVIF
+
 /* IJG JPEG codec */
 #cmakedefine HAVE_JPEG
 
diff --git a/cmake/templates/opencv_run_all_tests_unix.sh.in b/cmake/templates/opencv_run_all_tests_unix.sh.in
index f92d7c6135..2375d1ffb1 100644
--- a/cmake/templates/opencv_run_all_tests_unix.sh.in
+++ b/cmake/templates/opencv_run_all_tests_unix.sh.in
@@ -18,7 +18,7 @@ EOF
 
 # Parse options
 COLOR_OUTPUT=0
-while getopts “hc” OPTION
+while getopts "hc" OPTION
 do
     case $OPTION in
         h)
diff --git a/doc/py_tutorials/py_feature2d/py_fast/py_fast.markdown b/doc/py_tutorials/py_feature2d/py_fast/py_fast.markdown
index 1d7b3e3911..29e385c64d 100644
--- a/doc/py_tutorials/py_feature2d/py_fast/py_fast.markdown
+++ b/doc/py_tutorials/py_feature2d/py_fast/py_fast.markdown
@@ -133,7 +133,7 @@ nonmaxSuppression:
 Additional Resources
 --------------------
 
--# Edward Rosten and Tom Drummond, “Machine learning for high speed corner detection” in 9th
+-# Edward Rosten and Tom Drummond, "Machine learning for high speed corner detection" in 9th
    European Conference on Computer Vision, vol. 1, 2006, pp. 430–443.
 2. Edward Rosten, Reid Porter, and Tom Drummond, "Faster and better: a machine learning approach
    to corner detection" in IEEE Trans. Pattern Analysis and Machine Intelligence, 2010, vol 32, pp.
diff --git a/doc/tutorials/others/barcode_detect_and_decode.markdown b/doc/tutorials/others/barcode_detect_and_decode.markdown
new file mode 100644
index 0000000000..edfe9b8c10
--- /dev/null
+++ b/doc/tutorials/others/barcode_detect_and_decode.markdown
@@ -0,0 +1,76 @@
+Barcode Recognition {#tutorial_barcode_detect_and_decode}
+===================
+
+@tableofcontents
+
+@prev_tutorial{tutorial_traincascade}
+@next_tutorial{tutorial_introduction_to_svm}
+
+|    |    |
+| -: | :- |
+| Compatibility | OpenCV >= 4.8 |
+
+Goal
+----
+
+In this chapter, we will become familiar with the barcode detection and decoding methods available in OpenCV.
+
+Basics
+----
+
+Barcodes are a major technique for identifying commodities in real life. A common barcode is a pattern of parallel black and white bars with vastly different reflectivity.
+Barcode recognition works by scanning the barcode in the horizontal direction to obtain a string of binary codes composed of bars of varying widths and colors; this is the encoded information of the barcode. The content of a barcode can then be decoded by matching it against the various barcode encoding methods. Currently, we support EAN-8, EAN-13, UPC-A and UPC-E standards.
+
+See https://en.wikipedia.org/wiki/Universal_Product_Code and https://en.wikipedia.org/wiki/International_Article_Number
+
+Related papers: @cite Xiangmin2015research , @cite kass1987analyzing , @cite bazen2002systematic
+
+Code example
+------------
+
+### Main class
+Several algorithms were introduced for barcode recognition.
+
+In code, we first need to create a cv::barcode::BarcodeDetector object. Its three main member functions are introduced below.
+
+#### Initialization
+
+Optionally, the user can construct the barcode detector with a super-resolution model, which should be downloaded from https://github.com/WeChatCV/opencv_3rdparty/tree/wechat_qrcode (`sr.caffemodel`, `sr.prototxt`).
+
+@snippet cpp/barcode.cpp initialize
+
+We need to create variables to store the outputs.
+
+@snippet cpp/barcode.cpp output
+
+#### Detecting
+
+The cv::barcode::BarcodeDetector::detect method uses an algorithm based on directional coherence. First, we compute the average squared gradients of every pixel, @cite bazen2002systematic . Then we divide the image into square patches and compute the **gradient orientation coherence** and **mean gradient direction** of each patch. Then, we connect all patches that have **high gradient orientation coherence** and **similar gradient direction**. At this stage we use multiscale patches to capture the gradient distribution of multi-size barcodes, and apply non-maximum suppression to filter duplicate proposals. Finally, we use cv::minAreaRect to bound the ROI, and output the corners of the rectangles.
+
+Detect codes in the input image, and output the corners of the detected rectangles:
+
+@snippet cpp/barcode.cpp detect
+
+#### Decoding
+
+The cv::barcode::BarcodeDetector::decode method first (optionally) upscales the image with the super-resolution model if it is smaller than a threshold, sharpens it, and then binarizes it using Otsu's method or local binarization. It then reads the content of the barcode by matching against the patterns of the supported encodings.
+
+#### Detecting and decoding
+
+cv::barcode::BarcodeDetector::detectAndDecode combines `detect` and `decode` in a single call.
+A simple example below shows how to use this function:
+
+@snippet cpp/barcode.cpp detectAndDecode
+
+Visualize the results:
+
+@snippet cpp/barcode.cpp visualize
+
+Results
+-------
+
+Original image:
+
+![image](images/barcode_book.jpg)
+
+After detection:
+
+![image](images/barcode_book_res.jpg)
diff --git a/doc/tutorials/others/images/barcode_book.jpg b/doc/tutorials/others/images/barcode_book.jpg
new file mode 100644
index 0000000000..5b467d58ea
Binary files /dev/null and b/doc/tutorials/others/images/barcode_book.jpg differ
diff --git a/doc/tutorials/others/images/barcode_book_res.jpg b/doc/tutorials/others/images/barcode_book_res.jpg
new file mode 100644
index 0000000000..b672f48727
Binary files /dev/null and b/doc/tutorials/others/images/barcode_book_res.jpg differ
diff --git a/doc/tutorials/others/introduction_to_svm.markdown b/doc/tutorials/others/introduction_to_svm.markdown
index 6899a7acb5..11c9fbaf78 100644
--- a/doc/tutorials/others/introduction_to_svm.markdown
+++ b/doc/tutorials/others/introduction_to_svm.markdown
@@ -3,7 +3,7 @@ Introduction to Support Vector Machines {#tutorial_introduction_to_svm}
 
 @tableofcontents
 
-@prev_tutorial{tutorial_traincascade}
+@prev_tutorial{tutorial_barcode_detect_and_decode}
 @next_tutorial{tutorial_non_linear_svms}
 
 |    |    |
diff --git a/doc/tutorials/others/table_of_content_other.markdown b/doc/tutorials/others/table_of_content_other.markdown
index a004df63e2..b4bbf62777 100644
--- a/doc/tutorials/others/table_of_content_other.markdown
+++ b/doc/tutorials/others/table_of_content_other.markdown
@@ -8,6 +8,7 @@ Other tutorials (ml, objdetect, photo, stitching, video) {#tutorial_table_of_con
 - video. @subpage tutorial_optical_flow
 - objdetect. @subpage tutorial_cascade_classifier
 - objdetect. @subpage tutorial_traincascade
+- objdetect. @subpage tutorial_barcode_detect_and_decode
 - ml. @subpage tutorial_introduction_to_svm
 - ml. @subpage tutorial_non_linear_svms
 - ml. @subpage tutorial_introduction_to_pca
diff --git a/doc/tutorials/others/traincascade.markdown b/doc/tutorials/others/traincascade.markdown
index e7c3018187..03d93d5b94 100644
--- a/doc/tutorials/others/traincascade.markdown
+++ b/doc/tutorials/others/traincascade.markdown
@@ -4,7 +4,7 @@ Cascade Classifier Training {#tutorial_traincascade}
 
 @tableofcontents
 
 @prev_tutorial{tutorial_cascade_classifier}
-@next_tutorial{tutorial_introduction_to_svm}
+@next_tutorial{tutorial_barcode_detect_and_decode}
 
 Introduction
 ------------
diff --git a/modules/3d/src/usac/quality.cpp b/modules/3d/src/usac/quality.cpp
index 7afd3324cb..05e5c797c6 100644
--- a/modules/3d/src/usac/quality.cpp
+++ b/modules/3d/src/usac/quality.cpp
@@ -410,9 +410,9 @@ public:
      * 1. Check whether j-th data point is consistent with the
      *    model
      * 2. Compute the likelihood ratio λj eq. (1)
-     * 3. If λj > A, decide the model is ’bad’ (model ”re-jected”),
+     * 3. If λj > A, decide the model is 'bad' (model "rejected"),
      *    else increment j or continue testing
-     * 4. If j = N the number of correspondences decide model ”accepted”
+     * 4. If j = N (the number of correspondences), decide model "accepted"
      *
      * Verifies model and returns model score.
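The four-step SPRT description in the comment above is easy to mis-read in prose; here is a minimal, self-contained sketch of that accept/reject loop (illustrative only — `delta`, `epsilon`, `A` and the function name are hypothetical, not the OpenCV implementation):

```cpp
#include <cstddef>
#include <vector>

// SPRT sketch: delta   = P(point is consistent | model is 'bad'),
//              epsilon = P(point is consistent | model is 'good'),
//              A       = decision threshold.
static bool sprtModelAccepted(const std::vector<bool>& consistent,
                              double delta, double epsilon, double A)
{
    double lambda = 1.0; // likelihood ratio, updated per point as in eq. (1)
    for (std::size_t j = 0; j < consistent.size(); ++j)
    {
        lambda *= consistent[j] ? (delta / epsilon)
                                : ((1.0 - delta) / (1.0 - epsilon));
        if (lambda > A)
            return false; // step 3: model "rejected"
    }
    return true; // step 4: j reached N, model "accepted"
}
```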
diff --git a/modules/3d/test/test_odometry.cpp b/modules/3d/test/test_odometry.cpp
index 3e4638e57e..a8bd0ed63c 100644
--- a/modules/3d/test/test_odometry.cpp
+++ b/modules/3d/test/test_odometry.cpp
@@ -420,7 +420,7 @@ void OdometryTest::prepareFrameCheck()
         odf.getPyramidAt(normi, OdometryFramePyramidType::PYR_NORM, i);
         ASSERT_FALSE(normi.empty());
         double nnorm = cv::norm(normi, gtNormal, NORM_INF, normmaski);
-        EXPECT_LE(nnorm, 1.8e-7) << "Normals diff is too big at pyr level " << i;
+        EXPECT_LE(nnorm, 3.3e-7) << "Normals diff is too big at pyr level " << i;
 
         if (i == 0)
         {
diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt
index 517b0f31a5..1b3f574275 100644
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@@ -6,6 +6,7 @@ ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
 ocv_add_dispatched_file(convert SSE2 AVX2 VSX3)
 ocv_add_dispatched_file(convert_scale SSE2 AVX2)
 ocv_add_dispatched_file(count_non_zero SSE2 AVX2)
+ocv_add_dispatched_file(has_non_zero SSE2 AVX2)
 ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD)
 ocv_add_dispatched_file(mean SSE2 AVX2)
 ocv_add_dispatched_file(merge SSE2 AVX2)
diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index 2262ca1955..96cf00a50d 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -572,6 +572,14 @@ independently for each channel.
 */
 CV_EXPORTS_AS(sumElems) Scalar sum(InputArray src);
 
+/** @brief Checks for the presence of at least one non-zero array element.
+
+The function returns whether there is at least one non-zero element in src.
+@param src single-channel array.
+@sa mean, meanStdDev, norm, minMaxLoc, calcCovarMatrix
+*/
+CV_EXPORTS_W bool hasNonZero( InputArray src );
+
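A quick usage sketch for the new predicate (illustrative; unlike cv::countNonZero, the implementation in has_non_zero.dispatch.cpp below can return as soon as the first non-zero element is seen):

```cpp
#include <opencv2/core.hpp>

int main()
{
    cv::Mat mask = cv::Mat::zeros(480, 640, CV_8UC1);
    CV_Assert(!cv::hasNonZero(mask));  // all elements are zero

    mask.at<uchar>(0, 0) = 255;
    CV_Assert(cv::hasNonZero(mask));   // may exit on the very first element
    return 0;
}
```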
 /** @brief Counts non-zero array elements.
 
 The function returns the number of non-zero elements in src :
diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp
index 526bc4e874..8d1e7a6288 100644
--- a/modules/core/perf/opencl/perf_arithm.cpp
+++ b/modules/core/perf/opencl/perf_arithm.cpp
@@ -460,6 +460,30 @@ OCL_PERF_TEST_P(CountNonZeroFixture, CountNonZero,
     SANITY_CHECK(result);
 }
 
+///////////// hasNonZero ////////////////////////
+
+typedef Size_MatType HasNonZeroFixture;
+
+OCL_PERF_TEST_P(HasNonZeroFixture, HasNonZero,
+                ::testing::Combine(OCL_TEST_SIZES,
+                                   OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
+{
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
+    UMat src(srcSize, type);
+    /*bool result = false;*/
+    randu(src, 0, 10);
+    declare.in(src);
+
+    OCL_TEST_CYCLE() /*result =*/ cv::hasNonZero(src);
+
+    SANITY_CHECK_NOTHING();
+}
+
 ///////////// Phase ////////////////////////
 
 typedef Size_MatType PhaseFixture;
diff --git a/modules/core/perf/perf_stat.cpp b/modules/core/perf/perf_stat.cpp
index 15ca2e6559..025700c989 100644
--- a/modules/core/perf/perf_stat.cpp
+++ b/modules/core/perf/perf_stat.cpp
@@ -101,4 +101,20 @@ PERF_TEST_P(Size_MatType, countNonZero, testing::Combine( testing::Values( TYPIC
     SANITY_CHECK(cnt);
 }
 
+PERF_TEST_P(Size_MatType, hasNonZero, testing::Combine( testing::Values( TYPICAL_MAT_SIZES ), testing::Values( CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1 ) ))
+{
+    Size sz = get<0>(GetParam());
+    int matType = get<1>(GetParam());
+
+    Mat src(sz, matType);
+    /*bool hnz = false;*/
+
+    declare.in(src, WARMUP_RNG);
+
+    int runs = (sz.width <= 640) ? 8 : 1;
+    TEST_CYCLE_MULTIRUN(runs) /*hnz =*/ hasNonZero(src);
+
+    SANITY_CHECK_NOTHING();
+}
+
 } // namespace
diff --git a/modules/core/src/has_non_zero.dispatch.cpp b/modules/core/src/has_non_zero.dispatch.cpp
new file mode 100644
index 0000000000..6de78ec7a3
--- /dev/null
+++ b/modules/core/src/has_non_zero.dispatch.cpp
@@ -0,0 +1,107 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+
+#include "precomp.hpp"
+#include "opencl_kernels_core.hpp"
+#include "stat.hpp"
+
+#include "has_non_zero.simd.hpp"
+#include "has_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
+
+namespace cv {
+
+static HasNonZeroFunc getHasNonZeroTab(int depth)
+{
+    CV_INSTRUMENT_REGION();
+    CV_CPU_DISPATCH(getHasNonZeroTab, (depth),
+        CV_CPU_DISPATCH_MODES_ALL);
+}
+
+#ifdef HAVE_OPENCL
+static bool ocl_hasNonZero( InputArray _src, bool & res )
+{
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+
+    if (depth == CV_64F && !doubleSupport)
+        return false;
+
+    int dbsize = ocl::Device::getDefault().maxComputeUnits();
+    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
+
+    int wgs2_aligned = 1;
+    while (wgs2_aligned < (int)wgs)
+        wgs2_aligned <<= 1;
+    wgs2_aligned >>= 1;
+
+    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
+                  format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
+                         " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
+                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
+                         ocl::typeToStr(depth), (int)wgs, kercn,
+                         wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
" -D DOUBLE_SUPPORT" : "", + _src.isContinuous() ? " -D HAVE_SRC_CONT" : "")); + if (k.empty()) + return false; + + UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), + dbsize, ocl::KernelArg::PtrWriteOnly(db)); + + size_t globalsize = dbsize * wgs; + if (k.run(1, &globalsize, &wgs, true)) + return res = (saturate_cast(cv::sum(db.getMat(ACCESS_READ))[0])>0), true; + return false; +} +#endif + +bool hasNonZero(InputArray _src) +{ + CV_INSTRUMENT_REGION(); + + int type = _src.type(), cn = CV_MAT_CN(type); + CV_Assert( cn == 1 ); + + bool res = false; + +#ifdef HAVE_OPENCL + CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, + ocl_hasNonZero(_src, res), + res) +#endif + + Mat src = _src.getMat(); + + HasNonZeroFunc func = getHasNonZeroTab(src.depth()); + CV_Assert( func != 0 ); + + if (src.dims == 2)//fast path to avoid creating planes of single rows + { + if (src.isContinuous()) + res |= func(src.ptr(0), src.total()); + else + for(int row = 0, rowsCount = src.rows ; !res && (row(row), src.cols); + } + else//if (src.dims != 2) + { + const Mat* arrays[] = {&src, nullptr}; + Mat planes[1]; + NAryMatIterator itNAry(arrays, planes, 1); + for(size_t p = 0 ; !res && (p(0), plane.total()); + else + for(int row = 0, rowsCount = plane.rows ; !res && (row(row), plane.cols); + } + } + + return res; +} + +} // namespace diff --git a/modules/core/src/has_non_zero.simd.hpp b/modules/core/src/has_non_zero.simd.hpp new file mode 100644 index 0000000000..6ea8bcd7d2 --- /dev/null +++ b/modules/core/src/has_non_zero.simd.hpp @@ -0,0 +1,327 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" + +namespace cv { + +typedef bool (*HasNonZeroFunc)(const uchar*, size_t); + + +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +HasNonZeroFunc getHasNonZeroTab(int depth); + + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +template +inline bool hasNonZero_(const T* src, size_t len ) +{ + bool res = false; + if (len > 0) + { + size_t i=0; + #if CV_ENABLE_UNROLLED + for(; !res && (i+4 <= len); i += 4 ) + res |= ((src[i] | src[i+1] | src[i+2] | src[i+3]) != 0); + #endif + for( ; !res && (i < len); i++ ) + res |= (src[i] != 0); + } + return res; +} + +template<> +inline bool hasNonZero_(const float* src, size_t len ) +{ + bool res = false; + if (len > 0) + { + size_t i=0; + if (sizeof(float) == sizeof(unsigned int)) + { + #if CV_ENABLE_UNROLLED + typedef unsigned int float_as_uint_t; + const float_as_uint_t* src_as_ui = reinterpret_cast(src); + for(; !res && (i+4 <= len); i += 4 ) + { + const float_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]); + res |= ((gathered<<1) != 0);//remove what would be the sign bit + } + #endif + } + for( ; !res && (i < len); i++ ) + res |= (src[i] != 0); + } + return res; +} + +template<> +inline bool hasNonZero_(const double* src, size_t len ) +{ + bool res = false; + if (len > 0) + { + size_t i=0; + if (sizeof(double) == sizeof(uint64_t)) + { + #if CV_ENABLE_UNROLLED + typedef uint64_t double_as_uint_t; + const double_as_uint_t* src_as_ui = reinterpret_cast(src); + for(; !res && (i+4 <= len); i += 4 ) + { + const double_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]); + res |= ((gathered<<1) != 0);//remove what would be the sign bit + } + #endif + } + for( ; !res 
&& (i < len); i++ ) + res |= (src[i] != 0); + } + return res; +} + +static bool hasNonZero8u( const uchar* src, size_t len ) +{ + bool res = false; + const uchar* srcEnd = src+len; +#if CV_SIMD + typedef v_uint8 v_type; + const v_type v_zero = vx_setzero_u8(); + constexpr const int unrollCount = 2; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const uchar* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + res = v_check_any(((v0 | v1) != v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero16u( const ushort* src, size_t len ) +{ + bool res = false; + const ushort* srcEnd = src+len; +#if CV_SIMD + typedef v_uint16 v_type; + const v_type v_zero = vx_setzero_u16(); + constexpr const int unrollCount = 4; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const ushort* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + res = v_check_any(((v0 | v2) != v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero32s( const int* src, size_t len ) +{ + bool res = false; + const int* srcEnd = src+len; +#if CV_SIMD + typedef v_int32 v_type; + const v_type v_zero = vx_setzero_s32(); + constexpr const int unrollCount = 8; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const int* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v_type v4 = vx_load(src); + src += v_type::nlanes; + v_type v5 = vx_load(src); + src += v_type::nlanes; + v_type v6 = vx_load(src); + src += v_type::nlanes; + v_type v7 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + v4 |= v5; + v6 |= v7; + + v0 |= v2; + v4 |= v6; + res = v_check_any(((v0 | v4) != v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero32f( const float* src, size_t len ) +{ + bool res = false; + const float* srcEnd = src+len; +#if CV_SIMD + typedef v_float32 v_type; + const v_type v_zero = vx_setzero_f32(); + constexpr const int unrollCount = 8; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const float* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v_type v4 = vx_load(src); + src += v_type::nlanes; + v_type v5 = vx_load(src); + src += v_type::nlanes; + v_type v6 = vx_load(src); + src += v_type::nlanes; + v_type v7 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + v4 |= v5; + v6 |= v7; + + v0 |= v2; + v4 |= v6; + //res = v_check_any(((v0 | v4) != v_zero));//beware : 
(NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ + res = !v_check_all(((v0 | v4) == v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +static bool hasNonZero64f( const double* src, size_t len ) +{ + bool res = false; + const double* srcEnd = src+len; +#if CV_SIMD_64F + typedef v_float64 v_type; + const v_type v_zero = vx_setzero_f64(); + constexpr const int unrollCount = 16; + int step = v_type::nlanes * unrollCount; + int len0 = len & -step; + const double* srcSimdEnd = src+len0; + + int countSIMD = static_cast((srcSimdEnd-src)/step); + while(!res && countSIMD--) + { + v_type v0 = vx_load(src); + src += v_type::nlanes; + v_type v1 = vx_load(src); + src += v_type::nlanes; + v_type v2 = vx_load(src); + src += v_type::nlanes; + v_type v3 = vx_load(src); + src += v_type::nlanes; + v_type v4 = vx_load(src); + src += v_type::nlanes; + v_type v5 = vx_load(src); + src += v_type::nlanes; + v_type v6 = vx_load(src); + src += v_type::nlanes; + v_type v7 = vx_load(src); + src += v_type::nlanes; + v_type v8 = vx_load(src); + src += v_type::nlanes; + v_type v9 = vx_load(src); + src += v_type::nlanes; + v_type v10 = vx_load(src); + src += v_type::nlanes; + v_type v11 = vx_load(src); + src += v_type::nlanes; + v_type v12 = vx_load(src); + src += v_type::nlanes; + v_type v13 = vx_load(src); + src += v_type::nlanes; + v_type v14 = vx_load(src); + src += v_type::nlanes; + v_type v15 = vx_load(src); + src += v_type::nlanes; + v0 |= v1; + v2 |= v3; + v4 |= v5; + v6 |= v7; + v8 |= v9; + v10 |= v11; + v12 |= v13; + v14 |= v15; + + v0 |= v2; + v4 |= v6; + v8 |= v10; + v12 |= v14; + + v0 |= v4; + v8 |= v12; + //res = v_check_any(((v0 | v8) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ + res = !v_check_all(((v0 | v8) == v_zero)); + } + + v_cleanup(); +#endif + return res || hasNonZero_(src, srcEnd-src); +} + +HasNonZeroFunc getHasNonZeroTab(int depth) +{ + static HasNonZeroFunc hasNonZeroTab[] = + { + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u), + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u), + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32s), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32f), + (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero64f), 0 + }; + + return hasNonZeroTab[depth]; +} + +#endif + +CV_CPU_OPTIMIZATION_NAMESPACE_END +} // namespace diff --git a/modules/core/test/test_hasnonzero.cpp b/modules/core/test/test_hasnonzero.cpp new file mode 100644 index 0000000000..9834117ddf --- /dev/null +++ b/modules/core/test/test_hasnonzero.cpp @@ -0,0 +1,201 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +typedef testing::TestWithParam > HasNonZeroAllZeros; + +TEST_P(HasNonZeroAllZeros, hasNonZeroAllZeros) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + Mat m = Mat::zeros(size, type); + EXPECT_FALSE(hasNonZero(m)); +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroAllZeros, + testing::Combine( + testing::Values(CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1), + testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) + ) +); + +typedef testing::TestWithParam > HasNonZeroNegZeros; + +TEST_P(HasNonZeroNegZeros, hasNonZeroNegZeros) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + Mat m = Mat(size, type); + m.setTo(Scalar::all(-0.)); + EXPECT_FALSE(hasNonZero(m)); +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroNegZeros, + testing::Combine( + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) + ) +); + +typedef testing::TestWithParam > HasNonZeroLimitValues; + +TEST_P(HasNonZeroLimitValues, hasNonZeroLimitValues) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + Mat m = Mat(size, type); + + m.setTo(Scalar::all(std::numeric_limits::infinity())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo(Scalar::all(-std::numeric_limits::infinity())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo(Scalar::all(std::numeric_limits::quiet_NaN())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits::epsilon()) : Scalar::all(std::numeric_limits::epsilon())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits::min()) : Scalar::all(std::numeric_limits::min())); + EXPECT_TRUE(hasNonZero(m)); + + m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? 
Scalar::all(std::numeric_limits::denorm_min()) : Scalar::all(std::numeric_limits::denorm_min())); + EXPECT_TRUE(hasNonZero(m)); +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroLimitValues, + testing::Combine( + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) + ) +); + +typedef testing::TestWithParam > HasNonZeroRandom; + +TEST_P(HasNonZeroRandom, hasNonZeroRandom) +{ + const int type = std::get<0>(GetParam()); + const Size size = std::get<1>(GetParam()); + + RNG& rng = theRNG(); + + const size_t N = std::min(100, size.area()); + for(size_t i = 0 ; i > HasNonZeroNd; + +TEST_P(HasNonZeroNd, hasNonZeroNd) +{ + const int type = get<0>(GetParam()); + const int ndims = get<1>(GetParam()); + const bool continuous = get<2>(GetParam()); + + RNG& rng = theRNG(); + + const size_t N = 10; + for(size_t i = 0 ; i steps(ndims); + std::vector sizes(ndims); + size_t totalBytes = 1; + for(int dim = 0 ; dim(length))*CV_ELEM_SIZE(type); + sizes[dim] = (isFirstDim || continuous) ? length : rng.uniform(1, length); + totalBytes *= steps[dim]*static_cast(sizes[dim]); + } + + std::vector buffer(totalBytes); + void* data = buffer.data(); + + Mat m = Mat(ndims, sizes.data(), type, data, steps.data()); + + std::vector nzRange(ndims); + for(int dim = 0 ; dim0), hasNonZero(m)); + } +} + +INSTANTIATE_TEST_CASE_P(Core, HasNonZeroNd, + testing::Combine( + testing::Values(CV_8UC1), + testing::Values(2, 3), + testing::Values(true, false) + ) +); + +}} // namespace diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 54b51b4133..5c91aae56f 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -446,5 +446,11 @@ class dnn_test(NewOpenCVTests): normAssert(self, real_output, gold_output, "", getDefaultThreshold(target)) + def test_scalefactor_assign(self): + params = cv.dnn.Image2BlobParams() + self.assertEqual(params.scalefactor, (1.0, 1.0, 1.0, 1.0)) + params.scalefactor = 2.0 + self.assertEqual(params.scalefactor, (2.0, 0.0, 0.0, 0.0)) + if __name__ == '__main__': NewOpenCVTests.bootstrap() diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp index b8b520c1a6..3c08b92a75 100644 --- a/modules/dnn/src/caffe/caffe_importer.cpp +++ b/modules/dnn/src/caffe/caffe_importer.cpp @@ -97,11 +97,11 @@ class CaffeImporter public: - CaffeImporter(const char *pototxt, const char *caffeModel) + CaffeImporter(const char *prototxt, const char *caffeModel) { CV_TRACE_FUNCTION(); - ReadNetParamsFromTextFileOrDie(pototxt, &net); + ReadNetParamsFromTextFileOrDie(prototxt, &net); if (caffeModel && caffeModel[0]) ReadNetParamsFromBinaryFileOrDie(caffeModel, &netBinary); @@ -193,7 +193,6 @@ public: break; default: CV_Error(Error::StsError, "Unknown type \"" + String(field->type_name()) + "\" in prototxt"); - break; } } @@ -556,7 +555,6 @@ public: if (idx < 0) { CV_Error(Error::StsObjectNotFound, "Can't find output blob \"" + name + "\""); - return; } dstNet.connect(addedBlobs[idx].layerId, addedBlobs[idx].outNum, layerId, inNum); diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp index aad067ee37..18c7e975eb 100644 --- a/modules/dnn/src/dnn_utils.cpp +++ b/modules/dnn/src/dnn_utils.cpp @@ -51,6 +51,11 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth) { CV_TRACE_FUNCTION(); + if (images_.kind() != _InputArray::STD_VECTOR_MAT 
&& images_.kind() != _InputArray::STD_ARRAY_MAT && + images_.kind() != _InputArray::STD_VECTOR_VECTOR) { + String error_message = "The data is expected as vectors of vectors or vectors of matrices."; + CV_Error(Error::StsBadArg, error_message); + } Image2BlobParams param(Scalar::all(scalefactor), size, mean_, swapRB, ddepth); if (crop) param.paddingmode = DNN_PMODE_CROP_CENTER; @@ -83,9 +88,13 @@ Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, const Image2BlobParams& param) { CV_TRACE_FUNCTION(); + if (images_.kind() != _InputArray::STD_VECTOR_MAT && images_.kind() != _InputArray::STD_ARRAY_MAT && + images_.kind() != _InputArray::STD_VECTOR_VECTOR) { + String error_message = "The data is expected as vectors of vectors or vectors of matrices."; + CV_Error(Error::StsBadArg, error_message); + } CV_CheckType(param.ddepth, param.ddepth == CV_32F || param.ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U"); - Size size = param.size; std::vector images; images_.getMatVector(images); diff --git a/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp index c844acd730..a18943994c 100644 --- a/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp +++ b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp @@ -31,7 +31,6 @@ void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep, void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, float* bpptr, int bpstep, float* outptr, int outstep, float bias, float minval, float maxval, bool ifMinMaxAct); - int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr& conv, int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct) { @@ -51,6 +50,23 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outpu int pad_left = conv->pad_left; int ngroups = conv->ngroups, Cg = C/ngroups, Kg = K/ngroups; + + const int CONV_WINO_KBLOCK = 4; +#if (CV_NEON && CV_NEON_AARCH64) + const int CONV_WINO_IBLOCK = 6; +#elif CV_TRY_AVX || CV_TRY_AVX2 + const int CONV_WINO_IBLOCK = (conv->useAVX || conv->useAVX2) ? 6 : 3; +#else + const int CONV_WINO_IBLOCK = 3; +#endif + +#if CV_TRY_AVX || CV_TRY_AVX2 + const int CONV_WINO_ATOM_F32 = (conv->useAVX || conv->useAVX2) ? 8 : 4; +#else + const int CONV_WINO_ATOM_F32 = 4; +#endif + const int CONV_WINO_NATOMS_F32 = CONV_WINO_AREA / CONV_WINO_ATOM_F32; // for AVX2, it is 8, otherwise, it's 16. 
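+    // NB: these Winograd block sizes used to be compile-time enum constants in
+    // convolution.hpp; they are now picked at run time from the detected ISA
+    // (AVX/AVX2 vs AArch64 NEON vs generic), see the convolution.hpp cleanup below.
+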
+ int Kg_nblocks = (Kg + CONV_WINO_KBLOCK - 1)/CONV_WINO_KBLOCK; const size_t inp_planesize = (size_t)Hi*Wi; const size_t out_planesize = (size_t)H0*W0; @@ -398,7 +414,7 @@ void winofunc_accum_f32(const float* inwptr, const float* wptr, float* outbuf, i void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep, float* outptr, int Cg, const int winoIblock, const int winoAtomF32) { - CV_Assert(CONV_WINO_IBLOCK == 3 && CONV_WINO_KBLOCK == 4 && CONV_WINO_ATOM_F32 == 4); + CV_Assert(winoIblock == 3 && winoAtomF32 == 4); v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4); v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4); v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4); @@ -573,7 +589,6 @@ void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, float* bpptr, int bpstep, float* outptr, int outstep, float bias, float minval, float maxval, bool ifMinMaxAct) { - CV_Assert(CONV_WINO_IBLOCK == 3 && CONV_WINO_KBLOCK == 4 && CONV_WINO_ATOM_F32 == 4); v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4); v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4); v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4); diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.cpp b/modules/dnn/src/layers/cpu_kernels/convolution.cpp index 0b666a855d..c76b3494e2 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.cpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.cpp @@ -181,6 +181,21 @@ Ptr initFastConv( {0.0f, 0.0f, 1.0f} }; + const int CONV_WINO_KBLOCK = 4; + +#if CV_TRY_AVX || CV_TRY_AVX2 + const int CONV_WINO_ATOM_F32 = (conv->useAVX || conv->useAVX2) ? 8 : 4; +#else + const int CONV_WINO_ATOM_F32 = 4; +#endif + const int CONV_WINO_NATOMS_F32 = CONV_WINO_AREA / CONV_WINO_ATOM_F32; // for AVX2, it is 8, otherwise, it's 16. + +#ifdef CONV_ARM_FP16 + // FP 16 + const int CONV_WINO_ATOM_F16 = CONV_WINO_ATOM_F32 * 2; + const int CONV_WINO_NATOMS_F16 = CONV_WINO_AREA / CONV_WINO_ATOM_F16; +#endif + // the weights are packed as 6-dim tensor: // ngroups * ceil((K/ngroups)/KBLOCK) * (W*W/ATOM_SIZE) * (C/ngroups) * KBLOCK * ATOM_SIZE, // where W is the size of Winograd-transformed kernel (8x8), @@ -1275,7 +1290,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co else Kg_nblocks = 1; - bool seperateIm2col = fast_1x1 || stripes_per_plane == 1; + bool separateIm2col = fast_1x1 || stripes_per_plane == 1; int Kstripes = Kg_nblocks * stripes_per_plane; int nsubtasks = N * ngroups * Kstripes; @@ -1285,12 +1300,12 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co size_t taskbufsize = cbufsize * sizeof(float ); - if (!seperateIm2col) + if (!separateIm2col) taskbufsize += MAX_STRIPES * stripesize * esz; size_t totalbufsize_base = taskbufsize * ntasks; size_t totalbufsize = totalbufsize_base; - if (seperateIm2col) + if (separateIm2col) totalbufsize += N * ngroups * stripes_per_plane0 * stripesize * esz; AutoBuffer inpbuf_all_; @@ -1308,7 +1323,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co // In general, im2row results in Hk*Wk-x unrolling factor // (e.g. 3*3=9x unrolling for 3x3 convolution), thus for 1x1 convolution // the reordered tensor will take as much space as the original tensor. - if (seperateIm2col) + if (separateIm2col) { // the optional phase 1. 
im2row parallel_for_(Range(0, ntasks), [&](const Range& r0) { @@ -1409,7 +1424,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co CV_Assert(nstripes <= MAX_STRIPES); - if (!seperateIm2col) + if (!separateIm2col) { packInputData(inpbuf_task, inp, ofstab, dhwTab, zyx0, zyx_block_limit, ksize, stride_d, stride_h, stride_w, pad_front, pad_top, pad_left, Dk, Hk, Wk, dilation_d, dilation_h, dilation_w, @@ -1442,8 +1457,8 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co int out_width = zyx_block_limit - zyx0; float *outptr = out + outofs; const float biasVal = *(conv->biasBuf.data() + g); - const char *inptr_ = seperateIm2col ? inpbuf_all_0 + (ng*stripes_per_plane0 + zyx0/CONV_NR) * stripesize * esz: - inpbuf_task; + const char *inptr_ = separateIm2col ? inpbuf_all_0 + (ng * stripes_per_plane0 + zyx0 / CONV_NR) * stripesize * esz : + inpbuf_task; for (int stripe = 0; stripe < nstripes; stripe++) { @@ -1496,8 +1511,8 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co for (int c0 = 0; c0 < DkHkWkCg; c0 += C_BLOCK_SIZE) { int c1 = c0 + C_BLOCK_SIZE < DkHkWkCg ? c0 + C_BLOCK_SIZE : DkHkWkCg; - const char *inptr = seperateIm2col ? inpbuf_all_0 + (ng*stripes_per_plane0 + zyx0/CONV_NR)*stripesize*esz: - inpbuf_task; + const char *inptr = separateIm2col ? inpbuf_all_0 + (ng * stripes_per_plane0 + zyx0 / CONV_NR) * stripesize * esz : + inpbuf_task; inptr += (c0 * CONV_NR) * esz; for (int stripe = 0; stripe < nstripes; stripe++, inptr += stripesize * esz) { diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.hpp b/modules/dnn/src/layers/cpu_kernels/convolution.hpp index 6fabc3da7c..22ef9a8575 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.hpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.hpp @@ -33,36 +33,17 @@ typedef __fp16 float16_t; // Fix conflict between float16_t in arm_neon.h and fl #define CONV_NR_FP32 24 #endif -// Winograd Params enum { CONV_WINO_STEP=6, CONV_WINO_KSIZE=3, - CONV_WINO_SIZE=CONV_WINO_STEP+CONV_WINO_KSIZE-1, // 8 + CONV_WINO_SIZE=CONV_WINO_STEP+CONV_WINO_KSIZE - 1, // 8 CONV_WINO_AREA=CONV_WINO_SIZE*CONV_WINO_SIZE, - - CONV_WINO_KBLOCK = 4, -#if (CV_NEON && CV_NEON_AARCH64) || CV_TRY_AVX || CV_TRY_AVX2 - CONV_WINO_IBLOCK = 6, -#else - CONV_WINO_IBLOCK = 3, -#endif - -#if CV_TRY_AVX || CV_TRY_AVX2 - CONV_WINO_ATOM_F32 = 8, -#else - CONV_WINO_ATOM_F32 = 4, -#endif - - CONV_WINO_NATOMS_F32 = CONV_WINO_AREA / CONV_WINO_ATOM_F32, // for AVX2, it is 8, otherwise, it's 16. - - // FP 16 - CONV_WINO_ATOM_F16 = CONV_WINO_ATOM_F32 * 2, - CONV_WINO_NATOMS_F16 = CONV_WINO_AREA / CONV_WINO_ATOM_F16, }; // NOTE that: CONV_TYPE_DEPTHWISE is for 3x3 depthwise conv, and others depthwise will be set as CONV_TYPE_DEPTHWISE_REMAIN. 
enum { CONV_TYPE_GENERIC=0, CONV_TYPE_DEPTHWISE=1, CONV_TYPE_WINOGRAD3X3=2, CONV_TYPE_DEPTHWISE_REMAIN=3 }; enum { CONV_1D = 0, CONV_2D = 1, CONV_3D = 2 }; + #endif namespace cv { diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 46ee70aa5b..5cd22057ad 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1363,14 +1363,22 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP { DictValue splits = layerParams.get("split"); const int numSplits = splits.size(); - CV_Assert(numSplits > 1); - std::vector slicePoints(numSplits - 1, splits.get(0)); - for (int i = 1; i < splits.size() - 1; ++i) + if (numSplits == 1) { - slicePoints[i] = slicePoints[i - 1] + splits.get(i); + layerParams.set("num_split", 1); + } + else + { + CV_Assert(numSplits >= 1); + + std::vector slicePoints(numSplits - 1, splits.get(0)); + for (int i = 1; i < splits.size() - 1; ++i) + { + slicePoints[i] = slicePoints[i - 1] + splits.get(i); + } + layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); } - layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); } else if (node_proto.input_size() == 2) // opset >= 13, the split will be stored at the second input, instead of the attribute. { @@ -2771,13 +2779,15 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node if (layerParams.has("coordinate_transformation_mode")) { String interp_mode = layerParams.get("coordinate_transformation_mode"); - CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + CV_Assert(interp_mode != "tf_crop_and_resize"); + + bool halfPixel = interp_mode == "tf_half_pixel_for_nn" || interp_mode == "half_pixel" || interp_mode == "pytorch_half_pixel"; layerParams.set("align_corners", interp_mode == "align_corners"); + layerParams.set("half_pixel_centers", halfPixel); if (layerParams.get("mode") == "linear") { - layerParams.set("mode", interp_mode == "pytorch_half_pixel" || interp_mode == "half_pixel" ? - "opencv_linear" : "bilinear"); + layerParams.set("mode", halfPixel ? "opencv_linear" : "bilinear"); } } if (layerParams.get("mode") == "linear" && framework_name == "pytorch") @@ -2830,13 +2840,15 @@ void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::No if (layerParams.has("coordinate_transformation_mode")) { String interp_mode = layerParams.get("coordinate_transformation_mode"); - CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + CV_Assert(interp_mode != "tf_crop_and_resize"); + + bool halfPixel = interp_mode == "tf_half_pixel_for_nn" || interp_mode == "half_pixel" || interp_mode == "pytorch_half_pixel"; layerParams.set("align_corners", interp_mode == "align_corners"); + layerParams.set("half_pixel_centers", halfPixel); if (layerParams.get("mode") == "linear") { - layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? - "opencv_linear" : "bilinear"); + layerParams.set("mode", halfPixel ? 
"opencv_linear" : "bilinear"); } } if (layerParams.get("mode") == "linear" && framework_name == "pytorch") diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index ee97ecb2ee..49908e7ff1 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -1017,6 +1017,7 @@ TEST_P(Test_ONNX_layers, Padding) TEST_P(Test_ONNX_layers, Resize) { testONNXModels("resize_nearest"); + testONNXModels("tf_half_pixel_for_nn"); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); testONNXModels("resize_bilinear"); @@ -1146,6 +1147,7 @@ TEST_P(Test_ONNX_layers, Split) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + testONNXModels("split_0"); testONNXModels("split_1"); testONNXModels("split_2"); testONNXModels("split_3"); diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index f3979c0bbd..2433c41d95 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -156,6 +156,10 @@ set(gapi_srcs src/backends/ie/giebackend.cpp src/backends/ie/giebackend/giewrapper.cpp + # OV Backend. FIXME: should be included by CMake + # if and only if OV support is enabled + src/backends/ov/govbackend.cpp + # ONNX backend src/backends/onnx/gonnxbackend.cpp @@ -182,6 +186,7 @@ set(gapi_srcs # Python bridge src/backends/ie/bindings_ie.cpp src/backends/onnx/bindings_onnx.cpp + src/backends/ov/bindings_ov.cpp src/backends/python/gpythonbackend.cpp # OpenVPL Streaming source diff --git a/modules/gapi/include/opencv2/gapi/infer/bindings_ov.hpp b/modules/gapi/include/opencv2/gapi/infer/bindings_ov.hpp new file mode 100644 index 0000000000..08f5c83a3f --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/infer/bindings_ov.hpp @@ -0,0 +1,128 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2023 Intel Corporation + +#ifndef OPENCV_GAPI_INFER_BINDINGS_OV_HPP +#define OPENCV_GAPI_INFER_BINDINGS_OV_HPP + +#include +#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#include // GKernelPackage +#include // Params + +#include + +namespace cv { +namespace gapi { +namespace ov { + +// NB: Used by python wrapper +// This class can be marked as SIMPLE, because it's implemented as pimpl +class GAPI_EXPORTS_W_SIMPLE PyParams { +public: + GAPI_WRAP + PyParams() = default; + + GAPI_WRAP + PyParams(const std::string &tag, + const std::string &model_path, + const std::string &bin_path, + const std::string &device); + + GAPI_WRAP + PyParams(const std::string &tag, + const std::string &blob_path, + const std::string &device); + + GAPI_WRAP + PyParams& cfgPluginConfig( + const std::map &config); + + GAPI_WRAP + PyParams& cfgInputTensorLayout(std::string tensor_layout); + + GAPI_WRAP + PyParams& cfgInputTensorLayout( + std::map layout_map); + + GAPI_WRAP + PyParams& cfgInputModelLayout(std::string tensor_layout); + + GAPI_WRAP + PyParams& cfgInputModelLayout( + std::map layout_map); + + GAPI_WRAP + PyParams& cfgOutputTensorLayout(std::string tensor_layout); + + GAPI_WRAP + PyParams& cfgOutputTensorLayout( + std::map layout_map); + + GAPI_WRAP + PyParams& cfgOutputModelLayout(std::string tensor_layout); + + GAPI_WRAP + PyParams& cfgOutputModelLayout( + std::map layout_map); + + GAPI_WRAP + PyParams& cfgOutputTensorPrecision(int precision); + + GAPI_WRAP + PyParams& cfgOutputTensorPrecision( + std::map precision_map); + + GAPI_WRAP + PyParams& cfgReshape(std::vector new_shape); + + GAPI_WRAP + PyParams& cfgReshape( + std::map> new_shape_map); + + GAPI_WRAP + PyParams& cfgNumRequests(const size_t nireq); + + GAPI_WRAP + PyParams& cfgMean(std::vector mean_values); + + GAPI_WRAP + PyParams& cfgMean( + std::map> mean_map); + + GAPI_WRAP + PyParams& cfgScale(std::vector scale_values); + + GAPI_WRAP + PyParams& cfgScale( + std::map> scale_map); + + GAPI_WRAP + PyParams& cfgResize(int interpolation); + + GAPI_WRAP + PyParams& cfgResize(std::map interpolation); + + GBackend backend() const; + std::string tag() const; + cv::util::any params() const; + +private: + std::shared_ptr> m_priv; +}; + +GAPI_EXPORTS_W PyParams params(const std::string &tag, + const std::string &model_path, + const std::string &weights, + const std::string &device); + +GAPI_EXPORTS_W PyParams params(const std::string &tag, + const std::string &bin_path, + const std::string &device); +} // namespace ov +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_INFER_BINDINGS_OV_HPP diff --git a/modules/gapi/include/opencv2/gapi/infer/ie.hpp b/modules/gapi/include/opencv2/gapi/infer/ie.hpp index 470d50ac98..b403479ca2 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ie.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019-2021 Intel Corporation +// Copyright (C) 2019-2023 Intel Corporation #ifndef OPENCV_GAPI_INFER_IE_HPP #define OPENCV_GAPI_INFER_IE_HPP @@ -55,6 +55,21 @@ using IEConfig = std::map; enum InferMode {Sync, Async}; namespace detail { + +template +using AttrMap = std::map; +// NB: This type is used to hold in/out layers +// attributes such as precision, layout, shape etc. +// +// User can provide attributes either: +// 1. cv::util::monostate - No value specified explicitly. +// 2. 
Attr - value specified explicitly that should be broadcasted to all layers. +// 3. AttrMap[str->T] - map specifies value for particular layer. +template +using LayerVariantAttr = cv::util::variant< cv::util::monostate + , AttrMap + , Attr>; + struct ParamDesc { std::string model_path; std::string weights_path; @@ -103,7 +118,11 @@ struct ParamDesc { using PrecisionVariantT = cv::util::variant; + PrecisionVariantT output_precision; + LayerVariantAttr input_layout; + LayerVariantAttr output_layout; + LayerVariantAttr interpolation; }; } // namespace detail @@ -150,6 +169,9 @@ public: , {} , {} , InferMode::Async + , {} + , {} + , {} , {} } { }; @@ -176,6 +198,9 @@ public: , {} , {} , InferMode::Async + , {} + , {} + , {} , {} } { }; @@ -412,6 +437,80 @@ public: return *this; } + /** @brief Specifies the input layout for model. + + The function is used to set an input layout for model. + + @param layout Layout in string representation ("NCHW", "NHWC", etc) + will be applied to all input layers. + @return reference to this parameter structure. + */ + Params& cfgInputLayout(std::string layout) { + desc.input_layout = std::move(layout); + return *this; + } + + /** @overload + + @param layout_map Map of pairs: name of corresponding input layer + and its layout in string representation ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgInputLayout(detail::AttrMap layout_map) { + desc.input_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies the output layout for model. + + The function is used to set an output layout for model. + + @param layout Layout in string representation ("NCHW", "NHWC", etc) + will be applied to all output layers. + @return reference to this parameter structure. + */ + Params& cfgOutputLayout(std::string layout) { + desc.output_layout = std::move(layout); + return *this; + } + + /** @overload + + @param layout_map Map of pairs: name of corresponding output layer + and its layout in string representation ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgOutputLayout(detail::AttrMap layout_map) { + desc.output_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies resize interpolation algorithm. + * + The function is used to configure resize preprocessing for input layer. + + @param interpolation Resize interpolation algorithm. + Supported algorithms: #INTER_LINEAR, #INTER_AREA. + @return reference to this parameter structure. + */ + Params& cfgResize(int interpolation) { + desc.interpolation = interpolation; + return *this; + } + + /** @overload + + @param interpolation Map of pairs: name of corresponding input layer + and its resize algorithm. + @return reference to this parameter structure. 
+ */ + Params& cfgResize(detail::AttrMap interpolation) { + desc.interpolation = std::move(interpolation); + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::ie::backend(); } std::string tag() const { return Net::tag(); } @@ -446,7 +545,7 @@ public: const std::string &device) : desc{ model, weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u, - {}, {}, {}, {}, InferMode::Async, {} }, + {}, {}, {}, {}, InferMode::Async, {}, {}, {}, {} }, m_tag(tag) { }; @@ -464,7 +563,7 @@ public: const std::string &device) : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u, - {}, {}, {}, {}, InferMode::Async, {} }, + {}, {}, {}, {}, InferMode::Async, {}, {}, {}, {} }, m_tag(tag) { }; @@ -556,6 +655,44 @@ public: return *this; } + /** @see ie::Params::cfgInputLayout */ + Params& cfgInputLayout(std::string layout) { + desc.input_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgInputLayout(detail::AttrMap layout_map) { + desc.input_layout = std::move(layout_map); + return *this; + } + + /** @see ie::Params::cfgOutputLayout */ + Params& cfgOutputLayout(std::string layout) { + desc.output_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgOutputLayout(detail::AttrMap layout_map) { + desc.output_layout = std::move(layout_map); + return *this; + } + + /** @see ie::Params::cfgResize */ + Params& cfgResize(int interpolation) { + desc.interpolation = interpolation; + return *this; + } + + /** @overload */ + Params& cfgResize(detail::AttrMap interpolation) { + desc.interpolation = std::move(interpolation); + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::ie::backend(); } std::string tag() const { return m_tag; } diff --git a/modules/gapi/include/opencv2/gapi/infer/ov.hpp b/modules/gapi/include/opencv2/gapi/infer/ov.hpp new file mode 100644 index 0000000000..99d701f937 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/infer/ov.hpp @@ -0,0 +1,685 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + +#ifndef OPENCV_GAPI_INFER_OV_HPP +#define OPENCV_GAPI_INFER_OV_HPP + +#include + +#include +#include // GAPI_EXPORTS +#include // GKernelType[M], GBackend +#include // Generic + +#include + +namespace cv { +namespace gapi { + +/** + * @brief This namespace contains G-API OpenVINO 2.0 backend functions, + * structures, and symbols. + */ +namespace ov { + +GAPI_EXPORTS cv::gapi::GBackend backend(); + +namespace detail { + +template +using AttrMap = std::map; +// NB: This type is supposed to be used to hold in/out layers +// attributes such as precision, layout, shape etc. +// +// User can provide attributes either: +// 1. cv::util::monostate - No value specified explicitly. +// 2. Attr - value specified explicitly that should be broadcasted to all layers. +// 3. AttrMap[str->T] - map specifies value for particular layer. 
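+//
+// For example (illustrative; "data" is a hypothetical layer name):
+//   params.cfgInputTensorLayout("NHWC");             // case 2: one value for all inputs
+//   params.cfgInputTensorLayout({{"data", "NHWC"}}); // case 3: per-layer AttrMap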
+template <typename Attr> +using LayerVariantAttr = cv::util::variant< cv::util::monostate + , AttrMap<Attr> + , Attr>; + +struct ParamDesc { + struct Model { + + Model(const std::string &model_path_, + const std::string &bin_path_) + : model_path(model_path_), bin_path(bin_path_) { + } + + std::string model_path; + std::string bin_path; + + LayerVariantAttr<std::string> input_tensor_layout; + LayerVariantAttr<std::string> input_model_layout; + LayerVariantAttr<std::string> output_tensor_layout; + LayerVariantAttr<std::string> output_model_layout; + LayerVariantAttr<int> output_tensor_precision; + + LayerVariantAttr<std::vector<size_t>> new_shapes; + + LayerVariantAttr<std::vector<float>> mean_values; + LayerVariantAttr<std::vector<float>> scale_values; + + LayerVariantAttr<int> interpolation; + }; + + struct CompiledModel { + std::string blob_path; + }; + + using Kind = cv::util::variant<Model, CompiledModel>; + + ParamDesc(Kind &&kind_, + const std::string &device_, + const bool is_generic_, + const size_t num_in_, + const size_t num_out_) + : kind(std::move(kind_)), device(device_), + is_generic(is_generic_), + num_in(num_in_), num_out(num_out_) { + } + + Kind kind; + + std::string device; + bool is_generic; + + std::size_t num_in; + std::size_t num_out; + + std::vector<std::string> input_names; + std::vector<std::string> output_names; + + using PluginConfigT = std::map<std::string, std::string>; + PluginConfigT config; + + size_t nireq = 1; +}; + +// NB: Just a helper to avoid code duplication. +static detail::ParamDesc::Model& +getModelToSetAttrOrThrow(detail::ParamDesc::Kind &kind, + const std::string &attr_name) { + if (cv::util::holds_alternative<detail::ParamDesc::CompiledModel>(kind)) { + cv::util::throw_error( + std::logic_error("Specifying " + attr_name + " isn't" + " possible for compiled model.")); + } + GAPI_Assert(cv::util::holds_alternative<detail::ParamDesc::Model>(kind)); + return cv::util::get<detail::ParamDesc::Model>(kind); +} + +} // namespace detail + +/** + * @brief This structure provides functions + * that fill inference parameters for the "OpenVINO Toolkit" model. + */ +template <typename Net> struct Params { +public: + /** @brief Class constructor. + + Constructs Params based on model information and specifies default values for other + inference description parameters. Model is loaded and compiled using "OpenVINO Toolkit". + + @param model_path Path to a model. + @param bin_path Path to a data file. + For IR format (*.bin): + If path is empty, will try to read a bin file with the same name as xml. + If the bin file with the same name is not found, will load IR without weights. + For PDPD (*.pdmodel) and ONNX (*.onnx) formats bin_path isn't used. + @param device target device to use. + */ + Params(const std::string &model_path, + const std::string &bin_path, + const std::string &device) + : m_desc( detail::ParamDesc::Kind{detail::ParamDesc::Model{model_path, bin_path}} + , device + , false /* is generic */ + , std::tuple_size<typename Net::InArgs>::value + , std::tuple_size<typename Net::OutArgs>::value) { + } + + /** @overload + Use this constructor to work with a pre-compiled network. + Model is imported from a pre-compiled blob. + + @param blob_path path to the compiled model (*.blob). + @param device target device to use. + */ + Params(const std::string &blob_path, + const std::string &device) + : m_desc( detail::ParamDesc::Kind{detail::ParamDesc::CompiledModel{blob_path}} + , device + , false /* is generic */ + , std::tuple_size<typename Net::InArgs>::value + , std::tuple_size<typename Net::OutArgs>::value) { + } + + /** @brief Specifies the sequence of network input layer names for inference. + + The function is used to associate cv::gapi::infer<> inputs with the model inputs. + Number of names has to match the number of network inputs as defined in G_API_NET(). + In case a network has only a single input layer, there is no need to specify the name manually.
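The three LayerVariantAttr states above are worth seeing side by side; a minimal sketch (layer names here are hypothetical):

    using LayoutAttr = cv::gapi::ov::detail::LayerVariantAttr<std::string>;

    LayoutAttr none;                           // 1. monostate: nothing specified
    LayoutAttr all = std::string("NHWC");      // 2. one value for every layer
    LayoutAttr per = cv::gapi::ov::detail::AttrMap<std::string>{
        {"data", "NCHW"}};                     // 3. per-layer map
    // Backends inspect the active state via cv::util::holds_alternative<...>().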
+ + @param layer_names std::array where N is the number of inputs + as defined in the @ref G_API_NET. Contains names of input layers. + @return reference to this parameter structure. + */ + Params& cfgInputLayers(const std::vector &layer_names) { + m_desc.input_names = layer_names; + return *this; + } + + /** @brief Specifies sequence of network output layers names for inference. + + The function is used to associate cv::gapi::infer<> outputs with the model outputs. + Number of names has to match the number of network outputs as defined in G_API_NET(). + In case a network has only single output layer, there is no need to specify name manually. + + @param layer_names std::array where N is the number of outputs + as defined in the @ref G_API_NET. Contains names of output layers. + @return reference to this parameter structure. + */ + Params& cfgOutputLayers(const std::vector &layer_names) { + m_desc.output_names = layer_names; + return *this; + } + + /** @brief Specifies OpenVINO plugin configuration. + + The function is used to set configuration for OpenVINO plugin. Some parameters + can be different for each plugin. Please follow https://docs.openvinotoolkit.org/latest/index.html + to check information about specific plugin. + + @param config Map of pairs: (config parameter name, config parameter value). + @return reference to this parameter structure. + */ + Params& cfgPluginConfig(const detail::ParamDesc::PluginConfigT &config) { + m_desc.config = config; + return *this; + } + + /** @brief Specifies tensor layout for an input layer. + + The function is used to set tensor layout for an input layer. + + @param layout Tensor layout ("NCHW", "NWHC", etc) + will be applied to all input layers. + @return reference to this parameter structure. + */ + Params& cfgInputTensorLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input tensor layout") + .input_tensor_layout = std::move(layout); + return *this; + } + + /** @overload + @param layout_map Map of pairs: name of corresponding input layer + and its tensor layout represented in std::string ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgInputTensorLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input tensor layout") + .input_tensor_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies model layout for an input layer. + + The function is used to set model layout for an input layer. + + @param layout Model layout ("NCHW", "NHWC", etc) + will be applied to all input layers. + @return reference to this parameter structure. + */ + Params& cfgInputModelLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input model layout") + .input_model_layout = std::move(layout); + return *this; + } + + /** @overload + @param layout_map Map of pairs: name of corresponding input layer + and its model layout ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgInputModelLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input model layout") + .input_model_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies tensor layout for an output layer. + + The function is used to set tensor layout for an output layer. + + @param layout Tensor layout ("NCHW", "NWHC", etc) + will be applied to all output layers. + @return reference to this parameter structure. 
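How the layer-name lists line up with a network's G_API_NET signature, sketched on the age-gender model exercised by the tests below (file paths hypothetical):

    G_API_NET(AgeGender, <std::tuple<cv::GMat, cv::GMat>(cv::GMat)>,
              "sample.age-gender");

    auto pp = cv::gapi::ov::Params<AgeGender>{"model.xml", "model.bin", "CPU"}
        .cfgInputLayers({"data"})                 // one name per G_API_NET input
        .cfgOutputLayers({"age_conv3", "prob"});  // one name per output, in order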
+ */ + Params& cfgOutputTensorLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor layout") + .output_tensor_layout = std::move(layout); + return *this; + } + + /** @overload + @param layout_map Map of pairs: name of corresponding output layer + and its tensor layout represented in std::string ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgOutputTensorLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor layout") + .output_tensor_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies model layout for an output layer. + + The function is used to set model layout for an output layer. + + @param layout Model layout ("NCHW", "NHWC", etc) + will be applied to all output layers. + @return reference to this parameter structure. + */ + Params& cfgOutputModelLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output model layout") + .output_model_layout = std::move(layout); + return *this; + } + + /** @overload + @param layout_map Map of pairs: name of corresponding output layer + and its model layout ("NCHW", "NHWC", etc) + @return reference to this parameter structure. + */ + Params& + cfgOutputModelLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output model layout") + .output_model_layout = std::move(layout_map); + return *this; + } + + /** @brief Specifies tensor precision for an output layer. + + The function is used to set tensor precision for an output layer.. + + @param precision Precision in OpenCV format (CV_8U, CV_32F, ...) + will be applied to all output layers. + @return reference to this parameter structure. + */ + Params& cfgOutputTensorPrecision(int precision) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor precision") + .output_tensor_precision = precision; + return *this; + } + + /** @overload + + @param precision_map Map of pairs: name of corresponding output layer + and its precision in OpenCV format (CV_8U, CV_32F, ...) + @return reference to this parameter structure. + */ + Params& + cfgOutputTensorPrecision(detail::AttrMap precision_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor precision") + .output_tensor_precision = std::move(precision_map); + return *this; + } + + /** @brief Specifies the new shape for input layers. + + The function is used to set new shape for input layers. + + @param new_shape New shape will be applied to all input layers. + @return reference to this parameter structure. + */ + Params& + cfgReshape(std::vector new_shape) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "reshape") + .new_shapes = std::move(new_shape); + return *this; + } + + /** @overload + + @param new_shape_map Map of pairs: name of corresponding output layer + and its new shape. + @return reference to this parameter structure. + */ + Params& + cfgReshape(detail::AttrMap> new_shape_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "reshape") + .new_shapes = std::move(new_shape_map); + return *this; + } + + /** @brief Specifies number of asynchronous inference requests. + + @param nireq Number of inference asynchronous requests. + @return reference to this parameter structure. 
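The tensor/model layout pair describes both sides of the conversion: the tensor layout is what the user actually feeds in, the model layout is what the network expects, and OpenVINO derives the transposition between them. A sketch, continuing the hypothetical pp object above (this mirrors the batch test later in this patch):

    pp.cfgInputModelLayout("NCHW")         // the model consumes planar data
      .cfgInputTensorLayout("NHWC")        // but we pass interleaved images
      .cfgOutputTensorPrecision(CV_32F);   // and want float outputs back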
+ */ + Params& cfgNumRequests(const size_t nireq) { + if (nireq == 0) { + cv::util::throw_error( + std::logic_error("Number of inference requests" + " must be greater than zero.")); + } + m_desc.nireq = nireq; + return *this; + } + + /** @brief Specifies mean values for preprocessing. + * + The function is used to set mean values for input layer preprocessing. + + @param mean_values Float vector contains mean values + @return reference to this parameter structure. + */ + Params& cfgMean(std::vector mean_values) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "mean values") + .mean_values = std::move(mean_values); + return *this; + } + + /** @overload + + @param mean_map Map of pairs: name of corresponding input layer + and its mean values. + @return reference to this parameter structure. + */ + Params& cfgMean(detail::AttrMap> mean_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "mean values") + .mean_values = std::move(mean_map); + return *this; + } + + /** @brief Specifies scale values for preprocessing. + * + The function is used to set scale values for input layer preprocessing. + + @param scale_values Float vector contains scale values + @return reference to this parameter structure. + */ + Params& cfgScale(std::vector scale_values) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "scale values") + .scale_values = std::move(scale_values); + return *this; + } + + /** @overload + + @param scale_map Map of pairs: name of corresponding input layer + and its mean values. + @return reference to this parameter structure. + */ + Params& cfgScale(detail::AttrMap> scale_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "scale values") + .scale_values = std::move(scale_map); + return *this; + } + + /** @brief Specifies resize interpolation algorithm. + * + The function is used to configure resize preprocessing for input layer. + + @param interpolation Resize interpolation algorithm. + Supported algorithms: #INTER_NEAREST, #INTER_LINEAR, #INTER_CUBIC. + @return reference to this parameter structure. + */ + Params& cfgResize(int interpolation) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "resize preprocessing") + .interpolation = std::move(interpolation); + return *this; + } + + /** @overload + + @param interpolation Map of pairs: name of corresponding input layer + and its resize algorithm. + @return reference to this parameter structure. + */ + Params& cfgResize(detail::AttrMap interpolation) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "resize preprocessing") + .interpolation = std::move(interpolation); + return *this; + } + + // BEGIN(G-API's network parametrization API) + GBackend backend() const { return cv::gapi::ov::backend(); } + std::string tag() const { return Net::tag(); } + cv::util::any params() const { return { m_desc }; } + // END(G-API's network parametrization API) + +protected: + detail::ParamDesc m_desc; +}; + +/* +* @brief This structure provides functions for generic network type that +* fill inference parameters. +* @see struct Generic +*/ +template<> +class Params { +public: + /** @brief Class constructor. + + Constructs Params based on model information and specifies default values for other + inference description parameters. Model is loaded and compiled using "OpenVINO Toolkit". + + @param tag string tag of the network for which these parameters are intended. + @param model_path Path to a model. + @param bin_path Path to a data file. + For IR format (*.bin): + If path is empty, will try to read a bin file with the same name as xml. 
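cfgMean/cfgScale feed OpenVINO's normalization step, which computes (input - mean) / scale per channel; e.g. to map U8 pixels from [0; 255] to roughly [-1; 1] (values illustrative):

    pp.cfgMean({127.5f, 127.5f, 127.5f})
      .cfgScale({127.5f, 127.5f, 127.5f});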
+ If the bin file with the same name is not found, will load IR without weights. + For PDPD (*.pdmodel) and ONNX (*.onnx) formats bin_path isn't used. + @param device target device to use. + */ + Params(const std::string &tag, + const std::string &model_path, + const std::string &bin_path, + const std::string &device) + : m_tag(tag), + m_desc( detail::ParamDesc::Kind{detail::ParamDesc::Model{model_path, bin_path}} + , device + , true /* is generic */ + , 0u + , 0u) { + } + + /** @overload + + This constructor for pre-compiled networks. Model is imported from pre-compiled + blob. + + @param tag string tag of the network for which these parameters are intended. + @param blob_path path to the compiled model (*.blob). + @param device target device to use. + */ + Params(const std::string &tag, + const std::string &blob_path, + const std::string &device) + : m_tag(tag), + m_desc( detail::ParamDesc::Kind{detail::ParamDesc::CompiledModel{blob_path}} + , device + , true /* is generic */ + , 0u + , 0u) { + } + + /** @see ov::Params::cfgPluginConfig. */ + Params& cfgPluginConfig(const detail::ParamDesc::PluginConfigT &config) { + m_desc.config = config; + return *this; + } + + /** @see ov::Params::cfgInputTensorLayout. */ + Params& cfgInputTensorLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input tensor layout") + .input_tensor_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgInputTensorLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input tensor layout") + .input_tensor_layout = std::move(layout_map); + return *this; + } + + /** @see ov::Params::cfgInputModelLayout. */ + Params& cfgInputModelLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input model layout") + .input_model_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgInputModelLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "input model layout") + .input_model_layout = std::move(layout_map); + return *this; + } + + /** @see ov::Params::cfgOutputTensorLayout. */ + Params& cfgOutputTensorLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor layout") + .output_tensor_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgOutputTensorLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor layout") + .output_tensor_layout = std::move(layout_map); + return *this; + } + + /** @see ov::Params::cfgOutputModelLayout. */ + Params& cfgOutputModelLayout(std::string layout) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output model layout") + .output_model_layout = std::move(layout); + return *this; + } + + /** @overload */ + Params& + cfgOutputModelLayout(detail::AttrMap layout_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output model layout") + .output_model_layout = std::move(layout_map); + return *this; + } + + /** @see ov::Params::cfgOutputTensorPrecision. */ + Params& cfgOutputTensorPrecision(int precision) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor precision") + .output_tensor_precision = precision; + return *this; + } + + /** @overload */ + Params& + cfgOutputTensorPrecision(detail::AttrMap precision_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "output tensor precision") + .output_tensor_precision = std::move(precision_map); + return *this; + } + + /** @see ov::Params::cfgReshape. 
*/ + Params& cfgReshape(std::vector new_shape) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "reshape") + .new_shapes = std::move(new_shape); + return *this; + } + + /** @overload */ + Params& + cfgReshape(detail::AttrMap> new_shape_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "reshape") + .new_shapes = std::move(new_shape_map); + return *this; + } + + /** @see ov::Params::cfgNumRequests. */ + Params& cfgNumRequests(const size_t nireq) { + if (nireq == 0) { + cv::util::throw_error( + std::logic_error("Number of inference requests" + " must be greater than zero.")); + } + m_desc.nireq = nireq; + return *this; + } + + /** @see ov::Params::cfgMean. */ + Params& cfgMean(std::vector mean_values) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "mean values") + .mean_values = std::move(mean_values); + return *this; + } + + /** @overload */ + Params& cfgMean(detail::AttrMap> mean_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "mean values") + .mean_values = std::move(mean_map); + return *this; + } + + /** @see ov::Params::cfgScale. */ + Params& cfgScale(std::vector scale_values) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "scale values") + .scale_values = std::move(scale_values); + return *this; + } + + /** @overload */ + Params& cfgScale(detail::AttrMap> scale_map) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "scale values") + .scale_values = std::move(scale_map); + return *this; + } + + /** @see ov::Params::cfgResize. */ + Params& cfgResize(int interpolation) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "resize preprocessing") + .interpolation = std::move(interpolation); + return *this; + } + + /** @overload */ + Params& cfgResize(detail::AttrMap interpolation) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "resize preprocessing") + .interpolation = std::move(interpolation); + return *this; + } + + // BEGIN(G-API's network parametrization API) + GBackend backend() const { return cv::gapi::ov::backend(); } + std::string tag() const { return m_tag; } + cv::util::any params() const { return { m_desc }; } + // END(G-API's network parametrization API) + +protected: + std::string m_tag; + detail::ParamDesc m_desc; +}; + +} // namespace ov +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_INFER_OV_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/cap.hpp b/modules/gapi/include/opencv2/gapi/streaming/cap.hpp index adf1133c3f..9c2185c1ab 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/cap.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/cap.hpp @@ -22,6 +22,7 @@ * because of this file. */ #include +#include #include #include @@ -47,8 +48,16 @@ namespace wip { class GCaptureSource: public IStreamSource { public: - explicit GCaptureSource(int id) : cap(id) { prep(); } - explicit GCaptureSource(const std::string &path) : cap(path) { prep(); } + explicit GCaptureSource(int id, const std::map &properties = {}) + : cap(id) { prep(properties); } + + explicit GCaptureSource(const std::string &path, + const std::map &properties = {}) + : cap(path) { prep(properties); } + + void set(int propid, double value) { + cap.set(propid, value); + } // TODO: Add more constructor overloads to make it // fully compatible with VideoCapture's interface. 
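A sketch of the properties-map constructor above, matching the way the semantic segmentation sample below opens a camera (the property IDs are the usual cv::CAP_PROP_* ones):

    auto src = std::make_shared<cv::gapi::wip::GCaptureSource>(0,
        std::map<int, double>{
            {cv::CAP_PROP_FRAME_WIDTH,  1280},
            {cv::CAP_PROP_FRAME_HEIGHT, 720}});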
@@ -59,8 +68,12 @@ protected: bool first_pulled = false; int64_t counter = 0; - void prep() + void prep(const std::map<int, double> &properties) { + for (const auto &it : properties) { + cap.set(it.first, it.second); + } + // Prepare first frame to report its meta to engine // when needed GAPI_Assert(first.empty()); @@ -114,15 +127,19 @@ protected: }; // NB: Overload for using from python -GAPI_EXPORTS_W cv::Ptr<IStreamSource> inline make_capture_src(const std::string& path) +GAPI_EXPORTS_W cv::Ptr<IStreamSource> +inline make_capture_src(const std::string& path, + const std::map<int, double>& properties = {}) { - return make_src<GCaptureSource>(path); + return make_src<GCaptureSource>(path, properties); } // NB: Overload for using from python -GAPI_EXPORTS_W cv::Ptr<IStreamSource> inline make_capture_src(const int id) +GAPI_EXPORTS_W cv::Ptr<IStreamSource> +inline make_capture_src(const int id, + const std::map<int, double>& properties = {}) { - return make_src<GCaptureSource>(id); + return make_src<GCaptureSource>(id, properties); } } // namespace wip diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 86273da321..70698ffd48 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -15,6 +15,7 @@ using gapi_GKernelPackage = cv::GKernelPackage; using gapi_GNetPackage = cv::gapi::GNetPackage; using gapi_ie_PyParams = cv::gapi::ie::PyParams; using gapi_onnx_PyParams = cv::gapi::onnx::PyParams; +using gapi_ov_PyParams = cv::gapi::ov::PyParams; using gapi_wip_IStreamSource_Ptr = cv::Ptr<cv::gapi::wip::IStreamSource>; using detail_ExtractArgsCallback = cv::detail::ExtractArgsCallback; using detail_ExtractMetaCallback = cv::detail::ExtractMetaCallback; @@ -22,6 +23,11 @@ using vector_GNetParam = std::vector<cv::gapi::GNetParam>; using vector_GMat = std::vector<cv::GMat>; using gapi_streaming_queue_capacity = cv::gapi::streaming::queue_capacity; using GStreamerSource_OutputType = cv::gapi::wip::GStreamerSource::OutputType; +using map_string_and_int = std::map<std::string, int>; +using map_string_and_string = std::map<std::string, std::string>; +using map_string_and_vector_size_t = std::map<std::string, std::vector<size_t>>; +using map_string_and_vector_float = std::map<std::string, std::vector<float>>; +using map_int_and_double = std::map<int, double>; // NB: Python wrapper generates T_U for T<U> // This behavior is only observed for inputs diff --git a/modules/gapi/misc/python/shadow_gapi.hpp b/modules/gapi/misc/python/shadow_gapi.hpp index cf81335e0b..c0c1e38136 100644 --- a/modules/gapi/misc/python/shadow_gapi.hpp +++ b/modules/gapi/misc/python/shadow_gapi.hpp @@ -80,5 +80,6 @@ namespace detail { gapi::GNetParam GAPI_EXPORTS_W strip(gapi::ie::PyParams params); gapi::GNetParam GAPI_EXPORTS_W strip(gapi::onnx::PyParams params); + gapi::GNetParam GAPI_EXPORTS_W strip(gapi::ov::PyParams params); } // namespace detail } // namespace cv diff --git a/modules/gapi/misc/python/test/test_gapi_infer_ov.py b/modules/gapi/misc/python/test/test_gapi_infer_ov.py new file mode 100644 index 0000000000..b4022b6e2d --- /dev/null +++ b/modules/gapi/misc/python/test/test_gapi_infer_ov.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python + +import numpy as np +import cv2 as cv +import os +import sys +import unittest + +from tests_common import NewOpenCVTests + + +try: + + if sys.version_info[:2] < (3, 0): + raise unittest.SkipTest('Python 2.x is not supported') + + + openvino_is_available = True + try: + from openvino.runtime import Core, Type, Layout, PartialShape + from openvino.preprocess import ResizeAlgorithm, PrePostProcessor + except ImportError: + openvino_is_available = False + + + def skip_if_openvino_not_available(): + if not openvino_is_available: + raise unittest.SkipTest("OpenVINO isn't available from
python.") + + + class AgeGenderOV: + def __init__(self, model_path, bin_path, device): + self.device = device + self.core = Core() + self.model = self.core.read_model(model_path, bin_path) + + + def reshape(self, new_shape): + self.model.reshape(new_shape) + + + def cfgPrePostProcessing(self, pp_callback): + ppp = PrePostProcessor(self.model) + pp_callback(ppp) + self.model = ppp.build() + + + def apply(self, in_data): + compiled_model = self.core.compile_model(self.model, self.device) + infer_request = compiled_model.create_infer_request() + results = infer_request.infer(in_data) + ov_age = results['age_conv3'].squeeze() + ov_gender = results['prob'].squeeze() + return ov_age, ov_gender + + + class AgeGenderGAPI: + tag = 'age-gender-net' + + def __init__(self, model_path, bin_path, device): + g_in = cv.GMat() + inputs = cv.GInferInputs() + inputs.setInput('data', g_in) + # TODO: It'd be nice to pass dict instead. + # E.g cv.gapi.infer("net", {'data': g_in}) + outputs = cv.gapi.infer(AgeGenderGAPI.tag, inputs) + age_g = outputs.at("age_conv3") + gender_g = outputs.at("prob") + + self.comp = cv.GComputation(cv.GIn(g_in), cv.GOut(age_g, gender_g)) + self.pp = cv.gapi.ov.params(AgeGenderGAPI.tag, \ + model_path, bin_path, device) + + + def apply(self, in_data): + compile_args = cv.gapi.compile_args(cv.gapi.networks(self.pp)) + gapi_age, gapi_gender = self.comp.apply(cv.gin(in_data), compile_args) + gapi_gender = gapi_gender.squeeze() + gapi_age = gapi_age.squeeze() + return gapi_age, gapi_gender + + + class test_gapi_infer_ov(NewOpenCVTests): + + def test_age_gender_infer_image(self): + skip_if_openvino_not_available() + + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' + + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img = cv.imread(img_path) + + # OpenVINO + def preproc(ppp): + ppp.input().model().set_layout(Layout("NCHW")) + ppp.input().tensor().set_element_type(Type.u8) \ + .set_spatial_static_shape(img.shape[0], img.shape[1]) \ + .set_layout(Layout("NHWC")) + ppp.input().preprocess().resize(ResizeAlgorithm.RESIZE_LINEAR) + + + ref = AgeGenderOV(model_path, bin_path, device_id) + ref.cfgPrePostProcessing(preproc) + ov_age, ov_gender = ref.apply(np.expand_dims(img, 0)) + + # OpenCV G-API (No preproc required) + comp = AgeGenderGAPI(model_path, bin_path, device_id) + gapi_age, gapi_gender = comp.apply(img) + + # Check + self.assertEqual(0.0, cv.norm(ov_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(ov_age, gapi_age, cv.NORM_INF)) + + + def test_age_gender_infer_tensor(self): + skip_if_openvino_not_available() + + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' + + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img = cv.imread(img_path) + + # Prepare data manually + tensor = cv.resize(img, (62, 62)).astype(np.float32) + tensor = np.transpose(tensor, (2, 0, 1)) + tensor = np.expand_dims(tensor, 0) + + # OpenVINO (No preproce required) + 
ref = AgeGenderOV(model_path, bin_path, device_id) + ov_age, ov_gender = ref.apply(tensor) + + # OpenCV G-API (No preproc required) + comp = AgeGenderGAPI(model_path, bin_path, device_id) + gapi_age, gapi_gender = comp.apply(tensor) + + # Check + self.assertEqual(0.0, cv.norm(ov_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(ov_age, gapi_age, cv.NORM_INF)) + + + def test_age_gender_infer_batch(self): + skip_if_openvino_not_available() + + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' + + img_path1 = self.find_file('cv/face/david1.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img_path2 = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img1 = cv.imread(img_path1) + img2 = cv.imread(img_path2) + # img1 and img2 have the same size + batch_img = np.array([img1, img2]) + + # OpenVINO + def preproc(ppp): + ppp.input().model().set_layout(Layout("NCHW")) + ppp.input().tensor().set_element_type(Type.u8) \ + .set_spatial_static_shape(img1.shape[0], img2.shape[1]) \ + .set_layout(Layout("NHWC")) + ppp.input().preprocess().resize(ResizeAlgorithm.RESIZE_LINEAR) + + + ref = AgeGenderOV(model_path, bin_path, device_id) + ref.reshape(PartialShape([2, 3, 62, 62])) + ref.cfgPrePostProcessing(preproc) + ov_age, ov_gender = ref.apply(batch_img) + + # OpenCV G-API + comp = AgeGenderGAPI(model_path, bin_path, device_id) + comp.pp.cfgReshape([2, 3, 62, 62]) \ + .cfgInputModelLayout("NCHW") \ + .cfgInputTensorLayout("NHWC") \ + .cfgResize(cv.INTER_LINEAR) + gapi_age, gapi_gender = comp.apply(batch_img) + + # Check + self.assertEqual(0.0, cv.norm(ov_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(ov_age, gapi_age, cv.NORM_INF)) + + + def test_age_gender_infer_planar(self): + skip_if_openvino_not_available() + + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' + + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img = cv.imread(img_path) + planar_img = np.transpose(img, (2, 0, 1)) + planar_img = np.expand_dims(planar_img, 0) + + # OpenVINO + def preproc(ppp): + ppp.input().tensor().set_element_type(Type.u8) \ + .set_spatial_static_shape(img.shape[0], img.shape[1]) + ppp.input().preprocess().resize(ResizeAlgorithm.RESIZE_LINEAR) + + + ref = AgeGenderOV(model_path, bin_path, device_id) + ref.cfgPrePostProcessing(preproc) + ov_age, ov_gender = ref.apply(planar_img) + + # OpenCV G-API + comp = AgeGenderGAPI(model_path, bin_path, device_id) + comp.pp.cfgResize(cv.INTER_LINEAR) + gapi_age, gapi_gender = comp.apply(planar_img) + + # Check + self.assertEqual(0.0, cv.norm(ov_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(ov_age, gapi_age, cv.NORM_INF)) + + +except unittest.SkipTest as e: + + message = str(e) + + class TestSkip(unittest.TestCase): + def setUp(self): + self.skipTest('Skip tests: ' + message) + + def test_skip(): + pass + + pass + + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() diff --git 
a/modules/gapi/perf/common/gapi_core_perf_tests.hpp b/modules/gapi/perf/common/gapi_core_perf_tests.hpp index 60294d2193..6104fea3bf 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests.hpp @@ -62,7 +62,7 @@ namespace opencv_test class InRangePerfTest : public TestPerfParams> {}; class Split3PerfTest : public TestPerfParams> {}; class Split4PerfTest : public TestPerfParams> {}; - class Merge3PerfTest : public TestPerfParams<tuple<compare_f, cv::Size, cv::GCompileArgs>> {}; + class Merge3PerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, cv::GCompileArgs>> {}; class Merge4PerfTest : public TestPerfParams> {}; class RemapPerfTest : public TestPerfParams> {}; class FlipPerfTest : public TestPerfParams> {}; diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp index 83ef13008c..3a777cff3d 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp @@ -1577,11 +1577,12 @@ PERF_TEST_P_(Merge3PerfTest, TestPerformance) { compare_f cmpF; cv::Size sz; + MatType type = -1; cv::GCompileArgs compile_args; - std::tie(cmpF, sz, compile_args) = GetParam(); + std::tie(cmpF, sz, type, compile_args) = GetParam(); - initMatsRandU(CV_8UC1, sz, CV_8UC3); - cv::Mat in_mat3(sz, CV_8UC1); + initMatsRandU(type, sz, CV_MAKETYPE(type, 3)); + cv::Mat in_mat3(sz, type); cv::Scalar mean = cv::Scalar::all(127); cv::Scalar stddev = cv::Scalar::all(40.f); cv::randn(in_mat3, mean, stddev); diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp index 2f91e07e52..f8e147973e 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp @@ -252,6 +252,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestCPU, Split4PerfTest, INSTANTIATE_TEST_CASE_P(Merge3PerfTestCPU, Merge3PerfTest, Combine(Values(AbsExact().to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), + Values(CV_8U), Values(cv::compile_args(CORE_CPU)))); INSTANTIATE_TEST_CASE_P(Merge4PerfTestCPU, Merge4PerfTest, diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index 83de793a81..8284896d6c 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -253,6 +253,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestFluid, Split4PerfTest, INSTANTIATE_TEST_CASE_P(Merge3PerfTestFluid, Merge3PerfTest, Combine(Values(AbsExact().to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), + Values(CV_8U, CV_16S, CV_16U, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(Merge4PerfTestFluid, Merge4PerfTest, diff --git a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp index 8aaa304e58..bcc9894d46 100644 --- a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp +++ b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp @@ -242,6 +242,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestGPU, Split4PerfTest, INSTANTIATE_TEST_CASE_P(Merge3PerfTestGPU, Merge3PerfTest, Combine(Values(AbsExact().to_compare_f()), Values( szSmall128, szVGA, sz720p, sz1080p ), + Values(CV_8U), Values(cv::compile_args(CORE_GPU)))); INSTANTIATE_TEST_CASE_P(Merge4PerfTestGPU, Merge4PerfTest, diff --git a/modules/gapi/samples/semantic_segmentation.cpp b/modules/gapi/samples/semantic_segmentation.cpp index fd3ec27750..db358816d1 100644 --- a/modules/gapi/samples/semantic_segmentation.cpp
+++ b/modules/gapi/samples/semantic_segmentation.cpp @@ -5,34 +5,41 @@ #include #include +#include +#include + +#include + const std::string keys = "{ h help | | Print this help message }" + "{ desync | false | Desynchronize inference }" "{ input | | Path to the input video file }" "{ output | | Path to the output video file }" "{ ssm | semantic-segmentation-adas-0001.xml | Path to OpenVINO IE semantic segmentation model (.xml) }"; // 20 colors for 20 classes of semantic-segmentation-adas-0001 -const std::vector colors = { - { 128, 64, 128 }, - { 232, 35, 244 }, - { 70, 70, 70 }, - { 156, 102, 102 }, - { 153, 153, 190 }, - { 153, 153, 153 }, - { 30, 170, 250 }, - { 0, 220, 220 }, - { 35, 142, 107 }, - { 152, 251, 152 }, - { 180, 130, 70 }, - { 60, 20, 220 }, - { 0, 0, 255 }, - { 142, 0, 0 }, - { 70, 0, 0 }, - { 100, 60, 0 }, - { 90, 0, 0 }, - { 230, 0, 0 }, - { 32, 11, 119 }, - { 0, 74, 111 }, +static std::vector colors = { + { 0, 0, 0 }, + { 0, 0, 128 }, + { 0, 128, 0 }, + { 0, 128, 128 }, + { 128, 0, 0 }, + { 128, 0, 128 }, + { 128, 128, 0 }, + { 128, 128, 128 }, + { 0, 0, 64 }, + { 0, 0, 192 }, + { 0, 128, 64 }, + { 0, 128, 192 }, + { 128, 0, 64 }, + { 128, 0, 192 }, + { 128, 128, 64 }, + { 128, 128, 192 }, + { 0, 64, 0 }, + { 0, 64, 128 }, + { 0, 192, 0 }, + { 0, 192, 128 }, + { 128, 64, 0 } }; namespace { @@ -43,12 +50,23 @@ std::string get_weights_path(const std::string &model_path) { auto ext = model_path.substr(sz - EXT_LEN); std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ - return static_cast(std::tolower(c)); - }); + return static_cast(std::tolower(c)); + }); CV_Assert(ext == ".xml"); return model_path.substr(0u, sz - EXT_LEN) + ".bin"; } +bool isNumber(const std::string &str) { + return !str.empty() && std::all_of(str.begin(), str.end(), + [](unsigned char ch) { return std::isdigit(ch); }); +} + +std::string toStr(double value) { + std::stringstream ss; + ss << std::fixed << std::setprecision(1) << value; + return ss.str(); +} + void classesToColors(const cv::Mat &out_blob, cv::Mat &mask_img) { const int H = out_blob.size[0]; @@ -97,6 +115,25 @@ void probsToClasses(const cv::Mat& probs, cv::Mat& classes) { } // anonymous namespace +namespace vis { + +static void putText(cv::Mat& mat, const cv::Point &position, const std::string &message) { + auto fontFace = cv::FONT_HERSHEY_COMPLEX; + int thickness = 2; + cv::Scalar color = {200, 10, 10}; + double fontScale = 0.65; + + cv::putText(mat, message, position, fontFace, + fontScale, cv::Scalar(255, 255, 255), thickness + 1); + cv::putText(mat, message, position, fontFace, fontScale, color, thickness); +} + +static void drawResults(cv::Mat &img, const cv::Mat &color_mask) { + img = img / 2 + color_mask / 2; +} + +} // namespace vis + namespace custom { G_API_OP(PostProcessing, , "sample.custom.post_processing") { static cv::GMatDesc outMeta(const cv::GMatDesc &in, const cv::GMatDesc &) { @@ -106,19 +143,34 @@ G_API_OP(PostProcessing, , "sample.custom.post_pro GAPI_OCV_KERNEL(OCVPostProcessing, PostProcessing) { static void run(const cv::Mat &in, const cv::Mat &out_blob, cv::Mat &out) { + int C = -1, H = -1, W = -1; + if (out_blob.size.dims() == 4u) { + C = 1; H = 2, W = 3; + } else if (out_blob.size.dims() == 3u) { + C = 0; H = 1, W = 2; + } else { + throw std::logic_error( + "Number of dimmensions for model output must be 3 or 4!"); + } cv::Mat classes; // NB: If output has more than single plane, it contains probabilities // otherwise class id. 
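probsToClasses() above is a per-pixel argmax over the channel axis; a minimal scalar reference of the same idea (a sketch, assuming a dense CV_32F CxHxW blob):

    static void argmaxRef(const cv::Mat &probs, cv::Mat &classes) {
        const int C = probs.size[0], H = probs.size[1], W = probs.size[2];
        classes.create(H, W, CV_8UC1);
        for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w) {
                int best = 0;  // index of the highest-scoring class so far
                for (int c = 1; c < C; ++c)
                    if (probs.at<float>(c, h, w) > probs.at<float>(best, h, w))
                        best = c;
                classes.at<uchar>(h, w) = static_cast<uchar>(best);
            }
    }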
- if (out_blob.size[1] > 1) { + if (out_blob.size[C] > 1) { probsToClasses(out_blob, classes); } else { - out_blob.convertTo(classes, CV_8UC1); - classes = classes.reshape(1, out_blob.size[2]); + if (out_blob.depth() != CV_32S) { + throw std::logic_error( + "Single channel output must have integer precision!"); + } + cv::Mat view(out_blob.size[H], // cols + out_blob.size[W], // rows + CV_32SC1, + out_blob.data); + view.convertTo(classes, CV_8UC1); } - cv::Mat mask_img; classesToColors(classes, mask_img); - cv::resize(mask_img, out, in.size()); + cv::resize(mask_img, out, in.size(), 0, 0, cv::INTER_NEAREST); } }; } // namespace custom @@ -134,6 +186,7 @@ int main(int argc, char *argv[]) { const std::string input = cmd.get("input"); const std::string output = cmd.get("output"); const auto model_path = cmd.get("ssm"); + const bool desync = cmd.get("desync"); const auto weights_path = get_weights_path(model_path); const auto device = "CPU"; G_API_NET(SemSegmNet, , "semantic-segmentation"); @@ -145,40 +198,87 @@ int main(int argc, char *argv[]) { // Now build the graph cv::GMat in; - cv::GMat out_blob = cv::gapi::infer(in); - cv::GMat post_proc_out = custom::PostProcessing::on(in, out_blob); - cv::GMat blending_in = in * 0.3f; - cv::GMat blending_out = post_proc_out * 0.7f; - cv::GMat out = blending_in + blending_out; + cv::GMat bgr = cv::gapi::copy(in); + cv::GMat frame = desync ? cv::gapi::streaming::desync(bgr) : bgr; + cv::GMat out_blob = cv::gapi::infer(frame); + cv::GMat out = custom::PostProcessing::on(frame, out_blob); - cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) - .compileStreaming(cv::compile_args(kernels, networks)); - auto inputs = cv::gin(cv::gapi::wip::make_src(input)); + cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(bgr, out)) + .compileStreaming(cv::compile_args(kernels, networks, + cv::gapi::streaming::queue_capacity{1})); + + std::shared_ptr source; + if (isNumber(input)) { + source = std::make_shared( + std::stoi(input), + std::map { + {cv::CAP_PROP_FRAME_WIDTH, 1280}, + {cv::CAP_PROP_FRAME_HEIGHT, 720}, + {cv::CAP_PROP_BUFFERSIZE, 1}, + {cv::CAP_PROP_AUTOFOCUS, true} + } + ); + } else { + source = std::make_shared(input); + } + auto inputs = cv::gin( + static_cast(source)); // The execution part pipeline.setSource(std::move(inputs)); - cv::VideoWriter writer; cv::TickMeter tm; - cv::Mat outMat; + cv::VideoWriter writer; + + cv::util::optional color_mask; + cv::util::optional image; + cv::Mat last_image; + cv::Mat last_color_mask; - std::size_t frames = 0u; - tm.start(); pipeline.start(); - while (pipeline.pull(cv::gout(outMat))) { - ++frames; - cv::imshow("Out", outMat); - cv::waitKey(1); - if (!output.empty()) { - if (!writer.isOpened()) { - const auto sz = cv::Size{outMat.cols, outMat.rows}; - writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); - CV_Assert(writer.isOpened()); + tm.start(); + + std::size_t frames = 0u; + std::size_t masks = 0u; + while (pipeline.pull(cv::gout(image, color_mask))) { + if (image.has_value()) { + ++frames; + last_image = std::move(*image); + } + + if (color_mask.has_value()) { + ++masks; + last_color_mask = std::move(*color_mask); + } + + if (!last_image.empty() && !last_color_mask.empty()) { + tm.stop(); + + std::string stream_fps = "Stream FPS: " + toStr(frames / tm.getTimeSec()); + std::string inference_fps = "Inference FPS: " + toStr(masks / tm.getTimeSec()); + + cv::Mat tmp = last_image.clone(); + + vis::drawResults(tmp, last_color_mask); + 
vis::putText(tmp, {10, 22}, stream_fps); + vis::putText(tmp, {10, 22 + 30}, inference_fps); + + cv::imshow("Out", tmp); + cv::waitKey(1); + if (!output.empty()) { + if (!writer.isOpened()) { + const auto sz = cv::Size{tmp.cols, tmp.rows}; + writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); + CV_Assert(writer.isOpened()); + } + writer << tmp; } - writer << outMat; + + tm.start(); } } tm.stop(); - std::cout << "Processed " << frames << " frames" << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl; + std::cout << "Processed " << frames << " frames" << " (" + << frames / tm.getTimeSec()<< " FPS)" << std::endl; return 0; } diff --git a/modules/gapi/src/api/gmat.cpp b/modules/gapi/src/api/gmat.cpp index 47a246c293..03f2e736be 100644 --- a/modules/gapi/src/api/gmat.cpp +++ b/modules/gapi/src/api/gmat.cpp @@ -153,10 +153,18 @@ std::ostream& operator<<(std::ostream& os, const cv::GMatDesc &desc) break; } - os << "C" << desc.chan; - if (desc.planar) os << "p"; - os << " "; - os << desc.size.width << "x" << desc.size.height; + if (desc.isND()) { + os << " ["; + for (size_t i = 0; i < desc.dims.size() - 1; ++i) { + os << desc.dims[i] << "x"; + } + os << desc.dims.back() << "]"; + } else { + os << "C" << desc.chan; + if (desc.planar) os << "p"; + os << " "; + os << desc.size.width << "x" << desc.size.height; + } return os; } diff --git a/modules/gapi/src/backends/common/gbackend.hpp b/modules/gapi/src/backends/common/gbackend.hpp index b05d8e2c36..794af94d3a 100644 --- a/modules/gapi/src/backends/common/gbackend.hpp +++ b/modules/gapi/src/backends/common/gbackend.hpp @@ -227,6 +227,12 @@ inline void convertInt64ToInt32(const int64_t* src, int* dst, size_t size) [](int64_t el) { return static_cast(el); }); } +inline void convertInt32ToInt64(const int* src, int64_t* dst, size_t size) +{ + std::transform(src, src + size, dst, + [](int el) { return static_cast(el); }); +} + }} // cv::gimpl #endif // OPENCV_GAPI_GBACKEND_HPP diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index 7a8f1f5ed8..c2686c7bd3 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -2320,12 +2320,15 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3) { + GAPI_Assert((src.meta().depth == CV_8U) && (dst1.meta().depth == CV_8U) && + (dst2.meta().depth == CV_8U) && (dst3.meta().depth == CV_8U) && + (3 == src.meta().chan)); + const auto *in = src.InLine(0); auto *out1 = dst1.OutLine(); auto *out2 = dst2.OutLine(); auto *out3 = dst3.OutLine(); - GAPI_Assert(3 == src.meta().chan); int width = src.length(); int w = 0; @@ -2348,13 +2351,16 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4) { + GAPI_Assert((src.meta().depth == CV_8U) && (dst1.meta().depth == CV_8U) && + (dst2.meta().depth == CV_8U) && (dst3.meta().depth == CV_8U) && + (dst4.meta().depth == CV_8U) && (4 == src.meta().chan)); + const auto *in = src.InLine(0); auto *out1 = dst1.OutLine(); auto *out2 = dst2.OutLine(); auto *out3 = dst3.OutLine(); auto *out4 = dst4.OutLine(); - GAPI_Assert(4 == src.meta().chan); int width = src.length(); int w = 0; @@ -2372,31 +2378,46 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false) } }; +template +CV_ALWAYS_INLINE void run_merge3(Buffer& dst, const View& src1, const View& src2, const 
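With the isND() branch added to the GMatDesc printer above, descriptors carrying explicit dims print them as a bracketed list; a sketch of the two output shapes (the depth tag in front is elided here):

    std::stringstream ss1, ss2;
    ss1 << cv::GMatDesc(CV_32F, {1, 3, 224, 224});       // "... [1x3x224x224]"
    ss2 << cv::GMatDesc(CV_8U, 3, cv::Size(640, 480));   // "... C3 640x480"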
View& src3) +{ + const auto* in1 = src1.InLine(0); + const auto* in2 = src2.InLine(0); + const auto* in3 = src3.InLine(0); + auto* out = dst.OutLine(); + + int width = dst.length(); + int w = 0; + +#if CV_SIMD + w = merge3_simd(in1, in2, in3, out, width); +#endif + + for (; w < width; w++) + { + out[3 * w] = in1[w]; + out[3 * w + 1] = in2[w]; + out[3 * w + 2] = in3[w]; + } +} + GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false) { static const int Window = 1; - static void run(const View &src1, const View &src2, const View &src3, Buffer &dst) + static void run(const View& src1, const View& src2, const View& src3, Buffer& dst) { - const auto *in1 = src1.InLine(0); - const auto *in2 = src2.InLine(0); - const auto *in3 = src3.InLine(0); - auto *out = dst.OutLine(); - - GAPI_Assert(3 == dst.meta().chan); - int width = dst.length(); - int w = 0; + GAPI_Assert((src1.meta().depth == dst.meta().depth) && + (src1.meta().depth == src2.meta().depth) && + (src1.meta().depth == src3.meta().depth)); - #if CV_SIMD - w = merge3_simd(in1, in2, in3, out, width); - #endif + // SRC/DST TYPE OP __VA_ARGS__ + MERGE3_(uchar, run_merge3, dst, src1, src2, src3); + MERGE3_(ushort, run_merge3, dst, src1, src2, src3); + MERGE3_(short, run_merge3, dst, src1, src2, src3); + MERGE3_(float, run_merge3, dst, src1, src2, src3); - for (; w < width; w++) - { - out[3*w ] = in1[w]; - out[3*w + 1] = in2[w]; - out[3*w + 2] = in3[w]; - } + CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } }; @@ -2407,13 +2428,16 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false) static void run(const View &src1, const View &src2, const View &src3, const View &src4, Buffer &dst) { + GAPI_Assert((dst.meta().depth == CV_8U) && (src1.meta().depth == CV_8U) && + (src2.meta().depth == CV_8U) && (src3.meta().depth == CV_8U) && + (4 == dst.meta().chan)); + const auto *in1 = src1.InLine(0); const auto *in2 = src2.InLine(0); const auto *in3 = src3.InLine(0); const auto *in4 = src4.InLine(0); auto *out = dst.OutLine(); - GAPI_Assert(4 == dst.meta().chan); int width = dst.length(); int w = 0; // cycle counter diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index 6171bff802..05d3417024 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -277,13 +277,21 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[], CV_CPU_DISPATCH_MODES_ALL); } -int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width) -{ - CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width), - CV_CPU_DISPATCH_MODES_ALL); +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width) \ +{ \ + CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width), \ + CV_CPU_DISPATCH_MODES_ALL); \ } +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + +#undef MERGE3_SIMD + int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width) { diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index aec03c0b50..0511f4e095 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -216,8 +216,16 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[], int 
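With the typed run_merge3 above dispatched by the MERGE3_ macro (defined in gfluidutils.hpp further down), the Fluid merge3 kernel now also accepts 16-bit and float planes; a small end-to-end sketch:

    cv::GMat a, b, c;
    cv::GMat m = cv::gapi::merge3(a, b, c);
    cv::GComputation comp(cv::GIn(a, b, c), cv::GOut(m));

    cv::Mat p1(480, 640, CV_32FC1), p2(480, 640, CV_32FC1),
            p3(480, 640, CV_32FC1), out;
    comp.apply(cv::gin(p1, p2, p3), cv::gout(out),
               cv::compile_args(cv::gapi::core::fluid::kernels()));
    // out is CV_32FC3 with the three planes interleaved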
split4_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], uchar out4[], const int width); -int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width); +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width); + +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + +#undef MERGE3_SIMD int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width); diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index d1fe33fa2e..aed0ee97d8 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -322,12 +322,21 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[], int split4_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], uchar out4[], const int width); -int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width); +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width); + +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + +#undef MERGE3_SIMD int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width); + #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY #define SRC_SHORT_OR_USHORT std::is_same::value || std::is_same::value @@ -2530,33 +2539,41 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[], // //------------------------- -int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width) -{ - constexpr int nlanes = v_uint8::nlanes; - if (width < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= width - nlanes; x += nlanes) - { - v_uint8 a, b, c; - a = vx_load(&in1[x]); - b = vx_load(&in2[x]); - c = vx_load(&in3[x]); - v_store_interleave(&out[3 * x], a, b, c); - } - if (x < width) - { - x = width - nlanes; - continue; - } - break; - } - return x; -} +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width) \ +{ \ + constexpr int nlanes = vector_type_of_t::nlanes; \ + if (width < nlanes) \ + return 0; \ + \ + int x = 0; \ + for (;;) \ + { \ + for (; x <= width - nlanes; x += nlanes) \ + { \ + vector_type_of_t a, b, c; \ + a = vx_load(&in1[x]); \ + b = vx_load(&in2[x]); \ + c = vx_load(&in3[x]); \ + v_store_interleave(&out[3 * x], a, b, c); \ + } \ + if (x < width) \ + { \ + x = width - nlanes; \ + continue; \ + } \ + break; \ + } \ + return x; \ +} + +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + +#undef MERGE3_SIMD //------------------------- // @@ -2926,6 +2943,8 @@ CV_ALWAYS_INLINE void convertto_simd_nocoeff_impl(const SRC* inx, float* outx) int convertto_simd(const SRC in[], DST out[], const int length) \ { \ constexpr int nlanes = vector_type_of_t::nlanes; \ + if (length < nlanes) \ + return 0; \ \ int x = 0; \ for (;;) \ @@ -3093,6 +3112,9 @@ int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ const float beta, const int length) \ { \ constexpr int nlanes = vector_type_of_t::nlanes; \ + if (length < nlanes) \ + return 0; \ + \ v_float32 v_alpha = vx_setall_f32(alpha); \ v_float32 v_beta = vx_setall_f32(beta); \ \ diff --git 
a/modules/gapi/src/backends/fluid/gfluidutils.hpp b/modules/gapi/src/backends/fluid/gfluidutils.hpp index 4da16f2dee..f7eff8d3b8 100644 --- a/modules/gapi/src/backends/fluid/gfluidutils.hpp +++ b/modules/gapi/src/backends/fluid/gfluidutils.hpp @@ -86,6 +86,23 @@ using cv::gapi::own::rintd; return; \ } +#define MERGE3_(T, OP, ...) \ + if (cv::DataType::depth == dst.meta().depth && \ + cv::DataType::depth == src1.meta().depth) \ + { \ + GAPI_DbgAssert(dst.length() == src1.length()); \ + GAPI_DbgAssert(dst.length() == src2.length()); \ + GAPI_DbgAssert(dst.length() == src3.length()); \ + \ + GAPI_DbgAssert(1 == src1.meta().chan); \ + GAPI_DbgAssert(1 == src2.meta().chan); \ + GAPI_DbgAssert(1 == src3.meta().chan); \ + GAPI_DbgAssert(3 == dst.meta().chan); \ + \ + OP(__VA_ARGS__); \ + return; \ + } + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 70c53ae295..a257c21252 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation #include "precomp.hpp" @@ -71,6 +71,28 @@ namespace IE = InferenceEngine; namespace { +IE::Layout toIE(const std::string &layout) { + const std::unordered_map layouts = { + {"NCDHW", IE::Layout::NCDHW}, + {"NDHWC", IE::Layout::NDHWC}, + {"NHWC" , IE::Layout::NHWC }, + {"NCHW" , IE::Layout::NCHW }, + {"CHW" , IE::Layout::CHW }, + {"HWC" , IE::Layout::HWC }, + {"HW" , IE::Layout::HW }, + {"NC" , IE::Layout::NC }, + {"CN" , IE::Layout::CN }, + {"C" , IE::Layout::C }, + }; + + const auto it = layouts.find(layout); + if (it == layouts.end()) { + cv::util::throw_error( + std::logic_error("IE Backend: Unsupported layout: " + layout)); + } + return it->second; +}; + inline IE::ROI toIE(const cv::Rect &rc) { return IE::ROI { 0u @@ -130,11 +152,90 @@ inline int toCV(IE::Precision prec) { return -1; } +inline IE::ResizeAlgorithm toIEInterp(int interpolation) { + switch (interpolation) { + case cv::INTER_LINEAR: return IE::RESIZE_BILINEAR; + case cv::INTER_AREA: return IE::RESIZE_AREA; + default: GAPI_Error("IE Backend: Unsupported resize algorithm"); + } + // Unreachable code + GAPI_Assert(false); +} + +template +using AttrMap = cv::gapi::ie::detail::AttrMap; + +template +using LayerVariantAttr = cv::gapi::ie::detail::LayerVariantAttr; + +template AttrMap +broadcastLayerAttr(const LayerVariantAttr &layer_attr, + const std::vector &layer_names) { + AttrMap map; + if (cv::util::holds_alternative>(layer_attr)) { + map = cv::util::get>(layer_attr); + // NB: Validate map: + std::unordered_set existing_layers = + {layer_names.begin(), layer_names.end()}; + + for (const auto &p : map) { + const auto it = existing_layers.find(p.first); + if (it == existing_layers.end()) { + cv::util::throw_error( + std::logic_error("IE Backend: Failed to" + " find layer with name: " + p.first)); + } + } + } else if (cv::util::holds_alternative(layer_attr)) { + // NB: Broadcast value to all layers. 
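The toIE() helper above maps layout strings 1:1 onto IE::Layout values and throws on anything it does not know; sketched:

    const IE::Layout nhwc = toIE("NHWC");  // IE::Layout::NHWC
    // toIE("NCWH") -> std::logic_error("IE Backend: Unsupported layout: NCWH")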
+ auto elem = cv::util::get<T>(layer_attr); + for (auto &&layer_name : layer_names) { + map.emplace(layer_name, elem); + } + } + return map; +} + +// TODO: Move it to some common place +template <typename K, typename V> +cv::optional<V> lookUp(const std::map<K, V> &map, const K& key) { + const auto it = map.find(key); + if (it == map.end()) { + return {}; + } + return cv::util::make_optional(std::move(it->second)); +} + +static bool isImage(const cv::GMatDesc &desc, + const IE::SizeVector &model_dims) { + return (model_dims.size() == 4u) && + (!desc.isND()) /* dims == 2 */ && + (desc.chan == 1 || desc.chan == 3) && + (desc.size.height != 1 && desc.size.width != 1) && + (desc.depth == CV_8U); +} + +cv::gapi::ie::TraitAs clarifyTrait(const cv::GMatDesc &mat_desc, + const IE::SizeVector &model_dims) { + if (isImage(mat_desc, model_dims)) { + return cv::gapi::ie::TraitAs::IMAGE; + } + return cv::gapi::ie::TraitAs::TENSOR; +} + +cv::gapi::ie::TraitAs clarifyTrait(const cv::GMetaArg &meta, + const IE::SizeVector &model_dims) { + // NB: All media formats: BGR, NV12, Gray + // are treated as images. + if (cv::util::holds_alternative<cv::GFrameDesc>(meta)) { + return cv::gapi::ie::TraitAs::IMAGE; + } + GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(meta)); + return clarifyTrait(cv::util::get<cv::GMatDesc>(meta), model_dims); +} + inline IE::TensorDesc toIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) { const auto &sz = mat.size; - // NB: For some reason RGB image is 2D image - // (since channel component is not counted here). - // Note: regular 2D vectors also fall into this category if (sz.dims() == 2 && hint == cv::gapi::ie::TraitAs::IMAGE) { // NB: This logic is mainly taken from IE samples @@ -155,8 +256,72 @@ inline IE::TensorDesc toIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) { return IE::TensorDesc(toIE(mat.depth()), toIE(sz), toIELayout(sz.dims())); } -inline IE::Blob::Ptr wrapIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) { - const auto tDesc = toIE(mat, hint); +// NB: Inference dimensions always follow NCDHW order +// even though the real layout is different. +// E.g. if the user provided Mat({1, 240, 320, 3}, CV_8U) + NHWC layout, +// we need to create Blob(U8, {1, 3, 240, 320}, NHWC). +inline IE::SizeVector toIEDims(const IE::SizeVector &dims, + const IE::Layout layout) { + switch (layout) { + case IE::Layout::NDHWC: // NCDHW + return {dims[0], dims[4], dims[1], dims[2], dims[3]}; + case IE::Layout::NHWC: // NCHW + return {dims[0], dims[3], dims[1], dims[2]}; + case IE::Layout::HWC: // CHW + return {dims[2], dims[0], dims[1]}; + default: return dims; + } + GAPI_Assert(false); +} + +// NB: Inference dimensions always follow NCDHW order +// even though the real layout is different.
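broadcastLayerAttr() normalizes both non-empty LayerVariantAttr states into one per-layer map; a sketch of the broadcast case (layer names hypothetical):

    const std::vector<std::string> layers = {"data1", "data2"};
    LayerVariantAttr<std::string> attr = std::string("NHWC");
    const AttrMap<std::string> m = broadcastLayerAttr(attr, layers);
    // m == {{"data1", "NHWC"}, {"data2", "NHWC"}}; a map input is returned
    // as-is after checking every key against the real layer names.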
+// E.g if U8 blob has {1, 3, 240, 320} dims and NHWC layout +// need to create cv::Mat({1, 240, 320, 3}, CV_8U); +inline std::vector toCVDims(const std::vector &dims, + const IE::Layout layout) { + switch (layout) { + case IE::Layout::NDHWC: // NCDHW + return {dims[0], dims[2], dims[3], dims[4], dims[1]}; + case IE::Layout::NHWC: // NCHW + return {dims[0], dims[2], dims[3], dims[1]}; + case IE::Layout::HWC: // CHW + return {dims[1], dims[2], dims[0]}; + default: return dims; + } + GAPI_Assert(false); +} + +inline IE::TensorDesc toIE(const cv::Mat &mat, + const cv::gapi::ie::TraitAs hint, + const IE::Layout layout) { + const auto &sz = mat.size; + if (sz.dims() == 2 && hint == cv::gapi::ie::TraitAs::IMAGE) + { + // NB: This logic is mainly taken from IE samples + const size_t channels = mat.channels(); + const size_t height = mat.size().height; + const size_t width = mat.size().width; + + const size_t strideH = mat.step1(); + IE::BlockingDesc bdesc({1, height, width, channels} /* blocking dims */, + {0, 2, 3, 1} /* order for NHWC */, + 0 /* offset */, + {0, 0, 0, 0} /* offsets for dims */, + {strideH * height, strideH, channels, 1} /* strides for dims */); + + return IE::TensorDesc(toIE(mat.depth()), + IE::SizeVector{1, channels, height, width}, bdesc); + } + return IE::TensorDesc(toIE(mat.depth()), + toIEDims(toIE(sz), layout), + layout); +} + +inline IE::Blob::Ptr wrapIE(const cv::Mat &mat, + cv::gapi::ie::TraitAs hint, + const IE::Layout layout = IE::Layout::ANY) { + const auto tDesc = toIE(mat, hint, layout); switch (mat.depth()) { // NB: Seems there's no way to create an untyped (T-less) Blob::Ptr // in IE given only precision via TensorDesc. So we have to do this: @@ -303,6 +468,7 @@ struct IEUnit { }; InputFramesDesc net_input_params; + std::unordered_map inputs_type; explicit IEUnit(const cv::gapi::ie::detail::ParamDesc &pp) : params(pp) { @@ -481,6 +647,8 @@ public: cv::GRunArgP output (std::size_t idx); cv::Mat& outMatR(std::size_t idx); + cv::gapi::ie::TraitAs getInputType(const std::string &layer_name) const; + const IEUnit &uu; cv::gimpl::GIslandExecutable::IOutput &out; @@ -524,6 +692,9 @@ private: // keep alive preprocessed frames std::mutex keep_alive_frames_mutex; std::unordered_map keep_alive_pp_frames; + + // NB: Hint to wrap input data properly into IE::Blob (see: wrapIE) + std::unordered_map input_type; }; IECallContext::IECallContext(const IEUnit & unit, @@ -558,6 +729,16 @@ IECallContext::IECallContext(const IEUnit & } } +cv::gapi::ie::TraitAs +IECallContext::getInputType(const std::string &layer_name) const { + const auto it = uu.inputs_type.find(layer_name); + if (it == uu.inputs_type.end()) { + cv::util::throw_error(std::logic_error( + "Failed to find input type for layer: \"" + layer_name + "\"")); + } + return it->second; +} + const cv::GArgs& IECallContext::inArgs() const { return m_args; } @@ -732,7 +913,8 @@ cv::MediaFrame preprocess_frame_impl(cv::MediaFrame &&in_frame, const std::strin inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i, - cv::gapi::ie::TraitAs hint, + const cv::gapi::ie::TraitAs hint, + const IE::Layout &layout, const std::string& layer_name, const cv::util::optional &opt_roi, cv::MediaFrame* out_keep_alive_frame = nullptr, @@ -780,7 +962,7 @@ inline IE::Blob::Ptr extractBlob(IECallContext& ctx, return wrapIE(*(ctx.views.back()), frame.desc()); } case cv::GShape::GMAT: { - return wrapIE(ctx.inMat(i), hint); + return wrapIE(ctx.inMat(i), hint, layout); } default: GAPI_Assert("Unsupported input shape for IE backend"); @@ 
-788,7 +970,6 @@ inline IE::Blob::Ptr extractBlob(IECallContext& ctx, GAPI_Error("InternalError"); } - static void setBlob(InferenceEngine::InferRequest& req, const std::string& layer_name, const IE::Blob::Ptr& blob, @@ -1162,55 +1343,109 @@ static void configureInputReshapeByImage(const IE::InputInfo::Ptr& ii, input_reshape_table.emplace(layer_name, input_dims); } -static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg mm) { +static void cfgInputPrecision(const IE::InputInfo::Ptr& ii, const cv::GMetaArg mm) { switch (mm.index()) { - case cv::GMetaArg::index_of(): - { - ii->setPrecision(toIE(util::get(mm).depth)); + case cv::GMetaArg::index_of(): { + const auto &desc = util::get(mm); + ii->setPrecision(toIE(desc.depth)); break; } case cv::GMetaArg::index_of(): - { - const auto &meta = util::get(mm); - switch (meta.fmt) { - case cv::MediaFormat::NV12: - ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12); - break; - case cv::MediaFormat::BGR: - // NB: Do nothing - break; - case cv::MediaFormat::GRAY: - // NB: Do nothing - break; - default: - GAPI_Error("Unsupported media format for IE backend"); - } ii->setPrecision(toIE(CV_8U)); break; - } default: util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); } } -static bool isApplicableForResize(const IE::TensorDesc& desc) { - const auto layout = desc.getLayout(); - const auto prec = desc.getPrecision(); - return (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) && - (prec == IE::Precision::FP32 || prec == IE::Precision::U8); +static void cfgImagePreprocessing(const IE::InputInfo::Ptr &ii, + const cv::GMetaArg &mm, + const IE::ResizeAlgorithm interp) { + if (!cv::util::holds_alternative(mm) && + !cv::util::holds_alternative(mm)) { + util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); + } + + ii->getPreProcess().setResizeAlgorithm(interp); + if (cv::util::holds_alternative(mm)) { + const auto &meta = util::get(mm); + if (meta.fmt == cv::MediaFormat::NV12) { + ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12); + } + } } -static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii, - const cv::GMetaArg& mm) { +// NB: This function is used in order to configure +// preprocessing for "Load" case networks. +static void cfgInputPreprocessing(const cv::gapi::ie::TraitAs trait, + const IE::InputInfo::Ptr &ii, + const cv::GMetaArg &mm, + const std::string &layer_name, + const AttrMap &layout_map, + const AttrMap &interp_map) { + cfgInputPrecision(ii, mm); + const auto explicit_input_layout = lookUp(layout_map, layer_name); + const auto explicit_resize = lookUp(interp_map, layer_name); + if (trait == cv::gapi::ie::TraitAs::IMAGE) { + // NB: Image case - preprocessing is configured automatically. + GAPI_LOG_DEBUG(NULL, "IE Backend: Input: \"" << + layer_name << " " << mm << "\" is image."); + // NB: BlockingDesc is used instead (see wrapIE) + if (explicit_input_layout) { + util::throw_error(std::logic_error("Input data provided for layer: \"" + + layer_name + "\" is recognized as \"image\". Explicitly" + + " specified layout is prohibited.")); + } + const auto interp = explicit_resize ? toIEInterp(*explicit_resize) + : IE::RESIZE_BILINEAR; + cfgImagePreprocessing(ii, mm, interp); + } else { + // NB: Tensor case - preprocessing is configured only if user asked. 
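For reference, the permutation performed by the toIEDims/toCVDims helpers above can be exercised in isolation. A minimal sketch, using plain std::vector so it builds without InferenceEngine headers (the helper names here are illustrative, not the backend's):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

using Dims = std::vector<size_t>;

// User-visible NHWC dims -> the NCDHW-ordered dims IE stores in the Blob.
static Dims nhwcToIE(const Dims &d) { return {d[0], d[3], d[1], d[2]}; }
// ...and back: Blob storage order -> the dims a cv::Mat should be created with.
static Dims ieToNHWC(const Dims &d) { return {d[0], d[2], d[3], d[1]}; }

int main() {
    const Dims user{1, 240, 320, 3};    // Mat({1, 240, 320, 3}, CV_8U) + "NHWC"
    const Dims ie = nhwcToIE(user);     // {1, 3, 240, 320} goes into the Blob
    assert(ie == (Dims{1, 3, 240, 320}));
    assert(ieToNHWC(ie) == user);       // the round trip is lossless
}
```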
+ GAPI_LOG_DEBUG(NULL, "IE Backend: Input: \"" << + layer_name << "\" " << mm << " is tensor."); + if (explicit_input_layout) { + GAPI_LOG_DEBUG(NULL, "IE Backend: Set input layout \"" << + *explicit_input_layout << "\" for layer \"" << layer_name << "\""); + ii->setLayout(toIE(*explicit_input_layout)); + } + if (explicit_resize) { + GAPI_LOG_DEBUG(NULL, "IE Backend: Set resize for layer \"" << layer_name << "\""); + ii->getPreProcess().setResizeAlgorithm(toIEInterp(*explicit_resize)); + } + } +} + +static IE::PreProcessInfo createImagePreProcInfo(const cv::GMetaArg &mm, + const IE::ResizeAlgorithm interp) { + if (!cv::util::holds_alternative<cv::GMatDesc>(mm) && + !cv::util::holds_alternative<cv::GFrameDesc>(mm)) { + util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); + } IE::PreProcessInfo info; + info.setResizeAlgorithm(interp); if (cv::util::holds_alternative<cv::GFrameDesc>(mm)) { - auto desc = cv::util::get<cv::GFrameDesc>(mm); - if (desc.fmt == cv::MediaFormat::NV12) { + const auto &meta = util::get<cv::GFrameDesc>(mm); + if (meta.fmt == cv::MediaFormat::NV12) { info.setColorFormat(IE::ColorFormat::NV12); } } - if (isApplicableForResize(ii->getTensorDesc())) { - info.setResizeAlgorithm(IE::RESIZE_BILINEAR); + return info; +} + +// NB: This function is used in order to create +// preprocessing for "Import" case networks. +static IE::PreProcessInfo createPreProcInfo(const cv::gapi::ie::TraitAs trait, + const cv::GMetaArg& mm, + const cv::optional<int> explicit_resize) { + if (trait == cv::gapi::ie::TraitAs::IMAGE) { + const auto interp = explicit_resize ? toIEInterp(*explicit_resize) + : IE::RESIZE_BILINEAR; + return createImagePreProcInfo(mm, interp); + } + // NB: In the "tensor" case, only resize can be specified for "import" models. + IE::PreProcessInfo info; + if (explicit_resize) { + info.setResizeAlgorithm(toIEInterp(*explicit_resize)); + } return info; } @@ -1237,6 +1472,13 @@ static void configureOutputPrecision(const IE::OutputsDataMap &outputs ); } +static void configureOutputLayout(const IE::OutputsDataMap &outputs_info, + const AttrMap<std::string> &output_layout) { + for (const auto &it : output_layout) { + outputs_info.at(it.first)->setLayout(toIE(it.second)); + } +} + // NB: This is a callback used by async infer // to post output blobs (cv::GMat's). static void PostOutputs(InferenceEngine::InferRequest &request, @@ -1356,6 +1598,10 @@ struct Infer: public cv::detail::KernelTag { GAPI_Assert(uu.params.input_names.size() == in_metas.size() && "Known input layers count doesn't match input meta count"); + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); // NB: Configuring input/output precision and network reshape must be done // only in the loadNetwork case.
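The Load/Import split referenced in this comment comes from how the network was obtained in the first place. A minimal sketch of the two cv::gapi::ie::Params constructors that produce the two ParamDesc kinds; the network type, file names and device below are placeholders:

```cpp
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/infer/ie.hpp>

G_API_NET(SampleNet, <cv::GMat(cv::GMat)>, "sample-net");

void declare_params() {
    // "Load" case: IE reads and compiles the IR itself, so input/output
    // precision, layout and resize can still be configured on InputInfo.
    auto load_params = cv::gapi::ie::Params<SampleNet>{
        "net.xml", "net.bin", "CPU"
    };
    // "Import" case: a pre-compiled blob; only an IE::PreProcessInfo
    // can be attached to the request at execution time.
    auto import_params = cv::gapi::ie::Params<SampleNet>{
        "net.blob", "MYRIAD"
    };
    (void)load_params; (void)import_params;
}
```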
using namespace cv::gapi::ie::detail; @@ -1365,25 +1611,24 @@ struct Infer: public cv::detail::KernelTag { ade::util::toRange(in_metas))) { const auto &input_name = std::get<0>(it); auto ii = inputs.at(input_name); - const auto & mm = std::get<1>(it); + const auto &mm = std::get<1>(it); - configureInputInfo(ii, mm); if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } - - if (isApplicableForResize(ii->getTensorDesc())) { - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - } - + const auto trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); + // FIXME: This is the only place where information about input type + // can be stored for the further execution. + const_cast<IEUnit&>(uu).inputs_type.emplace(input_name, trait); + cfgInputPreprocessing(trait, ii, mm, input_name, + input_layout, interpolation); // NB: configure input param for further preproc if (uu.net_input_params.is_applicable(mm)) { const_cast<IEUnit::InputFramesDesc&>(uu.net_input_params) .set_param(input_name, ii->getTensorDesc()); } } - for (auto &&p : uu.params.const_inputs) { const auto ii = inputs.at(p.first); ii->setPrecision(toIE(p.second.first.depth())); @@ -1395,6 +1640,10 @@ struct Infer: public cv::detail::KernelTag { if (!input_reshape_table.empty()) { const_cast<IE::CNNNetwork*>(&uu.net)->reshape(input_reshape_table); } + + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == ParamDesc::Kind::Import); @@ -1406,7 +1655,13 @@ struct Infer: public cv::detail::KernelTag { const auto &input_name = std::get<0>(it); auto ii = inputs.at(input_name); const auto & mm = std::get<1>(it); - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + const auto trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); + // FIXME: This is the only place where information about input type + // can be stored for the further execution. + const_cast<IEUnit&>(uu).inputs_type.emplace(input_name, trait); + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(trait, mm, explicit_resize)); // NB: configure input param for further preproc if (uu.net_input_params.is_applicable(mm)) { @@ -1428,7 +1683,7 @@ struct Infer: public cv::detail::KernelTag { : uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); cv::GMatDesc outm(toCV(desc.getPrecision()), - toCV(desc.getDims())); + toCVDims(toCV(desc.getDims()), desc.getLayout())); result.emplace_back(outm); } return result; @@ -1444,15 +1699,10 @@ struct Infer: public cv::detail::KernelTag { // - assumes all inputs/outputs are always Mats for (auto i : ade::util::iota(ctx->uu.params.num_in)) { const auto& layer_name = ctx->uu.params.input_names[i]; - auto layout = - ctx->uu.this_network.GetInputsInfo(). - at(layer_name)->getTensorDesc().getLayout(); - auto hint = - (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) - ? 
cv::gapi::ie::TraitAs::IMAGE : cv::gapi::ie::TraitAs::TENSOR; - + const auto hint = ctx->getInputType(layer_name); + const auto layout = req.GetBlob(layer_name)->getTensorDesc().getLayout(); IE::Blob::Ptr this_blob = extractBlob(*ctx, i, hint, - layer_name, + layout, layer_name, cv::util::optional<cv::Rect>{}); setBlob(req, layer_name, this_blob, *ctx); } @@ -1485,20 +1735,43 @@ struct InferROI: public cv::detail::KernelTag { const auto &input_name = uu.params.input_names.at(0); auto &&mm = in_metas.at(1u); + const auto &tensor_desc = + (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) + ? uu.net.getInputsInfo().at(input_name)->getTensorDesc() + : uu.this_network.GetInputsInfo().at(input_name)->getTensorDesc(); + + if (cv::util::holds_alternative<cv::GMatDesc>(mm) || + cv::util::holds_alternative<cv::GFrameDesc>(mm)) { + const auto trait = clarifyTrait(mm, tensor_desc.getDims()); + if (trait != cv::gapi::ie::TraitAs::IMAGE) { + util::throw_error(std::runtime_error( + "IE Backend: Only image is supported" + " as the 1st argument for InferROI")); + } + } else { + util::throw_error(std::runtime_error( + "IE Backend: Unsupported input meta for" + " the 1st argument for InferROI")); + } + // NB: Configuring input precision and network reshape must be done // only in the loadNetwork case. + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); + const auto trait = cv::gapi::ie::TraitAs::IMAGE; if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { // 0th is ROI, 1st is input image auto inputs = uu.net.getInputsInfo(); auto ii = inputs.at(input_name); - configureInputInfo(ii, mm); + if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } - if (isApplicableForResize(ii->getTensorDesc())) { - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - } + cfgInputPreprocessing(trait, ii, mm, input_name, + input_layout, interpolation); // FIXME: This isn't the best place to call reshape function. // Correct solution would be to do this in compile() method of network, @@ -1517,6 +1790,9 @@ struct InferROI: public cv::detail::KernelTag { inputs.at(p.first)->setPrecision(toIE(p.second.first.depth())); } + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); @@ -1524,7 +1800,9 @@ struct InferROI: public cv::detail::KernelTag { // FIXME: This isn't the best place to collect PreProcMap.
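The input_layout and interpolation maps consumed by these kernels are filled from user-facing cfg* setters on cv::gapi::ie::Params: a single value is broadcast to every layer, while a map pins attributes to specific layers. A hedged sketch, assuming the cfgInputLayout/cfgResize setter names that accompany the ParamDesc::input_layout and ParamDesc::interpolation fields above (layer names are placeholders):

```cpp
// Broadcast form: one value applies to all input layers.
auto p_broadcast = cv::gapi::ie::Params<SampleNet>{"net.xml", "net.bin", "CPU"}
    .cfgInputLayout("NHWC")
    .cfgResize(cv::INTER_LINEAR);

// Per-layer form: attributes are validated against existing layer names,
// as broadcastLayerAttr above does.
auto p_per_layer = cv::gapi::ie::Params<SampleNet>{"net.xml", "net.bin", "CPU"}
    .cfgInputLayout({{"data", "NHWC"}, {"info", "NC"}})
    .cfgResize({{"data", cv::INTER_AREA}});
```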
auto* non_const_prepm = const_cast<IEUnit::PreProcMap*>(&uu.preproc_map); auto ii = inputs.at(input_name); - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(trait, mm, explicit_resize)); // NB: configure input param for further preproc if (uu.net_input_params.is_applicable(mm)) { @@ -1545,7 +1823,7 @@ struct InferROI: public cv::detail::KernelTag { : uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); cv::GMatDesc outm(toCV(desc.getPrecision()), - toCV(desc.getDims())); + toCVDims(toCV(desc.getDims()), desc.getLayout())); result.emplace_back(outm); } return result; @@ -1568,6 +1846,7 @@ struct InferROI: public cv::detail::KernelTag { bool preprocessed = false; IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + IE::Layout::ANY, *(ctx->uu.params.input_names.begin()), cv::util::make_optional(this_roi), slot_ptr, &preprocessed); @@ -1613,20 +1892,31 @@ struct InferList: public cv::detail::KernelTag { // NB: Configuring input precision and network reshape must be done // only in the loadNetwork case. + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { std::size_t idx = 1u; auto inputs = uu.net.getInputsInfo(); for (auto &&input_name : uu.params.input_names) { auto ii = inputs.at(input_name); const auto & mm = in_metas[idx++]; - configureInputInfo(ii, mm); + + // NB: InferList expects the inputs starting from index 1 to be images. + const auto input_trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); + if (input_trait != cv::gapi::ie::TraitAs::IMAGE) { + util::throw_error(std::runtime_error( + "IE Backend: Only image is supported" + " as the " + std::to_string(idx) + "th argument for InferList")); + } + if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } - if (isApplicableForResize(ii->getTensorDesc())) { - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - } + cfgInputPreprocessing(input_trait, ii, mm, + input_name, input_layout, interpolation); } // FIXME: This isn't the best place to call reshape function. @@ -1641,6 +1931,9 @@ struct InferList: public cv::detail::KernelTag { ii->setPrecision(toIE(p.second.first.depth())); } + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); @@ -1650,7 +1943,18 @@ struct InferList: public cv::detail::KernelTag { for (auto &&input_name : uu.params.input_names) { auto ii = inputs.at(input_name); const auto & mm = in_metas[idx++]; - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + + // NB: InferList expects the inputs starting from index 1 to be images.
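A short usage sketch of the contract validated here: with list-inference, the GMat input is the full frame and every ROI from the GArray is cropped, preprocessed as an image and inferred separately (SampleNet is a placeholder single-input/single-output network):

```cpp
cv::GMat in;
cv::GArray<cv::Rect> rois;   // e.g. produced by a detector earlier in the graph
cv::GArray<cv::GMat> outs = cv::gapi::infer<SampleNet>(rois, in);
// outs[i] is the network output for the i-th ROI cropped from `in`.
```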
+ const auto input_trait = clarifyTrait(mm, ii->getTensorDesc().getDims()); + if (input_trait != cv::gapi::ie::TraitAs::IMAGE) { + util::throw_error(std::runtime_error( + "IE Backend: Only image is supported" + " as the " + std::to_string(idx) + "th argument for InferList")); + } + + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(input_trait, mm, explicit_resize)); } } @@ -1678,6 +1982,7 @@ struct InferList: public cv::detail::KernelTag { // NB: This blob will be used to make ROIs from it, so // it should be treated as an image IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + IE::Layout::ANY, ctx->uu.params.input_names[0u], cv::util::optional<cv::Rect>{}); @@ -1688,7 +1993,7 @@ struct InferList: public cv::detail::KernelTag { ctx->uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load ? ctx->uu.net.getOutputsInfo().at(out_name)->getTensorDesc() : ctx->uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); - cached_dims[i] = toCV(desc.getDims()); + cached_dims[i] = toCVDims(toCV(desc.getDims()), desc.getLayout()); // FIXME: Shouldn't this be done automatically // by some resetInternalData(), etc? (Probably at the GExecutor level) auto& out_vec = ctx->outVecR<cv::Mat>(i); @@ -1744,51 +2049,52 @@ struct InferList2: public cv::detail::KernelTag { // "blob"-based ones) // FIXME: this is filtering not done, actually! GArrayDesc has // no hint for its underlying type! + + const auto &input_name_0 = uu.params.input_names.front(); const auto &mm_0 = in_metas[0u]; - switch (in_metas[0u].index()) { - case cv::GMetaArg::index_of<cv::GMatDesc>(): { - const auto &meta_0 = util::get<cv::GMatDesc>(mm_0); - GAPI_Assert( !meta_0.isND() - && !meta_0.planar - && "Only images are supported as the 0th argument"); - break; - } - case cv::GMetaArg::index_of<cv::GFrameDesc>(): { - // FIXME: Is there any validation for GFrame ? - break; + const auto &tensor_desc_0 = + (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) + ? uu.net.getInputsInfo().at(input_name_0)->getTensorDesc() + : uu.this_network.GetInputsInfo().at(input_name_0)->getTensorDesc(); + + if (cv::util::holds_alternative<cv::GMatDesc>(mm_0) || + cv::util::holds_alternative<cv::GFrameDesc>(mm_0)) { + const auto trait = clarifyTrait(mm_0, tensor_desc_0.getDims()); + if (trait != cv::gapi::ie::TraitAs::IMAGE) { + util::throw_error(std::runtime_error( + "IE Backend: Only images are" + " supported as the 0th argument")); } - default: - util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); - } - - if (util::holds_alternative<cv::GMatDesc>(mm_0)) { - const auto &meta_0 = util::get<cv::GMatDesc>(mm_0); - GAPI_Assert( !meta_0.isND() - && !meta_0.planar - && "Only images are supported as the 0th argument"); + } else { + util::throw_error(std::runtime_error( + "IE Backend: Unsupported input meta" + " for the 0th argument")); } std::size_t idx = 1u; + const auto input_layout = broadcastLayerAttr(uu.params.input_layout, + uu.params.input_names); + const auto interpolation = broadcastLayerAttr(uu.params.interpolation, + uu.params.input_names); for (auto &&input_name : uu.params.input_names) { const auto &mm = in_metas[idx]; GAPI_Assert(util::holds_alternative<cv::GArrayDesc>(mm) && "Non-array inputs are not supported"); if (op.k.inKinds[idx] == cv::detail::OpaqueKind::CV_RECT) { + const auto input_trait = cv::gapi::ie::TraitAs::IMAGE; // NB: Configuring input precision and network reshape must be done // only in the loadNetwork case.
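InferList2, whose meta checks begin above, is reached through the infer2 API: the 0th argument is the full image, and the remaining GArray arguments may mix cv::Rect (ROIs, preprocessed as images) with cv::Mat (raw tensors, for which explicit layout/resize is rejected below). A minimal sketch with placeholder names:

```cpp
cv::GMat in;
cv::GArray<cv::Rect> rois;   // one inference per element, against `in`
cv::GArray<cv::GMat> outs = cv::gapi::infer2<SampleNet>(in, rois);
```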
if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { auto inputs = uu.net.getInputsInfo(); // This is a cv::Rect -- configure the IE preprocessing auto ii = inputs.at(input_name); - configureInputInfo(ii, mm_0); if (uu.params.layer_names_to_reshape.find(input_name) != uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm_0, input_reshape_table); } - if (isApplicableForResize(ii->getTensorDesc())) { - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - } + cfgInputPreprocessing(input_trait, ii, mm_0, + input_name, input_layout, interpolation); for (auto &&p : uu.params.const_inputs) { inputs.at(p.first)->setPrecision(toIE(p.second.first.depth())); @@ -1800,19 +2106,32 @@ struct InferList2: public cv::detail::KernelTag { if (!input_reshape_table.empty()) { const_cast<IE::CNNNetwork*>(&uu.net)->reshape(input_reshape_table); } + const auto output_layout = broadcastLayerAttr(uu.params.output_layout, + uu.params.output_names); + configureOutputLayout(uu.net.getOutputsInfo(), output_layout); configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); auto inputs = uu.this_network.GetInputsInfo(); auto* non_const_prepm = const_cast<IEUnit::PreProcMap*>(&uu.preproc_map); auto ii = inputs.at(input_name); - non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm_0)); + const auto explicit_resize = lookUp(interpolation, input_name); + non_const_prepm->emplace( + input_name, createPreProcInfo(input_trait, mm_0, explicit_resize)); } } else { // This is a cv::GMat (equals to: cv::Mat) // Just validate that it is really the type // (other types are prohibited here) GAPI_Assert(op.k.inKinds[idx] == cv::detail::OpaqueKind::CV_MAT); + // NB: Well, it's even impossible to specify the precision since + // there is no such info in GArray + const auto explicit_resize = lookUp(interpolation, input_name); + const auto explicit_layout = lookUp(input_layout , input_name); + if (explicit_resize || explicit_layout) { + util::throw_error(std::logic_error( + "InferList2 doesn't support preprocessing for \"tensor\" arguments!")); + } } idx++; // NB: Never forget to increment the counter } @@ -1832,6 +2151,7 @@ struct InferList2: public cv::detail::KernelTag { // NB: This blob will be used to make ROIs from it, so // it should be treated as an image IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0, cv::gapi::ie::TraitAs::IMAGE, + IE::Layout::ANY, ctx->uu.params.input_names[0u], cv::util::optional<cv::Rect>{}); const auto list_size = ctx->inArg<cv::detail::VectorRef>(1u).size(); @@ -1851,7 +2171,7 @@ struct InferList2: public cv::detail::KernelTag { ctx->uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load ? ctx->uu.net.getOutputsInfo().at(out_name)->getTensorDesc() : ctx->uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); - cached_dims[i] = toCV(desc.getDims()); + cached_dims[i] = toCVDims(toCV(desc.getDims()), desc.getLayout()); // FIXME: Shouldn't this be done automatically // by some resetInternalData(), etc?
(Probably at the GExecutor level) auto& out_vec = ctx->outVecR(i); @@ -1874,8 +2194,10 @@ struct InferList2: public cv::detail::KernelTag { } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) { const auto &vec = this_vec.rref(); const auto &mat = vec[list_idx]; - setBlob(req, ctx->uu.params.input_names[in_idx], - wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR), + const auto layer_name = ctx->uu.params.input_names[in_idx]; + const auto layout = req.GetBlob(layer_name)->getTensorDesc().getLayout(); + setBlob(req, layer_name, + wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR, layout), *ctx); } else { GAPI_Assert(false && diff --git a/modules/gapi/src/backends/ov/bindings_ov.cpp b/modules/gapi/src/backends/ov/bindings_ov.cpp new file mode 100644 index 0000000000..8bcbc497af --- /dev/null +++ b/modules/gapi/src/backends/ov/bindings_ov.cpp @@ -0,0 +1,168 @@ +#include + +cv::gapi::ov::PyParams::PyParams(const std::string &tag, + const std::string &model_path, + const std::string &bin_path, + const std::string &device) + : m_priv(std::make_shared>(tag, model_path, bin_path, device)) { +} + +cv::gapi::ov::PyParams::PyParams(const std::string &tag, + const std::string &blob_path, + const std::string &device) + : m_priv(std::make_shared>(tag, blob_path, device)) { +} + +cv::gapi::GBackend cv::gapi::ov::PyParams::backend() const { + return m_priv->backend(); +} + +std::string cv::gapi::ov::PyParams::tag() const { + return m_priv->tag(); +} + +cv::util::any cv::gapi::ov::PyParams::params() const { + return m_priv->params(); +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgPluginConfig( + const std::map &config) { + m_priv->cfgPluginConfig(config); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgInputTensorLayout(std::string tensor_layout) { + m_priv->cfgInputTensorLayout(std::move(tensor_layout)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgInputTensorLayout( + std::map layout_map) { + m_priv->cfgInputTensorLayout(std::move(layout_map)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgInputModelLayout(std::string tensor_layout) { + m_priv->cfgInputModelLayout(std::move(tensor_layout)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgInputModelLayout( + std::map layout_map) { + m_priv->cfgInputModelLayout(std::move(layout_map)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgOutputTensorLayout(std::string tensor_layout) { + m_priv->cfgOutputTensorLayout(std::move(tensor_layout)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgOutputTensorLayout( + std::map layout_map) { + m_priv->cfgOutputTensorLayout(std::move(layout_map)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgOutputModelLayout(std::string tensor_layout) { + m_priv->cfgOutputModelLayout(std::move(tensor_layout)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgOutputModelLayout( + std::map layout_map) { + m_priv->cfgOutputModelLayout(std::move(layout_map)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgOutputTensorPrecision(int precision) { + m_priv->cfgOutputTensorPrecision(precision); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgOutputTensorPrecision( + std::map precision_map) { + m_priv->cfgOutputTensorPrecision(precision_map); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgReshape(std::vector new_shape) { + 
m_priv->cfgReshape(std::move(new_shape)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgReshape( + std::map> new_shape_map) { + m_priv->cfgReshape(std::move(new_shape_map)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgNumRequests(const size_t nireq) { + m_priv->cfgNumRequests(nireq); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgMean(std::vector mean_values) { + m_priv->cfgMean(std::move(mean_values)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgMean( + std::map> mean_map) { + m_priv->cfgMean(std::move(mean_map)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgScale(std::vector scale_values) { + m_priv->cfgScale(std::move(scale_values)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgScale( + std::map> scale_map) { + m_priv->cfgScale(std::move(scale_map)); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgResize(int interpolation) { + m_priv->cfgResize(interpolation); + return *this; +} + +cv::gapi::ov::PyParams& +cv::gapi::ov::PyParams::cfgResize(std::map interpolation) { + m_priv->cfgResize(std::move(interpolation)); + return *this; +} + +cv::gapi::ov::PyParams cv::gapi::ov::params(const std::string &tag, + const std::string &model_path, + const std::string &weights, + const std::string &device) { + return {tag, model_path, weights, device}; +} + +cv::gapi::ov::PyParams cv::gapi::ov::params(const std::string &tag, + const std::string &blob_path, + const std::string &device) { + return {tag, blob_path, device}; +} diff --git a/modules/gapi/src/backends/ov/govbackend.cpp b/modules/gapi/src/backends/ov/govbackend.cpp new file mode 100644 index 0000000000..46eccd2bbd --- /dev/null +++ b/modules/gapi/src/backends/ov/govbackend.cpp @@ -0,0 +1,1001 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + +#include "precomp.hpp" + +// needs to be included regardless if IE is present or not +// (cv::gapi::ov::backend() is still there and is defined always) +#include "backends/ov/govbackend.hpp" + +#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000 + +#include "backends/ov/util.hpp" +#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! +#include "logger.hpp" + +#include +#include + +#if defined(HAVE_TBB) +# include // FIXME: drop it from here! 
+template using QueueClass = tbb::concurrent_bounded_queue; +#else +# include "executor/conc_queue.hpp" +template using QueueClass = cv::gapi::own::concurrent_bounded_queue; +#endif // TBB + +#include "utils/itt.hpp" + +#include + +#include + +#include + +using ParamDesc = cv::gapi::ov::detail::ParamDesc; + +static ov::Core getCore() { + static ov::Core core; + return core; +} + +static ov::AnyMap toOV(const ParamDesc::PluginConfigT &config) { + return {config.begin(), config.end()}; +} + +static std::map +toOV(const std::map> &shapes) { + std::map ov_shapes; + for (const auto &it : shapes) { + ov_shapes.emplace(it.first, ::ov::Shape(it.second)); + } + return ov_shapes; +} + +static ov::element::Type toOV(int depth) { + switch (depth) { + case CV_8U: return ov::element::u8; + case CV_32S: return ov::element::i32; + case CV_32F: return ov::element::f32; + case CV_16F: return ov::element::f16; + default: GAPI_Error("OV Backend: Unsupported data type"); + } + return ov::element::undefined; +} + +static ov::preprocess::ResizeAlgorithm toOVInterp(int interpolation) { + namespace pp = ov::preprocess; + switch (interpolation) { + case cv::INTER_LINEAR: return pp::ResizeAlgorithm::RESIZE_LINEAR; + case cv::INTER_NEAREST: return pp::ResizeAlgorithm::RESIZE_NEAREST; + case cv::INTER_CUBIC: return pp::ResizeAlgorithm::RESIZE_CUBIC; + default: GAPI_Error("OV Backend: Unsupported resize algorithm"); + } + // Unreachable code + GAPI_Assert(false); +} + +static std::vector toCV(const ov::Shape &shape) { + std::vector result; + result.reserve(shape.size()); + for (auto dim : shape) { + result.push_back(ade::util::checked_cast(dim)); + } + return result; +} + +static int toCV(const ov::element::Type &type) { + switch (type) { + case ov::element::u8: return CV_8U; + case ov::element::f32: return CV_32F; + case ov::element::i32: return CV_32S; + case ov::element::i64: return CV_32S; + case ov::element::f16: return CV_16F; + default: GAPI_Error("OV Backend: Unsupported data type"); + } + return -1; +} + +static void copyFromOV(const ov::Tensor &tensor, cv::Mat &mat) { + const auto total = mat.total() * mat.channels(); + if (tensor.get_element_type() != toOV(mat.depth()) || + tensor.get_size() != total ) { + std::stringstream ss; + ss << "Failed to copy data from ov::Tensor to cv::Mat." + << " Data type or number of elements mismatch." + << " cv::Mat: " << cv::descr_of(mat) << " and" + << " ov::Tensor: " << tensor.get_element_type() << " " + << tensor.get_shape(); + cv::util::throw_error(std::logic_error(ss.str())); + } + + if (tensor.get_element_type() == ov::element::i64) { + GAPI_LOG_WARNING(NULL, "INT64 isn't supported for cv::Mat. Conversion to INT32 is used."); + cv::gimpl::convertInt64ToInt32(tensor.data(), + mat.ptr(), + total); + } else { + std::copy_n(reinterpret_cast(tensor.data()), + tensor.get_byte_size(), + mat.ptr()); + } +} + +static void copyToOV(const cv::Mat &mat, ov::Tensor &tensor) { + // TODO: Ideally there should be check that mat and tensor + // dimensions are compatible. + const auto total = mat.total() * mat.channels(); + if (tensor.get_element_type() != toOV(mat.depth()) || + tensor.get_size() != total) { + std::stringstream ss; + ss << "Failed to copy data from cv::Mat to ov::Tensor." + << " Data type or number of elements mismatch." 
+ << " ov::Tensor: " << tensor.get_element_type() << " " + << tensor.get_shape() << " and" + << " cv::Mat: " << cv::descr_of(mat); + cv::util::throw_error(std::logic_error(ss.str())); + } + + if (tensor.get_element_type() == ov::element::i64) { + cv::gimpl::convertInt32ToInt64(mat.ptr(), + tensor.data(), + total); + } else { + std::copy_n(mat.ptr(), + tensor.get_byte_size(), + reinterpret_cast(tensor.data())); + } +} + +std::vector cv::gapi::ov::util::to_ocv(const ::ov::Shape &shape) { + return toCV(shape); +} + +int cv::gapi::ov::util::to_ocv(const ::ov::element::Type &type) { + return toCV(type); +} + +struct OVUnit { + static const char *name() { return "OVUnit"; } + + explicit OVUnit(const ParamDesc &pd) + : params(pd) { + + // FIXME: Can this logic be encapsulated to prevent checking every time? + if (cv::util::holds_alternative(params.kind)) { + const auto desc = cv::util::get(params.kind); + model = getCore().read_model(desc.model_path, desc.bin_path); + GAPI_Assert(model); + + if (params.num_in == 1u && params.input_names.empty()) { + params.input_names = { model->inputs().begin()->get_any_name() }; + } + if (params.num_out == 1u && params.output_names.empty()) { + params.output_names = { model->outputs().begin()->get_any_name() }; + } + + } else { + GAPI_Assert(cv::util::holds_alternative(params.kind)); + std::ifstream file(cv::util::get(params.kind).blob_path, + std::ios_base::in | std::ios_base::binary); + GAPI_Assert(file.is_open()); + compiled_model = getCore().import_model(file, + params.device, + toOV(params.config)); + + if (params.num_in == 1u && params.input_names.empty()) { + params.input_names = { compiled_model.inputs().begin()->get_any_name() }; + } + if (params.num_out == 1u && params.output_names.empty()) { + params.output_names = { compiled_model.outputs().begin()->get_any_name() }; + } + } + }; + + cv::gimpl::ov::OVCompiled compile() { + if (cv::util::holds_alternative(params.kind)) { + compiled_model = getCore().compile_model(model, + params.device, + toOV(params.config)); + } + return {compiled_model}; + } + + cv::gapi::ov::detail::ParamDesc params; + std::shared_ptr model; + ov::CompiledModel compiled_model; +}; + +class OVCallContext +{ +public: + OVCallContext(const OVUnit & unit, + cv::gimpl::GIslandExecutable::IOutput & output, + const cv::GArgs & args, + const std::vector & outs, + cv::GRunArg::Meta && meta, + std::vector && input_objs, + std::vector && output_objs); + + const cv::GArgs& inArgs() const; + + // Generic accessor API + template + const T& inArg(std::size_t input) const { + return m_args.at(input).get(); + } + + template + std::vector& outVecR(std::size_t output) { + return outVecRef(output).wref(); + } + + // Syntax sugar + cv::GShape inShape(std::size_t input) const; + const cv::Mat& inMat (std::size_t input) const; + + cv::GRunArgP output (std::size_t idx); + cv::Mat& outMatR(std::size_t idx); + + const OVUnit &uu; + cv::gimpl::GIslandExecutable::IOutput &out; + + // To store exception appeared in callback. + std::exception_ptr eptr; + + const cv::GRunArg::Meta& getMeta() { return m_meta; }; +private: + cv::detail::VectorRef& outVecRef(std::size_t idx); + + cv::GArg packArg(const cv::GArg &arg); + + // To propagate accumulated meta from all inputs to output. 
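The i64 fallback in the copyFromOV/copyToOV helpers above exists because cv::Mat has no 64-bit integer depth; the conversion itself is a plain element-wise cast. A self-contained sketch of that logic (the function names mirror, but are not, the cv::gimpl helpers):

```cpp
#include <cstddef>
#include <cstdint>

// int64 tensor -> int32 Mat (may truncate: there is no CV_64S Mat depth).
static void int64_to_int32(const int64_t *src, int32_t *dst, std::size_t n) {
    for (std::size_t i = 0; i < n; ++i) {
        dst[i] = static_cast<int32_t>(src[i]);
    }
}

// int32 Mat -> int64 tensor (always exact).
static void int32_to_int64(const int32_t *src, int64_t *dst, std::size_t n) {
    for (std::size_t i = 0; i < n; ++i) {
        dst[i] = src[i];
    }
}
```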
+ cv::GRunArg::Meta m_meta; + + // To store input/output data from frames + std::vector m_input_objs; + std::vector m_output_objs; + + // To simplify access to cv::Mat inside cv::RMat + cv::gimpl::Mag m_res; + + std::unordered_map m_results; + + // Input parameters passed to an inference operation. + cv::GArgs m_args; + cv::GShapes m_in_shapes; +}; + +OVCallContext::OVCallContext(const OVUnit & unit, + cv::gimpl::GIslandExecutable::IOutput & output, + const cv::GArgs & args, + const std::vector & outs, + cv::GRunArg::Meta && meta, + std::vector && input_objs, + std::vector && output_objs) +: uu(unit), out(output), m_meta(std::move(meta)), + m_input_objs(std::move(input_objs)), m_output_objs(std::move(output_objs)) +{ + for (auto& it : m_input_objs) cv::gimpl::magazine::bindInArg (m_res, it.first, it.second); + for (auto& it : m_output_objs) cv::gimpl::magazine::bindOutArg(m_res, it.first, it.second); + + m_args.reserve(args.size()); + using namespace std::placeholders; + ade::util::transform(args, + std::back_inserter(m_args), + std::bind(&OVCallContext::packArg, this, _1)); + + ade::util::transform(args, std::back_inserter(m_in_shapes), + [](const cv::GArg& arg) { + return arg.get().shape; + }); + + for (const auto out_it : ade::util::indexed(outs)) { + // FIXME: Can the same GArg type resolution mechanism be reused here? + const auto port = ade::util::index(out_it); + const auto desc = ade::util::value(out_it); + m_results[port] = cv::gimpl::magazine::getObjPtr(m_res, desc); + } +} + +const cv::GArgs& OVCallContext::inArgs() const { + return m_args; +} + +cv::GShape OVCallContext::inShape(std::size_t i) const { + return m_in_shapes[i]; +} + +const cv::Mat& OVCallContext::inMat(std::size_t input) const { + return inArg(input); +} + +cv::Mat& OVCallContext::outMatR(std::size_t idx) { + return *cv::util::get(m_results.at(idx)); +} + +cv::GRunArgP OVCallContext::output(std::size_t idx) { + return m_output_objs[idx].second; +}; + +cv::detail::VectorRef& OVCallContext::outVecRef(std::size_t idx) { + return cv::util::get(m_results.at(idx)); +} + +cv::GArg OVCallContext::packArg(const cv::GArg &arg) { + // No API placeholders allowed at this point + // FIXME: this check has to be done somewhere in compilation stage. + GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT + && arg.kind != cv::detail::ArgKind::GSCALAR + && arg.kind != cv::detail::ArgKind::GARRAY); + + if (arg.kind != cv::detail::ArgKind::GOBJREF) { + cv::util::throw_error(std::logic_error("Inference supports G-types ONLY!")); + } + GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF); + + // Wrap associated CPU object (either host or an internal one) + // FIXME: object can be moved out!!! GExecutor faced that. 
+ const cv::gimpl::RcDesc &ref = arg.get(); + switch (ref.shape) + { + case cv::GShape::GMAT: return cv::GArg(m_res.slot()[ref.id]); + default: + cv::util::throw_error(std::logic_error("Unsupported GShape type")); + break; + } +} + +struct OVCallable { + static const char *name() { return "OVRequestCallable"; } + using Run = std::function, + cv::gimpl::ov::RequestPool&)>; + Run run; +}; + +struct KImpl { + cv::gimpl::CustomMetaFunction::CM customMetaFunc; + OVCallable::Run run; +}; + +using GOVModel = ade::TypedGraph + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction + , OVUnit + , OVCallable + >; + +// FIXME: Same issue with Typed and ConstTyped +using GConstGOVModel = ade::ConstTypedGraph + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction + , OVUnit + , OVCallable + >; + +namespace { +class IInferExecutor { +public: + using Ptr = std::shared_ptr; + using NotifyCallbackF = std::function; + using SetInputDataF = std::function; + using ReadOutputDataF = std::function; + + // NB: The task is represented by: + // SetInputDataF - function which set input data. + // ReadOutputDataF - function which read output data. + struct Task { + SetInputDataF set_input_data; + ReadOutputDataF read_output_data; + }; + + IInferExecutor(::ov::InferRequest request, NotifyCallbackF notify) + : m_request(std::move(request)), + m_notify(std::move(notify)) { + }; + + virtual void execute(const Task& task) = 0; + virtual ~IInferExecutor() = default; + +protected: + ::ov::InferRequest m_request; + NotifyCallbackF m_notify; +}; + +class SyncInferExecutor : public IInferExecutor { + using IInferExecutor::IInferExecutor; + virtual void execute(const IInferExecutor::Task &task) override; +}; + +void SyncInferExecutor::execute(const IInferExecutor::Task &task) { + try { + task.set_input_data(m_request); + m_request.infer(); + task.read_output_data(m_request, nullptr); + } catch (...) { + m_notify(); + throw; + } + // NB: Notify pool that executor has finished. + m_notify(); +} + +class AsyncInferExecutor : public IInferExecutor { +public: + using IInferExecutor::IInferExecutor; + virtual void execute(const IInferExecutor::Task& task) override; + +private: + void callback(Task task, + ::ov::InferRequest request, + std::exception_ptr eptr) noexcept; +}; + +void AsyncInferExecutor::execute(const IInferExecutor::Task& task) { + using namespace std::placeholders; + using callback_t = std::function; + m_request.set_callback( + static_cast( + std::bind(&AsyncInferExecutor::callback, this, task, m_request, _1))); + try { + task.set_input_data(m_request); + m_request.start_async(); + } catch (...) { + m_request.set_callback([](std::exception_ptr){}); + m_notify(); + throw; + } +} + +void AsyncInferExecutor::callback(IInferExecutor::Task task, + ::ov::InferRequest request, + std::exception_ptr eptr) noexcept { + task.read_output_data(request, eptr); + request.set_callback([](std::exception_ptr){}); + // NB: Notify pool that executor has finished. + m_notify(); +} + +} // anonymous namespace + +// TODO: Make it generic to reuse in IE and ONNX backends. 
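A usage sketch of the executor contract defined above: a kernel borrows an idle executor from the pool defined below and hands it a Task made of two callbacks; the sync executor runs both inline, while the async one defers read_output_data to the OpenVINO completion callback. The pool variable and the lambda bodies here are placeholders:

```cpp
pool.getIdleRequest()->execute(
    IInferExecutor::Task{
        // set_input_data: fill the request's input tensors
        [](::ov::InferRequest &req) { /* req.set_tensor(...) */ },
        // read_output_data: copy outputs out and forward any exception
        [](::ov::InferRequest &req, std::exception_ptr eptr) {
            /* if (!eptr) { ... req.get_tensor(...) ... } */
        }
    });
```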
+class cv::gimpl::ov::RequestPool { +public: + explicit RequestPool(std::vector<::ov::InferRequest>&& requests); + + IInferExecutor::Ptr getIdleRequest(); + void waitAll(); + +private: + void setup(); + void release(const size_t id); + + QueueClass<size_t> m_idle_ids; + std::vector<IInferExecutor::Ptr> m_requests; +}; + +void cv::gimpl::ov::RequestPool::release(const size_t id) { + m_idle_ids.push(id); +} + +cv::gimpl::ov::RequestPool::RequestPool(std::vector<::ov::InferRequest>&& requests) { + GAPI_Assert(!requests.empty()); + if (requests.size() == 1u) { + m_requests.push_back( + std::make_shared<SyncInferExecutor>( + requests.front(), std::bind(&RequestPool::release, this, 0u))); + } else { + for (size_t i = 0; i < requests.size(); ++i) { + m_requests.push_back( + std::make_shared<AsyncInferExecutor>( + requests[i], std::bind(&RequestPool::release, this, i))); + } + } + setup(); +} + +void cv::gimpl::ov::RequestPool::setup() { + for (size_t i = 0; i < m_requests.size(); ++i) { + m_idle_ids.push(i); + } +} + +IInferExecutor::Ptr cv::gimpl::ov::RequestPool::getIdleRequest() { + size_t id = 0u; + m_idle_ids.pop(id); + return m_requests[id]; +} + +// NB: Not thread-safe. +void cv::gimpl::ov::RequestPool::waitAll() { + // NB: It will be blocked if at least one request is busy. + for (size_t i = 0; i < m_requests.size(); ++i) { + size_t id = 0u; + m_idle_ids.pop(id); + } + setup(); +} + + +// NB: This is a callback used by async infer +// to post output blobs (cv::GMat's). +static void PostOutputs(::ov::InferRequest &infer_request, + std::exception_ptr eptr, + std::shared_ptr<OVCallContext> ctx) { + GAPI_ITT_STATIC_LOCAL_HANDLE(ov_cb_post_outputs_hndl, "OV_async_callback_PostOutputs"); + GAPI_ITT_AUTO_TRACE_GUARD(ov_cb_post_outputs_hndl); + + ctx->eptr = std::move(eptr); + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + // NB: Copy data back only if execution finished successfully. + // Otherwise just post outputs to keep the streaming executor contract. + if (!ctx->eptr) { + const auto& out_name = ctx->uu.params.output_names[i]; + copyFromOV(infer_request.get_tensor(out_name), + ctx->outMatR(i)); + } + auto output = ctx->output(i); + ctx->out.meta(output, ctx->getMeta()); + ctx->out.post(std::move(output), ctx->eptr); + } +} + +namespace cv { +namespace gimpl { +namespace ov { + +template <typename T> +using AttrMap = cv::gapi::ov::detail::AttrMap<T>; + +template <typename T> +using LayerVariantAttr = cv::gapi::ov::detail::LayerVariantAttr<T>; + +template <typename T> AttrMap<T> +broadcastLayerAttr(const LayerVariantAttr<T> &layer_attr, + const std::vector<std::string> &layer_names) { + AttrMap<T> map; + if (cv::util::holds_alternative<AttrMap<T>>(layer_attr)) { + map = cv::util::get<AttrMap<T>>(layer_attr); + // NB: Validate map: + std::unordered_set<std::string> existing_layers = + {layer_names.begin(), layer_names.end()}; + + for (const auto &p : map) { + const auto it = existing_layers.find(p.first); + if (it == existing_layers.end()) { + cv::util::throw_error( + std::logic_error("OV Backend: Failed to" + " find layer with name: " + p.first)); + } + } + } else if (cv::util::holds_alternative<T>(layer_attr)) { + // NB: Broadcast value to all layers.
+ auto elem = cv::util::get<T>(layer_attr); + for (auto &&layer_name : layer_names) { + map.emplace(layer_name, elem); + } + } + return map; +} + +template <typename K, typename V> +cv::optional<V> lookUp(const std::map<K, V> &map, const K& key) { + const auto it = map.find(key); + if (it == map.end()) { + return {}; + } + return cv::util::make_optional(std::move(it->second)); +} + +static bool isImage(const cv::GMatDesc &desc, + const ::ov::Shape &model_shape) { + return (model_shape.size() == 4u) && + (!desc.isND()) /* dims == 2 */ && + (desc.chan == 1 || desc.chan == 3) && + (desc.size.height != 1 && desc.size.width != 1) && + (desc.depth == CV_8U); +} + +struct Infer: public cv::detail::KernelTag { + using API = cv::GInferBase; + static cv::gapi::GBackend backend() { return cv::gapi::ov::backend(); } + static KImpl kernel() { return KImpl{outMeta, run}; } + + static cv::GMetaArgs outMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &in_metas, + const cv::GArgs &/*in_args*/) { + cv::GMetaArgs result; + + GConstGOVModel gm(gr); + const auto &uu = gm.metadata(nh).get<OVUnit>(); + // Initialize input information + // Note our input layers list order matches the API order and so + // meta order. + GAPI_Assert(uu.params.input_names.size() == in_metas.size() + && "Known input layers count doesn't match input meta count"); + + // NB: Pre/Post processing configuration is available only for read models. + if (cv::util::holds_alternative<ParamDesc::Model>(uu.params.kind)) { + const auto &model_info = cv::util::get<ParamDesc::Model>(uu.params.kind); + const auto new_shapes = + broadcastLayerAttr(model_info.new_shapes, + uu.params.input_names); + const_cast<std::shared_ptr<::ov::Model>&>(uu.model)->reshape(toOV(new_shapes)); + + const auto input_tensor_layout = + broadcastLayerAttr(model_info.input_tensor_layout, + uu.params.input_names); + const auto input_model_layout = + broadcastLayerAttr(model_info.input_model_layout, + uu.params.input_names); + + const auto interpolation = broadcastLayerAttr(model_info.interpolation, + uu.params.input_names); + const auto mean_values = broadcastLayerAttr(model_info.mean_values, + uu.params.input_names); + const auto scale_values = broadcastLayerAttr(model_info.scale_values, + uu.params.input_names); + // FIXME: Pre/Post processing step shouldn't be configured in this method. + ::ov::preprocess::PrePostProcessor ppp(uu.model); + for (auto &&it : ade::util::zip(ade::util::toRange(uu.params.input_names), + ade::util::toRange(in_metas))) { + const auto &mm = std::get<1>(it); + GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(mm)); + const auto &matdesc = cv::util::get<cv::GMatDesc>(mm); + + const auto &input_name = std::get<0>(it); + auto &input_info = ppp.input(input_name); + input_info.tensor().set_element_type(toOV(matdesc.depth)); + + const auto explicit_in_model_layout = lookUp(input_model_layout, input_name); + if (explicit_in_model_layout) { + input_info.model().set_layout(::ov::Layout(*explicit_in_model_layout)); + } + const auto explicit_in_tensor_layout = lookUp(input_tensor_layout, input_name); + if (explicit_in_tensor_layout) { + input_info.tensor().set_layout(::ov::Layout(*explicit_in_tensor_layout)); + } + const auto explicit_resize = lookUp(interpolation, input_name); + // NB: Note that the model layout can still be empty, + // e.g. if the model was converted to IRv11 without any additional + // info about layout via Model Optimizer.
+ const auto model_layout = ::ov::layout::get_layout(uu.model->input(input_name)); + const auto &input_shape = uu.model->input(input_name).get_shape(); + if (isImage(matdesc, input_shape)) { + // NB: Image case - all necessary preprocessing is configured automatically. + GAPI_LOG_DEBUG(NULL, "OV Backend: Input: \"" << input_name << "\" is image."); + // NB: Layout is already set; just double-check that + // the user provided the correct one. In fact, there is only one correct for image. + if (explicit_in_tensor_layout && + *explicit_in_tensor_layout != "NHWC") { + std::stringstream ss; + ss << "OV Backend: Provided tensor layout " << *explicit_in_tensor_layout + << " is not compatible with input data " << matdesc << " for layer \"" + << input_name << "\". Expecting NHWC"; + util::throw_error(std::logic_error(ss.str())); + } + input_info.tensor().set_layout(::ov::Layout("NHWC")); + input_info.tensor().set_spatial_static_shape(matdesc.size.height, + matdesc.size.width); + // NB: Even though resize is configured automatically, + // the user has an opportunity to specify the interpolation algorithm. + auto interp = explicit_resize + ? toOVInterp(*explicit_resize) + : ::ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR; + input_info.preprocess().resize(interp); + } else { + // NB: Tensor case - resize or layout conversions must be explicitly specified. + GAPI_LOG_DEBUG(NULL, "OV Backend: Input: \"" << input_name << "\" is tensor."); + if (explicit_resize) { + if (matdesc.isND()) { + // NB: ND case - need to obtain "H" and "W" positions + // in order to configure resize. + if (!explicit_in_tensor_layout && model_layout.empty()) { + std::stringstream ss; + ss << "Resize for input layer: " << input_name + << " can't be configured." + << " Failed to extract H and W positions from layout."; + util::throw_error(std::logic_error(ss.str())); + } else { + const auto layout = explicit_in_tensor_layout + ? ::ov::Layout(*explicit_in_tensor_layout) : model_layout; + auto H_idx = ::ov::layout::height_idx(layout); + auto W_idx = ::ov::layout::width_idx(layout); + // NB: If layout is "...HW", H position is -2. + if (H_idx < 0) H_idx = matdesc.dims.size() + H_idx; + if (W_idx < 0) W_idx = matdesc.dims.size() + W_idx; + GAPI_Assert(H_idx >= 0 && H_idx < static_cast<int>(matdesc.dims.size())); + GAPI_Assert(W_idx >= 0 && W_idx < static_cast<int>(matdesc.dims.size())); + input_info.tensor().set_spatial_static_shape(matdesc.dims[H_idx], + matdesc.dims[W_idx]); + input_info.preprocess().resize(toOVInterp(*explicit_resize)); + } + } else { + // NB: 2D case - we know exactly where H and W are. + input_info.tensor().set_spatial_static_shape(matdesc.size.height, + matdesc.size.width); + input_info.preprocess().resize(toOVInterp(*explicit_resize)); + } + } + } + // NB: Apply mean/scale as the last step of the preprocessing. + // Note that this can be applied to any input data if the + // position of "C" dimension is known.
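Expressed against the raw OpenVINO 2.0 preprocessing API, the mean/scale step that follows amounts to the snippet below; the model path, input name and values are placeholders:

```cpp
#include <openvino/openvino.hpp>

void configure_mean_scale() {
    ov::Core core;
    auto model = core.read_model("net.xml");
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input("data").preprocess()
       .mean({123.675f, 116.28f, 103.53f})   // per-channel mean
       .scale({58.395f, 57.12f, 57.375f});   // per-channel scale
    model = ppp.build();                     // bakes the steps into the model
}
```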
+ const auto mean_vec = lookUp(mean_values, input_name); + if (mean_vec) { + input_info.preprocess().mean(*mean_vec); + } + + const auto scale_vec = lookUp(scale_values, input_name); + if (scale_vec) { + input_info.preprocess().scale(*scale_vec); + } + } + + const auto output_tensor_layout = + broadcastLayerAttr(model_info.output_tensor_layout, + uu.params.output_names); + const auto output_model_layout = + broadcastLayerAttr(model_info.output_model_layout, + uu.params.output_names); + const auto output_tensor_precision = + broadcastLayerAttr(model_info.output_tensor_precision, + uu.params.output_names); + + for (const auto &output_name : uu.params.output_names) { + const auto explicit_out_tensor_layout = + lookUp(output_tensor_layout, output_name); + if (explicit_out_tensor_layout) { + ppp.output(output_name).tensor() + .set_layout(::ov::Layout(*explicit_out_tensor_layout)); + } + + const auto explicit_out_model_layout = + lookUp(output_model_layout, output_name); + if (explicit_out_model_layout) { + ppp.output(output_name).model() + .set_layout(::ov::Layout(*explicit_out_model_layout)); + } + + const auto explicit_out_tensor_prec = + lookUp(output_tensor_precision, output_name); + if (explicit_out_tensor_prec) { + ppp.output(output_name).tensor() + .set_element_type(toOV(*explicit_out_tensor_prec)); + } + } + + GAPI_LOG_DEBUG(NULL, "OV Backend: PrePostProcessor: " << ppp); + const_cast&>(uu.model) = ppp.build(); + } + + for (const auto &out_name : uu.params.output_names) { + cv::GMatDesc outm; + if (cv::util::holds_alternative(uu.params.kind)) { + const auto &out = uu.model->output(out_name); + outm = cv::GMatDesc(toCV(out.get_element_type()), + toCV(out.get_shape())); + } else { + GAPI_Assert(cv::util::holds_alternative(uu.params.kind)); + const auto &out = uu.compiled_model.output(out_name); + outm = cv::GMatDesc(toCV(out.get_element_type()), + toCV(out.get_shape())); + } + result.emplace_back(std::move(outm)); + } + + return result; + } + + static void run(std::shared_ptr ctx, + cv::gimpl::ov::RequestPool &reqPool) { + using namespace std::placeholders; + reqPool.getIdleRequest()->execute( + IInferExecutor::Task { + [ctx](::ov::InferRequest &infer_request) { + for (auto i : ade::util::iota(ctx->uu.params.num_in)) { + const auto& input_name = ctx->uu.params.input_names[i]; + auto input_tensor = infer_request.get_tensor(input_name); + // TODO: In some cases wrapping existing data pointer + // might be faster than copy. Make it a strategy. + copyToOV(ctx->inMat(i), input_tensor); + } + }, + std::bind(PostOutputs, _1, _2, ctx) + } + ); + } +}; + +} // namespace ov +} // namespace gimpl +} // namespace cv + +// IE backend implementation of GBackend::Priv /////////////////////// +namespace { +class GOVBackendImpl final: public cv::gapi::GBackend::Priv { + virtual void unpackKernel(ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GKernelImpl &ii) override { + using namespace cv::gimpl; + // FIXME: Introduce a DNNBackend interface which'd specify + // the framework for this??? 
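On the user side, the attributes consumed by outMeta above are set through the fluent cv::gapi::ov::Params interface mirrored by the PyParams bindings earlier in this patch. A hedged sketch, assuming the typed Params<Net> exposes the same cfg* setters as the generic one (paths and values are placeholders):

```cpp
auto params = cv::gapi::ov::Params<SampleNet>{"net.xml", "net.bin", "CPU"}
    .cfgInputTensorLayout("NHWC")
    .cfgMean({123.675f, 116.28f, 103.53f})
    .cfgScale({58.395f, 57.12f, 57.375f})
    .cfgResize(cv::INTER_LINEAR)
    .cfgNumRequests(4u);   // sizes the RequestPool above
```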
+ GOVModel gm(gr); + auto &np = gm.metadata(nh).get(); + auto &pp = cv::util::any_cast(np.opaque); + const auto &ki = cv::util::any_cast(ii.opaque); + + GModel::Graph model(gr); + auto& op = model.metadata(nh).get(); + + // NB: In case generic infer, info about in/out names is stored in operation (op.params) + if (pp.is_generic) + { + auto& info = cv::util::any_cast(op.params); + pp.input_names = info.in_names; + pp.output_names = info.out_names; + pp.num_in = info.in_names.size(); + pp.num_out = info.out_names.size(); + } + + gm.metadata(nh).set(OVUnit{pp}); + gm.metadata(nh).set(OVCallable{ki.run}); + gm.metadata(nh).set(CustomMetaFunction{ki.customMetaFunc}); + } + + virtual EPtr compile(const ade::Graph &graph, + const cv::GCompileArgs &, + const std::vector &nodes) const override { + return EPtr{new cv::gimpl::ov::GOVExecutable(graph, nodes)}; + } + + virtual cv::GKernelPackage auxiliaryKernels() const override { + return cv::gapi::kernels< cv::gimpl::ov::Infer >(); + } + + virtual bool controlsMerge() const override { + return true; + } + + virtual bool allowsMerge(const cv::gimpl::GIslandModel::Graph &, + const ade::NodeHandle &, + const ade::NodeHandle &, + const ade::NodeHandle &) const override { + return false; + } +}; + +} // anonymous namespace + +cv::gapi::GBackend cv::gapi::ov::backend() { + static cv::gapi::GBackend this_backend(std::make_shared()); + return this_backend; +} + +static std::vector<::ov::InferRequest> +createInferRequests(::ov::CompiledModel &compiled_model, + size_t num_infer_requests) { + std::vector<::ov::InferRequest> infer_requests; + for (size_t i = 0; i < num_infer_requests; ++i) { + infer_requests.push_back(compiled_model.create_infer_request()); + } + return infer_requests; +} + +// GOVExecutable implementation ////////////////////////////////////////////// +cv::gimpl::ov::GOVExecutable::GOVExecutable(const ade::Graph &g, + const std::vector &nodes) + : m_g(g), m_gm(m_g) { + + // FIXME: Currently this backend is capable to run a single inference node only. 
+    // Need to extend our island fusion with merge/not-to-merge decision making parametrization
+    GConstGOVModel ovm(g);
+
+    for (auto &nh : nodes) {
+        switch (m_gm.metadata(nh).get<NodeType>().t) {
+        case NodeType::OP:
+            if (this_nh == nullptr) {
+                this_nh = nh;
+                compiled = const_cast<OVUnit&>(ovm.metadata(this_nh).get<OVUnit>()).compile();
+                m_reqPool.reset(new RequestPool(createInferRequests(compiled.compiled_model, 1)));
+            }
+            else
+                util::throw_error(std::logic_error("Multi-node inference is not supported!"));
+            break;
+
+        case NodeType::DATA: {
+            m_dataNodes.push_back(nh);
+            const auto &desc = m_gm.metadata(nh).get<Data>();
+            if (desc.storage == Data::Storage::CONST_VAL) {
+                util::throw_error(std::logic_error("No const data please!"));
+            }
+            if (desc.storage == Data::Storage::INTERNAL) {
+                util::throw_error(std::logic_error("No internal data please!"));
+            }
+            break;
+        }
+        default: util::throw_error(std::logic_error("Unsupported NodeType type"));
+        }
+    }
+}
+
+void cv::gimpl::ov::GOVExecutable::run(cv::gimpl::GIslandExecutable::IInput  &in,
+                                       cv::gimpl::GIslandExecutable::IOutput &out) {
+    std::vector<InObj>  input_objs;
+    std::vector<OutObj> output_objs;
+
+    const auto &in_desc = in.desc();
+          auto  in_msg  = in.get();
+
+    if (cv::util::holds_alternative<cv::gimpl::EndOfStream>(in_msg))
+    {
+        out.post(cv::gimpl::EndOfStream{});
+        return;
+    }
+
+    GAPI_Assert(cv::util::holds_alternative<cv::GRunArgs>(in_msg));
+    const auto in_vector = cv::util::get<cv::GRunArgs>(in_msg);
+    cv::GRunArg::Meta stub_meta;
+    for (auto &&in_arg : in_vector)
+    {
+        stub_meta.insert(in_arg.meta.begin(), in_arg.meta.end());
+    }
+
+    input_objs.reserve(in_desc.size());
+    for (auto &&it: ade::util::zip(ade::util::toRange(in_desc),
+                                   ade::util::toRange(in_vector)))
+    {
+        input_objs.emplace_back(std::get<0>(it), std::get<1>(it));
+    }
+
+    const auto &out_desc = out.desc();
+    output_objs.reserve(out_desc.size());
+    for (auto &&it: ade::util::indexed(ade::util::toRange(out_desc)))
+    {
+        output_objs.emplace_back(ade::util::value(it),
+                                 out.get(ade::util::checked_cast<int>(ade::util::index(it))));
+    }
+
+    GConstGOVModel giem(m_g);
+    const auto &uu = giem.metadata(this_nh).get<OVUnit>();
+    const auto &op = m_gm.metadata(this_nh).get<Op>();
+
+    auto ctx = std::make_shared<OVCallContext>(uu, out, op.args, op.outs,
+            std::move(stub_meta), std::move(input_objs), std::move(output_objs));
+
+    const auto &kk = giem.metadata(this_nh).get<OVCallable>();
+
+    try {
+        kk.run(ctx, *m_reqPool);
+    } catch (...) {
+        auto eptr = std::current_exception();
+        for (auto i : ade::util::iota(ctx->uu.params.num_out))
+        {
+            auto output = ctx->output(i);
+            ctx->out.meta(output, ctx->getMeta());
+            ctx->out.post(std::move(output), eptr);
+        }
+        return;
+    }
+
+    if (!m_gm.metadata().contains<Streaming>()) {
+        m_reqPool->waitAll();
+    }
+}
+
+#else // HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000
+
+cv::gapi::GBackend cv::gapi::ov::backend() {
+    // Still provide this symbol to avoid linking issues
+    util::throw_error(std::runtime_error("G-API has been compiled without OpenVINO support"));
+}
+
+#endif // HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000
diff --git a/modules/gapi/src/backends/ov/govbackend.hpp b/modules/gapi/src/backends/ov/govbackend.hpp
new file mode 100644
index 0000000000..0ac858dc52
--- /dev/null
+++ b/modules/gapi/src/backends/ov/govbackend.hpp
@@ -0,0 +1,66 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2023 Intel Corporation
+
+#ifndef OPENCV_GAPI_GOVBACKEND_HPP
+#define OPENCV_GAPI_GOVBACKEND_HPP
+
+// Include anyway - cv::gapi::ov::backend() still needs to be defined
+#include "opencv2/gapi/infer/ov.hpp"
+
+#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000
+
+#include <openvino/openvino.hpp>
+
+#include "backends/common/gbackend.hpp"
+
+namespace cv {
+namespace gimpl {
+namespace ov {
+
+struct OVCompiled {
+    ::ov::CompiledModel compiled_model;
+};
+
+class RequestPool;
+
+class GOVExecutable final: public GIslandExecutable
+{
+    const ade::Graph &m_g;
+    GModel::ConstGraph m_gm;
+
+    // The only executable stuff in this graph
+    // (assuming it is always single-op)
+    ade::NodeHandle this_nh;
+    OVCompiled compiled;
+
+    // List of all resources in graph (both internal and external)
+    std::vector<ade::NodeHandle> m_dataNodes;
+
+    // To manage multiple async requests
+    std::unique_ptr<RequestPool> m_reqPool;
+
+public:
+    GOVExecutable(const ade::Graph                   &graph,
+                  const std::vector<ade::NodeHandle> &nodes);
+
+    virtual inline bool canReshape() const override { return false; }
+    virtual inline void reshape(ade::Graph&, const GCompileArgs&) override {
+        GAPI_Error("InternalError"); // Not implemented yet
+    }
+
+    virtual void run(std::vector<InObj>  &&,
+                     std::vector<OutObj> &&) override {
+        GAPI_Error("Not implemented");
+    }
+
+    virtual void run(GIslandExecutable::IInput  &in,
+                     GIslandExecutable::IOutput &out) override;
+};
+
+}}}
+
+#endif // HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000
+#endif // OPENCV_GAPI_GOVBACKEND_HPP
diff --git a/modules/gapi/src/backends/ov/util.hpp b/modules/gapi/src/backends/ov/util.hpp
new file mode 100644
index 0000000000..ea2aeb60a6
--- /dev/null
+++ b/modules/gapi/src/backends/ov/util.hpp
@@ -0,0 +1,35 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2023 Intel Corporation
+
+#ifndef OPENCV_GAPI_INFER_OV_UTIL_HPP
+#define OPENCV_GAPI_INFER_OV_UTIL_HPP
+
+#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000
+
+// NOTE: This file is not included by default in infer/ov.hpp
+// and won't be. infer/ov.hpp doesn't depend on OV headers itself.
+// This file does -- so needs to be included separately by those who care.
+
+#include <openvino/openvino.hpp>
+
+#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+namespace ov {
+namespace util {
+
+// NB: These functions are EXPORTed to make them accessible by the
+// test suite only.
+GAPI_EXPORTS std::vector<int> to_ocv(const ::ov::Shape &shape);
+GAPI_EXPORTS int to_ocv(const ::ov::element::Type &type);
+
+}}}}
+
+#endif // HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000
+
+#endif // OPENCV_GAPI_INFER_OV_UTIL_HPP
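(Editorial aside: the exported to_ocv() helpers let a test allocate a cv::Mat
that matches an OpenVINO tensor. A minimal sketch, assuming an
ov::InferRequest named infer_request is in scope:

    // Allocate a cv::Mat with the tensor's shape and element type.
    ov::Tensor tensor = infer_request.get_tensor("prob");
    cv::Mat out(cv::gapi::ov::util::to_ocv(tensor.get_shape()),
                cv::gapi::ov::util::to_ocv(tensor.get_element_type()));

This is exactly the pattern the new OV tests below use via Mat::create().)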
diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp
index f7dc23e1e6..4056dd323f 100644
--- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp
+++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2019-2021 Intel Corporation
+// Copyright (C) 2019-2023 Intel Corporation
 
 #include "../test_precomp.hpp"
 
@@ -2238,7 +2238,7 @@ TEST(TestAgeGenderIE, InferWithBatch)
     params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
     params.device_id = "CPU";
 
-    cv::Mat in_mat({batch_size, 3, 320, 240}, CV_8U);
+    cv::Mat in_mat({batch_size, 3, 62, 62}, CV_8U);
     cv::randu(in_mat, 0, 255);
 
     cv::Mat gapi_age, gapi_gender;
@@ -2247,8 +2247,9 @@ TEST(TestAgeGenderIE, InferWithBatch)
     IE::Blob::Ptr ie_age, ie_gender;
     {
         auto plugin = cv::gimpl::ie::wrap::getPlugin(params);
-        auto net = cv::gimpl::ie::wrap::readNetwork(params);
-        setNetParameters(net);
+        auto net = cv::gimpl::ie::wrap::readNetwork(params);
+        auto ii = net.getInputsInfo().at("data");
+        ii->setPrecision(IE::Precision::U8);
         net.setBatchSize(batch_size);
         auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params);
         auto infer_request = this_network.CreateInferRequest();
@@ -3056,6 +3057,73 @@ TEST_F(AgeGenderInferTest, ChangeSpecificOutputPrecison) {
     validate();
 }
 
+TEST_F(AgeGenderInferTest, ThrowIfSetLayoutForImage) {
+    auto pp = cv::gapi::ie::Params<AgeGender> {
+        m_params.model_path, m_params.weights_path, m_params.device_id
+    }.cfgOutputLayers({ "age_conv3", "prob" })
+     .cfgOutputPrecision({{"prob", CV_8U}})
+     .cfgInputLayout("NHWC");
+
+    EXPECT_ANY_THROW(buildGraph().apply(cv::gin(m_in_mat), cv::gout(m_gapi_age, m_gapi_gender),
+                                        cv::compile_args(cv::gapi::networks(pp))));
+}
+
+TEST(TestAgeGenderIE, InferTensorWithPreproc) {
+    initDLDTDataPath();
+
+    cv::gapi::ie::detail::ParamDesc params;
+    params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+    params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
+    params.device_id = "CPU";
+
+    // Load IE network, initialize input data using that.
+    cv::Mat in_mat({1, 240, 320, 3}, CV_8U);
+    cv::randu(in_mat, 0, 255);
+    cv::Mat gapi_age, gapi_gender;
+
+    IE::Blob::Ptr ie_age, ie_gender;
+    {
+        auto plugin = cv::gimpl::ie::wrap::getPlugin(params);
+        auto net = cv::gimpl::ie::wrap::readNetwork(params);
+        auto ii = net.getInputsInfo().at("data");
+
+        ii->setPrecision(IE::Precision::U8);
+        ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+        ii->setLayout(IE::Layout::NHWC);
+
+        auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params);
+        auto infer_request = this_network.CreateInferRequest();
+        IE::TensorDesc desc{IE::Precision::U8, {1, 3, 240, 320}, IE::Layout::NHWC};
+        auto blob = IE::make_shared_blob<uint8_t>(desc, const_cast<uint8_t*>(in_mat.ptr<uint8_t>()));
+        infer_request.SetBlob("data", blob);
+        infer_request.Infer();
+        ie_age    = infer_request.GetBlob("age_conv3");
+        ie_gender = infer_request.GetBlob("prob");
+    }
+
+    // Configure & run G-API
+    using AGInfo = std::tuple<cv::GMat, cv::GMat>;
+    G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "test-age-gender");
+
+    cv::GMat in;
+    cv::GMat age, gender;
+    std::tie(age, gender) = cv::gapi::infer<AgeGender>(in);
+    cv::GComputation comp(cv::GIn(in), cv::GOut(age, gender));
+
+    auto pp = cv::gapi::ie::Params<AgeGender> {
+        params.model_path, params.weights_path, params.device_id
+    }.cfgOutputLayers({ "age_conv3", "prob" })
+     .cfgResize(cv::INTER_LINEAR)
+     .cfgInputLayout("NHWC");
+
+    comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender),
+               cv::compile_args(cv::gapi::networks(pp)));
+
+    // Validate with IE itself (avoid DNN module dependency here)
+    normAssert(cv::gapi::ie::util::to_ocv(ie_age),    gapi_age,    "Test age output"   );
+    normAssert(cv::gapi::ie::util::to_ocv(ie_gender), gapi_gender, "Test gender output");
+}
+
 } // namespace opencv_test
 
 #endif // HAVE_INF_ENGINE
diff --git a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp
new file mode 100644
index 0000000000..ef63f9e8f6
--- /dev/null
+++ b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp
@@ -0,0 +1,540 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2023 Intel Corporation
+
+#if defined HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000
+
+#include "../test_precomp.hpp"
+
+#include "backends/ov/util.hpp"
+
+#include <opencv2/gapi/infer/ov.hpp>
+
+#include <openvino/openvino.hpp>
+
+namespace opencv_test
+{
+
+namespace {
+// FIXME: taken from DNN module
+void initDLDTDataPath()
+{
+#ifndef WINRT
+    static bool initialized = false;
+    if (!initialized)
+    {
+        const char* omzDataPath = getenv("OPENCV_OPEN_MODEL_ZOO_DATA_PATH");
+        if (omzDataPath)
+            cvtest::addDataSearchPath(omzDataPath);
+        const char* dnnDataPath = getenv("OPENCV_DNN_TEST_DATA_PATH");
+        if (dnnDataPath) {
+            // Add the dnnDataPath itself - G-API is using some images there directly
+            cvtest::addDataSearchPath(dnnDataPath);
+            cvtest::addDataSearchPath(dnnDataPath + std::string("/omz_intel_models"));
+        }
+        initialized = true;
+    }
+#endif // WINRT
+}
+
+static const std::string SUBDIR = "intel/age-gender-recognition-retail-0013/FP32/";
+
+void copyFromOV(ov::Tensor &tensor, cv::Mat &mat) {
+    GAPI_Assert(tensor.get_byte_size() == mat.total() * mat.elemSize());
+    std::copy_n(reinterpret_cast<uint8_t*>(tensor.data()),
+                tensor.get_byte_size(),
+                mat.ptr<uint8_t>());
+}
+
+void copyToOV(const cv::Mat &mat, ov::Tensor &tensor) {
+    GAPI_Assert(tensor.get_byte_size() == mat.total() * mat.elemSize());
+    std::copy_n(mat.ptr<uint8_t>(),
+                tensor.get_byte_size(),
+                reinterpret_cast<uint8_t*>(tensor.data()));
+}
+
+// FIXME: taken from the DNN module
+void normAssert(cv::InputArray ref, cv::InputArray test,
+                const char *comment /*= ""*/,
+                double l1 = 0.00001, double lInf = 0.0001) {
+    double normL1 = cvtest::norm(ref, test, cv::NORM_L1) / ref.getMat().total();
+    EXPECT_LE(normL1, l1) << comment;
+
+    double normInf = cvtest::norm(ref, test, cv::NORM_INF);
+    EXPECT_LE(normInf, lInf) << comment;
+}
+
+ov::Core getCore() {
+    static ov::Core core;
+    return core;
+}
+
+// TODO: AGNetGenComp, AGNetTypedComp, AGNetOVComp, AGNetOVCompiled
+// can be generalized to work with any model and used as parameters for tests.
+
+struct AGNetGenComp {
+    static constexpr const char* tag = "age-gender-generic";
+    using Params = cv::gapi::ov::Params<cv::gapi::Generic>;
+
+    static Params params(const std::string &xml,
+                         const std::string &bin,
+                         const std::string &device) {
+        return {tag, xml, bin, device};
+    }
+
+    static Params params(const std::string &blob_path,
+                         const std::string &device) {
+        return {tag, blob_path, device};
+    }
+
+    static cv::GComputation create() {
+        cv::GMat in;
+        GInferInputs inputs;
+        inputs["data"] = in;
+        auto outputs = cv::gapi::infer<cv::gapi::Generic>(tag, inputs);
+        auto age     = outputs.at("age_conv3");
+        auto gender  = outputs.at("prob");
+        return cv::GComputation{cv::GIn(in), cv::GOut(age, gender)};
+    }
+};
+
+struct AGNetTypedComp {
+    using AGInfo = std::tuple<cv::GMat, cv::GMat>;
+    G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "typed-age-gender");
+    using Params = cv::gapi::ov::Params<AgeGender>;
+
+    static Params params(const std::string &xml_path,
+                         const std::string &bin_path,
+                         const std::string &device) {
+        return Params {
+            xml_path, bin_path, device
+        }.cfgOutputLayers({ "age_conv3", "prob" });
+    }
+
+    static cv::GComputation create() {
+        cv::GMat in;
+        cv::GMat age, gender;
+        std::tie(age, gender) = cv::gapi::infer<AgeGender>(in);
+        return cv::GComputation{cv::GIn(in), cv::GOut(age, gender)};
+    }
+};
+
+class AGNetOVCompiled {
+public:
+    AGNetOVCompiled(ov::CompiledModel &&compiled_model)
+        : m_compiled_model(std::move(compiled_model)) {
+    }
+
+    void operator()(const cv::Mat &in_mat,
+                    cv::Mat &age_mat,
+                    cv::Mat &gender_mat) {
+        auto infer_request = m_compiled_model.create_infer_request();
+        auto input_tensor = infer_request.get_input_tensor();
+        copyToOV(in_mat, input_tensor);
+
+        infer_request.infer();
+
+        auto age_tensor = infer_request.get_tensor("age_conv3");
+        age_mat.create(cv::gapi::ov::util::to_ocv(age_tensor.get_shape()),
+                       cv::gapi::ov::util::to_ocv(age_tensor.get_element_type()));
+        copyFromOV(age_tensor, age_mat);
+
+        auto gender_tensor = infer_request.get_tensor("prob");
+        gender_mat.create(cv::gapi::ov::util::to_ocv(gender_tensor.get_shape()),
+                          cv::gapi::ov::util::to_ocv(gender_tensor.get_element_type()));
+        copyFromOV(gender_tensor, gender_mat);
+    }
+
+    void export_model(const std::string &outpath) {
+        std::ofstream file{outpath, std::ios::out | std::ios::binary};
+        GAPI_Assert(file.is_open());
+        m_compiled_model.export_model(file);
+    }
+
+private:
+    ov::CompiledModel m_compiled_model;
+};
+
+struct ImageInputPreproc {
+    void operator()(ov::preprocess::PrePostProcessor &ppp) {
+        ppp.input().tensor().set_layout(ov::Layout("NHWC"))
+                            .set_element_type(ov::element::u8)
+                            .set_shape({1, size.height, size.width, 3});
+        ppp.input().model().set_layout(ov::Layout("NCHW"));
+        ppp.input().preprocess().resize(::ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+    }
+
+    cv::Size size;
+};
+
+class AGNetOVComp {
+public:
+    AGNetOVComp(const std::string &xml_path,
+                const std::string &bin_path,
+                const std::string &device)
+        : m_device(device) {
+        m_model = getCore().read_model(xml_path, bin_path);
+    }
+
+    using PrePostProcessF = std::function<void(ov::preprocess::PrePostProcessor&)>;
+
+    void cfgPrePostProcessing(PrePostProcessF f) {
+        ov::preprocess::PrePostProcessor ppp(m_model);
+        f(ppp);
+        m_model = ppp.build();
+    }
+
+    AGNetOVCompiled compile() {
+        auto compiled_model = getCore().compile_model(m_model, m_device);
+        return {std::move(compiled_model)};
+    }
+
+    void apply(const cv::Mat &in_mat,
+               cv::Mat &age_mat,
+               cv::Mat &gender_mat) {
+        compile()(in_mat, age_mat, gender_mat);
+    }
+
+private:
+    std::string m_device;
+    std::shared_ptr<ov::Model> m_model;
+};
+
+} // anonymous namespace
+
+// TODO: Make all of the tests below parametrized to avoid code duplication
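// (Editorial sketch, not part of the patch: the TODO above could be realized
// with a value-parametrized gtest fixture; all names below are hypothetical.)
//
//     class AGNetRoundTrip : public testing::TestWithParam<int /*input type*/> {};
//     TEST_P(AGNetRoundTrip, MatchesOVReference) { /* shared test body */ }
//     INSTANTIATE_TEST_CASE_P(TestAgeGenderOV, AGNetRoundTrip,
//                             testing::Values(CV_32F, CV_8UC3));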
+TEST(TestAgeGenderOV, InferTypedTensor) {
+    initDLDTDataPath();
+    const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+    const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
+    const std::string device = "CPU";
+
+    cv::Mat in_mat({1, 3, 62, 62}, CV_32F);
+    cv::randu(in_mat, -1, 1);
+    cv::Mat ov_age, ov_gender, gapi_age, gapi_gender;
+
+    // OpenVINO
+    AGNetOVComp ref(xml_path, bin_path, device);
+    ref.apply(in_mat, ov_age, ov_gender);
+
+    // G-API
+    auto comp = AGNetTypedComp::create();
+    auto pp = AGNetTypedComp::params(xml_path, bin_path, device);
+    comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender),
+               cv::compile_args(cv::gapi::networks(pp)));
+
+    // Assert
+    normAssert(ov_age,    gapi_age,    "Test age output"   );
+    normAssert(ov_gender, gapi_gender, "Test gender output");
+}
+
+TEST(TestAgeGenderOV, InferTypedImage) {
+    initDLDTDataPath();
+    const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+    const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
+    const std::string device = "CPU";
+
+    cv::Mat in_mat(300, 300, CV_8UC3);
+    cv::randu(in_mat, 0, 255);
+    cv::Mat ov_age, ov_gender, gapi_age, gapi_gender;
+
+    // OpenVINO
+    AGNetOVComp ref(xml_path, bin_path, device);
+    ref.cfgPrePostProcessing(ImageInputPreproc{in_mat.size()});
+    ref.apply(in_mat, ov_age, ov_gender);
+
+    // G-API
+    auto comp = AGNetTypedComp::create();
+    auto pp = AGNetTypedComp::params(xml_path, bin_path, device);
+    comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender),
+               cv::compile_args(cv::gapi::networks(pp)));
+
+    // Assert
+    normAssert(ov_age,    gapi_age,    "Test age output"   );
+    normAssert(ov_gender, gapi_gender, "Test gender output");
+}
+
+TEST(TestAgeGenderOV, InferGenericTensor) {
+    initDLDTDataPath();
+    const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+    const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
+    const std::string device = "CPU";
+
+    cv::Mat in_mat({1, 3, 62, 62}, CV_32F);
+    cv::randu(in_mat, -1, 1);
+    cv::Mat ov_age, ov_gender, gapi_age, gapi_gender;
+
+    // OpenVINO
+    AGNetOVComp ref(xml_path, bin_path, device);
+    ref.apply(in_mat, ov_age, ov_gender);
+
+    // G-API
+    auto comp = AGNetGenComp::create();
+    auto pp = AGNetGenComp::params(xml_path, bin_path, device);
+    comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender),
+               cv::compile_args(cv::gapi::networks(pp)));
+
+    // Assert
+    normAssert(ov_age,    gapi_age,    "Test age output"   );
+    normAssert(ov_gender, gapi_gender, "Test gender output");
+}
+
+TEST(TestAgeGenderOV, InferGenericImage) {
+    initDLDTDataPath();
+    const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+    const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
+    const std::string device = "CPU";
+
+    cv::Mat in_mat(300, 300, CV_8UC3);
+    cv::randu(in_mat, 0, 255);
+    cv::Mat ov_age, ov_gender, gapi_age, gapi_gender;
+
+    // OpenVINO
+    AGNetOVComp ref(xml_path, bin_path, device);
+    ref.cfgPrePostProcessing(ImageInputPreproc{in_mat.size()});
+    ref.apply(in_mat, ov_age, ov_gender);
+
+    // G-API
+    auto comp = AGNetGenComp::create();
+    auto pp = AGNetGenComp::params(xml_path, bin_path, device);
+    comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender),
+               cv::compile_args(cv::gapi::networks(pp)));
+
+    // Assert
normAssert(ov_age, gapi_age, "Test age output" ); + normAssert(ov_gender, gapi_gender, "Test gender output"); +} + +TEST(TestAgeGenderOV, InferGenericImageBlob) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string blob_path = "age-gender-recognition-retail-0013.blob"; + const std::string device = "CPU"; + + cv::Mat in_mat(300, 300, CV_8UC3); + cv::randu(in_mat, 0, 255); + cv::Mat ov_age, ov_gender, gapi_age, gapi_gender; + + // OpenVINO + AGNetOVComp ref(xml_path, bin_path, device); + ref.cfgPrePostProcessing(ImageInputPreproc{in_mat.size()}); + auto cc_ref = ref.compile(); + // NB: Output blob will contain preprocessing inside. + cc_ref.export_model(blob_path); + cc_ref(in_mat, ov_age, ov_gender); + + // G-API + auto comp = AGNetGenComp::create(); + auto pp = AGNetGenComp::params(blob_path, device); + comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + // Assert + normAssert(ov_age, gapi_age, "Test age output" ); + normAssert(ov_gender, gapi_gender, "Test gender output"); +} + +TEST(TestAgeGenderOV, InferGenericTensorBlob) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string blob_path = "age-gender-recognition-retail-0013.blob"; + const std::string device = "CPU"; + + cv::Mat in_mat({1, 3, 62, 62}, CV_32F); + cv::randu(in_mat, -1, 1); + cv::Mat ov_age, ov_gender, gapi_age, gapi_gender; + + // OpenVINO + AGNetOVComp ref(xml_path, bin_path, device); + auto cc_ref = ref.compile(); + cc_ref.export_model(blob_path); + cc_ref(in_mat, ov_age, ov_gender); + + // G-API + auto comp = AGNetGenComp::create(); + auto pp = AGNetGenComp::params(blob_path, device); + comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + // Assert + normAssert(ov_age, gapi_age, "Test age output" ); + normAssert(ov_gender, gapi_gender, "Test gender output"); +} + +TEST(TestAgeGenderOV, InferBothOutputsFP16) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string device = "CPU"; + + cv::Mat in_mat({1, 3, 62, 62}, CV_32F); + cv::randu(in_mat, -1, 1); + cv::Mat ov_age, ov_gender, gapi_age, gapi_gender; + + // OpenVINO + AGNetOVComp ref(xml_path, bin_path, device); + ref.cfgPrePostProcessing([](ov::preprocess::PrePostProcessor &ppp){ + ppp.output(0).tensor().set_element_type(ov::element::f16); + ppp.output(1).tensor().set_element_type(ov::element::f16); + }); + ref.apply(in_mat, ov_age, ov_gender); + + // G-API + auto comp = AGNetGenComp::create(); + auto pp = AGNetGenComp::params(xml_path, bin_path, device); + pp.cfgOutputTensorPrecision(CV_16F); + + comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + // Assert + normAssert(ov_age, gapi_age, "Test age output" ); + normAssert(ov_gender, gapi_gender, "Test gender output"); +} + +TEST(TestAgeGenderOV, InferOneOutputFP16) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = 
findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string device = "CPU"; + + cv::Mat in_mat({1, 3, 62, 62}, CV_32F); + cv::randu(in_mat, -1, 1); + cv::Mat ov_age, ov_gender, gapi_age, gapi_gender; + + // OpenVINO + const std::string fp16_output_name = "prob"; + AGNetOVComp ref(xml_path, bin_path, device); + ref.cfgPrePostProcessing([&](ov::preprocess::PrePostProcessor &ppp){ + ppp.output(fp16_output_name).tensor().set_element_type(ov::element::f16); + }); + ref.apply(in_mat, ov_age, ov_gender); + + // G-API + auto comp = AGNetGenComp::create(); + auto pp = AGNetGenComp::params(xml_path, bin_path, device); + pp.cfgOutputTensorPrecision({{fp16_output_name, CV_16F}}); + + comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + // Assert + normAssert(ov_age, gapi_age, "Test age output" ); + normAssert(ov_gender, gapi_gender, "Test gender output"); +} + +TEST(TestAgeGenderOV, ThrowCfgOutputPrecForBlob) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string blob_path = "age-gender-recognition-retail-0013.blob"; + const std::string device = "CPU"; + + // OpenVINO (Just for blob compilation) + AGNetOVComp ref(xml_path, bin_path, device); + auto cc_ref = ref.compile(); + cc_ref.export_model(blob_path); + + // G-API + auto comp = AGNetGenComp::create(); + auto pp = AGNetGenComp::params(blob_path, device); + + EXPECT_ANY_THROW(pp.cfgOutputTensorPrecision(CV_16F)); +} + +TEST(TestAgeGenderOV, ThrowInvalidConfigIR) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string device = "CPU"; + + // G-API + auto comp = AGNetGenComp::create(); + auto pp = AGNetGenComp::params(xml_path, bin_path, device); + pp.cfgPluginConfig({{"some_key", "some_value"}}); + + EXPECT_ANY_THROW(comp.compile(cv::GMatDesc{CV_8U,3,cv::Size{320, 240}}, + cv::compile_args(cv::gapi::networks(pp)))); +} + +TEST(TestAgeGenderOV, ThrowInvalidConfigBlob) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string blob_path = "age-gender-recognition-retail-0013.blob"; + const std::string device = "CPU"; + + // OpenVINO (Just for blob compilation) + AGNetOVComp ref(xml_path, bin_path, device); + auto cc_ref = ref.compile(); + cc_ref.export_model(blob_path); + + // G-API + auto comp = AGNetGenComp::create(); + auto pp = AGNetGenComp::params(blob_path, device); + pp.cfgPluginConfig({{"some_key", "some_value"}}); + + EXPECT_ANY_THROW(comp.compile(cv::GMatDesc{CV_8U,3,cv::Size{320, 240}}, + cv::compile_args(cv::gapi::networks(pp)))); +} + +TEST(TestAgeGenderOV, ThrowInvalidImageLayout) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string device = "CPU"; + + // NB: This mat may only have "NHWC" layout. 
+ cv::Mat in_mat(300, 300, CV_8UC3); + cv::randu(in_mat, 0, 255); + cv::Mat gender, gapi_age, gapi_gender; + auto comp = AGNetTypedComp::create(); + auto pp = AGNetTypedComp::params(xml_path, bin_path, device); + + pp.cfgInputTensorLayout("NCHW"); + + EXPECT_ANY_THROW(comp.compile(cv::descr_of(in_mat), + cv::compile_args(cv::gapi::networks(pp)))); +} + +TEST(TestAgeGenderOV, InferTensorWithPreproc) { + initDLDTDataPath(); + const std::string xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + const std::string bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + const std::string device = "CPU"; + + cv::Mat in_mat({1, 240, 320, 3}, CV_32F); + cv::randu(in_mat, -1, 1); + cv::Mat ov_age, ov_gender, gapi_age, gapi_gender; + + // OpenVINO + AGNetOVComp ref(xml_path, bin_path, device); + ref.cfgPrePostProcessing([](ov::preprocess::PrePostProcessor &ppp) { + auto& input = ppp.input(); + input.tensor().set_spatial_static_shape(240, 320) + .set_layout("NHWC"); + input.preprocess().resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + }); + ref.apply(in_mat, ov_age, ov_gender); + + // G-API + auto comp = AGNetTypedComp::create(); + auto pp = AGNetTypedComp::params(xml_path, bin_path, device); + pp.cfgResize(cv::INTER_LINEAR) + .cfgInputTensorLayout("NHWC"); + + comp.apply(cv::gin(in_mat), cv::gout(gapi_age, gapi_gender), + cv::compile_args(cv::gapi::networks(pp))); + + // Assert + normAssert(ov_age, gapi_age, "Test age output" ); + normAssert(ov_gender, gapi_gender, "Test gender output"); +} + +} // namespace opencv_test + +#endif // HAVE_INF_ENGINE && INF_ENGINE_RELEASE >= 2022010000 diff --git a/modules/imgcodecs/CMakeLists.txt b/modules/imgcodecs/CMakeLists.txt index 1572543aff..8183837c43 100644 --- a/modules/imgcodecs/CMakeLists.txt +++ b/modules/imgcodecs/CMakeLists.txt @@ -13,6 +13,11 @@ if(HAVE_WINRT_CX AND NOT WINRT) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW") endif() +if (HAVE_AVIF) + ocv_include_directories(${AVIF_INCLUDE_DIR}) + list(APPEND GRFMT_LIBS ${AVIF_LIBRARY}) +endif() + if(HAVE_JPEG) ocv_include_directories(${JPEG_INCLUDE_DIR} ${${JPEG_LIBRARY}_BINARY_DIR}) list(APPEND GRFMT_LIBS ${JPEG_LIBRARIES}) diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index ca79b90d19..c1bdf72291 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -105,7 +105,10 @@ enum ImwriteFlags { IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI IMWRITE_TIFF_YDPI = 258,//!< For TIFF, use to specify the Y direction DPI IMWRITE_TIFF_COMPRESSION = 259,//!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default. - IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000. + IMWRITE_JPEG2000_COMPRESSION_X1000 = 272,//!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000. + IMWRITE_AVIF_QUALITY = 512,//!< For AVIF, it can be a quality between 0 and 100 (the higher the better). Default is 95. 
+  IMWRITE_AVIF_DEPTH = 513,//!< For AVIF, it can be 8, 10 or 12. If >8, it is stored/read as CV_16U. Default is 8.
+  IMWRITE_AVIF_SPEED = 514 //!< For AVIF, it is between 0 (slowest) and 10 (fastest). Default is 9.
 };
 
 enum ImwriteJPEGSamplingFactorParams {
@@ -185,6 +188,7 @@ Currently, the following file formats are supported:
 - JPEG 2000 files - \*.jp2 (see the *Note* section)
 - Portable Network Graphics - \*.png (see the *Note* section)
 - WebP - \*.webp (see the *Note* section)
+- AVIF - \*.avif (see the *Note* section)
 - Portable image format - \*.pbm, \*.pgm, \*.ppm \*.pxm, \*.pnm (always supported)
 - PFM files - \*.pfm (see the *Note* section)
 - Sun rasters - \*.sr, \*.ras (always supported)
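(Editorial aside: the new flags are passed like any other imwrite parameters;
a minimal sketch, with file names and the scaling factor purely illustrative:

    // Encode a BGR image as 10-bit AVIF at quality 50. Depths above 8
    // require a CV_16U input, so widen 8-bit data first.
    cv::Mat img = cv::imread("input.png");   // any 8-bit BGR image
    cv::Mat img16;
    img.convertTo(img16, CV_16U, 1 << 2);    // 0..255 -> 0..1020 (10-bit range)
    cv::imwrite("out.avif", img16,
                {cv::IMWRITE_AVIF_QUALITY, 50, cv::IMWRITE_AVIF_DEPTH, 10});
)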
diff --git a/modules/imgcodecs/src/grfmt_avif.cpp b/modules/imgcodecs/src/grfmt_avif.cpp
new file mode 100644
index 0000000000..e8d1446cbe
--- /dev/null
+++ b/modules/imgcodecs/src/grfmt_avif.cpp
@@ -0,0 +1,369 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level
+// directory of this distribution and at http://opencv.org/license.html
+
+#include "precomp.hpp"
+
+#ifdef HAVE_AVIF
+
+#include <avif/avif.h>
+#include <fstream>
+
+#include <opencv2/core/utils/configuration.private.hpp>
+#include "opencv2/imgproc.hpp"
+#include "grfmt_avif.hpp"
+
+#define CV_AVIF_USE_QUALITY \
+  (AVIF_VERSION > ((0 * 1000000) + (11 * 10000) + (1 * 100)))
+
+#if !CV_AVIF_USE_QUALITY
+#define AVIF_QUALITY_LOSSLESS 100
+#define AVIF_QUALITY_WORST 0
+#define AVIF_QUALITY_BEST 100
+
+#endif
+
+namespace cv {
+namespace {
+
+struct AvifImageDeleter {
+  void operator()(avifImage *image) { avifImageDestroy(image); }
+};
+
+using AvifImageUniquePtr = std::unique_ptr<avifImage, AvifImageDeleter>;
+
+avifResult CopyToMat(const avifImage *image, int channels, Mat *mat) {
+  CV_Assert((int)image->height == mat->rows);
+  CV_Assert((int)image->width == mat->cols);
+  if (channels == 1) {
+    const cv::Mat image_wrap =
+        cv::Mat(image->height, image->width,
+                CV_MAKE_TYPE((image->depth == 8) ? CV_8U : CV_16U, 1),
+                image->yuvPlanes[0], image->yuvRowBytes[0]);
+    if ((image->depth == 8 && mat->depth() == CV_8U) ||
+        (image->depth > 8 && mat->depth() == CV_16U)) {
+      image_wrap.copyTo(*mat);
+    } else {
+      CV_Assert(image->depth > 8 && mat->depth() == CV_8U);
+      image_wrap.convertTo(*mat, CV_8U, 1. / (1 << (image->depth - 8)));
+    }
+    return AVIF_RESULT_OK;
+  }
+  avifRGBImage rgba;
+  avifRGBImageSetDefaults(&rgba, image);
+  if (channels == 3) {
+    rgba.format = AVIF_RGB_FORMAT_BGR;
+  } else {
+    CV_Assert(channels == 4);
+    rgba.format = AVIF_RGB_FORMAT_BGRA;
+  }
+  rgba.rowBytes = mat->step[0];
+  rgba.depth = (mat->depth() == CV_16U) ? image->depth : 8;
+  rgba.pixels = reinterpret_cast<uint8_t *>(mat->data);
+  return avifImageYUVToRGB(image, &rgba);
+}
+
+AvifImageUniquePtr ConvertToAvif(const cv::Mat &img, bool lossless,
+                                 int bit_depth) {
+  CV_Assert(img.depth() == CV_8U || img.depth() == CV_16U);
+
+  const int width = img.cols;
+  const int height = img.rows;
+
+  avifImage *result;
+
+  if (img.channels() == 1) {
+    result = avifImageCreateEmpty();
+    if (result == nullptr) return nullptr;
+    result->width = width;
+    result->height = height;
+    result->depth = bit_depth;
+    result->yuvFormat = AVIF_PIXEL_FORMAT_YUV400;
+    result->colorPrimaries = AVIF_COLOR_PRIMARIES_UNSPECIFIED;
+    result->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_UNSPECIFIED;
+    result->matrixCoefficients = AVIF_MATRIX_COEFFICIENTS_IDENTITY;
+    result->yuvRange = AVIF_RANGE_FULL;
+    result->yuvPlanes[0] = img.data;
+    result->yuvRowBytes[0] = img.step[0];
+    result->imageOwnsYUVPlanes = AVIF_FALSE;
+    return AvifImageUniquePtr(result);
+  }
+
+  if (lossless) {
+    result =
+        avifImageCreate(width, height, bit_depth, AVIF_PIXEL_FORMAT_YUV444);
+    if (result == nullptr) return nullptr;
+    result->colorPrimaries = AVIF_COLOR_PRIMARIES_UNSPECIFIED;
+    result->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_UNSPECIFIED;
+    result->matrixCoefficients = AVIF_MATRIX_COEFFICIENTS_IDENTITY;
+    result->yuvRange = AVIF_RANGE_FULL;
+  } else {
+    result =
+        avifImageCreate(width, height, bit_depth, AVIF_PIXEL_FORMAT_YUV420);
+    if (result == nullptr) return nullptr;
+    result->colorPrimaries = AVIF_COLOR_PRIMARIES_BT709;
+    result->transferCharacteristics = AVIF_TRANSFER_CHARACTERISTICS_SRGB;
+    result->matrixCoefficients = AVIF_MATRIX_COEFFICIENTS_BT601;
+    result->yuvRange = AVIF_RANGE_FULL;
+  }
+
+  avifRGBImage rgba;
+  avifRGBImageSetDefaults(&rgba, result);
+  if (img.channels() == 3) {
+    rgba.format = AVIF_RGB_FORMAT_BGR;
+  } else {
+    CV_Assert(img.channels() == 4);
+    rgba.format = AVIF_RGB_FORMAT_BGRA;
+  }
+  rgba.rowBytes = img.step[0];
+  rgba.depth = bit_depth;
+  rgba.pixels =
+      const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(img.data));
+
+  if (avifImageRGBToYUV(result, &rgba) != AVIF_RESULT_OK) {
+    avifImageDestroy(result);
+    return nullptr;
+  }
+  return AvifImageUniquePtr(result);
+}
+
+}  // namespace
+
+// 64Mb limit to avoid memory saturation.
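// (Editorial note: utils::getConfigurationParameterSizeT() reads this limit
// from the environment, so it can be raised without rebuilding, e.g.
// OPENCV_IMGCODECS_AVIF_MAX_FILE_SIZE=134217728 ./my_app for a 128Mb cap.)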
+static const size_t kParamMaxFileSize = utils::getConfigurationParameterSizeT(
+    "OPENCV_IMGCODECS_AVIF_MAX_FILE_SIZE", 64 * 1024 * 1024);
+
+static constexpr size_t kAvifSignatureSize = 500;
+
+AvifDecoder::AvifDecoder() {
+  m_buf_supported = true;
+  channels_ = 0;
+  decoder_ = avifDecoderCreate();
+}
+
+AvifDecoder::~AvifDecoder() {
+  if (decoder_ != nullptr) avifDecoderDestroy(decoder_);
+}
+
+size_t AvifDecoder::signatureLength() const { return kAvifSignatureSize; }
+
+bool AvifDecoder::checkSignature(const String &signature) const {
+  avifDecoderSetIOMemory(decoder_,
+                         reinterpret_cast<const uint8_t *>(signature.c_str()),
+                         signature.size());
+  decoder_->io->sizeHint = 1e9;
+  const avifResult status = avifDecoderParse(decoder_);
+  return (status == AVIF_RESULT_OK || status == AVIF_RESULT_TRUNCATED_DATA);
+}
+
+#define OPENCV_AVIF_CHECK_STATUS(X, ENCDEC)               \
+  {                                                       \
+    const avifResult status = (X);                        \
+    if (status != AVIF_RESULT_OK) {                       \
+      const std::string error(ENCDEC->diag.error);        \
+      CV_Error(Error::StsParseError,                      \
+               error + " " + avifResultToString(status)); \
+      return false;                                       \
+    }                                                     \
+  }
+
+ImageDecoder AvifDecoder::newDecoder() const { return makePtr<AvifDecoder>(); }
+
+bool AvifDecoder::readHeader() {
+  if (!m_buf.empty()) {
+    CV_Assert(m_buf.type() == CV_8UC1);
+    CV_Assert(m_buf.rows == 1);
+  }
+
+  OPENCV_AVIF_CHECK_STATUS(
+      m_buf.empty()
+          ? avifDecoderSetIOFile(decoder_, m_filename.c_str())
+          : avifDecoderSetIOMemory(
+                decoder_, reinterpret_cast<const uint8_t *>(m_buf.data),
+                m_buf.total()),
+      decoder_);
+  OPENCV_AVIF_CHECK_STATUS(avifDecoderParse(decoder_), decoder_);
+
+  m_width = decoder_->image->width;
+  m_height = decoder_->image->height;
+  channels_ = (decoder_->image->yuvFormat == AVIF_PIXEL_FORMAT_YUV400) ? 1 : 3;
+  if (decoder_->alphaPresent) ++channels_;
+  bit_depth_ = decoder_->image->depth;
+  CV_Assert(bit_depth_ == 8 || bit_depth_ == 10 || bit_depth_ == 12);
+  m_type = CV_MAKETYPE(bit_depth_ == 8 ? CV_8U : CV_16U, channels_);
+  is_first_image_ = true;
+  return true;
+}
+
+bool AvifDecoder::readData(Mat &img) {
+  CV_CheckGE(m_width, 0, "");
+  CV_CheckGE(m_height, 0, "");
+
+  CV_CheckEQ(img.cols, m_width, "");
+  CV_CheckEQ(img.rows, m_height, "");
+  CV_CheckType(
+      img.type(),
+      (img.channels() == 1 || img.channels() == 3 || img.channels() == 4) &&
+          (img.depth() == CV_8U || img.depth() == CV_16U),
+      "AVIF only supports 1, 3, 4 channels and CV_8U and CV_16U");
+
+  Mat read_img;
+  if (img.channels() == channels_) {
+    read_img = img;
+  } else {
+    // Use the asked depth but keep the number of channels. OpenCV and not
+    // libavif will do the color conversion.
+    read_img.create(m_height, m_width, CV_MAKE_TYPE(img.depth(), channels_));
+  }
+
+  if (is_first_image_) {
+    if (!nextPage()) return false;
+    is_first_image_ = false;
+  }
+
+  if (CopyToMat(decoder_->image, channels_, &read_img) != AVIF_RESULT_OK) {
+    CV_Error(Error::StsInternal, "Cannot convert from AVIF to Mat");
+    return false;
+  }
+
+  if (decoder_->image->exif.size > 0) {
+    m_exif.parseExif(decoder_->image->exif.data, decoder_->image->exif.size);
+  }
+
+  if (img.channels() == channels_) {
+    // We already wrote to the right buffer.
+  } else {
+    if (channels_ == 1 && img.channels() == 3) {
+      cvtColor(read_img, img, COLOR_GRAY2BGR);
+    } else if (channels_ == 1 && img.channels() == 4) {
+      cvtColor(read_img, img, COLOR_GRAY2BGRA);
+    } else if (channels_ == 3 && img.channels() == 1) {
+      cvtColor(read_img, img, COLOR_BGR2GRAY);
+    } else if (channels_ == 3 && img.channels() == 4) {
+      cvtColor(read_img, img, COLOR_BGR2BGRA);
+    } else if (channels_ == 4 && img.channels() == 1) {
+      cvtColor(read_img, img, COLOR_BGRA2GRAY);
+    } else if (channels_ == 4 && img.channels() == 3) {
+      cvtColor(read_img, img, COLOR_BGRA2BGR);
+    } else {
+      CV_Error(Error::StsInternal, "");
+    }
+  }
+  return true;
+}
+
+bool AvifDecoder::nextPage() {
+  const avifResult status = avifDecoderNextImage(decoder_);
+  if (status == AVIF_RESULT_NO_IMAGES_REMAINING) return false;
+  if (status != AVIF_RESULT_OK) {
+    const std::string error(decoder_->diag.error);
+    CV_Error(Error::StsParseError, error + " " + avifResultToString(status));
+    return false;
+  }
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+AvifEncoder::AvifEncoder() {
+  m_description = "AVIF files (*.avif)";
+  m_buf_supported = true;
+  encoder_ = avifEncoderCreate();
+}
+
+AvifEncoder::~AvifEncoder() {
+  if (encoder_) avifEncoderDestroy(encoder_);
+}
+
+bool AvifEncoder::isFormatSupported(int depth) const {
+  return (depth == CV_8U || depth == CV_16U);
+}
+
+bool AvifEncoder::write(const Mat &img, const std::vector<int> &params) {
+  std::vector<Mat> img_vec(1, img);
+  return writeToOutput(img_vec, params);
+}
+
+bool AvifEncoder::writemulti(const std::vector<Mat> &img_vec,
+                             const std::vector<int> &params) {
+  return writeToOutput(img_vec, params);
+}
+
+bool AvifEncoder::writeToOutput(const std::vector<Mat> &img_vec,
+                                const std::vector<int> &params) {
+  int bit_depth = 8;
+  int speed = AVIF_SPEED_FASTEST;
+  for (size_t i = 0; i < params.size(); i += 2) {
+    if (params[i] == IMWRITE_AVIF_QUALITY) {
+      const int quality = std::min(std::max(params[i + 1], AVIF_QUALITY_WORST),
+                                   AVIF_QUALITY_BEST);
+#if CV_AVIF_USE_QUALITY
+      encoder_->quality = quality;
+#else
+      encoder_->minQuantizer = encoder_->maxQuantizer =
+          (AVIF_QUANTIZER_BEST_QUALITY - AVIF_QUANTIZER_WORST_QUALITY) *
+              quality / (AVIF_QUALITY_BEST - AVIF_QUALITY_WORST) +
+          AVIF_QUANTIZER_WORST_QUALITY;
+#endif
+    } else if (params[i] == IMWRITE_AVIF_DEPTH) {
+      bit_depth = params[i + 1];
+    } else if (params[i] == IMWRITE_AVIF_SPEED) {
+      speed = params[i + 1];
+    }
+  }
+
+  avifRWData output_ori = AVIF_DATA_EMPTY;
+  std::unique_ptr<avifRWData, decltype(&avifRWDataFree)> output(&output_ori,
+                                                                avifRWDataFree);
+#if CV_AVIF_USE_QUALITY
+  const bool do_lossless = (encoder_->quality == AVIF_QUALITY_LOSSLESS);
+#else
+  const bool do_lossless =
+      (encoder_->minQuantizer == AVIF_QUANTIZER_BEST_QUALITY &&
+       encoder_->maxQuantizer == AVIF_QUANTIZER_BEST_QUALITY);
+#endif
+  encoder_->speed = speed;
+
+  const avifAddImageFlags flag = (img_vec.size() == 1)
+                                     ? AVIF_ADD_IMAGE_FLAG_SINGLE
+                                     : AVIF_ADD_IMAGE_FLAG_NONE;
+  std::vector<AvifImageUniquePtr> images;
+  std::vector<cv::Mat> imgs_scaled;
+  for (const cv::Mat &img : img_vec) {
+    CV_CheckType(
+        img.type(),
+        (bit_depth == 8 && img.depth() == CV_8U) ||
+            ((bit_depth == 10 || bit_depth == 12) && img.depth() == CV_16U),
+        "AVIF only supports bit depth of 8 with CV_8U input or "
+        "bit depth of 10 or 12 with CV_16U input");
+    CV_Check(img.channels(),
+             img.channels() == 1 || img.channels() == 3 || img.channels() == 4,
+             "AVIF only supports 1, 3, 4 channels");
+
+    images.emplace_back(ConvertToAvif(img, do_lossless, bit_depth));
+  }
+  for (const AvifImageUniquePtr &image : images) {
+    OPENCV_AVIF_CHECK_STATUS(
+        avifEncoderAddImage(encoder_, image.get(), /*durationInTimescale=*/1,
+                            flag),
+        encoder_);
+  }
+
+  OPENCV_AVIF_CHECK_STATUS(avifEncoderFinish(encoder_, output.get()), encoder_);
+
+  if (m_buf) {
+    m_buf->resize(output->size);
+    std::memcpy(m_buf->data(), output->data, output->size);
+  } else {
+    std::ofstream(m_filename, std::ofstream::binary)
+        .write(reinterpret_cast<const char *>(output->data), output->size);
+  }
+
+  return (output->size > 0);
+}
+
+ImageEncoder AvifEncoder::newEncoder() const { return makePtr<AvifEncoder>(); }
+
+}  // namespace cv
+
+#endif
diff --git a/modules/imgcodecs/src/grfmt_avif.hpp b/modules/imgcodecs/src/grfmt_avif.hpp
new file mode 100644
index 0000000000..e64357366a
--- /dev/null
+++ b/modules/imgcodecs/src/grfmt_avif.hpp
@@ -0,0 +1,62 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level
+// directory of this distribution and at http://opencv.org/license.html
+
+#ifndef _GRFMT_AVIF_H_
+#define _GRFMT_AVIF_H_
+
+#include "grfmt_base.hpp"
+
+#ifdef HAVE_AVIF
+
+struct avifDecoder;
+struct avifEncoder;
+struct avifRWData;
+
+namespace cv {
+
+class AvifDecoder CV_FINAL : public BaseImageDecoder {
+ public:
+  AvifDecoder();
+  ~AvifDecoder();
+
+  bool readHeader() CV_OVERRIDE;
+  bool readData(Mat& img) CV_OVERRIDE;
+  bool nextPage() CV_OVERRIDE;
+
+  size_t signatureLength() const CV_OVERRIDE;
+  bool checkSignature(const String& signature) const CV_OVERRIDE;
+  ImageDecoder newDecoder() const CV_OVERRIDE;
+
+ protected:
+  int channels_;
+  int bit_depth_;
+  avifDecoder* decoder_;
+  bool is_first_image_;
+};
+
+class AvifEncoder CV_FINAL : public BaseImageEncoder {
+ public:
+  AvifEncoder();
+  ~AvifEncoder() CV_OVERRIDE;
+
+  bool isFormatSupported(int depth) const CV_OVERRIDE;
+
+  bool write(const Mat& img, const std::vector<int>& params) CV_OVERRIDE;
+
+  bool writemulti(const std::vector<Mat>& img_vec,
+                  const std::vector<int>& params) CV_OVERRIDE;
+
+  ImageEncoder newEncoder() const CV_OVERRIDE;
+
+ private:
+  bool writeToOutput(const std::vector<Mat>& img_vec,
+                     const std::vector<int>& params);
+  avifEncoder* encoder_;
+};
+
+}  // namespace cv
+
+#endif
+
+#endif /*_GRFMT_AVIF_H_*/
diff --git a/modules/imgcodecs/src/grfmts.hpp b/modules/imgcodecs/src/grfmts.hpp
index 637538d223..46b79ff96c 100644
--- a/modules/imgcodecs/src/grfmts.hpp
+++ b/modules/imgcodecs/src/grfmts.hpp
@@ -43,6 +43,7 @@
 #define _GRFMTS_H_
 
 #include "grfmt_base.hpp"
+#include "grfmt_avif.hpp"
 #include "grfmt_bmp.hpp"
 #include "grfmt_sunras.hpp"
 #include "grfmt_jpeg.hpp"
diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp
index e7b5c7035f..a1f49a882c 100644
--- a/modules/imgcodecs/src/loadsave.cpp
+++ b/modules/imgcodecs/src/loadsave.cpp
@@ -132,6 +132,10 @@ struct ImageCodecInitializer
      */
     ImageCodecInitializer()
     {
+#ifdef HAVE_AVIF
+        decoders.push_back(makePtr<AvifDecoder>());
+        encoders.push_back(makePtr<AvifEncoder>());
+#endif
         /// BMP Support
         decoders.push_back( makePtr<BmpDecoder>() );
         encoders.push_back( makePtr<BmpEncoder>() );
diff --git a/modules/imgcodecs/test/test_avif.cpp b/modules/imgcodecs/test/test_avif.cpp
new file mode 100644
index 0000000000..99b8f7769c
--- /dev/null
+++ b/modules/imgcodecs/test/test_avif.cpp
@@ -0,0 +1,355 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level
+// directory of this distribution and at http://opencv.org/license.html
+
+#include <cstdint>
+#include <fstream>
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_AVIF
+
+namespace opencv_test {
+namespace {
+
+class Imgcodecs_Avif_RoundTripSuite
+    : public testing::TestWithParam<std::tuple<int, int, int, int>> {
+ protected:
+  static cv::Mat modifyImage(const cv::Mat& img_original, int channels,
+                             int bit_depth) {
+    cv::Mat img;
+    if (channels == 1) {
+      cv::cvtColor(img_original, img, cv::COLOR_BGR2GRAY);
+    } else if (channels == 4) {
+      std::vector<cv::Mat> imgs;
+      cv::split(img_original, imgs);
+      imgs.push_back(cv::Mat(imgs[0]));
+      imgs[imgs.size() - 1] = cv::Scalar::all(128);
+      cv::merge(imgs, img);
+    } else {
+      img = img_original.clone();
+    }
+
+    cv::Mat img_final = img;
+    // Convert image to CV_16U for some bit depths.
+    if (bit_depth > 8) img.convertTo(img_final, CV_16U, 1 << (bit_depth - 8));
+
+    return img_final;
+  }
+
+  void SetUp() {
+    bit_depth_ = std::get<0>(GetParam());
+    channels_ = std::get<1>(GetParam());
+    quality_ = std::get<2>(GetParam());
+    imread_mode_ = std::get<3>(GetParam());
+    encoding_params_ = {cv::IMWRITE_AVIF_QUALITY, quality_,
+                        cv::IMWRITE_AVIF_DEPTH, bit_depth_};
+  }
+
+  bool IsBitDepthValid() const {
+    return (bit_depth_ == 8 || bit_depth_ == 10 || bit_depth_ == 12);
+  }
+
+  // Makes sure images are close enough after encode/decode roundtrip.
+  void ValidateRead(const cv::Mat& img_original, const cv::Mat& img) const {
+    EXPECT_EQ(img_original.size(), img.size());
+    if (imread_mode_ == IMREAD_UNCHANGED) {
+      ASSERT_EQ(img_original.type(), img.type());
+      // Lossless.
+      if (quality_ == 100) {
+        EXPECT_EQ(0, cvtest::norm(img, img_original, NORM_INF));
+      } else {
+        const float norm = cvtest::norm(img, img_original, NORM_L2) /
+                           img.channels() / img.cols / img.rows /
+                           (1 << (bit_depth_ - 8));
+        if (quality_ == 50) {
+          EXPECT_LE(norm, 10);
+        } else if (quality_ == 0) {
+          EXPECT_LE(norm, 13);
+        } else {
+          EXPECT_FALSE(true);
+        }
+      }
+    }
+  }
+
+ public:
+  int bit_depth_;
+  int channels_;
+  int quality_;
+  int imread_mode_;
+  std::vector<int> encoding_params_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class Imgcodecs_Avif_Image_RoundTripSuite
+    : public Imgcodecs_Avif_RoundTripSuite {
+ public:
+  const cv::Mat& get_img_original() {
+    const Key key = {channels_, (bit_depth_ < 8) ? 8 : bit_depth_};
+    return imgs_[key];
+  }
+
+  // Prepare the original image modified for different number of channels and
+  // bit depth.
+  static void SetUpTestCase() {
+    const string root = cvtest::TS::ptr()->get_data_path();
+    const string filename = root + "../cv/shared/lena.png";
+    const cv::Mat img_original = cv::imread(filename);
+    cv::Mat img_resized;
+    cv::resize(img_original, img_resized, cv::Size(kWidth, kHeight), 0, 0);
+    for (int channels : {1, 3, 4}) {
+      for (int bit_depth : {8, 10, 12}) {
+        const Key key{channels, bit_depth};
+        imgs_[key] = modifyImage(img_resized, channels, bit_depth);
+      }
+    }
+  }
+
+  static const int kWidth;
+  static const int kHeight;
+
+ private:
+  typedef std::tuple<int, int> Key;
+  static std::map<Key, cv::Mat> imgs_;
+};
+std::map<std::tuple<int, int>, cv::Mat>
+    Imgcodecs_Avif_Image_RoundTripSuite::imgs_;
+const int Imgcodecs_Avif_Image_RoundTripSuite::kWidth = 51;
+const int Imgcodecs_Avif_Image_RoundTripSuite::kHeight = 31;
+
+class Imgcodecs_Avif_Image_WriteReadSuite
+    : public Imgcodecs_Avif_Image_RoundTripSuite {};
+
+TEST_P(Imgcodecs_Avif_Image_WriteReadSuite, imwrite_imread) {
+  const cv::Mat& img_original = get_img_original();
+  ASSERT_FALSE(img_original.empty());
+
+  // Encode.
+  const string output = cv::tempfile(".avif");
+  if (!IsBitDepthValid()) {
+    EXPECT_NO_FATAL_FAILURE(
+        cv::imwrite(output, img_original, encoding_params_));
+    EXPECT_NE(0, remove(output.c_str()));
+    return;
+  }
+  EXPECT_NO_THROW(cv::imwrite(output, img_original, encoding_params_));
+
+  // Read from file.
+  const cv::Mat img = cv::imread(output, imread_mode_);
+
+  ValidateRead(img_original, img);
+
+  EXPECT_EQ(0, remove(output.c_str()));
+}
+
+INSTANTIATE_TEST_CASE_P(
+    Imgcodecs_AVIF, Imgcodecs_Avif_Image_WriteReadSuite,
+    ::testing::Combine(::testing::ValuesIn({6, 8, 10, 12}),
+                       ::testing::ValuesIn({1, 3, 4}),
+                       ::testing::ValuesIn({0, 50, 100}),
+                       ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
+                                            IMREAD_COLOR})));
+
+class Imgcodecs_Avif_Image_EncodeDecodeSuite
+    : public Imgcodecs_Avif_Image_RoundTripSuite {};
+
+TEST_P(Imgcodecs_Avif_Image_EncodeDecodeSuite, imencode_imdecode) {
+  const cv::Mat& img_original = get_img_original();
+  ASSERT_FALSE(img_original.empty());
+
+  // Encode.
+  std::vector<unsigned char> buf;
+  if (!IsBitDepthValid()) {
+    EXPECT_THROW(cv::imencode(".avif", img_original, buf, encoding_params_),
+                 cv::Exception);
+    return;
+  }
+  bool result;
+  EXPECT_NO_THROW(
+      result = cv::imencode(".avif", img_original, buf, encoding_params_););
+  EXPECT_TRUE(result);
+
+  // Read back.
+  const cv::Mat img = cv::imdecode(buf, imread_mode_);
+
+  ValidateRead(img_original, img);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    Imgcodecs_AVIF, Imgcodecs_Avif_Image_EncodeDecodeSuite,
+    ::testing::Combine(::testing::ValuesIn({6, 8, 10, 12}),
+                       ::testing::ValuesIn({1, 3, 4}),
+                       ::testing::ValuesIn({0, 50, 100}),
+                       ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
+                                            IMREAD_COLOR})));
+
+////////////////////////////////////////////////////////////////////////////////
+
+typedef testing::TestWithParam<string> Imgcodecs_AVIF_Exif;
+
+TEST_P(Imgcodecs_AVIF_Exif, exif_orientation) {
+  const string root = cvtest::TS::ptr()->get_data_path();
+  const string filename = root + GetParam();
+  const int colorThresholdHigh = 250;
+  const int colorThresholdLow = 5;
+
+  Mat m_img = imread(filename);
+  ASSERT_FALSE(m_img.empty());
+  Vec3b vec;
+
+  // Checking the first quadrant (with supposed red)
+  vec = m_img.at<Vec3b>(2, 2);  // some point inside the square
+  EXPECT_LE(vec.val[0], colorThresholdLow);
+  EXPECT_LE(vec.val[1], colorThresholdLow);
+  EXPECT_GE(vec.val[2], colorThresholdHigh);
+
+  // Checking the second quadrant (with supposed green)
+  vec = m_img.at<Vec3b>(2, 7);  // some point inside the square
+  EXPECT_LE(vec.val[0], colorThresholdLow);
+  EXPECT_GE(vec.val[1], colorThresholdHigh);
+  EXPECT_LE(vec.val[2], colorThresholdLow);
+
+  // Checking the third quadrant (with supposed blue)
+  vec = m_img.at<Vec3b>(7, 2);  // some point inside the square
+  EXPECT_GE(vec.val[0], colorThresholdHigh);
+  EXPECT_LE(vec.val[1], colorThresholdLow);
+  EXPECT_LE(vec.val[2], colorThresholdLow);
+}
+
+const string exif_files[] = {"readwrite/testExifOrientation_1.avif",
+                             "readwrite/testExifOrientation_2.avif",
+                             "readwrite/testExifOrientation_3.avif",
+                             "readwrite/testExifOrientation_4.avif",
+                             "readwrite/testExifOrientation_5.avif",
+                             "readwrite/testExifOrientation_6.avif",
+                             "readwrite/testExifOrientation_7.avif",
+                             "readwrite/testExifOrientation_8.avif"};
+
+INSTANTIATE_TEST_CASE_P(ExifFiles, Imgcodecs_AVIF_Exif,
+                        testing::ValuesIn(exif_files));
+
+////////////////////////////////////////////////////////////////////////////////
+
+class Imgcodecs_Avif_Animation_RoundTripSuite
+    : public Imgcodecs_Avif_RoundTripSuite {
+ public:
+  const std::vector<cv::Mat>& get_anim_original() {
+    const Key key = {channels_, bit_depth_};
+    return anims_[key];
+  }
+
+  // Prepare the original image modified for different number of channels and
+  // bit depth.
+  static void SetUpTestCase() {
+    const string root = cvtest::TS::ptr()->get_data_path();
+    const string filename = root + "../cv/shared/lena.png";
+    const cv::Mat img_original = cv::imread(filename);
+    cv::Mat img_resized;
+    cv::resize(img_original, img_resized, cv::Size(kWidth, kHeight), 0, 0);
+    for (int channels : {1, 3, 4}) {
+      for (int bit_depth : {8, 10, 12}) {
+        const Key key{channels, bit_depth};
+        const cv::Mat img = modifyImage(img_resized, channels, bit_depth);
+        cv::Mat img2, img3;
+        cv::flip(img, img2, 0);
+        cv::flip(img, img3, -1);
+        anims_[key] = {img, img2, img3};
+      }
+    }
+  }
+
+  void ValidateRead(const std::vector<cv::Mat>& anim_original,
+                    const std::vector<cv::Mat>& anim) const {
+    ASSERT_EQ(anim_original.size(), anim.size());
+    for (size_t i = 0; i < anim.size(); ++i) {
+      Imgcodecs_Avif_RoundTripSuite::ValidateRead(anim_original[i], anim[i]);
+    }
+  }
+
+  static const int kWidth;
+  static const int kHeight;
+
+ private:
+  typedef std::tuple<int, int> Key;
+  static std::map<Key, std::vector<cv::Mat>> anims_;
+};
+std::map<std::tuple<int, int>, std::vector<cv::Mat>>
+    Imgcodecs_Avif_Animation_RoundTripSuite::anims_;
+const int Imgcodecs_Avif_Animation_RoundTripSuite::kWidth = 5;
+const int Imgcodecs_Avif_Animation_RoundTripSuite::kHeight = 5;
+
+class Imgcodecs_Avif_Animation_WriteReadSuite
+    : public Imgcodecs_Avif_Animation_RoundTripSuite {};
+
+TEST_P(Imgcodecs_Avif_Animation_WriteReadSuite, encode_decode) {
+  const std::vector<cv::Mat>& anim_original = get_anim_original();
+  ASSERT_FALSE(anim_original.empty());
+
+  // Encode.
+  const string output = cv::tempfile(".avif");
+  if (!IsBitDepthValid()) {
+    EXPECT_THROW(cv::imwritemulti(output, anim_original, encoding_params_),
+                 cv::Exception);
+    EXPECT_NE(0, remove(output.c_str()));
+    return;
+  }
+  EXPECT_NO_THROW(cv::imwritemulti(output, anim_original, encoding_params_));
+
+  // Read from file.
+  std::vector<cv::Mat> anim;
+  ASSERT_TRUE(cv::imreadmulti(output, anim, imread_mode_));
+
+  ValidateRead(anim_original, anim);
+
+  EXPECT_EQ(0, remove(output.c_str()));
+}
+
+INSTANTIATE_TEST_CASE_P(
+    Imgcodecs_AVIF, Imgcodecs_Avif_Animation_WriteReadSuite,
+    ::testing::Combine(::testing::ValuesIn({8, 10, 12}),
+                       ::testing::ValuesIn({1, 3}), ::testing::ValuesIn({50}),
+                       ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
+                                            IMREAD_COLOR})));
+class Imgcodecs_Avif_Animation_WriteDecodeSuite
+    : public Imgcodecs_Avif_Animation_RoundTripSuite {};
+
+TEST_P(Imgcodecs_Avif_Animation_WriteDecodeSuite, encode_decode) {
+  const std::vector<cv::Mat>& anim_original = get_anim_original();
+  ASSERT_FALSE(anim_original.empty());
+
+  // Encode.
+  const string output = cv::tempfile(".avif");
+  if (!IsBitDepthValid()) {
+    EXPECT_THROW(cv::imwritemulti(output, anim_original, encoding_params_),
+                 cv::Exception);
+    EXPECT_NE(0, remove(output.c_str()));
+    return;
+  }
+  EXPECT_NO_THROW(cv::imwritemulti(output, anim_original, encoding_params_));
+
+  // Put file into buffer and read from buffer.
+  std::ifstream file(output, std::ios::binary | std::ios::ate);
+  std::streamsize size = file.tellg();
+  file.seekg(0, std::ios::beg);
+  std::vector<unsigned char> buf(size);
+  EXPECT_TRUE(file.read(reinterpret_cast<char*>(buf.data()), size));
+  EXPECT_EQ(0, remove(output.c_str()));
+  std::vector<cv::Mat> anim;
+  ASSERT_TRUE(cv::imdecodemulti(buf, imread_mode_, anim));
+
+  ValidateRead(anim_original, anim);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    Imgcodecs_AVIF, Imgcodecs_Avif_Animation_WriteDecodeSuite,
+    ::testing::Combine(::testing::ValuesIn({8, 10, 12}),
+                       ::testing::ValuesIn({1, 3}), ::testing::ValuesIn({50}),
+                       ::testing::ValuesIn({IMREAD_UNCHANGED, IMREAD_GRAYSCALE,
+                                            IMREAD_COLOR})));
+
+}  // namespace
+}  // namespace opencv_test
+
+#endif  // HAVE_AVIF
diff --git a/modules/imgproc/src/color_hsv.simd.hpp b/modules/imgproc/src/color_hsv.simd.hpp
index 033629e8ee..bea1decc3a 100644
--- a/modules/imgproc/src/color_hsv.simd.hpp
+++ b/modules/imgproc/src/color_hsv.simd.hpp
@@ -813,11 +813,10 @@ struct RGB2HLS_b
         //TODO: fix that when v_interleave is available
         float CV_DECL_ALIGNED(CV_SIMD_WIDTH) interTmpM[VTraits<v_float32>::max_nlanes*3];
         v_store_interleave(interTmpM, vx_setall_f32(1.f), vx_setall_f32(255.f), vx_setall_f32(255.f));
-        v_float32 mhls0, mhls1, mhls2, mhls3;
+        v_float32 mhls0, mhls1, mhls2;
         mhls0 = vx_load_aligned(interTmpM);
         mhls1 = vx_load_aligned(interTmpM + fsize);
         mhls2 = vx_load_aligned(interTmpM + 2*fsize);
-        mhls3 = vx_load_aligned(interTmpM + 3*fsize);
 #endif
 
         for(int i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
diff --git a/modules/imgproc/src/intelligent_scissors.cpp b/modules/imgproc/src/intelligent_scissors.cpp
index 1b7e3dd163..6e2dfc3288 100644
--- a/modules/imgproc/src/intelligent_scissors.cpp
+++ b/modules/imgproc/src/intelligent_scissors.cpp
@@ -40,7 +40,7 @@ static const int neighbors_encode[8] = {
 };
 
 #define ACOS_TABLE_SIZE 64
-// acos_table[x + ACOS_TABLE_SIZE] = acos(x / ACOS_TABLE_SIZE) / CV_PI (see local_cost)
+// acos_table[x + ACOS_TABLE_SIZE] = acos(x / ACOS_TABLE_SIZE) / CV_PI (see add_local_cost)
 // x = [ -ACOS_TABLE_SIZE .. ACOS_TABLE_SIZE ]
 float* getAcosTable()
 {
@@ -495,55 +495,76 @@ struct IntelligentScissorsMB::Impl
     // details: see section 3.1 of the article
     const float* acos_table = getAcosTable();
 
-    float local_cost(const Point& p, const Point& q) const
+    const float sqrt2_inv = 0.7071067811865475f;  // 1.0 / sqrt(2)
+
+    /** @brief Adds local_cost(p, q) to cost_p.
+     *
+     * local_cost(p, q) is computed as
+     *     weight_non_edge_compute * non_edge_feature.at<uchar>(q) +
+     *     weight_gradient_direction * fD +
+     *     weight_gradient_magnitude * fG
+     *
+     * @param p point p (input)
+     * @param q point q (input)
+     * @param cost_p cost for p (input/output)
+     * @param cost_q cost for q (input)
+     *
+     * @return The boolean result of the (cost_p < cost_q) comparison.
+     *
+     * @note The computed output cost_p can be partial if (cost_p < cost_q) is false.
+ */ + bool add_local_cost(const Point& p, const Point& q, float& cost_p, const float cost_q) const { - const bool isDiag = (p.x != q.x) && (p.y != q.y); + if ((cost_p += weight_non_edge_compute * non_edge_feature.at(q)) < cost_q) + { + const bool isDiag = (p.x != q.x) && (p.y != q.y); - float fG = gradient_magnitude.at(q); + float fG = gradient_magnitude.at(q); + if (!isDiag) + { + fG *= sqrt2_inv; + } - const Point2f diff((float)(q.x - p.x), (float)(q.y - p.y)); + if ((cost_p += weight_gradient_magnitude * fG) < cost_q) + { - const Point2f Ip = gradient_direction(p); - const Point2f Iq = gradient_direction(q); + const Point2f diff((float)(q.x - p.x), (float)(q.y - p.y)); - const Point2f Dp(Ip.y, -Ip.x); // D(p) - 90 degrees clockwise - const Point2f Dq(Iq.y, -Iq.x); // D(q) - 90 degrees clockwise + const Point2f Ip = gradient_direction(p); + const Point2f Iq = gradient_direction(q); - float dp = Dp.dot(diff); // dp(p, q) - float dq = Dq.dot(diff); // dq(p, q) - if (dp < 0) - { - dp = -dp; // ensure dp >= 0 - dq = -dq; - } + const Point2f Dp(Ip.y, -Ip.x); // D(p) - 90 degrees clockwise + const Point2f Dq(Iq.y, -Iq.x); // D(q) - 90 degrees clockwise - const float sqrt2_inv = 0.7071067811865475f; // 1.0 / sqrt(2) - if (isDiag) - { - dp *= sqrt2_inv; // normalize length of (q - p) - dq *= sqrt2_inv; // normalize length of (q - p) - } - else - { - fG *= sqrt2_inv; - } + float dp = Dp.dot(diff); // dp(p, q) + float dq = Dq.dot(diff); // dq(p, q) + if (dp < 0) + { + dp = -dp; // ensure dp >= 0 + dq = -dq; + } + + if (isDiag) + { + dp *= sqrt2_inv; // normalize length of (q - p) + dq *= sqrt2_inv; // normalize length of (q - p) + } #if 1 - int dp_i = cvFloor(dp * ACOS_TABLE_SIZE); // dp is in range 0..1 - dp_i = std::min(ACOS_TABLE_SIZE, std::max(0, dp_i)); - int dq_i = cvFloor(dq * ACOS_TABLE_SIZE); // dq is in range -1..1 - dq_i = std::min(ACOS_TABLE_SIZE, std::max(-ACOS_TABLE_SIZE, dq_i)); - const float fD = acos_table[dp_i + ACOS_TABLE_SIZE] + acos_table[dq_i + ACOS_TABLE_SIZE]; + int dp_i = cvFloor(dp * ACOS_TABLE_SIZE); // dp is in range 0..1 + dp_i = std::min(ACOS_TABLE_SIZE, std::max(0, dp_i)); + int dq_i = cvFloor(dq * ACOS_TABLE_SIZE); // dq is in range -1..1 + dq_i = std::min(ACOS_TABLE_SIZE, std::max(-ACOS_TABLE_SIZE, dq_i)); + const float fD = acos_table[dp_i + ACOS_TABLE_SIZE] + acos_table[dq_i + ACOS_TABLE_SIZE]; #else - const float CV_PI_inv = static_cast(1.0 / CV_PI); - const float fD = (acosf(dp) + acosf(dq)) * CV_PI_inv; // TODO optimize acos calls (through tables) + const float CV_PI_inv = static_cast(1.0 / CV_PI); + const float fD = (acosf(dp) + acosf(dq)) * CV_PI_inv; // TODO optimize acos calls (through tables) #endif - float cost = - weight_non_edge_compute * non_edge_feature.at(q) + - weight_gradient_direction * fD + - weight_gradient_magnitude * fG; - return cost; + cost_p += weight_gradient_direction * fD; + } + } + return cost_p < cost_q; } struct Pix @@ -625,8 +646,8 @@ struct IntelligentScissorsMB::Impl CV_DbgCheckLE(cost_q, cost_r, "INTERNAL ERROR: sorted queue is corrupted"); #endif - float cost = cost_q + local_cost(q, r); // TODO(opt): compute partially until cost < cost_r - if (cost < cost_r) + float cost = cost_q; + if (add_local_cost(q, r, cost, cost_r)) { #if 0 // avoid compiler warning if (cost_r != FLT_MAX) diff --git a/modules/imgproc/test/test_intelligent_scissors.cpp b/modules/imgproc/test/test_intelligent_scissors.cpp index c6b51fd6b6..bdd4debbca 100644 --- a/modules/imgproc/test/test_intelligent_scissors.cpp +++ 
b/modules/imgproc/test/test_intelligent_scissors.cpp @@ -147,11 +147,72 @@ void show(const Mat& img, const std::vector pts) } } +Size estimateContourSize(const std::vector& pts) +{ + Size s(0,0); + for (size_t i = 0; i < pts.size(); i++) + { + if (s.width < pts[i].x) + s.width = pts[i].x; + if (s.height < pts[i].y) + s.height = pts[i].y; + } + return s; +} + +int contoursAreaPixelsMismatch(const std::vector& pts, const std::vector& gt) +{ + Size ptsSize = estimateContourSize(pts); + Size gtSize = estimateContourSize(gt); + + Size imgSize(std::max(ptsSize.width, gtSize.width)+1, std::max(ptsSize.height, gtSize.height)+1); + Mat ptsArea = Mat::zeros(imgSize, CV_8UC1); + Mat gtArea = Mat::zeros(imgSize, CV_8UC1); + + std::vector> pts_wrapped = {pts}; + std::vector> gt_wrapped = {gt}; + drawContours(ptsArea, pts_wrapped, -1, Scalar(255), FILLED); + drawContours(gtArea, gt_wrapped, -1, Scalar(255), FILLED); + + Mat uni = ptsArea | gtArea; + Mat intersection = ptsArea & gtArea; + bitwise_not(intersection, intersection); + Mat delta = uni & intersection; + + return countNonZero(delta); +} + +void checkContour(std::vector& pts, + const bool backward = false, + int allowed_mismatch = 0) +{ + const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + CV_Assert(test_info); + const std::string name = std::string(cvtest::TS::ptr()->get_data_path() + "imgproc/" + test_info->test_case_name() + "-" + test_info->name() + (backward ? "-backward" : "") + ".xml"); + + std::vector reference_pts; +#ifdef GENERATE_TEST_DATA + { + cv::FileStorage fs(name, cv::FileStorage::WRITE); + fs << "pts" << pts; + } + reference_pts = pts; +#else + FileStorage fs(name, FileStorage::READ); + read(fs["pts"], reference_pts, std::vector()); +#endif + + if (!allowed_mismatch) + EXPECT_EQ(pts, reference_pts); + else + EXPECT_LE(contoursAreaPixelsMismatch(pts, reference_pts), allowed_mismatch); +} + TEST(Imgproc_IntelligentScissorsMB, rect) { segmentation::IntelligentScissorsMB tool; - - tool.applyImage(getTestImage1()); + Mat image = getTestImage1(); + tool.applyImage(image); Point source_point(50, 30); tool.buildMap(source_point); @@ -159,15 +220,18 @@ TEST(Imgproc_IntelligentScissorsMB, rect) Point target_point(100, 30); std::vector pts; tool.getContour(target_point, pts); + checkContour(pts); + show(image, pts); - tool.applyImage(getTestImage2()); + Mat image2 = getTestImage2(); + tool.applyImage(image2); tool.buildMap(source_point); std::vector pts2; tool.getContour(target_point, pts2, true/*backward*/); - - EXPECT_EQ(pts.size(), pts2.size()); + checkContour(pts2, true/*backward*/); + show(image2, pts2); } TEST(Imgproc_IntelligentScissorsMB, lines) @@ -182,8 +246,7 @@ TEST(Imgproc_IntelligentScissorsMB, lines) Point target_point(150, 50); std::vector pts; tool.getContour(target_point, pts); - - EXPECT_EQ((size_t)121, pts.size()); + checkContour(pts); show(image, pts); } @@ -201,8 +264,7 @@ TEST(Imgproc_IntelligentScissorsMB, circles) Point target_point(150, 50); std::vector pts; tool.getContour(target_point, pts); - - EXPECT_EQ((size_t)101, pts.size()); + checkContour(pts); show(image, pts); } @@ -218,13 +280,10 @@ TEST(Imgproc_IntelligentScissorsMB, circles_gradient) Point target_point(150, 50); std::vector pts; tool.getContour(target_point, pts); - - EXPECT_EQ((size_t)101, pts.size()); + checkContour(pts); show(image, pts); } -#define PTS_SIZE_EPS 2 - TEST(Imgproc_IntelligentScissorsMB, grayscale) { segmentation::IntelligentScissorsMB tool; @@ -238,10 +297,7 @@ 
TEST(Imgproc_IntelligentScissorsMB, grayscale) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 206; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 2); show(image, pts); } @@ -260,10 +316,7 @@ TEST(Imgproc_IntelligentScissorsMB, check_features_grayscale_1_0_0_zerro_crossin Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 207; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 11); show(image, pts); } @@ -282,10 +335,7 @@ TEST(Imgproc_IntelligentScissorsMB, check_features_grayscale_1_0_0_canny) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 201; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 6); show(image, pts); } @@ -303,10 +353,7 @@ TEST(Imgproc_IntelligentScissorsMB, check_features_grayscale_0_1_0) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 166; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 4); show(image, pts); } @@ -324,10 +371,7 @@ TEST(Imgproc_IntelligentScissorsMB, check_features_grayscale_0_0_1) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 197; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 2); show(image, pts); } @@ -344,10 +388,7 @@ TEST(Imgproc_IntelligentScissorsMB, color) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 205; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 2); show(image, pts); } @@ -365,10 +406,7 @@ TEST(Imgproc_IntelligentScissorsMB, color_canny) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 200; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 2); show(image, pts); } @@ -397,10 +435,7 @@ TEST(Imgproc_IntelligentScissorsMB, color_custom_features_edge) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 201; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 2); show(image, pts); } @@ -427,10 +462,7 @@ TEST(Imgproc_IntelligentScissorsMB, color_custom_features_all) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 201; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 9); show(image, pts); } @@ -456,10 +488,7 @@ TEST(Imgproc_IntelligentScissorsMB, color_custom_features_edge_magnitude) Point target_point(413, 155); std::vector pts; tool.getContour(target_point, pts); - - size_t gold = 201; - EXPECT_GE(pts.size(), gold - PTS_SIZE_EPS); - EXPECT_LE(pts.size(), gold + PTS_SIZE_EPS); + checkContour(pts, false, 9); show(image, pts); } diff --git a/modules/js/generator/embindgen.py b/modules/js/generator/embindgen.py index f9b8431402..8a16f92f5e 100644 --- a/modules/js/generator/embindgen.py +++ b/modules/js/generator/embindgen.py @@ -319,19 +319,31 @@ class 
JSWrapperGenerator(object):
             sys.exit(-1)
         self.classes[class_info.name] = class_info
 
-        if class_info.bases:
-            chunks = class_info.bases[0].split('::')
-            base = '_'.join(chunks)
-            while base not in self.classes and len(chunks) > 1:
-                del chunks[-2]
+    def resolve_class_inheritance(self):
+        new_classes = {}
+        for name, class_info in self.classes.items():
+
+            if not hasattr(class_info, 'bases'):
+                new_classes[name] = class_info
+                continue # not class
+
+            if class_info.bases:
+                chunks = class_info.bases[0].split('::')
                 base = '_'.join(chunks)
-            if base not in self.classes:
-                print("Generator error: unable to resolve base %s for %s"
-                    % (class_info.bases[0], class_info.name))
-                sys.exit(-1)
-            else:
-                class_info.bases[0] = "::".join(chunks)
-                class_info.isalgorithm |= self.classes[base].isalgorithm
+                while base not in self.classes and len(chunks) > 1:
+                    del chunks[-2]
+                    base = '_'.join(chunks)
+                if base not in self.classes:
+                    print("Generator error: unable to resolve base %s for %s"
+                        % (class_info.bases[0], class_info.name))
+                    sys.exit(-1)
+                else:
+                    class_info.bases[0] = "::".join(chunks)
+                    class_info.isalgorithm |= self.classes[base].isalgorithm
+
+            new_classes[name] = class_info
+
+        self.classes = new_classes
 
     def split_decl_name(self, name):
         chunks = name.split('.')
@@ -759,6 +771,8 @@ class JSWrapperGenerator(object):
             else: # class/global function
                 self.add_func(decl)
 
+        self.resolve_class_inheritance()
+
         # step 2: generate bindings
         # Global functions
         for ns_name, ns in sorted(self.namespaces.items()):
@@ -812,6 +826,7 @@ class JSWrapperGenerator(object):
         for name, class_info in sorted(self.classes.items()):
             class_bindings = []
             if not name in white_list:
+                #print('Not in whitelist: "{}" from ns={}'.format(name, ns_name))
                 continue
 
             # Generate bindings for methods
diff --git a/modules/js/src/core_bindings.cpp b/modules/js/src/core_bindings.cpp
index 1f86711c16..60fe496ce3 100644
--- a/modules/js/src/core_bindings.cpp
+++ b/modules/js/src/core_bindings.cpp
@@ -89,18 +89,21 @@
 using namespace cv;
 using namespace cv::segmentation; // FIXIT
 
+using namespace cv::aruco;
+typedef aruco::DetectorParameters aruco_DetectorParameters;
+typedef QRCodeDetectorAruco::Params QRCodeDetectorAruco_Params;
+
 #ifdef HAVE_OPENCV_DNN
 using namespace cv::dnn;
 #endif
 
-#ifdef HAVE_OPENCV_ARUCO
-using namespace aruco;
-#endif
-
 #ifdef HAVE_OPENCV_VIDEO
 typedef TrackerMIL::Params TrackerMIL_Params;
 #endif
 
+// HACK: JS generator omits namespace for parameter types for some reason.
Added typedef to handle std::string correctly +typedef std::string string; + namespace binding_utils { template diff --git a/modules/js/test/test_objdetect.js b/modules/js/test/test_objdetect.js index dc863d682f..e7190db337 100644 --- a/modules/js/test/test_objdetect.js +++ b/modules/js/test/test_objdetect.js @@ -199,4 +199,102 @@ QUnit.test('QR code detect and decode', function (assert) { mat.delete(); } -}); \ No newline at end of file +}); +QUnit.test('Aruco-based QR code detect', function (assert) { + { + let qrcode_params = new cv.QRCodeDetectorAruco_Params(); + let detector = new cv.QRCodeDetectorAruco(); + let mat = cv.Mat.ones(800, 600, cv.CV_8U); + assert.ok(mat); + + detector.setDetectorParameters(qrcode_params); + + let points = new cv.Mat(); + let qrCodeFound = detector.detect(mat, points); + assert.equal(points.rows, 0) + assert.equal(points.cols, 0) + assert.equal(qrCodeFound, false); + + qrcode_params.delete(); + detector.delete(); + points.delete(); + mat.delete(); + } +}); +QUnit.test('Bar code detect', function (assert) { + { + let detector = new cv.barcode_BarcodeDetector(); + let mat = cv.Mat.ones(800, 600, cv.CV_8U); + assert.ok(mat); + + let points = new cv.Mat(); + let codeFound = detector.detect(mat, points); + assert.equal(points.rows, 0) + assert.equal(points.cols, 0) + assert.equal(codeFound, false); + + codeContent = detector.detectAndDecode(mat); + assert.equal(typeof codeContent, 'string'); + assert.equal(codeContent, ''); + + detector.delete(); + points.delete(); + mat.delete(); + } +}); +QUnit.test('Aruco detector', function (assert) { + { + let dictionary = cv.getPredefinedDictionary(cv.DICT_4X4_50); + let aruco_image = new cv.Mat(); + let detectorParameters = new cv.aruco_DetectorParameters(); + let refineParameters = new cv.aruco_RefineParameters(10, 3, true); + let detector = new cv.aruco_ArucoDetector(dictionary, detectorParameters,refineParameters); + let corners = new cv.MatVector(); + let ids = new cv.Mat(); + + dictionary.generateImageMarker(10, 128, aruco_image); + assert.ok(!aruco_image.empty()); + + detector.detectMarkers(aruco_image, corners, ids); + + dictionary.delete(); + aruco_image.delete(); + detectorParameters.delete(); + refineParameters.delete(); + detector.delete(); + corners.delete(); + ids.delete(); + } +}); +QUnit.test('Charuco detector', function (assert) { + { + let dictionary = new cv.getPredefinedDictionary(cv.DICT_4X4_50); + let boardIds = new cv.Mat(); + let board = new cv.aruco_CharucoBoard(new cv.Size(3, 5), 64, 32, dictionary, boardIds); + let charucoParameters = new cv.aruco_CharucoParameters(); + let detectorParameters = new cv.aruco_DetectorParameters(); + let refineParameters = new cv.aruco_RefineParameters(10, 3, true); + let detector = new cv.aruco_CharucoDetector(board, charucoParameters, detectorParameters, refineParameters); + let board_image = new cv.Mat(); + let corners = new cv.Mat(); + let ids = new cv.Mat(); + + board.generateImage(new cv.Size(300, 500), board_image); + assert.ok(!board_image.empty()); + + detector.detectBoard(board_image, corners, ids); + assert.ok(!corners.empty()); + assert.ok(!ids.empty()); + + dictionary.delete(); + boardIds.delete(); + board.delete(); + board_image.delete(); + charucoParameters.delete(); + detectorParameters.delete(); + refineParameters.delete(); + detector.delete(); + corners.delete(); + ids.delete(); + } +}); diff --git a/modules/objdetect/doc/objdetect.bib b/modules/objdetect/doc/objdetect.bib index 394eff8537..f3623732d5 100644 --- 
a/modules/objdetect/doc/objdetect.bib +++ b/modules/objdetect/doc/objdetect.bib @@ -18,3 +18,32 @@ year = {2016}, month = {October} } + +@mastersthesis{Xiangmin2015research, + title={Research on Barcode Recognition Technology In a Complex Background}, + author={Xiangmin, Wang}, + year={2015}, + school={Huazhong University of Science and Technology} +} + +@article{bazen2002systematic, + title={Systematic methods for the computation of the directional fields and singular points of fingerprints}, + author={Bazen, Asker M and Gerez, Sabih H}, + journal={IEEE transactions on pattern analysis and machine intelligence}, + volume={24}, + number={7}, + pages={905--919}, + year={2002}, + publisher={IEEE} +} + +@article{kass1987analyzing, + title={Analyzing oriented patterns}, + author={Kass, Michael and Witkin, Andrew}, + journal={Computer vision, graphics, and image processing}, + volume={37}, + number={3}, + pages={362--385}, + year={1987}, + publisher={Elsevier} +} diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index 27a6921400..6e1e22953a 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -45,6 +45,8 @@ #define OPENCV_OBJDETECT_HPP #include "opencv2/core.hpp" +#include "opencv2/objdetect/aruco_detector.hpp" +#include "opencv2/objdetect/graphical_code_detector.hpp" /** @defgroup objdetect Object Detection @@ -101,6 +103,7 @@ using a Boosted Cascade of Simple Features. IEEE CVPR, 2001. The paper is availa @defgroup objdetect_hog HOG (Histogram of Oriented Gradients) descriptor and object detector + @defgroup objdetect_barcode Barcode detection and decoding @defgroup objdetect_qrcode QRCode detection and encoding @defgroup objdetect_dnn_face DNN-based face detection and recognition Check @ref tutorial_dnn_face "the corresponding tutorial" for more details. @@ -753,44 +756,27 @@ public: CV_WRAP virtual void encodeStructuredAppend(const String& encoded_info, OutputArrayOfArrays qrcodes) = 0; }; - -class CV_EXPORTS_W QRCodeDetector +class CV_EXPORTS_W_SIMPLE QRCodeDetector : public GraphicalCodeDetector { public: CV_WRAP QRCodeDetector(); - ~QRCodeDetector(); /** @brief sets the epsilon used during the horizontal scan of QR code stop marker detection. @param epsX Epsilon neighborhood, which allows you to determine the horizontal pattern of the scheme 1:1:3:1:1 according to QR code standard. */ - CV_WRAP void setEpsX(double epsX); + CV_WRAP QRCodeDetector& setEpsX(double epsX); /** @brief sets the epsilon used during the vertical scan of QR code stop marker detection. @param epsY Epsilon neighborhood, which allows you to determine the vertical pattern of the scheme 1:1:3:1:1 according to QR code standard. */ - CV_WRAP void setEpsY(double epsY); + CV_WRAP QRCodeDetector& setEpsY(double epsY); /** @brief use markers to improve the position of the corners of the QR code * * alignmentMarkers using by default */ - CV_WRAP void setUseAlignmentMarkers(bool useAlignmentMarkers); - - /** @brief Detects QR code in image and returns the quadrangle containing the code. - @param img grayscale or color (BGR) image containing (or not) QR code. - @param points Output vector of vertices of the minimum-area quadrangle containing the code. - */ - CV_WRAP bool detect(InputArray img, OutputArray points) const; - - /** @brief Decodes QR code in image once it's found by the detect() method. - - Returns UTF8-encoded output string or empty string if the code cannot be decoded. 
- @param img grayscale or color (BGR) image containing QR code. - @param points Quadrangle vertices found by detect() method (or some other algorithm). - @param straight_qrcode The optional output image containing rectified and binarized QR code - */ - CV_WRAP std::string decode(InputArray img, InputArray points, OutputArray straight_qrcode = noArray()); + CV_WRAP QRCodeDetector& setUseAlignmentMarkers(bool useAlignmentMarkers); /** @brief Decodes QR code on a curved surface in image once it's found by the detect() method. @@ -801,15 +787,6 @@ public: */ CV_WRAP cv::String decodeCurved(InputArray img, InputArray points, OutputArray straight_qrcode = noArray()); - /** @brief Both detects and decodes QR code - - @param img grayscale or color (BGR) image containing QR code. - @param points optional output array of vertices of the found QR code quadrangle. Will be empty if not found. - @param straight_qrcode The optional output image containing rectified and binarized QR code - */ - CV_WRAP std::string detectAndDecode(InputArray img, OutputArray points=noArray(), - OutputArray straight_qrcode = noArray()); - /** @brief Both detects and decodes QR code on a curved surface @param img grayscale or color (BGR) image containing QR code. @@ -818,43 +795,58 @@ public: */ CV_WRAP std::string detectAndDecodeCurved(InputArray img, OutputArray points=noArray(), OutputArray straight_qrcode = noArray()); +}; - /** @brief Detects QR codes in image and returns the vector of the quadrangles containing the codes. - @param img grayscale or color (BGR) image containing (or not) QR codes. - @param points Output vector of vector of vertices of the minimum-area quadrangle containing the codes. - */ - CV_WRAP - bool detectMulti(InputArray img, OutputArray points) const; - - /** @brief Decodes QR codes in image once it's found by the detect() method. - @param img grayscale or color (BGR) image containing QR codes. - @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded. - @param points vector of Quadrangle vertices found by detect() method (or some other algorithm). - @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes - */ - CV_WRAP - bool decodeMulti( - InputArray img, InputArray points, - CV_OUT std::vector& decoded_info, - OutputArrayOfArrays straight_qrcode = noArray() - ) const; - - /** @brief Both detects and decodes QR codes - @param img grayscale or color (BGR) image containing QR codes. - @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded. - @param points optional output vector of vertices of the found QR code quadrangles. Will be empty if not found. 
-    @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes
-    */
-    CV_WRAP
-    bool detectAndDecodeMulti(
-            InputArray img, CV_OUT std::vector<std::string>& decoded_info,
-            OutputArray points = noArray(),
-            OutputArrayOfArrays straight_qrcode = noArray()
-    ) const;
+class CV_EXPORTS_W_SIMPLE QRCodeDetectorAruco : public GraphicalCodeDetector {
+public:
+    CV_WRAP QRCodeDetectorAruco();
 
-protected:
-    struct Impl;
-    Ptr<Impl> p;
+    struct CV_EXPORTS_W_SIMPLE Params {
+        CV_WRAP Params();
+
+        /** @brief The minimum allowed pixel size of a QR module in the smallest image in the image pyramid, default 4.f */
+        CV_PROP_RW float minModuleSizeInPyramid;
+
+        /** @brief The maximum allowed relative rotation for finder patterns in the same QR code, default pi/12 */
+        CV_PROP_RW float maxRotation;
+
+        /** @brief The maximum allowed relative mismatch in module sizes for finder patterns in the same QR code, default 1.75f */
+        CV_PROP_RW float maxModuleSizeMismatch;
+
+        /** @brief The maximum allowed relative mismatch for a timing pattern module, default 2.f
+         *
+         * If the relative mismatch of a timing pattern module exceeds this value, penalty points are added;
+         * if too many penalty points accumulate, the QR code is rejected. */
+        CV_PROP_RW float maxTimingPatternMismatch;
+
+        /** @brief The maximum allowed percentage of penalty points out of total pins in the timing pattern, default 0.4f */
+        CV_PROP_RW float maxPenalties;
+
+        /** @brief The maximum allowed relative color mismatch in the timing pattern, default 0.2f */
+        CV_PROP_RW float maxColorsMismatch;
+
+        /** @brief The algorithm selects the QR code with an almost-minimum timing pattern score and minimum size, default 0.9f
+         *
+         * The QR code with the minimum "timing pattern score" and minimum "size" is selected as the best QR code.
+         * If, for the current QR code, "timing pattern score" * scaleTimingPatternScore < "previous timing pattern score"
+         * and "size" < "previous size", then the current QR code is set as the best QR code. */
+        CV_PROP_RW float scaleTimingPatternScore;
+    };
+
+    /** @brief QR code detector constructor for the Aruco-based algorithm. See cv::QRCodeDetectorAruco::Params */
+    CV_WRAP explicit QRCodeDetectorAruco(const QRCodeDetectorAruco::Params& params);
+
+    /** @brief Detector parameters getter. See cv::QRCodeDetectorAruco::Params */
+    CV_WRAP const QRCodeDetectorAruco::Params& getDetectorParameters() const;
+
+    /** @brief Detector parameters setter. See cv::QRCodeDetectorAruco::Params */
+    CV_WRAP QRCodeDetectorAruco& setDetectorParameters(const QRCodeDetectorAruco::Params& params);
+
+    /** @brief Aruco detector parameters are used to search for the finder patterns. */
+    CV_WRAP aruco::DetectorParameters getArucoParameters();
+
+    /** @brief Aruco detector parameters are used to search for the finder patterns. */
+    CV_WRAP void setArucoParameters(const aruco::DetectorParameters& params);
 };
 
 //!
@} @@ -862,7 +854,7 @@ protected: #include "opencv2/objdetect/detection_based_tracker.hpp" #include "opencv2/objdetect/face.hpp" -#include "opencv2/objdetect/aruco_detector.hpp" #include "opencv2/objdetect/charuco_detector.hpp" +#include "opencv2/objdetect/barcode.hpp" #endif diff --git a/modules/objdetect/include/opencv2/objdetect/aruco_board.hpp b/modules/objdetect/include/opencv2/objdetect/aruco_board.hpp index fbd420c90e..1f41474405 100644 --- a/modules/objdetect/include/opencv2/objdetect/aruco_board.hpp +++ b/modules/objdetect/include/opencv2/objdetect/aruco_board.hpp @@ -90,7 +90,7 @@ public: */ CV_WRAP void generateImage(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1) const; - CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to “protected” (need to fix bindings first) + CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) Board(); struct Impl; @@ -122,7 +122,7 @@ public: CV_WRAP float getMarkerLength() const; CV_WRAP float getMarkerSeparation() const; - CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to “protected” (need to fix bindings first) + CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) GridBoard(); }; @@ -187,7 +187,7 @@ public: */ CV_WRAP bool checkCharucoCornersCollinear(InputArray charucoIds) const; - CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to “protected” (need to fix bindings first) + CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) CharucoBoard(); }; diff --git a/modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp b/modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp index 0f64d45aa0..f885a2af87 100644 --- a/modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp +++ b/modules/objdetect/include/opencv2/objdetect/aruco_detector.hpp @@ -34,7 +34,7 @@ struct CV_EXPORTS_W_SIMPLE DetectorParameters { minCornerDistanceRate = 0.05; minDistanceToBorder = 3; minMarkerDistanceRate = 0.05; - cornerRefinementMethod = CORNER_REFINE_NONE; + cornerRefinementMethod = (int)CORNER_REFINE_NONE; cornerRefinementWinSize = 5; cornerRefinementMaxIterations = 30; cornerRefinementMinAccuracy = 0.1; @@ -106,7 +106,7 @@ struct CV_EXPORTS_W_SIMPLE DetectorParameters { CV_PROP_RW double minMarkerDistanceRate; /** @brief default value CORNER_REFINE_NONE */ - CV_PROP_RW CornerRefineMethod cornerRefinementMethod; + CV_PROP_RW int cornerRefinementMethod; /// window size for the corner refinement process (in pixels) (default 5). 
     CV_PROP_RW int cornerRefinementWinSize;
diff --git a/modules/objdetect/include/opencv2/objdetect/aruco_dictionary.hpp b/modules/objdetect/include/opencv2/objdetect/aruco_dictionary.hpp
index 343d876b6c..c46b5fbfb5 100644
--- a/modules/objdetect/include/opencv2/objdetect/aruco_dictionary.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/aruco_dictionary.hpp
@@ -110,7 +110,8 @@
     DICT_APRILTAG_16h5,     ///< 4x4 bits, minimum hamming distance between any two codes = 5, 30 codes
     DICT_APRILTAG_25h9,     ///< 5x5 bits, minimum hamming distance between any two codes = 9, 35 codes
     DICT_APRILTAG_36h10,    ///< 6x6 bits, minimum hamming distance between any two codes = 10, 2320 codes
-    DICT_APRILTAG_36h11     ///< 6x6 bits, minimum hamming distance between any two codes = 11, 587 codes
+    DICT_APRILTAG_36h11,    ///< 6x6 bits, minimum hamming distance between any two codes = 11, 587 codes
+    DICT_ARUCO_MIP_36h12    ///< 6x6 bits, minimum hamming distance between any two codes = 12, 250 codes
 };
diff --git a/modules/objdetect/include/opencv2/objdetect/barcode.hpp b/modules/objdetect/include/opencv2/objdetect/barcode.hpp
new file mode 100644
index 0000000000..958490a422
--- /dev/null
+++ b/modules/objdetect/include/opencv2/objdetect/barcode.hpp
@@ -0,0 +1,65 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#ifndef OPENCV_OBJDETECT_BARCODE_HPP
+#define OPENCV_OBJDETECT_BARCODE_HPP
+
+#include <opencv2/core.hpp>
+#include <opencv2/objdetect/graphical_code_detector.hpp>
+
+namespace cv {
+namespace barcode {
+
+//! @addtogroup objdetect_barcode
+//! @{
+
+class CV_EXPORTS_W_SIMPLE BarcodeDetector : public cv::GraphicalCodeDetector
+{
+public:
+    /** @brief Initialize the BarcodeDetector.
+     */
+    CV_WRAP BarcodeDetector();
+    /** @brief Initialize the BarcodeDetector.
+     *
+     * Parameters allow loading an _optional_ Super Resolution DNN model for better quality.
+     * @param prototxt_path prototxt file path for the super resolution model
+     * @param model_path model file path for the super resolution model
+     */
+    CV_WRAP BarcodeDetector(const std::string &prototxt_path, const std::string &model_path);
+    ~BarcodeDetector();
+
+    /** @brief Decodes a barcode in an image once it is found by the detect() method.
+     *
+     * @param img grayscale or color (BGR) image containing the barcode.
+     * @param points vector of rotated rectangle vertices found by the detect() method (or some other algorithm).
+     * For N detected barcodes, the dimensions of this array should be [N][4].
+     * The order of the four points in the vector is bottomLeft, topLeft, topRight, bottomRight.
+     * @param decoded_info UTF8-encoded output vector of strings, or an empty vector of strings if the codes cannot be decoded.
+     * @param decoded_type vector of strings specifying the types of these barcodes
+     * @return true if at least one valid barcode has been found
+     */
+    CV_WRAP bool decodeWithType(InputArray img,
+                                InputArray points,
+                                CV_OUT std::vector<std::string> &decoded_info,
+                                CV_OUT std::vector<std::string> &decoded_type) const;
+
+    /** @brief Both detects and decodes barcodes
+
+     * @param img grayscale or color (BGR) image containing barcodes.
+     * @param decoded_info UTF8-encoded output vector of string(s), or an empty vector of strings if the codes cannot be decoded.
+     * @param decoded_type vector of strings specifying the types of these barcodes
+     * @param points optional output vector of vertices of the found barcode rectangles; will be empty if not found.
+     * @return true if at least one valid barcode has been found
+     */
+    CV_WRAP bool detectAndDecodeWithType(InputArray img,
+                                         CV_OUT std::vector<std::string> &decoded_info,
+                                         CV_OUT std::vector<std::string> &decoded_type,
+                                         OutputArray points = noArray()) const;
+};
+//! @}
+
+}} // cv::barcode::
+
+#endif // OPENCV_OBJDETECT_BARCODE_HPP
diff --git a/modules/objdetect/include/opencv2/objdetect/graphical_code_detector.hpp b/modules/objdetect/include/opencv2/objdetect/graphical_code_detector.hpp
new file mode 100644
index 0000000000..3535a8da1c
--- /dev/null
+++ b/modules/objdetect/include/opencv2/objdetect/graphical_code_detector.hpp
@@ -0,0 +1,81 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+#ifndef OPENCV_OBJDETECT_GRAPHICAL_CODE_DETECTOR_HPP
+#define OPENCV_OBJDETECT_GRAPHICAL_CODE_DETECTOR_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+
+//! @addtogroup objdetect_common
+//! @{
+
+class CV_EXPORTS_W_SIMPLE GraphicalCodeDetector {
+public:
+    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
+    GraphicalCodeDetector();
+
+    GraphicalCodeDetector(const GraphicalCodeDetector&) = default;
+    GraphicalCodeDetector(GraphicalCodeDetector&&) = default;
+    GraphicalCodeDetector& operator=(const GraphicalCodeDetector&) = default;
+    GraphicalCodeDetector& operator=(GraphicalCodeDetector&&) = default;
+
+    /** @brief Detects a graphical code in an image and returns the quadrangle containing the code.
+     @param img grayscale or color (BGR) image containing (or not) a graphical code.
+     @param points Output vector of vertices of the minimum-area quadrangle containing the code.
+     */
+    CV_WRAP bool detect(InputArray img, OutputArray points) const;
+
+    /** @brief Decodes a graphical code in an image once it is found by the detect() method.
+
+     Returns a UTF8-encoded output string, or an empty string if the code cannot be decoded.
+     @param img grayscale or color (BGR) image containing the graphical code.
+     @param points Quadrangle vertices found by the detect() method (or some other algorithm).
+     @param straight_code The optional output image containing the binarized code; will be empty if not found.
+     */
+    CV_WRAP std::string decode(InputArray img, InputArray points, OutputArray straight_code = noArray()) const;
+
+    /** @brief Both detects and decodes a graphical code
+
+     @param img grayscale or color (BGR) image containing the graphical code.
+     @param points optional output array of vertices of the found graphical code quadrangle; will be empty if not found.
+     @param straight_code The optional output image containing the binarized code
+     */
+    CV_WRAP std::string detectAndDecode(InputArray img, OutputArray points = noArray(),
+                                        OutputArray straight_code = noArray()) const;
+
+
+    /** @brief Detects graphical codes in an image and returns the vector of quadrangles containing the codes.
+     @param img grayscale or color (BGR) image containing (or not) graphical codes.
+     @param points Output vector of vectors of vertices of the minimum-area quadrangles containing the codes.
+     */
+    CV_WRAP bool detectMulti(InputArray img, OutputArray points) const;
+
+    /** @brief Decodes graphical codes in an image once they are found by the detect() method.
+     @param img grayscale or color (BGR) image containing graphical codes.
+     @param decoded_info UTF8-encoded output vector of strings, or an empty vector of strings if the codes cannot be decoded.
+ @param points vector of Quadrangle vertices found by detect() method (or some other algorithm). + @param straight_code The optional output vector of images containing binarized codes + */ + CV_WRAP bool decodeMulti(InputArray img, InputArray points, CV_OUT std::vector& decoded_info, + OutputArrayOfArrays straight_code = noArray()) const; + + /** @brief Both detects and decodes graphical codes + @param img grayscale or color (BGR) image containing graphical codes. + @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded. + @param points optional output vector of vertices of the found graphical code quadrangles. Will be empty if not found. + @param straight_code The optional vector of images containing binarized codes + */ + CV_WRAP bool detectAndDecodeMulti(InputArray img, CV_OUT std::vector& decoded_info, OutputArray points = noArray(), + OutputArrayOfArrays straight_code = noArray()) const; + struct Impl; +protected: + Ptr p; +}; + +//! @} + +} + +#endif \ No newline at end of file diff --git a/modules/objdetect/misc/java/test/BarcodeDetectorTest.java b/modules/objdetect/misc/java/test/BarcodeDetectorTest.java new file mode 100644 index 0000000000..92dfef667a --- /dev/null +++ b/modules/objdetect/misc/java/test/BarcodeDetectorTest.java @@ -0,0 +1,50 @@ +package org.opencv.test.barcode; + +import java.util.List; +import org.opencv.core.Mat; +import org.opencv.objdetect.BarcodeDetector; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.test.OpenCVTestCase; +import java.util.ArrayList; + +public class BarcodeDetectorTest extends OpenCVTestCase { + + private final static String ENV_OPENCV_TEST_DATA_PATH = "OPENCV_TEST_DATA_PATH"; + private String testDataPath; + + @Override + protected void setUp() throws Exception { + super.setUp(); + + testDataPath = System.getenv(ENV_OPENCV_TEST_DATA_PATH); + if (testDataPath == null) + throw new Exception(ENV_OPENCV_TEST_DATA_PATH + " has to be defined!"); + } + + public void testDetectAndDecode() { + Mat img = Imgcodecs.imread(testDataPath + "/cv/barcode/multiple/4_barcodes.jpg"); + assertFalse(img.empty()); + BarcodeDetector detector = new BarcodeDetector(); + assertNotNull(detector); + List < String > infos = new ArrayList< String >(); + List < String > types = new ArrayList< String >(); + + boolean result = detector.detectAndDecodeWithType(img, infos, types); + assertTrue(result); + assertEquals(infos.size(), 4); + assertEquals(types.size(), 4); + final String[] correctResults = {"9787122276124", "9787118081473", "9787564350840", "9783319200064"}; + for (int i = 0; i < 4; i++) { + assertEquals(types.get(i), "EAN_13"); + result = false; + for (int j = 0; j < 4; j++) { + if (correctResults[j].equals(infos.get(i))) { + result = true; + break; + } + } + assertTrue(result); + } + + } +} diff --git a/modules/objdetect/misc/objc/gen_dict.json b/modules/objdetect/misc/objc/gen_dict.json new file mode 100644 index 0000000000..0311e1e5d6 --- /dev/null +++ b/modules/objdetect/misc/objc/gen_dict.json @@ -0,0 +1,7 @@ +{ + "ManualFuncs" : { + "QRCodeDetectorAruco": { + "getDetectorParameters": { "declaration" : [""], "implementation" : [""] } + } + } +} diff --git a/modules/objdetect/misc/python/test/test_barcode_detector.py b/modules/objdetect/misc/python/test/test_barcode_detector.py new file mode 100644 index 0000000000..e4c297951f --- /dev/null +++ b/modules/objdetect/misc/python/test/test_barcode_detector.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +''' 
+=============================================================================== +Barcode detect and decode pipeline. +=============================================================================== +''' +import os +import numpy as np +import cv2 as cv + +from tests_common import NewOpenCVTests + +class barcode_detector_test(NewOpenCVTests): + + def test_detect(self): + img = cv.imread(os.path.join(self.extraTestDataPath, 'cv/barcode/multiple/4_barcodes.jpg')) + self.assertFalse(img is None) + detector = cv.barcode_BarcodeDetector() + retval, corners = detector.detect(img) + self.assertTrue(retval) + self.assertEqual(corners.shape, (4, 4, 2)) + + def test_detect_and_decode(self): + img = cv.imread(os.path.join(self.extraTestDataPath, 'cv/barcode/single/book.jpg')) + self.assertFalse(img is None) + detector = cv.barcode_BarcodeDetector() + retval, decoded_info, decoded_type, corners = detector.detectAndDecodeWithType(img) + self.assertTrue(retval) + self.assertTrue(len(decoded_info) > 0) + self.assertTrue(len(decoded_type) > 0) + self.assertEqual(decoded_info[0], "9787115279460") + self.assertEqual(decoded_type[0], "EAN_13") + self.assertEqual(corners.shape, (1, 4, 2)) diff --git a/modules/objdetect/perf/perf_aruco.cpp b/modules/objdetect/perf/perf_aruco.cpp index 3a5a659482..c9e15d9e75 100644 --- a/modules/objdetect/perf/perf_aruco.cpp +++ b/modules/objdetect/perf/perf_aruco.cpp @@ -171,7 +171,7 @@ PERF_TEST_P(EstimateAruco, ArucoFirst, ESTIMATE_PARAMS) { aruco::DetectorParameters detectorParams; detectorParams.minDistanceToBorder = 1; detectorParams.markerBorderBits = 1; - detectorParams.cornerRefinementMethod = cv::aruco::CORNER_REFINE_SUBPIX; + detectorParams.cornerRefinementMethod = (int)cv::aruco::CORNER_REFINE_SUBPIX; const int markerSize = 100; const int numMarkersInRow = 9; @@ -203,7 +203,7 @@ PERF_TEST_P(EstimateAruco, ArucoSecond, ESTIMATE_PARAMS) { aruco::DetectorParameters detectorParams; detectorParams.minDistanceToBorder = 1; detectorParams.markerBorderBits = 1; - detectorParams.cornerRefinementMethod = cv::aruco::CORNER_REFINE_SUBPIX; + detectorParams.cornerRefinementMethod = (int)cv::aruco::CORNER_REFINE_SUBPIX; //USE_ARUCO3 detectorParams.useAruco3Detection = get<0>(testParams); @@ -255,7 +255,7 @@ PERF_TEST_P(EstimateLargeAruco, ArucoFHD, ESTIMATE_FHD_PARAMS) { aruco::DetectorParameters detectorParams; detectorParams.minDistanceToBorder = 1; detectorParams.markerBorderBits = 1; - detectorParams.cornerRefinementMethod = cv::aruco::CORNER_REFINE_SUBPIX; + detectorParams.cornerRefinementMethod = (int)cv::aruco::CORNER_REFINE_SUBPIX; //USE_ARUCO3 detectorParams.useAruco3Detection = get<0>(testParams).useAruco3Detection; diff --git a/modules/objdetect/perf/perf_barcode.cpp b/modules/objdetect/perf/perf_barcode.cpp new file mode 100644 index 0000000000..b960518a1e --- /dev/null +++ b/modules/objdetect/perf/perf_barcode.cpp @@ -0,0 +1,114 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
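+//
+// All four perf tests below share one pattern (a sketch; names match the
+// tests: `src` is the resized input image):
+//   auto bardet = cv::barcode::BarcodeDetector();
+//   std::vector<cv::Point> corners;
+//   bool found = bardet.detectMulti(src, corners);  // detection only
+//   // detectAndDecodeWithType() additionally fills decoded_info/decoded_type.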
+ +#include "perf_precomp.hpp" +#include "opencv2/objdetect/barcode.hpp" + +namespace opencv_test{namespace{ + +typedef ::perf::TestBaseWithParam< tuple > Perf_Barcode_multi; +typedef ::perf::TestBaseWithParam< tuple > Perf_Barcode_single; + +PERF_TEST_P_(Perf_Barcode_multi, detect) +{ + const string root = "cv/barcode/multiple/"; + const string name_current_image = get<0>(GetParam()); + const cv::Size sz = get<1>(GetParam()); + const string image_path = findDataFile(root + name_current_image); + + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + cv::resize(src, src, sz); + + vector< Point > corners; + auto bardet = barcode::BarcodeDetector(); + bool res = false; + TEST_CYCLE() + { + res = bardet.detectMulti(src, corners); + } + SANITY_CHECK_NOTHING(); + ASSERT_TRUE(res); +} + +PERF_TEST_P_(Perf_Barcode_multi, detect_decode) +{ + const string root = "cv/barcode/multiple/"; + const string name_current_image = get<0>(GetParam()); + const cv::Size sz = get<1>(GetParam()); + const string image_path = findDataFile(root + name_current_image); + + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + cv::resize(src, src, sz); + + vector decoded_info; + vector decoded_type; + vector< Point > corners; + auto bardet = barcode::BarcodeDetector(); + bool res = false; + TEST_CYCLE() + { + res = bardet.detectAndDecodeWithType(src, decoded_info, decoded_type, corners); + } + SANITY_CHECK_NOTHING(); + ASSERT_TRUE(res); +} + +PERF_TEST_P_(Perf_Barcode_single, detect) +{ + const string root = "cv/barcode/single/"; + const string name_current_image = get<0>(GetParam()); + const cv::Size sz = get<1>(GetParam()); + const string image_path = findDataFile(root + name_current_image); + + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + cv::resize(src, src, sz); + + vector< Point > corners; + auto bardet = barcode::BarcodeDetector(); + bool res = false; + TEST_CYCLE() + { + res = bardet.detectMulti(src, corners); + } + SANITY_CHECK_NOTHING(); + ASSERT_TRUE(res); +} + +PERF_TEST_P_(Perf_Barcode_single, detect_decode) +{ + const string root = "cv/barcode/single/"; + const string name_current_image = get<0>(GetParam()); + const cv::Size sz = get<1>(GetParam()); + const string image_path = findDataFile(root + name_current_image); + + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + cv::resize(src, src, sz); + + vector decoded_info; + vector decoded_type; + vector< Point > corners; + auto bardet = barcode::BarcodeDetector(); + bool res = false; + TEST_CYCLE() + { + res = bardet.detectAndDecodeWithType(src, decoded_info, decoded_type, corners); + } + SANITY_CHECK_NOTHING(); + ASSERT_TRUE(res); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/, Perf_Barcode_multi, + testing::Combine( + testing::Values("4_barcodes.jpg"), + testing::Values(cv::Size(2041, 2722), cv::Size(1361, 1815), cv::Size(680, 907)))); +INSTANTIATE_TEST_CASE_P(/*nothing*/, Perf_Barcode_single, + testing::Combine( + testing::Values("book.jpg", "bottle_1.jpg", "bottle_2.jpg"), + testing::Values(cv::Size(480, 360), cv::Size(640, 480), cv::Size(800, 600)))); + +}} //namespace diff --git a/modules/objdetect/perf/perf_qrcode_pipeline.cpp b/modules/objdetect/perf/perf_qrcode_pipeline.cpp index 6722978b9a..150ed8cbbe 100644 --- a/modules/objdetect/perf/perf_qrcode_pipeline.cpp +++ b/modules/objdetect/perf/perf_qrcode_pipeline.cpp @@ -3,6 +3,7 @@ // of this distribution and at 
http://opencv.org/license.html. #include "perf_precomp.hpp" +#include "../test/test_qr_utils.hpp" namespace opencv_test { @@ -23,7 +24,9 @@ PERF_TEST_P_(Perf_Objdetect_QRCode, detect) std::vector< Point > corners; QRCodeDetector qrcode; TEST_CYCLE() ASSERT_TRUE(qrcode.detect(src, corners)); - SANITY_CHECK(corners); + const int pixels_error = 3; + check_qr(root, name_current_image, "test_images", corners, {}, pixels_error); + SANITY_CHECK_NOTHING(); } #ifdef HAVE_QUIRC @@ -45,48 +48,52 @@ PERF_TEST_P_(Perf_Objdetect_QRCode, decode) decoded_info = qrcode.decode(src, corners, straight_barcode); ASSERT_FALSE(decoded_info.empty()); } - - std::vector decoded_info_uint8_t(decoded_info.begin(), decoded_info.end()); - SANITY_CHECK(decoded_info_uint8_t); - SANITY_CHECK(straight_barcode); - + const int pixels_error = 3; + check_qr(root, name_current_image, "test_images", corners, {decoded_info}, pixels_error); + SANITY_CHECK_NOTHING(); } #endif -typedef ::perf::TestBaseWithParam< std::string > Perf_Objdetect_QRCode_Multi; +typedef ::perf::TestBaseWithParam> Perf_Objdetect_QRCode_Multi; -static inline bool compareCorners(const Point2f& corner1, const Point2f& corner2) { - return corner1.x == corner2.x ? corner1.y < corner2.y : corner1.x < corner2.x; -} +static std::set> disabled_samples = {{"5_qrcodes.png", "aruco_based"}}; PERF_TEST_P_(Perf_Objdetect_QRCode_Multi, detectMulti) { - const std::string name_current_image = GetParam(); + const std::string name_current_image = get<0>(GetParam()); + const std::string method = get<1>(GetParam()); const std::string root = "cv/qrcode/multiple/"; std::string image_path = findDataFile(root + name_current_image); Mat src = imread(image_path); ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; - std::vector corners; - QRCodeDetector qrcode; + std::vector corners; + GraphicalCodeDetector qrcode = QRCodeDetector(); + if (method == "aruco_based") { + qrcode = QRCodeDetectorAruco(); + } TEST_CYCLE() ASSERT_TRUE(qrcode.detectMulti(src, corners)); - sort(corners.begin(), corners.end(), compareCorners); - SANITY_CHECK(corners); -} - -static inline bool compareQR(const pair& v1, const pair& v2) { - return v1.first < v2.first; + const int pixels_error = 7; + check_qr(root, name_current_image, "multiple_images", corners, {}, pixels_error, true); + SANITY_CHECK_NOTHING(); } #ifdef HAVE_QUIRC PERF_TEST_P_(Perf_Objdetect_QRCode_Multi, decodeMulti) { - const std::string name_current_image = GetParam(); + const std::string name_current_image = get<0>(GetParam()); + std::string method = get<1>(GetParam()); const std::string root = "cv/qrcode/multiple/"; std::string image_path = findDataFile(root + name_current_image); Mat src = imread(image_path); ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; - QRCodeDetector qrcode; + if (disabled_samples.find({name_current_image, method}) != disabled_samples.end()) { + throw SkipTestException(name_current_image + " is disabled sample for method " + method); + } + GraphicalCodeDetector qrcode = QRCodeDetector(); + if (method == "aruco_based") { + qrcode = QRCodeDetectorAruco(); + } std::vector corners; ASSERT_TRUE(qrcode.detectMulti(src, corners)); std::vector straight_barcode; @@ -94,26 +101,20 @@ PERF_TEST_P_(Perf_Objdetect_QRCode_Multi, decodeMulti) TEST_CYCLE() { ASSERT_TRUE(qrcode.decodeMulti(src, corners, decoded_info, straight_barcode)); - for(size_t i = 0; i < decoded_info.size(); i++) - { - ASSERT_FALSE(decoded_info[i].empty()); - } + } + ASSERT_TRUE(decoded_info.size() > 0ull); + for(size_t i = 0; i < 
decoded_info.size(); i++) { + ASSERT_FALSE(decoded_info[i].empty()); } ASSERT_EQ(decoded_info.size(), straight_barcode.size()); - vector > result; - for (size_t i = 0ull; i < decoded_info.size(); i++) { - result.push_back(make_pair(decoded_info[i], straight_barcode[i])); + vector corners_result(corners.size()); + for (size_t i = 0ull; i < corners_result.size(); i++) { + corners_result[i] = corners[i]; } - sort(result.begin(), result.end(), compareQR); - vector > decoded_info_sort; - vector straight_barcode_sort; - for (size_t i = 0ull; i < result.size(); i++) { - vector tmp(result[i].first.begin(), result[i].first.end()); - decoded_info_sort.push_back(tmp); - straight_barcode_sort.push_back(result[i].second); - } - SANITY_CHECK(decoded_info_sort); + const int pixels_error = 7; + check_qr(root, name_current_image, "multiple_images", corners_result, decoded_info, pixels_error, true); + SANITY_CHECK_NOTHING(); } #endif @@ -127,11 +128,10 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/, Perf_Objdetect_QRCode, // version_5_right.jpg DISABLED after tile fix, PR #22025 INSTANTIATE_TEST_CASE_P(/*nothing*/, Perf_Objdetect_QRCode_Multi, - ::testing::Values( - "2_qrcodes.png", "3_close_qrcodes.png", "3_qrcodes.png", "4_qrcodes.png", - "5_qrcodes.png", "6_qrcodes.png", "7_qrcodes.png", "8_close_qrcodes.png" - ) -); + testing::Combine(testing::Values("2_qrcodes.png", "3_close_qrcodes.png", "3_qrcodes.png", "4_qrcodes.png", + "5_qrcodes.png", "6_qrcodes.png", "7_qrcodes.png", "8_close_qrcodes.png"), + testing::Values("contours_based", "aruco_based"))); + typedef ::perf::TestBaseWithParam< tuple< std::string, Size > > Perf_Objdetect_Not_QRCode; diff --git a/modules/objdetect/src/aruco/aruco_detector.cpp b/modules/objdetect/src/aruco/aruco_detector.cpp index c23621e520..4b3af1b2c7 100644 --- a/modules/objdetect/src/aruco/aruco_detector.cpp +++ b/modules/objdetect/src/aruco/aruco_detector.cpp @@ -881,7 +881,7 @@ void ArucoDetector::detectMarkers(InputArray _image, OutputArrayOfArrays _corner } else { // always turn on corner refinement in case of Aruco3, due to upsampling - detectorParams.cornerRefinementMethod = CORNER_REFINE_SUBPIX; + detectorParams.cornerRefinementMethod = (int)CORNER_REFINE_SUBPIX; // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection // Todo: update other CORNER_REFINE methods } @@ -923,7 +923,7 @@ void ArucoDetector::detectMarkers(InputArray _image, OutputArrayOfArrays _corner vector > > contoursSet; /// STEP 2.a Detect marker candidates :: using AprilTag - if(detectorParams.cornerRefinementMethod == CORNER_REFINE_APRILTAG){ + if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_APRILTAG){ _apriltag(grey, detectorParams, candidates, contours); candidatesSet.push_back(candidates); @@ -938,7 +938,7 @@ void ArucoDetector::detectMarkers(InputArray _image, OutputArrayOfArrays _corner candidates, contours, ids, detectorParams, _rejectedImgPoints); /// STEP 3: Corner refinement :: use corner subpix - if (detectorParams.cornerRefinementMethod == CORNER_REFINE_SUBPIX) { + if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) { CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 && detectorParams.cornerRefinementMinAccuracy > 0); // Do subpixel estimation. 
In Aruco3 start on the lowest pyramid level and upscale the corners @@ -963,7 +963,7 @@ void ArucoDetector::detectMarkers(InputArray _image, OutputArrayOfArrays _corner } /// STEP 3, Optional : Corner refinement :: use contour container - if (detectorParams.cornerRefinementMethod == CORNER_REFINE_CONTOUR){ + if (detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_CONTOUR){ if (!ids.empty()) { @@ -976,7 +976,7 @@ void ArucoDetector::detectMarkers(InputArray _image, OutputArrayOfArrays _corner } } - if (detectorParams.cornerRefinementMethod != CORNER_REFINE_SUBPIX && fxfy != 1.f) { + if (detectorParams.cornerRefinementMethod != (int)CORNER_REFINE_SUBPIX && fxfy != 1.f) { // only CORNER_REFINE_SUBPIX implement correctly for useAruco3Detection // Todo: update other CORNER_REFINE methods @@ -1213,7 +1213,7 @@ void ArucoDetector::refineDetectedMarkers(InputArray _image, const Board& _board if(closestCandidateIdx >= 0) { // subpixel refinement - if(detectorParams.cornerRefinementMethod == CORNER_REFINE_SUBPIX) { + if(detectorParams.cornerRefinementMethod == (int)CORNER_REFINE_SUBPIX) { CV_Assert(detectorParams.cornerRefinementWinSize > 0 && detectorParams.cornerRefinementMaxIterations > 0 && detectorParams.cornerRefinementMinAccuracy > 0); diff --git a/modules/objdetect/src/aruco/aruco_dictionary.cpp b/modules/objdetect/src/aruco/aruco_dictionary.cpp index 79eac9a649..f73cea3357 100644 --- a/modules/objdetect/src/aruco/aruco_dictionary.cpp +++ b/modules/objdetect/src/aruco/aruco_dictionary.cpp @@ -258,6 +258,8 @@ Dictionary getPredefinedDictionary(PredefinedDictionaryType name) { static const Dictionary DICT_APRILTAG_36h10_DATA = Dictionary(Mat(2320, (6 * 6 + 7) / 8, CV_8UC4, (uchar*)DICT_APRILTAG_36h10_BYTES), 6, 0); static const Dictionary DICT_APRILTAG_36h11_DATA = Dictionary(Mat(587, (6 * 6 + 7) / 8, CV_8UC4, (uchar*)DICT_APRILTAG_36h11_BYTES), 6, 0); + static const Dictionary DICT_ARUCO_MIP_36h12_DATA = Dictionary(Mat(250, (6 * 6 + 7) / 8, CV_8UC4, (uchar*)DICT_ARUCO_MIP_36h12_BYTES), 6, 12); + switch(name) { case DICT_ARUCO_ORIGINAL: @@ -308,6 +310,8 @@ Dictionary getPredefinedDictionary(PredefinedDictionaryType name) { case DICT_APRILTAG_36h11: return Dictionary(DICT_APRILTAG_36h11_DATA); + case DICT_ARUCO_MIP_36h12: + return Dictionary(DICT_ARUCO_MIP_36h12_DATA); } return Dictionary(DICT_4X4_50_DATA); } diff --git a/modules/objdetect/src/aruco/predefined_dictionaries.hpp b/modules/objdetect/src/aruco/predefined_dictionaries.hpp index f343183059..2f22e38655 100644 --- a/modules/objdetect/src/aruco/predefined_dictionaries.hpp +++ b/modules/objdetect/src/aruco/predefined_dictionaries.hpp @@ -20124,4 +20124,6 @@ static unsigned char DICT_7X7_1000_BYTES[][4][7] = { 195, 108, 74, 190, 250, 124, 0 }, { 252, 176, 35, 180, 179, 243, 1 }, { 31, 47, 190, 169, 27, 97, 1 }, }, }; + + static unsigned char DICT_ARUCO_MIP_36h12_BYTES[][4][5] = 
{{{210,182,58,9,13},{69,104,93,183,4},{185,5,198,212,11},{46,219,161,106,2}},{{96,1,19,78,5},{28,98,96,152,1},{167,44,136,0,6},{129,144,100,99,8}},{{18,6,251,231,2},{56,216,14,61,7},{78,125,246,4,8},{235,199,1,177,12}},{{255,138,214,203,4},{162,233,120,223,11},{45,54,181,31,15},{223,177,233,116,5}},{{133,218,155,196,9},{220,196,29,90,10},{146,61,149,186,1},{85,171,130,51,11}},{{180,97,175,233,12},{145,239,103,14,6},{57,127,88,98,13},{103,14,111,120,9}},{{109,181,31,225,3},{215,83,54,252,2},{200,127,138,219,6},{67,246,204,174,11}},{{82,72,197,65,15},{52,157,65,141,8},{248,42,49,36,10},{27,24,43,146,12}},{{0,143,52,80,3},{4,19,152,56,12},{192,162,207,16,0},{49,193,156,130,0}},{{142,164,98,236,14},{171,112,83,43,6},{115,116,98,87,1},{109,76,160,237,5}},{{234,194,190,118,13},{14,197,255,155,5},{182,231,212,53,7},{173,159,250,55,0}},{{26,246,21,196,4},{91,13,88,57,2},{34,58,134,245,8},{73,193,171,13,10}},{{180,138,73,242,7},{180,24,254,10,11},{228,249,37,18,13},{213,7,241,130,13}},{{46,78,18,131,11},{134,84,41,53,11},{220,20,135,39,4},{218,201,66,166,1}},{{120,177,242,250,8},{99,234,179,152,7},{21,244,248,209,14},{225,156,213,124,6}},{{39,211,79,87,14},{248,87,253,77,1},{126,175,44,190,4},{139,43,254,161,15}},{{137,34,47,255,1},{31,97,142,78,7},{143,255,68,73,1},{231,39,24,111,8}},{{76,22,105,64,6},{242,16,76,168,4},{96,41,102,131,2},{33,83,32,132,15}},{{191,73,179,81,1},{150,206,160,95,12},{136,172,217,47,13},{63,160,87,54,9}},{{220,25,28,213,13},{206,11,197,158,10},{186,179,137,131,11},{87,154,61,7,3}},{{17,215,195,248,5},{116,238,218,104,2},{161,252,62,184,8},{65,101,183,114,14}},{{22,161,48,227,5},{133,10,82,29,7},{172,112,200,86,8},{235,132,165,10,1}},{{226,159,39,239,15},{92,115,123,175,15},{255,126,79,148,7},{255,93,236,227,10}},{{66,141,138,224,12},{0,194,87,169,10},{48,117,27,20,2},{89,94,164,48,0}},{{144,213,72,71,7},{108,30,84,46,1},{238,33,42,176,9},{135,66,167,131,6}},{{35,25,203,201,3},{116,242,36,77,10},{201,61,57,140,4},{91,34,68,242,14}},{{195,176,195,223,12},{121,224,209,207,3},{63,188,48,220,3},{207,56,176,121,14}},{{4,36,188,204,9},{141,161,5,56,6},{147,51,210,66,0},{97,202,8,91,1}},{{42,8,29,99,0},{18,1,38,29,9},{12,107,129,5,4},{155,134,72,4,8}},{{118,39,67,217,6},{177,122,232,173,2},{105,188,46,70,14},{75,81,117,232,13}},{{208,100,91,241,9},{53,76,135,190,2},{152,253,162,96,11},{71,222,19,42,12}},{{243,141,127,214,0},{56,75,180,251,15},{6,191,235,28,15},{253,242,221,33,12}},{{198,203,249,161,0},{176,134,30,151,14},{8,89,253,54,3},{126,151,134,16,13}},{{60,27,231,198,5},{254,203,104,8,15},{166,62,125,131,12},{241,1,109,55,15}},{{39,111,117,230,3},{189,23,42,121,15},{198,122,239,110,4},{249,229,78,139,13}},{{68,144,163,246,3},{220,208,146,136,7},{198,252,80,146,2},{225,20,144,179,11}},{{218,96,172,213,2},{11,157,132,143,6},{74,179,80,101,11},{111,18,27,157,0}},{{60,198,141,245,9},{158,141,191,44,2},{154,251,22,51,12},{67,79,219,23,9}},{{171,70,249,218,14},{50,180,237,123,7},{117,185,246,45,5},{237,235,114,212,12}},{{136,213,51,215,8},{90,70,145,62,7},{30,188,202,177,1},{231,200,150,37,10}},{{182,214,46,194,1},{196,77,60,43,7},{132,55,70,182,13},{237,67,203,34,3}},{{179,192,43,100,6},{24,92,118,75,4},{98,109,64,60,13},{45,38,227,161,8}},{{34,229,109,64,8},{49,7,53,41,4},{16,43,106,116,4},{41,74,206,8,12}},{{172,95,87,112,10},{242,87,171,58,8},{80,238,175,163,5},{21,205,94,164,15}},{{170,169,147,246,6},{27,210,242,27,11},{102,252,153,85,5},{221,132,244,189,8}},{{76,170,7,200,13},{151,97,89,136,10},{177,62,5,83,2},{81,25,168,110,9}},{{92,155,79,123,0},{242,107,158,140,9},{13,23
9,45,147,10},{147,23,157,100,15}},{{170,158,240,224,5},{102,128,122,59,14},{160,112,247,149,5},{125,197,224,22,6}},{{7,5,197,117,0},{184,131,130,109,0},{10,234,58,14,0},{11,100,28,17,13}},{{172,129,245,69,14},{186,147,113,30,4},{122,42,248,19,5},{39,136,236,149,13}},{{115,91,145,231,4},{88,142,106,221,11},{46,120,157,172,14},{219,181,103,17,10}},{{140,195,92,238,4},{170,39,94,26,3},{39,115,172,51,1},{197,135,174,69,5}},{{228,70,148,208,4},{128,133,232,186,2},{32,178,150,34,7},{69,209,122,16,1}},{{181,225,33,222,0},{153,46,176,74,7},{7,184,72,122,13},{229,32,215,73,9}},{{38,16,23,208,15},{212,81,225,25,2},{240,190,128,134,4},{73,136,120,162,11}},{{241,212,57,235,5},{84,44,118,254,7},{173,121,194,184,15},{231,246,227,66,10}},{{161,163,58,201,6},{1,114,124,94,6},{105,53,204,88,5},{103,163,228,232,0}},{{23,76,98,192,2},{160,92,0,105,14},{64,52,99,46,8},{121,96,3,160,5}},{{30,226,127,113,6},{179,93,222,29,4},{104,239,228,119,8},{43,135,187,172,13}},{{139,28,94,206,9},{110,97,5,123,11},{151,55,163,141,1},{221,234,8,103,6}},{{106,5,176,198,10},{10,146,33,185,7},{86,48,218,5,6},{233,216,68,149,0}},{{13,5,104,223,12},{170,34,197,108,7},{63,177,106,11,0},{227,106,52,69,5}},{{25,45,37,229,15},{31,27,67,108,14},{250,122,75,73,8},{115,108,45,143,8}},{{26,219,236,204,8},{106,175,29,9,14},{19,51,125,181,8},{121,11,143,85,6}},{{207,236,135,240,0},{147,197,146,235,10},{0,254,19,127,3},{93,116,154,60,9}},{{208,185,221,231,10},{121,155,23,158,11},{94,123,185,208,11},{215,158,141,153,14}},{{136,220,239,129,14},{114,213,85,38,14},{120,31,115,177,1},{118,74,170,180,14}},{{68,86,129,203,9},{212,164,9,172,3},{157,56,22,162,2},{195,89,2,82,11}},{{219,178,255,200,3},{119,249,28,219,6},{193,63,244,221,11},{109,179,137,254,14}},{{164,141,150,223,1},{140,227,176,62,11},{143,182,155,18,5},{215,192,220,115,1}},{{183,44,194,231,13},{173,200,99,111,11},{190,116,51,78,13},{223,108,97,59,5}},{{12,41,91,83,15},{183,82,197,28,9},{252,173,169,67,0},{147,138,52,174,13}},{{244,152,50,112,4},{192,72,242,154,12},{32,228,193,146,15},{53,148,241,32,3}},{{153,104,237,194,9},{55,141,5,74,15},{148,59,113,105,9},{245,42,11,30,12}},{{158,78,26,248,5},{134,108,206,59,10},{161,245,135,39,9},{93,199,51,102,1}},{{134,131,226,209,11},{164,210,153,15,6},{216,180,124,22,1},{111,9,148,178,5}},{{129,11,69,192,4},{48,3,72,74,10},{32,58,45,8,1},{85,33,44,0,12}},{{106,196,75,254,2},{58,116,182,169,3},{71,253,34,53,6},{201,86,210,229,12}},{{100,83,70,97,5},{228,71,106,140,0},{168,102,44,162,6},{3,21,110,34,7}},{{57,144,189,89,8},{82,169,181,92,4},{25,171,208,153,12},{35,170,217,84,10}},{{28,158,208,246,10},{234,152,155,56,11},{86,240,183,147,8},{209,205,145,149,7}},{{194,103,41,214,5},{29,6,204,171,7},{166,185,78,100,3},{237,83,54,11,8}},{{131,153,63,121,5},{84,99,214,95,12},{169,239,201,156,1},{63,166,188,98,10}},{{58,192,90,197,13},{46,76,117,29,2},{186,53,160,53,12},{75,138,227,39,4}},{{53,122,223,243,11},{245,221,175,92,11},{220,255,181,234,12},{211,175,91,186,15}},{{13,92,5,86,5},{222,5,192,104,9},{166,170,3,171,0},{145,96,58,7,11}},{{47,84,126,244,4},{234,69,230,121,6},{34,247,226,175,4},{105,230,122,37,7}},{{134,193,21,4,1},{156,7,16,19,0},{130,10,136,54,1},{12,128,142,3,9}},{{100,15,217,229,15},{188,146,111,188,10},{250,121,191,2,6},{83,223,100,147,13}},{{206,8,187,207,7},{158,240,68,159,15},{239,61,209,7,3},{255,146,32,247,9}},{{16,155,179,67,14},{80,218,89,28,13},{124,44,221,144,8},{179,137,165,176,10}},{{194,20,53,201,2},{80,49,0,191,6},{73,58,194,132,3},{111,208,8,192,10}},{{53,180,223,206,4},{249,233,116,120,3},{39,63,178,218,12},{193,226
,233,121,15}},{{69,151,82,207,2},{232,114,24,252,3},{79,52,174,154,2},{195,241,132,225,7}},{{236,145,91,130,12},{242,66,117,146,3},{52,29,168,147,7},{196,154,228,36,15}},{{81,136,30,237,0},{8,105,22,220,10},{11,119,129,24,10},{83,182,137,97,0}},{{45,218,125,201,7},{246,53,124,92,14},{233,59,229,187,4},{115,163,234,198,15}},{{46,1,66,20,4},{170,66,224,1,0},{34,132,40,7,4},{8,0,116,37,5}},{{66,232,144,249,9},{5,164,147,157,10},{153,240,145,116,2},{91,156,146,90,0}},{{154,136,86,82,7},{38,89,208,27,9},{228,166,161,21,9},{157,128,185,166,4}},{{142,128,217,216,0},{178,160,148,27,2},{1,185,176,23,1},{77,130,144,84,13}},{{137,28,188,243,4},{66,129,198,126,15},{44,243,211,137,1},{247,230,56,20,2}},{{37,221,130,65,0},{192,198,48,108,8},{8,36,27,186,4},{19,96,198,48,3}},{{35,149,81,211,4},{112,2,240,125,3},{44,184,170,156,4},{203,224,244,0,14}},{{143,232,240,199,0},{171,132,16,95,15},{14,48,241,127,1},{255,160,130,29,5}},{{148,16,106,151,0},{232,72,132,6,7},{14,149,96,130,9},{230,2,17,33,7}},{{130,96,155,64,12},{17,196,69,27,0},{48,45,144,100,1},{13,138,34,56,8}},{{15,201,202,243,6},{162,214,214,77,11},{108,245,57,63,0},{219,38,182,180,5}},{{104,129,129,209,1},{22,130,176,140,2},{136,184,24,17,6},{67,16,212,22,8}},{{113,134,19,192,8},{16,72,57,248,2},{16,60,134,24,14},{65,249,193,32,8}},{{15,26,183,98,9},{214,193,11,89,13},{148,110,213,143,0},{185,173,8,54,11}},{{163,87,191,193,8},{80,199,45,127,6},{24,63,222,172,5},{111,235,78,48,10}},{{76,3,183,164,6},{154,211,74,144,6},{98,94,220,3,2},{96,149,44,181,9}},{{32,77,237,206,6},{56,183,100,40,15},{103,59,123,32,4},{241,66,110,209,12}},{{173,99,0,211,7},{135,22,232,78,3},{236,176,12,107,5},{199,33,118,142,1}},{{132,204,76,208,9},{164,5,149,42,10},{144,179,35,50,1},{85,74,154,2,5}},{{66,22,14,92,4},{72,97,204,169,0},{35,167,6,132,2},{9,83,56,97,2}},{{135,210,173,250,8},{208,165,159,75,7},{21,251,84,190,1},{237,47,154,80,11}},{{120,80,231,116,9},{126,205,163,136,4},{146,238,112,161,14},{33,28,91,55,14}},{{78,117,15,199,12},{219,71,69,173,3},{62,63,10,231,2},{203,90,46,45,11}},{{191,46,93,253,10},{187,57,175,127,10},{91,251,167,79,13},{95,239,89,205,13}},{{216,131,36,218,5},{6,43,216,138,7},{165,178,76,17,11},{229,17,189,70,0}},{{35,75,82,248,0},{32,102,170,89,10},{1,244,173,44,4},{89,165,86,96,4}},{{55,130,4,81,4},{128,9,248,77,0},{40,162,4,30,12},{11,33,249,0,1}},{{171,223,42,213,3},{78,86,188,111,14},{202,181,79,189,5},{127,99,214,167,2}},{{54,94,120,239,9},{236,44,47,61,15},{159,113,231,166,12},{251,207,67,67,7}},{{73,202,166,202,2},{2,245,24,200,15},{69,54,85,57,2},{241,49,138,244,0}},{{3,195,157,223,3},{28,183,156,93,3},{207,187,156,60,0},{203,163,158,211,8}},{{198,140,83,133,13},{188,64,81,183,10},{186,28,163,22,3},{94,216,160,35,13}},{{91,252,187,246,7},{95,220,214,249,15},{230,253,211,253,10},{249,246,179,191,10}},{{98,50,65,226,1},{117,0,42,137,3},{132,120,36,196,6},{201,21,64,10,14}},{{171,201,13,92,12},{26,39,245,75,8},{51,171,9,61,5},{29,42,254,69,8}},{{56,140,111,232,5},{54,105,118,40,14},{161,127,99,17,12},{113,70,233,102,12}},{{218,14,45,98,13},{22,9,79,171,13},{180,107,71,5,11},{189,95,41,6,8}},{{16,133,93,254,9},{60,43,151,56,3},{151,251,170,16,8},{193,206,157,67,12}},{{77,70,239,214,11},{190,213,141,232,7},{214,191,118,43,2},{225,123,26,183,13}},{{118,234,18,214,1},{141,76,184,153,11},{134,180,133,118,14},{217,145,211,43,1}},{{157,179,119,211,13},{247,75,217,94,7},{188,190,236,219,9},{231,169,189,46,15}},{{238,208,239,167,1},{254,197,54,135,7},{142,95,112,183,7},{238,22,202,55,15}},{{230,236,58,226,15},{133,84,119,187,15},{244,117,195,118
,7},{253,222,226,170,1}},{{68,31,174,232,3},{196,243,14,168,14},{193,119,95,130,2},{113,87,12,242,3}},{{186,25,200,255,5},{110,170,230,15,11},{175,241,57,133,13},{223,6,117,87,6}},{{49,48,53,234,11},{85,57,35,88,7},{213,122,192,200,12},{225,172,73,202,10}},{{108,232,247,98,5},{183,197,114,152,13},{164,110,241,115,6},{177,148,234,62,13}},{{136,13,171,88,13},{22,226,197,42,12},{177,173,91,1,1},{53,74,52,118,8}},{{141,52,9,224,13},{215,0,71,106,2},{176,121,2,203,1},{69,110,32,14,11}},{{43,233,46,226,1},{7,71,54,73,15},{132,119,73,125,4},{249,38,206,46,0}},{{214,3,2,198,12},{136,74,73,139,3},{54,52,12,6,11},{205,25,37,33,1}},{{70,159,252,114,4},{224,131,222,185,13},{36,227,255,150,2},{185,215,188,16,7}},{{135,238,190,237,3},{141,245,30,127,14},{203,119,215,126,1},{127,231,138,251,1}},{{66,88,126,247,10},{104,85,135,157,15},{94,247,225,164,2},{251,158,26,161,6}},{{122,140,196,229,2},{42,153,50,173,10},{74,114,51,21,14},{91,84,201,149,4}},{{118,164,55,101,0},{153,73,50,189,4},{10,110,194,86,14},{43,212,201,41,9}},{{153,158,65,239,4},{122,40,90,110,11},{47,120,39,153,9},{215,101,161,69,14}},{{125,9,105,228,2},{186,26,38,200,14},{66,121,105,11,14},{113,54,69,133,13}},{{192,43,175,70,11},{29,211,13,138,13},{214,47,93,64,3},{181,27,12,187,8}},{{146,89,243,228,7},{124,222,66,27,14},{226,124,249,164,9},{125,132,39,179,14}},{{33,22,161,220,0},{88,160,168,104,6},{3,184,86,136,4},{97,97,80,81,10}},{{159,45,228,216,4},{163,171,192,107,14},{33,178,123,79,9},{125,96,61,92,5}},{{0,239,250,194,9},{37,198,29,56,15},{148,53,255,112,0},{241,203,134,58,4}},{{123,55,31,248,12},{83,107,239,249,2},{49,255,142,205,14},{73,255,125,108,10}},{{102,131,57,218,9},{148,34,189,153,7},{149,185,204,22,6},{233,155,212,66,9}},{{208,16,174,227,15},{68,217,71,142,7},{252,119,80,128,11},{231,30,41,178,2}},{{28,208,11,76,0},{218,108,20,8,0},{3,45,0,179,8},{1,2,131,101,11}},{{149,7,15,195,11},{148,91,13,110,3},{220,63,14,10,9},{199,107,13,162,9}},{{248,76,154,119,0},{10,204,166,190,9},{14,229,147,33,15},{151,214,83,53,0}},{{56,248,99,215,6},{123,92,240,12,15},{110,188,97,241,12},{243,0,243,173,14}},{{54,70,255,4,5},{188,205,108,49,4},{162,15,246,38,12},{40,195,107,51,13}},{{206,27,150,65,2},{194,211,8,159,8},{72,38,157,135,3},{31,145,12,180,3}},{{122,93,69,218,8},{114,47,161,169,11},{21,186,43,165,14},{217,88,95,68,14}},{{20,224,14,246,12},{137,77,215,8,3},{54,247,0,114,8},{193,14,187,41,1}},{{94,149,171,253,8},{218,234,151,173,6},{27,253,90,151,10},{107,94,149,117,11}},{{178,233,203,114,9},{53,206,183,11,9},{148,237,57,116,13},{157,14,215,58,12}},{{3,108,71,221,7},{61,117,192,109,10},{235,190,35,108,0},{91,96,58,235,12}},{{184,238,151,198,11},{31,221,57,58,11},{214,62,151,113,13},{213,201,203,191,8}},{{14,158,143,101,7},{222,209,94,45,8},{234,111,23,151,0},{27,71,168,183,11}},{{212,173,46,241,10},{129,91,151,174,14},{88,247,75,82,11},{119,94,157,168,1}},{{136,17,199,243,2},{114,211,130,14,3},{76,254,56,129,1},{199,4,28,180,14}},{{71,189,231,195,1},{245,195,16,237,15},{140,62,123,222,2},{251,112,140,58,15}},{{58,218,223,182,4},{122,205,254,17,11},{38,223,181,181,12},{216,135,251,53,14}},{{110,91,40,87,4},{202,6,236,141,13},{46,161,77,167,6},{187,19,118,5,3}},{{51,230,124,217,1},{37,45,188,125,6},{137,179,230,124,12},{107,227,219,74,4}},{{42,185,253,210,13},{119,131,245,25,15},{180,187,249,213,4},{249,138,252,30,14}},{{138,250,103,242,11},{119,85,155,11,15},{212,254,101,245,1},{253,13,154,174,14}},{{230,162,143,197,14},{153,209,125,143,2},{122,63,20,86,7},{79,27,232,185,9}},{{114,4,156,219,13},{4,169,229,189,3},{189,179,146,4,14},{203
,218,121,82,0}},{{174,101,218,193,2},{163,214,36,63,2},{72,53,186,103,5},{79,194,70,188,5}},{{18,81,164,82,6},{64,159,192,9,5},{100,162,88,164,8},{169,0,63,144,2}},{{16,137,171,132,1},{28,202,20,0,14},{130,29,89,16,8},{112,2,133,51,8}},{{226,240,150,238,0},{73,229,50,155,3},{7,118,144,244,7},{205,148,202,121,2}},{{176,202,238,87,3},{44,221,188,14,13},{206,167,117,48,13},{183,3,219,179,4}},{{253,102,119,232,6},{179,125,106,250,6},{97,126,230,107,15},{101,245,107,236,13}},{{68,75,63,81,8},{144,71,141,156,12},{24,175,205,34,2},{51,155,30,32,9}},{{190,139,58,86,10},{138,90,189,27,13},{86,165,205,23,13},{189,139,213,165,1}},{{104,10,117,207,12},{58,33,105,156,15},{63,58,229,1,6},{243,153,104,69,12}},{{172,2,186,234,8},{130,224,47,26,7},{21,117,212,3,5},{229,143,64,116,1}},{{151,216,21,225,12},{208,13,83,95,10},{56,122,129,190,9},{95,172,171,0,11}},{{29,67,134,224,8},{130,207,11,72,2},{16,118,28,43,8},{65,45,15,52,1}},{{26,20,245,176,14},{114,153,195,49,6},{112,218,242,133,8},{104,204,57,148,14}},{{230,88,168,216,1},{196,164,164,139,14},{129,177,81,166,7},{125,18,82,82,3}},{{163,134,142,250,7},{4,241,254,107,3},{229,247,22,28,5},{205,103,248,242,0}},{{54,104,169,103,3},{157,156,38,13,13},{206,105,81,102,12},{187,6,67,155,9}},{{232,252,83,216,5},{119,100,240,186,10},{161,188,163,241,7},{85,208,242,110,14}},{{46,43,126,221,5},{175,99,236,29,14},{171,183,237,71,4},{123,131,124,111,5}},{{139,36,112,241,3},{39,16,130,127,6},{200,240,226,77,1},{111,228,16,142,4}},{{246,151,149,243,2},{208,155,186,191,3},{76,250,158,150,15},{207,213,221,144,11}},{{69,137,255,200,14},{176,243,85,216,14},{113,63,249,26,2},{113,186,172,240,13}},{{46,32,128,201,12},{131,160,97,13,2},{57,48,16,71,4},{75,8,96,92,1}},{{6,66,101,247,13},{188,5,203,13,7},{190,250,100,38,0},{235,13,58,3,13}},{{61,113,77,209,0},{243,15,164,76,2},{8,187,40,235,12},{67,34,95,12,15}},{{22,146,198,239,1},{236,233,26,13,3},{143,118,52,150,8},{203,5,137,115,7}},{{62,103,242,244,9},{175,206,171,57,6},{146,244,254,103,12},{105,205,87,63,5}},{{80,65,218,214,3},{44,222,132,152,3},{198,181,184,32,10},{193,146,23,179,4}},{{26,21,3,65,5},{86,74,64,45,0},{168,44,10,133,8},{11,64,37,38,10}},{{100,193,140,116,2},{136,151,182,136,0},{66,227,24,50,6},{1,22,222,145,1}},{{10,114,238,195,5},{103,197,76,13,7},{172,55,116,229,0},{235,3,42,62,6}},{{31,15,157,198,0},{154,139,12,121,11},{6,59,159,15,8},{217,227,13,21,9}},{{169,85,155,198,7},{94,214,100,122,3},{230,61,154,169,5},{197,226,102,183,10}},{{243,41,17,208,13},{21,10,225,219,10},{176,184,137,76,15},{93,184,117,10,8}},{{33,192,212,255,12},{40,165,243,92,3},{63,242,176,56,4},{195,172,250,81,4}},{{224,28,239,91,0},{112,225,164,174,13},{13,175,115,128,7},{183,82,88,112,14}},{{78,35,163,82,0},{147,194,136,137,5},{4,172,92,71,2},{169,17,20,60,9}},{{170,79,4,228,9},{14,7,43,43,10},{146,114,15,37,5},{93,77,78,7,0}},{{225,196,252,196,3},{44,149,52,250,6},{194,51,242,56,7},{101,242,202,147,4}},{{32,142,143,110,8},{24,225,63,40,9},{23,111,23,16,4},{145,79,200,113,8}},{{132,134,119,74,5},{180,97,88,58,5},{165,46,230,18,1},{165,193,168,98,13}},{{158,152,199,85,8},{250,201,145,15,8},{26,174,49,151,9},{31,8,153,53,15}},{{44,89,251,125,12},{250,230,231,28,12},{59,237,249,163,4},{51,142,118,117,15}},{{148,70,164,97,3},{132,157,10,46,4},{200,98,86,34,9},{39,69,11,146,1}},{{130,146,220,194,14},{96,145,93,27,3},{116,51,180,148,1},{205,139,168,144,6}},{{0,77,97,99,1},{52,6,2,44,13},{140,104,107,32,0},{179,68,6,2,12}},{{208,85,39,128,9},{84,79,1,162,6},{144,30,74,160,11},{100,88,15,34,10}},{{160,22,56,82,13},{68,0,237,58,5},{180,161,19
8,128,5},{165,203,112,2,2}},{{143,101,127,99,9},{183,71,7,127,5},{156,111,234,111,1},{175,238,14,46,13}},{{204,166,195,227,7},{183,208,90,174,3},{236,124,54,83,3},{199,85,160,190,13}},{{203,19,107,199,10},{122,82,13,207,7},{94,61,108,141,3},{239,59,4,165,14}},{{252,90,131,229,3},{222,220,42,142,10},{202,124,21,163,15},{87,21,67,183,11}},{{154,164,79,195,0},{51,73,20,47,3},{12,63,34,85,9},{207,66,137,44,12}},{{189,236,27,211,12},{147,76,245,126,11},{60,189,131,123,13},{215,234,243,44,9}},{{224,32,185,247,12},{25,128,231,158,7},{62,249,208,64,7},{231,158,112,25,8}},{{75,143,53,251,0},{18,35,154,253,15},{13,250,207,29,2},{251,245,156,68,8}},{{184,22,95,99,7},{118,89,110,62,1},{236,111,166,129,13},{135,199,105,166,14}},{{51,220,136,214,9},{76,140,181,105,11},{150,177,19,188,12},{217,106,211,19,2}},{{16,162,247,228,13},{61,201,91,24,6},{178,126,244,80,8},{97,141,169,59,12}},{{200,203,95,245,3},{62,87,158,158,10},{202,255,173,49,3},{87,151,158,167,12}},{{222,37,159,246,11},{159,219,135,187,3},{214,255,154,71,11},{205,222,29,191,9}},{{70,208,112,221,4},{232,36,208,157,6},{43,176,224,182,2},{107,144,178,65,7}},{{50,211,185,116,1},{92,142,190,25,4},{130,233,220,180,12},{41,135,215,19,10}},{{112,117,241,192,4},{113,142,96,184,6},{32,56,250,224,14},{97,208,103,24,14}},{{77,88,219,234,0},{242,228,6,216,11},{5,125,177,171,2},{209,182,2,116,15}}}; } diff --git a/modules/objdetect/src/barcode.cpp b/modules/objdetect/src/barcode.cpp new file mode 100644 index 0000000000..549ea84a0a --- /dev/null +++ b/modules/objdetect/src/barcode.cpp @@ -0,0 +1,374 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds + +#include "precomp.hpp" +#include +#include +#include "barcode_decoder/ean13_decoder.hpp" +#include "barcode_decoder/ean8_decoder.hpp" +#include "barcode_detector/bardetect.hpp" +#include "barcode_decoder/common/super_scale.hpp" +#include "barcode_decoder/common/utils.hpp" +#include "graphical_code_detector_impl.hpp" + +using std::string; +using std::vector; +using std::make_shared; +using std::array; +using std::shared_ptr; +using std::dynamic_pointer_cast; + +namespace cv { +namespace barcode { + +//================================================================================================== + +static bool checkBarInputImage(InputArray img, Mat &gray) +{ + CV_Assert(!img.empty()); + CV_CheckDepthEQ(img.depth(), CV_8U, ""); + if (img.cols() <= 40 || img.rows() <= 40) + { + return false; // image data is not enough for providing reliable results + } + int incn = img.channels(); + CV_Check(incn, incn == 1 || incn == 3 || incn == 4, ""); + if (incn == 3 || incn == 4) + { + cvtColor(img, gray, COLOR_BGR2GRAY); + } + else + { + gray = img.getMat(); + } + return true; +} + +static void updatePointsResult(OutputArray points_, const vector &points) +{ + if (points_.needed()) + { + int N = int(points.size() / 4); + if (N > 0) + { + Mat m_p(N, 4, CV_32FC2, (void *) &points[0]); + int points_type = points_.fixedType() ? 
points_.type() : CV_32FC2; + m_p.reshape(2, points_.rows()).convertTo(points_, points_type); // Mat layout: N x 4 x 2cn + } + else + { + points_.release(); + } + } +} + +inline const array, 2> &getDecoders() +{ + //indicate Decoder + static const array, 2> decoders{ + shared_ptr(new Ean13Decoder()), shared_ptr(new Ean8Decoder())}; + return decoders; +} + +//================================================================================================== + +class BarDecode +{ +public: + void init(const vector &bar_imgs_); + + const vector &getDecodeInformation() + { return result_info; } + + bool decodeMultiplyProcess(); + +private: + vector bar_imgs; + vector result_info; +}; + +void BarDecode::init(const vector &bar_imgs_) +{ + bar_imgs = bar_imgs_; +} + +bool BarDecode::decodeMultiplyProcess() +{ + static float constexpr THRESHOLD_CONF = 0.6f; + result_info.clear(); + result_info.resize(bar_imgs.size()); + parallel_for_(Range(0, int(bar_imgs.size())), [&](const Range &range) { + for (int i = range.start; i < range.end; i++) + { + Mat bin_bar; + Result max_res; + float max_conf = -1.f; + bool decoded = false; + for (const auto &decoder:getDecoders()) + { + if (decoded) + { break; } + for (const auto binary_type : binary_types) + { + binarize(bar_imgs[i], bin_bar, binary_type); + auto cur_res = decoder->decodeROI(bin_bar); + if (cur_res.second > max_conf) + { + max_res = cur_res.first; + max_conf = cur_res.second; + if (max_conf > THRESHOLD_CONF) + { + // code decoded + decoded = true; + break; + } + } + } //binary types + } //decoder types + + result_info[i] = max_res; + } + }); + return !result_info.empty(); +} + +//================================================================================================== +// Private class definition and implementation (pimpl) + +struct BarcodeImpl : public GraphicalCodeDetector::Impl +{ +public: + shared_ptr sr; + bool use_nn_sr = false; + +public: + //================= + // own methods + BarcodeImpl() = default; + vector initDecode(const Mat &src, const vector> &points) const; + bool decodeWithType(InputArray img, + InputArray points, + vector &decoded_info, + vector &decoded_type) const; + bool detectAndDecodeWithType(InputArray img, + vector &decoded_info, + vector &decoded_type, + OutputArray points_) const; + + //================= + // implement interface + ~BarcodeImpl() CV_OVERRIDE {} + bool detect(InputArray img, OutputArray points) const CV_OVERRIDE; + string decode(InputArray img, InputArray points, OutputArray straight_code) const CV_OVERRIDE; + string detectAndDecode(InputArray img, OutputArray points, OutputArray straight_code) const CV_OVERRIDE; + bool detectMulti(InputArray img, OutputArray points) const CV_OVERRIDE; + bool decodeMulti(InputArray img, InputArray points, vector& decoded_info, OutputArrayOfArrays straight_code) const CV_OVERRIDE; + bool detectAndDecodeMulti(InputArray img, vector& decoded_info, OutputArray points, OutputArrayOfArrays straight_code) const CV_OVERRIDE; +}; + +// return cropped and scaled bar img +vector BarcodeImpl::initDecode(const Mat &src, const vector> &points) const +{ + vector bar_imgs; + for (auto &corners : points) + { + Mat bar_img; + cropROI(src, bar_img, corners); +// sharpen(bar_img, bar_img); + // empirical settings + if (bar_img.cols < 320 || bar_img.cols > 640) + { + float scale = 560.0f / static_cast(bar_img.cols); + sr->processImageScale(bar_img, bar_img, scale, use_nn_sr); + } + bar_imgs.emplace_back(bar_img); + } + return bar_imgs; +} + +bool BarcodeImpl::decodeWithType(InputArray 
img, + InputArray points, + vector &decoded_info, + vector &decoded_type) const +{ + Mat inarr; + if (!checkBarInputImage(img, inarr)) + { + return false; + } + CV_Assert(points.size().width > 0); + CV_Assert((points.size().width % 4) == 0); + vector> src_points; + Mat bar_points = points.getMat(); + bar_points = bar_points.reshape(2, 1); + for (int i = 0; i < bar_points.size().width; i += 4) + { + vector tempMat = bar_points.colRange(i, i + 4); + if (contourArea(tempMat) > 0.0) + { + src_points.push_back(tempMat); + } + } + CV_Assert(!src_points.empty()); + vector bar_imgs = initDecode(inarr, src_points); + BarDecode bardec; + bardec.init(bar_imgs); + bardec.decodeMultiplyProcess(); + const vector info = bardec.getDecodeInformation(); + decoded_info.clear(); + decoded_type.clear(); + bool ok = false; + for (const auto &res : info) + { + if (res.isValid()) + { + ok = true; + } + + decoded_info.emplace_back(res.result); + decoded_type.emplace_back(res.typeString()); + } + return ok; +} + +bool BarcodeImpl::detectAndDecodeWithType(InputArray img, + vector &decoded_info, + vector &decoded_type, + OutputArray points_) const +{ + Mat inarr; + if (!checkBarInputImage(img, inarr)) + { + points_.release(); + return false; + } + vector points; + bool ok = this->detect(inarr, points); + if (!ok) + { + points_.release(); + return false; + } + updatePointsResult(points_, points); + decoded_info.clear(); + decoded_type.clear(); + ok = decodeWithType(inarr, points, decoded_info, decoded_type); + return ok; +} + +bool BarcodeImpl::detect(InputArray img, OutputArray points) const +{ + Mat inarr; + if (!checkBarInputImage(img, inarr)) + { + points.release(); + return false; + } + + Detect bardet; + bardet.init(inarr); + bardet.localization(); + if (!bardet.computeTransformationPoints()) + { return false; } + vector> pnts2f = bardet.getTransformationPoints(); + vector trans_points; + for (auto &i : pnts2f) + { + for (const auto &j : i) + { + trans_points.push_back(j); + } + } + updatePointsResult(points, trans_points); + return true; +} + +string BarcodeImpl::decode(InputArray img, InputArray points, OutputArray straight_code) const +{ + CV_UNUSED(straight_code); + vector decoded_info; + vector decoded_type; + if (!decodeWithType(img, points, decoded_info, decoded_type)) + return string(); + if (decoded_info.size() < 1) + return string(); + return decoded_info[0]; +} + +string BarcodeImpl::detectAndDecode(InputArray img, OutputArray points, OutputArray straight_code) const +{ + CV_UNUSED(straight_code); + vector decoded_info; + vector decoded_type; + vector points_; + if (!detectAndDecodeWithType(img, decoded_info, decoded_type, points_)) + return string(); + if (points_.size() < 4 || decoded_info.size() < 1) + return string(); + points_.resize(4); + points.setTo(points_); + return decoded_info[0]; +} + +bool BarcodeImpl::detectMulti(InputArray img, OutputArray points) const +{ + return detect(img, points); +} + +bool BarcodeImpl::decodeMulti(InputArray img, InputArray points, vector &decoded_info, OutputArrayOfArrays straight_code) const +{ + CV_UNUSED(straight_code); + vector decoded_type; + return decodeWithType(img, points, decoded_info, decoded_type); +} + +bool BarcodeImpl::detectAndDecodeMulti(InputArray img, vector &decoded_info, OutputArray points, OutputArrayOfArrays straight_code) const +{ + CV_UNUSED(straight_code); + vector decoded_type; + return detectAndDecodeWithType(img, decoded_info, decoded_type, points); +} + 
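The pimpl methods above are what the public `cv::barcode::BarcodeDetector` forwards to. As a minimal, hedged sketch of how a caller would exercise this path once the PR is merged (the image path is a placeholder; entries of `info` may be empty or carry an error string when a localized symbol fails to decode):

```cpp
#include <opencv2/imgcodecs.hpp>
#include <opencv2/objdetect/barcode.hpp>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    cv::Mat img = cv::imread("barcodes.jpg"); // placeholder input image
    cv::barcode::BarcodeDetector detector;    // default: bicubic upscaling, no SR model
    std::vector<std::string> info, types;
    std::vector<cv::Point2f> corners;         // 4 corners per detected barcode
    if (detector.detectAndDecodeWithType(img, info, types, corners))
    {
        for (std::size_t i = 0; i < info.size(); i++)
            std::cout << types[i] << ": " << info[i] << "\n"; // e.g. "EAN_13: 4006381333931"
    }
    return 0;
}
```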
+//================================================================================================== +// Public class implementation + +BarcodeDetector::BarcodeDetector() + : BarcodeDetector(string(), string()) +{ +} + +BarcodeDetector::BarcodeDetector(const string &prototxt_path, const string &model_path) +{ + Ptr p_ = new BarcodeImpl(); + p = p_; + if (!prototxt_path.empty() && !model_path.empty()) + { + CV_Assert(utils::fs::exists(prototxt_path)); + CV_Assert(utils::fs::exists(model_path)); + p_->sr = make_shared(); + int res = p_->sr->init(prototxt_path, model_path); + CV_Assert(res == 0); + p_->use_nn_sr = true; + } +} + +BarcodeDetector::~BarcodeDetector() = default; + +bool BarcodeDetector::decodeWithType(InputArray img, InputArray points, vector &decoded_info, vector &decoded_type) const +{ + Ptr p_ = dynamic_pointer_cast(p); + CV_Assert(p_); + return p_->decodeWithType(img, points, decoded_info, decoded_type); +} + +bool BarcodeDetector::detectAndDecodeWithType(InputArray img, vector &decoded_info, vector &decoded_type, OutputArray points_) const +{ + Ptr p_ = dynamic_pointer_cast(p); + CV_Assert(p_); + return p_->detectAndDecodeWithType(img, decoded_info, decoded_type, points_); +} + +}// namespace barcode +} // namespace cv diff --git a/modules/objdetect/src/barcode_decoder/abs_decoder.cpp b/modules/objdetect/src/barcode_decoder/abs_decoder.cpp new file mode 100644 index 0000000000..9eadf4bc31 --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/abs_decoder.cpp @@ -0,0 +1,118 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds + +#include "../precomp.hpp" +#include "abs_decoder.hpp" + +namespace cv { +namespace barcode { + +void cropROI(const Mat &src, Mat &dst, const std::vector &rects) +{ + std::vector vertices = rects; + int height = cvRound(norm(vertices[0] - vertices[1])); + int width = cvRound(norm(vertices[1] - vertices[2])); + if (height > width) + { + std::swap(height, width); + Point2f v0 = vertices[0]; + vertices.erase(vertices.begin()); + vertices.push_back(v0); + } + std::vector dst_vertices{ + Point2f(0, (float) (height - 1)), Point2f(0, 0), Point2f((float) (width - 1), 0), + Point2f((float) (width - 1), (float) (height - 1))}; + dst.create(Size(width, height), CV_8UC1); + Mat M = getPerspectiveTransform(vertices, dst_vertices); + warpPerspective(src, dst, M, dst.size(), cv::INTER_LINEAR, BORDER_CONSTANT, Scalar(255)); +} + +void fillCounter(const std::vector &row, uint start, Counter &counter) +{ + size_t counter_length = counter.pattern.size(); + std::fill(counter.pattern.begin(), counter.pattern.end(), 0); + counter.sum = 0; + size_t end = row.size(); + uchar color = row[start]; + uint counterPosition = 0; + while (start < end) + { + if (row[start] == color) + { // that is, exactly one is true + counter.pattern[counterPosition]++; + counter.sum++; + } + else + { + counterPosition++; + if (counterPosition == counter_length) + { + break; + } + else + { + counter.pattern[counterPosition] = 1; + counter.sum++; + color = 255 - color; + } + } + ++start; + } +} + +static inline uint +patternMatchVariance(const Counter &counter, const std::vector &pattern, uint maxIndividualVariance) +{ + size_t numCounters = counter.pattern.size(); + int total = static_cast(counter.sum); + int patternLength = std::accumulate(pattern.cbegin(), pattern.cend(), 0); + if (total < 
patternLength)
+    {
+        // If we don't even have one pixel per unit of bar width, assume this is too small
+        // to reliably match, so fail:
+        return WHITE; // max possible variance
+    }
+    // We're going to fake floating-point math in integers. We just need to use more bits.
+    // Scale up patternLength so that intermediate values below like scaledPattern will have
+    // more "significant digits"
+
+    int unitBarWidth = (total << INTEGER_MATH_SHIFT) / patternLength;
+    maxIndividualVariance = (maxIndividualVariance * unitBarWidth) >> INTEGER_MATH_SHIFT;
+    uint totalVariance = 0;
+    for (uint x = 0; x < numCounters; x++)
+    {
+        int cnt = counter.pattern[x] << INTEGER_MATH_SHIFT;
+        int scaledPattern = pattern[x] * unitBarWidth;
+        uint variance = std::abs(cnt - scaledPattern);
+        if (variance > maxIndividualVariance)
+        {
+            return WHITE;
+        }
+        totalVariance += variance;
+    }
+    return totalVariance / total;
+}
+
+/**
+* Determines how closely a set of observed counts of runs of black/white values matches a given
+* target pattern. This is reported as the ratio of the total variance from the expected pattern
+* proportions across all pattern elements, to the length of the pattern.
+*
+* @param counters observed counters
+* @param pattern expected pattern
+* @param maxIndividualVariance The most any counter can differ before we give up
+* @return ratio of total variance between counters and pattern compared to total pattern size,
+* where the ratio has been multiplied by 256. So, 0 means no variance (perfect match); 256 means
+* the total variance between counters and patterns equals the pattern length, higher values mean
+* even more variance
+*/
+uint patternMatch(const Counter &counters, const std::vector<int> &pattern, uint maxIndividual)
+{
+    CV_Assert(counters.pattern.size() == pattern.size());
+    return patternMatchVariance(counters, pattern, maxIndividual);
+}
+}
+}
diff --git a/modules/objdetect/src/barcode_decoder/abs_decoder.hpp b/modules/objdetect/src/barcode_decoder/abs_decoder.hpp
new file mode 100644
index 0000000000..87b33e7f1d
--- /dev/null
+++ b/modules/objdetect/src/barcode_decoder/abs_decoder.hpp
@@ -0,0 +1,99 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#ifndef OPENCV_BARCODE_ABS_DECODER_HPP
+#define OPENCV_BARCODE_ABS_DECODER_HPP
+
+#include "opencv2/objdetect/barcode.hpp"
+
+namespace cv {
+namespace barcode {
+using std::string;
+using std::vector;
+constexpr static uchar BLACK = std::numeric_limits<uchar>::min();
+// WHITE elemental area is 0xff
+constexpr static uchar WHITE = std::numeric_limits<uchar>::max();
+
+
+struct Result
+{
+    enum BarcodeType
+    {
+        BARCODE_NONE,
+        BARCODE_EAN_8,
+        BARCODE_EAN_13,
+        BARCODE_UPC_A,
+        BARCODE_UPC_E,
+        BARCODE_UPC_EAN_EXTENSION
+    };
+
+    std::string result;
+    BarcodeType format = Result::BARCODE_NONE;
+
+    Result() = default;
+
+    Result(const std::string &_result, BarcodeType _format)
+    {
+        result = _result;
+        format = _format;
+    }
+    string typeString() const
+    {
+        switch (format)
+        {
+            case Result::BARCODE_EAN_8: return "EAN_8";
+            case Result::BARCODE_EAN_13: return "EAN_13";
+            case Result::BARCODE_UPC_E: return "UPC_E";
+            case Result::BARCODE_UPC_A: return "UPC_A";
+            case Result::BARCODE_UPC_EAN_EXTENSION: return "UPC_EAN_EXTENSION";
+            default: return string();
+        }
+    }
+    bool isValid() const
+    {
+        return format != BARCODE_NONE;
+    }
+};
+
+struct Counter
+{
+    std::vector<int> pattern;
+    uint sum;
+
+    explicit Counter(const vector<int> &_pattern)
+    {
+        pattern = _pattern;
+        sum = 0;
+    }
+};
+
+class AbsDecoder
+{
+public:
+    virtual std::pair<Result, float> decodeROI(const Mat &bar_img) const = 0;
+
+    virtual ~AbsDecoder() = default;
+
+protected:
+    virtual Result decode(const vector<uchar> &data) const = 0;
+
+    virtual bool isValid(const string &result) const = 0;
+
+    size_t bits_num{};
+    size_t digit_number{};
+};
+
+void cropROI(const Mat &_src, Mat &_dst, const std::vector<Point2f> &rect);
+
+void fillCounter(const std::vector<uchar> &row, uint start, Counter &counter);
+
+constexpr static uint INTEGER_MATH_SHIFT = 8;
+constexpr static uint PATTERN_MATCH_RESULT_SCALE_FACTOR = 1 << INTEGER_MATH_SHIFT;
+
+uint patternMatch(const Counter &counters, const std::vector<int> &pattern, uint maxIndividual);
+}
+} // namespace cv
+
+#endif // OPENCV_BARCODE_ABS_DECODER_HPP
diff --git a/modules/objdetect/src/barcode_decoder/common/hybrid_binarizer.cpp b/modules/objdetect/src/barcode_decoder/common/hybrid_binarizer.cpp
new file mode 100644
index 0000000000..76d63d6e46
--- /dev/null
+++ b/modules/objdetect/src/barcode_decoder/common/hybrid_binarizer.cpp
@@ -0,0 +1,195 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Modified from ZXing. Copyright ZXing authors.
+// Licensed under the Apache License, Version 2.0 (the "License").
+
+#include "../../precomp.hpp"
+#include "hybrid_binarizer.hpp"
+
+namespace cv {
+namespace barcode {
+
+
+#define CLAMP(x, x1, x2) x < (x1) ? (x1) : ((x) > (x2) ? (x2) : (x))
+
+// This code computes local luminance over 8x8-pixel blocks and averages black points
+// over 5x5 neighborhoods of blocks, so MINIMUM_DIMENSION below is the smallest
+// dimension in each axis we can accept.
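Intuition for what follows: each pixel is thresholded against a black point estimated from its 8x8 block and the surrounding 5x5 blocks. As a rough, hedged analogue using stock OpenCV (the window size and offset here are illustrative choices, not values from this file):

```cpp
#include <opencv2/imgproc.hpp>

// Rough functional analogue of the ZXing-style hybrid binarizer, for intuition
// only: a mean-based local threshold. The real code below additionally smooths
// black points over a 5x5 block neighborhood and special-cases low-contrast
// blocks, which plain adaptiveThreshold does not do.
void localBinarizeSketch(const cv::Mat &gray, cv::Mat &bin)
{
    CV_Assert(gray.type() == CV_8UC1);
    cv::adaptiveThreshold(gray, bin, 255, cv::ADAPTIVE_THRESH_MEAN_C,
                          cv::THRESH_BINARY, 33 /*illustrative*/, 10 /*illustrative*/);
}
```

The constants that follow fix the 8x8 block geometry the real implementation uses.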
+constexpr static int BLOCK_SIZE_POWER = 3; +constexpr static int BLOCK_SIZE = 1 << BLOCK_SIZE_POWER; // ...0100...00 +constexpr static int BLOCK_SIZE_MASK = BLOCK_SIZE - 1; // ...0011...11 +constexpr static int MINIMUM_DIMENSION = BLOCK_SIZE * 5; +constexpr static int MIN_DYNAMIC_RANGE = 24; + +void +calculateThresholdForBlock(const std::vector &luminances, int sub_width, int sub_height, int width, int height, + const Mat &black_points, Mat &dst) +{ + int maxYOffset = height - BLOCK_SIZE; + int maxXOffset = width - BLOCK_SIZE; + for (int y = 0; y < sub_height; y++) + { + int yoffset = y << BLOCK_SIZE_POWER; + if (yoffset > maxYOffset) + { + yoffset = maxYOffset; + } + int top = CLAMP(y, 2, sub_height - 3); + for (int x = 0; x < sub_width; x++) + { + int xoffset = x << BLOCK_SIZE_POWER; + if (xoffset > maxXOffset) + { + xoffset = maxXOffset; + } + int left = CLAMP(x, 2, sub_width - 3); + int sum = 0; + const auto *black_row = black_points.ptr(top - 2); + for (int z = 0; z <= 4; z++) + { + sum += black_row[left - 2] + black_row[left - 1] + black_row[left] + black_row[left + 1] + + black_row[left + 2]; + black_row += black_points.cols; + } + int average = sum / 25; + int temp_y = 0; + + auto *ptr = dst.ptr(yoffset, xoffset); + for (int offset = yoffset * width + xoffset; temp_y < 8; offset += width) + { + for (int temp_x = 0; temp_x < 8; ++temp_x) + { + *(ptr + temp_x) = (luminances[offset + temp_x] & 255) <= average ? 0 : 255; + } + ++temp_y; + ptr += width; + } + } + } + +} + +Mat calculateBlackPoints(std::vector luminances, int sub_width, int sub_height, int width, int height) +{ + int maxYOffset = height - BLOCK_SIZE; + int maxXOffset = width - BLOCK_SIZE; + Mat black_points(Size(sub_width, sub_height), CV_8UC1); + for (int y = 0; y < sub_height; y++) + { + int yoffset = y << BLOCK_SIZE_POWER; + if (yoffset > maxYOffset) + { + yoffset = maxYOffset; + } + for (int x = 0; x < sub_width; x++) + { + int xoffset = x << BLOCK_SIZE_POWER; + if (xoffset > maxXOffset) + { + xoffset = maxXOffset; + } + int sum = 0; + int min = 0xFF; + int max = 0; + for (int yy = 0, offset = yoffset * width + xoffset; yy < BLOCK_SIZE; yy++, offset += width) + { + for (int xx = 0; xx < BLOCK_SIZE; xx++) + { + int pixel = luminances[offset + xx] & 0xFF; + sum += pixel; + // still looking for good contrast + if (pixel < min) + { + min = pixel; + } + if (pixel > max) + { + max = pixel; + } + } + // short-circuit min/max tests once dynamic range is met + if (max - min > MIN_DYNAMIC_RANGE) + { + // finish the rest of the rows quickly + for (yy++, offset += width; yy < BLOCK_SIZE; yy++, offset += width) + { + for (int xx = 0; xx < BLOCK_SIZE; xx++) + { + sum += luminances[offset + xx] & 0xFF; + } + } + } + } + + // The default estimate is the average of the values in the block. + int average = sum >> (BLOCK_SIZE_POWER * 2); + if (max - min <= MIN_DYNAMIC_RANGE) + { + // If variation within the block is low, assume this is a block with only light or only + // dark pixels. In that case we do not want to use the average, as it would divide this + // low contrast area into black and white pixels, essentially creating data out of noise. + // + // The default assumption is that the block is light/background. Since no estimate for + // the level of dark pixels exists locally, use half the min for the block. + average = min / 2; + + if (y > 0 && x > 0) + { + // Correct the "white background" assumption for blocks that have neighbors by comparing + // the pixels in this block to the previously calculated black points. 
This is based on + // the fact that dark barcode symbology is always surrounded by some amount of light + // background for which reasonable black point estimates were made. The bp estimated at + // the boundaries is used for the interior. + + // The (min < bp) is arbitrary but works better than other heuristics that were tried. + int averageNeighborBlackPoint = + (black_points.at(y - 1, x) + (2 * black_points.at(y, x - 1)) + + black_points.at(y - 1, x - 1)) / 4; + if (min < averageNeighborBlackPoint) + { + average = averageNeighborBlackPoint; + } + } + } + black_points.at(y, x) = (uchar) average; + } + } + return black_points; + +} + + +void hybridBinarization(const Mat &src, Mat &dst) +{ + int width = src.cols; + int height = src.rows; + + if (width >= MINIMUM_DIMENSION && height >= MINIMUM_DIMENSION) + { + std::vector luminances(src.begin(), src.end()); + + int sub_width = width >> BLOCK_SIZE_POWER; + if ((width & BLOCK_SIZE_MASK) != 0) + { + sub_width++; + } + + int sub_height = height >> BLOCK_SIZE_POWER; + if ((height & BLOCK_SIZE_MASK) != 0) + { + sub_height++; + } + + Mat black_points = calculateBlackPoints(luminances, sub_width, sub_height, width, height); + + dst.create(src.size(), src.type()); + calculateThresholdForBlock(luminances, sub_width, sub_height, width, height, black_points, dst); + } + else + { + threshold(src, dst, 155, 255, THRESH_OTSU + THRESH_BINARY); + } + +} +} +} diff --git a/modules/objdetect/src/barcode_decoder/common/hybrid_binarizer.hpp b/modules/objdetect/src/barcode_decoder/common/hybrid_binarizer.hpp new file mode 100644 index 0000000000..88f93d03c6 --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/common/hybrid_binarizer.hpp @@ -0,0 +1,22 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// Modified from ZXing. Copyright ZXing authors. +// Licensed under the Apache License, Version 2.0 (the "License"). + +#ifndef OPENCV_BARCODE_HYBRID_BINARIZER_HPP +#define OPENCV_BARCODE_HYBRID_BINARIZER_HPP + +namespace cv { +namespace barcode { + +void hybridBinarization(const Mat &src, Mat &dst); + +void +calculateThresholdForBlock(const std::vector &luminances, int sub_width, int sub_height, int width, int height, + const Mat &black_points, Mat &dst); + +Mat calculateBlackPoints(std::vector luminances, int sub_width, int sub_height, int width, int height); +} +} +#endif // OPENCV_BARCODE_HYBRID_BINARIZER_HPP diff --git a/modules/objdetect/src/barcode_decoder/common/super_scale.cpp b/modules/objdetect/src/barcode_decoder/common/super_scale.cpp new file mode 100644 index 0000000000..0c9f75f156 --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/common/super_scale.cpp @@ -0,0 +1,77 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Tencent is pleased to support the open source community by making WeChat QRCode available. +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 
+// Modified by darkliang wangberlinT + +#include "../../precomp.hpp" +#include "super_scale.hpp" + +#ifdef HAVE_OPENCV_DNN + +namespace cv { +namespace barcode { +constexpr static float MAX_SCALE = 4.0f; + +int SuperScale::init(const std::string &proto_path, const std::string &model_path) +{ + srnet_ = dnn::readNetFromCaffe(proto_path, model_path); + net_loaded_ = true; + return 0; +} + +void SuperScale::processImageScale(const Mat &src, Mat &dst, float scale, const bool &use_sr, int sr_max_size) +{ + scale = min(scale, MAX_SCALE); + if (scale > .0 && scale < 1.0) + { // down sample + resize(src, dst, Size(), scale, scale, INTER_AREA); + } + else if (scale > 1.5 && scale < 2.0) + { + resize(src, dst, Size(), scale, scale, INTER_CUBIC); + } + else if (scale >= 2.0) + { + int width = src.cols; + int height = src.rows; + if (use_sr && (int) sqrt(width * height * 1.0) < sr_max_size && net_loaded_) + { + superResolutionScale(src, dst); + if (scale > 2.0) + { + processImageScale(dst, dst, scale / 2.0f, use_sr); + } + } + else + { resize(src, dst, Size(), scale, scale, INTER_CUBIC); } + } +} + +int SuperScale::superResolutionScale(const Mat &src, Mat &dst) +{ + Mat blob; + dnn::blobFromImage(src, blob, 1.0 / 255, Size(src.cols, src.rows), {0.0f}, false, false); + + srnet_.setInput(blob); + auto prob = srnet_.forward(); + + dst = Mat(prob.size[2], prob.size[3], CV_8UC1); + + for (int row = 0; row < prob.size[2]; row++) + { + const float *prob_score = prob.ptr(0, 0, row); + auto *dst_row = dst.ptr(row); + for (int col = 0; col < prob.size[3]; col++) + { + dst_row[col] = saturate_cast(prob_score[col] * 255.0f); + } + } + return 0; +} +} // namespace barcode +} // namespace cv + +#endif // HAVE_OPENCV_DNN diff --git a/modules/objdetect/src/barcode_decoder/common/super_scale.hpp b/modules/objdetect/src/barcode_decoder/common/super_scale.hpp new file mode 100644 index 0000000000..70e47424e4 --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/common/super_scale.hpp @@ -0,0 +1,69 @@ +/// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Tencent is pleased to support the open source community by making WeChat QRCode available. +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 
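The header below keeps the same `SuperScale` interface whether or not OpenCV was built with DNN support; without it, `processImageScale` falls back to bicubic resizing and logs a warning. From the caller's side, the model is only wired in through the two-argument `BarcodeDetector` constructor seen earlier; a hedged sketch, with placeholder file names for whatever Caffe super-resolution model the caller ships:

```cpp
#include <opencv2/objdetect/barcode.hpp>

// Construct a detector that upscales small barcode crops with a DNN
// super-resolution model instead of bicubic interpolation. Both paths must
// exist, otherwise the constructor's CV_Assert fires. "sr.prototxt" and
// "sr.caffemodel" are placeholder names, not files shipped by this PR.
cv::barcode::BarcodeDetector makeDetectorWithSR()
{
    return cv::barcode::BarcodeDetector("sr.prototxt", "sr.caffemodel");
}
```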
+ +#ifndef OPENCV_BARCODE_SUPER_SCALE_HPP +#define OPENCV_BARCODE_SUPER_SCALE_HPP + +#ifdef HAVE_OPENCV_DNN + +#include "opencv2/dnn.hpp" + +namespace cv { +namespace barcode { + +class SuperScale +{ +public: + SuperScale() = default; + + ~SuperScale() = default; + + int init(const std::string &proto_path, const std::string &model_path); + + void processImageScale(const Mat &src, Mat &dst, float scale, const bool &use_sr, int sr_max_size = 160); + +private: + dnn::Net srnet_; + bool net_loaded_ = false; + + int superResolutionScale(const cv::Mat &src, cv::Mat &dst); +}; + +} // namespace barcode +} // namespace cv + +#else // HAVE_OPENCV_DNN + +#include "opencv2/core.hpp" +#include "opencv2/core/utils/logger.hpp" + +namespace cv { +namespace barcode { + +class SuperScale +{ +public: + int init(const std::string &, const std::string &) + { + return 0; + } + void processImageScale(const Mat &src, Mat &dst, float scale, const bool & isEnabled, int) + { + if (isEnabled) + { + CV_LOG_WARNING(NULL, "objdetect/barcode: SuperScaling disabled - OpenCV has been built without DNN support"); + } + resize(src, dst, Size(), scale, scale, INTER_CUBIC); + } +}; + +} // namespace barcode +} // namespace cv + +#endif // !HAVE_OPENCV_DNN + +#endif // OPENCV_BARCODE_SUPER_SCALE_HPP diff --git a/modules/objdetect/src/barcode_decoder/common/utils.cpp b/modules/objdetect/src/barcode_decoder/common/utils.cpp new file mode 100644 index 0000000000..123955c665 --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/common/utils.cpp @@ -0,0 +1,36 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds + +#include "../../precomp.hpp" +#include "utils.hpp" +#include "hybrid_binarizer.hpp" + +namespace cv { +namespace barcode { + + +void sharpen(const Mat &src, const Mat &dst) +{ + Mat blur; + GaussianBlur(src, blur, Size(0, 0), 25); + addWeighted(src, 2, blur, -1, -20, dst); +} + +void binarize(const Mat &src, Mat &dst, BinaryType mode) +{ + switch (mode) + { + case OTSU: + threshold(src, dst, 155, 255, THRESH_OTSU + THRESH_BINARY); + break; + case HYBRID: + hybridBinarization(src, dst); + break; + default: + CV_Error(Error::StsNotImplemented, "This binary type is not yet implemented"); + } +} +} +} diff --git a/modules/objdetect/src/barcode_decoder/common/utils.hpp b/modules/objdetect/src/barcode_decoder/common/utils.hpp new file mode 100644 index 0000000000..85597c017b --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/common/utils.hpp @@ -0,0 +1,26 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#ifndef OPENCV_BARCODE_UTILS_HPP
+#define OPENCV_BARCODE_UTILS_HPP
+
+
+namespace cv {
+namespace barcode {
+
+enum BinaryType
+{
+    OTSU = 0, HYBRID = 1
+};
+static constexpr BinaryType binary_types[] = {OTSU, HYBRID};
+
+void sharpen(const Mat &src, const Mat &dst);
+
+void binarize(const Mat &src, Mat &dst, BinaryType mode);
+
+}
+}
+
+#endif // OPENCV_BARCODE_UTILS_HPP
diff --git a/modules/objdetect/src/barcode_decoder/ean13_decoder.cpp b/modules/objdetect/src/barcode_decoder/ean13_decoder.cpp
new file mode 100644
index 0000000000..8be6122a7c
--- /dev/null
+++ b/modules/objdetect/src/barcode_decoder/ean13_decoder.cpp
@@ -0,0 +1,92 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#include "../precomp.hpp"
+#include "ean13_decoder.hpp"
+
+// first-digit decoding scheme taken from https://baike.baidu.com/item/EAN-13
+
+namespace cv {
+namespace barcode {
+
+static constexpr size_t EAN13BITS_NUM = 95;
+static constexpr size_t EAN13DIGIT_NUM = 13;
+// the input is assumed to be a row taken from an already binarized matrix.
+/**
+* decode EAN-13
+* @param data: the input row of pixel values
+* @param start: the index at which scanning begins, 0-based, at most data.size() - 1;
+* scanning starts at data[start]
+*/
+Result Ean13Decoder::decode(const vector<uchar> &data) const
+{
+    string result;
+    char decode_result[EAN13DIGIT_NUM + 1]{'\0'};
+    if (data.size() < EAN13BITS_NUM)
+    {
+        return Result("Wrong Size", Result::BARCODE_NONE);
+    }
+    pair<uint, uint> pattern;
+    if (!findStartGuardPatterns(data, pattern))
+    {
+        return Result("Begin Pattern Not Found", Result::BARCODE_NONE);
+    }
+    uint start = pattern.second;
+    Counter counter(vector<int>{0, 0, 0, 0});
+    size_t end = data.size();
+    int first_char_bit = 0;
+    // digits [1,6] are the left half of the EAN, digits [7,12] the right half;
+    // digit 0 is derived from the parity pattern of the left half
+    for (int i = 1; i < 7 && start < end; ++i)
+    {
+        int bestMatch = decodeDigit(data, counter, start, get_AB_Patterns());
+        if (bestMatch == -1)
+        {
+            return Result("Decode Error", Result::BARCODE_NONE);
+        }
+        decode_result[i] = static_cast<char>('0' + bestMatch % 10);
+        start = counter.sum + start;
+        first_char_bit += (bestMatch >= 10) << i;
+    }
+    decode_result[0] = static_cast<char>(FIRST_CHAR_ARRAY()[first_char_bit >> 2] + '0');
+    // Why shift right by 2? The loop index i starts at 1, so bit 0 is never set,
+    // and the first left digit (i == 1) is always encoded with pattern A, so its bit
+    // carries no information either; only bits 2..6 are needed to index the table.
+    Counter middle_counter(vector<int>(MIDDLE_PATTERN().size()));
+    if (!findGuardPatterns(data, start, true, MIDDLE_PATTERN(), middle_counter, pattern))
+    {
+        return Result("Middle Pattern Not Found", Result::BARCODE_NONE);
+
+    }
+    start = pattern.second;
+    for (int i = 0; i < 6 && start < end; ++i)
+    {
+        int bestMatch = decodeDigit(data, counter, start, get_A_or_C_Patterns());
+        if (bestMatch == -1)
+        {
+            return Result("Decode Error", Result::BARCODE_NONE);
+        }
+        decode_result[i + 7] = static_cast<char>('0' + bestMatch);
+        start = counter.sum + start;
+    }
+    Counter end_counter(vector<int>(BEGIN_PATTERN().size()));
+    if (!findGuardPatterns(data, start, false, BEGIN_PATTERN(), end_counter, pattern))
+    {
+        return Result("End Pattern Not Found", Result::BARCODE_NONE);
+    }
+    result = string(decode_result);
+    if (!isValid(result))
+    {
+        return Result("Wrong: " + result.append(string(EAN13DIGIT_NUM - result.size(), ' ')), Result::BARCODE_NONE);
+    }
+    return Result(result, Result::BARCODE_EAN_13);
+}
+
+Ean13Decoder::Ean13Decoder()
+{
+    this->bits_num = EAN13BITS_NUM;
+    this->digit_number = EAN13DIGIT_NUM;
+}
+}
+}
diff --git a/modules/objdetect/src/barcode_decoder/ean13_decoder.hpp b/modules/objdetect/src/barcode_decoder/ean13_decoder.hpp
new file mode 100644
index 0000000000..1fcedd7c67
--- /dev/null
+++ b/modules/objdetect/src/barcode_decoder/ean13_decoder.hpp
@@ -0,0 +1,31 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#ifndef OPENCV_BARCODE_EAN13_DECODER_HPP
+#define OPENCV_BARCODE_EAN13_DECODER_HPP
+
+#include "upcean_decoder.hpp"
+
+namespace cv {
+namespace barcode {
+//extern struct EncodePair;
+using std::string;
+using std::vector;
+using std::pair;
+
+
+class Ean13Decoder : public UPCEANDecoder
+{
+public:
+    Ean13Decoder();
+
+    ~Ean13Decoder() override = default;
+
+protected:
+    Result decode(const vector<uchar> &data) const override;
+};
+}
+} // namespace cv
+#endif // OPENCV_BARCODE_EAN13_DECODER_HPP
diff --git a/modules/objdetect/src/barcode_decoder/ean8_decoder.cpp b/modules/objdetect/src/barcode_decoder/ean8_decoder.cpp
new file mode 100644
index 0000000000..23be9dcd6c
--- /dev/null
+++ b/modules/objdetect/src/barcode_decoder/ean8_decoder.cpp
@@ -0,0 +1,79 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds + +#include "../precomp.hpp" +#include "ean8_decoder.hpp" + +namespace cv { +namespace barcode { +static constexpr size_t EAN8BITS_NUM = 70; +static constexpr size_t EAN8DIGIT_NUM = 8; + +Result Ean8Decoder::decode(const vector &data) const +{ + std::string result; + char decode_result[EAN8DIGIT_NUM + 1]{'\0'}; + if (data.size() < EAN8BITS_NUM) + { + return Result("Wrong Size", Result::BARCODE_NONE); + } + pair pattern; + if (!findStartGuardPatterns(data, pattern)) + { + return Result("Begin Pattern Not Found", Result::BARCODE_NONE); + } + uint start = pattern.second; + Counter counter(vector{0, 0, 0, 0}); + size_t end = data.size(); + for (int i = 0; i < 4 && start < end; ++i) + { + int bestMatch = decodeDigit(data, counter, start, get_A_or_C_Patterns()); + if (bestMatch == -1) + { + return Result("Decode Error", Result::BARCODE_NONE); + } + decode_result[i] = static_cast('0' + bestMatch % 10); + start = counter.sum + start; + } + + Counter middle_counter(vector(MIDDLE_PATTERN().size())); + + if (!findGuardPatterns(data, start, true, MIDDLE_PATTERN(), middle_counter, pattern)) + { + return Result("Middle Pattern Not Found", Result::BARCODE_NONE); + } + + start = pattern.second; + for (int i = 0; i < 4 && start < end; ++i) + { + int bestMatch = decodeDigit(data, counter, start, get_A_or_C_Patterns()); + if (bestMatch == -1) + { + return Result("Decode Error", Result::BARCODE_NONE); + } + decode_result[i + 4] = static_cast('0' + bestMatch); + start = counter.sum + start; + } + Counter end_counter(vector(BEGIN_PATTERN().size())); + if (!findGuardPatterns(data, start, false, BEGIN_PATTERN(), end_counter, pattern)) + { + return Result("End Pattern Not Found", Result::BARCODE_NONE); + } + result = string(decode_result); + if (!isValid(result)) + { + return Result("Wrong: " + result.append(string(EAN8DIGIT_NUM - result.size(), ' ')), Result::BARCODE_NONE); + } + return Result(result, Result::BARCODE_EAN_8); +} + +Ean8Decoder::Ean8Decoder() +{ + this->digit_number = EAN8DIGIT_NUM; + this->bits_num = EAN8BITS_NUM; +} + +} +} diff --git a/modules/objdetect/src/barcode_decoder/ean8_decoder.hpp b/modules/objdetect/src/barcode_decoder/ean8_decoder.hpp new file mode 100644 index 0000000000..4f5a0624ef --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/ean8_decoder.hpp @@ -0,0 +1,32 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds + +#ifndef OPENCV_BARCODE_EAN8_DECODER_HPP +#define OPENCV_BARCODE_EAN8_DECODER_HPP + +#include "upcean_decoder.hpp" + +namespace cv { +namespace barcode { + +using std::string; +using std::vector; +using std::pair; + +class Ean8Decoder : public UPCEANDecoder +{ + +public: + Ean8Decoder(); + + ~Ean8Decoder() override = default; + +protected: + Result decode(const vector &data) const override; +}; +} +} + +#endif // OPENCV_BARCODE_EAN8_DECODER_HPP diff --git a/modules/objdetect/src/barcode_decoder/upcean_decoder.cpp b/modules/objdetect/src/barcode_decoder/upcean_decoder.cpp new file mode 100644 index 0000000000..2288f5b81f --- /dev/null +++ b/modules/objdetect/src/barcode_decoder/upcean_decoder.cpp @@ -0,0 +1,290 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds + +#include "../precomp.hpp" +#include "upcean_decoder.hpp" +#include + +namespace cv { +namespace barcode { + +static constexpr int DIVIDE_PART = 15; +static constexpr int BIAS_PART = 2; + +#if 0 +void UPCEANDecoder::drawDebugLine(Mat &debug_img, const Point2i &begin, const Point2i &end) const +{ + Result result; + std::vector middle; + LineIterator line = LineIterator(debug_img, begin, end); + middle.reserve(line.count); + for (int cnt = 0; cnt < line.count; cnt++, line++) + { + middle.push_back(debug_img.at(line.pos())); + } + std::pair start_range; + if (findStartGuardPatterns(middle, start_range)) + { + circle(debug_img, Point2i(begin.x + start_range.second, begin.y), 2, Scalar(0), 2); + } + result = this->decode(middle); + if (result.format == Result::BARCODE_NONE) + { + result = this->decode(std::vector(middle.crbegin(), middle.crend())); + } + if (result.format == Result::BARCODE_NONE) + { + cv::line(debug_img, begin, end, Scalar(0), 2); + cv::putText(debug_img, result.result, begin, cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(0, 0, 255), 1); + } +} +#endif + +bool UPCEANDecoder::findGuardPatterns(const std::vector &row, uint rowOffset, uchar whiteFirst, + const std::vector &pattern, Counter &counter, std::pair &result) +{ + size_t patternLength = pattern.size(); + size_t width = row.size(); + uchar color = whiteFirst ? WHITE : BLACK; + rowOffset = (int) (std::find(row.cbegin() + rowOffset, row.cend(), color) - row.cbegin()); + uint counterPosition = 0; + uint patternStart = rowOffset; + for (uint x = rowOffset; x < width; x++) + { + if (row[x] == color) + { + counter.pattern[counterPosition]++; + counter.sum++; + } + else + { + if (counterPosition == patternLength - 1) + { + if (patternMatch(counter, pattern, MAX_INDIVIDUAL_VARIANCE) < MAX_AVG_VARIANCE) + { + result.first = patternStart; + result.second = x; + return true; + } + patternStart += counter.pattern[0] + counter.pattern[1]; + counter.sum -= counter.pattern[0] + counter.pattern[1]; + + std::copy(counter.pattern.begin() + 2, counter.pattern.end(), counter.pattern.begin()); + + counter.pattern[patternLength - 2] = 0; + counter.pattern[patternLength - 1] = 0; + counterPosition--; + } + else + { + counterPosition++; + } + counter.pattern[counterPosition] = 1; + counter.sum++; + color = (std::numeric_limits::max() - color); + } + } + return false; +} + +bool UPCEANDecoder::findStartGuardPatterns(const std::vector &row, std::pair &start_range) +{ + bool is_find = false; + int next_start = 0; + while (!is_find) + { + Counter guard_counters(std::vector{0, 0, 0}); + if (!findGuardPatterns(row, next_start, BLACK, BEGIN_PATTERN(), guard_counters, start_range)) + { + return false; + } + int start = static_cast(start_range.first); + next_start = static_cast(start_range.second); + int quiet_start = max(start - (next_start - start), 0); + is_find = (quiet_start != start) && + (std::find(std::begin(row) + quiet_start, std::begin(row) + start, BLACK) == std::begin(row) + start); + } + return true; +} + +int UPCEANDecoder::decodeDigit(const std::vector &row, Counter &counters, uint rowOffset, + const std::vector> &patterns) +{ + fillCounter(row, rowOffset, counters); + int bestMatch = -1; + uint bestVariance = MAX_AVG_VARIANCE; // worst variance we'll accept + int i = 0; + for (const auto &pattern : patterns) + { + uint variance = 
+
+int UPCEANDecoder::decodeDigit(const std::vector<uchar> &row, Counter &counters, uint rowOffset,
+                               const std::vector<std::vector<int>> &patterns)
+{
+    fillCounter(row, rowOffset, counters);
+    int bestMatch = -1;
+    uint bestVariance = MAX_AVG_VARIANCE; // worst variance we'll accept
+    int i = 0;
+    for (const auto &pattern : patterns)
+    {
+        uint variance = patternMatch(counters, pattern, MAX_INDIVIDUAL_VARIANCE);
+        if (variance < bestVariance)
+        {
+            bestVariance = variance;
+            bestMatch = i;
+        }
+        i++;
+    }
+    return std::max(-1, bestMatch);
+    // -1 means no pattern matched within the accepted variance
+}
+
+/* Decode a barcode ROI; returns the result together with its confidence. */
+std::pair<Result, float> UPCEANDecoder::decodeROI(const Mat &bar_img) const
+{
+    if ((size_t) bar_img.cols < this->bits_num)
+    {
+        return std::make_pair(Result{string(), Result::BARCODE_NONE}, 0.0F);
+    }
+
+    std::map<std::string, int> result_vote;
+    std::map<Result::BarcodeType, int> format_vote;
+    int vote_cnt = 0;
+    int total_vote = 0;
+    std::string max_result;
+    Result::BarcodeType max_type = Result::BARCODE_NONE;
+
+    const int step = bar_img.rows / (DIVIDE_PART + BIAS_PART);
+    Result result;
+    int row_num;
+    for (int i = 0; i < DIVIDE_PART; ++i)
+    {
+        row_num = (i + BIAS_PART / 2) * step;
+        if (row_num < 0 || row_num > bar_img.rows)
+        {
+            continue;
+        }
+        const auto *ptr = bar_img.ptr<uchar>(row_num);
+        vector<uchar> line(ptr, ptr + bar_img.cols);
+        result = decodeLine(line);
+        if (result.format != Result::BARCODE_NONE)
+        {
+            total_vote++;
+            result_vote[result.result] += 1;
+            if (result_vote[result.result] > vote_cnt)
+            {
+                vote_cnt = result_vote[result.result];
+                max_result = result.result;
+                max_type = result.format;
+            }
+        }
+    }
+    // reject if the winner holds less than a quarter of all votes
+    if (total_vote == 0 || (vote_cnt << 2) < total_vote)
+    {
+        return std::make_pair(Result(string(), Result::BARCODE_NONE), 0.0f);
+    }
+
+    float confidence = (float) vote_cnt / (float) DIVIDE_PART;
+    // check whether it is actually UPC-A (an EAN-13 with a leading zero)
+    if (max_type == Result::BARCODE_EAN_13 && max_result[0] == '0')
+    {
+        max_result = max_result.substr(1, 12); // UPC-A length is 12
+        max_type = Result::BARCODE_UPC_A;
+    }
+    return std::make_pair(Result(max_result, max_type), confidence);
+}
+
+
+Result UPCEANDecoder::decodeLine(const vector<uchar> &line) const
+{
+    Result result = this->decode(line);
+    if (result.format == Result::BARCODE_NONE)
+    {
+        result = this->decode(std::vector<uchar>(line.crbegin(), line.crend()));
+    }
+    return result;
+}
+
+bool UPCEANDecoder::isValid(const string &result) const
+{
+    if (result.size() != digit_number)
+    {
+        return false;
+    }
+    int sum = 0;
+    for (int index = (int) result.size() - 2, i = 1; index >= 0; index--, i++)
+    {
+        int temp = result[index] - '0';
+        sum += (temp + ((i & 1) != 0 ? temp << 1 : 0));
+    }
+    return (result.back() - '0') == ((10 - (sum % 10)) % 10);
+}
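The checksum loop above applies weights 3 and 1 alternately from the right (`temp + (temp << 1)` is simply `3 * temp`). Worked through for the valid EAN-8 string `"73513537"`:

```cpp
// digits :  7   3   5   1   3   5   3  | check digit 7
// weights:  3   1   3   1   3   1   3
// sum = 3*7 + 1*3 + 3*5 + 1*1 + 3*3 + 1*5 + 3*3 = 63
// check = (10 - 63 % 10) % 10 = 7  ->  isValid("73513537") is true
```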
+
+// A patterns, indexed by digit value; the right-half (C) patterns use the same widths
+const std::vector<std::vector<int>> &get_A_or_C_Patterns()
+{
+    static const std::vector<std::vector<int>> A_or_C_Patterns{{3, 2, 1, 1}, // 0
+                                                               {2, 2, 2, 1}, // 1
+                                                               {2, 1, 2, 2}, // 2
+                                                               {1, 4, 1, 1}, // 3
+                                                               {1, 1, 3, 2}, // 4
+                                                               {1, 2, 3, 1}, // 5
+                                                               {1, 1, 1, 4}, // 6
+                                                               {1, 3, 1, 2}, // 7
+                                                               {1, 2, 1, 3}, // 8
+                                                               {3, 1, 1, 2}  // 9
+    };
+    return A_or_C_Patterns;
+}
+
+const std::vector<std::vector<int>> &get_AB_Patterns()
+{
+    static const std::vector<std::vector<int>> AB_Patterns = [] {
+        constexpr uint offset = 10;
+        auto AB_Patterns_inited = std::vector<std::vector<int>>(offset << 1, std::vector<int>(PATTERN_LENGTH, 0));
+        std::copy(get_A_or_C_Patterns().cbegin(), get_A_or_C_Patterns().cend(), AB_Patterns_inited.begin());
+        // a B pattern is the corresponding A pattern reversed
+        for (uint i = 0; i < offset; ++i)
+        {
+            for (uint j = 0; j < PATTERN_LENGTH; ++j)
+            {
+                AB_Patterns_inited[i + offset][j] = AB_Patterns_inited[i][PATTERN_LENGTH - j - 1];
+            }
+        }
+        return AB_Patterns_inited;
+    }();
+    return AB_Patterns;
+}
+
+const std::vector<int> &BEGIN_PATTERN()
+{
+    // only the 1:1:1 (black:white:black) ratio matters
+    static const std::vector<int> BEGIN_PATTERN_(3, 1);
+    return BEGIN_PATTERN_;
+}
+
+const std::vector<int> &MIDDLE_PATTERN()
+{
+    // only the 1:1:1:1:1 (white:black:white:black:white) ratio matters
+    static const std::vector<int> MIDDLE_PATTERN_(5, 1);
+    return MIDDLE_PATTERN_;
+}
+
+const std::array<char, 32> &FIRST_CHAR_ARRAY()
+{
+    // A 32-entry array simulates a hash map: the data set is tiny, so neither
+    // a real hash map nor brute-force searching the 10 entries is worthwhile.
+    static const std::array<char, 32> pattern{
+            '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x00', '\x00', '\x00', '\x09', '\x00',
+            '\x08', '\x03', '\x00', '\x00', '\x00', '\x00', '\x05', '\x00', '\x07', '\x02', '\x00', '\x00', '\x04',
+            '\x01', '\x00', '\x00', '\x00', '\x00', '\x00'};
+    // The array is indexed by a 5-bit parity pattern, hence 32 slots:
+    // 0b00000 ->  0 -> 0
+    // 0b11010 -> 26 -> 1
+    // 0b10110 -> 22 -> 2
+    // 0b01110 -> 14 -> 3
+    // 0b11001 -> 25 -> 4
+    // 0b10011 -> 19 -> 5
+    // 0b00111 ->  7 -> 6
+    // 0b10101 -> 21 -> 7
+    // 0b01101 -> 13 -> 8
+    // 0b01011 -> 11 -> 9
+    // The first of the six left-half digits is dropped from the pattern:
+    // it is always encoded with an A pattern, so it carries no information.
+    return pattern;
+}
+}
+
+} // namespace cv
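Reading `FIRST_CHAR_ARRAY` back is a one-line lookup; for instance, using the mapping documented in the table above (a sketch, with an illustrative variable name):

```cpp
// 0b11010 = 26 encodes the implicit first digit '1'
char first_digit = static_cast<char>('0' + FIRST_CHAR_ARRAY()[0b11010]);
```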
diff --git a/modules/objdetect/src/barcode_decoder/upcean_decoder.hpp b/modules/objdetect/src/barcode_decoder/upcean_decoder.hpp
new file mode 100644
index 0000000000..6efc1094a5
--- /dev/null
+++ b/modules/objdetect/src/barcode_decoder/upcean_decoder.hpp
@@ -0,0 +1,67 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#ifndef OPENCV_BARCODE_UPCEAN_DECODER_HPP
+#define OPENCV_BARCODE_UPCEAN_DECODER_HPP
+
+#include "abs_decoder.hpp"
+
+/**
+ * UPCEANDecoder is the abstract base class for the UPC/EAN family of
+ * formats; EAN-13, EAN-8, UPC-A, UPC-E, etc. extend this class.
+*/
+namespace cv {
+namespace barcode {
+using std::string;
+using std::vector;
+
+class UPCEANDecoder : public AbsDecoder
+{
+
+public:
+    ~UPCEANDecoder() override = default;
+
+    std::pair<Result, float> decodeROI(const Mat &bar_img) const override;
+
+protected:
+    static int decodeDigit(const std::vector<uchar> &row, Counter &counters, uint rowOffset,
+                           const std::vector<std::vector<int>> &patterns);
+
+    static bool
+    findGuardPatterns(const std::vector<uchar> &row, uint rowOffset, uchar whiteFirst, const std::vector<int> &pattern,
+                      Counter &counter, std::pair<uint, uint> &result);
+
+    static bool findStartGuardPatterns(const std::vector<uchar> &row, std::pair<uint, uint> &start_range);
+
+    Result decodeLine(const vector<uchar> &line) const;
+
+    Result decode(const vector<uchar> &bar) const override = 0;
+
+    bool isValid(const string &result) const override;
+
+private:
+    #if 0
+    void drawDebugLine(Mat &debug_img, const Point2i &begin, const Point2i &end) const;
+    #endif
+};
+
+const std::vector<std::vector<int>> &get_A_or_C_Patterns();
+
+const std::vector<std::vector<int>> &get_AB_Patterns();
+
+const std::vector<int> &BEGIN_PATTERN();
+
+const std::vector<int> &MIDDLE_PATTERN();
+
+const std::array<char, 32> &FIRST_CHAR_ARRAY();
+
+constexpr static uint PATTERN_LENGTH = 4;
+constexpr static uint MAX_AVG_VARIANCE = static_cast<uint>(PATTERN_MATCH_RESULT_SCALE_FACTOR * 0.48f);
+constexpr static uint MAX_INDIVIDUAL_VARIANCE = static_cast<uint>(PATTERN_MATCH_RESULT_SCALE_FACTOR * 0.7f);
+
+}
+} // namespace cv
+
+#endif // OPENCV_BARCODE_UPCEAN_DECODER_HPP
diff --git a/modules/objdetect/src/barcode_detector/bardetect.cpp b/modules/objdetect/src/barcode_detector/bardetect.cpp
new file mode 100644
index 0000000000..b156d1b25d
--- /dev/null
+++ b/modules/objdetect/src/barcode_detector/bardetect.cpp
@@ -0,0 +1,510 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#include "../precomp.hpp"
+#include "bardetect.hpp"
+
+
+namespace cv {
+namespace barcode {
+static constexpr float PI = static_cast<float>(CV_PI);
+static constexpr float HALF_PI = static_cast<float>(CV_PI / 2);
+
+#define CALCULATE_SUM(ptr, result) \
+    top_left = static_cast<float>(*((ptr) + left_col + integral_cols * top_row));\
+    top_right = static_cast<float>(*((ptr) + integral_cols * top_row + right_col));\
+    bottom_right = static_cast<float>(*((ptr) + right_col + bottom_row * integral_cols));\
+    bottom_left = static_cast<float>(*((ptr) + bottom_row * integral_cols + left_col));\
+    (result) = (bottom_right - bottom_left - top_right + top_left);
+
+
+inline bool Detect::isValidCoord(const Point &coord, const Size &limit)
+{
+    if ((coord.x < 0) || (coord.y < 0))
+    {
+        return false;
+    }
+
+    if ((unsigned) coord.x > (unsigned) (limit.width - 1) || ((unsigned) coord.y > (unsigned) (limit.height - 1)))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+//==============================================================================
+// NMSBoxes copied from modules/dnn/src/nms.inl.hpp
+// TODO: move NMSBoxes outside the dnn module to allow other modules to use it
+
+namespace
+{
+
+template <typename T>
+static inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
+                                        const std::pair<float, T>& pair2)
+{
+    return pair1.first > pair2.first;
+}
+
+inline void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold, const int top_k,
+                             std::vector<std::pair<float, int> >& score_index_vec)
+{
+    CV_DbgAssert(score_index_vec.empty());
+    // Generate index score pairs.
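+    // Pairing each score with its original index keeps the box identity
+    // through the sort below, so NMS can later refer back to the right box.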
+ for (size_t i = 0; i < scores.size(); ++i) + { + if (scores[i] > threshold) + { + score_index_vec.push_back(std::make_pair(scores[i], (int)i)); + } + } + + // Sort the score pair according to the scores in descending order + std::stable_sort(score_index_vec.begin(), score_index_vec.end(), + SortScorePairDescend); + + // Keep top_k scores if needed. + if (top_k > 0 && top_k < (int)score_index_vec.size()) + { + score_index_vec.resize(top_k); + } +} + +template +inline void NMSFast_(const std::vector& bboxes, + const std::vector& scores, const float score_threshold, + const float nms_threshold, const float eta, const int top_k, + std::vector& indices, + float (*computeOverlap)(const BoxType&, const BoxType&), + size_t limit = std::numeric_limits::max()) +{ + CV_Assert(bboxes.size() == scores.size()); + + // Get top_k scores (with corresponding indices). + std::vector > score_index_vec; + GetMaxScoreIndex(scores, score_threshold, top_k, score_index_vec); + + // Do nms. + float adaptive_threshold = nms_threshold; + indices.clear(); + for (size_t i = 0; i < score_index_vec.size(); ++i) { + const int idx = score_index_vec[i].second; + bool keep = true; + for (int k = 0; k < (int)indices.size() && keep; ++k) { + const int kept_idx = indices[k]; + float overlap = computeOverlap(bboxes[idx], bboxes[kept_idx]); + keep = overlap <= adaptive_threshold; + } + if (keep) { + indices.push_back(idx); + if (indices.size() >= limit) { + break; + } + } + if (keep && eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= eta; + } + } +} + +static inline float rotatedRectIOU(const RotatedRect& a, const RotatedRect& b) +{ + std::vector inter; + int res = rotatedRectangleIntersection(a, b, inter); + if (inter.empty() || res == INTERSECT_NONE) + return 0.0f; + if (res == INTERSECT_FULL) + return 1.0f; + float interArea = (float)contourArea(inter); + return interArea / (a.size.area() + b.size.area() - interArea); +} + +static void NMSBoxes(const std::vector& bboxes, const std::vector& scores, + const float score_threshold, const float nms_threshold, + std::vector& indices, const float eta = 1.f, const int top_k = 0) +{ + CV_Assert_N(bboxes.size() == scores.size(), score_threshold >= 0, + nms_threshold >= 0, eta > 0); + NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rotatedRectIOU); +} + +} // namespace :: + + +//============================================================================== + +void Detect::init(const Mat &src) +{ + const double min_side = std::min(src.size().width, src.size().height); + if (min_side > 512.0) + { + purpose = SHRINKING; + coeff_expansion = min_side / 512.0; + width = cvRound(src.size().width / coeff_expansion); + height = cvRound(src.size().height / coeff_expansion); + Size new_size(width, height); + resize(src, resized_barcode, new_size, 0, 0, INTER_AREA); + } +// else if (min_side < 512.0) +// { +// purpose = ZOOMING; +// coeff_expansion = 512.0 / min_side; +// width = cvRound(src.size().width * coeff_expansion); +// height = cvRound(src.size().height * coeff_expansion); +// Size new_size(width, height); +// resize(src, resized_barcode, new_size, 0, 0, INTER_CUBIC); +// } + else + { + purpose = UNCHANGED; + coeff_expansion = 1.0; + width = src.size().width; + height = src.size().height; + resized_barcode = src.clone(); + } + // median blur: sometimes it reduces the noise, but also reduces the recall + // medianBlur(resized_barcode, resized_barcode, 3); + +} + + +void Detect::localization() +{ + + localization_bbox.clear(); + 
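+    // bbox_scores is kept in lockstep with localization_bbox; both reset per call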
bbox_scores.clear(); + + // get integral image + preprocess(); + // empirical setting + static constexpr float SCALE_LIST[] = {0.01f, 0.03f, 0.06f, 0.08f}; + const auto min_side = static_cast(std::min(width, height)); + int window_size; + for (const float scale:SCALE_LIST) + { + window_size = cvRound(min_side * scale); + if(window_size == 0) { + window_size = 1; + } + calCoherence(window_size); + barcodeErode(); + regionGrowing(window_size); + } + +} + + +bool Detect::computeTransformationPoints() +{ + + bbox_indices.clear(); + transformation_points.clear(); + transformation_points.reserve(bbox_indices.size()); + RotatedRect rect; + Point2f temp[4]; + const float THRESHOLD_SCORE = float(width * height) / 300.f; + NMSBoxes(localization_bbox, bbox_scores, THRESHOLD_SCORE, 0.1f, bbox_indices); + + for (const auto &bbox_index : bbox_indices) + { + rect = localization_bbox[bbox_index]; + if (purpose == ZOOMING) + { + rect.center /= coeff_expansion; + rect.size.height /= static_cast(coeff_expansion); + rect.size.width /= static_cast(coeff_expansion); + } + else if (purpose == SHRINKING) + { + rect.center *= coeff_expansion; + rect.size.height *= static_cast(coeff_expansion); + rect.size.width *= static_cast(coeff_expansion); + } + rect.points(temp); + transformation_points.emplace_back(vector{temp[0], temp[1], temp[2], temp[3]}); + } + + return !transformation_points.empty(); +} + + +void Detect::preprocess() +{ + Mat scharr_x, scharr_y, temp; + static constexpr double THRESHOLD_MAGNITUDE = 64.; + Scharr(resized_barcode, scharr_x, CV_32F, 1, 0); + Scharr(resized_barcode, scharr_y, CV_32F, 0, 1); + // calculate magnitude of gradient and truncate + magnitude(scharr_x, scharr_y, temp); + threshold(temp, temp, THRESHOLD_MAGNITUDE, 1, THRESH_BINARY); + temp.convertTo(gradient_magnitude, CV_8U); + integral(gradient_magnitude, integral_edges, CV_32F); + + + for (int y = 0; y < height; y++) + { + auto *const x_row = scharr_x.ptr(y); + auto *const y_row = scharr_y.ptr(y); + auto *const magnitude_row = gradient_magnitude.ptr(y); + for (int pos = 0; pos < width; pos++) + { + if (magnitude_row[pos] == 0) + { + x_row[pos] = 0; + y_row[pos] = 0; + continue; + } + if (x_row[pos] < 0) + { + x_row[pos] *= -1; + y_row[pos] *= -1; + } + } + } + integral(scharr_x, temp, integral_x_sq, CV_32F, CV_32F); + integral(scharr_y, temp, integral_y_sq, CV_32F, CV_32F); + integral(scharr_x.mul(scharr_y), integral_xy, temp, CV_32F, CV_32F); +} + + +// Change coherence orientation edge_nums +// depend on width height integral_edges integral_x_sq integral_y_sq integral_xy +void Detect::calCoherence(int window_size) +{ + static constexpr float THRESHOLD_COHERENCE = 0.9f; + int right_col, left_col, top_row, bottom_row; + float xy, x_sq, y_sq, d, rect_area; + const float THRESHOLD_AREA = float(window_size * window_size) * 0.42f; + Size new_size(width / window_size, height / window_size); + coherence = Mat(new_size, CV_8U), orientation = Mat(new_size, CV_32F), edge_nums = Mat(new_size, CV_32F); + + float top_left, top_right, bottom_left, bottom_right; + int integral_cols = width + 1; + const auto *edges_ptr = integral_edges.ptr(), *x_sq_ptr = integral_x_sq.ptr(), *y_sq_ptr = integral_y_sq.ptr(), *xy_ptr = integral_xy.ptr(); + for (int y = 0; y < new_size.height; y++) + { + auto *coherence_row = coherence.ptr(y); + auto *orientation_row = orientation.ptr(y); + auto *edge_nums_row = edge_nums.ptr(y); + if (y * window_size >= height) + { + continue; + } + top_row = y * window_size; + bottom_row = min(height, (y + 1) * window_size); + + 
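+            // sweep the window grid across this row band; columns are clamped
+            // against the right border just as rows were against the bottom one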
for (int pos = 0; pos < new_size.width; pos++) + { + + // then calculate the column locations of the rectangle and set them to -1 + // if they are outside the matrix bounds + if (pos * window_size >= width) + { + continue; + } + left_col = pos * window_size; + right_col = min(width, (pos + 1) * window_size); + + //we had an integral image to count non-zero elements + CALCULATE_SUM(edges_ptr, rect_area) + if (rect_area < THRESHOLD_AREA) + { + // smooth region + coherence_row[pos] = 0; + continue; + } + + CALCULATE_SUM(x_sq_ptr, x_sq) + CALCULATE_SUM(y_sq_ptr, y_sq) + CALCULATE_SUM(xy_ptr, xy) + + // get the values of the rectangle corners from the integral image - 0 if outside bounds + d = sqrt((x_sq - y_sq) * (x_sq - y_sq) + 4 * xy * xy) / (x_sq + y_sq); + if (d > THRESHOLD_COHERENCE) + { + coherence_row[pos] = 255; + orientation_row[pos] = atan2(x_sq - y_sq, 2 * xy) / 2.0f; + edge_nums_row[pos] = rect_area; + } + else + { + coherence_row[pos] = 0; + } + + } + + } +} + +// will change localization_bbox bbox_scores +// will change coherence, +// depend on coherence orientation edge_nums +void Detect::regionGrowing(int window_size) +{ + static constexpr float LOCAL_THRESHOLD_COHERENCE = 0.95f, THRESHOLD_RADIAN = + PI / 30, LOCAL_RATIO = 0.5f, EXPANSION_FACTOR = 1.2f; + static constexpr uint THRESHOLD_BLOCK_NUM = 35; + Point pt_to_grow, pt; //point to grow + + float src_value; + float cur_value; + float edge_num; + float rect_orientation; + float sin_sum, cos_sum; + uint counter; + //grow direction + static constexpr int DIR[8][2] = {{-1, -1}, + {0, -1}, + {1, -1}, + {1, 0}, + {1, 1}, + {0, 1}, + {-1, 1}, + {-1, 0}}; + vector growingPoints, growingImgPoints; + for (int y = 0; y < coherence.rows; y++) + { + auto *coherence_row = coherence.ptr(y); + + for (int x = 0; x < coherence.cols; x++) + { + if (coherence_row[x] == 0) + { + continue; + } + // flag + coherence_row[x] = 0; + growingPoints.clear(); + growingImgPoints.clear(); + + pt = Point(x, y); + cur_value = orientation.at(pt); + sin_sum = sin(2 * cur_value); + cos_sum = cos(2 * cur_value); + counter = 1; + edge_num = edge_nums.at(pt); + growingPoints.push_back(pt); + growingImgPoints.push_back(Point(pt)); + while (!growingPoints.empty()) + { + pt = growingPoints.back(); + growingPoints.pop_back(); + src_value = orientation.at(pt); + + //growing in eight directions + for (auto i : DIR) + { + pt_to_grow = Point(pt.x + i[0], pt.y + i[1]); + + //check if out of boundary + if (!isValidCoord(pt_to_grow, coherence.size())) + { + continue; + } + + if (coherence.at(pt_to_grow) == 0) + { + continue; + } + cur_value = orientation.at(pt_to_grow); + if (abs(cur_value - src_value) < THRESHOLD_RADIAN || + abs(cur_value - src_value) > PI - THRESHOLD_RADIAN) + { + coherence.at(pt_to_grow) = 0; + sin_sum += sin(2 * cur_value); + cos_sum += cos(2 * cur_value); + counter += 1; + edge_num += edge_nums.at(pt_to_grow); + growingPoints.push_back(pt_to_grow); //push next point to grow back to stack + growingImgPoints.push_back(pt_to_grow); + } + } + } + //minimum block num + if (counter < THRESHOLD_BLOCK_NUM) + { + continue; + } + float local_coherence = (sin_sum * sin_sum + cos_sum * cos_sum) / static_cast(counter * counter); + // minimum local gradient orientation_arg coherence_arg + if (local_coherence < LOCAL_THRESHOLD_COHERENCE) + { + continue; + } + RotatedRect minRect = minAreaRect(growingImgPoints); + if (edge_num < minRect.size.area() * float(window_size * window_size) * LOCAL_RATIO || + static_cast(counter) < minRect.size.area() * LOCAL_RATIO) + { + 
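+            // reject sparse candidates: too few edge pixels (or too few
+            // coherent blocks) relative to the region's area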
continue; + } + const float local_orientation = atan2(cos_sum, sin_sum) / 2.0f; + // only orientation_arg is approximately equal to the rectangle orientation_arg + rect_orientation = (minRect.angle) * PI / 180.f; + if (minRect.size.width < minRect.size.height) + { + rect_orientation += (rect_orientation <= 0.f ? HALF_PI : -HALF_PI); + std::swap(minRect.size.width, minRect.size.height); + } + if (abs(local_orientation - rect_orientation) > THRESHOLD_RADIAN && + abs(local_orientation - rect_orientation) < PI - THRESHOLD_RADIAN) + { + continue; + } + minRect.angle = local_orientation * 180.f / PI; + minRect.size.width *= static_cast(window_size) * EXPANSION_FACTOR; + minRect.size.height *= static_cast(window_size); + minRect.center.x = (minRect.center.x + 0.5f) * static_cast(window_size); + minRect.center.y = (minRect.center.y + 0.5f) * static_cast(window_size); + localization_bbox.push_back(minRect); + bbox_scores.push_back(edge_num); + + } + } +} + +inline const std::array &getStructuringElement() +{ + static const std::array structuringElement{ + Mat_{{3, 3}, + {255, 0, 0, 0, 0, 0, 0, 0, 255}}, Mat_{{3, 3}, + {0, 0, 255, 0, 0, 0, 255, 0, 0}}, + Mat_{{3, 3}, + {0, 0, 0, 255, 0, 255, 0, 0, 0}}, Mat_{{3, 3}, + {0, 255, 0, 0, 0, 0, 0, 255, 0}}}; + return structuringElement; +} + +// Change mat +void Detect::barcodeErode() +{ + static const std::array &structuringElement = getStructuringElement(); + Mat m0, m1, m2, m3; + dilate(coherence, m0, structuringElement[0]); + dilate(coherence, m1, structuringElement[1]); + dilate(coherence, m2, structuringElement[2]); + dilate(coherence, m3, structuringElement[3]); + int sum; + for (int y = 0; y < coherence.rows; y++) + { + auto coherence_row = coherence.ptr(y); + auto m0_row = m0.ptr(y); + auto m1_row = m1.ptr(y); + auto m2_row = m2.ptr(y); + auto m3_row = m3.ptr(y); + + for (int pos = 0; pos < coherence.cols; pos++) + { + if (coherence_row[pos] != 0) + { + sum = m0_row[pos] + m1_row[pos] + m2_row[pos] + m3_row[pos]; + //more than 2 group + coherence_row[pos] = sum > 600 ? 255 : 0; + } + } + } +} +} +} diff --git a/modules/objdetect/src/barcode_detector/bardetect.hpp b/modules/objdetect/src/barcode_detector/bardetect.hpp new file mode 100644 index 0000000000..9f084d20aa --- /dev/null +++ b/modules/objdetect/src/barcode_detector/bardetect.hpp @@ -0,0 +1,62 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds + +#ifndef OPENCV_BARCODE_BARDETECT_HPP +#define OPENCV_BARCODE_BARDETECT_HPP + + +#include + +namespace cv { +namespace barcode { +using std::vector; + +class Detect +{ +private: + vector localization_rects; + vector localization_bbox; + vector bbox_scores; + vector bbox_indices; + vector> transformation_points; + + +public: + void init(const Mat &src); + + void localization(); + + vector> getTransformationPoints() + { return transformation_points; } + + bool computeTransformationPoints(); + +protected: + enum resize_direction + { + ZOOMING, SHRINKING, UNCHANGED + } purpose = UNCHANGED; + + + double coeff_expansion = 1.0; + int height, width; + Mat resized_barcode, gradient_magnitude, coherence, orientation, edge_nums, integral_x_sq, integral_y_sq, integral_xy, integral_edges; + + void preprocess(); + + void calCoherence(int window_size); + + static inline bool isValidCoord(const Point &coord, const Size &limit); + + void regionGrowing(int window_size); + + void barcodeErode(); + + +}; +} +} + +#endif // OPENCV_BARCODE_BARDETECT_HPP diff --git a/modules/objdetect/src/graphical_code_detector.cpp b/modules/objdetect/src/graphical_code_detector.cpp new file mode 100644 index 0000000000..971fd597ab --- /dev/null +++ b/modules/objdetect/src/graphical_code_detector.cpp @@ -0,0 +1,45 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" +#include "opencv2/objdetect/graphical_code_detector.hpp" +#include "graphical_code_detector_impl.hpp" + +namespace cv { + +GraphicalCodeDetector::GraphicalCodeDetector() {} + +bool GraphicalCodeDetector::detect(InputArray img, OutputArray points) const { + CV_Assert(p); + return p->detect(img, points); +} + +std::string GraphicalCodeDetector::decode(InputArray img, InputArray points, OutputArray straight_code) const { + CV_Assert(p); + return p->decode(img, points, straight_code); +} + +std::string GraphicalCodeDetector::detectAndDecode(InputArray img, OutputArray points, OutputArray straight_code) const { + CV_Assert(p); + return p->detectAndDecode(img, points, straight_code); +} + +bool GraphicalCodeDetector::detectMulti(InputArray img, OutputArray points) const { + CV_Assert(p); + return p->detectMulti(img, points); +} + +bool GraphicalCodeDetector::decodeMulti(InputArray img, InputArray points, std::vector& decoded_info, + OutputArrayOfArrays straight_code) const { + CV_Assert(p); + return p->decodeMulti(img, points, decoded_info, straight_code); +} + +bool GraphicalCodeDetector::detectAndDecodeMulti(InputArray img, std::vector& decoded_info, OutputArray points, + OutputArrayOfArrays straight_code) const { + CV_Assert(p); + return p->detectAndDecodeMulti(img, decoded_info, points, straight_code); +} + +} diff --git a/modules/objdetect/src/graphical_code_detector_impl.hpp b/modules/objdetect/src/graphical_code_detector_impl.hpp new file mode 100644 index 0000000000..76429222ff --- /dev/null +++ b/modules/objdetect/src/graphical_code_detector_impl.hpp @@ -0,0 +1,25 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html +#ifndef OPENCV_OBJDETECT_GRAPHICAL_CODE_DETECTOR_IMPL_HPP +#define OPENCV_OBJDETECT_GRAPHICAL_CODE_DETECTOR_IMPL_HPP + +#include + +namespace cv { + +struct GraphicalCodeDetector::Impl { + virtual ~Impl() {} + virtual bool detect(InputArray img, OutputArray points) const = 0; + virtual std::string decode(InputArray img, InputArray points, OutputArray straight_code) const = 0; + virtual std::string detectAndDecode(InputArray img, OutputArray points, OutputArray straight_code) const = 0; + virtual bool detectMulti(InputArray img, OutputArray points) const = 0; + virtual bool decodeMulti(InputArray img, InputArray points, std::vector& decoded_info, + OutputArrayOfArrays straight_code) const = 0; + virtual bool detectAndDecodeMulti(InputArray img, std::vector& decoded_info, + OutputArray points, OutputArrayOfArrays straight_code) const = 0; +}; + +} + +#endif \ No newline at end of file diff --git a/modules/objdetect/src/precomp.hpp b/modules/objdetect/src/precomp.hpp index cbefc396be..790a980697 100644 --- a/modules/objdetect/src/precomp.hpp +++ b/modules/objdetect/src/precomp.hpp @@ -44,10 +44,13 @@ #define __OPENCV_PRECOMP_H__ #include "opencv2/objdetect.hpp" +#include "opencv2/objdetect/barcode.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/core/utility.hpp" #include "opencv2/core/ocl.hpp" #include "opencv2/core/private.hpp" +#include + #endif diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 010fd669f6..0e32a2dea3 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -9,6 +9,7 @@ #include "opencv2/objdetect.hpp" #include "opencv2/3d.hpp" #include +#include "graphical_code_detector_impl.hpp" #ifdef HAVE_QUIRC #include "quirc.h" @@ -950,34 +951,53 @@ vector QRDetect::getQuadrilateral(vector angle_list) return result_angle_list; } - -struct QRCodeDetector::Impl +struct ImplContour : public GraphicalCodeDetector::Impl { public: - Impl() { epsX = 0.2; epsY = 0.1; } - ~Impl() {} + ImplContour(): epsX(0.2), epsY(0.1) {} double epsX, epsY; - vector> alignmentMarkers; - vector updateQrCorners; + mutable vector> alignmentMarkers; + mutable vector updateQrCorners; bool useAlignmentMarkers = true; + + bool detect(InputArray in, OutputArray points) const override; + std::string decode(InputArray img, InputArray points, OutputArray straight_qrcode) const override; + std::string detectAndDecode(InputArray img, OutputArray points, OutputArray straight_qrcode) const override; + + bool detectMulti(InputArray img, OutputArray points) const override; + bool decodeMulti(InputArray img, InputArray points, std::vector& decoded_info, + OutputArrayOfArrays straight_qrcode) const override; + bool detectAndDecodeMulti(InputArray img, std::vector& decoded_info, OutputArray points, + OutputArrayOfArrays straight_qrcode) const override; + + String decodeCurved(InputArray in, InputArray points, OutputArray straight_qrcode); + + std::string detectAndDecodeCurved(InputArray in, OutputArray points, OutputArray straight_qrcode); }; -QRCodeDetector::QRCodeDetector() : p(new Impl) {} +QRCodeDetector::QRCodeDetector() { + p = makePtr(); +} -QRCodeDetector::~QRCodeDetector() {} +QRCodeDetector& QRCodeDetector::setEpsX(double epsX) { + std::dynamic_pointer_cast(p)->epsX = epsX; + return *this; +} -void QRCodeDetector::setEpsX(double epsX) { p->epsX = epsX; } -void QRCodeDetector::setEpsY(double 
epsY) { p->epsY = epsY; } +QRCodeDetector& QRCodeDetector::setEpsY(double epsY) { + std::dynamic_pointer_cast(p)->epsY = epsY; + return *this; +} -bool QRCodeDetector::detect(InputArray in, OutputArray points) const +bool ImplContour::detect(InputArray in, OutputArray points) const { Mat inarr; if (!checkQRInputImage(in, inarr)) return false; QRDetect qrdet; - qrdet.init(inarr, p->epsX, p->epsY); + qrdet.init(inarr, epsX, epsY); if (!qrdet.localization()) { return false; } if (!qrdet.computeTransformationPoints()) { return false; } vector pnts2f = qrdet.getTransformationPoints(); @@ -2789,9 +2809,7 @@ QRDecode::QRDecode(bool _useAlignmentMarkers): test_perspective_size(0.f) {} -std::string QRCodeDetector::decode(InputArray in, InputArray points, - OutputArray straight_qrcode) -{ +std::string ImplContour::decode(InputArray in, InputArray points, OutputArray straight_qrcode) const { Mat inarr; if (!checkQRInputImage(in, inarr)) return std::string(); @@ -2801,7 +2819,7 @@ std::string QRCodeDetector::decode(InputArray in, InputArray points, CV_Assert(src_points.size() == 4); CV_CheckGT(contourArea(src_points), 0.0, "Invalid QR code source points"); - QRDecode qrdec(p->useAlignmentMarkers); + QRDecode qrdec(useAlignmentMarkers); qrdec.init(inarr, src_points); bool ok = qrdec.straightDecodingProcess(); @@ -2815,14 +2833,18 @@ std::string QRCodeDetector::decode(InputArray in, InputArray points, qrdec.getStraightBarcode().convertTo(straight_qrcode, CV_8UC1); } if (ok && !decoded_info.empty()) { - p->alignmentMarkers = {qrdec.alignment_coords}; - p->updateQrCorners = qrdec.getOriginalPoints(); + alignmentMarkers = {qrdec.alignment_coords}; + updateQrCorners = qrdec.getOriginalPoints(); } return ok ? decoded_info : std::string(); } -cv::String QRCodeDetector::decodeCurved(InputArray in, InputArray points, - OutputArray straight_qrcode) +String QRCodeDetector::decodeCurved(InputArray in, InputArray points, OutputArray straight_qrcode) { + CV_Assert(p); + return std::dynamic_pointer_cast(p)->decodeCurved(in, points, straight_qrcode); +} + +String ImplContour::decodeCurved(InputArray in, InputArray points, OutputArray straight_qrcode) { Mat inarr; if (!checkQRInputImage(in, inarr)) @@ -2833,7 +2855,7 @@ cv::String QRCodeDetector::decodeCurved(InputArray in, InputArray points, CV_Assert(src_points.size() == 4); CV_CheckGT(contourArea(src_points), 0.0, "Invalid QR code source points"); - QRDecode qrdec(p->useAlignmentMarkers); + QRDecode qrdec(useAlignmentMarkers); qrdec.init(inarr, src_points); bool ok = qrdec.curvedDecodingProcess(); @@ -2851,10 +2873,7 @@ cv::String QRCodeDetector::decodeCurved(InputArray in, InputArray points, return ok ? 
decoded_info : std::string(); } -std::string QRCodeDetector::detectAndDecode(InputArray in, - OutputArray points_, - OutputArray straight_qrcode) -{ +std::string ImplContour::detectAndDecode(InputArray in, OutputArray points_, OutputArray straight_qrcode) const { Mat inarr; if (!checkQRInputImage(in, inarr)) { @@ -2874,9 +2893,14 @@ std::string QRCodeDetector::detectAndDecode(InputArray in, return decoded_info; } -std::string QRCodeDetector::detectAndDecodeCurved(InputArray in, - OutputArray points_, - OutputArray straight_qrcode) +std::string QRCodeDetector::detectAndDecodeCurved(InputArray in, OutputArray points, + OutputArray straight_qrcode) { + CV_Assert(p); + return std::dynamic_pointer_cast(p)->detectAndDecodeCurved(in, points, straight_qrcode); +} + +std::string ImplContour::detectAndDecodeCurved(InputArray in, OutputArray points_, + OutputArray straight_qrcode) { Mat inarr; if (!checkQRInputImage(in, inarr)) @@ -3817,31 +3841,28 @@ bool QRDetectMulti::computeTransformationPoints(const size_t cur_ind) return true; } -bool QRCodeDetector::detectMulti(InputArray in, OutputArray points) const -{ - Mat inarr; - if (!checkQRInputImage(in, inarr)) - { +bool ImplContour::detectMulti(InputArray in, OutputArray points) const { + Mat gray; + if (!checkQRInputImage(in, gray)) { points.release(); return false; } - + vector result; QRDetectMulti qrdet; - qrdet.init(inarr, p->epsX, p->epsY); - if (!qrdet.localization()) - { + qrdet.init(gray, epsX, epsY); + if (!qrdet.localization()) { points.release(); return false; } - vector< vector< Point2f > > pnts2f = qrdet.getTransformationPoints(); - vector trans_points; + vector > pnts2f = qrdet.getTransformationPoints(); for(size_t i = 0; i < pnts2f.size(); i++) for(size_t j = 0; j < pnts2f[i].size(); j++) - trans_points.push_back(pnts2f[i][j]); - - updatePointsResult(points, trans_points); - - return true; + result.push_back(pnts2f[i][j]); + if (result.size() >= 4) { + updatePointsResult(points, result); + return true; + } + return false; } class ParallelDecodeProcess : public ParallelLoopBody @@ -3902,7 +3923,7 @@ private: }; -bool QRCodeDetector::decodeMulti( +bool ImplContour::decodeMulti( InputArray img, InputArray points, CV_OUT std::vector& decoded_info, @@ -3926,7 +3947,7 @@ bool QRCodeDetector::decodeMulti( } } CV_Assert(src_points.size() > 0); - vector qrdec(src_points.size(), p->useAlignmentMarkers); + vector qrdec(src_points.size(), useAlignmentMarkers); vector straight_barcode(src_points.size()); vector info(src_points.size()); ParallelDecodeProcess parallelDecodeProcess(inarr, qrdec, info, straight_barcode, src_points); @@ -3957,12 +3978,12 @@ bool QRCodeDetector::decodeMulti( { decoded_info.push_back(info[i]); } - p->alignmentMarkers.resize(src_points.size()); - p->updateQrCorners.resize(src_points.size()*4ull); + alignmentMarkers.resize(src_points.size()); + updateQrCorners.resize(src_points.size()*4ull); for (size_t i = 0ull; i < src_points.size(); i++) { - p->alignmentMarkers[i] = qrdec[i].alignment_coords; + alignmentMarkers[i] = qrdec[i].alignment_coords; for (size_t j = 0ull; j < 4ull; j++) - p->updateQrCorners[i*4ull+j] = qrdec[i].getOriginalPoints()[j] * qrdec[i].coeff_expansion; + updateQrCorners[i*4ull+j] = qrdec[i].getOriginalPoints()[j] * qrdec[i].coeff_expansion; } if (!decoded_info.empty()) return true; @@ -3970,7 +3991,7 @@ bool QRCodeDetector::decodeMulti( return false; } -bool QRCodeDetector::detectAndDecodeMulti( +bool ImplContour::detectAndDecodeMulti( InputArray img, CV_OUT std::vector& decoded_info, OutputArray 
points_, @@ -3994,13 +4015,537 @@ bool QRCodeDetector::detectAndDecodeMulti( updatePointsResult(points_, points); decoded_info.clear(); ok = decodeMulti(inarr, points, decoded_info, straight_qrcode); - updatePointsResult(points_, p->updateQrCorners); + updatePointsResult(points_, updateQrCorners); return ok; } -void QRCodeDetector::setUseAlignmentMarkers(bool useAlignmentMarkers) { - p->useAlignmentMarkers = useAlignmentMarkers; +QRCodeDetector& QRCodeDetector::setUseAlignmentMarkers(bool useAlignmentMarkers) { + (std::dynamic_pointer_cast)(p)->useAlignmentMarkers = useAlignmentMarkers; + return *this; } +QRCodeDetectorAruco::Params::Params() { + minModuleSizeInPyramid = 4.f; + maxRotation = (float)CV_PI/12.f; + maxModuleSizeMismatch = 1.75f; + maxTimingPatternMismatch = 2.f; + maxPenalties = 0.4f; + maxColorsMismatch = 0.2f; + scaleTimingPatternScore = 0.9f; +} + +namespace { + +struct FinderPatternInfo { + FinderPatternInfo() {} + + FinderPatternInfo(const vector& patternPoints): points(patternPoints) { + float minSin = 1.f; + for (int i = 0; i < 4; i++) { + center += points[i]; + const Point2f side = points[i]-points[(i+1) % 4]; + const float lenSide = sqrt(normL2Sqr(side)); + minSin = min(minSin, abs(side.y) / lenSide); + moduleSize += lenSide; + } + moduleSize /= (4.f * 7.f); // 4 sides, 7 modules in one side + center /= 4.f; + minQrAngle = asin(minSin); + } + + enum TypePattern { + CENTER, + RIGHT, + BOTTOM, + NONE + }; + + void setType(const TypePattern& _typePattern, const Point2f& centerQR) { + typePattern = _typePattern; + float bestLen = normL2Sqr(centerQR - points[0]); + int id = 0; + for (int i = 1; i < 4; i++) { + float len = normL2Sqr(centerQR - points[i]); + if (len < bestLen) { + bestLen = len; + id = i; + } + } + innerCornerId = id; + } + + Point2f getDirectionTo(const TypePattern& other) const { + Point2f res = points[innerCornerId]; + if (typePattern == TypePattern::CENTER) { + if (other == TypePattern::RIGHT) { + res -= points[(innerCornerId + 1) % 4]; + res = 0.5f*(res + points[(innerCornerId + 3) % 4] - points[(innerCornerId + 2) % 4]); + } + else if (other == TypePattern::BOTTOM) { + res -= points[(innerCornerId + 3) % 4]; + res = 0.5f*(res + points[(innerCornerId + 1) % 4] - points[(innerCornerId + 2) % 4]); + } + } + else if (typePattern == TypePattern::RIGHT && other == TypePattern::CENTER) { + res = res - points[(innerCornerId + 3) % 4]; + res = 0.5f*(res + points[(innerCornerId + 1) % 4] - points[(innerCornerId + 2) % 4]); + } + else if (typePattern == TypePattern::BOTTOM && other == TypePattern::CENTER) { + res = res - points[(innerCornerId + 1) % 4]; + res = 0.5f*(res + points[(innerCornerId + 3) % 4] - points[(innerCornerId + 2) % 4]); + } + return res; + } + + bool checkTriangleAngle(const FinderPatternInfo& patternRight, const FinderPatternInfo& patternBottom, const float length2Vec) { + // check the triangle angle btw right & center & bootom sides of QR code + // the triangle angle shoud be between 30 and 150 degrees + // abs(pi/2 - triangle_angle) should be less 60 degrees + const float angle = abs((float)CV_PI/2.f - acos((center - patternRight.center).dot((center - patternBottom.center)) / length2Vec)); + + const float maxTriangleDeltaAngle = (float)CV_PI / 3.f; + if (angle > maxTriangleDeltaAngle) { + return false; + } + return true; + } + + bool checkAngle(const FinderPatternInfo& other, const float maxRotation) { + Point2f toOther = getDirectionTo(other.typePattern); + Point2f toThis = other.getDirectionTo(typePattern); + const float cosAngle = 
getCosAngle(toOther, toThis); + if (cosAngle < 0.f && (CV_PI - acos(cosAngle)) / 2.f < maxRotation) { + const float angleCenter = max(acos(getCosAngle(toOther, other.center - center)), acos(getCosAngle(toThis, center - other.center))); + if (angleCenter < maxRotation) + return true; + } + return false; + } + + static float getCosAngle(const Point2f& vec1, const Point2f& vec2) { + float cosAngle = vec1.dot(vec2) / (sqrt(normL2Sqr(vec1)) * sqrt(normL2Sqr(vec2))); + cosAngle = std::max(-1.f, cosAngle); + cosAngle = std::min(1.f, cosAngle); + return cosAngle; + } + + pair getQRCorner() const { + if (typePattern == TypePattern::CENTER) { + int id = (innerCornerId + 2) % 4; + return std::make_pair(id, points[id]); + } + else if (typePattern != TypePattern::NONE) { + int id = (innerCornerId + 2) % 4; + return std::make_pair(id, points[id]); + } + return std::make_pair(-1, Point2f()); + } + + pair getCornerForIntersection() const { + if (typePattern == TypePattern::RIGHT) { + int id = (innerCornerId + 3) % 4; + return std::make_pair(id, points[id]); + } + else if (typePattern == TypePattern::BOTTOM) { + int id = (innerCornerId + 1) % 4; + return std::make_pair(id, points[id]); + } + return std::make_pair(-1, Point2f()); + } + + Point2f getTimingStart(TypePattern direction) const { + const float timingStartPosition = .5f; + const float patternLength = 7.f; + Point2f start = points[innerCornerId]*((patternLength - timingStartPosition)/patternLength); + if (typePattern == TypePattern::CENTER && direction == TypePattern::RIGHT) { + start += points[(innerCornerId + 3) % 4]*(timingStartPosition/patternLength); + } + else if (typePattern == TypePattern::CENTER && direction == TypePattern::BOTTOM) { + start += points[(innerCornerId + 1) % 4]*(timingStartPosition/patternLength); + } + else if (typePattern == TypePattern::RIGHT && direction == TypePattern::CENTER) { + start += points[(innerCornerId + 1) % 4]*(timingStartPosition/patternLength); + } + else if (typePattern == TypePattern::BOTTOM && direction == TypePattern::CENTER) { + start += points[(innerCornerId + 3) % 4]*(timingStartPosition/patternLength); + } + return start + getDirectionTo(direction)/(patternLength*2.f); + } + + // return total white+black modules in timing pattern, total white modules, penaltyPoints + Point3i getTimingPatternScore(const Point2f& start, const Point2f& end, Mat &img, const float maxTimingPatternMismatch) const { + Rect imageRect(Point(), img.size()); + int penaltyPoints = 0; + int colorCounters[2] = {0, 0}; + if (imageRect.contains(Point(cvRound(end.x), cvRound(end.y)))) { + LineIterator lineIterator(start, end); + uint8_t prevValue = img.at(lineIterator.pos()); + + vector vec = {lineIterator.pos()}; + + // the starting position in the timing pattern is the white module white module next to the finder pattern. 
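+            // colorCounters[1] counts white runs and colorCounters[0] black ones;
+            // the flag flips at every black/white transition along the line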
+ bool whiteColor = true; + lineIterator++; + colorCounters[whiteColor]++; + + for(int i = 1; i < lineIterator.count; i++, ++lineIterator) { + const uint8_t value = img.at(lineIterator.pos()); + if (prevValue != value) { + const float dist = sqrt(normL2Sqr((Point2f)(vec.back()-lineIterator.pos()))); + // check long and short lines in timing pattern + const float relativeDiff = max(moduleSize, dist)/min(moduleSize, dist); + if (relativeDiff > maxTimingPatternMismatch) { + if (dist < moduleSize || relativeDiff < maxTimingPatternMismatch*8.f) + penaltyPoints++; + else + penaltyPoints += cvRound(relativeDiff); + } + vec.push_back(lineIterator.pos()); + prevValue = value; + whiteColor ^= true; + colorCounters[whiteColor]++; + } + } + } + return Point3i(colorCounters[0] + colorCounters[1], colorCounters[1], penaltyPoints); + } + + FinderPatternInfo& operator*=(const float scale) { + moduleSize *= scale; + center *= scale; + for (auto& point: points) + point *= scale; + return *this; + } + + float moduleSize = 0.f; + + // Index of inner QR corner. + // The inner corner is the corner closest to the center of the QR code. + int innerCornerId = 0; + + float minQrAngle = 0.f; + TypePattern typePattern = NONE; + + Point2f center; + vector points; +}; + +struct QRCode { + QRCode() {} + + QRCode(const FinderPatternInfo& _centerPattern, const FinderPatternInfo& _rightPattern, const FinderPatternInfo& _bottomPattern, + Point2f _center, float dist): centerPattern(_centerPattern), rightPattern(_rightPattern), bottomPattern(_bottomPattern), + center(_center), distance(dist) { + moduleSize = (centerPattern.moduleSize + rightPattern.moduleSize + bottomPattern.moduleSize) / 3.f; + } + + vector getQRCorners() const { + Point2f a1 = rightPattern.getQRCorner().second; + Point2f a2 = rightPattern.getCornerForIntersection().second; + + Point2f b1 = bottomPattern.getQRCorner().second; + Point2f b2 = bottomPattern.getCornerForIntersection().second; + + Point2f rightBottom = intersectionLines(a1, a2, b1, b2); + + return {centerPattern.getQRCorner().second, rightPattern.getQRCorner().second, rightBottom, bottomPattern.getQRCorner().second}; + } + + static QRCode checkCompatibilityPattern(const FinderPatternInfo &_pattern1, const FinderPatternInfo& _pattern2, const FinderPatternInfo& _pattern3, + Point3i& index, const QRCodeDetectorAruco::Params& qrDetectorParameters) { + FinderPatternInfo pattern1 = _pattern1, pattern2 = _pattern2, pattern3 = _pattern3; + Point2f centerQR; + float distance = std::numeric_limits::max(); + + if (abs(pattern1.minQrAngle - pattern2.minQrAngle) > qrDetectorParameters.maxRotation || + abs(pattern1.minQrAngle - pattern3.minQrAngle) > qrDetectorParameters.maxRotation) // check maxRotation + return QRCode(pattern1, pattern2, pattern3, centerQR, distance); + if (max(pattern1.moduleSize, pattern2.moduleSize) / min(pattern1.moduleSize, pattern2.moduleSize) > qrDetectorParameters.maxModuleSizeMismatch || + max(pattern1.moduleSize, pattern3.moduleSize) / min(pattern1.moduleSize, pattern3.moduleSize) > qrDetectorParameters.maxModuleSizeMismatch) + return QRCode(pattern1, pattern2, pattern3, centerQR, distance); + // QR code: + // center right + // 1 ________ 2 + // |_| |_| + // | / | + // | / | + // | / | + // |_ / | + // |_|______| + // 4 + // bottom + + // sides length check + const float side1 = sqrt(normL2Sqr(pattern1.center - pattern2.center)); + const float side2 = sqrt(normL2Sqr(pattern1.center - pattern3.center)); + const float side3 = sqrt(normL2Sqr(pattern2.center - pattern3.center)); + 
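+        // sort the three pairwise distances: the two shortest should be the QR
+        // sides, the longest the diagonal between the right and bottom patterns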
std::array sides = {side1, side2, side3}; + std::sort(sides.begin(), sides.end()); + // check sides diff + if (sides[1] / sides[0] < qrDetectorParameters.maxModuleSizeMismatch) { + // find center pattern + if (side1 > side2 && side1 > side3) { // centerPattern is pattern3 + std::swap(pattern3, pattern1); // now pattern1 is centerPattern + std::swap(index.x, index.z); + } + else if (side2 > side1 && side2 > side3) { // centerPattern is pattern2 + std::swap(pattern2, pattern1); // now pattern1 is centerPattern + std::swap(index.x, index.y); + } + // now pattern1 is centerPattern + centerQR = (pattern2.center + pattern3.center) / 2.f; + pattern1.setType(FinderPatternInfo::TypePattern::CENTER, centerQR); + // check triangle angle + if (pattern1.checkTriangleAngle(pattern2, pattern3, sides[0]*sides[1]) == false) + return QRCode(pattern1, pattern2, pattern3, centerQR, distance); + // check that pattern2 is right + pattern2.setType(FinderPatternInfo::TypePattern::RIGHT, centerQR); + bool ok = pattern1.checkAngle(pattern2, qrDetectorParameters.maxRotation); + if (!ok) { + // check that pattern3 is right + pattern3.setType(FinderPatternInfo::TypePattern::RIGHT, centerQR); + ok = pattern1.checkAngle(pattern3, qrDetectorParameters.maxRotation); + if (ok) { + std::swap(pattern3, pattern2); // now pattern2 is rightPattern + std::swap(index.y, index.z); + } + } + if (ok) { + // check that pattern3 is bottom + pattern3.setType(FinderPatternInfo::TypePattern::BOTTOM, centerQR); + ok = pattern1.checkAngle(pattern3, qrDetectorParameters.maxRotation); + if (ok) { + // intersection check + Point2f c1 = intersectionLines(pattern1.getQRCorner().second, pattern1.points[pattern1.innerCornerId], + pattern2.getQRCorner().second, pattern2.points[pattern2.innerCornerId]); + Point2f c2 = intersectionLines(pattern1.getQRCorner().second, pattern1.points[pattern1.innerCornerId], + pattern3.getQRCorner().second, pattern3.points[pattern3.innerCornerId]); + const float centerDistance = sqrt(normL2Sqr(c1 - c2)); + distance = (sides[0] + sides[1] + centerDistance)*(sides[1] / sides[0]); + } + } + } + QRCode qrcode(pattern1, pattern2, pattern3, centerQR, distance); + return qrcode; + } + + int calculateScoreByTimingPattern(Mat &img, const QRCodeDetectorAruco::Params& params) { + const int minModulesInTimingPattern = 4; + + const Point3i v1 = centerPattern.getTimingPatternScore(rightPattern.getTimingStart(FinderPatternInfo::CENTER), + centerPattern.getTimingStart(FinderPatternInfo::RIGHT), img, + params.maxTimingPatternMismatch); + + if ((float)v1.z > params.maxPenalties*v1.x || v1.x <= minModulesInTimingPattern || abs(v1.y / (float)v1.x - 0.5f) > params.maxColorsMismatch) + return std::numeric_limits::max(); + + const Point3i v2 = centerPattern.getTimingPatternScore(bottomPattern.getTimingStart(FinderPatternInfo::CENTER), + centerPattern.getTimingStart(FinderPatternInfo::BOTTOM), img, + params.maxTimingPatternMismatch); + + + if ((float)v2.z > params.maxPenalties*v2.x || v2.x <= minModulesInTimingPattern || abs(v2.y / (float)v2.x - 0.5f) > params.maxColorsMismatch) + return std::numeric_limits::max(); + + // TODO: add v1, v2 check, add "y" checks + float numModules = (sqrt(normL2Sqr((centerPattern.getQRCorner().second - rightPattern.getQRCorner().second)))*0.5f + + sqrt(normL2Sqr((centerPattern.getQRCorner().second - bottomPattern.getQRCorner().second))*0.5f)) / moduleSize; + + const int sizeDelta = abs(cvRound(numModules) - (14 + v1.z < v2.z ? 
v1.x : v2.x)); + const int colorDelta = abs(v1.x - v1.y - v1.y) + abs(v2.x - v2.y - v2.y); + const int score = v1.z + v2.z + sizeDelta + colorDelta; + return score; + } + + QRCode& operator*=(const float scale) { + centerPattern *= scale; + rightPattern *= scale; + bottomPattern *= scale; + center *= scale; + moduleSize *= scale; + return *this; + } + + FinderPatternInfo centerPattern; + FinderPatternInfo rightPattern; + FinderPatternInfo bottomPattern; + Point2f center; + float distance = std::numeric_limits::max(); + int timingPatternScore = std::numeric_limits::max(); + float moduleSize = 0.f; +}; + +} // namespace + +static +vector analyzeFinderPatterns(const vector > &corners, const Mat& img, + const QRCodeDetectorAruco::Params& qrDetectorParameters) { + vector qrCodes; + vector patterns; + if (img.empty()) + return qrCodes; + float maxModuleSize = 0.f; + for (size_t i = 0ull; i < corners.size(); i++) { + FinderPatternInfo pattern = FinderPatternInfo(corners[i]); + // TODO: improve thinning Aruco markers + bool isUniq = true; + for (const FinderPatternInfo& tmp : patterns) { + Point2f dist = pattern.center - tmp.center; + if (max(abs(dist.x), abs(dist.y)) < 3.f * tmp.moduleSize) { + isUniq = false; + break; + } + } + if (isUniq) { + patterns.push_back(pattern); + maxModuleSize = max(maxModuleSize, patterns.back().moduleSize); + } + } + const int threshold = cvRound(qrDetectorParameters.minModuleSizeInPyramid * 12.5f) + + (cvRound(qrDetectorParameters.minModuleSizeInPyramid * 12.5f) % 2 ? 0 : 1); + int maxLevelPyramid = 0; + while (maxModuleSize / 2.f > qrDetectorParameters.minModuleSizeInPyramid) { + maxLevelPyramid++; + maxModuleSize /= 2.f; + } + vector pyramid; + buildPyramid(img, pyramid, maxLevelPyramid); + // TODO: ADAPTIVE_THRESH_GAUSSIAN_C vs ADAPTIVE_THRESH_MEAN_C + for (Mat& pyr: pyramid) { + adaptiveThreshold(pyr, pyr, 255, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, threshold, -1); + } + + for (size_t i = 0ull; i < patterns.size(); i++) { + QRCode qrCode; + int indexes[3] = {0}; + for (size_t j = i + 1ull; j < patterns.size(); j++) { + for (size_t k = j + 1ull; k < patterns.size(); k++) { + Point3i index((int)i, (int)j, (int)k); + QRCode tmp = QRCode::checkCompatibilityPattern(patterns[i], patterns[j], patterns[k], index, qrDetectorParameters); + if (tmp.distance != std::numeric_limits::max()) { + int levelPyramid = 0; + QRCode qrCopy = tmp; + while (tmp.moduleSize / 2.f > qrDetectorParameters.minModuleSizeInPyramid) { + tmp *= 0.5f; + levelPyramid++; + } + qrCopy.timingPatternScore = tmp.calculateScoreByTimingPattern(pyramid[levelPyramid], qrDetectorParameters); + if (qrCopy.timingPatternScore != std::numeric_limits::max() && + qrCopy.timingPatternScore * qrDetectorParameters.scaleTimingPatternScore < (float)qrCode.timingPatternScore + && qrCopy.distance < qrCode.distance) + { + qrCode = qrCopy; + indexes[0] = (int)i; + indexes[1] = (int)j; + indexes[2] = (int)k; + } + } + } + } + if (qrCode.distance != std::numeric_limits::max()) { + qrCodes.push_back(qrCode); + std::swap(patterns[indexes[2]], patterns.back()); + patterns.pop_back(); + std::swap(patterns[indexes[1]], patterns.back()); + patterns.pop_back(); + std::swap(patterns[indexes[0]], patterns.back()); + patterns.pop_back(); + i--; + } + } + return qrCodes; +} + +struct PimplQRAruco : public ImplContour { + QRCodeDetectorAruco::Params qrParams; + aruco::ArucoDetector arucoDetector; + aruco::DetectorParameters arucoParams; + + PimplQRAruco() { + Mat bits = Mat::ones(Size(5, 5), CV_8UC1); + Mat(bits, Rect(1, 1, 3, 
3)).setTo(Scalar(0)); + Mat byteList = aruco::Dictionary::getByteListFromBits(bits); + aruco::Dictionary dictionary = aruco::Dictionary(byteList, 5, 4); + arucoParams.minMarkerPerimeterRate = 0.02; + arucoDetector = aruco::ArucoDetector(dictionary, arucoParams); + } + + bool detectMulti(InputArray in, OutputArray points) const override { + Mat gray; + if (!checkQRInputImage(in, gray)) { + points.release(); + return false; + } + vector result; + vector > corners; + vector ids; + arucoDetector.detectMarkers(gray, corners, ids); + if (corners.size() >= 3ull) { + vector qrCodes = analyzeFinderPatterns(corners, gray.clone(), qrParams); + if (qrCodes.size() == 0ull) + return false; + for (auto& qr : qrCodes) { + for (Point2f& corner : qr.getQRCorners()) { + result.push_back(corner); + } + } + } + if (result.size() >= 4) { + updatePointsResult(points, result); + return true; + } + return false; + } + + bool detect(InputArray img, OutputArray points) const override { + vector corners, result; + bool flag = detectMulti(img, corners); + CV_Assert((int)corners.size() % 4 == 0); + + Point2f imageCenter(((float)img.cols())/2.f, ((float)img.rows())/2.f); + size_t minQrId = 0ull; + float minDist = std::numeric_limits::max(); + for (size_t i = 0ull; i < corners.size(); i += 4ull) { + Point2f qrCenter((corners[i] + corners[i+1ull] + corners[i+2ull] + corners[i+3ull]) / 4.f); + float dist = sqrt(normL2Sqr(qrCenter - imageCenter)); + if (dist < minDist) { + minQrId = i; + minDist = dist; + } + } + if (flag) { + result = {corners[minQrId], corners[minQrId+1ull], corners[minQrId+2ull], corners[minQrId+3ull]}; + updatePointsResult(points, result); + } + return flag; + } +}; + +QRCodeDetectorAruco::QRCodeDetectorAruco() { + p = makePtr(); +} + +QRCodeDetectorAruco::QRCodeDetectorAruco(const QRCodeDetectorAruco::Params& params) { + p = makePtr(); + std::dynamic_pointer_cast(p)->qrParams = params; +} + +const QRCodeDetectorAruco::Params& QRCodeDetectorAruco::getDetectorParameters() const { + return std::dynamic_pointer_cast(p)->qrParams; +} + +QRCodeDetectorAruco& QRCodeDetectorAruco::setDetectorParameters(const QRCodeDetectorAruco::Params& params) { + std::dynamic_pointer_cast(p)->qrParams = params; + return *this; +} + +aruco::DetectorParameters QRCodeDetectorAruco::getArucoParameters() { + return std::dynamic_pointer_cast(p)->arucoParams; +} + +void QRCodeDetectorAruco::setArucoParameters(const aruco::DetectorParameters& params) { + std::dynamic_pointer_cast(p)->arucoParams = params; +} } // namespace diff --git a/modules/objdetect/test/test_arucodetection.cpp b/modules/objdetect/test/test_arucodetection.cpp index 4d60358085..7369f80647 100644 --- a/modules/objdetect/test/test_arucodetection.cpp +++ b/modules/objdetect/test/test_arucodetection.cpp @@ -268,12 +268,12 @@ void CV_ArucoDetectionPerspective::run(int) { } if(ArucoAlgParams::USE_APRILTAG == arucoAlgParams){ - detectorParameters.cornerRefinementMethod = aruco::CORNER_REFINE_APRILTAG; + detectorParameters.cornerRefinementMethod = (int)aruco::CORNER_REFINE_APRILTAG; } if (ArucoAlgParams::USE_ARUCO3 == arucoAlgParams) { detectorParameters.useAruco3Detection = true; - detectorParameters.cornerRefinementMethod = aruco::CORNER_REFINE_SUBPIX; + detectorParameters.cornerRefinementMethod = (int)aruco::CORNER_REFINE_SUBPIX; } detector.setDetectorParameters(detectorParameters); @@ -653,7 +653,7 @@ TEST_P(ArucoThreading, number_of_threads_does_not_change_results) img_marker.copyTo(img(Rect(shift, shift, height_marker, height_marker))); aruco::DetectorParameters 
detectorParameters = detector.getDetectorParameters(); - detectorParameters.cornerRefinementMethod = GetParam(); + detectorParameters.cornerRefinementMethod = (int)GetParam(); detector.setDetectorParameters(detectorParameters); vector > original_corners; diff --git a/modules/objdetect/test/test_barcode.cpp b/modules/objdetect/test/test_barcode.cpp new file mode 100644 index 0000000000..d8e2002f23 --- /dev/null +++ b/modules/objdetect/test/test_barcode.cpp @@ -0,0 +1,140 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" +#include "opencv2/objdetect/barcode.hpp" +#include + +using namespace std; + +namespace opencv_test{namespace{ + +typedef std::set StringSet; + +// Convert ';'-separated strings to a set +inline static StringSet toSet(const string &line) +{ + StringSet res; + string::size_type it = 0, ti; + while (true) + { + ti = line.find(';', it); + if (ti == string::npos) + { + res.insert(string(line, it, line.size() - it)); + break; + } + res.insert(string(line, it, ti - it)); + it = ti + 1; + } + return res; +} + +// Convert vector of strings to a set +inline static StringSet toSet(const vector &lines) +{ + StringSet res; + for (const string & line : lines) + res.insert(line); + return res; +} + +// Get all keys of a map in a vector +template +inline static vector getKeys(const map &m) +{ + vector res; + for (const auto & it : m) + res.push_back(it.first); + return res; +} + +struct BarcodeResult +{ + string type; + string data; +}; + +map testResults { + { "single/book.jpg", {"EAN_13", "9787115279460"} }, + { "single/bottle_1.jpg", {"EAN_13", "6922255451427"} }, + { "single/bottle_2.jpg", {"EAN_13", "6921168509256"} }, + { "multiple/4_barcodes.jpg", {"EAN_13;EAN_13;EAN_13;EAN_13", "9787564350840;9783319200064;9787118081473;9787122276124"} } +}; + +typedef testing::TestWithParam< string > BarcodeDetector_main; + +TEST_P(BarcodeDetector_main, interface) +{ + const string fname = GetParam(); + const string image_path = findDataFile(string("barcode/") + fname); + const StringSet expected_lines = toSet(testResults[fname].data); + const StringSet expected_types = toSet(testResults[fname].type); + const size_t expected_count = expected_lines.size(); // assume codes are unique + // TODO: verify points location + + Mat img = imread(image_path); + ASSERT_FALSE(img.empty()) << "Can't read image: " << image_path; + + barcode::BarcodeDetector det; + vector points; + vector types; + vector lines; + + // common interface (single) + { + bool res = det.detect(img, points); + ASSERT_TRUE(res); + EXPECT_EQ(expected_count * 4, points.size()); + } + + { + string res = det.decode(img, points); + ASSERT_FALSE(res.empty()); + EXPECT_EQ(1u, expected_lines.count(res)); + } + + // common interface (multi) + { + bool res = det.detectMulti(img, points); + ASSERT_TRUE(res); + EXPECT_EQ(expected_count * 4, points.size()); + } + + { + bool res = det.decodeMulti(img, points, lines); + ASSERT_TRUE(res); + EXPECT_EQ(expected_lines, toSet(lines)); + } + + // specific interface + { + bool res = det.decodeWithType(img, points, lines, types); + ASSERT_TRUE(res); + EXPECT_EQ(expected_types, toSet(types)); + EXPECT_EQ(expected_lines, toSet(lines)); + } + + { + bool res = det.detectAndDecodeWithType(img, lines, types, points); + ASSERT_TRUE(res); + EXPECT_EQ(expected_types, toSet(types)); + EXPECT_EQ(expected_lines, toSet(lines)); + } +} + 
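The public API exercised by this test reads the same way in application code. A minimal sketch (the file name and lack of error handling are illustrative; `BarcodeDetector` and `detectAndDecodeWithType` are exactly the calls used above):

```cpp
#include <opencv2/imgcodecs.hpp>
#include <opencv2/objdetect/barcode.hpp>
#include <iostream>

int main()
{
    cv::Mat img = cv::imread("4_barcodes.jpg");   // any barcode photo
    cv::barcode::BarcodeDetector det;
    std::vector<std::string> infos, types;
    std::vector<cv::Point2f> corners;             // 4 corner points per symbol
    if (det.detectAndDecodeWithType(img, infos, types, corners))
        for (size_t i = 0; i < infos.size(); i++)
            std::cout << types[i] << ": " << infos[i] << std::endl;
    return 0;
}
```

Note that `detectAndDecodeWithType` reports the symbology (e.g. `EAN_13`) alongside each decoded payload, which is what the `expected_types` checks above rely on.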
+INSTANTIATE_TEST_CASE_P(/**/, BarcodeDetector_main, testing::ValuesIn(getKeys(testResults)));
+
+TEST(BarcodeDetector_base, invalid)
+{
+    auto bardet = barcode::BarcodeDetector();
+    std::vector<Point2f> corners;
+    vector<string> decoded_info;
+    Mat zero_image = Mat::zeros(256, 256, CV_8UC1);
+    EXPECT_FALSE(bardet.detectMulti(zero_image, corners));
+    corners = std::vector<Point2f>(4);
+    EXPECT_ANY_THROW(bardet.decodeMulti(zero_image, corners, decoded_info));
+}
+
+}} // opencv_test::<anonymous>::
diff --git a/modules/objdetect/test/test_boarddetection.cpp b/modules/objdetect/test/test_boarddetection.cpp
index ed940069fb..e47e6c3cb6 100644
--- a/modules/objdetect/test/test_boarddetection.cpp
+++ b/modules/objdetect/test/test_boarddetection.cpp
@@ -26,7 +26,7 @@ class CV_ArucoBoardPose : public cvtest::BaseTest {
         params.minDistanceToBorder = 3;
         if (arucoAlgParams == ArucoAlgParams::USE_ARUCO3) {
             params.useAruco3Detection = true;
-            params.cornerRefinementMethod = aruco::CORNER_REFINE_SUBPIX;
+            params.cornerRefinementMethod = (int)aruco::CORNER_REFINE_SUBPIX;
             params.minSideLengthCanonicalImg = 16;
             params.errorCorrectionRate = 0.8;
         }
@@ -137,7 +137,7 @@ class CV_ArucoRefine : public cvtest::BaseTest {
         aruco::Dictionary dictionary = aruco::getPredefinedDictionary(aruco::DICT_6X6_250);
         aruco::DetectorParameters params;
         params.minDistanceToBorder = 3;
-        params.cornerRefinementMethod = aruco::CORNER_REFINE_SUBPIX;
+        params.cornerRefinementMethod = (int)aruco::CORNER_REFINE_SUBPIX;
         if (arucoAlgParams == ArucoAlgParams::USE_ARUCO3)
             params.useAruco3Detection = true;
         aruco::RefineParameters refineParams(10.f, 3.f, true);
diff --git a/modules/objdetect/test/test_charucodetection.cpp b/modules/objdetect/test/test_charucodetection.cpp
index 87d2f12b02..3a459e11fc 100644
--- a/modules/objdetect/test/test_charucodetection.cpp
+++ b/modules/objdetect/test/test_charucodetection.cpp
@@ -612,7 +612,7 @@ TEST(Charuco, testBoardSubpixelCoords)
     cv::GaussianBlur(gray, gray, Size(5, 5), 1.0);
 
     aruco::DetectorParameters params;
-    params.cornerRefinementMethod = cv::aruco::CORNER_REFINE_APRILTAG;
+    params.cornerRefinementMethod = (int)cv::aruco::CORNER_REFINE_APRILTAG;
 
     aruco::CharucoParameters charucoParameters;
     charucoParameters.cameraMatrix = K;
@@ -636,7 +636,7 @@ TEST(Charuco, issue_14014)
     Mat img = imread(imgPath);
 
     aruco::DetectorParameters detectorParams;
-    detectorParams.cornerRefinementMethod = aruco::CORNER_REFINE_SUBPIX;
+    detectorParams.cornerRefinementMethod = (int)aruco::CORNER_REFINE_SUBPIX;
     detectorParams.cornerRefinementMinAccuracy = 0.01;
     aruco::ArucoDetector detector(aruco::getPredefinedDictionary(aruco::DICT_7X7_250), detectorParams);
     aruco::CharucoBoard board(Size(8, 5), 0.03455f, 0.02164f, detector.getDictionary());
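For readers who want to exercise the new barcode API from Python rather than through the C++ test above, the same calls map onto the 4.8 bindings roughly as follows (a minimal sketch; the image path mirrors the test data and is hypothetical):

```python
import cv2 as cv

img = cv.imread("barcode/single/book.jpg")  # hypothetical path, as in testResults above
det = cv.barcode.BarcodeDetector()

# common interface (multi): every detected barcode contributes 4 corner points
ok, points = det.detectMulti(img)

# decode previously detected regions; one string per barcode
ok, decoded_info, _ = det.decodeMulti(img, points)

# one-shot variant that also reports the barcode type (e.g. "EAN_13")
ok, decoded_info, decoded_type, points = det.detectAndDecodeWithType(img)
```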
diff --git a/modules/objdetect/test/test_qr_utils.hpp b/modules/objdetect/test/test_qr_utils.hpp
new file mode 100644
index 0000000000..cfbe1a5078
--- /dev/null
+++ b/modules/objdetect/test/test_qr_utils.hpp
@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "test_precomp.hpp"
+
+namespace opencv_test {
+
+static inline
+void check_qr(const string& root, const string& name_current_image, const string& config_name,
+              const std::vector<Point2f>& corners,
+              const std::vector<std::string>& decoded_info, const int max_pixel_error,
+              bool isMulti = false) {
+    const std::string dataset_config = findDataFile(root + "dataset_config.json");
+    FileStorage file_config(dataset_config, FileStorage::READ);
+    ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
+    FileNode images_list = file_config[config_name];
+    size_t images_count = static_cast<size_t>(images_list.size());
+    ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;
+    for (size_t index = 0; index < images_count; index++) {
+        FileNode config = images_list[(int)index];
+        std::string name_test_image = config["image_name"];
+        if (name_test_image == name_current_image) {
+            if (isMulti) {
+                for(int j = 0; j < int(corners.size()); j += 4) {
+                    bool ok = false;
+                    for (int k = 0; k < int(corners.size() / 4); k++) {
+                        int count_eq_points = 0;
+                        for (int i = 0; i < 4; i++) {
+                            int x = config["x"][k][i];
+                            int y = config["y"][k][i];
+                            if(((abs(corners[j + i].x - x)) <= max_pixel_error) && ((abs(corners[j + i].y - y)) <= max_pixel_error))
+                                count_eq_points++;
+                        }
+                        if (count_eq_points == 4) {
+                            ok = true;
+                            break;
+                        }
+                    }
+                    EXPECT_TRUE(ok);
+                }
+            }
+            else {
+                for (int i = 0; i < (int)corners.size(); i++) {
+                    int x = config["x"][i];
+                    int y = config["y"][i];
+                    EXPECT_NEAR(x, corners[i].x, max_pixel_error);
+                    EXPECT_NEAR(y, corners[i].y, max_pixel_error);
+                }
+            }
+#ifdef HAVE_QUIRC
+            if (decoded_info.size() == 0ull)
+                return;
+            if (isMulti) {
+                size_t count_eq_info = 0;
+                for(int i = 0; i < int(decoded_info.size()); i++) {
+                    for(int j = 0; j < int(decoded_info.size()); j++) {
+                        std::string original_info = config["info"][j];
+                        if(original_info == decoded_info[i]) {
+                            count_eq_info++;
+                            break;
+                        }
+                    }
+                }
+                EXPECT_EQ(decoded_info.size(), count_eq_info);
+            }
+            else {
+                std::string original_info = config["info"];
+                EXPECT_EQ(decoded_info[0], original_info);
+            }
+#endif
+            return; // done
+        }
+    }
+    FAIL() << "Not found results for '" << name_current_image << "' image in config file:" << dataset_config <<
+        "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data.\n";
+}
+
+}
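The rewritten tests below drive both detectors through the common GraphicalCodeDetector interface, selected by a "contours_based"/"aruco_based" parameter. From Python the same switch looks roughly like this (a sketch assuming the accessors implemented earlier are exposed by the 4.8 bindings; the image path is hypothetical):

```python
import cv2 as cv

img = cv.imread("qrcode/multiple/6_qrcodes.png")  # hypothetical test image

detector = cv.QRCodeDetectorAruco()  # the "aruco_based" path; cv.QRCodeDetector() is "contours_based"

# the Aruco stage is tunable, mirroring get/setArucoParameters() above
params = detector.getArucoParameters()
params.minMarkerPerimeterRate = 0.02  # same value the pimpl constructor uses
detector.setArucoParameters(params)

ok, points = detector.detectMulti(img)                    # 4 corners per QR code
ok, infos, points, _ = detector.detectAndDecodeMulti(img)
```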
diff --git a/modules/objdetect/test/test_qrcode.cpp b/modules/objdetect/test/test_qrcode.cpp
index 0868932eab..5e6ec6faf5 100644
--- a/modules/objdetect/test/test_qrcode.cpp
+++ b/modules/objdetect/test/test_qrcode.cpp
@@ -3,6 +3,7 @@
 // of this distribution and at http://opencv.org/license.html.
 
 #include "test_precomp.hpp"
+#include "test_qr_utils.hpp"
 #include "opencv2/imgproc.hpp"
 
 namespace opencv_test { namespace {
@@ -33,6 +34,8 @@
 std::string qrcode_images_multiple[] = { "5_qrcodes.png", "6_qrcodes.png", "7_qrcodes.png", "8_close_qrcodes.png" };
 
+static std::set<std::pair<std::string, std::string>> disabled_samples = {{"5_qrcodes.png", "aruco_based"}};
+
 //#define UPDATE_QRCODE_TEST_DATA
 #ifdef UPDATE_QRCODE_TEST_DATA
@@ -262,43 +265,7 @@ TEST_P(Objdetect_QRCode, regression)
 #else
     ASSERT_TRUE(qrcode.detect(src, corners));
 #endif
-
-    const std::string dataset_config = findDataFile(root + "dataset_config.json");
-    FileStorage file_config(dataset_config, FileStorage::READ);
-    ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
-    {
-        FileNode images_list = file_config["test_images"];
-        size_t images_count = static_cast<size_t>(images_list.size());
-        ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;
-
-        for (size_t index = 0; index < images_count; index++)
-        {
-            FileNode config = images_list[(int)index];
-            std::string name_test_image = config["image_name"];
-            if (name_test_image == name_current_image)
-            {
-                for (int i = 0; i < 4; i++)
-                {
-                    int x = config["x"][i];
-                    int y = config["y"][i];
-                    EXPECT_NEAR(x, corners[i].x, pixels_error);
-                    EXPECT_NEAR(y, corners[i].y, pixels_error);
-                }
-
-#ifdef HAVE_QUIRC
-                std::string original_info = config["info"];
-                EXPECT_EQ(decoded_info, original_info);
-#endif
-
-                return; // done
-            }
-        }
-        std::cerr
-            << "Not found results for '" << name_current_image
-            << "' image in config file:" << dataset_config << std::endl
-            << "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data."
-            << std::endl;
-    }
+    check_qr(root, name_current_image, "test_images", corners, {decoded_info}, pixels_error);
 }
 
 typedef testing::TestWithParam< std::string > Objdetect_QRCode_Close;
@@ -329,43 +296,7 @@ TEST_P(Objdetect_QRCode_Close, regression)
 #else
     ASSERT_TRUE(qrcode.detect(barcode, corners));
 #endif
-
-    const std::string dataset_config = findDataFile(root + "dataset_config.json");
-    FileStorage file_config(dataset_config, FileStorage::READ);
-    ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
-    {
-        FileNode images_list = file_config["close_images"];
-        size_t images_count = static_cast<size_t>(images_list.size());
-        ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;
-
-        for (size_t index = 0; index < images_count; index++)
-        {
-            FileNode config = images_list[(int)index];
-            std::string name_test_image = config["image_name"];
-            if (name_test_image == name_current_image)
-            {
-                for (int i = 0; i < 4; i++)
-                {
-                    int x = config["x"][i];
-                    int y = config["y"][i];
-                    EXPECT_NEAR(x, corners[i].x, pixels_error);
-                    EXPECT_NEAR(y, corners[i].y, pixels_error);
-                }
-
-#ifdef HAVE_QUIRC
-                std::string original_info = config["info"];
-                EXPECT_EQ(decoded_info, original_info);
-#endif
-
-                return; // done
-            }
-        }
-        std::cerr
-            << "Not found results for '" << name_current_image
-            << "' image in config file:" << dataset_config << std::endl
-            << "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data."
-            << std::endl;
-    }
+    check_qr(root, name_current_image, "close_images", corners, {decoded_info}, pixels_error);
 }
 
 typedef testing::TestWithParam< std::string > Objdetect_QRCode_Monitor;
@@ -396,43 +327,7 @@ TEST_P(Objdetect_QRCode_Monitor, regression)
 #else
     ASSERT_TRUE(qrcode.detect(barcode, corners));
 #endif
-
-    const std::string dataset_config = findDataFile(root + "dataset_config.json");
-    FileStorage file_config(dataset_config, FileStorage::READ);
-    ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
-    {
-        FileNode images_list = file_config["monitor_images"];
-        size_t images_count = static_cast<size_t>(images_list.size());
-        ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;
-
-        for (size_t index = 0; index < images_count; index++)
-        {
-            FileNode config = images_list[(int)index];
-            std::string name_test_image = config["image_name"];
-            if (name_test_image == name_current_image)
-            {
-                for (int i = 0; i < 4; i++)
-                {
-                    int x = config["x"][i];
-                    int y = config["y"][i];
-                    EXPECT_NEAR(x, corners[i].x, pixels_error);
-                    EXPECT_NEAR(y, corners[i].y, pixels_error);
-                }
-
-#ifdef HAVE_QUIRC
-                std::string original_info = config["info"];
-                EXPECT_EQ(decoded_info, original_info);
-#endif
-
-                return; // done
-            }
-        }
-        std::cerr
-            << "Not found results for '" << name_current_image
-            << "' image in config file:" << dataset_config << std::endl
-            << "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data."
-            << std::endl;
-    }
+    check_qr(root, name_current_image, "monitor_images", corners, {decoded_info}, pixels_error);
 }
 
 typedef testing::TestWithParam< std::string > Objdetect_QRCode_Curved;
@@ -458,56 +353,26 @@ TEST_P(Objdetect_QRCode_Curved, regression)
 #else
     ASSERT_TRUE(qrcode.detect(src, corners));
 #endif
-
-    const std::string dataset_config = findDataFile(root + "dataset_config.json");
-    FileStorage file_config(dataset_config, FileStorage::READ);
-    ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
-    {
-        FileNode images_list = file_config["test_images"];
-        size_t images_count = static_cast<size_t>(images_list.size());
-        ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;
-
-        for (size_t index = 0; index < images_count; index++)
-        {
-            FileNode config = images_list[(int)index];
-            std::string name_test_image = config["image_name"];
-            if (name_test_image == name_current_image)
-            {
-                for (int i = 0; i < 4; i++)
-                {
-                    int x = config["x"][i];
-                    int y = config["y"][i];
-                    EXPECT_NEAR(x, corners[i].x, pixels_error);
-                    EXPECT_NEAR(y, corners[i].y, pixels_error);
-                }
-
-#ifdef HAVE_QUIRC
-                std::string original_info = config["info"];
-                EXPECT_EQ(decoded_info, original_info);
-#endif
-
-                return; // done
-            }
-        }
-        std::cerr
-            << "Not found results for '" << name_current_image
-            << "' image in config file:" << dataset_config << std::endl
-            << "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data."
-            << std::endl;
-    }
+    check_qr(root, name_current_image, "test_images", corners, {decoded_info}, pixels_error);
 }
 
-typedef testing::TestWithParam < std::string > Objdetect_QRCode_Multi;
+typedef testing::TestWithParam<std::tuple<std::string, std::string>> Objdetect_QRCode_Multi;
 TEST_P(Objdetect_QRCode_Multi, regression)
 {
-    const std::string name_current_image = GetParam();
+    const std::string name_current_image = get<0>(GetParam());
     const std::string root = "qrcode/multiple/";
+    const std::string method = get<1>(GetParam());
     const int pixels_error = 4;
 
     std::string image_path = findDataFile(root + name_current_image);
     Mat src = imread(image_path);
     ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path;
-    QRCodeDetector qrcode;
+    if (disabled_samples.find({name_current_image, method}) != disabled_samples.end())
+        throw SkipTestException(name_current_image + " is disabled sample for method " + method);
+    GraphicalCodeDetector qrcode = QRCodeDetector();
+    if (method == "aruco_based") {
+        qrcode = QRCodeDetectorAruco();
+    }
     std::vector<Point2f> corners;
 #ifdef HAVE_QUIRC
     std::vector<cv::String> decoded_info;
@@ -521,75 +386,15 @@
 #else
     ASSERT_TRUE(qrcode.detectMulti(src, corners));
 #endif
-
-    const std::string dataset_config = findDataFile(root + "dataset_config.json");
-    FileStorage file_config(dataset_config, FileStorage::READ);
-    ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
-    {
-        FileNode images_list = file_config["multiple_images"];
-        size_t images_count = static_cast<size_t>(images_list.size());
-        ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;
-        for (size_t index = 0; index < images_count; index++)
-        {
-            FileNode config = images_list[(int)index];
-            std::string name_test_image = config["image_name"];
-            if (name_test_image == name_current_image)
-            {
-                for(int j = 0; j < int(corners.size()); j += 4)
-                {
-                    bool ok = false;
-                    for (int k = 0; k < int(corners.size() / 4); k++)
-                    {
-                        int count_eq_points = 0;
-                        for (int i = 0; i < 4; i++)
-                        {
-                            int x = config["x"][k][i];
-                            int y = config["y"][k][i];
-                            if(((abs(corners[j + i].x - x)) <= pixels_error) && ((abs(corners[j + i].y - y)) <= pixels_error))
-                                count_eq_points++;
-                        }
-                        if (count_eq_points == 4)
-                        {
-                            ok = true;
-                            break;
-                        }
-                    }
-                    EXPECT_TRUE(ok);
-                }
-
-#ifdef HAVE_QUIRC
-                size_t count_eq_info = 0;
-                for(int i = 0; i < int(decoded_info.size()); i++)
-                {
-                    for(int j = 0; j < int(decoded_info.size()); j++)
-                    {
-                        std::string original_info = config["info"][j];
-                        if(original_info == decoded_info[i])
-                        {
-                            count_eq_info++;
-                            break;
-                        }
-                    }
-                }
-                EXPECT_EQ(decoded_info.size(), count_eq_info);
-#endif
-
-                return; // done
-            }
-        }
-        std::cerr
-            << "Not found results for '" << name_current_image
-            << "' image in config file:" << dataset_config << std::endl
-            << "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data."
-            << std::endl;
-    }
+    check_qr(root, name_current_image, "multiple_images", corners, decoded_info, pixels_error, true);
 }
 
 INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode, testing::ValuesIn(qrcode_images_name));
 INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Close, testing::ValuesIn(qrcode_images_close));
 INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Monitor, testing::ValuesIn(qrcode_images_monitor));
 INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Curved, testing::ValuesIn(qrcode_images_curved));
-INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Multi, testing::ValuesIn(qrcode_images_multiple));
+INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Multi, testing::Combine(testing::ValuesIn(qrcode_images_multiple),
+                                                                       testing::Values("contours_based", "aruco_based")));
 
 TEST(Objdetect_QRCode_decodeMulti, decode_regression_16491)
 {
@@ -611,8 +416,10 @@
 #endif
 }
 
-TEST(Objdetect_QRCode_detectMulti, detect_regression_16961)
+typedef testing::TestWithParam<std::string> Objdetect_QRCode_detectMulti;
+TEST_P(Objdetect_QRCode_detectMulti, detect_regression_16961)
 {
+    const std::string method = GetParam();
     const std::string name_current_image = "9_qrcodes.jpg";
     const std::string root = "qrcode/multiple/";
 
@@ -620,7 +427,10 @@
     Mat src = imread(image_path);
     ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path;
 
-    QRCodeDetector qrcode;
+    GraphicalCodeDetector qrcode = QRCodeDetector();
+    if (method == "aruco_based") {
+        qrcode = QRCodeDetectorAruco();
+    }
     std::vector<Point2f> corners;
     EXPECT_TRUE(qrcode.detectMulti(src, corners));
     ASSERT_FALSE(corners.empty());
@@ -628,21 +438,27 @@
     EXPECT_EQ(corners.size(), expect_corners_size);
 }
 
-TEST(Objdetect_QRCode_decodeMulti, check_output_parameters_type_19363)
+INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_detectMulti, testing::Values("contours_based", "aruco_based"));
+typedef testing::TestWithParam<std::string> Objdetect_QRCode_detectAndDecodeMulti;
+TEST_P(Objdetect_QRCode_detectAndDecodeMulti, check_output_parameters_type_19363)
 {
     const std::string name_current_image = "9_qrcodes.jpg";
     const std::string root = "qrcode/multiple/";
+    const std::string method = GetParam();
 
     std::string image_path = findDataFile(root + name_current_image);
     Mat src = imread(image_path);
     ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path;
 #ifdef HAVE_QUIRC
-    QRCodeDetector qrcode;
+    GraphicalCodeDetector qrcode = QRCodeDetector();
+    if (method == "aruco_based") {
+        qrcode = QRCodeDetectorAruco();
+    }
     std::vector<Point2f> corners;
     std::vector<cv::String> decoded_info;
 #if 0  // FIXIT: OutputArray::create() type check
     std::vector<Mat> straight_barcode_nchannels;
-    EXPECT_ANY_THROW(qrcode.detectAndDecodeMulti(src, decoded_info, corners, straight_barcode_nchannels));
+    EXPECT_ANY_THROW(qrcode->detectAndDecodeMulti(src, decoded_info, corners, straight_barcode_nchannels));
 #endif
 
     int expected_barcode_type = CV_8UC1;
@@ -653,6 +469,8 @@
     EXPECT_EQ(expected_barcode_type, straight_barcode[i].type());
 #endif
 }
+INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_detectAndDecodeMulti, testing::Values("contours_based", "aruco_based"));
+
 
 TEST(Objdetect_QRCode_detect, detect_regression_20882)
 {
@@ -793,14 +611,18 @@ TEST(Objdetect_QRCode_decode, decode_regression_version_25)
 #endif
 }
 
-TEST(Objdetect_QRCode_decodeMulti, decode_9_qrcodes_version7)
+TEST_P(Objdetect_QRCode_detectAndDecodeMulti, decode_9_qrcodes_version7)
 {
     const std::string name_current_image = "9_qrcodes_version7.jpg";
     const std::string root = "qrcode/multiple/";
 
     std::string image_path = findDataFile(root + name_current_image);
     Mat src = imread(image_path);
-    QRCodeDetector qrcode;
+    const std::string method = GetParam();
+    GraphicalCodeDetector qrcode = QRCodeDetector();
+    if (method == "aruco_based") {
+        qrcode = QRCodeDetectorAruco();
+    }
     std::vector<Point2f> corners;
     std::vector<cv::String> decoded_info;
diff --git a/modules/python/package/setup.py b/modules/python/package/setup.py
index f7e562b0f0..631c0c769c 100644
--- a/modules/python/package/setup.py
+++ b/modules/python/package/setup.py
@@ -46,33 +46,31 @@ def main():
         maintainer="OpenCV Team",
         install_requires="numpy",
         classifiers=[
-            'Development Status :: 5 - Production/Stable',
-            'Environment :: Console',
-            'Intended Audience :: Developers',
-            'Intended Audience :: Education',
-            'Intended Audience :: Information Technology',
-            'Intended Audience :: Science/Research',
-            'License :: Apache 2.0 License',
-            'Operating System :: MacOS',
-            'Operating System :: Microsoft :: Windows',
-            'Operating System :: POSIX',
-            'Operating System :: Unix',
-            'Programming Language :: Python',
-            'Programming Language :: Python :: 2',
-            'Programming Language :: Python :: 2.7',
-            'Programming Language :: Python :: 3',
-            'Programming Language :: Python :: 3.4',
-            'Programming Language :: Python :: 3.5',
-            'Programming Language :: Python :: 3.6',
-            'Programming Language :: Python :: 3.7',
-            'Programming Language :: Python :: 3.8',
-            'Programming Language :: Python :: 3.9',
-            'Programming Language :: C++',
-            'Programming Language :: Python :: Implementation :: CPython',
-            'Topic :: Scientific/Engineering',
-            'Topic :: Scientific/Engineering :: Image Recognition',
-            'Topic :: Software Development',
-            'Topic :: Software Development :: Libraries',
+            "Development Status :: 5 - Production/Stable",
+            "Environment :: Console",
+            "Intended Audience :: Developers",
+            "Intended Audience :: Education",
+            "Intended Audience :: Information Technology",
+            "Intended Audience :: Science/Research",
+            "License :: OSI Approved :: Apache Software License",
+            "Operating System :: MacOS",
+            "Operating System :: Microsoft :: Windows",
+            "Operating System :: POSIX",
+            "Operating System :: Unix",
+            "Programming Language :: Python",
+            "Programming Language :: Python :: 3",
+            "Programming Language :: Python :: 3 :: Only",
+            "Programming Language :: Python :: 3.6",
+            "Programming Language :: Python :: 3.7",
+            "Programming Language :: Python :: 3.8",
+            "Programming Language :: Python :: 3.9",
+            "Programming Language :: Python :: 3.10",
+            "Programming Language :: Python :: 3.11",
+            "Programming Language :: C++",
+            "Programming Language :: Python :: Implementation :: CPython",
+            "Topic :: Scientific/Engineering",
+            "Topic :: Scientific/Engineering :: Image Recognition",
+            "Topic :: Software Development",
         ],
     )
diff --git a/modules/python/src2/cv2_convert.cpp b/modules/python/src2/cv2_convert.cpp
index f2be05d92d..f03a2e2d86 100644
--- a/modules/python/src2/cv2_convert.cpp
+++ b/modules/python/src2/cv2_convert.cpp
@@ -327,7 +327,7 @@ bool pyopencv_to(PyObject *o, Scalar& s, const ArgInfo& info)
         }
     } else {
         if (PyFloat_Check(o) || PyInt_Check(o)) {
-            s[0] = PyFloat_AsDouble(o);
+            s = PyFloat_AsDouble(o);
         } else {
             failmsg("Scalar value for argument '%s' is not numeric", info.name);
             return false;
diff --git a/modules/python/src2/cv2_convert.hpp b/modules/python/src2/cv2_convert.hpp
index eae20b2c98..43ef7b2302 100644
--- a/modules/python/src2/cv2_convert.hpp
+++ b/modules/python/src2/cv2_convert.hpp
@@ -6,6 +6,8 @@
 #include "cv2_numpy.hpp"
 #include <vector>
 #include <string>
+#include <map>
+#include <utility>
 #include <type_traits>  // std::enable_if
 
 extern PyTypeObject* pyopencv_Mat_TypePtr;
@@ -263,6 +265,43 @@ PyObject* pyopencv_from(const std::vector<Tp>& value)
 {
     return pyopencvVecConverter<Tp>::from(value);
 }
 
+template <typename K, typename V>
+bool pyopencv_to(PyObject *obj, std::map<K, V> &map, const ArgInfo& info)
+{
+    if (!obj || obj == Py_None)
+    {
+        return true;
+    }
+
+    PyObject* py_key = nullptr;
+    PyObject* py_value = nullptr;
+    Py_ssize_t pos = 0;
+
+    if (!PyDict_Check(obj)) {
+        failmsg("Can't parse '%s'. Input argument isn't dict or"
+                " an instance of subtype of the dict type", info.name);
+        return false;
+    }
+
+    while(PyDict_Next(obj, &pos, &py_key, &py_value))
+    {
+        K cpp_key;
+        if (!pyopencv_to(py_key, cpp_key, ArgInfo("key", false))) {
+            failmsg("Can't parse dict key. Key on position %lu has a wrong type", pos);
+            return false;
+        }
+
+        V cpp_value;
+        if (!pyopencv_to(py_value, cpp_value, ArgInfo("value", false))) {
+            failmsg("Can't parse dict value. Value on position %lu has a wrong type", pos);
+            return false;
+        }
+
+        map.emplace(cpp_key, cpp_value);
+    }
+    return true;
+}
+
 template<typename Tp>
 static bool pyopencv_to_generic_vec(PyObject* obj, std::vector<Tp>& value, const ArgInfo& info)
 {
diff --git a/modules/python/src2/typing_stubs_generation/generation.py b/modules/python/src2/typing_stubs_generation/generation.py
index 018c55414a..4330683774 100644
--- a/modules/python/src2/typing_stubs_generation/generation.py
+++ b/modules/python/src2/typing_stubs_generation/generation.py
@@ -611,9 +611,7 @@ def _generate_typing_module(root: NamespaceNode, output_path: Path) -> None:
     """
     def register_alias_links_from_aggregated_type(type_node: TypeNode) -> None:
         assert isinstance(type_node, AggregatedTypeNode), \
-            "Provided type node '{}' is not an aggregated type".format(
-                type_node.ctype_name
-            )
+            f"Provided type node '{type_node.ctype_name}' is not an aggregated type"
 
         for item in filter(lambda i: isinstance(i, AliasRefTypeNode), type_node):
             register_alias(PREDEFINED_TYPES[item.ctype_name])  # type: ignore
@@ -631,8 +629,8 @@ def _generate_typing_module(root: NamespaceNode, output_path: Path) -> None:
         aliases[typename] = alias_node.value.full_typename.replace(
             root.export_name + ".typing.", ""
         )
-        if alias_node.comment is not None:
-            aliases[typename] += " # " + alias_node.comment
+        if alias_node.doc is not None:
+            aliases[typename] += f'\n"""{alias_node.doc}"""'
         for required_import in alias_node.required_definition_imports:
             required_imports.add(required_import)
 
@@ -643,12 +641,18 @@ def _generate_typing_module(root: NamespaceNode, output_path: Path) -> None:
     aliases: Dict[str, str] = {}
 
     # Resolve each node and register aliases
+    TypeNode.compatible_to_runtime_usage = True
    for node in PREDEFINED_TYPES.values():
         node.resolve(root)
         if isinstance(node, AliasTypeNode):
             register_alias(node)
 
     output_stream = StringIO()
+    output_stream.write("__all__ = [\n")
+    for alias_name in aliases:
+        output_stream.write(f'    "{alias_name}",\n')
+    output_stream.write("]\n\n")
+
     _write_required_imports(required_imports, output_stream)
 
     for alias_name, alias_type in aliases.items():
@@ -657,7 +661,8 @@ def _generate_typing_module(root: NamespaceNode, output_path: Path) -> None:
         output_stream.write(alias_type)
         output_stream.write("\n")
 
-    (output_path / "__init__.pyi").write_text(output_stream.getvalue())
+    TypeNode.compatible_to_runtime_usage = False
+    (output_path / "__init__.py").write_text(output_stream.getvalue())
 
 
 StubGenerator = Callable[[ASTNode, StringIO, int], None]
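Since the generator now writes `__init__.py` (with an `__all__` list and, thanks to `compatible_to_runtime_usage`, `typing.*` spellings that are valid at runtime), the aliases become a real importable module instead of stub-only declarations. A rough usage sketch, assuming a build that ships the generated `cv2.typing` module:

```python
import typing
import cv2.typing

# "Size" is one of the aliases registered in predefined_types.py; at runtime
# it resolves to typing.Sequence[int] (its doc says "Required length is 2").
def resize_to(size: cv2.typing.Size) -> None:
    ...

# Because cv2.typing is a real module (not a .pyi), annotations survive introspection:
print(typing.get_type_hints(resize_to))
```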
diff --git a/modules/python/src2/typing_stubs_generation/nodes/type_node.py b/modules/python/src2/typing_stubs_generation/nodes/type_node.py
index 3a7b47ee79..a21f983b20 100644
--- a/modules/python/src2/typing_stubs_generation/nodes/type_node.py
+++ b/modules/python/src2/typing_stubs_generation/nodes/type_node.py
@@ -19,6 +19,17 @@ class TypeNode(abc.ABC):
       e.g. `cv::Rect`.
     - There is no information about types visibility (see `ASTNodeTypeNode`).
     """
+    compatible_to_runtime_usage = False
+    """Class-wide property that switches exported type names for several nodes.
+    Example:
+    >>> node = OptionalTypeNode(ASTNodeTypeNode("Size"))
+    >>> node.typename  # TypeNode.compatible_to_runtime_usage == False
+    "Size | None"
+    >>> TypeNode.compatible_to_runtime_usage = True
+    >>> node.typename
+    "typing.Optional[Size]"
+    """
+
     def __init__(self, ctype_name: str) -> None:
         self.ctype_name = ctype_name
 
@@ -247,11 +258,11 @@ class AliasTypeNode(TypeNode):
     """
     def __init__(self, ctype_name: str, value: TypeNode,
                  export_name: Optional[str] = None,
-                 comment: Optional[str] = None) -> None:
+                 doc: Optional[str] = None) -> None:
         super().__init__(ctype_name)
         self.value = value
         self._export_name = export_name
-        self.comment = comment
+        self.doc = doc
 
     @property
     def typename(self) -> str:
@@ -287,82 +298,82 @@ class AliasTypeNode(TypeNode):
 
     @classmethod
     def int_(cls, ctype_name: str, export_name: Optional[str] = None,
-             comment: Optional[str] = None):
-        return cls(ctype_name, PrimitiveTypeNode.int_(), export_name, comment)
+             doc: Optional[str] = None):
+        return cls(ctype_name, PrimitiveTypeNode.int_(), export_name, doc)
 
     @classmethod
     def float_(cls, ctype_name: str, export_name: Optional[str] = None,
-               comment: Optional[str] = None):
-        return cls(ctype_name, PrimitiveTypeNode.float_(), export_name, comment)
+               doc: Optional[str] = None):
+        return cls(ctype_name, PrimitiveTypeNode.float_(), export_name, doc)
 
     @classmethod
     def array_(cls, ctype_name: str, shape: Optional[Tuple[int, ...]],
                dtype: Optional[str] = None, export_name: Optional[str] = None,
-               comment: Optional[str] = None):
-        if comment is None:
-            comment = "Shape: " + str(shape)
+               doc: Optional[str] = None):
+        if doc is None:
+            doc = "Shape: " + str(shape)
         else:
-            comment += ". Shape: " + str(shape)
+            doc += ". Shape: " + str(shape)
         return cls(ctype_name, NDArrayTypeNode(ctype_name, shape, dtype),
-                   export_name, comment)
+                   export_name, doc)
 
     @classmethod
     def union_(cls, ctype_name: str, items: Tuple[TypeNode, ...],
                export_name: Optional[str] = None,
-               comment: Optional[str] = None):
+               doc: Optional[str] = None):
         return cls(ctype_name, UnionTypeNode(ctype_name, items),
-                   export_name, comment)
+                   export_name, doc)
 
     @classmethod
     def optional_(cls, ctype_name: str, item: TypeNode,
                   export_name: Optional[str] = None,
-                  comment: Optional[str] = None):
-        return cls(ctype_name, OptionalTypeNode(item), export_name, comment)
+                  doc: Optional[str] = None):
+        return cls(ctype_name, OptionalTypeNode(item), export_name, doc)
 
     @classmethod
     def sequence_(cls, ctype_name: str, item: TypeNode,
                   export_name: Optional[str] = None,
-                  comment: Optional[str] = None):
+                  doc: Optional[str] = None):
         return cls(ctype_name, SequenceTypeNode(ctype_name, item),
-                   export_name, comment)
+                   export_name, doc)
 
     @classmethod
     def tuple_(cls, ctype_name: str, items: Tuple[TypeNode, ...],
               export_name: Optional[str] = None,
-              comment: Optional[str] = None):
+              doc: Optional[str] = None):
         return cls(ctype_name, TupleTypeNode(ctype_name, items),
-                   export_name, comment)
+                   export_name, doc)
 
     @classmethod
     def class_(cls, ctype_name: str, class_name: str,
              export_name: Optional[str] = None,
-             comment: Optional[str] = None):
+             doc: Optional[str] = None):
         return cls(ctype_name, ASTNodeTypeNode(class_name),
-                   export_name, comment)
+                   export_name, doc)
 
     @classmethod
     def callable_(cls, ctype_name: str,
                   arg_types: Union[TypeNode, Sequence[TypeNode]],
                   ret_type: TypeNode = NoneTypeNode("void"),
                   export_name: Optional[str] = None,
-                  comment: Optional[str] = None):
+                  doc: Optional[str] = None):
         return cls(ctype_name,
                    CallableTypeNode(ctype_name, arg_types, ret_type),
-                   export_name, comment)
+                   export_name, doc)
 
     @classmethod
     def ref_(cls, ctype_name: str, alias_ctype_name: str,
             alias_export_name: Optional[str] = None,
-            export_name: Optional[str] = None, comment: Optional[str] = None):
+            export_name: Optional[str] = None, doc: Optional[str] = None):
         return cls(ctype_name,
                    AliasRefTypeNode(alias_ctype_name, alias_export_name),
-                   export_name, comment)
+                   export_name, doc)
 
     @classmethod
     def dict_(cls, ctype_name: str, key_type: TypeNode, value_type: TypeNode,
-              export_name: Optional[str] = None, comment: Optional[str] = None):
+              export_name: Optional[str] = None, doc: Optional[str] = None):
         return cls(ctype_name,
                    DictTypeNode(ctype_name, key_type, value_type),
-                   export_name, comment)
+                   export_name, doc)
 
 
 class NDArrayTypeNode(TypeNode):
@@ -543,6 +554,16 @@ class ContainerTypeNode(AggregatedTypeNode):
             item.relative_typename(module) for item in self
         ))
 
+    @property
+    def required_definition_imports(self) -> Generator[str, None, None]:
+        yield "import typing"
+        yield from super().required_definition_imports
+
+    @property
+    def required_usage_imports(self) -> Generator[str, None, None]:
+        if TypeNode.compatible_to_runtime_usage:
+            yield "import typing"
+        yield from super().required_usage_imports
 
     @abc.abstractproperty
     def type_format(self) -> str:
         pass
@@ -560,30 +581,22 @@ class SequenceTypeNode(ContainerTypeNode):
         super().__init__(ctype_name, (item, ))
 
     @property
-    def type_format(self):
+    def type_format(self) -> str:
         return "typing.Sequence[{}]"
 
     @property
-    def types_separator(self):
+    def types_separator(self) -> str:
         return ", "
 
-    @property
-    def required_definition_imports(self) -> Generator[str, None, None]:
-        yield "import typing"
-        yield from super().required_definition_imports
-
-    @property
-    def required_usage_imports(self) -> Generator[str, None, None]:
-        yield "import typing"
-        yield from super().required_usage_imports
-
 
 class TupleTypeNode(ContainerTypeNode):
-    """Type node representing possibly heterogenous collection of types with
+    """Type node representing possibly heterogeneous collection of types with
     possibly unspecified length.
     """
     @property
-    def type_format(self):
+    def type_format(self) -> str:
+        if TypeNode.compatible_to_runtime_usage:
+            return "typing.Tuple[{}]"
         return "tuple[{}]"
 
     @property
@@ -595,20 +608,34 @@ class UnionTypeNode(ContainerTypeNode):
     """Type node representing type that can be one of the predefined set of types.
     """
     @property
-    def type_format(self):
+    def type_format(self) -> str:
+        if TypeNode.compatible_to_runtime_usage:
+            return "typing.Union[{}]"
         return "{}"
 
     @property
-    def types_separator(self):
+    def types_separator(self) -> str:
+        if TypeNode.compatible_to_runtime_usage:
+            return ", "
         return " | "
 
 
-class OptionalTypeNode(UnionTypeNode):
+class OptionalTypeNode(ContainerTypeNode):
     """Type node representing optional type which is effectively is a union
     of value type node and None.
     """
     def __init__(self, value: TypeNode) -> None:
-        super().__init__(value.ctype_name, (value, NoneTypeNode(value.ctype_name)))
+        super().__init__(value.ctype_name, (value,))
+
+    @property
+    def type_format(self) -> str:
+        if TypeNode.compatible_to_runtime_usage:
+            return "typing.Optional[{}]"
+        return "{} | None"
+
+    @property
+    def types_separator(self) -> str:
+        return ", "
 
 
 class DictTypeNode(ContainerTypeNode):
@@ -627,11 +654,13 @@ class DictTypeNode(ContainerTypeNode):
         return self.items[1]
 
     @property
-    def type_format(self):
+    def type_format(self) -> str:
+        if TypeNode.compatible_to_runtime_usage:
+            return "typing.Dict[{}]"
         return "dict[{}]"
 
     @property
-    def types_separator(self):
+    def types_separator(self) -> str:
         return ", "
diff --git a/modules/python/src2/typing_stubs_generation/predefined_types.py b/modules/python/src2/typing_stubs_generation/predefined_types.py
index 36f0eac9eb..842ab3be6e 100644
--- a/modules/python/src2/typing_stubs_generation/predefined_types.py
+++ b/modules/python/src2/typing_stubs_generation/predefined_types.py
@@ -40,65 +40,65 @@ _PREDEFINED_TYPES = (
     ),
     AliasTypeNode.sequence_("MatShape", PrimitiveTypeNode.int_()),
     AliasTypeNode.sequence_("Size", PrimitiveTypeNode.int_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Size2f", PrimitiveTypeNode.float_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Scalar", PrimitiveTypeNode.float_(),
-                            comment="Required length is at most 4"),
+                            doc="Required length is at most 4"),
     AliasTypeNode.sequence_("Point", PrimitiveTypeNode.int_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.ref_("Point2i", "Point"),
     AliasTypeNode.sequence_("Point2f", PrimitiveTypeNode.float_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Point2d", PrimitiveTypeNode.float_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Point3i", PrimitiveTypeNode.int_(),
-                            comment="Required length is 3"),
+                            doc="Required length is 3"),
     AliasTypeNode.sequence_("Point3f", PrimitiveTypeNode.float_(),
-                            comment="Required length is 3"),
+                            doc="Required length is 3"),
     AliasTypeNode.sequence_("Point3d", PrimitiveTypeNode.float_(),
-                            comment="Required length is 3"),
+                            doc="Required length is 3"),
     AliasTypeNode.sequence_("Range", PrimitiveTypeNode.int_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Rect", PrimitiveTypeNode.int_(),
-                            comment="Required length is 4"),
+                            doc="Required length is 4"),
     AliasTypeNode.sequence_("Rect2i", PrimitiveTypeNode.int_(),
-                            comment="Required length is 4"),
+                            doc="Required length is 4"),
     AliasTypeNode.sequence_("Rect2d", PrimitiveTypeNode.float_(),
-                            comment="Required length is 4"),
+                            doc="Required length is 4"),
     AliasTypeNode.dict_("Moments", PrimitiveTypeNode.str_("Moments::key"),
                         PrimitiveTypeNode.float_("Moments::value")),
     AliasTypeNode.tuple_("RotatedRect",
                          items=(AliasRefTypeNode("Point2f"),
                                 AliasRefTypeNode("Size"),
                                 PrimitiveTypeNode.float_()),
-                         comment="Any type providing sequence protocol is supported"),
+                         doc="Any type providing sequence protocol is supported"),
     AliasTypeNode.tuple_("TermCriteria",
                          items=(ASTNodeTypeNode("TermCriteria.Type"),
                                 PrimitiveTypeNode.int_(),
                                 PrimitiveTypeNode.float_()),
-                         comment="Any type providing sequence protocol is supported"),
+                         doc="Any type providing sequence protocol is supported"),
     AliasTypeNode.sequence_("Vec2i", PrimitiveTypeNode.int_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Vec2f", PrimitiveTypeNode.float_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Vec2d", PrimitiveTypeNode.float_(),
-                            comment="Required length is 2"),
+                            doc="Required length is 2"),
     AliasTypeNode.sequence_("Vec3i", PrimitiveTypeNode.int_(),
-                            comment="Required length is 3"),
+                            doc="Required length is 3"),
     AliasTypeNode.sequence_("Vec3f", PrimitiveTypeNode.float_(),
-                            comment="Required length is 3"),
+                            doc="Required length is 3"),
     AliasTypeNode.sequence_("Vec3d", PrimitiveTypeNode.float_(),
-                            comment="Required length is 3"),
+                            doc="Required length is 3"),
     AliasTypeNode.sequence_("Vec4i", PrimitiveTypeNode.int_(),
-                            comment="Required length is 4"),
+                            doc="Required length is 4"),
     AliasTypeNode.sequence_("Vec4f", PrimitiveTypeNode.float_(),
-                            comment="Required length is 4"),
+                            doc="Required length is 4"),
     AliasTypeNode.sequence_("Vec4d", PrimitiveTypeNode.float_(),
-                            comment="Required length is 4"),
+                            doc="Required length is 4"),
     AliasTypeNode.sequence_("Vec6f", PrimitiveTypeNode.float_(),
-                            comment="Required length is 6"),
+                            doc="Required length is 6"),
     AliasTypeNode.class_("FeatureDetector", "Feature2D",
                          export_name="FeatureDetector"),
     AliasTypeNode.class_("DescriptorExtractor", "Feature2D",
@@ -190,6 +190,18 @@ _PREDEFINED_TYPES = (
                                       PrimitiveTypeNode.float_(),
                                       PrimitiveTypeNode.str_())
                          ), export_name="SearchParams"),
+    AliasTypeNode.dict_("map_string_and_string", PrimitiveTypeNode.str_("map_string_and_string::key"),
+                        PrimitiveTypeNode.str_("map_string_and_string::value")),
+    AliasTypeNode.dict_("map_string_and_int", PrimitiveTypeNode.str_("map_string_and_int::key"),
+                        PrimitiveTypeNode.int_("map_string_and_int::value")),
+    AliasTypeNode.dict_("map_string_and_vector_size_t", PrimitiveTypeNode.str_("map_string_and_vector_size_t::key"),
+                        SequenceTypeNode("map_string_and_vector_size_t::value", PrimitiveTypeNode.int_("size_t"))),
+    AliasTypeNode.dict_("map_string_and_vector_float", PrimitiveTypeNode.str_("map_string_and_vector_float::key"),
+                        SequenceTypeNode("map_string_and_vector_float::value", PrimitiveTypeNode.float_())),
+    AliasTypeNode.dict_("map_int_and_double", PrimitiveTypeNode.int_("map_int_and_double::key"),
+                        PrimitiveTypeNode.float_("map_int_and_double::value")),
 )
 
-PREDEFINED_TYPES = dict(zip((t.ctype_name for t in _PREDEFINED_TYPES), _PREDEFINED_TYPES))
+PREDEFINED_TYPES = dict(
+    zip((t.ctype_name for t in _PREDEFINED_TYPES), _PREDEFINED_TYPES)
+)
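These `map_*` aliases pair with the `pyopencv_to(PyObject*, std::map<K, V>&, ...)` converter added above: a plain Python dict can now cross the binding boundary wherever a wrapped C++ signature takes one of these maps. As a hedged sketch, the generated aliases amount to the following hand-written equivalents (the `typing.Dict` form corresponds to `compatible_to_runtime_usage == True`):

```python
import typing

# Hand-written equivalents of the generated aliases (illustrative only):
map_string_and_string = typing.Dict[str, str]
map_string_and_int = typing.Dict[str, int]
map_string_and_vector_size_t = typing.Dict[str, typing.Sequence[int]]
map_string_and_vector_float = typing.Dict[str, typing.Sequence[float]]
map_int_and_double = typing.Dict[int, float]

# Any wrapped function declared with such a map accepts a dict directly, e.g.
# some_wrapped_function({"layer1": [0.1, 0.2]})  # hypothetical call;
# a non-dict argument triggers the "isn't dict" failmsg from the converter.
```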
diff --git a/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp b/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp
index 95919ea009..c03aa52090 100644
--- a/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp
@@ -353,8 +353,8 @@ void CV_EXPORTS_W waveCorrect(CV_IN_OUT std::vector<Mat> &rmats, WaveCorrectKind
 
 // Auxiliary functions
 // Returns matches graph representation in DOT language
-String CV_EXPORTS_W matchesGraphAsString(std::vector<String> &pathes, std::vector<MatchesInfo> &pairwise_matches,
-                                         float conf_threshold);
+String CV_EXPORTS_W matchesGraphAsString(std::vector<String> &paths, std::vector<MatchesInfo> &pairwise_matches,
+                                         float conf_threshold);
 
 CV_EXPORTS_W std::vector<int> leaveBiggestComponent(
     std::vector<ImageFeatures> &features,
diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp
index 111a6e13ae..33a86ee360 100644
--- a/modules/stitching/src/blenders.cpp
+++ b/modules/stitching/src/blenders.cpp
@@ -70,7 +70,7 @@ Ptr<Blender> Blender::createDefault(int type, bool try_gpu)
     if (type == NO)
         return makePtr<Blender>();
     if (type == FEATHER)
-        return makePtr<FeatherBlender>(try_gpu);
+        return makePtr<FeatherBlender>();
     if (type == MULTI_BAND)
         return makePtr<MultiBandBlender>(try_gpu);
     CV_Error(Error::StsBadArg, "unsupported blending method");
diff --git a/modules/stitching/src/motion_estimators.cpp b/modules/stitching/src/motion_estimators.cpp
index 394aeb8961..be4a5fb4ce 100644
--- a/modules/stitching/src/motion_estimators.cpp
+++ b/modules/stitching/src/motion_estimators.cpp
@@ -1003,13 +1003,13 @@ void waveCorrect(std::vector<Mat> &rmats, WaveCorrectKind kind)
 
 //////////////////////////////////////////////////////////////////////////////
 
-String matchesGraphAsString(std::vector<String> &pathes, std::vector<MatchesInfo> &pairwise_matches,
-                            float conf_threshold)
+String matchesGraphAsString(std::vector<String> &paths, std::vector<MatchesInfo> &pairwise_matches,
+                            float conf_threshold)
 {
     std::stringstream str;
     str << "graph matches_graph{\n";
 
-    const int num_images = static_cast<int>(pathes.size());
+    const int num_images = static_cast<int>(paths.size());
     std::set<std::pair<int,int> > span_tree_edges;
     DisjointSets comps(num_images);
 
@@ -1035,12 +1035,12 @@ String matchesGraphAsString(std::vector<String> &pathes, std::vector<MatchesInfo> &pairwise_matches,
             std::pair<int,int> edge = *itr;
             if (span_tree_edges.find(edge) != span_tree_edges.end())
             {
-                String name_src = pathes[edge.first];
+                String name_src = paths[edge.first];
                 size_t prefix_len = name_src.find_last_of("/\\");
                 if (prefix_len != String::npos) prefix_len++; else prefix_len = 0;
                 name_src = name_src.substr(prefix_len, name_src.size() - prefix_len);
 
-                String name_dst = pathes[edge.second];
+                String name_dst = paths[edge.second];
                 prefix_len = name_dst.find_last_of("/\\");
                 if (prefix_len != String::npos) prefix_len++; else prefix_len = 0;
                 name_dst = name_dst.substr(prefix_len, name_dst.size() - prefix_len);
@@ -1057,7 +1057,7 @@ String matchesGraphAsString(std::vector<String> &pathes, std::vector<MatchesInfo> &pairwise_matches,
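The patch closes with two new samples. The C++ barcode sample that follows groups the output corners four per code when drawing; a rough Python counterpart of its drawResults() loop (function name and colors chosen here for illustration) could look like:

```python
import cv2 as cv
import numpy as np

def draw_results(frame, points, decode_info, decode_type):
    # one quad (4 points) per detected barcode, as in the C++ sample below
    quads = np.asarray(points, dtype=np.float32).reshape(-1, 4, 2).astype(np.int32)
    for idx, quad in enumerate(quads):
        decodable = idx < len(decode_type) and decode_type[idx] != ""
        color = (0, 255, 0) if decodable else (0, 0, 255)  # green if decoded, red otherwise
        cv.polylines(frame, [quad], True, color, 1)
        if decodable:
            label = "[%s] %s" % (decode_type[idx], decode_info[idx])
            cv.putText(frame, label, tuple(quad[1]), cv.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 255))
```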
diff --git a/samples/cpp/barcode.cpp b/samples/cpp/barcode.cpp
new file mode 100644
--- /dev/null
+++ b/samples/cpp/barcode.cpp
+#include <iostream>
+#include "opencv2/objdetect.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace cv;
+using namespace std;
+
+static const Scalar greenColor(0, 255, 0);
+static const Scalar redColor(0, 0, 255);
+static const Scalar yellowColor(0, 255, 255);
+static Scalar randColor()
+{
+    RNG &rng = theRNG();
+    return Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
+}
+
+//==============================================================================
+
+struct TheApp
+{
+    Ptr<barcode::BarcodeDetector> bardet;
+    //! [output]
+    vector<Point2f> corners;
+    vector<string> decode_info;
+    vector<string> decode_type;
+    //! [output]
+    bool detectOnly;
+
+    void cleanup()
+    {
+        corners.clear();
+        decode_info.clear();
+        decode_type.clear();
+    }
+
+    inline string modeString() const
+    {
+        return detectOnly ? "<detect>" : "<detect and decode>";
+    }
+
+    void drawResults(Mat &frame) const
+    {
+        //! [visualize]
+        for (size_t i = 0; i < corners.size(); i += 4)
+        {
+            const size_t idx = i / 4;
+            const bool isDecodable = idx < decode_info.size()
+                && idx < decode_type.size()
+                && !decode_type[idx].empty();
+            const Scalar lineColor = isDecodable ? greenColor : redColor;
+            // draw barcode rectangle
+            vector<Point2f> contour(corners.begin() + i, corners.begin() + i + 4);
+            const vector< vector<Point2f> > contours {contour};
+            drawContours(frame, contours, 0, lineColor, 1);
+            // draw vertices
+            for (size_t j = 0; j < 4; j++)
+                circle(frame, contour[j], 2, randColor(), -1);
+            // write decoded text
+            if (isDecodable)
+            {
+                ostringstream buf;
+                buf << "[" << decode_type[idx] << "] " << decode_info[idx];
+                putText(frame, buf.str(), contour[1], FONT_HERSHEY_COMPLEX, 0.8, yellowColor, 1);
+            }
+        }
+        //! [visualize]
+    }
+
+    void drawFPS(Mat &frame, double fps) const
+    {
+        ostringstream buf;
+        buf << modeString()
+            << " (" << corners.size() / 4 << "/" << decode_type.size() << "/" << decode_info.size() << ") "
+            << cv::format("%.2f", fps) << " FPS ";
+        putText(frame, buf.str(), Point(25, 25), FONT_HERSHEY_COMPLEX, 0.8, redColor, 2);
+    }
+
+    inline void call_decode(Mat &frame)
+    {
+        cleanup();
+        if (detectOnly)
+        {
+            //! [detect]
+            bardet->detectMulti(frame, corners);
+            //! [detect]
+        }
+        else
+        {
+            //! [detectAndDecode]
+            bardet->detectAndDecodeWithType(frame, decode_info, decode_type, corners);
+            //! [detectAndDecode]
+        }
+    }
+
+    int liveBarCodeDetect()
+    {
+        VideoCapture cap(0);
+        if (!cap.isOpened())
+        {
+            cout << "Cannot open a camera" << endl;
+            return 2;
+        }
+        Mat frame;
+        Mat result;
+        cap >> frame;
+        cout << "Image size: " << frame.size() << endl;
+        cout << "Press 'd' to switch between <detect> and <detect and decode> modes" << endl;
+        cout << "Press 'ESC' to exit" << endl;
+        for (;;)
+        {
+            cap >> frame;
+            if (frame.empty())
+            {
+                cout << "End of video stream" << endl;
+                break;
+            }
+            if (frame.channels() == 1)
+                cvtColor(frame, frame, COLOR_GRAY2BGR);
+            TickMeter timer;
+            timer.start();
+            call_decode(frame);
+            timer.stop();
+            drawResults(frame);
+            drawFPS(frame, timer.getFPS());
+            imshow("barcode", frame);
+            const char c = (char)waitKey(1);
+            if (c == 'd')
+            {
+                detectOnly = !detectOnly;
+                cout << "Mode switched to " << modeString() << endl;
+            }
+            else if (c == 27)
+            {
+                cout << "'ESC' is pressed. Exiting..." << endl;
+                break;
+            }
+        }
+        return 0;
+    }
+
+    int imageBarCodeDetect(const string &in_file, const string &out_file)
+    {
+        Mat frame = imread(in_file, IMREAD_COLOR);
+        cout << "Image size: " << frame.size() << endl;
+        cout << "Mode is " << modeString() << endl;
+        const int count_experiments = 100;
+        TickMeter timer;
+        for (size_t i = 0; i < count_experiments; i++)
+        {
+            timer.start();
+            call_decode(frame);
+            timer.stop();
+        }
+        cout << "FPS: " << timer.getFPS() << endl;
+        drawResults(frame);
+        if (!out_file.empty())
+        {
+            cout << "Saving result: " << out_file << endl;
+            imwrite(out_file, frame);
+        }
+        imshow("barcode", frame);
+        cout << "Press any key to exit ..." << endl;
+        waitKey(0);
+        return 0;
+    }
+};
+
+
+//==============================================================================
+
+int main(int argc, char **argv)
+{
+    const string keys = "{h help ? | | print help messages }"
+                        "{i in | | input image path (also switches to image detection mode) }"
+                        "{detect | false | detect 1D barcode only (skip decoding) }"
+                        "{o out | | path to result file (only for single image decode) }"
+                        "{sr_prototxt| | super resolution prototxt path }"
+                        "{sr_model | | super resolution model path }";
+    CommandLineParser cmd_parser(argc, argv, keys);
+    cmd_parser.about("This program detects the 1D barcodes from camera or images using the OpenCV library.");
+    if (cmd_parser.has("help"))
+    {
+        cmd_parser.printMessage();
+        return 0;
+    }
+    const string in_file = cmd_parser.get<string>("in");
+    const string out_file = cmd_parser.get<string>("out");
+    const string sr_prototxt = cmd_parser.get<string>("sr_prototxt");
+    const string sr_model = cmd_parser.get<string>("sr_model");
+    if (!cmd_parser.check())
+    {
+        cmd_parser.printErrors();
+        return -1;
+    }
+
+    TheApp app;
+    app.detectOnly = cmd_parser.has("detect") && cmd_parser.get<bool>("detect");
+    //! [initialize]
+    try
+    {
+        app.bardet = makePtr<barcode::BarcodeDetector>(sr_prototxt, sr_model);
+    }
+    catch (const std::exception& e)
+    {
+        cout <<
+            "\n---------------------------------------------------------------\n"
+            "Failed to initialize super resolution.\n"
+            "Please, download 'sr.*' from\n"
+            "https://github.com/WeChatCV/opencv_3rdparty/tree/wechat_qrcode\n"
+            "and put them into the current directory.\n"
+            "Or you can leave sr_prototxt and sr_model unspecified.\n"
+            "---------------------------------------------------------------\n";
+        cout << e.what() << endl;
+        return -1;
+    }
+    //! [initialize]
+
+    if (in_file.empty())
+        return app.liveBarCodeDetect();
+    else
+        return app.imageBarCodeDetect(in_file, out_file);
+}
diff --git a/samples/cpp/qrcode.cpp b/samples/cpp/qrcode.cpp
index af332d307c..6b914f78ef 100644
--- a/samples/cpp/qrcode.cpp
+++ b/samples/cpp/qrcode.cpp
@@ -12,6 +12,7 @@
 using namespace cv;
 
 static int liveQRCodeDetect();
 static int imageQRCodeDetect(const string& in_file);
 
+static bool g_useArucoBased = false;
 static bool g_modeMultiQR = false;
 static bool g_detectOnly = false;
 
@@ -35,6 +36,7 @@ int main(int argc, char *argv[])
     const string keys =
         "{h help ? | | print help messages }"
        "{i in | | input image path (also switches to image detection mode) }"
+        "{aruco_based | false | use Aruco-based QR code detector instead of contour-based }"
        "{detect | false | detect QR code only (skip decoding) }"
        "{m multi | | use detect for multiple qr-codes }"
        "{o out | qr_code.png | path to result file }"
@@ -75,6 +77,7 @@ int main(int argc, char *argv[])
 
     g_modeMultiQR = cmd_parser.has("multi") && cmd_parser.get<bool>("multi");
     g_detectOnly = cmd_parser.has("detect") && cmd_parser.get<bool>("detect");
+    g_useArucoBased = cmd_parser.has("aruco_based") && cmd_parser.get<bool>("aruco_based");
 
     g_saveDetections = cmd_parser.has("save_detections") && cmd_parser.get<bool>("save_detections");
     g_saveAll = cmd_parser.has("save_all") && cmd_parser.get<bool>("save_all");
 
@@ -157,7 +160,7 @@ void drawQRCodeResults(Mat& frame, const vector<Point>& corners, const vector<cv::String>& decode_info
 
 static
 void runQR(
-    QRCodeDetector& qrcode, const Mat& input,
+    const GraphicalCodeDetector& qrcode, const Mat& input,
     vector<Point>& corners, vector<cv::String>& decode_info
     // +global: bool g_modeMultiQR, bool g_detectOnly
     )
@@ -191,7 +194,7 @@
 }
 
 static
-double processQRCodeDetection(QRCodeDetector& qrcode, const Mat& input, Mat& result, vector<Point>& corners)
+double processQRCodeDetection(const GraphicalCodeDetector& qrcode, const Mat& input, Mat& result, vector<Point>& corners)
 {
     if (input.channels() == 1)
         cvtColor(input, result, COLOR_GRAY2BGR);
@@ -229,7 +232,9 @@ int liveQRCodeDetect()
     cout << "Press 'd' to switch between decoder and detector" << endl;
     cout << "Press ' ' (space) to save result into images" << endl;
     cout << "Press 'ESC' to exit" << endl;
-    QRCodeDetector qrcode;
+    GraphicalCodeDetector qrcode = QRCodeDetector();
+    if (g_useArucoBased)
+        qrcode = QRCodeDetectorAruco();
 
     for (;;)
     {
@@ -310,7 +315,10 @@ int imageQRCodeDetect(const string& in_file)
          << " on image: " << input.size() << " (" << typeToString(input.type()) << ")"
          << endl;
 
-    QRCodeDetector qrcode;
+    GraphicalCodeDetector qrcode = QRCodeDetector();
+    if (g_useArucoBased)
+        qrcode = QRCodeDetectorAruco();
+
     vector<Point> corners;
     vector<cv::String> decode_info;
diff --git a/samples/python/qrcode.py b/samples/python/qrcode.py
index 21b1a59073..73c6cd3bd2 100644
--- a/samples/python/qrcode.py
+++ b/samples/python/qrcode.py
@@ -33,6 +33,7 @@ class QrSample:
         self.multi = args.multi
         self.saveDetections = args.save_detections
         self.saveAll = args.save_all
+        self.arucoBased = args.aruco_based
 
     def getQRModeString(self):
         msg1 = "multi " if self.multi else ""
@@ -104,7 +105,12 @@ class QrSample:
             return
         print('Run {:s} on image [{:d}x{:d}]'.format(
             self.getQRModeString(), inputimg.shape[1], inputimg.shape[0]))
-        qrCode = cv.QRCodeDetector()
+
+        if self.arucoBased:
+            qrCode = cv.QRCodeDetectorAruco()
+        else:
+            qrCode = cv.QRCodeDetector()
+
         count = 10
         timer = cv.TickMeter()
         for _ in range(count):
@@ -152,7 +158,10 @@ class QrSample:
         print("Press ' ' (space) to save result into images")
         print("Press 'ESC' to exit")
 
-        qrcode = cv.QRCodeDetector()
+        if self.arucoBased:
+            qrcode = cv.QRCodeDetectorAruco()
+        else:
+            qrcode = cv.QRCodeDetector()
 
         while True:
             ret, frame = cap.read()
@@ -204,6 +213,10 @@ def main():
         help="input image path (for example, 'opencv_extra/testdata/cv/qrcode/multiple/*_qrcodes.png)",
         default="",
         metavar="")
+    parser.add_argument(
+        '--aruco_based',
+        help="use aruco-based detector",
+        action='store_true')
     parser.add_argument(
         '-d',
         '--detect',