diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c757d8f55..11a2ee0eb1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -403,7 +403,7 @@ if(ANDROID) if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13) message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will no be compiled.") endif() -elseif(ANT_EXECUTABLE) +else() find_package(JNI) endif() @@ -457,15 +457,15 @@ if(BUILD_EXAMPLES OR BUILD_ANDROID_EXAMPLES OR INSTALL_PYTHON_EXAMPLES) endif() if(ANDROID) - add_subdirectory(android/service) + add_subdirectory(platforms/android/service) endif() if(BUILD_ANDROID_PACKAGE) - add_subdirectory(android/package) + add_subdirectory(platforms/android/package) endif() if (ANDROID) - add_subdirectory(android/libinfo) + add_subdirectory(platforms/android/libinfo) endif() # ---------------------------------------------------------------------------- @@ -840,7 +840,7 @@ status(" ant:" ANT_EXECUTABLE THEN "${ANT_EXECUTABLE} (ver ${A if(NOT ANDROID) status(" JNI:" JNI_INCLUDE_DIRS THEN "${JNI_INCLUDE_DIRS}" ELSE NO) endif() -status(" Java tests:" BUILD_TESTS AND (NOT ANDROID OR CAN_BUILD_ANDROID_PROJECTS) THEN YES ELSE NO) +status(" Java tests:" BUILD_TESTS AND (CAN_BUILD_ANDROID_PROJECTS OR HAVE_opencv_java) THEN YES ELSE NO) # ========================== documentation ========================== if(BUILD_DOCS) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 8fc54b1b8e..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,11 +0,0 @@ -We greatly appreciate your support and contributions and they are always welcomed! - -Github pull requests are the convenient way to contribute to OpenCV project. 
Good pull requests have all of these attributes: - -* Are scoped to one specific issue -* Include a test to demonstrate the correctness -* Update the docs if relevant -* Match the [coding style guidelines](http://code.opencv.org/projects/opencv/wiki/CodingStyleGuide) -* Don't messed by "oops" commits - -You can find more detailes about contributing process on http://opencv.org/contribute.html \ No newline at end of file diff --git a/README b/README index 9dd45a230b..0799dff89f 100644 --- a/README +++ b/README @@ -4,3 +4,14 @@ Homepage: http://opencv.org Online docs: http://docs.opencv.org Q&A forum: http://answers.opencv.org Dev zone: http://code.opencv.org + +Please read before starting work on a pull request: + http://code.opencv.org/projects/opencv/wiki/How_to_contribute + +Summary of guidelines: + +* One pull request per issue; +* Choose the right base branch; +* Include tests and documentation; +* Clean up "oops" commits before submitting; +* Follow the coding style guide. diff --git a/android/android.toolchain.cmake b/android/android.toolchain.cmake index 0f7e340678..df365fc2c0 100644 --- a/android/android.toolchain.cmake +++ b/android/android.toolchain.cmake @@ -1,4 +1,6 @@ -# Copyright (c) 2010-2011, Ethan Rublee +message(STATUS "Android toolchain was moved to platforms/android!") +message(STATUS "This file is deprecated and will be removed!") + # Copyright (c) 2011-2013, Andrey Kamaev # All rights reserved. # diff --git a/android/readme.txt b/android/readme.txt new file mode 100644 index 0000000000..2d5f3962fe --- /dev/null +++ b/android/readme.txt @@ -0,0 +1 @@ +All Android specific sources are moved to platforms/android. 
\ No newline at end of file diff --git a/android/scripts/build.cmd b/android/scripts/build.cmd deleted file mode 100644 index 3e0f1666b6..0000000000 --- a/android/scripts/build.cmd +++ /dev/null @@ -1,90 +0,0 @@ -@ECHO OFF - -:: enable command extensions -VERIFY BADVALUE 2>NUL -SETLOCAL ENABLEEXTENSIONS || (ECHO Unable to enable command extensions. & EXIT \B) - -:: build environment -SET SOURCE_DIR=%cd% -IF EXIST .\android.toolchain.cmake (SET BUILD_OPENCV=1) ELSE (SET BUILD_OPENCV=0) -IF EXIST .\jni\nul (SET BUILD_JAVA_PART=1) ELSE (SET BUILD_JAVA_PART=0) - -:: load configuration -PUSHD %~dp0 -SET SCRIPTS_DIR=%cd% -IF EXIST .\wincfg.cmd CALL .\wincfg.cmd -POPD - -:: inherit old names -IF NOT DEFINED CMAKE SET CMAKE=%CMAKE_EXE% -IF NOT DEFINED MAKE SET MAKE=%MAKE_EXE% - -:: defaults -IF NOT DEFINED BUILD_DIR SET BUILD_DIR=build -IF NOT DEFINED ANDROID_ABI SET ANDROID_ABI=armeabi-v7a -SET OPENCV_BUILD_DIR=%SCRIPTS_DIR%\..\%BUILD_DIR% - -:: check that all required variables defined -PUSHD . -IF NOT DEFINED ANDROID_NDK (ECHO. & ECHO You should set an environment variable ANDROID_NDK to the full path to your copy of Android NDK & GOTO end) -(CD "%ANDROID_NDK%") || (ECHO. & ECHO Directory "%ANDROID_NDK%" specified by ANDROID_NDK variable does not exist & GOTO end) - -IF NOT EXIST "%CMAKE%" (ECHO. & ECHO You should set an environment variable CMAKE to the full path to cmake executable & GOTO end) -IF NOT EXIST "%MAKE%" (ECHO. & ECHO You should set an environment variable MAKE to the full path to native port of make executable & GOTO end) - -IF NOT %BUILD_JAVA_PART%==1 GOTO required_variables_checked - -IF NOT DEFINED ANDROID_SDK (ECHO. & ECHO You should set an environment variable ANDROID_SDK to the full path to your copy of Android SDK & GOTO end) -(CD "%ANDROID_SDK%" 2>NUL) || (ECHO. & ECHO Directory "%ANDROID_SDK%" specified by ANDROID_SDK variable does not exist & GOTO end) - -IF NOT DEFINED ANT_DIR (ECHO. 
& ECHO You should set an environment variable ANT_DIR to the full path to Apache Ant root & GOTO end) -(CD "%ANT_DIR%" 2>NUL) || (ECHO. & ECHO Directory "%ANT_DIR%" specified by ANT_DIR variable does not exist & GOTO end) - -IF NOT DEFINED JAVA_HOME (ECHO. & ECHO You should set an environment variable JAVA_HOME to the full path to JDK & GOTO end) -(CD "%JAVA_HOME%" 2>NUL) || (ECHO. & ECHO Directory "%JAVA_HOME%" specified by JAVA_HOME variable does not exist & GOTO end) - -:required_variables_checked -POPD - -:: check for ninja -echo "%MAKE%"|findstr /i ninja >nul: -IF %errorlevel%==1 (SET BUILD_WITH_NINJA=0) ELSE (SET BUILD_WITH_NINJA=1) -IF %BUILD_WITH_NINJA%==1 (SET CMAKE_GENERATOR=Ninja) ELSE (SET CMAKE_GENERATOR=MinGW Makefiles) - -:: create build dir -IF DEFINED REBUILD rmdir /S /Q "%BUILD_DIR%" 2>NUL -MKDIR "%BUILD_DIR%" 2>NUL -PUSHD "%BUILD_DIR%" || (ECHO. & ECHO Directory "%BUILD_DIR%" is not found & GOTO end) - -:: run cmake -ECHO. & ECHO Runnning cmake... -ECHO ANDROID_ABI=%ANDROID_ABI% -ECHO. -IF NOT %BUILD_OPENCV%==1 GOTO other-cmake -:opencv-cmake -("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DCMAKE_TOOLCHAIN_FILE="%SOURCE_DIR%"\android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%\..") && GOTO cmakefin -ECHO. & ECHO cmake failed & GOTO end -:other-cmake -("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DOpenCV_DIR="%OPENCV_BUILD_DIR%" -DCMAKE_TOOLCHAIN_FILE="%OPENCV_BUILD_DIR%\..\android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%") && GOTO cmakefin -ECHO. & ECHO cmake failed & GOTO end -:cmakefin - -:: run make -ECHO. & ECHO Building native libs... -IF %BUILD_WITH_NINJA%==0 ("%MAKE%" -j %NUMBER_OF_PROCESSORS% VERBOSE=%VERBOSE%) || (ECHO. & ECHO make failed & GOTO end) -IF %BUILD_WITH_NINJA%==1 ("%MAKE%") || (ECHO. & ECHO ninja failed & GOTO end) - -IF NOT %BUILD_JAVA_PART%==1 GOTO end -POPD && PUSHD %SOURCE_DIR% - -:: configure java part -ECHO. & ECHO Updating Android project... 
-(CALL "%ANDROID_SDK%\tools\android" update project --name %PROJECT_NAME% --path .) || (ECHO. & ECHO failed to update android project & GOTO end) - -:: compile java part -ECHO. & ECHO Compiling Android project... -(CALL "%ANT_DIR%\bin\ant" debug) || (ECHO. & ECHO failed to compile android project & GOTO end) - -:end -POPD -ENDLOCAL diff --git a/android/scripts/cmake_android.cmd b/android/scripts/cmake_android.cmd deleted file mode 100644 index 212c04b47e..0000000000 --- a/android/scripts/cmake_android.cmd +++ /dev/null @@ -1,5 +0,0 @@ -@ECHO OFF - -PUSHD %~dp0.. -CALL .\scripts\build.cmd %* -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -POPD \ No newline at end of file diff --git a/android/scripts/cmake_android_armeabi.sh b/android/scripts/cmake_android_armeabi.sh deleted file mode 100755 index 9c711d8855..0000000000 --- a/android/scripts/cmake_android_armeabi.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_armeabi -cd build_armeabi - -cmake -DANDROID_ABI=armeabi -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. - diff --git a/android/scripts/cmake_android_mips.sh b/android/scripts/cmake_android_mips.sh deleted file mode 100755 index 17d2ff937e..0000000000 --- a/android/scripts/cmake_android_mips.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_mips -cd build_mips - -cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. - diff --git a/android/scripts/cmake_android_neon.sh b/android/scripts/cmake_android_neon.sh deleted file mode 100755 index 5e85605b56..0000000000 --- a/android/scripts/cmake_android_neon.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_neon -cd build_neon - -cmake -DANDROID_ABI="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. 
- diff --git a/android/scripts/cmake_android_service.sh b/android/scripts/cmake_android_service.sh deleted file mode 100755 index 0dbd482520..0000000000 --- a/android/scripts/cmake_android_service.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_service -cd build_service - -cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../.. diff --git a/android/scripts/cmake_android_x86.sh b/android/scripts/cmake_android_x86.sh deleted file mode 100755 index a01df2e668..0000000000 --- a/android/scripts/cmake_android_x86.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -cd `dirname $0`/.. - -mkdir -p build_x86 -cd build_x86 - -cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. - diff --git a/android/scripts/wincfg.cmd.tmpl b/android/scripts/wincfg.cmd.tmpl deleted file mode 100644 index 166a5e7b02..0000000000 --- a/android/scripts/wincfg.cmd.tmpl +++ /dev/null @@ -1,30 +0,0 @@ -:: variables required for OpenCV build :: -:: Note: all pathes should be specified without tailing slashes! 
-SET ANDROID_NDK=C:\full\path\to\your\copy\of\android\NDK\android-ndk-r7b -SET CMAKE_EXE=C:\full\path\to\cmake\utility\cmake.exe -SET MAKE_EXE=%ANDROID_NDK%\prebuilt\windows\bin\make.exe - -:: variables required for android-opencv build :: -SET ANDROID_SDK=C:\full\path\to\your\copy\of\android\SDK\android-sdk-windows -SET ANT_DIR=C:\full\path\to\ant\directory\apache-ant-1.8.2 -SET JAVA_HOME=C:\full\path\to\JDK\jdk1.6.0_25 - -:: configuration options :: -:::: general ARM-V7 settings -SET ANDROID_ABI=armeabi-v7a -SET BUILD_DIR=build - -:::: uncomment following lines to compile for old emulator or old device -::SET ANDROID_ABI=armeabi -::SET BUILD_DIR=build_armeabi - -:::: uncomment following lines to compile for ARM-V7 with NEON support -::SET ANDROID_ABI=armeabi-v7a with NEON -::SET BUILD_DIR=build_neon - -:::: uncomment following lines to compile for x86 -::SET ANDROID_ABI=x86 -::SET BUILD_DIR=build_x86 - -:::: other options -::SET ANDROID_NATIVE_API_LEVEL=8 &:: android-3 is enough for native part of OpenCV but android-8 is required for Java API diff --git a/android/service/doc/Makefile b/android/service/doc/Makefile deleted file mode 100644 index b8e7bba113..0000000000 --- a/android/service/doc/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
- -.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - -rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." 
- -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OpenCVEngine.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OpenCVEngine.qhc" - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." 
diff --git a/apps/traincascade/boost.cpp b/apps/traincascade/boost.cpp index c5a89e45cf..29ac4bc9af 100644 --- a/apps/traincascade/boost.cpp +++ b/apps/traincascade/boost.cpp @@ -815,7 +815,7 @@ float CvCascadeBoostTrainData::getVarValue( int vi, int si ) } -struct FeatureIdxOnlyPrecalc +struct FeatureIdxOnlyPrecalc : ParallelLoopBody { FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u ) { @@ -825,11 +825,11 @@ struct FeatureIdxOnlyPrecalc idst = _buf->data.i; is_buf_16u = _is_buf_16u; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { cv::AutoBuffer valCache(sample_count); float* valCachePtr = (float*)valCache; - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) { for( int si = 0; si < sample_count; si++ ) { @@ -852,7 +852,7 @@ struct FeatureIdxOnlyPrecalc bool is_buf_16u; }; -struct FeatureValAndIdxPrecalc +struct FeatureValAndIdxPrecalc : ParallelLoopBody { FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u ) { @@ -863,9 +863,9 @@ struct FeatureValAndIdxPrecalc idst = _buf->data.i; is_buf_16u = _is_buf_16u; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) { for( int si = 0; si < sample_count; si++ ) { @@ -889,7 +889,7 @@ struct FeatureValAndIdxPrecalc bool is_buf_16u; }; -struct FeatureValOnlyPrecalc +struct FeatureValOnlyPrecalc : ParallelLoopBody { FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count ) { @@ -897,9 +897,9 @@ struct FeatureValOnlyPrecalc valCache = _valCache; sample_count = _sample_count; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) 
const { - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) for( int si = 0; si < sample_count; si++ ) valCache->at(fi,si) = (*featureEvaluator)( fi, si ); } @@ -913,12 +913,12 @@ void CvCascadeBoostTrainData::precalculate() int minNum = MIN( numPrecalcVal, numPrecalcIdx); double proctime = -TIME( 0 ); - parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx), - FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) ); - parallel_for( BlockedRange(0, minNum), - FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) ); - parallel_for( BlockedRange(minNum, numPrecalcVal), - FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) ); + parallel_for_( Range(numPrecalcVal, numPrecalcIdx), + FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) ); + parallel_for_( Range(0, minNum), + FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) ); + parallel_for_( Range(minNum, numPrecalcVal), + FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) ); cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl; } diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index f3d101ab21..8db667762e 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -26,6 +26,15 @@ if(CUDA_FOUND) set(HAVE_CUBLAS 1) endif() + if(${CUDA_VERSION} VERSION_LESS "5.5") + find_cuda_helper_libs(npp) + else() + find_cuda_helper_libs(nppc) + find_cuda_helper_libs(nppi) + find_cuda_helper_libs(npps) + set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY}) + endif() + if(WITH_NVCUVID) find_cuda_helper_libs(nvcuvid) set(HAVE_NVCUVID 1) @@ -136,8 +145,6 @@ if(CUDA_FOUND) mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR) - find_cuda_helper_libs(npp) - macro(ocv_cuda_compile VAR) foreach(var CMAKE_CXX_FLAGS 
CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) set(${var}_backup_in_cuda_compile_ "${${var}}") diff --git a/cmake/OpenCVGenConfig.cmake b/cmake/OpenCVGenConfig.cmake index 705ccc8df1..c99cae7883 100644 --- a/cmake/OpenCVGenConfig.cmake +++ b/cmake/OpenCVGenConfig.cmake @@ -162,7 +162,7 @@ if(UNIX) endif() if(ANDROID) - install(FILES "${OpenCV_SOURCE_DIR}/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/) + install(FILES "${OpenCV_SOURCE_DIR}/platforms/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/) endif() # -------------------------------------------------------------------------------------------- diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 0f2695fc9a..70f4809d22 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -53,8 +53,8 @@ if(BUILD_DOCS AND HAVE_SPHINX) endif() endforeach() - file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/android/service/doc/*.rst") - file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/android/service/doc/*.jpg") + file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.rst") + file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.jpg") list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF}) list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT}) diff --git a/doc/conf.py b/doc/conf.py index 4c7a15c891..f3f7aec58a 100755 --- a/doc/conf.py +++ b/doc/conf.py @@ -239,7 +239,7 @@ latex_documents = [ u'', 'manual'), ('doc/tutorials/tutorials', 'opencv_tutorials.tex', u'The OpenCV Tutorials', u'', 'manual'), - ('android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual', + ('platforms/android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual', u'', 'manual'), ] diff --git a/index.rst b/index.rst index 909bf908b8..5f50b66d0f 100644 --- a/index.rst +++ 
b/index.rst @@ -10,7 +10,7 @@ Welcome to opencv documentation! :maxdepth: 2 modules/refman.rst - android/refman.rst + platforms/android/refman.rst doc/user_guide/user_guide.rst doc/tutorials/tutorials.rst diff --git a/modules/androidcamera/CMakeLists.txt b/modules/androidcamera/CMakeLists.txt index d54dd5d208..8ac8ced88e 100644 --- a/modules/androidcamera/CMakeLists.txt +++ b/modules/androidcamera/CMakeLists.txt @@ -6,7 +6,7 @@ set(the_description "Auxiliary module for Android native camera support") set(OPENCV_MODULE_TYPE STATIC) ocv_define_module(androidcamera INTERNAL opencv_core log dl) -ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/android/service/engine/jni/include") +ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/platforms/android/service/engine/jni/include") # Android source tree for native camera SET (ANDROID_SOURCE_TREE "ANDROID_SOURCE_TREE-NOTFOUND" CACHE PATH diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp index 7b5b0d4d6d..e395b0dc55 100644 --- a/modules/calib3d/src/solvepnp.cpp +++ b/modules/calib3d/src/solvepnp.cpp @@ -117,31 +117,6 @@ namespace cv transform(points, modif_points, transformation); } - class Mutex - { - public: - Mutex() { - } - void lock() - { -#ifdef HAVE_TBB - resultsMutex.lock(); -#endif - } - - void unlock() - { -#ifdef HAVE_TBB - resultsMutex.unlock(); -#endif - } - - private: -#ifdef HAVE_TBB - tbb::mutex resultsMutex; -#endif - }; - struct CameraParameters { void init(Mat _intrinsics, Mat _distCoeffs) diff --git a/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp b/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp index 9246b0dafe..a0a673410a 100644 --- a/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp +++ b/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp @@ -120,11 +120,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ 
__forceinline__ RGB2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - - __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_) - :unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} }; template <> struct RGB2RGB : unary_function @@ -141,8 +138,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2RGB():unary_function(){} - __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} }; } @@ -203,8 +200,8 @@ namespace cv { namespace gpu { namespace cudev return RGB2RGB5x5Converter::cvt(src); } - __device__ __forceinline__ RGB2RGB5x5():unary_function(){} - __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} }; template struct RGB2RGB5x5<4, bidx,green_bits> : unary_function @@ -214,8 +211,8 @@ namespace cv { namespace gpu { namespace cudev return RGB2RGB5x5Converter::cvt(src); } - __device__ __forceinline__ RGB2RGB5x5():unary_function(){} - __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} }; } @@ -282,8 +279,8 @@ namespace cv { namespace gpu { namespace cudev RGB5x52RGBConverter::cvt(src, dst); return dst; } - __device__ __forceinline__ RGB5x52RGB():unary_function(){} - __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} }; @@ -295,8 +292,8 @@ namespace cv { namespace gpu { namespace cudev 
RGB5x52RGBConverter::cvt(src, dst); return dst; } - __device__ __forceinline__ RGB5x52RGB():unary_function(){} - __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} }; } @@ -325,9 +322,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ Gray2RGB():unary_function::vec_type>(){} - __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_) - : unary_function::vec_type>(){} + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} }; template <> struct Gray2RGB : unary_function @@ -342,8 +338,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ Gray2RGB():unary_function(){} - __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} }; } @@ -384,8 +380,8 @@ namespace cv { namespace gpu { namespace cudev return Gray2RGB5x5Converter::cvt(src); } - __device__ __forceinline__ Gray2RGB5x5():unary_function(){} - __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5& other_):unary_function(){} + __host__ __device__ __forceinline__ Gray2RGB5x5() {} + __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {} }; } @@ -426,8 +422,8 @@ namespace cv { namespace gpu { namespace cudev { return RGB5x52GrayConverter::cvt(src); } - __device__ __forceinline__ RGB5x52Gray() : unary_function(){} - __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB5x52Gray() {} + __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {} }; } @@ -467,9 +463,8 @@ namespace cv { namespace gpu { namespace cudev { return RGB2GrayConvert(&src.x); } - __device__ 
__forceinline__ RGB2Gray() : unary_function::vec_type, T>(){} - __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) - : unary_function::vec_type, T>(){} + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} }; template struct RGB2Gray : unary_function @@ -478,8 +473,8 @@ namespace cv { namespace gpu { namespace cudev { return RGB2GrayConvert(src); } - __device__ __forceinline__ RGB2Gray() : unary_function(){} - __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} }; } @@ -529,10 +524,8 @@ namespace cv { namespace gpu { namespace cudev RGB2YUVConvert(&src.x, dst); return dst; } - __device__ __forceinline__ RGB2YUV() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2YUV(const RGB2YUV& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2YUV() {} + __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {} }; } @@ -609,10 +602,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ YUV2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} }; template struct YUV2RGB : unary_function @@ -621,8 +612,8 @@ namespace cv { namespace gpu { namespace cudev { return YUV2RGBConvert(src); } - __device__ __forceinline__ YUV2RGB() : unary_function(){} - __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} }; } @@ -689,10 +680,8 @@ 
namespace cv { namespace gpu { namespace cudev RGB2YCrCbConvert(&src.x, dst); return dst; } - __device__ __forceinline__ RGB2YCrCb() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} }; template struct RGB2YCrCb : unary_function @@ -702,8 +691,8 @@ namespace cv { namespace gpu { namespace cudev return RGB2YCrCbConvert(src); } - __device__ __forceinline__ RGB2YCrCb() : unary_function(){} - __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} }; } @@ -771,10 +760,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ YCrCb2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} }; template struct YCrCb2RGB : unary_function @@ -783,8 +770,8 @@ namespace cv { namespace gpu { namespace cudev { return YCrCb2RGBConvert(src); } - __device__ __forceinline__ YCrCb2RGB() : unary_function(){} - __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} }; } @@ -849,10 +836,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2XYZ() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) - : unary_function::vec_type, typename 
TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} }; template struct RGB2XYZ : unary_function @@ -861,8 +846,8 @@ namespace cv { namespace gpu { namespace cudev { return RGB2XYZConvert(src); } - __device__ __forceinline__ RGB2XYZ() : unary_function(){} - __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} }; } @@ -926,10 +911,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ XYZ2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} }; template struct XYZ2RGB : unary_function @@ -938,8 +921,8 @@ namespace cv { namespace gpu { namespace cudev { return XYZ2RGBConvert(src); } - __device__ __forceinline__ XYZ2RGB() : unary_function(){} - __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} }; } @@ -1066,10 +1049,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2HSV() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} }; template struct RGB2HSV : unary_function @@ -1078,8 +1059,8 @@ namespace cv { namespace gpu { namespace cudev { return RGB2HSVConvert(src); } - __device__ __forceinline__ RGB2HSV():unary_function(){} - 
__device__ __forceinline__ RGB2HSV(const RGB2HSV& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} }; } @@ -1208,10 +1189,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ HSV2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} }; template struct HSV2RGB : unary_function @@ -1220,8 +1199,8 @@ namespace cv { namespace gpu { namespace cudev { return HSV2RGBConvert(src); } - __device__ __forceinline__ HSV2RGB():unary_function(){} - __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} }; } @@ -1343,10 +1322,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2HLS() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} }; template struct RGB2HLS : unary_function @@ -1355,8 +1332,8 @@ namespace cv { namespace gpu { namespace cudev { return RGB2HLSConvert(src); } - __device__ __forceinline__ RGB2HLS() : unary_function(){} - __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} }; } @@ -1485,10 +1462,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ HLS2RGB() - : 
unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} }; template struct HLS2RGB : unary_function @@ -1497,8 +1472,8 @@ namespace cv { namespace gpu { namespace cudev { return HLS2RGBConvert(src); } - __device__ __forceinline__ HLS2RGB() : unary_function(){} - __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} }; } @@ -1651,8 +1626,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2Lab() {} - __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {} + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} }; template struct RGB2Lab @@ -1666,8 +1641,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2Lab() {} - __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {} + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} }; } @@ -1764,8 +1739,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ Lab2RGB() {} - __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {} + __host__ __device__ __forceinline__ Lab2RGB() {} + __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {} }; template struct Lab2RGB @@ -1779,8 +1754,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ Lab2RGB() {} - __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {} + __host__ __device__ __forceinline__ Lab2RGB() {} + __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {} }; } @@ -1863,8 
+1838,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2Luv() {} - __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {} + __host__ __device__ __forceinline__ RGB2Luv() {} + __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {} }; template struct RGB2Luv @@ -1878,8 +1853,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ RGB2Luv() {} - __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {} + __host__ __device__ __forceinline__ RGB2Luv() {} + __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {} }; } @@ -1964,8 +1939,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ Luv2RGB() {} - __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {} + __host__ __device__ __forceinline__ Luv2RGB() {} + __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {} }; template struct Luv2RGB @@ -1979,8 +1954,8 @@ namespace cv { namespace gpu { namespace cudev return dst; } - __device__ __forceinline__ Luv2RGB() {} - __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {} + __host__ __device__ __forceinline__ Luv2RGB() {} + __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {} }; } diff --git a/modules/core/include/opencv2/core/cuda/functional.hpp b/modules/core/include/opencv2/core/cuda/functional.hpp index 506ccd8768..cb665fb318 100644 --- a/modules/core/include/opencv2/core/cuda/functional.hpp +++ b/modules/core/include/opencv2/core/cuda/functional.hpp @@ -63,8 +63,8 @@ namespace cv { namespace gpu { namespace cudev { return a + b; } - __device__ __forceinline__ plus(const plus& other):binary_function(){} - __device__ __forceinline__ plus():binary_function(){} + __host__ __device__ __forceinline__ plus() {} + __host__ __device__ __forceinline__ plus(const plus&) {} }; template struct minus : binary_function @@ -74,8 +74,8 @@ namespace cv { namespace gpu { namespace cudev { return a - b; } - 
__device__ __forceinline__ minus(const minus& other):binary_function(){} - __device__ __forceinline__ minus():binary_function(){} + __host__ __device__ __forceinline__ minus() {} + __host__ __device__ __forceinline__ minus(const minus&) {} }; template struct multiplies : binary_function @@ -85,8 +85,8 @@ namespace cv { namespace gpu { namespace cudev { return a * b; } - __device__ __forceinline__ multiplies(const multiplies& other):binary_function(){} - __device__ __forceinline__ multiplies():binary_function(){} + __host__ __device__ __forceinline__ multiplies() {} + __host__ __device__ __forceinline__ multiplies(const multiplies&) {} }; template struct divides : binary_function @@ -96,8 +96,8 @@ namespace cv { namespace gpu { namespace cudev { return a / b; } - __device__ __forceinline__ divides(const divides& other):binary_function(){} - __device__ __forceinline__ divides():binary_function(){} + __host__ __device__ __forceinline__ divides() {} + __host__ __device__ __forceinline__ divides(const divides&) {} }; template struct modulus : binary_function @@ -107,8 +107,8 @@ namespace cv { namespace gpu { namespace cudev { return a % b; } - __device__ __forceinline__ modulus(const modulus& other):binary_function(){} - __device__ __forceinline__ modulus():binary_function(){} + __host__ __device__ __forceinline__ modulus() {} + __host__ __device__ __forceinline__ modulus(const modulus&) {} }; template struct negate : unary_function @@ -117,8 +117,8 @@ namespace cv { namespace gpu { namespace cudev { return -a; } - __device__ __forceinline__ negate(const negate& other):unary_function(){} - __device__ __forceinline__ negate():unary_function(){} + __host__ __device__ __forceinline__ negate() {} + __host__ __device__ __forceinline__ negate(const negate&) {} }; // Comparison Operations @@ -129,8 +129,8 @@ namespace cv { namespace gpu { namespace cudev { return a == b; } - __device__ __forceinline__ equal_to(const equal_to& other):binary_function(){} - __device__ 
__forceinline__ equal_to():binary_function(){} + __host__ __device__ __forceinline__ equal_to() {} + __host__ __device__ __forceinline__ equal_to(const equal_to&) {} }; template struct not_equal_to : binary_function @@ -140,8 +140,8 @@ namespace cv { namespace gpu { namespace cudev { return a != b; } - __device__ __forceinline__ not_equal_to(const not_equal_to& other):binary_function(){} - __device__ __forceinline__ not_equal_to():binary_function(){} + __host__ __device__ __forceinline__ not_equal_to() {} + __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {} }; template struct greater : binary_function @@ -151,8 +151,8 @@ namespace cv { namespace gpu { namespace cudev { return a > b; } - __device__ __forceinline__ greater(const greater& other):binary_function(){} - __device__ __forceinline__ greater():binary_function(){} + __host__ __device__ __forceinline__ greater() {} + __host__ __device__ __forceinline__ greater(const greater&) {} }; template struct less : binary_function @@ -162,8 +162,8 @@ namespace cv { namespace gpu { namespace cudev { return a < b; } - __device__ __forceinline__ less(const less& other):binary_function(){} - __device__ __forceinline__ less():binary_function(){} + __host__ __device__ __forceinline__ less() {} + __host__ __device__ __forceinline__ less(const less&) {} }; template struct greater_equal : binary_function @@ -173,8 +173,8 @@ namespace cv { namespace gpu { namespace cudev { return a >= b; } - __device__ __forceinline__ greater_equal(const greater_equal& other):binary_function(){} - __device__ __forceinline__ greater_equal():binary_function(){} + __host__ __device__ __forceinline__ greater_equal() {} + __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {} }; template struct less_equal : binary_function @@ -184,8 +184,8 @@ namespace cv { namespace gpu { namespace cudev { return a <= b; } - __device__ __forceinline__ less_equal(const less_equal& other):binary_function(){} - __device__ 
__forceinline__ less_equal():binary_function(){} + __host__ __device__ __forceinline__ less_equal() {} + __host__ __device__ __forceinline__ less_equal(const less_equal&) {} }; // Logical Operations @@ -196,8 +196,8 @@ namespace cv { namespace gpu { namespace cudev { return a && b; } - __device__ __forceinline__ logical_and(const logical_and& other):binary_function(){} - __device__ __forceinline__ logical_and():binary_function(){} + __host__ __device__ __forceinline__ logical_and() {} + __host__ __device__ __forceinline__ logical_and(const logical_and&) {} }; template struct logical_or : binary_function @@ -207,8 +207,8 @@ namespace cv { namespace gpu { namespace cudev { return a || b; } - __device__ __forceinline__ logical_or(const logical_or& other):binary_function(){} - __device__ __forceinline__ logical_or():binary_function(){} + __host__ __device__ __forceinline__ logical_or() {} + __host__ __device__ __forceinline__ logical_or(const logical_or&) {} }; template struct logical_not : unary_function @@ -217,8 +217,8 @@ namespace cv { namespace gpu { namespace cudev { return !a; } - __device__ __forceinline__ logical_not(const logical_not& other):unary_function(){} - __device__ __forceinline__ logical_not():unary_function(){} + __host__ __device__ __forceinline__ logical_not() {} + __host__ __device__ __forceinline__ logical_not(const logical_not&) {} }; // Bitwise Operations @@ -229,8 +229,8 @@ namespace cv { namespace gpu { namespace cudev { return a & b; } - __device__ __forceinline__ bit_and(const bit_and& other):binary_function(){} - __device__ __forceinline__ bit_and():binary_function(){} + __host__ __device__ __forceinline__ bit_and() {} + __host__ __device__ __forceinline__ bit_and(const bit_and&) {} }; template struct bit_or : binary_function @@ -240,8 +240,8 @@ namespace cv { namespace gpu { namespace cudev { return a | b; } - __device__ __forceinline__ bit_or(const bit_or& other):binary_function(){} - __device__ __forceinline__ 
bit_or():binary_function(){} + __host__ __device__ __forceinline__ bit_or() {} + __host__ __device__ __forceinline__ bit_or(const bit_or&) {} }; template struct bit_xor : binary_function @@ -251,8 +251,8 @@ namespace cv { namespace gpu { namespace cudev { return a ^ b; } - __device__ __forceinline__ bit_xor(const bit_xor& other):binary_function(){} - __device__ __forceinline__ bit_xor():binary_function(){} + __host__ __device__ __forceinline__ bit_xor() {} + __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {} }; template struct bit_not : unary_function @@ -261,8 +261,8 @@ namespace cv { namespace gpu { namespace cudev { return ~v; } - __device__ __forceinline__ bit_not(const bit_not& other):unary_function(){} - __device__ __forceinline__ bit_not():unary_function(){} + __host__ __device__ __forceinline__ bit_not() {} + __host__ __device__ __forceinline__ bit_not(const bit_not&) {} }; // Generalized Identity Operations @@ -272,8 +272,8 @@ namespace cv { namespace gpu { namespace cudev { return x; } - __device__ __forceinline__ identity(const identity& other):unary_function(){} - __device__ __forceinline__ identity():unary_function(){} + __host__ __device__ __forceinline__ identity() {} + __host__ __device__ __forceinline__ identity(const identity&) {} }; template struct project1st : binary_function @@ -282,8 +282,8 @@ namespace cv { namespace gpu { namespace cudev { return lhs; } - __device__ __forceinline__ project1st(const project1st& other):binary_function(){} - __device__ __forceinline__ project1st():binary_function(){} + __host__ __device__ __forceinline__ project1st() {} + __host__ __device__ __forceinline__ project1st(const project1st&) {} }; template struct project2nd : binary_function @@ -292,8 +292,8 @@ namespace cv { namespace gpu { namespace cudev { return rhs; } - __device__ __forceinline__ project2nd(const project2nd& other):binary_function(){} - __device__ __forceinline__ project2nd():binary_function(){} + __host__ __device__ __forceinline__ 
project2nd() {} + __host__ __device__ __forceinline__ project2nd(const project2nd&) {} }; // Min/Max Operations @@ -302,8 +302,8 @@ namespace cv { namespace gpu { namespace cudev template <> struct name : binary_function \ { \ __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \ - __device__ __forceinline__ name() {}\ - __device__ __forceinline__ name(const name&) {}\ + __host__ __device__ __forceinline__ name() {}\ + __host__ __device__ __forceinline__ name(const name&) {}\ }; template struct maximum : binary_function @@ -312,8 +312,8 @@ namespace cv { namespace gpu { namespace cudev { return max(lhs, rhs); } - __device__ __forceinline__ maximum() {} - __device__ __forceinline__ maximum(const maximum&) {} + __host__ __device__ __forceinline__ maximum() {} + __host__ __device__ __forceinline__ maximum(const maximum&) {} }; OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max) @@ -332,8 +332,8 @@ namespace cv { namespace gpu { namespace cudev { return min(lhs, rhs); } - __device__ __forceinline__ minimum() {} - __device__ __forceinline__ minimum(const minimum&) {} + __host__ __device__ __forceinline__ minimum() {} + __host__ __device__ __forceinline__ minimum(const minimum&) {} }; OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min) @@ -349,7 +349,6 @@ namespace cv { namespace gpu { namespace cudev #undef OPENCV_GPU_IMPLEMENT_MINMAX // Math functions -///bound========================================= template struct abs_func : unary_function { @@ -358,8 +357,8 @@ namespace cv { namespace gpu { namespace cudev return abs(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -368,8 +367,8 @@ namespace cv { namespace gpu { namespace cudev return x; } - __device__ __forceinline__ abs_func() {} - __device__ 
__forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -378,8 +377,8 @@ namespace cv { namespace gpu { namespace cudev return ::abs((int)x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -388,8 +387,8 @@ namespace cv { namespace gpu { namespace cudev return ::abs((int)x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -398,8 +397,8 @@ namespace cv { namespace gpu { namespace cudev return x; } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -408,8 +407,8 @@ namespace cv { namespace gpu { namespace cudev return ::abs((int)x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -418,8 +417,8 @@ namespace cv { namespace gpu { namespace cudev return x; } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ 
-428,8 +427,8 @@ namespace cv { namespace gpu { namespace cudev return ::abs(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -438,8 +437,8 @@ namespace cv { namespace gpu { namespace cudev return ::fabsf(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -448,8 +447,8 @@ namespace cv { namespace gpu { namespace cudev return ::fabs(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; #define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \ @@ -459,8 +458,8 @@ namespace cv { namespace gpu { namespace cudev { \ return func ## f(v); \ } \ - __device__ __forceinline__ name ## _func() {} \ - __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; \ template <> struct name ## _func : unary_function \ { \ @@ -468,8 +467,8 @@ namespace cv { namespace gpu { namespace cudev { \ return func(v); \ } \ - __device__ __forceinline__ name ## _func() {} \ - __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \ @@ -479,6 +478,8 @@ namespace cv { namespace gpu { namespace cudev { \ return func ## f(v1, v2); \ 
} \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; \ template <> struct name ## _func : binary_function \ { \ @@ -486,6 +487,8 @@ namespace cv { namespace gpu { namespace cudev { \ return func(v1, v2); \ } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt) @@ -522,8 +525,8 @@ namespace cv { namespace gpu { namespace cudev { return src1 * src1 + src2 * src2; } - __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function(){} - __device__ __forceinline__ hypot_sqr_func() : binary_function(){} + __host__ __device__ __forceinline__ hypot_sqr_func() {} + __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {} }; // Saturate Cast Functor @@ -533,8 +536,8 @@ namespace cv { namespace gpu { namespace cudev { return saturate_cast(v); } - __device__ __forceinline__ saturate_cast_func(const saturate_cast_func& other):unary_function(){} - __device__ __forceinline__ saturate_cast_func():unary_function(){} + __host__ __device__ __forceinline__ saturate_cast_func() {} + __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {} }; // Threshold Functors @@ -547,10 +550,9 @@ namespace cv { namespace gpu { namespace cudev return (src > thresh) * maxVal; } - __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other) - : unary_function(), thresh(other.thresh), maxVal(other.maxVal){} - - __device__ __forceinline__ thresh_binary_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_binary_func() {} + __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} const T thresh; const T maxVal; @@ -565,10 +567,9 @@ namespace cv { namespace gpu { namespace cudev return (src <= 
thresh) * maxVal; } - __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) - : unary_function(), thresh(other.thresh), maxVal(other.maxVal){} - - __device__ __forceinline__ thresh_binary_inv_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_binary_inv_func() {} + __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} const T thresh; const T maxVal; @@ -583,10 +584,9 @@ namespace cv { namespace gpu { namespace cudev return minimum()(src, thresh); } - __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other) - : unary_function(), thresh(other.thresh){} - - __device__ __forceinline__ thresh_trunc_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_trunc_func() {} + __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other) + : thresh(other.thresh) {} const T thresh; }; @@ -599,10 +599,10 @@ namespace cv { namespace gpu { namespace cudev { return (src > thresh) * src; } - __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other) - : unary_function(), thresh(other.thresh){} - __device__ __forceinline__ thresh_to_zero_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_to_zero_func() {} + __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other) + : thresh(other.thresh) {} const T thresh; }; @@ -615,14 +615,14 @@ namespace cv { namespace gpu { namespace cudev { return (src <= thresh) * src; } - __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other) - : unary_function(), thresh(other.thresh){} - __device__ __forceinline__ thresh_to_zero_inv_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {} + __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other) + : thresh(other.thresh) {} 
const T thresh; }; -//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============> + // Function Object Adaptors template struct unary_negate : unary_function { @@ -633,8 +633,8 @@ namespace cv { namespace gpu { namespace cudev return !pred(x); } - __device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function(){} - __device__ __forceinline__ unary_negate() : unary_function(){} + __host__ __device__ __forceinline__ unary_negate() {} + __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {} const Predicate pred; }; @@ -653,11 +653,9 @@ namespace cv { namespace gpu { namespace cudev { return !pred(x,y); } - __device__ __forceinline__ binary_negate(const binary_negate& other) - : binary_function(){} - __device__ __forceinline__ binary_negate() : - binary_function(){} + __host__ __device__ __forceinline__ binary_negate() {} + __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {} const Predicate pred; }; @@ -676,8 +674,8 @@ namespace cv { namespace gpu { namespace cudev return op(arg1, a); } - __device__ __forceinline__ binder1st(const binder1st& other) : - unary_function(){} + __host__ __device__ __forceinline__ binder1st() {} + __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {} const Op op; const typename Op::first_argument_type arg1; @@ -697,8 +695,8 @@ namespace cv { namespace gpu { namespace cudev return op(a, arg2); } - __device__ __forceinline__ binder2nd(const binder2nd& other) : - unary_function(), op(other.op), arg2(other.arg2){} + __host__ __device__ __forceinline__ binder2nd() {} + __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {} const Op op; const typename Op::second_argument_type arg2; diff --git a/modules/core/include/opencv2/core/cuda/utility.hpp b/modules/core/include/opencv2/core/cuda/utility.hpp index 83fe388950..82c61a2014 
100644 --- a/modules/core/include/opencv2/core/cuda/utility.hpp +++ b/modules/core/include/opencv2/core/cuda/utility.hpp @@ -124,8 +124,8 @@ namespace cv { namespace gpu { namespace cudev struct WithOutMask { - __device__ __forceinline__ WithOutMask(){} - __device__ __forceinline__ WithOutMask(const WithOutMask& mask){} + __host__ __device__ __forceinline__ WithOutMask(){} + __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){} __device__ __forceinline__ void next() const { diff --git a/modules/features2d/src/detectors.cpp b/modules/features2d/src/detectors.cpp index 8752dd2851..c20d573552 100644 --- a/modules/features2d/src/detectors.cpp +++ b/modules/features2d/src/detectors.cpp @@ -212,7 +212,7 @@ static void keepStrongest( int N, std::vector& keypoints ) } namespace { -class GridAdaptedFeatureDetectorInvoker +class GridAdaptedFeatureDetectorInvoker : public ParallelLoopBody { private: int gridRows_, gridCols_; @@ -221,29 +221,24 @@ private: const Mat& image_; const Mat& mask_; const Ptr& detector_; -#ifdef HAVE_TBB - tbb::mutex* kptLock_; -#endif + Mutex* kptLock_; GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC public: - GridAdaptedFeatureDetectorInvoker(const Ptr& detector, const Mat& image, const Mat& mask, std::vector& keypoints, int maxPerCell, int gridRows, int gridCols -#ifdef HAVE_TBB - , tbb::mutex* kptLock -#endif - ) : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell), - keypoints_(keypoints), image_(image), mask_(mask), detector_(detector) -#ifdef HAVE_TBB - , kptLock_(kptLock) -#endif + GridAdaptedFeatureDetectorInvoker(const Ptr& detector, const Mat& image, const Mat& mask, + std::vector& keypoints, int maxPerCell, int gridRows, int gridCols, + cv::Mutex* kptLock) + : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell), + keypoints_(keypoints), image_(image), mask_(mask), detector_(detector), + kptLock_(kptLock) { } - void operator() (const 
BlockedRange& range) const + void operator() (const Range& range) const { - for (int i = range.begin(); i < range.end(); ++i) + for (int i = range.start; i < range.end; ++i) { int celly = i / gridCols_; int cellx = i - celly * gridCols_; @@ -268,9 +263,8 @@ public: it->pt.x += col_range.start; it->pt.y += row_range.start; } -#ifdef HAVE_TBB - tbb::mutex::scoped_lock join_keypoints(*kptLock_); -#endif + + cv::AutoLock join_keypoints(*kptLock_); keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), sub_keypoints.end() ); } } @@ -287,13 +281,9 @@ void GridAdaptedFeatureDetector::detectImpl( const Mat& image, std::vector subset_indices(subset_size); @@ -172,7 +172,7 @@ namespace Mat rot_mat(3, 3, CV_64F); Mat transl_vec(1, 3, CV_64F); - for (int iter = range.begin(); iter < range.end(); ++iter) + for (int iter = range.start; iter < range.end; ++iter) { selectRandom(subset_size, num_points, subset_indices); for (int i = 0; i < subset_size; ++i) @@ -238,7 +238,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam // Generate set of hypotheses using small subsets of the input data TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat, num_points, subset_size, rot_matrices, transl_vectors); - parallel_for(BlockedRange(0, num_iters), body); + parallel_for_(Range(0, num_iters), body); // Compute scores (i.e. 
number of inliers) for each hypothesis GpuMat d_object(object); diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu index 6085e716de..9adc7806f6 100644 --- a/modules/gpu/src/cuda/calib3d.cu +++ b/modules/gpu/src/cuda/calib3d.cu @@ -67,8 +67,8 @@ namespace cv { namespace gpu { namespace cudev crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y, crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z); } - __device__ __forceinline__ TransformOp() {} - __device__ __forceinline__ TransformOp(const TransformOp&) {} + __host__ __device__ __forceinline__ TransformOp() {} + __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {} }; void call(const PtrStepSz src, const float* rot, @@ -106,8 +106,8 @@ namespace cv { namespace gpu { namespace cudev (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z, (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z); } - __device__ __forceinline__ ProjectOp() {} - __device__ __forceinline__ ProjectOp(const ProjectOp&) {} + __host__ __device__ __forceinline__ ProjectOp() {} + __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {} }; void call(const PtrStepSz src, const float* rot, diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpu/src/cuda/canny.cu index 042e9afcc6..d477d128a4 100644 --- a/modules/gpu/src/cuda/canny.cu +++ b/modules/gpu/src/cuda/canny.cu @@ -62,8 +62,8 @@ namespace canny return ::abs(x) + ::abs(y); } - __device__ __forceinline__ L1() {} - __device__ __forceinline__ L1(const L1&) {} + __host__ __device__ __forceinline__ L1() {} + __host__ __device__ __forceinline__ L1(const L1&) {} }; struct L2 : binary_function { @@ -72,8 +72,8 @@ namespace canny return ::sqrtf(x * x + y * y); } - __device__ __forceinline__ L2() {} - __device__ __forceinline__ L2(const L2&) {} + __host__ __device__ __forceinline__ L2() {} + __host__ __device__ __forceinline__ L2(const L2&) {} }; } @@ -470,8 +470,8 @@ namespace canny return (uchar)(-(e >> 1)); } - __device__ __forceinline__ 
GetEdges() {} - __device__ __forceinline__ GetEdges(const GetEdges&) {} + __host__ __device__ __forceinline__ GetEdges() {} + __host__ __device__ __forceinline__ GetEdges(const GetEdges&) {} }; } diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 095d8bac06..e15ef9f8e7 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -162,8 +162,8 @@ namespace arithm return vadd4(a, b); } - __device__ __forceinline__ VAdd4() {} - __device__ __forceinline__ VAdd4(const VAdd4& other) {} + __host__ __device__ __forceinline__ VAdd4() {} + __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {} }; //////////////////////////////////// @@ -175,8 +175,8 @@ namespace arithm return vadd2(a, b); } - __device__ __forceinline__ VAdd2() {} - __device__ __forceinline__ VAdd2(const VAdd2& other) {} + __host__ __device__ __forceinline__ VAdd2() {} + __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {} }; //////////////////////////////////// @@ -188,8 +188,8 @@ namespace arithm return saturate_cast(a + b); } - __device__ __forceinline__ AddMat() {} - __device__ __forceinline__ AddMat(const AddMat& other) {} + __host__ __device__ __forceinline__ AddMat() {} + __host__ __device__ __forceinline__ AddMat(const AddMat&) {} }; } @@ -397,8 +397,8 @@ namespace arithm return vsub4(a, b); } - __device__ __forceinline__ VSub4() {} - __device__ __forceinline__ VSub4(const VSub4& other) {} + __host__ __device__ __forceinline__ VSub4() {} + __host__ __device__ __forceinline__ VSub4(const VSub4&) {} }; //////////////////////////////////// @@ -410,8 +410,8 @@ namespace arithm return vsub2(a, b); } - __device__ __forceinline__ VSub2() {} - __device__ __forceinline__ VSub2(const VSub2& other) {} + __host__ __device__ __forceinline__ VSub2() {} + __host__ __device__ __forceinline__ VSub2(const VSub2&) {} }; //////////////////////////////////// @@ -423,8 +423,8 @@ namespace arithm return 
saturate_cast(a - b); } - __device__ __forceinline__ SubMat() {} - __device__ __forceinline__ SubMat(const SubMat& other) {} + __host__ __device__ __forceinline__ SubMat() {} + __host__ __device__ __forceinline__ SubMat(const SubMat&) {} }; } @@ -617,8 +617,8 @@ namespace arithm return res; } - __device__ __forceinline__ Mul_8uc4_32f() {} - __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {} + __host__ __device__ __forceinline__ Mul_8uc4_32f() {} + __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {} }; struct Mul_16sc4_32f : binary_function @@ -629,8 +629,8 @@ namespace arithm saturate_cast(a.z * b), saturate_cast(a.w * b)); } - __device__ __forceinline__ Mul_16sc4_32f() {} - __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {} + __host__ __device__ __forceinline__ Mul_16sc4_32f() {} + __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {} }; template struct Mul : binary_function @@ -640,8 +640,8 @@ namespace arithm return saturate_cast(a * b); } - __device__ __forceinline__ Mul() {} - __device__ __forceinline__ Mul(const Mul& other) {} + __host__ __device__ __forceinline__ Mul() {} + __host__ __device__ __forceinline__ Mul(const Mul&) {} }; template struct MulScale : binary_function @@ -888,8 +888,8 @@ namespace arithm return b != 0 ? saturate_cast(a / b) : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct Div : binary_function { @@ -898,8 +898,8 @@ namespace arithm return b != 0 ? static_cast(a) / b : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct Div : binary_function { @@ -908,8 +908,8 @@ namespace arithm return b != 0 ? 
static_cast(a) / b : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct DivScale : binary_function @@ -1196,8 +1196,8 @@ namespace arithm return vabsdiff4(a, b); } - __device__ __forceinline__ VAbsDiff4() {} - __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} + __host__ __device__ __forceinline__ VAbsDiff4() {} + __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {} }; //////////////////////////////////// @@ -1209,8 +1209,8 @@ namespace arithm return vabsdiff2(a, b); } - __device__ __forceinline__ VAbsDiff2() {} - __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} + __host__ __device__ __forceinline__ VAbsDiff2() {} + __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {} }; //////////////////////////////////// @@ -1235,8 +1235,8 @@ namespace arithm return saturate_cast(_abs(a - b)); } - __device__ __forceinline__ AbsDiffMat() {} - __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} + __host__ __device__ __forceinline__ AbsDiffMat() {} + __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {} }; } @@ -1370,8 +1370,8 @@ namespace arithm return saturate_cast(x * x); } - __device__ __forceinline__ Sqr() {} - __device__ __forceinline__ Sqr(const Sqr& other) {} + __host__ __device__ __forceinline__ Sqr() {} + __host__ __device__ __forceinline__ Sqr(const Sqr&) {} }; } @@ -1466,8 +1466,8 @@ namespace arithm return saturate_cast(f(x)); } - __device__ __forceinline__ Exp() {} - __device__ __forceinline__ Exp(const Exp& other) {} + __host__ __device__ __forceinline__ Exp() {} + __host__ __device__ __forceinline__ Exp(const Exp&) {} }; } @@ -1507,8 +1507,8 @@ namespace arithm return vcmpeq4(a, b); } - __device__ __forceinline__ VCmpEq4() {} - __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {} + __host__ __device__ 
__forceinline__ VCmpEq4() {} + __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {} }; struct VCmpNe4 : binary_function { @@ -1517,8 +1517,8 @@ namespace arithm return vcmpne4(a, b); } - __device__ __forceinline__ VCmpNe4() {} - __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {} + __host__ __device__ __forceinline__ VCmpNe4() {} + __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {} }; struct VCmpLt4 : binary_function { @@ -1527,8 +1527,8 @@ namespace arithm return vcmplt4(a, b); } - __device__ __forceinline__ VCmpLt4() {} - __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {} + __host__ __device__ __forceinline__ VCmpLt4() {} + __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {} }; struct VCmpLe4 : binary_function { @@ -1537,8 +1537,8 @@ namespace arithm return vcmple4(a, b); } - __device__ __forceinline__ VCmpLe4() {} - __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {} + __host__ __device__ __forceinline__ VCmpLe4() {} + __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {} }; //////////////////////////////////// @@ -2008,8 +2008,8 @@ namespace arithm return vmin4(a, b); } - __device__ __forceinline__ VMin4() {} - __device__ __forceinline__ VMin4(const VMin4& other) {} + __host__ __device__ __forceinline__ VMin4() {} + __host__ __device__ __forceinline__ VMin4(const VMin4&) {} }; //////////////////////////////////// @@ -2021,8 +2021,8 @@ namespace arithm return vmin2(a, b); } - __device__ __forceinline__ VMin2() {} - __device__ __forceinline__ VMin2(const VMin2& other) {} + __host__ __device__ __forceinline__ VMin2() {} + __host__ __device__ __forceinline__ VMin2(const VMin2&) {} }; } @@ -2100,8 +2100,8 @@ namespace arithm return vmax4(a, b); } - __device__ __forceinline__ VMax4() {} - __device__ __forceinline__ VMax4(const VMax4& other) {} + __host__ __device__ __forceinline__ VMax4() {} + __host__ __device__ __forceinline__ VMax4(const VMax4&) {} }; //////////////////////////////////// @@ -2113,8 
+2113,8 @@ namespace arithm return vmax2(a, b); } - __device__ __forceinline__ VMax2() {} - __device__ __forceinline__ VMax2(const VMax2& other) {} + __host__ __device__ __forceinline__ VMax2() {} + __host__ __device__ __forceinline__ VMax2(const VMax2&) {} }; } diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index b56cb20fed..360efa33e8 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -188,10 +188,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); - typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, - NppiSize oSizeROI, Npp64f* pRetVal); +#if CUDA_VERSION < 5050 + typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal); - static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; + static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; +#else + typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, + NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer); + + typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize); + + static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; + + static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R}; +#endif NppiSize sz; sz.width = src1.cols; @@ -203,7 +213,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) DeviceBuffer dbuf; - nppSafeCall( 
npp_norm_diff_func[funcIdx](src1.ptr(), static_cast(src1.step), src2.ptr(), static_cast(src2.step), sz, dbuf) ); +#if CUDA_VERSION < 5050 + nppSafeCall( funcs[funcIdx](src1.ptr(), static_cast(src1.step), src2.ptr(), static_cast(src2.step), sz, dbuf) ); +#else + int bufSize; + buf_size_funcs[funcIdx](sz, &bufSize); + + GpuMat buf(1, bufSize, CV_8UC1); + + nppSafeCall( funcs[funcIdx](src1.ptr(), static_cast(src1.step), src2.ptr(), static_cast(src2.step), sz, dbuf, buf.data) ); +#endif cudaSafeCall( cudaDeviceSynchronize() ); diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index b2072c25fc..d949b2f5f5 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -352,7 +352,7 @@ GPU_TEST_P(Add_Scalar, WithOutMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::add(mat, val, dst_gold, cv::noArray(), depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); } } @@ -383,7 +383,7 @@ GPU_TEST_P(Add_Scalar, WithMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::add(mat, val, dst_gold, mask, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); } } @@ -567,7 +567,7 @@ GPU_TEST_P(Subtract_Scalar, WithOutMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 
1e-4 : 1.0); } } @@ -598,7 +598,7 @@ GPU_TEST_P(Subtract_Scalar, WithMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::subtract(mat, val, dst_gold, mask, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); } } @@ -2148,7 +2148,7 @@ GPU_TEST_P(Min, Scalar) cv::Mat dst_gold = cv::min(src, val); - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5); } } @@ -2231,7 +2231,7 @@ GPU_TEST_P(Max, Scalar) cv::Mat dst_gold = cv::max(src, val); - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5); } } diff --git a/modules/imgproc/doc/miscellaneous_transformations.rst b/modules/imgproc/doc/miscellaneous_transformations.rst index b2965b5eda..4e16ba059d 100644 --- a/modules/imgproc/doc/miscellaneous_transformations.rst +++ b/modules/imgproc/doc/miscellaneous_transformations.rst @@ -113,6 +113,7 @@ If you use ``cvtColor`` with 8-bit images, the conversion will have some informa The function can do the following transformations: * + RGB :math:`\leftrightarrow` GRAY ( ``CV_BGR2GRAY, CV_RGB2GRAY, CV_GRAY2BGR, CV_GRAY2RGB`` ) Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using: .. math:: @@ -755,7 +756,7 @@ Runs the GrabCut algorithm. * **GC_PR_BGD** defines a possible background pixel. - * **GC_PR_BGD** defines a possible foreground pixel. + * **GC_PR_FGD** defines a possible foreground pixel. :param rect: ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when ``mode==GC_INIT_WITH_RECT`` . 
diff --git a/modules/imgproc/src/clahe.cpp b/modules/imgproc/src/clahe.cpp new file mode 100644 index 0000000000..4ce479713e --- /dev/null +++ b/modules/imgproc/src/clahe.cpp @@ -0,0 +1,334 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, NVIDIA Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +// ---------------------------------------------------------------------- +// CLAHE + +namespace +{ + class CLAHE_CalcLut_Body : public cv::ParallelLoopBody + { + public: + CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) : + src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale) + { + } + + void operator ()(const cv::Range& range) const; + + private: + cv::Mat src_; + mutable cv::Mat lut_; + + cv::Size tileSize_; + int tilesX_; + int tilesY_; + int clipLimit_; + float lutScale_; + }; + + void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const + { + const int histSize = 256; + + uchar* tileLut = lut_.ptr(range.start); + const size_t lut_step = lut_.step; + + for (int k = range.start; k < range.end; ++k, tileLut += lut_step) + { + const int ty = k / tilesX_; + const int tx = k % tilesX_; + + // retrieve tile submatrix + + cv::Rect tileROI; + tileROI.x = tx * tileSize_.width; + tileROI.y = ty * tileSize_.height; + tileROI.width = tileSize_.width; + tileROI.height = tileSize_.height; + + const cv::Mat tile = src_(tileROI); + + // calc histogram + + int tileHist[histSize] = {0, }; + + int height = tileROI.height; + const size_t sstep = tile.step; + for (const uchar* ptr = tile.ptr(0); height--; ptr += sstep) + { + int x = 0; + for (; x <= 
tileROI.width - 4; x += 4) + { + int t0 = ptr[x], t1 = ptr[x+1]; + tileHist[t0]++; tileHist[t1]++; + t0 = ptr[x+2]; t1 = ptr[x+3]; + tileHist[t0]++; tileHist[t1]++; + } + + for (; x < tileROI.width; ++x) + tileHist[ptr[x]]++; + } + + // clip histogram + + if (clipLimit_ > 0) + { + // how many pixels were clipped + int clipped = 0; + for (int i = 0; i < histSize; ++i) + { + if (tileHist[i] > clipLimit_) + { + clipped += tileHist[i] - clipLimit_; + tileHist[i] = clipLimit_; + } + } + + // redistribute clipped pixels + int redistBatch = clipped / histSize; + int residual = clipped - redistBatch * histSize; + + for (int i = 0; i < histSize; ++i) + tileHist[i] += redistBatch; + + for (int i = 0; i < residual; ++i) + tileHist[i]++; + } + + // calc Lut + + int sum = 0; + for (int i = 0; i < histSize; ++i) + { + sum += tileHist[i]; + tileLut[i] = cv::saturate_cast(sum * lutScale_); + } + } + } + + class CLAHE_Interpolation_Body : public cv::ParallelLoopBody + { + public: + CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) : + src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) + { + } + + void operator ()(const cv::Range& range) const; + + private: + cv::Mat src_; + mutable cv::Mat dst_; + cv::Mat lut_; + + cv::Size tileSize_; + int tilesX_; + int tilesY_; + }; + + void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const + { + const size_t lut_step = lut_.step; + + for (int y = range.start; y < range.end; ++y) + { + const uchar* srcRow = src_.ptr(y); + uchar* dstRow = dst_.ptr(y); + + const float tyf = (static_cast(y) / tileSize_.height) - 0.5f; + + int ty1 = cvFloor(tyf); + int ty2 = ty1 + 1; + + const float ya = tyf - ty1; + + ty1 = std::max(ty1, 0); + ty2 = std::min(ty2, tilesY_ - 1); + + const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_); + const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_); + + for (int x = 0; x < src_.cols; ++x) + { + const float txf 
= (static_cast(x) / tileSize_.width) - 0.5f; + + int tx1 = cvFloor(txf); + int tx2 = tx1 + 1; + + const float xa = txf - tx1; + + tx1 = std::max(tx1, 0); + tx2 = std::min(tx2, tilesX_ - 1); + + const int srcVal = srcRow[x]; + + const size_t ind1 = tx1 * lut_step + srcVal; + const size_t ind2 = tx2 * lut_step + srcVal; + + float res = 0; + + res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya)); + res += lutPlane1[ind2] * ((xa) * (1.0f - ya)); + res += lutPlane2[ind1] * ((1.0f - xa) * (ya)); + res += lutPlane2[ind2] * ((xa) * (ya)); + + dstRow[x] = cv::saturate_cast(res); + } + } + } + + class CLAHE_Impl : public cv::CLAHE + { + public: + CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); + + cv::AlgorithmInfo* info() const; + + void apply(cv::InputArray src, cv::OutputArray dst); + + void setClipLimit(double clipLimit); + double getClipLimit() const; + + void setTilesGridSize(cv::Size tileGridSize); + cv::Size getTilesGridSize() const; + + void collectGarbage(); + + private: + double clipLimit_; + int tilesX_; + int tilesY_; + + cv::Mat srcExt_; + cv::Mat lut_; + }; + + CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : + clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) + { + } + + CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE", + obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); + obj.info()->addParam(obj, "tilesX", obj.tilesX_); + obj.info()->addParam(obj, "tilesY", obj.tilesY_)) + + void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst) + { + cv::Mat src = _src.getMat(); + + CV_Assert( src.type() == CV_8UC1 ); + + _dst.create( src.size(), src.type() ); + cv::Mat dst = _dst.getMat(); + + const int histSize = 256; + + lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1); + + cv::Size tileSize; + cv::Mat srcForLut; + + if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) + { + tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); + srcForLut = src; + } + else + { + cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - 
(src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101); + + tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); + srcForLut = srcExt_; + } + + const int tileSizeTotal = tileSize.area(); + const float lutScale = static_cast(histSize - 1) / tileSizeTotal; + + int clipLimit = 0; + if (clipLimit_ > 0.0) + { + clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); + clipLimit = std::max(clipLimit, 1); + } + + CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale); + cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody); + + CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_); + cv::parallel_for_(cv::Range(0, src.rows), interpolationBody); + } + + void CLAHE_Impl::setClipLimit(double clipLimit) + { + clipLimit_ = clipLimit; + } + + double CLAHE_Impl::getClipLimit() const + { + return clipLimit_; + } + + void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) + { + tilesX_ = tileGridSize.width; + tilesY_ = tileGridSize.height; + } + + cv::Size CLAHE_Impl::getTilesGridSize() const + { + return cv::Size(tilesX_, tilesY_); + } + + void CLAHE_Impl::collectGarbage() + { + srcExt_.release(); + lut_.release(); + } +} + +cv::Ptr cv::createCLAHE(double clipLimit, cv::Size tileGridSize) +{ + return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); +} diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index f569ec98cc..e07902cade 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -1815,7 +1815,7 @@ const int ITUR_BT_601_CGV = -385875; const int ITUR_BT_601_CBV = -74448; template -struct YUV420sp2RGB888Invoker +struct YUV420sp2RGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *muv; @@ -1824,10 +1824,10 @@ struct YUV420sp2RGB888Invoker YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv) : dst(_dst), my1(_y1), muv(_uv), 
width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; //R = 1.164(Y - 16) + 1.596(V - 128) //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) @@ -1884,7 +1884,7 @@ struct YUV420sp2RGB888Invoker }; template -struct YUV420sp2RGBA8888Invoker +struct YUV420sp2RGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *muv; @@ -1893,10 +1893,10 @@ struct YUV420sp2RGBA8888Invoker YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv) : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; //R = 1.164(Y - 16) + 1.596(V - 128) //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) @@ -1957,7 +1957,7 @@ struct YUV420sp2RGBA8888Invoker }; template -struct YUV420p2RGB888Invoker +struct YUV420p2RGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *mu, *mv; @@ -1967,19 +1967,19 @@ struct YUV420p2RGB888Invoker YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx) : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - const int rangeBegin = range.begin() * 2; - const int rangeEnd = range.end() * 2; + const int rangeBegin = range.start * 2; + const int rangeEnd = range.end * 2; size_t uvsteps[2] = {width/2, stride - width/2}; int usIdx = ustepIdx, vsIdx = vstepIdx; const uchar* y1 = my1 + rangeBegin * stride; - const uchar* u1 = mu + 
(range.begin() / 2) * stride; - const uchar* v1 = mv + (range.begin() / 2) * stride; + const uchar* u1 = mu + (range.start / 2) * stride; + const uchar* v1 = mv + (range.start / 2) * stride; - if(range.begin() % 2 == 1) + if(range.start % 2 == 1) { u1 += uvsteps[(usIdx++) & 1]; v1 += uvsteps[(vsIdx++) & 1]; @@ -2025,7 +2025,7 @@ struct YUV420p2RGB888Invoker }; template -struct YUV420p2RGBA8888Invoker +struct YUV420p2RGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *mu, *mv; @@ -2035,19 +2035,19 @@ struct YUV420p2RGBA8888Invoker YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx) : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; size_t uvsteps[2] = {width/2, stride - width/2}; int usIdx = ustepIdx, vsIdx = vstepIdx; const uchar* y1 = my1 + rangeBegin * stride; - const uchar* u1 = mu + (range.begin() / 2) * stride; - const uchar* v1 = mv + (range.begin() / 2) * stride; + const uchar* u1 = mu + (range.start / 2) * stride; + const uchar* v1 = mv + (range.start / 2) * stride; - if(range.begin() % 2 == 1) + if(range.start % 2 == 1) { u1 += uvsteps[(usIdx++) & 1]; v1 += uvsteps[(vsIdx++) & 1]; @@ -2102,48 +2102,40 @@ template inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv) { YUV420sp2RGB888Invoker converter(&_dst, _stride, _y1, _uv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void 
cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv) { YUV420sp2RGBA8888Invoker converter(&_dst, _stride, _y1, _uv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx) { YUV420p2RGB888Invoker converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx) { YUV420p2RGBA8888Invoker converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } ///////////////////////////////////// RGB -> YUV420p ///////////////////////////////////// @@ -2227,7 +2219,7 @@ static void cvtRGBtoYUV420p(const Mat& src, Mat& dst) ///////////////////////////////////// YUV422 -> RGB ///////////////////////////////////// template -struct YUV422toRGB888Invoker +struct YUV422toRGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* src; @@ -2236,10 +2228,10 @@ struct YUV422toRGB888Invoker YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv) : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {} - void 
operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin(); - int rangeEnd = range.end(); + int rangeBegin = range.start; + int rangeEnd = range.end; const int uidx = 1 - yIdx + uIdx * 2; const int vidx = (2 + uidx) % 4; @@ -2273,7 +2265,7 @@ struct YUV422toRGB888Invoker }; template -struct YUV422toRGBA8888Invoker +struct YUV422toRGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* src; @@ -2282,10 +2274,10 @@ struct YUV422toRGBA8888Invoker YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv) : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin(); - int rangeEnd = range.end(); + int rangeBegin = range.start; + int rangeEnd = range.end; const int uidx = 1 - yIdx + uIdx * 2; const int vidx = (2 + uidx) % 4; @@ -2326,24 +2318,20 @@ template inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv) { YUV422toRGB888Invoker converter(&_dst, _stride, _yuv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows), converter); + parallel_for_(Range(0, _dst.rows), converter); else -#endif - converter(BlockedRange(0, _dst.rows)); + converter(Range(0, _dst.rows)); } template inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv) { YUV422toRGBA8888Invoker converter(&_dst, _stride, _yuv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows), converter); + parallel_for_(Range(0, _dst.rows), converter); else -#endif - converter(BlockedRange(0, _dst.rows)); + converter(Range(0, _dst.rows)); } /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) ////////////// diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp index b8ab50ccd1..8ebb206b38 100644 --- 
a/modules/imgproc/src/distransform.cpp +++ b/modules/imgproc/src/distransform.cpp @@ -442,7 +442,7 @@ static void getDistanceTransformMask( int maskType, float *metrics ) } } -struct DTColumnInvoker +struct DTColumnInvoker : ParallelLoopBody { DTColumnInvoker( const Mat* _src, Mat* _dst, const int* _sat_tab, const float* _sqr_tab) { @@ -452,9 +452,9 @@ struct DTColumnInvoker sqr_tab = _sqr_tab; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { - int i, i1 = range.begin(), i2 = range.end(); + int i, i1 = range.start, i2 = range.end; int m = src->rows; size_t sstep = src->step, dstep = dst->step/sizeof(float); AutoBuffer _d(m); @@ -489,7 +489,7 @@ struct DTColumnInvoker }; -struct DTRowInvoker +struct DTRowInvoker : ParallelLoopBody { DTRowInvoker( Mat* _dst, const float* _sqr_tab, const float* _inv_tab ) { @@ -498,10 +498,10 @@ struct DTRowInvoker inv_tab = _inv_tab; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { const float inf = 1e15f; - int i, i1 = range.begin(), i2 = range.end(); + int i, i1 = range.start, i2 = range.end; int n = dst->cols; AutoBuffer _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int)); float* f = (float*)(uchar*)_buf; @@ -578,7 +578,7 @@ trueDistTrans( const Mat& src, Mat& dst ) for( ; i <= m*3; i++ ) sat_tab[i] = i - shift; - cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab)); + cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab)); // stage 2: compute modified distance transform for each row float* inv_tab = sqr_tab + n; @@ -590,7 +590,7 @@ trueDistTrans( const Mat& src, Mat& dst ) sqr_tab[i] = (float)(i*i); } - cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(&dst, sqr_tab, inv_tab)); + cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(&dst, sqr_tab, inv_tab)); } @@ -664,7 +664,7 @@ distanceATS_L1_8u( const Mat& src, Mat& dst ) // do right edge a = 
lut[dbase[width-1+dststep]]; dbase[width-1] = (uchar)(MIN(a, dbase[width-1])); - + for( x = width - 2; x >= 0; x-- ) { int b = dbase[x+dststep]; diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index 6357147dfa..718ac8a0ec 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -2985,29 +2985,23 @@ cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask, } } -class EqualizeHistCalcHist_Invoker +class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody { public: enum {HIST_SZ = 256}; -#ifdef HAVE_TBB - typedef tbb::mutex* MutextPtr; -#else - typedef void* MutextPtr; -#endif - - EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr histogramLock) + EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock) : src_(src), globalHistogram_(histogram), histogramLock_(histogramLock) { } - void operator()( const cv::BlockedRange& rowRange ) const + void operator()( const cv::Range& rowRange ) const { int localHistogram[HIST_SZ] = {0, }; const size_t sstep = src_.step; int width = src_.cols; - int height = rowRange.end() - rowRange.begin(); + int height = rowRange.end - rowRange.start; if (src_.isContinuous()) { @@ -3015,7 +3009,7 @@ public: height = 1; } - for (const uchar* ptr = src_.ptr(rowRange.begin()); height--; ptr += sstep) + for (const uchar* ptr = src_.ptr(rowRange.start); height--; ptr += sstep) { int x = 0; for (; x <= width - 4; x += 4) @@ -3030,9 +3024,7 @@ public: localHistogram[ptr[x]]++; } -#ifdef HAVE_TBB - tbb::mutex::scoped_lock lock(*histogramLock_); -#endif + cv::AutoLock lock(*histogramLock_); for( int i = 0; i < HIST_SZ; i++ ) globalHistogram_[i] += localHistogram[i]; @@ -3040,12 +3032,7 @@ public: static bool isWorthParallel( const cv::Mat& src ) { -#ifdef HAVE_TBB return ( src.total() >= 640*480 ); -#else - (void)src; - return false; -#endif } private: @@ -3053,10 +3040,10 @@ private: cv::Mat& src_; int* 
globalHistogram_; - MutextPtr histogramLock_; + cv::Mutex* histogramLock_; }; -class EqualizeHistLut_Invoker +class EqualizeHistLut_Invoker : public cv::ParallelLoopBody { public: EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut ) @@ -3065,13 +3052,13 @@ public: lut_(lut) { } - void operator()( const cv::BlockedRange& rowRange ) const + void operator()( const cv::Range& rowRange ) const { const size_t sstep = src_.step; const size_t dstep = dst_.step; int width = src_.cols; - int height = rowRange.end() - rowRange.begin(); + int height = rowRange.end - rowRange.start; int* lut = lut_; if (src_.isContinuous() && dst_.isContinuous()) @@ -3080,8 +3067,8 @@ public: height = 1; } - const uchar* sptr = src_.ptr(rowRange.begin()); - uchar* dptr = dst_.ptr(rowRange.begin()); + const uchar* sptr = src_.ptr(rowRange.start); + uchar* dptr = dst_.ptr(rowRange.start); for (; height--; sptr += sstep, dptr += dstep) { @@ -3110,12 +3097,7 @@ public: static bool isWorthParallel( const cv::Mat& src ) { -#ifdef HAVE_TBB return ( src.total() >= 640*480 ); -#else - (void)src; - return false; -#endif } private: @@ -3142,23 +3124,18 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst ) if(src.empty()) return; -#ifdef HAVE_TBB - tbb::mutex histogramLockInstance; - EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance; -#else - EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0; -#endif + Mutex histogramLockInstance; const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ; int hist[hist_sz] = {0,}; int lut[hist_sz]; - EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock); + EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance); EqualizeHistLut_Invoker lutBody(src, dst, lut); - cv::BlockedRange heightRange(0, src.rows); + cv::Range heightRange(0, src.rows); if(EqualizeHistCalcHist_Invoker::isWorthParallel(src)) - parallel_for(heightRange, calcBody); + parallel_for_(heightRange, calcBody); else 
calcBody(heightRange); @@ -3182,303 +3159,11 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst ) } if(EqualizeHistLut_Invoker::isWorthParallel(src)) - parallel_for(heightRange, lutBody); + parallel_for_(heightRange, lutBody); else lutBody(heightRange); } -// ---------------------------------------------------------------------- -// CLAHE - -namespace -{ - class CLAHE_CalcLut_Body : public cv::ParallelLoopBody - { - public: - CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) : - src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale) - { - } - - void operator ()(const cv::Range& range) const; - - private: - cv::Mat src_; - mutable cv::Mat lut_; - - cv::Size tileSize_; - int tilesX_; - int tilesY_; - int clipLimit_; - float lutScale_; - }; - - void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const - { - const int histSize = 256; - - uchar* tileLut = lut_.ptr(range.start); - const size_t lut_step = lut_.step; - - for (int k = range.start; k < range.end; ++k, tileLut += lut_step) - { - const int ty = k / tilesX_; - const int tx = k % tilesX_; - - // retrieve tile submatrix - - cv::Rect tileROI; - tileROI.x = tx * tileSize_.width; - tileROI.y = ty * tileSize_.height; - tileROI.width = tileSize_.width; - tileROI.height = tileSize_.height; - - const cv::Mat tile = src_(tileROI); - - // calc histogram - - int tileHist[histSize] = {0, }; - - int height = tileROI.height; - const size_t sstep = tile.step; - for (const uchar* ptr = tile.ptr(0); height--; ptr += sstep) - { - int x = 0; - for (; x <= tileROI.width - 4; x += 4) - { - int t0 = ptr[x], t1 = ptr[x+1]; - tileHist[t0]++; tileHist[t1]++; - t0 = ptr[x+2]; t1 = ptr[x+3]; - tileHist[t0]++; tileHist[t1]++; - } - - for (; x < tileROI.width; ++x) - tileHist[ptr[x]]++; - } - - // clip histogram - - if (clipLimit_ > 0) - { - // how many pixels were clipped - int 
clipped = 0; - for (int i = 0; i < histSize; ++i) - { - if (tileHist[i] > clipLimit_) - { - clipped += tileHist[i] - clipLimit_; - tileHist[i] = clipLimit_; - } - } - - // redistribute clipped pixels - int redistBatch = clipped / histSize; - int residual = clipped - redistBatch * histSize; - - for (int i = 0; i < histSize; ++i) - tileHist[i] += redistBatch; - - for (int i = 0; i < residual; ++i) - tileHist[i]++; - } - - // calc Lut - - int sum = 0; - for (int i = 0; i < histSize; ++i) - { - sum += tileHist[i]; - tileLut[i] = cv::saturate_cast(sum * lutScale_); - } - } - } - - class CLAHE_Interpolation_Body : public cv::ParallelLoopBody - { - public: - CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) : - src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) - { - } - - void operator ()(const cv::Range& range) const; - - private: - cv::Mat src_; - mutable cv::Mat dst_; - cv::Mat lut_; - - cv::Size tileSize_; - int tilesX_; - int tilesY_; - }; - - void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const - { - const size_t lut_step = lut_.step; - - for (int y = range.start; y < range.end; ++y) - { - const uchar* srcRow = src_.ptr(y); - uchar* dstRow = dst_.ptr(y); - - const float tyf = (static_cast(y) / tileSize_.height) - 0.5f; - - int ty1 = cvFloor(tyf); - int ty2 = ty1 + 1; - - const float ya = tyf - ty1; - - ty1 = std::max(ty1, 0); - ty2 = std::min(ty2, tilesY_ - 1); - - const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_); - const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_); - - for (int x = 0; x < src_.cols; ++x) - { - const float txf = (static_cast(x) / tileSize_.width) - 0.5f; - - int tx1 = cvFloor(txf); - int tx2 = tx1 + 1; - - const float xa = txf - tx1; - - tx1 = std::max(tx1, 0); - tx2 = std::min(tx2, tilesX_ - 1); - - const int srcVal = srcRow[x]; - - const size_t ind1 = tx1 * lut_step + srcVal; - const size_t ind2 = tx2 * lut_step + srcVal; 
- - float res = 0; - - res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya)); - res += lutPlane1[ind2] * ((xa) * (1.0f - ya)); - res += lutPlane2[ind1] * ((1.0f - xa) * (ya)); - res += lutPlane2[ind2] * ((xa) * (ya)); - - dstRow[x] = cv::saturate_cast(res); - } - } - } - - class CLAHE_Impl : public cv::CLAHE - { - public: - CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); - - cv::AlgorithmInfo* info() const; - - void apply(cv::InputArray src, cv::OutputArray dst); - - void setClipLimit(double clipLimit); - double getClipLimit() const; - - void setTilesGridSize(cv::Size tileGridSize); - cv::Size getTilesGridSize() const; - - void collectGarbage(); - - private: - double clipLimit_; - int tilesX_; - int tilesY_; - - cv::Mat srcExt_; - cv::Mat lut_; - }; - - CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : - clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) - { - } - - CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE", - obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); - obj.info()->addParam(obj, "tilesX", obj.tilesX_); - obj.info()->addParam(obj, "tilesY", obj.tilesY_)) - - void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst) - { - cv::Mat src = _src.getMat(); - - CV_Assert( src.type() == CV_8UC1 ); - - _dst.create( src.size(), src.type() ); - cv::Mat dst = _dst.getMat(); - - const int histSize = 256; - - lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1); - - cv::Size tileSize; - cv::Mat srcForLut; - - if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) - { - tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); - srcForLut = src; - } - else - { - cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101); - - tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); - srcForLut = srcExt_; - } - - const int tileSizeTotal = tileSize.area(); - const float lutScale = static_cast(histSize - 1) / tileSizeTotal; - - int clipLimit = 0; 
- if (clipLimit_ > 0.0) - { - clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); - clipLimit = std::max(clipLimit, 1); - } - - CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale); - cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody); - - CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_); - cv::parallel_for_(cv::Range(0, src.rows), interpolationBody); - } - - void CLAHE_Impl::setClipLimit(double clipLimit) - { - clipLimit_ = clipLimit; - } - - double CLAHE_Impl::getClipLimit() const - { - return clipLimit_; - } - - void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) - { - tilesX_ = tileGridSize.width; - tilesY_ = tileGridSize.height; - } - - cv::Size CLAHE_Impl::getTilesGridSize() const - { - return cv::Size(tilesX_, tilesY_); - } - - void CLAHE_Impl::collectGarbage() - { - srcExt_.release(); - lut_.release(); - } -} - -cv::Ptr cv::createCLAHE(double clipLimit, cv::Size tileGridSize) -{ - return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); -} - // ---------------------------------------------------------------------- /* Implementation of RTTI and Generic Functions for CvHistogram */ diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index b4e08a6fb8..e0e27cc2b8 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1081,7 +1081,7 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor) namespace cv { -class MorphologyRunner +class MorphologyRunner : public ParallelLoopBody { public: MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations, @@ -1102,14 +1102,14 @@ public: columnBorderType = _columnBorderType; } - void operator () ( const BlockedRange& range ) const + void operator () ( const Range& range ) const { - int row0 = std::min(cvRound(range.begin() * src.rows / nStripes), src.rows); - int row1 = std::min(cvRound(range.end() * src.rows / 
nStripes), src.rows); + int row0 = std::min(cvRound(range.start * src.rows / nStripes), src.rows); + int row1 = std::min(cvRound(range.end * src.rows / nStripes), src.rows); /*if(0) printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", - src.rows, src.cols, range.begin(), range.end(), row0, row1);*/ + src.rows, src.cols, range.start, range.end, row0, row1);*/ Mat srcStripe = src.rowRange(row0, row1); Mat dstStripe = dst.rowRange(row0, row1); @@ -1173,15 +1173,15 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, } int nStripes = 1; -#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION +#if defined HAVE_TEGRA_OPTIMIZATION if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing (borderType & BORDER_ISOLATED) == 0 && //TODO: check border types src.rows >= 64 ) //NOTE: just heuristics nStripes = 4; #endif - parallel_for(BlockedRange(0, nStripes), - MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue)); + parallel_for_(Range(0, nStripes), + MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue)); //Ptr f = createMorphologyFilter(op, src.type(), // kernel, anchor, borderType, borderType, borderValue ); diff --git a/modules/ml/src/ann_mlp.cpp b/modules/ml/src/ann_mlp.cpp index 438872ae8c..bf85425b9c 100644 --- a/modules/ml/src/ann_mlp.cpp +++ b/modules/ml/src/ann_mlp.cpp @@ -40,10 +40,6 @@ #include "precomp.hpp" -#ifdef HAVE_TBB -#include -#endif - CvANN_MLP_TrainParams::CvANN_MLP_TrainParams() { term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 ); @@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw ) return iter; } -struct rprop_loop { +struct rprop_loop : cv::ParallelLoopBody { rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0, int& _l_count, CvMat*& _layer_sizes, int& _ovcount, 
int& _max_count, CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz) @@ -1063,7 +1059,7 @@ struct rprop_loop { int buf_sz; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { double* buf_ptr; double** x = 0; @@ -1084,7 +1080,7 @@ struct rprop_loop { buf_ptr += (df[i] - x[i])*2; } - for(int si = range.begin(); si < range.end(); si++ ) + for(int si = range.start; si < range.end; si++ ) { if (si % dcount0 != 0) continue; int n1, n2, k; @@ -1170,36 +1166,33 @@ struct rprop_loop { } // backward pass, update dEdw - #ifdef HAVE_TBB - static tbb::spin_mutex mutex; - tbb::spin_mutex::scoped_lock lock; - #endif + static cv::Mutex mutex; + for(int i = l_count-1; i > 0; i-- ) { n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i]; cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] ); cvMul( grad1, &_df, grad1 ); - #ifdef HAVE_TBB - lock.acquire(mutex); - #endif - cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); - cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); - cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); - - // update bias part of dEdw - for( k = 0; k < dcount; k++ ) - { - double* dst = _dEdw.data.db + n1*n2; - const double* src = grad1->data.db + k*n2; - for(int j = 0; j < n2; j++ ) - dst[j] += src[j]; + + { + cv::AutoLock lock(mutex); + cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); + cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); + cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); + + // update bias part of dEdw + for( k = 0; k < dcount; k++ ) + { + double* dst = _dEdw.data.db + n1*n2; + const double* src = grad1->data.db + k*n2; + for(int j = 0; j < n2; j++ ) + dst[j] += src[j]; + } + + if (i > 1) + cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); } - if (i > 1) - cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); - #ifdef HAVE_TBB - lock.release(); - #endif 
cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db ); if( i > 1 ) cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T ); @@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw ) double E = 0; // first, iterate through all the samples and compute dEdw - cv::parallel_for(cv::BlockedRange(0, count), + cv::parallel_for_(cv::Range(0, count), rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes, ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz) ); diff --git a/modules/ml/src/gbt.cpp b/modules/ml/src/gbt.cpp index f2e54524a8..42d0d4f3af 100644 --- a/modules/ml/src/gbt.cpp +++ b/modules/ml/src/gbt.cpp @@ -884,7 +884,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing, } -class Tree_predictor +class Tree_predictor : public cv::ParallelLoopBody { private: pCvSeq* weak; @@ -894,9 +894,7 @@ private: const CvMat* missing; const float shrinkage; -#ifdef HAVE_TBB - static tbb::spin_mutex SumMutex; -#endif + static cv::Mutex SumMutex; public: @@ -915,14 +913,11 @@ public: Tree_predictor& operator=( const Tree_predictor& ) { return *this; } - virtual void operator()(const cv::BlockedRange& range) const + virtual void operator()(const cv::Range& range) const { -#ifdef HAVE_TBB - tbb::spin_mutex::scoped_lock lock; -#endif CvSeqReader reader; - int begin = range.begin(); - int end = range.end(); + int begin = range.start; + int end = range.end; int weak_count = end - begin; CvDTree* tree; @@ -940,13 +935,11 @@ public: tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value); } } -#ifdef HAVE_TBB - lock.acquire(SumMutex); - sum[i] += tmp_sum; - lock.release(); -#else - sum[i] += tmp_sum; -#endif + + { + cv::AutoLock lock(SumMutex); + sum[i] += tmp_sum; + } } } // Tree_predictor::operator() @@ -954,11 +947,7 @@ public: }; // class Tree_predictor - -#ifdef HAVE_TBB -tbb::spin_mutex Tree_predictor::SumMutex; -#endif - +cv::Mutex Tree_predictor::SumMutex; float 
CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing, @@ -976,12 +965,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing, Tree_predictor predictor = Tree_predictor(weak_seq, class_count, params.shrinkage, _sample, _missing, sum); -//#ifdef HAVE_TBB -// tbb::parallel_for(cv::BlockedRange(begin, end), predictor, -// tbb::auto_partitioner()); -//#else - cv::parallel_for(cv::BlockedRange(begin, end), predictor); -//#endif + cv::parallel_for_(cv::Range(begin, end), predictor); for (int i=0; i *resp ) Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(), _data->get_missing(), _sample_idx); -//#ifdef HAVE_TBB -// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner()); -//#else - cv::parallel_for(cv::BlockedRange(0,n), predictor); -//#endif + cv::parallel_for_(cv::Range(0,n), predictor); int* sidx = _sample_idx ? _sample_idx->data.i : 0; int r_step = CV_IS_MAT_CONT(response->type) ? diff --git a/modules/ml/src/knearest.cpp b/modules/ml/src/knearest.cpp index 3c2f9ebada..6b6f5e6afa 100644 --- a/modules/ml/src/knearest.cpp +++ b/modules/ml/src/knearest.cpp @@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end, return result; } -struct P1 { +struct P1 : cv::ParallelLoopBody { P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors, int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result) { @@ -333,10 +333,10 @@ struct P1 { float* result; int buf_sz; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { cv::AutoBuffer buf(buf_sz); - for(int i = range.begin(); i < range.end(); i += 1 ) + for(int i = range.start; i < range.end; i += 1 ) { float* neighbor_responses = &buf[0]; float* dist = neighbor_responses + 1*k; @@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results, int k1 = 
get_sample_count(); k1 = MIN( k1, k ); - cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1, - _results, _neighbor_responses, _dist, &result) + cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1, + _results, _neighbor_responses, _dist, &result) ); return result; diff --git a/modules/ml/src/nbayes.cpp b/modules/ml/src/nbayes.cpp index 15146d6f4e..f1f7a24ec0 100644 --- a/modules/ml/src/nbayes.cpp +++ b/modules/ml/src/nbayes.cpp @@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res return result; } -struct predict_body { +struct predict_body : cv::ParallelLoopBody { predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg, const CvMat* _samples, const int* _vidx, CvMat* _cls_labels, CvMat* _results, float* _value, int _var_count1 @@ -307,7 +307,7 @@ struct predict_body { float* value; int var_count1; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { int cls = -1; @@ -324,7 +324,7 @@ struct predict_body { cv::AutoBuffer buffer(nclasses + var_count1); CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] ); - for(int k = range.begin(); k < range.end(); k += 1 ) + for(int k = range.start; k < range.end; k += 1 ) { int ival; double opt = FLT_MAX; @@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c const int* vidx = var_idx ? 
var_idx->data.i : 0; - cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples, - vidx, cls_labels, results, &value, var_count - )); + cv::parallel_for_(cv::Range(0, samples->rows), + predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples, + vidx, cls_labels, results, &value, var_count)); return value; } diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index 7884ef5ee6..581abb61be 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -2192,7 +2192,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const return result; } -struct predict_body_svm { +struct predict_body_svm : ParallelLoopBody { predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results) { pointer = _pointer; @@ -2206,9 +2206,9 @@ struct predict_body_svm { const CvMat* samples; CvMat* results; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { - for(int i = range.begin(); i < range.end(); i++ ) + for(int i = range.start; i < range.end; i++ ) { CvMat sample; cvGetRow( samples, &sample, i ); @@ -2224,7 +2224,7 @@ struct predict_body_svm { float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const { float result = 0; - cv::parallel_for(cv::BlockedRange(0, samples->rows), + cv::parallel_for_(cv::Range(0, samples->rows), predict_body_svm(this, &result, samples, results) ); return result; diff --git a/modules/nonfree/src/surf.cpp b/modules/nonfree/src/surf.cpp index 34d7b96f45..98d1449828 100644 --- a/modules/nonfree/src/surf.cpp +++ b/modules/nonfree/src/surf.cpp @@ -258,7 +258,7 @@ interpolateKeypoint( float N9[3][9], int dx, int dy, int ds, KeyPoint& kpt ) } // Multi-threaded construction of the scale-space pyramid -struct SURFBuildInvoker +struct SURFBuildInvoker : ParallelLoopBody { SURFBuildInvoker( const Mat& _sum, const std::vector& _sizes, const std::vector& _sampleSteps, @@ -271,9 
+271,9 @@ struct SURFBuildInvoker traces = &_traces; } - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - for( int i=range.begin(); i& _dets, const std::vector& _traces, @@ -310,9 +310,9 @@ struct SURFFindInvoker const std::vector& sizes, std::vector& keypoints, int octave, int layer, float hessianThreshold, int sampleStep ); - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - for( int i=range.begin(); i& o int stripCount, stripSize; - #ifdef HAVE_TBB const int PTS_PER_THREAD = 1000; stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD; stripCount = std::min(std::max(stripCount, 1), 100); stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep; - #else - stripCount = 1; - stripSize = processingRectSize.height; - #endif if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates, rejectLevels, levelWeights, outputRejectLevels ) ) diff --git a/modules/objdetect/src/latentsvm.cpp b/modules/objdetect/src/latentsvm.cpp index 641e634a6b..613da1ceaa 100644 --- a/modules/objdetect/src/latentsvm.cpp +++ b/modules/objdetect/src/latentsvm.cpp @@ -582,7 +582,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, // For each component perform searching for (i = 0; i < kComponents; i++) { -#ifdef HAVE_TBB int error = searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], b[i], maxXBorder, maxYBorder, scoreThreshold, &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), @@ -598,13 +597,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, free(partsDisplacementArr); return LATENT_SVM_SEARCH_OBJECT_FAILED; } -#else - (void)numThreads; - searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], - b[i], maxXBorder, maxYBorder, scoreThreshold, - &(pointsArr[i]), 
&(levelsArr[i]), &(kPointsArr[i]), - &(scoreArr[i]), &(partsDisplacementArr[i])); -#endif estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i])); componentIndex += (kPartFilters[i] + 1); diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp index 0df96dbf14..45862266df 100644 --- a/modules/ocl/include/opencv2/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl.hpp @@ -121,8 +121,9 @@ namespace cv CV_EXPORTS void setBinpath(const char *path); //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue + //returns cl_context * CV_EXPORTS void* getoclContext(); - + //returns cl_command_queue * CV_EXPORTS void* getoclCommandQueue(); //explicit call clFinish. The global command queue will be used. @@ -460,6 +461,7 @@ namespace cv // support all C1 types CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat()); + CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf); //! 
finds global minimum and maximum array elements and returns their values with locations // support all C1 types @@ -808,7 +810,11 @@ namespace cv CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum); CV_EXPORTS void integral(const oclMat &src, oclMat &sum); CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////CascadeClassifier////////////////////////////////////////////////////////////////// @@ -826,13 +832,14 @@ namespace cv }; #endif +#if 0 class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier { public: OclCascadeClassifierBuf() : m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {} - ~OclCascadeClassifierBuf() {} + ~OclCascadeClassifierBuf() { release(); } void detectMultiScale(oclMat &image, CV_OUT std::vector& faces, double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, @@ -864,6 +871,7 @@ namespace cv oclMat gimg1, gsum, gsqsum; void * buffers; }; +#endif /////////////////////////////// Pyramid ///////////////////////////////////// CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst); @@ -1388,6 +1396,51 @@ namespace cv explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? 
L2Dist : HammingDist) {} }; + class CV_EXPORTS GoodFeaturesToTrackDetector_OCL + { + public: + explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, + int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04); + + //! return 1 rows matrix with CV_32FC2 type + void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat()); + //! download points of type Point2f to a vector. the vector's content will be erased + void downloadPoints(const oclMat &points, std::vector &points_v); + + int maxCorners; + double qualityLevel; + double minDistance; + + int blockSize; + bool useHarrisDetector; + double harrisK; + void releaseMemory() + { + Dx_.release(); + Dy_.release(); + eig_.release(); + minMaxbuf_.release(); + tmpCorners_.release(); + } + private: + oclMat Dx_; + oclMat Dy_; + oclMat eig_; + oclMat minMaxbuf_; + oclMat tmpCorners_; + }; + + inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_, + int blockSize_, bool useHarrisDetector_, double harrisK_) + { + maxCorners = maxCorners_; + qualityLevel = qualityLevel_; + minDistance = minDistance_; + blockSize = blockSize_; + useHarrisDetector = useHarrisDetector_; + harrisK = harrisK_; + } + /////////////////////////////// PyrLKOpticalFlow ///////////////////////////////////// class CV_EXPORTS PyrLKOpticalFlow diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 355e3b50a6..9daec2eaf9 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -47,7 +47,7 @@ #define __OPENCV_OCL_PRIVATE_UTIL__ #if defined __APPLE__ -#include +#include #else #include #endif @@ -121,6 +121,33 @@ namespace cv cl_mem CV_EXPORTS bindTexture(const oclMat &mat); void CV_EXPORTS releaseTexture(cl_mem& texture); + //Represents an image texture object 
+ class CV_EXPORTS TextureCL + { + public: + TextureCL(cl_mem tex, int r, int c, int t) + : tex_(tex), rows(r), cols(c), type(t) {} + ~TextureCL() + { + openCLFree(tex_); + } + operator cl_mem() + { + return tex_; + } + cl_mem const tex_; + const int rows; + const int cols; + const int type; + private: + //disable assignment + void operator=(const TextureCL&); + }; + // bind oclMat to OpenCL image textures and retunrs an TextureCL object + // note: + // for faster clamping, there is no buffer padding for the constructed texture + Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); + // returns whether the current context supports image2d_t format or not bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); @@ -132,7 +159,7 @@ namespace cv }; template _ty queryDeviceInfo(cl_kernel kernel = NULL); - //info should have been pre-allocated + template<> int CV_EXPORTS queryDeviceInfo(cl_kernel kernel); template<> diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index e69fecd647..4f690e0912 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -48,7 +48,7 @@ ///////////// Lut //////////////////////// PERFTEST(lut) { - Mat src, lut, dst; + Mat src, lut, dst, ocl_dst; ocl::oclMat d_src, d_lut, d_dst; int all_type[] = {CV_8UC1, CV_8UC3}; @@ -77,11 +77,6 @@ PERFTEST(lut) ocl::LUT(d_src, d_lut, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0)); - GPU_ON; ocl::LUT(d_src, d_lut, d_dst); GPU_OFF; @@ -90,9 +85,10 @@ PERFTEST(lut) d_src.upload(src); d_lut.upload(lut); ocl::LUT(d_src, d_lut, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0); } } @@ -101,7 +97,7 @@ PERFTEST(lut) ///////////// Exp //////////////////////// PERFTEST(Exp) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; for (int size = 
Min_Size; size <= Max_Size; size *= Multiple) @@ -121,11 +117,6 @@ PERFTEST(Exp) ocl::exp(d_src, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 2)); - GPU_ON; ocl::exp(d_src, d_dst); GPU_OFF; @@ -133,15 +124,17 @@ PERFTEST(Exp) GPU_FULL_ON; d_src.upload(src); ocl::exp(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 2); } } ///////////// LOG //////////////////////// PERFTEST(Log) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; for (int size = Min_Size; size <= Max_Size; size *= Multiple) @@ -161,11 +154,6 @@ PERFTEST(Log) ocl::log(d_src, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1)); - GPU_ON; ocl::log(d_src, d_dst); GPU_OFF; @@ -173,15 +161,17 @@ PERFTEST(Log) GPU_FULL_ON; d_src.upload(src); ocl::log(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1); } } ///////////// Add //////////////////////// PERFTEST(Add) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_8UC1, CV_32FC1}; @@ -201,6 +191,7 @@ PERFTEST(Add) CPU_ON; add(src1, src2, dst); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -208,11 +199,6 @@ PERFTEST(Add) ocl::add(d_src1, d_src2, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); - GPU_ON; ocl::add(d_src1, d_src2, d_dst); GPU_OFF; @@ -221,8 +207,10 @@ PERFTEST(Add) d_src1.upload(src1); d_src2.upload(src2); ocl::add(d_src1, d_src2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } @@ -231,7 +219,7 @@ 
PERFTEST(Add) ///////////// Mul //////////////////////// PERFTEST(Mul) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -260,11 +248,6 @@ PERFTEST(Mul) ocl::multiply(d_src1, d_src2, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); - GPU_ON; ocl::multiply(d_src1, d_src2, d_dst); GPU_OFF; @@ -273,8 +256,10 @@ PERFTEST(Mul) d_src1.upload(src1); d_src2.upload(src2); ocl::multiply(d_src1, d_src2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } @@ -283,7 +268,7 @@ PERFTEST(Mul) ///////////// Div //////////////////////// PERFTEST(Div) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; @@ -304,6 +289,7 @@ PERFTEST(Div) CPU_ON; divide(src1, src2, dst); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -311,11 +297,6 @@ PERFTEST(Div) ocl::divide(d_src1, d_src2, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1)); - GPU_ON; ocl::divide(d_src1, d_src2, d_dst); GPU_OFF; @@ -324,8 +305,10 @@ PERFTEST(Div) d_src1.upload(src1); d_src2.upload(src2); ocl::divide(d_src1, d_src2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1); } } @@ -334,7 +317,7 @@ PERFTEST(Div) ///////////// Absdiff //////////////////////// PERFTEST(Absdiff) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -355,6 +338,7 @@ PERFTEST(Absdiff) CPU_ON; absdiff(src1, src2, dst); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -362,11 +346,6 @@ 
PERFTEST(Absdiff) ocl::absdiff(d_src1, d_src2, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); - GPU_ON; ocl::absdiff(d_src1, d_src2, d_dst); GPU_OFF; @@ -375,8 +354,10 @@ PERFTEST(Absdiff) d_src1.upload(src1); d_src2.upload(src2); ocl::absdiff(d_src1, d_src2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } @@ -385,7 +366,7 @@ PERFTEST(Absdiff) ///////////// CartToPolar //////////////////////// PERFTEST(CartToPolar) { - Mat src1, src2, dst, dst1; + Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1; ocl::oclMat d_src1, d_src2, d_dst, d_dst1; int all_type[] = {CV_32FC1}; @@ -408,6 +389,7 @@ PERFTEST(CartToPolar) CPU_ON; cartToPolar(src1, src2, dst, dst1, 1); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -415,14 +397,6 @@ PERFTEST(CartToPolar) ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - cv::Mat ocl_mat_dst1; - d_dst1.download(ocl_mat_dst1); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5)); - GPU_ON; ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); GPU_OFF; @@ -431,9 +405,15 @@ PERFTEST(CartToPolar) d_src1.upload(src1); d_src2.upload(src2); ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); - d_dst.download(dst); - d_dst1.download(dst1); + d_dst.download(ocl_dst); + d_dst1.download(ocl_dst1); GPU_FULL_OFF; + + double diff1 = checkNorm(ocl_dst1, dst1); + double diff2 = checkNorm(ocl_dst, dst); + double max_diff = max(diff1, diff2); + TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff); + } } @@ -442,7 +422,7 @@ PERFTEST(CartToPolar) ///////////// PolarToCart //////////////////////// PERFTEST(PolarToCart) { - Mat src1, src2, dst, dst1; + Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1; ocl::oclMat d_src1, d_src2, 
d_dst, d_dst1; int all_type[] = {CV_32FC1}; @@ -472,14 +452,6 @@ PERFTEST(PolarToCart) ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - cv::Mat ocl_mat_dst1; - d_dst1.download(ocl_mat_dst1); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5)); - GPU_ON; ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); GPU_OFF; @@ -488,9 +460,15 @@ PERFTEST(PolarToCart) d_src1.upload(src1); d_src2.upload(src2); ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - d_dst.download(dst); - d_dst1.download(dst1); + d_dst.download(ocl_dst); + d_dst1.download(ocl_dst1); GPU_FULL_OFF; + + double diff1 = checkNorm(ocl_dst1, dst1); + double diff2 = checkNorm(ocl_dst, dst); + double max_diff = max(diff1, diff2); + TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff); + } } @@ -499,7 +477,7 @@ PERFTEST(PolarToCart) ///////////// Magnitude //////////////////////// PERFTEST(magnitude) { - Mat x, y, mag; + Mat x, y, mag, ocl_mag; ocl::oclMat d_x, d_y, d_mag; int all_type[] = {CV_32FC1}; @@ -526,11 +504,6 @@ PERFTEST(magnitude) ocl::magnitude(d_x, d_y, d_mag); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_mag.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, mag, 1e-5)); - GPU_ON; ocl::magnitude(d_x, d_y, d_mag); GPU_OFF; @@ -539,8 +512,10 @@ PERFTEST(magnitude) d_x.upload(x); d_y.upload(y); ocl::magnitude(d_x, d_y, d_mag); - d_mag.download(mag); + d_mag.download(ocl_mag); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_mag, mag, 1e-5); } } @@ -549,7 +524,7 @@ PERFTEST(magnitude) ///////////// Transpose //////////////////////// PERFTEST(Transpose) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -575,11 +550,6 @@ PERFTEST(Transpose) ocl::transpose(d_src, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - 
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5)); - GPU_ON; ocl::transpose(d_src, d_dst); GPU_OFF; @@ -587,8 +557,10 @@ PERFTEST(Transpose) GPU_FULL_ON; d_src.upload(src); ocl::transpose(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } @@ -597,7 +569,7 @@ PERFTEST(Transpose) ///////////// Flip //////////////////////// PERFTEST(Flip) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -623,11 +595,6 @@ PERFTEST(Flip) ocl::flip(d_src, d_dst, 0); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5)); - GPU_ON; ocl::flip(d_src, d_dst, 0); GPU_OFF; @@ -635,8 +602,10 @@ PERFTEST(Flip) GPU_FULL_ON; d_src.upload(src); ocl::flip(d_src, d_dst, 0); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } @@ -671,7 +640,10 @@ PERFTEST(minMax) ocl::minMax(d_src, &min_val_, &max_val_); WARMUP_OFF; - TestSystem::instance().setAccurate(EeceptDoubleEQ(max_val_, max_val)&&EeceptDoubleEQ(min_val_, min_val)); + if(EeceptDoubleEQ(max_val_, max_val) && EeceptDoubleEQ(min_val_, min_val)) + TestSystem::instance().setAccurate(1, max(fabs(max_val_-max_val), fabs(min_val_-min_val))); + else + TestSystem::instance().setAccurate(0, max(fabs(max_val_-max_val), fabs(min_val_-min_val))); GPU_ON; ocl::minMax(d_src, &min_val, &max_val); @@ -724,8 +696,6 @@ PERFTEST(minMaxLoc) minlocVal_ = src.at(min_loc_); maxlocVal = src.at(max_loc); maxlocVal_ = src.at(max_loc_); - error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); - error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); } if(src.depth() == 1) { @@ -733,8 +703,6 @@ PERFTEST(minMaxLoc) minlocVal_ = src.at(min_loc_); maxlocVal = src.at(max_loc); maxlocVal_ = src.at(max_loc_); - error0 = 
::abs(src.at(min_loc_) - src.at(min_loc)); - error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); } if(src.depth() == 2) { @@ -742,8 +710,6 @@ PERFTEST(minMaxLoc) minlocVal_ = src.at(min_loc_); maxlocVal = src.at(max_loc); maxlocVal_ = src.at(max_loc_); - error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); - error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); } if(src.depth() == 3) { @@ -751,8 +717,6 @@ PERFTEST(minMaxLoc) minlocVal_ = src.at(min_loc_); maxlocVal = src.at(max_loc); maxlocVal_ = src.at(max_loc_); - error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); - error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); } if(src.depth() == 4) { @@ -760,8 +724,6 @@ PERFTEST(minMaxLoc) minlocVal_ = src.at(min_loc_); maxlocVal = src.at(max_loc); maxlocVal_ = src.at(max_loc_); - error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); - error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); } if(src.depth() == 5) { @@ -769,8 +731,6 @@ PERFTEST(minMaxLoc) minlocVal_ = src.at(min_loc_); maxlocVal = src.at(max_loc); maxlocVal_ = src.at(max_loc_); - error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); - error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); } if(src.depth() == 6) { @@ -778,16 +738,16 @@ PERFTEST(minMaxLoc) minlocVal_ = src.at(min_loc_); maxlocVal = src.at(max_loc); maxlocVal_ = src.at(max_loc_); - error0 = ::abs(src.at(min_loc_) - src.at(min_loc)); - error1 = ::abs(src.at(max_loc_) - src.at(max_loc)); } - - TestSystem::instance().setAccurate(EeceptDoubleEQ(error1, 0.0) - &&EeceptDoubleEQ(error0, 0.0) - &&EeceptDoubleEQ(maxlocVal_, maxlocVal) + error0 = ::abs(minlocVal_ - minlocVal); + error1 = ::abs(maxlocVal_ - maxlocVal); + if( EeceptDoubleEQ(maxlocVal_, maxlocVal) &&EeceptDoubleEQ(minlocVal_, minlocVal) &&EeceptDoubleEQ(max_val_, max_val) - &&EeceptDoubleEQ(min_val_, min_val)); + &&EeceptDoubleEQ(min_val_, min_val)) + TestSystem::instance().setAccurate(1, 0.); + else + TestSystem::instance().setAccurate(0, max(error0, error1)); GPU_ON; ocl::minMaxLoc(d_src, &min_val, 
&max_val, &min_loc, &max_loc); @@ -831,11 +791,13 @@ PERFTEST(Sum) gpures = ocl::sum(d_src); WARMUP_OFF; - TestSystem::instance().setAccurate(ExceptDoubleNear(cpures[3], gpures[3], 0.1) - &&ExceptDoubleNear(cpures[2], gpures[2], 0.1) - &&ExceptDoubleNear(cpures[1], gpures[1], 0.1) - &&ExceptDoubleNear(cpures[0], gpures[0], 0.1)); - + vector diffs(4); + diffs[3] = fabs(cpures[3] - gpures[3]); + diffs[2] = fabs(cpures[2] - gpures[2]); + diffs[1] = fabs(cpures[1] - gpures[1]); + diffs[0] = fabs(cpures[0] - gpures[0]); + double max_diff = *max_element(diffs.begin(), diffs.end()); + TestSystem::instance().setAccurate(max_diff<0.1?1:0, max_diff); GPU_ON; gpures = ocl::sum(d_src); @@ -879,7 +841,11 @@ PERFTEST(countNonZero) gpures = ocl::countNonZero(d_src); WARMUP_OFF; - TestSystem::instance().setAccurate((EeceptDoubleEQ((double)cpures, (double)gpures))); + int diff = abs(cpures - gpures); + if(diff == 0) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); GPU_ON; ocl::countNonZero(d_src); @@ -897,7 +863,7 @@ PERFTEST(countNonZero) ///////////// Phase //////////////////////// PERFTEST(Phase) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_32FC1}; @@ -913,12 +879,12 @@ PERFTEST(Phase) gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - phase(src1, src2, dst, 1); CPU_ON; phase(src1, src2, dst, 1); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -926,11 +892,6 @@ PERFTEST(Phase) ocl::phase(d_src1, d_src2, d_dst, 1); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-2)); - GPU_ON; ocl::phase(d_src1, d_src2, d_dst, 1); GPU_OFF; @@ -939,8 +900,10 @@ PERFTEST(Phase) d_src1.upload(src1); d_src2.upload(src2); ocl::phase(d_src1, d_src2, d_dst, 1); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + 
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-2); } } @@ -949,7 +912,7 @@ PERFTEST(Phase) ///////////// bitwise_and//////////////////////// PERFTEST(bitwise_and) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_8UC1, CV_32SC1}; @@ -965,7 +928,6 @@ PERFTEST(bitwise_and) gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - bitwise_and(src1, src2, dst); CPU_ON; @@ -978,11 +940,6 @@ PERFTEST(bitwise_and) ocl::bitwise_and(d_src1, d_src2, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); - GPU_ON; ocl::bitwise_and(d_src1, d_src2, d_dst); GPU_OFF; @@ -991,8 +948,10 @@ PERFTEST(bitwise_and) d_src1.upload(src1); d_src2.upload(src2); ocl::bitwise_and(d_src1, d_src2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } @@ -1001,7 +960,7 @@ PERFTEST(bitwise_and) ///////////// bitwise_not//////////////////////// PERFTEST(bitwise_not) { - Mat src1, dst; + Mat src1, dst, ocl_dst; ocl::oclMat d_src1, d_dst; int all_type[] = {CV_8UC1, CV_32SC1}; @@ -1016,7 +975,6 @@ PERFTEST(bitwise_not) gen(src1, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - bitwise_not(src1, dst); CPU_ON; @@ -1028,11 +986,6 @@ PERFTEST(bitwise_not) ocl::bitwise_not(d_src1, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); - GPU_ON; ocl::bitwise_not(d_src1, d_dst); GPU_OFF; @@ -1040,8 +993,10 @@ PERFTEST(bitwise_not) GPU_FULL_ON; d_src1.upload(src1); ocl::bitwise_not(d_src1, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } @@ -1050,7 +1005,7 @@ PERFTEST(bitwise_not) ///////////// 
compare//////////////////////// PERFTEST(compare) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int CMP_EQ = 0; @@ -1067,12 +1022,12 @@ PERFTEST(compare) gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - compare(src1, src2, dst, CMP_EQ); CPU_ON; compare(src1, src2, dst, CMP_EQ); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -1080,11 +1035,6 @@ PERFTEST(compare) ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0)); - GPU_ON; ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); GPU_OFF; @@ -1093,8 +1043,10 @@ PERFTEST(compare) d_src1.upload(src1); d_src2.upload(src2); ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } @@ -1103,7 +1055,7 @@ PERFTEST(compare) ///////////// pow //////////////////////// PERFTEST(pow) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_32FC1}; @@ -1129,11 +1081,6 @@ PERFTEST(pow) ocl::pow(d_src, -2.0, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0)); - GPU_ON; ocl::pow(d_src, -2.0, d_dst); GPU_OFF; @@ -1141,8 +1088,10 @@ PERFTEST(pow) GPU_FULL_ON; d_src.upload(src); ocl::pow(d_src, -2.0, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } @@ -1151,7 +1100,7 @@ PERFTEST(pow) ///////////// MagnitudeSqr//////////////////////// PERFTEST(MagnitudeSqr) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_32FC1}; @@ -1167,53 +1116,36 @@ PERFTEST(MagnitudeSqr) gen(src2, size, size, all_type[t], 0, 256); gen(dst, size, 
size, all_type[t], 0, 256); - + CPU_ON; for (int i = 0; i < src1.rows; ++i) - for (int j = 0; j < src1.cols; ++j) { float val1 = src1.at(i, j); float val2 = src2.at(i, j); - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; } + CPU_OFF; - CPU_ON; - - for (int i = 0; i < src1.rows; ++i) - for (int j = 0; j < src1.cols; ++j) - { - float val1 = src1.at(i, j); - float val2 = src2.at(i, j); - - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; - - } - - CPU_OFF; - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - WARMUP_OFF; + d_src1.upload(src1); + d_src2.upload(src2); - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); + WARMUP_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0)); + GPU_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + GPU_OFF; - GPU_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - GPU_OFF; + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + d_dst.download(ocl_dst); + GPU_FULL_OFF; - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } @@ -1222,7 +1154,7 @@ PERFTEST(MagnitudeSqr) ///////////// AddWeighted//////////////////////// PERFTEST(AddWeighted) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; double alpha = 2.0, beta = 1.0, gama = 3.0; @@ -1252,11 +1184,6 @@ PERFTEST(AddWeighted) ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); WARMUP_OFF; - cv::Mat ocl_mat_dst; - d_dst.download(ocl_mat_dst); - - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5)); - GPU_ON; ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); GPU_OFF; @@ -1265,8 +1192,10 @@ PERFTEST(AddWeighted) 
d_src1.upload(src1); d_src2.upload(src2); ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp index 6dda464bd7..8ebb6482ba 100644 --- a/modules/ocl/perf/perf_blend.cpp +++ b/modules/ocl/perf/perf_blend.cpp @@ -71,7 +71,7 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we } PERFTEST(blend) { - Mat src1, src2, weights1, weights2, dst; + Mat src1, src2, weights1, weights2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -103,10 +103,6 @@ PERFTEST(blend) ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); WARMUP_OFF; - cv::Mat ocl_mat; - d_dst.download(ocl_mat); - TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 1.f)); - GPU_ON; ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); GPU_OFF; @@ -117,8 +113,10 @@ PERFTEST(blend) d_weights1.upload(weights1); d_weights2.upload(weights2); ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f); } } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp index ba87bd8924..406b46a324 100644 --- a/modules/ocl/perf/perf_brute_force_matcher.cpp +++ b/modules/ocl/perf/perf_brute_force_matcher.cpp @@ -88,9 +88,6 @@ PERFTEST(BruteForceMatcher) d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); WARMUP_OFF; - d_matcher.match(d_query, d_train, d_matches[0]); - TestSystem::instance().setAccurate(AssertEQ(d_matches[0].size(), matches[0].size())); - GPU_ON; d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); GPU_OFF; @@ -98,9 +95,15 @@ 
PERFTEST(BruteForceMatcher) GPU_FULL_ON; d_query.upload(query); d_train.upload(train); - d_matcher.match(d_query, d_train, matches[0]); + d_matcher.match(d_query, d_train, d_matches[0]); GPU_FULL_OFF; + int diff = abs((int)d_matches[0].size() - (int)matches[0].size()); + if(diff == 0) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); + SUBTEST << size << "; knnMatch"; matcher.knnMatch(query, train, matches, 2); @@ -123,7 +126,11 @@ PERFTEST(BruteForceMatcher) d_matcher.knnMatch(d_query, d_train, d_matches, 2); GPU_FULL_OFF; - TestSystem::instance().setAccurate(AssertEQ(d_matches[0].size(), matches[0].size())); + diff = abs((int)d_matches[0].size() - (int)matches[0].size()); + if(diff == 0) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); SUBTEST << size << "; radiusMatch"; @@ -151,6 +158,10 @@ PERFTEST(BruteForceMatcher) d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance); GPU_FULL_OFF; - TestSystem::instance().setAccurate(AssertEQ(d_matches[0].size(), matches[0].size())); + diff = abs((int)d_matches[0].size() - (int)matches[0].size()); + if(diff == 0) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp index e9c09756e0..8fc0d13ccd 100644 --- a/modules/ocl/perf/perf_canny.cpp +++ b/modules/ocl/perf/perf_canny.cpp @@ -57,7 +57,7 @@ PERFTEST(Canny) SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1"; - Mat edges(img.size(), CV_8UC1); + Mat edges(img.size(), CV_8UC1), ocl_edges; CPU_ON; Canny(img, edges, 50.0, 100.0); @@ -71,8 +71,6 @@ PERFTEST(Canny) ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); WARMUP_OFF; - TestSystem::instance().setAccurate(ExceptedMatSimilar(edges, d_edges, 2e-2)); - GPU_ON; ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); GPU_OFF; @@ -80,6 +78,8 @@ 
PERFTEST(Canny) GPU_FULL_ON; d_img.upload(img); ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - d_edges.download(edges); + d_edges.download(ocl_edges); GPU_FULL_OFF; + + TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2); } \ No newline at end of file diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp index 1faef4081f..44dc8f855c 100644 --- a/modules/ocl/perf/perf_color.cpp +++ b/modules/ocl/perf/perf_color.cpp @@ -48,7 +48,7 @@ ///////////// cvtColor//////////////////////// PERFTEST(cvtColor) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC4}; @@ -73,10 +73,6 @@ PERFTEST(cvtColor) ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4); WARMUP_OFF; - cv::Mat ocl_mat; - d_dst.download(ocl_mat); - TestSystem::instance().setAccurate(ExceptedMatSimilar(dst, ocl_mat, 1e-5)); - GPU_ON; ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4); GPU_OFF; @@ -84,8 +80,10 @@ PERFTEST(cvtColor) GPU_FULL_ON; d_src.upload(src); ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExceptedMatSimilar(dst, ocl_dst, 1e-5); } diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_columnsum.cpp index a07af17793..ff7ebcd1de 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_columnsum.cpp @@ -48,7 +48,7 @@ ///////////// columnSum//////////////////////// PERFTEST(columnSum) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; for (int size = Min_Size; size <= Max_Size; size *= Multiple) @@ -63,23 +63,16 @@ PERFTEST(columnSum) dst.at(0, j) = src.at(0, j); for (int i = 1; i < src.rows; ++i) - {for (int j = 0; j < src.cols; ++j) - { + for (int j = 0; j < src.cols; ++j) dst.at(i, j) = dst.at(i - 1 , j) + src.at(i , j); - } - } - CPU_OFF; d_src.upload(src); + WARMUP_ON; ocl::columnSum(d_src, d_dst); WARMUP_OFF; - cv::Mat ocl_mat; - d_dst.download(ocl_mat); - 
TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 5e-1)); - GPU_ON; ocl::columnSum(d_src, d_dst); GPU_OFF; @@ -87,7 +80,9 @@ PERFTEST(columnSum) GPU_FULL_ON; d_src.upload(src); ocl::columnSum(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1); } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp index 49c88821dd..6e0be3f19d 100644 --- a/modules/ocl/perf/perf_fft.cpp +++ b/modules/ocl/perf/perf_fft.cpp @@ -48,7 +48,7 @@ ///////////// dft //////////////////////// PERFTEST(dft) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_32FC2}; @@ -74,8 +74,6 @@ PERFTEST(dft) ocl::dft(d_src, d_dst, Size(size, size)); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), src.size().area() * 1e-4)); - GPU_ON; ocl::dft(d_src, d_dst, Size(size, size)); GPU_OFF; @@ -83,8 +81,10 @@ PERFTEST(dft) GPU_FULL_ON; d_src.upload(src); ocl::dft(d_src, d_dst, Size(size, size)); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, src.size().area() * 1e-4); } } diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index c1cf19eefc..c8c840d0e8 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -48,7 +48,7 @@ ///////////// Blur//////////////////////// PERFTEST(Blur) { - Mat src1, dst; + Mat src1, dst, ocl_dst; ocl::oclMat d_src1, d_dst; Size ksize = Size(3, 3); @@ -65,7 +65,6 @@ PERFTEST(Blur) gen(src1, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - blur(src1, dst, ksize, Point(-1, -1), bordertype); CPU_ON; @@ -78,8 +77,6 @@ PERFTEST(Blur) ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0)); - GPU_ON; 
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); GPU_OFF; @@ -87,8 +84,10 @@ PERFTEST(Blur) GPU_FULL_ON; d_src1.upload(src1); ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } @@ -96,7 +95,7 @@ PERFTEST(Blur) ///////////// Laplacian//////////////////////// PERFTEST(Laplacian) { - Mat src1, dst; + Mat src1, dst, ocl_dst; ocl::oclMat d_src1, d_dst; int ksize = 3; @@ -112,7 +111,6 @@ PERFTEST(Laplacian) gen(src1, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - Laplacian(src1, dst, -1, ksize, 1); CPU_ON; @@ -125,8 +123,6 @@ PERFTEST(Laplacian) ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5)); - GPU_ON; ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); GPU_OFF; @@ -134,8 +130,10 @@ PERFTEST(Laplacian) GPU_FULL_ON; d_src1.upload(src1); ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } @@ -144,7 +142,7 @@ PERFTEST(Laplacian) ///////////// Erode //////////////////// PERFTEST(Erode) { - Mat src, dst, ker; + Mat src, dst, ker, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; @@ -171,8 +169,6 @@ PERFTEST(Erode) ocl::erode(d_src, d_dst, ker); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5)); - GPU_ON; ocl::erode(d_src, d_dst, ker); GPU_OFF; @@ -180,8 +176,10 @@ PERFTEST(Erode) GPU_FULL_ON; d_src.upload(src); ocl::erode(d_src, d_dst, ker); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } @@ -190,7 +188,7 @@ PERFTEST(Erode) ///////////// Sobel //////////////////////// PERFTEST(Sobel) { - Mat src, dst; + Mat src, dst, ocl_dst; 
ocl::oclMat d_src, d_dst; int dx = 1; @@ -218,8 +216,6 @@ PERFTEST(Sobel) ocl::Sobel(d_src, d_dst, -1, dx, dy); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1)); - GPU_ON; ocl::Sobel(d_src, d_dst, -1, dx, dy); GPU_OFF; @@ -227,8 +223,10 @@ PERFTEST(Sobel) GPU_FULL_ON; d_src.upload(src); ocl::Sobel(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1); } } @@ -236,7 +234,7 @@ PERFTEST(Sobel) ///////////// Scharr //////////////////////// PERFTEST(Scharr) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int dx = 1; @@ -264,8 +262,6 @@ PERFTEST(Scharr) ocl::Scharr(d_src, d_dst, -1, dx, dy); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1)); - GPU_ON; ocl::Scharr(d_src, d_dst, -1, dx, dy); GPU_OFF; @@ -273,8 +269,10 @@ PERFTEST(Scharr) GPU_FULL_ON; d_src.upload(src); ocl::Scharr(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1); } } @@ -283,7 +281,7 @@ PERFTEST(Scharr) ///////////// GaussianBlur //////////////////////// PERFTEST(GaussianBlur) { - Mat src, dst; + Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; @@ -311,9 +309,6 @@ PERFTEST(GaussianBlur) ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0)); - - GPU_ON; ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); GPU_OFF; @@ -321,8 +316,10 @@ PERFTEST(GaussianBlur) GPU_FULL_ON; d_src.upload(src); ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } @@ -349,7 +346,7 @@ PERFTEST(filter2D) Mat kernel; gen(kernel, 
ksize, ksize, CV_32FC1, 0.0, 1.0); - Mat dst(src); + Mat dst, ocl_dst; dst.setTo(0); cv::filter2D(src, dst, -1, kernel); @@ -357,17 +354,12 @@ PERFTEST(filter2D) cv::filter2D(src, dst, -1, kernel); CPU_OFF; - ocl::oclMat d_src(src); - ocl::oclMat d_dst(d_src); - d_dst.setTo(0); + ocl::oclMat d_src(src), d_dst; WARMUP_ON; ocl::filter2D(d_src, d_dst, -1, kernel); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5)); - - GPU_ON; ocl::filter2D(d_src, d_dst, -1, kernel); GPU_OFF; @@ -375,8 +367,10 @@ PERFTEST(filter2D) GPU_FULL_ON; d_src.upload(src); ocl::filter2D(d_src, d_dst, -1, kernel); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp index 280a0394ce..f197c5f5a0 100644 --- a/modules/ocl/perf/perf_gemm.cpp +++ b/modules/ocl/perf/perf_gemm.cpp @@ -48,7 +48,7 @@ ///////////// gemm //////////////////////// PERFTEST(gemm) { - Mat src1, src2, src3, dst; + Mat src1, src2, src3, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_src3, d_dst; for (int size = Min_Size; size <= Max_Size; size *= Multiple) @@ -72,7 +72,6 @@ PERFTEST(gemm) WARMUP_ON; ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, src1.cols * src1.rows * 1e-4)); GPU_ON; ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); @@ -83,7 +82,9 @@ PERFTEST(gemm) d_src2.upload(src2); d_src3.upload(src3); ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4); } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index 348c140934..372949521d 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -125,8 +125,10 @@ 
PERFTEST(Haar) 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); WARMUP_OFF; - //Testing whether the expected is equal to the actual. - TestSystem::instance().setAccurate(ExpectedEQ::size_type, vector::size_type>(faces.size(), oclfaces.size())); + if(faces.size() == oclfaces.size()) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, abs((int)faces.size() - (int)oclfaces.size())); faces.clear(); diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index c425ef4848..05093811fe 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -146,10 +146,8 @@ PERFTEST(HOG) } } - cv::Mat ocl_mat; - ocl_mat = cv::Mat(d_comp); - ocl_mat.convertTo(ocl_mat, cv::Mat(comp).type()); - TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat, cv::Mat(comp), 3)); + cv::Mat gpu_rst(d_comp), cpu_rst(comp); + TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3); GPU_ON; ocl_hog.detectMultiScale(d_src, found_locations); diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 9bba2b06b7..ccfd18c669 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -48,7 +48,7 @@ ///////////// equalizeHist //////////////////////// PERFTEST(equalizeHist) { - Mat src, dst; + Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1}; std::string type_name[] = {"CV_8UC1"}; @@ -75,9 +75,6 @@ PERFTEST(equalizeHist) ocl::equalizeHist(d_src, d_dst); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.1)); - - GPU_ON; ocl::equalizeHist(d_src, d_dst); GPU_OFF; @@ -85,8 +82,10 @@ PERFTEST(equalizeHist) GPU_FULL_ON; d_src.upload(src); ocl::equalizeHist(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.1); } } @@ -94,7 +93,7 @@ PERFTEST(equalizeHist) /////////// CopyMakeBorder ////////////////////// PERFTEST(CopyMakeBorder) { - Mat src, dst; + 
Mat src, dst, ocl_dst; ocl::oclMat d_dst; int bordertype = BORDER_CONSTANT; @@ -122,9 +121,6 @@ PERFTEST(CopyMakeBorder) ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); - - GPU_ON; ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); GPU_OFF; @@ -132,8 +128,10 @@ PERFTEST(CopyMakeBorder) GPU_FULL_ON; d_src.upload(src); ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } @@ -141,7 +139,7 @@ PERFTEST(CopyMakeBorder) ///////////// cornerMinEigenVal //////////////////////// PERFTEST(cornerMinEigenVal) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_dst; int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); @@ -155,7 +153,6 @@ PERFTEST(cornerMinEigenVal) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 0, 256); cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); @@ -170,9 +167,6 @@ PERFTEST(cornerMinEigenVal) ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - - GPU_ON; ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); GPU_OFF; @@ -180,8 +174,10 @@ PERFTEST(cornerMinEigenVal) GPU_FULL_ON; d_src.upload(src); ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } @@ -189,7 +185,7 @@ PERFTEST(cornerMinEigenVal) ///////////// cornerHarris //////////////////////// PERFTEST(cornerHarris) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_32FC1}; @@ -215,8 +211,6 @@ 
PERFTEST(cornerHarris) ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - GPU_ON; ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); GPU_OFF; @@ -224,8 +218,10 @@ PERFTEST(cornerHarris) GPU_FULL_ON; d_src.upload(src); ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } @@ -234,7 +230,7 @@ PERFTEST(cornerHarris) ///////////// integral //////////////////////// PERFTEST(integral) { - Mat src, sum; + Mat src, sum, ocl_sum; ocl::oclMat d_src, d_sum, d_buf; int all_type[] = {CV_8UC1}; @@ -260,12 +256,6 @@ PERFTEST(integral) ocl::integral(d_src, d_sum); WARMUP_OFF; - cv::Mat ocl_mat; - d_sum.download(ocl_mat); - if(sum.type() == ocl_mat.type()) //we won't test accuracy when cpu function overlow - TestSystem::instance().setAccurate(ExpectedMatNear(sum, ocl_mat, 0.0)); - - GPU_ON; ocl::integral(d_src, d_sum); GPU_OFF; @@ -273,8 +263,12 @@ PERFTEST(integral) GPU_FULL_ON; d_src.upload(src); ocl::integral(d_src, d_sum); - d_sum.download(sum); + d_sum.download(ocl_sum); GPU_FULL_OFF; + + if(sum.type() == ocl_sum.type()) //we won't test accuracy when cpu function overlow + TestSystem::instance().ExpectedMatNear(sum, ocl_sum, 0.0); + } } @@ -282,7 +276,7 @@ PERFTEST(integral) ///////////// WarpAffine //////////////////////// PERFTEST(WarpAffine) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; static const double coeffs[2][3] = @@ -319,8 +313,6 @@ PERFTEST(WarpAffine) ocl::warpAffine(d_src, d_dst, M, size1, interpolation); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - GPU_ON; ocl::warpAffine(d_src, d_dst, M, size1, interpolation); GPU_OFF; @@ -328,8 +320,10 @@ PERFTEST(WarpAffine) GPU_FULL_ON; d_src.upload(src); ocl::warpAffine(d_src, d_dst, M, size1, interpolation); 
- d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } @@ -337,7 +331,7 @@ PERFTEST(WarpAffine) ///////////// WarpPerspective //////////////////////// PERFTEST(WarpPerspective) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; static const double coeffs[3][3] = @@ -374,8 +368,6 @@ PERFTEST(WarpPerspective) ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - GPU_ON; ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); GPU_OFF; @@ -383,8 +375,10 @@ PERFTEST(WarpPerspective) GPU_FULL_ON; d_src.upload(src); ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } @@ -393,7 +387,7 @@ PERFTEST(WarpPerspective) ///////////// resize //////////////////////// PERFTEST(resize) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; @@ -420,9 +414,6 @@ PERFTEST(resize) ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - - GPU_ON; ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); GPU_OFF; @@ -430,8 +421,10 @@ PERFTEST(resize) GPU_FULL_ON; d_src.upload(src); ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } @@ -456,8 +449,6 @@ PERFTEST(resize) ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - GPU_ON; ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); GPU_OFF; @@ -465,8 +456,10 @@ PERFTEST(resize) GPU_FULL_ON; d_src.upload(src); ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + 
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } @@ -474,10 +467,9 @@ PERFTEST(resize) ///////////// threshold//////////////////////// PERFTEST(threshold) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; - for (int size = Min_Size; size <= Max_Size; size *= Multiple) { SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY"; @@ -496,9 +488,6 @@ PERFTEST(threshold) ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - - GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); GPU_OFF; @@ -506,9 +495,10 @@ PERFTEST(threshold) GPU_FULL_ON; d_src.upload(src); ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } for (int size = Min_Size; size <= Max_Size; size *= Multiple) @@ -529,8 +519,6 @@ PERFTEST(threshold) ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); GPU_OFF; @@ -538,8 +526,10 @@ PERFTEST(threshold) GPU_FULL_ON; d_src.upload(src); ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } ///////////// meanShiftFiltering//////////////////////// @@ -726,7 +716,7 @@ void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::T PERFTEST(meanShiftFiltering) { int sp = 5, sr = 6; - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; @@ -753,11 +743,6 @@ PERFTEST(meanShiftFiltering) ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); WARMUP_OFF; - cv::Mat ocl_mat; - d_dst.download(ocl_mat); - - TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 0.0)); - GPU_ON; 
ocl::meanShiftFiltering(d_src, d_dst, sp, sr); GPU_OFF; @@ -765,8 +750,10 @@ PERFTEST(meanShiftFiltering) GPU_FULL_ON; d_src.upload(src); ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } ///////////// meanShiftProc//////////////////////// @@ -1010,8 +997,9 @@ void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, } PERFTEST(meanShiftProc) { - Mat src, dst, dstCoor_roi; - ocl::oclMat d_src, d_dst, d_dstCoor_roi; + Mat src; + vector dst(2), ocl_dst(2); + ocl::oclMat d_src, d_dst, d_dstCoor; TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); @@ -1020,42 +1008,41 @@ PERFTEST(meanShiftProc) SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); + gen(dst[0], size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + gen(dst[1], size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); + meanShiftProc_(src, dst[0], dst[1], 5, 6, crit); CPU_ON; - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); + meanShiftProc_(src, dst[0], dst[1], 5, 6, crit); CPU_OFF; d_src.upload(src); WARMUP_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dstCoor_roi, cv::Mat(d_dstCoor_roi), 0.0) - &&ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); - GPU_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit); GPU_OFF; GPU_FULL_ON; d_src.upload(src); - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - d_dst.download(dst); - d_dstCoor_roi.download(dstCoor_roi); 
+ ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit); + d_dst.download(ocl_dst[0]); + d_dstCoor.download(ocl_dst[1]); GPU_FULL_OFF; + vector eps(2, 0.); + TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps); } } ///////////// remap//////////////////////// PERFTEST(remap) { - Mat src, dst, xmap, ymap; + Mat src, dst, xmap, ymap, ocl_dst; ocl::oclMat d_src, d_dst, d_xmap, d_ymap; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -1088,7 +1075,6 @@ PERFTEST(remap) } } - remap(src, dst, xmap, ymap, interpolation, borderMode); CPU_ON; @@ -1104,12 +1090,6 @@ PERFTEST(remap) ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); WARMUP_OFF; - if(interpolation == 0) - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0)); - else - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 2.0)); - - GPU_ON; ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); GPU_OFF; @@ -1117,8 +1097,10 @@ PERFTEST(remap) GPU_FULL_ON; d_src.upload(src); ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 2.0); } } diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp index 396cb0b1cf..1330e268f8 100644 --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@ -56,11 +56,9 @@ PERFTEST(matchTemplate) { //InitMatchTemplate(); - - Mat src, templ, dst; + Mat src, templ, dst, ocl_dst; int templ_size = 5; - for (int size = Min_Size; size <= Max_Size; size *= Multiple) { int all_type[] = {CV_32FC1, CV_32FC4}; @@ -82,16 +80,12 @@ PERFTEST(matchTemplate) matchTemplate(src, templ, dst, TM_CCORR); CPU_OFF; - ocl::oclMat d_src(src), d_templ, d_dst; - - d_templ.upload(templ); + ocl::oclMat d_src(src), d_templ(templ), d_dst; WARMUP_ON; ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR); WARMUP_OFF; - 
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1)); - GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR); GPU_OFF; @@ -100,8 +94,10 @@ PERFTEST(matchTemplate) d_src.upload(src); d_templ.upload(templ); ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1); } } @@ -131,8 +127,6 @@ PERFTEST(matchTemplate) ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1)); - GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED); GPU_OFF; @@ -141,8 +135,10 @@ PERFTEST(matchTemplate) d_src.upload(src); d_templ.upload(templ); ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1); } } } diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index 4b364b01b6..b724cdbe64 100644 --- a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -48,7 +48,7 @@ ///////////// ConvertTo//////////////////////// PERFTEST(ConvertTo) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -77,9 +77,6 @@ PERFTEST(ConvertTo) d_src.convertTo(d_dst, CV_32FC1); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); - - GPU_ON; d_src.convertTo(d_dst, CV_32FC1); GPU_OFF; @@ -87,8 +84,10 @@ PERFTEST(ConvertTo) GPU_FULL_ON; d_src.upload(src); d_src.convertTo(d_dst, CV_32FC1); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } @@ -96,7 +95,7 @@ 
PERFTEST(ConvertTo) ///////////// copyTo//////////////////////// PERFTEST(copyTo) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -125,9 +124,6 @@ PERFTEST(copyTo) d_src.copyTo(d_dst); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0)); - - GPU_ON; d_src.copyTo(d_dst); GPU_OFF; @@ -135,8 +131,10 @@ PERFTEST(copyTo) GPU_FULL_ON; d_src.upload(src); d_src.copyTo(d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } @@ -144,9 +142,9 @@ PERFTEST(copyTo) ///////////// setTo//////////////////////// PERFTEST(setTo) { - Mat src, dst; + Mat src, ocl_src; Scalar val(1, 2, 3, 4); - ocl::oclMat d_src, d_dst; + ocl::oclMat d_src; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; @@ -171,10 +169,10 @@ PERFTEST(setTo) d_src.setTo(val); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_src), 1.0)); + d_src.download(ocl_src); + TestSystem::instance().ExpectedMatNear(src, ocl_src, 1.0); - - GPU_ON; + GPU_ON;; d_src.setTo(val); GPU_OFF; diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp index 78ff001248..1d986c8e49 100644 --- a/modules/ocl/perf/perf_norm.cpp +++ b/modules/ocl/perf/perf_norm.cpp @@ -48,39 +48,40 @@ ///////////// norm//////////////////////// PERFTEST(norm) { - Mat src, buf; - ocl::oclMat d_src, d_buf; - + Mat src1, src2, ocl_src1; + ocl::oclMat d_src1, d_src2; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF"; - gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + gen(src1, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + gen(src2, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - norm(src, NORM_INF); + norm(src1, src2, NORM_INF); CPU_ON; 
- norm(src, NORM_INF); + norm(src1, src2, NORM_INF); CPU_OFF; - d_src.upload(src); - d_buf.upload(buf); + d_src1.upload(src1); + d_src2.upload(src2); WARMUP_ON; - ocl::norm(d_src, d_buf, NORM_INF); + ocl::norm(d_src1, d_src2, NORM_INF); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_buf), .5)); + d_src1.download(ocl_src1); + TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5); GPU_ON; - ocl::norm(d_src, d_buf, NORM_INF); + ocl::norm(d_src1, d_src2, NORM_INF); GPU_OFF; GPU_FULL_ON; - d_src.upload(src); - ocl::norm(d_src, d_buf, NORM_INF); + d_src1.upload(src1); + d_src2.upload(src2); + ocl::norm(d_src1, d_src2, NORM_INF); GPU_FULL_OFF; } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrlk.cpp b/modules/ocl/perf/perf_opticalflow.cpp similarity index 64% rename from modules/ocl/perf/perf_pyrlk.cpp rename to modules/ocl/perf/perf_opticalflow.cpp index 32bf145b9f..9887970204 100644 --- a/modules/ocl/perf/perf_pyrlk.cpp +++ b/modules/ocl/perf/perf_opticalflow.cpp @@ -82,8 +82,8 @@ PERFTEST(PyrLKOpticalFlow) SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points"; else SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points"; - Mat nextPts_cpu; - Mat status_cpu; + Mat ocl_nextPts; + Mat ocl_status; vector pts; goodFeaturesToTrack(i == 0 ? 
gray_frame : frame0, pts, points, 0.01, 0.0); @@ -116,12 +116,6 @@ PERFTEST(PyrLKOpticalFlow) d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); WARMUP_OFF; - std::vector ocl_nextPts(d_nextPts.cols); - std::vector ocl_status(d_status.cols); - TestSystem::instance().setAccurate(AssertEQ(nextPts.size(), ocl_nextPts.size())); - TestSystem::instance().setAccurate(AssertEQ(status.size(), ocl_status.size())); - - GPU_ON; d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); GPU_OFF; @@ -133,17 +127,102 @@ PERFTEST(PyrLKOpticalFlow) d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); if (!d_nextPts.empty()) - { - d_nextPts.download(nextPts_cpu); - } + d_nextPts.download(ocl_nextPts); if (!d_status.empty()) + d_status.download(ocl_status); + GPU_FULL_OFF; + + size_t mismatch = 0; + for (int i = 0; i < (int)nextPts.size(); ++i) { - d_status.download(status_cpu); + if(status[i] != ocl_status.at(0, i)){ + mismatch++; + continue; + } + if(status[i]){ + Point2f gpu_rst = ocl_nextPts.at(0, i); + Point2f cpu_rst = nextPts[i]; + if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.) 
+ mismatch++; + } } - - GPU_FULL_OFF; + double ratio = (double)mismatch / (double)nextPts.size(); + if(ratio < .02) + TestSystem::instance().setAccurate(1, ratio); + else + TestSystem::instance().setAccurate(0, ratio); } } } + + +PERFTEST(tvl1flow) +{ + cv::Mat frame0 = imread("rubberwhale1.png", cv::IMREAD_GRAYSCALE); + assert(!frame0.empty()); + + cv::Mat frame1 = imread("rubberwhale2.png", cv::IMREAD_GRAYSCALE); + assert(!frame1.empty()); + + cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; + cv::ocl::oclMat d_flowx(frame0.size(), CV_32FC1); + cv::ocl::oclMat d_flowy(frame1.size(), CV_32FC1); + + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + cv::Mat flow; + + + SUBTEST << frame0.cols << 'x' << frame0.rows << "; rubberwhale1.png; "<calc(frame0, frame1, flow); + + CPU_ON; + alg->calc(frame0, frame1, flow); + CPU_OFF; + + cv::Mat gold[2]; + cv::split(flow, gold); + + cv::ocl::oclMat d0(frame0.size(), CV_32FC1); + d0.upload(frame0); + cv::ocl::oclMat d1(frame1.size(), CV_32FC1); + d1.upload(frame1); + + WARMUP_ON; + d_alg(d0, d1, d_flowx, d_flowy); + WARMUP_OFF; +/* + double diff1 = 0.0, diff2 = 0.0; + if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1 + &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1) + TestSystem::instance().setAccurate(1); + else + TestSystem::instance().setAccurate(0); + + TestSystem::instance().setDiff(diff1); + TestSystem::instance().setDiff(diff2); +*/ + + + GPU_ON; + d_alg(d0, d1, d_flowx, d_flowy); + d_alg.collectGarbage(); + GPU_OFF; + + + cv::Mat flowx, flowy; + + GPU_FULL_ON; + d0.upload(frame0); + d1.upload(frame1); + d_alg(d0, d1, d_flowx, d_flowy); + d_alg.collectGarbage(); + d_flowx.download(flowx); + d_flowy.download(flowy); + GPU_FULL_OFF; + + TestSystem::instance().ExceptedMatSimilar(gold[0], flowx, 3e-3); + TestSystem::instance().ExceptedMatSimilar(gold[1], flowy, 3e-3); +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrdown.cpp b/modules/ocl/perf/perf_pyramid.cpp similarity index 72% 
rename from modules/ocl/perf/perf_pyrdown.cpp rename to modules/ocl/perf/perf_pyramid.cpp index 36d2e7ec70..3b96251e5d 100644 --- a/modules/ocl/perf/perf_pyrdown.cpp +++ b/modules/ocl/perf/perf_pyramid.cpp @@ -48,7 +48,7 @@ ///////////// pyrDown ////////////////////// PERFTEST(pyrDown) { - Mat src, dst; + Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; @@ -73,9 +73,6 @@ PERFTEST(pyrDown) ocl::pyrDown(d_src, d_dst); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), dst.depth() == CV_32F ? 1e-4f : 1.0f)); - - GPU_ON; ocl::pyrDown(d_src, d_dst); GPU_OFF; @@ -83,8 +80,53 @@ PERFTEST(pyrDown) GPU_FULL_ON; d_src.upload(src); ocl::pyrDown(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); + GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, dst.depth() == CV_32F ? 1e-4f : 1.0f); + } + } +} + +///////////// pyrUp //////////////////////// +PERFTEST(pyrUp) +{ + Mat src, dst, ocl_dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = 500; size <= 2000; size *= 2) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + pyrUp(src, dst); + + CPU_ON; + pyrUp(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::pyrUp(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pyrUp(d_src, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pyrUp(d_src, d_dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 
1e-4f : 1.0)); } } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrup.cpp b/modules/ocl/perf/perf_pyrup.cpp deleted file mode 100644 index 3b2022e096..0000000000 --- a/modules/ocl/perf/perf_pyrup.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Fangfang Bai, fangfang@multicorewareinc.com -// Jin Ma, jin@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ -#include "precomp.hpp" - -///////////// pyrUp //////////////////////// -PERFTEST(pyrUp) -{ - Mat src, dst; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 500; size <= 2000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - pyrUp(src, dst); - - CPU_ON; - pyrUp(src, dst); - CPU_OFF; - - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - - WARMUP_ON; - ocl::pyrUp(d_src, d_dst); - WARMUP_OFF; - - TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), (src.depth() == CV_32F ? 
1e-4f : 1.0))); - - GPU_ON; - ocl::pyrUp(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::pyrUp(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; - } - } -} \ No newline at end of file diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp index fc720c5b0d..0fafd14aba 100644 --- a/modules/ocl/perf/perf_split_merge.cpp +++ b/modules/ocl/perf/perf_split_merge.cpp @@ -48,7 +48,7 @@ ///////////// Merge//////////////////////// PERFTEST(Merge) { - Mat dst; + Mat dst, ocl_dst; ocl::oclMat d_dst; int channels = 4; @@ -85,22 +85,20 @@ PERFTEST(Merge) ocl::merge(d_src, d_dst); WARMUP_OFF; - TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(dst), cv::Mat(d_dst), 0.0)); - GPU_ON; ocl::merge(d_src, d_dst); GPU_OFF; GPU_FULL_ON; - for (int i = 0; i < channels; ++i) { - d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i)); + d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); } - ocl::merge(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } @@ -122,7 +120,7 @@ PERFTEST(Split) Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); - std::vector dst; + std::vector dst, ocl_dst(4); split(src, dst); @@ -135,22 +133,7 @@ PERFTEST(Split) WARMUP_ON; ocl::split(d_src, d_dst); - WARMUP_OFF; - - if(d_dst.size() == dst.size()) - { - TestSystem::instance().setAccurate(1); - for(size_t i = 0; i < dst.size(); i++) - { - if(ExpectedMatNear(dst[i], cv::Mat(d_dst[i]), 0.0) == 0) - { - TestSystem::instance().setAccurate(0); - break; - } - } - }else - TestSystem::instance().setAccurate(0); - + WARMUP_OFF; GPU_ON; ocl::split(d_src, d_dst); @@ -159,7 +142,12 @@ PERFTEST(Split) GPU_FULL_ON; d_src.upload(src); ocl::split(d_src, d_dst); + for(size_t i = 0; i < dst.size(); i++) + d_dst[i].download(ocl_dst[i]); GPU_FULL_OFF; + + vector eps(4, 0.); + TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps); } } 
diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index 65f8f8a919..65e2d51816 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -114,7 +114,6 @@ void TestSystem::finishCurrentSubtest() return; } - int is_accurate = is_accurate_; double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0; @@ -171,8 +170,8 @@ void TestSystem::finishCurrentSubtest() deviation = std::sqrt(sum / gpu_times_.size()); } - printMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); - writeMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); + printMetrics(is_accurate_, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); + writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); num_subtests_called_++; resetCurrentSubtest(); @@ -219,7 +218,7 @@ void TestSystem::writeHeading() } } - fprintf(record_, "NAME,DESCRIPTION,ACCURACY,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n"); + fprintf(record_, "NAME,DESCRIPTION,ACCURACY,DIFFERENCE,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n"); fflush(record_); } @@ -392,7 +391,7 @@ void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time, #endif } -void TestSystem::writeMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev) +void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev) { if (!record_) { @@ -402,21 +401,24 @@ void TestSystem::writeMetrics(int is_accurate, double cpu_time, 
double gpu_time, string _is_accurate_; - if(is_accurate == 1) + if(is_accurate_ == 1) _is_accurate_ = "Pass"; - else if(is_accurate == 0) + else if(is_accurate_ == 0) _is_accurate_ = "Fail"; - else if(is_accurate == -1) + else if(is_accurate_ == -1) _is_accurate_ = " "; else { - std::cout<<"is_accurate errer: "< 0&&n <= cols * rows); - assert(type == CV_8UC1||type == CV_8UC3||type == CV_8UC4 - ||type == CV_32FC1||type == CV_32FC3||type == CV_32FC4); - - RNG rng; - //generate random position without duplication - std::vector pos; - for(int i = 0; i < cols * rows; i++) - { - pos.push_back(i); - } - - for(int i = 0; i < cols * rows; i++) - { - int temp = i + rng.uniform(0, cols * rows - 1 - i); - int temp1 = pos[temp]; - pos[temp]= pos[i]; - pos[i] = temp1; - } - - std::vector selected_pos; - for(int i = 0; i < n; i++) - { - selected_pos.push_back(pos[i]); - } - - pos.clear(); - //end of generating random y without duplication - - if(type == CV_8UC1) - { - typedef struct coorStruct_ - { - int x; - int y; - uchar xy; - }coorStruct; - - coorStruct coor_struct; - - std::vector coor; - - for(int i = 0; i < n; i++) - { - coor_struct.x = -1; - coor_struct.y = -1; - coor_struct.xy = (uchar)rng.uniform(low, high); - coor.push_back(coor_struct); - } - - for(int i = 0; i < n; i++) - { - coor[i].y = selected_pos[i]/cols; - coor[i].x = selected_pos[i]%cols; - } - selected_pos.clear(); - - mat.create(rows, cols, type); - mat.setTo(0); - - for(int i = 0; i < n; i++) - { - mat.at(coor[i].y, coor[i].x) = coor[i].xy; - } - } - - if(type == CV_8UC4 || type == CV_8UC3) - { - mat.create(rows, cols, type); - mat.setTo(0); - - typedef struct Coor - { - int x; - int y; - - uchar r; - uchar g; - uchar b; - uchar alpha; - }coor; - - std::vector coor_vect; - - coor xy_coor; - - for(int i = 0; i < n; i++) - { - xy_coor.r = (uchar)rng.uniform(low, high); - xy_coor.g = (uchar)rng.uniform(low, high); - xy_coor.b = (uchar)rng.uniform(low, high); - if(type == CV_8UC4) - xy_coor.alpha = 
(uchar)rng.uniform(low, high); - - coor_vect.push_back(xy_coor); - } - - for(int i = 0; i < n; i++) - { - coor_vect[i].y = selected_pos[i]/((int)mat.step1()/mat.elemSize()); - coor_vect[i].x = selected_pos[i]%((int)mat.step1()/mat.elemSize()); - //printf("coor_vect[%d] = (%d, %d)\n", i, coor_vect[i].y, coor_vect[i].x); - } - - if(type == CV_8UC4) - { - for(int i = 0; i < n; i++) - { - mat.at(coor_vect[i].y, 4 * coor_vect[i].x) = coor_vect[i].r; - mat.at(coor_vect[i].y, 4 * coor_vect[i].x + 1) = coor_vect[i].g; - mat.at(coor_vect[i].y, 4 * coor_vect[i].x + 2) = coor_vect[i].b; - mat.at(coor_vect[i].y, 4 * coor_vect[i].x + 3) = coor_vect[i].alpha; - } - }else if(type == CV_8UC3) - { - for(int i = 0; i < n; i++) - { - mat.at(coor_vect[i].y, 3 * coor_vect[i].x) = coor_vect[i].r; - mat.at(coor_vect[i].y, 3 * coor_vect[i].x + 1) = coor_vect[i].g; - mat.at(coor_vect[i].y, 3 * coor_vect[i].x + 2) = coor_vect[i].b; - } - } - } -} -#endif string abspath(const string &relpath) { @@ -619,31 +493,3 @@ double checkSimilarity(const Mat &m1, const Mat &m2) matchTemplate(m1, m2, diff, TM_CCORR_NORMED); return std::abs(diff.at(0, 0) - 1.f); } - - -int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps) -{ - assert(dst.type() == cpu_dst.type()); - assert(dst.size() == cpu_dst.size()); - if(checkNorm(cv::Mat(dst), cv::Mat(cpu_dst)) < eps ||checkNorm(cv::Mat(dst), cv::Mat(cpu_dst)) == eps) - return 1; - return 0; -} - -int ExceptDoubleNear(double val1, double val2, double abs_error) -{ - const double diff = fabs(val1 - val2); - if (diff <= abs_error) - return 1; - - return 0; -} - -int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps) -{ - assert(dst.type() == cpu_dst.type()); - assert(dst.size() == cpu_dst.size()); - if(checkSimilarity(cv::Mat(cpu_dst), cv::Mat(dst)) <= eps) - return 1; - return 0; -} diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp index 221293982c..385320beea 100644 --- a/modules/ocl/perf/precomp.hpp +++ 
b/modules/ocl/perf/precomp.hpp @@ -322,9 +322,46 @@ public: itname_changed_ = true; } - void setAccurate(int is_accurate = -1) + void setAccurate(int accurate, double diff) { - is_accurate_ = is_accurate; + is_accurate_ = accurate; + accurate_diff_ = diff; + } + + void ExpectMatsNear(vector& dst, vector& cpu_dst, vector& eps) + { + assert(dst.size() == cpu_dst.size()); + assert(cpu_dst.size() == eps.size()); + is_accurate_ = 1; + for(size_t i=0; i eps[i]) + is_accurate_ = 0; + } + } + + void ExpectedMatNear(cv::Mat& dst, cv::Mat& cpu_dst, double eps) + { + assert(dst.type() == cpu_dst.type()); + assert(dst.size() == cpu_dst.size()); + accurate_diff_ = checkNorm(dst, cpu_dst); + if(accurate_diff_ <= eps) + is_accurate_ = 1; + else + is_accurate_ = 0; + } + + void ExceptedMatSimilar(cv::Mat& dst, cv::Mat& cpu_dst, double eps) + { + assert(dst.type() == cpu_dst.type()); + assert(dst.size() == cpu_dst.size()); + accurate_diff_ = checkSimilarity(cpu_dst, dst); + if(accurate_diff_ <= eps) + is_accurate_ = 1; + else + is_accurate_ = 0; } std::stringstream &getCurSubtestDescription() @@ -342,7 +379,7 @@ private: num_iters_(10), cpu_num_iters_(2), gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0), record_(0), recordname_("performance"), itname_changed_(true), - is_accurate_(-1) + is_accurate_(-1), accurate_diff_(0.) 
{ cpu_times_.reserve(num_iters_); gpu_times_.reserve(num_iters_); @@ -363,6 +400,7 @@ private: gpu_times_.clear(); gpu_full_times_.clear(); is_accurate_ = -1; + accurate_diff_ = 0.; } double meanTime(const std::vector &samples); @@ -373,7 +411,7 @@ private: void writeHeading(); void writeSummary(); - void writeMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, + void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f, double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f); @@ -425,6 +463,7 @@ private: bool itname_changed_; int is_accurate_; + double accurate_diff_; }; diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index d425344d8a..1778c9839a 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -412,11 +412,11 @@ static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, String kernelN args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 )); + float f_scalar = (float)scalar; if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) args.push_back( std::make_pair( sizeof(cl_double), (void *)&scalar )); else { - float f_scalar = (float)scalar; args.push_back( std::make_pair( sizeof(cl_float), (void *)&f_scalar)); } @@ -783,45 +783,55 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl } } -template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) +template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, + const oclMat &mask, oclMat &buf) { size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); groupnum = groupnum * 2; int vlen = 8; int dbsize = groupnum * 2 * vlen * sizeof(T) ; - Context *clCxt = src.clCxt; - cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize); - *minVal 
= std::numeric_limits::max() , *maxVal = -std::numeric_limits::max(); + + ensureSizeIsEnough(1, dbsize, CV_8UC1, buf); + + cl_mem buf_data = reinterpret_cast(buf.data); + if (mask.empty()) { - arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax"); + arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax"); } else { - arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask"); + arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask"); } - T *p = new T[groupnum * vlen * 2]; - memset(p, 0, dbsize); - openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize); - if(minVal != NULL){ + + Mat matbuf = Mat(buf); + T *p = matbuf.ptr(); + if(minVal != NULL) + { + *minVal = std::numeric_limits::max(); for(int i = 0; i < vlen * (int)groupnum; i++) { *minVal = *minVal < p[i] ? *minVal : p[i]; } } - if(maxVal != NULL){ + if(maxVal != NULL) + { + *maxVal = -std::numeric_limits::max(); for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) { *maxVal = *maxVal > p[i] ? 
*maxVal : p[i]; } } - delete[] p; - openCLFree(dstBuffer); } -typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask); +typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) +{ + oclMat buf; + minMax_buf(src, minVal, maxVal, mask, buf); +} +void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { CV_Assert(src.oclchannels() == 1); if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) @@ -841,7 +851,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc }; minMaxFunc func; func = functab[src.depth()]; - func(src, minVal, maxVal, mask); + func(src, minVal, maxVal, mask, buf); } ////////////////////////////////////////////////////////////////////////////// @@ -1688,10 +1698,11 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String ker args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 )); + T scalar; if(_scalar != NULL) { double scalar1 = *((double *)_scalar); - T scalar = (T)scalar1; + scalar = (T)scalar1; args.push_back( std::make_pair( sizeof(T), (void *)&scalar )); } @@ -2308,9 +2319,9 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, String args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 )); + float pf = p; if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE)) { - float pf = p; args.push_back( std::make_pair( sizeof(cl_float), (void *)&pf )); } else diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index e8e0e588c5..d7255afd0d 100644 --- 
a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -244,11 +244,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx, distance, distType); } @@ -263,11 +264,12 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType); } @@ -283,11 +285,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD { const oclMat zeroMask; const oclMat &tempMask = mask.data ? 
mask : zeroMask; + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType); } @@ -465,11 +468,12 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &trainIdx, const oclMat &distance, int distType) { + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType); } diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index e06d29904e..abe98cf590 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -239,7 +239,7 @@ void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_b size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad) @@ -269,12 +269,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - char build_options [15] = ""; - if(L2Grad) - { - strcat(build_options, "-D L2GRAD"); - } - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, 
build_options); + const char * build_options = L2Grad ? "-D L2GRAD":""; + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad) { @@ -297,12 +293,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, i size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - char build_options [15] = ""; - if(L2Grad) - { - strcat(build_options, "-D L2GRAD"); - } - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); + const char * build_options = L2Grad ? "-D L2GRAD":""; + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh) @@ -333,7 +325,7 @@ void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int ro String kernelName = "calcMap"; size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols) @@ -353,7 +345,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols) @@ -383,7 +375,7 @@ void 
canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset)); - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); std::swap(st1, st2); } @@ -408,5 +400,5 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols) size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index a98443d416..d7b6267d29 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -356,8 +356,7 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, char compile_option[128]; sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s", anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], - rectKernel?"-D RECTKERNEL":"", - s); + s, rectKernel?"-D RECTKERNEL":""); std::vector< std::pair > args; args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); diff --git a/modules/ocl/src/gfft.cpp b/modules/ocl/src/gfft.cpp new file mode 100644 index 0000000000..c9376f9407 --- /dev/null +++ b/modules/ocl/src/gfft.cpp @@ -0,0 +1,349 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, 
INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; + +static bool use_cpu_sorter = true; + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *imgproc_gfft; + } +} + +namespace +{ +enum SortMethod +{ + CPU_STL, + BITONIC, + SELECTION +}; + +const int GROUP_SIZE = 256; + +template +struct Sorter +{ + //typedef EigType; +}; + +//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed. 
+template<> +struct Sorter +{ + typedef oclMat EigType; + static cv::Mutex cs; + static Mat mat_eig; + + //prototype + static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2) + { + float v1 = mat_eig.at(cvRound(pt1.s[1]), cvRound(pt1.s[0])); + float v2 = mat_eig.at(cvRound(pt2.s[1]), cvRound(pt2.s[0])); + return v1 > v2; + } + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + cv::AutoLock lock(cs); + //temporarily use STL's sort function + Mat mat_corners = corners; + mat_eig = eig_tex; + std::sort(mat_corners.begin(), mat_corners.begin() + count, clfloat2Gt); + corners = mat_corners; + } +}; +cv::Mutex Sorter::cs; +cv::Mat Sorter::mat_eig; + +template<> +struct Sorter +{ + typedef TextureCL EigType; + + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + Context * cxt = Context::getContext(); + size_t globalThreads[3] = {count / 2, 1, 1}; + size_t localThreads[3] = {GROUP_SIZE, 1, 1}; + + // 2^numStages should be equal to count or the output is invalid + int numStages = 0; + for(int i = count; i > 1; i >>= 1) + { + ++numStages; + } + const int argc = 5; + std::vector< std::pair > args(argc); + String kernelname = "sortCorners_bitonicSort"; + args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex); + args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data); + args[2] = std::make_pair(sizeof(cl_int), (void *)&count); + for(int stage = 0; stage < numStages; ++stage) + { + args[3] = std::make_pair(sizeof(cl_int), (void *)&stage); + for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage) + { + args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage); + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + } + } + } +}; + +template<> +struct Sorter +{ + typedef TextureCL EigType; + + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + Context * cxt = Context::getContext(); + + 
size_t globalThreads[3] = {count, 1, 1}; + size_t localThreads[3] = {GROUP_SIZE, 1, 1}; + + std::vector< std::pair > args; + //local + String kernelname = "sortCorners_selectionSortLocal"; + int lds_size = GROUP_SIZE * sizeof(cl_float2); + args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) ); + args.push_back( std::make_pair( lds_size, (void*)NULL) ); + + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + + //final + kernelname = "sortCorners_selectionSortFinal"; + args.pop_back(); + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + } +}; + +int findCorners_caller( + const TextureCL& eig, + const float threshold, + const oclMat& mask, + oclMat& corners, + const int max_count) +{ + std::vector k; + Context * cxt = Context::getContext(); + + std::vector< std::pair > args; + String kernelname = "findCorners"; + + const int mask_strip = mask.step / mask.elemSize1(); + + oclMat g_counter(1, 1, CV_32SC1); + g_counter.setTo(0); + + args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig )); + args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data )); + args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data )); + args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip)); + args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold )); + args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows )); + args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols )); + args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count )); + args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data )); + + size_t globalThreads[3] = {eig.cols, eig.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + const char * opt = mask.empty() ? 
"" : "-D WITH_MASK"; + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt); + return std::min(Mat(g_counter).at(0), max_count); +} +}//unnamed namespace + +void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask) +{ + CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); + CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); + + CV_DbgAssert(support_image2d()); + + ensureSizeIsEnough(image.size(), CV_32F, eig_); + + if (useHarrisDetector) + cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK); + else + cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3); + + double maxVal = 0; + minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_); + + ensureSizeIsEnough(1, std::max(1000, static_cast(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); + + Ptr eig_tex = bindTexturePtr(eig_); + int total = findCorners_caller( + *eig_tex, + static_cast(maxVal * qualityLevel), + mask, + tmpCorners_, + tmpCorners_.cols); + + if (total == 0) + { + corners.release(); + return; + } + if(use_cpu_sorter) + { + Sorter::sortCorners_caller(eig_, tmpCorners_, total); + } + else + { + //if total is power of 2 + if(((total - 1) & (total)) == 0) + { + Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); + } + else + { + Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); + } + } + + if (minDistance < 1) + { + corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? 
std::min(maxCorners, total) : total, 1)); + } + else + { + std::vector tmp(total); + downloadPoints(tmpCorners_, tmp); + + std::vector tmp2; + tmp2.reserve(total); + + const int cell_size = cvRound(minDistance); + const int grid_width = (image.cols + cell_size - 1) / cell_size; + const int grid_height = (image.rows + cell_size - 1) / cell_size; + + std::vector< std::vector > grid(grid_width * grid_height); + + for (int i = 0; i < total; ++i) + { + Point2f p = tmp[i]; + + bool good = true; + + int x_cell = static_cast(p.x / cell_size); + int y_cell = static_cast(p.y / cell_size); + + int x1 = x_cell - 1; + int y1 = y_cell - 1; + int x2 = x_cell + 1; + int y2 = y_cell + 1; + + // boundary check + x1 = std::max(0, x1); + y1 = std::max(0, y1); + x2 = std::min(grid_width - 1, x2); + y2 = std::min(grid_height - 1, y2); + + for (int yy = y1; yy <= y2; yy++) + { + for (int xx = x1; xx <= x2; xx++) + { + std::vector& m = grid[yy * grid_width + xx]; + + if (!m.empty()) + { + for(size_t j = 0; j < m.size(); j++) + { + float dx = p.x - m[j].x; + float dy = p.y - m[j].y; + + if (dx * dx + dy * dy < minDistance * minDistance) + { + good = false; + goto break_out; + } + } + } + } + } + + break_out: + + if(good) + { + grid[y_cell * grid_width + x_cell].push_back(p); + + tmp2.push_back(p); + + if (maxCorners > 0 && tmp2.size() == static_cast(maxCorners)) + break; + } + } + + corners.upload(Mat(1, static_cast(tmp2.size()), CV_32FC2, &tmp2[0])); + } +} +void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector &points_v) +{ + CV_DbgAssert(points.type() == CV_32FC2); + points_v.resize(points.cols); + openCLSafeCall(clEnqueueReadBuffer( + *reinterpret_cast(getoclCommandQueue()), + reinterpret_cast(points.data), + CL_TRUE, + 0, + points.cols * sizeof(Point2f), + &points_v[0], + 0, + NULL, + NULL)); +} diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 09e1816e37..8fb69567af 100644 --- a/modules/ocl/src/haar.cpp +++ 
b/modules/ocl/src/haar.cpp @@ -136,47 +136,22 @@ struct CvHidHaarClassifierCascade }; typedef struct { - //int rows; - //int ystep; int width_height; - //int height; int grpnumperline_totalgrp; - //int totalgrp; int imgoff; float factor; } detect_piramid_info; - -#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__ +#ifdef WIN32 #define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT)) -typedef _ALIGNED_ON(128) struct GpuHidHaarFeature -{ - _ALIGNED_ON(32) struct - { - _ALIGNED_ON(4) int p0 ; - _ALIGNED_ON(4) int p1 ; - _ALIGNED_ON(4) int p2 ; - _ALIGNED_ON(4) int p3 ; - _ALIGNED_ON(4) float weight ; - } - /*_ALIGNED_ON(32)*/ rect[CV_HAAR_FEATURE_MAX] ; -} -GpuHidHaarFeature; - typedef _ALIGNED_ON(128) struct GpuHidHaarTreeNode { _ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4]; - //_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ; - //_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ; - //_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ; - /*_ALIGNED_ON(16)*/ float weight[CV_HAAR_FEATURE_MAX] ; - /*_ALIGNED_ON(4)*/ float threshold ; - _ALIGNED_ON(8) float alpha[2] ; + _ALIGNED_ON(16) float alpha[3] ; _ALIGNED_ON(4) int left ; _ALIGNED_ON(4) int right ; - // GpuHidHaarFeature feature __attribute__((aligned (128))); } GpuHidHaarTreeNode; @@ -184,7 +159,6 @@ GpuHidHaarTreeNode; typedef _ALIGNED_ON(32) struct GpuHidHaarClassifier { _ALIGNED_ON(4) int count; - //CvHaarFeature* orig_feature; _ALIGNED_ON(8) GpuHidHaarTreeNode *node ; _ALIGNED_ON(8) float *alpha ; } @@ -219,32 +193,16 @@ typedef _ALIGNED_ON(64) struct GpuHidHaarClassifierCascade _ALIGNED_ON(4) int p2 ; _ALIGNED_ON(4) int p3 ; _ALIGNED_ON(4) float inv_window_area ; - // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8))); } GpuHidHaarClassifierCascade; #else #define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) )) -typedef struct _ALIGNED_ON(128) GpuHidHaarFeature -{ - struct _ALIGNED_ON(32) -{ - int p0 _ALIGNED_ON(4); - int p1 _ALIGNED_ON(4); - int p2 _ALIGNED_ON(4); - int 
p3 _ALIGNED_ON(4); - float weight _ALIGNED_ON(4); -} -rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32); -} -GpuHidHaarFeature; - - typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode { int p[CV_HAAR_FEATURE_MAX][4] _ALIGNED_ON(64); float weight[CV_HAAR_FEATURE_MAX];// _ALIGNED_ON(16); float threshold;// _ALIGNED_ON(4); - float alpha[2] _ALIGNED_ON(8); + float alpha[3] _ALIGNED_ON(16); int left _ALIGNED_ON(4); int right _ALIGNED_ON(4); } @@ -287,7 +245,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade int p2 _ALIGNED_ON(4); int p3 _ALIGNED_ON(4); float inv_window_area _ALIGNED_ON(4); - // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8))); } GpuHidHaarClassifierCascade; #endif @@ -295,36 +252,6 @@ const int icv_object_win_border = 1; const float icv_stage_threshold_bias = 0.0001f; double globaltime = 0; - -// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count ) -// { -// CvHaarClassifierCascade *cascade = 0; - -// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier); - -// if( stage_count <= 0 ) -// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" ); - -// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size ); -// memset( cascade, 0, block_size ); - -// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1); -// cascade->flags = CV_HAAR_MAGIC_VAL; -// cascade->count = stage_count; - -// return cascade; -// } - -//static int globalcounter = 0; - -// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade ) -// { -// if( _cascade && *_cascade ) -// { -// cvFree( _cascade ); -// } -// } - /* create more efficient internal representation of haar classifier cascade */ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier) { @@ -440,24 +367,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl 
hid_stage_classifier->two_rects = 1; haar_classifier_ptr += stage_classifier->count; - /* - hid_stage_classifier->parent = (stage_classifier->parent == -1) - ? NULL : stage_classifier_ptr + stage_classifier->parent; - hid_stage_classifier->next = (stage_classifier->next == -1) - ? NULL : stage_classifier_ptr + stage_classifier->next; - hid_stage_classifier->child = (stage_classifier->child == -1) - ? NULL : stage_classifier_ptr + stage_classifier->child; - - out->is_tree |= hid_stage_classifier->next != NULL; - */ - for( j = 0; j < stage_classifier->count; j++ ) { CvHaarClassifier *classifier = stage_classifier->classifier + j; GpuHidHaarClassifier *hid_classifier = hid_stage_classifier->classifier + j; int node_count = classifier->count; - // float* alpha_ptr = (float*)(haar_node_ptr + node_count); float *alpha_ptr = &haar_node_ptr->alpha[0]; hid_classifier->count = node_count; @@ -484,16 +399,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl node->p[2][3] = 0; node->weight[2] = 0; } - // memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) ); else hid_stage_classifier->two_rects = 0; - } - - memcpy( alpha_ptr, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0])); - haar_node_ptr = haar_node_ptr + 1; - // (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*)); - // (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1); + memcpy( node->alpha, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0])); + haar_node_ptr = haar_node_ptr + 1; + } out->is_stump_based &= node_count == 1; } } @@ -506,25 +417,19 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl #define sum_elem_ptr(sum,row,col) \ - ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype))) + ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype))) #define sqsum_elem_ptr(sqsum,row,col) \ - ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype))) + 
((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype))) #define calc_sum(rect,offset) \ - ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) + ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade, - /* const CvArr* _sum, - const CvArr* _sqsum, - const CvArr* _tilted_sum,*/ double scale, int step) { - // CvMat sum_stub, *sum = (CvMat*)_sum; - // CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum; - // CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum; GpuHidHaarClassifierCascade *cascade; int coi0 = 0, coi1 = 0; int i; @@ -540,61 +445,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc if( scale <= 0 ) CV_Error( CV_StsOutOfRange, "Scale must be positive" ); - // sum = cvGetMat( sum, &sum_stub, &coi0 ); - // sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 ); - if( coi0 || coi1 ) CV_Error( CV_BadCOI, "COI is not supported" ); - // if( !CV_ARE_SIZES_EQ( sum, sqsum )) - // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); - - // if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 || - // CV_MAT_TYPE(sum->type) != CV_32SC1 ) - // CV_Error( CV_StsUnsupportedFormat, - // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - if( !_cascade->hid_cascade ) gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total); cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade; stage_classifier = (GpuHidHaarStageClassifier *) (cascade + 1); - if( cascade->has_tilted_features ) - { - // tilted = cvGetMat( tilted, &tilted_stub, &coi1 ); - - // if( CV_MAT_TYPE(tilted->type) != CV_32SC1 ) - // CV_Error( CV_StsUnsupportedFormat, - // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - - // if( sum->step != tilted->step ) - // CV_Error( CV_StsUnmatchedSizes, - // "Sum and tilted_sum must have the same stride (step, 
widthStep)" ); - - // if( !CV_ARE_SIZES_EQ( sum, tilted )) - // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); - // cascade->tilted = *tilted; - } - _cascade->scale = scale; _cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale ); _cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale ); - //cascade->sum = *sum; - //cascade->sqsum = *sqsum; - equRect.x = equRect.y = cvRound(scale); equRect.width = cvRound((_cascade->orig_window_size.width - 2) * scale); equRect.height = cvRound((_cascade->orig_window_size.height - 2) * scale); weight_scale = 1. / (equRect.width * equRect.height); cascade->inv_window_area = weight_scale; - // cascade->pq0 = equRect.y * step + equRect.x; - // cascade->pq1 = equRect.y * step + equRect.x + equRect.width ; - // cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x; - // cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ; - cascade->pq0 = equRect.x; cascade->pq1 = equRect.y; cascade->pq2 = equRect.x + equRect.width; @@ -617,10 +486,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc { CvHaarFeature *feature = &_cascade->stage_classifier[i].classifier[j].haar_feature[l]; - /* GpuHidHaarClassifier* classifier = - cascade->stage_classifier[i].classifier + j; */ - //GpuHidHaarFeature* hidfeature = - // &cascade->stage_classifier[i].classifier[j].node[l].feature; GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l]; double sum0 = 0, area0 = 0; CvRect r[3]; @@ -635,8 +500,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc /* align blocks */ for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ ) { - //if( !hidfeature->rect[k].p0 ) - // break; if(!hidnode->p[k][0]) break; r[k] = feature->rect[k].r; @@ -716,15 +579,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc if( !feature->tilted ) { - /* 
hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x; - hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width; - hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x; - hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width; - */ - /*hidnode->p0[k] = tr.y * step + tr.x; - hidnode->p1[k] = tr.y * step + tr.x + tr.width; - hidnode->p2[k] = (tr.y + tr.height) * step + tr.x; - hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/ hidnode->p[k][0] = tr.x; hidnode->p[k][1] = tr.y; hidnode->p[k][2] = tr.x + tr.width; @@ -732,37 +586,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc } else { - /* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width; - hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height; - hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x; - hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height; - */ - hidnode->p[k][2] = (tr.y + tr.width) * step + tr.x + tr.width; hidnode->p[k][3] = (tr.y + tr.width + tr.height) * step + tr.x + tr.width - tr.height; hidnode->p[k][0] = tr.y * step + tr.x; hidnode->p[k][1] = (tr.y + tr.height) * step + tr.x - tr.height; } - - //hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio); hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio); if( k == 0 ) area0 = tr.width * tr.height; else - //sum0 += hidfeature->rect[k].weight * tr.width * tr.height; sum0 += hidnode->weight[k] * tr.width * tr.height; } - - // hidfeature->rect[0].weight = (float)(-sum0/area0); hidnode->weight[0] = (float)(-sum0 / area0); } /* l */ } /* j */ } } -static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade - /*double scale=0.0,*/ - /*int step*/) +static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade) { GpuHidHaarClassifierCascade *cascade; int i; @@ -816,11 +657,7 @@ static void 
gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade if(!hidnode->p[k][0]) break; r[k] = feature->rect[k].r; - // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) ); - // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) ); - // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) ); - // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) ); - } + } nr = k; for( k = 0; k < nr; k++ ) @@ -838,7 +675,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade hidnode->p[k][3] = tr.height; hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio); } - //hidnode->weight[0]=(float)(-sum0/area0); } /* l */ } /* j */ } @@ -851,7 +687,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS const double GROUP_EPS = 0.2; CvSeq *result_seq = 0; - cv::Ptr temp_storage; cv::ConcurrentRectVector allCandidates; std::vector rectList; @@ -909,6 +744,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS if( gimg.cols < minSize.width || gimg.rows < minSize.height ) CV_Error(CV_StsError, "Image too small"); + cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { CvSize winSize0 = cascade->orig_window_size; @@ -951,7 +787,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU * gsum.clCxt->computeUnits() *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -996,7 +832,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 ); stagebuffer = openCLCreateBuffer(gsum.clCxt, 
CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count); - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); @@ -1043,7 +878,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&pq )); args.push_back ( std::make_pair(sizeof(cl_float) , (void *)&correction )); - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); + const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0"; + + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options); openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz ); @@ -1058,6 +895,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS openCLSafeCall(clReleaseMemObject(scaleinfobuffer)); openCLSafeCall(clReleaseMemObject(nodebuffer)); openCLSafeCall(clReleaseMemObject(candidatebuffer)); + } else { @@ -1115,7 +953,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode); nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode), node, 0, NULL, NULL)); @@ -1157,7 +994,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args1.push_back ( 
std::make_pair(sizeof(cl_int) , (void *)&startnodenum )); size_t globalThreads2[3] = {nodenum, 1, 1}; - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); } @@ -1193,7 +1029,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&correctionbuffer )); args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&nodenum )); - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options); candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status); @@ -1281,7 +1117,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std int blocksize = 8; int grp_per_CU = 12; size_t localThreads[3] = { blocksize, blocksize, 1 }; - size_t globalThreads[3] = { grp_per_CU * Context::getContext()->computeUnits() * localThreads[0], + size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -1297,8 +1133,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std CvHaarClassifierCascade *cascade = oldCascade; GpuHidHaarClassifierCascade *gcascade; GpuHidHaarStageClassifier *stage; - GpuHidHaarClassifier *classifier; - GpuHidHaarTreeNode *node; if( CV_MAT_DEPTH(gimg.type()) != CV_8U ) CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" ); @@ -1311,7 +1145,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std } int *candidate; - + cl_command_queue qu = 
reinterpret_cast(Context::getContext()->oclCommandQueue()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { int indexy = 0; @@ -1337,19 +1171,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade); stage = (GpuHidHaarStageClassifier *)(gcascade + 1); - classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); - node = (GpuHidHaarTreeNode *)(classifier->node); - - gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 ); - - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, - sizeof(GpuHidHaarStageClassifier) * gcascade->count, - stage, 0, NULL, NULL)); - - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, - m_nodenum * sizeof(GpuHidHaarTreeNode), - node, 0, NULL, NULL)); int startstage = 0; int endstage = gcascade->count; @@ -1386,17 +1207,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq )); args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction )); - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); + const char * build_options = gcascade->is_stump_based ? 
"-D STUMP_BASED=1" : "-D STUMP_BASED=0"; + + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options); candidate = (int *)malloc(4 * sizeof(int) * outputsz); memset(candidate, 0, 4 * sizeof(int) * outputsz); + openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz ); for(int i = 0; i < outputsz; i++) + { if(candidate[4 * i + 2] != 0) + { allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3])); - + } + } free((void *)candidate); candidate = NULL; } @@ -1404,56 +1231,14 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std { cv::ocl::integral(gimg, gsum, gsqsum); - gpuSetHaarClassifierCascade(cascade); - gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade; - stage = (GpuHidHaarStageClassifier *)(gcascade + 1); - classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); - node = (GpuHidHaarTreeNode *)(classifier->node); - - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, - m_nodenum * sizeof(GpuHidHaarTreeNode), - node, 0, NULL, NULL)); - - cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount); - float *correction = (float *)malloc(sizeof(float) * m_loopcount); - int startstage = 0; - int endstage = gcascade->count; - double factor; - for(int i = 0; i < m_loopcount; i++) - { - factor = scalev[i]; - int equRect_x = (int)(factor * gcascade->p0 + 0.5); - int equRect_y = (int)(factor * gcascade->p1 + 0.5); - int equRect_w = (int)(factor * gcascade->p3 + 0.5); - int equRect_h = (int)(factor * gcascade->p2 + 0.5); - p[i].s[0] = equRect_x; - p[i].s[1] = equRect_y; - p[i].s[2] = equRect_x + equRect_w; - p[i].s[3] = equRect_y + equRect_h; - correction[i] = 1. 
/ (equRect_w * equRect_h); - int startnodenum = m_nodenum * i; - float factor2 = (float)factor; - - vector > args1; - args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer )); - args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer )); - args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 )); - args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] )); - args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum )); - - size_t globalThreads2[3] = {m_nodenum, 1, 1}; - - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); - } int step = gsum.step / 4; int startnode = 0; int splitstage = 3; - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL)); - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL)); + + int startstage = 0; + int endstage = gcascade->count; vector > args; args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer )); @@ -1474,7 +1259,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer )); args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum )); - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); + const char * build_options = gcascade->is_stump_based ? 
"-D STUMP_BASED=1" : "-D STUMP_BASED=0"; + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options); candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL); @@ -1484,12 +1270,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3])); } - - free(p); - free(correction); clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0); } - rectList.resize(allCandidates.size()); if(!allCandidates.empty()) std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin()); @@ -1507,6 +1289,10 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols, const int outputsz, const size_t localThreads[], CvSize minSize, CvSize maxSize) { + if(initialized) + { + return; // we only allow one time initialization + } CvHaarClassifierCascade *cascade = oldCascade; if( !CV_IS_HAAR_CLASSIFIER(cascade) ) @@ -1522,7 +1308,9 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols, int totalclassifier=0; if( !cascade->hid_cascade ) + { gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier); + } if( maxSize.height == 0 || maxSize.width == 0 ) { @@ -1544,6 +1332,78 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols, m_minSize = minSize; m_maxSize = maxSize; + // initialize nodes + GpuHidHaarClassifierCascade *gcascade; + GpuHidHaarStageClassifier *stage; + GpuHidHaarClassifier *classifier; + GpuHidHaarTreeNode *node; + cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + if( (flags & CV_HAAR_SCALE_IMAGE) ) + { + gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade); + stage = (GpuHidHaarStageClassifier 
*)(gcascade + 1); + classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); + node = (GpuHidHaarTreeNode *)(classifier->node); + + gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 ); + + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, + sizeof(GpuHidHaarStageClassifier) * gcascade->count, + stage, 0, NULL, NULL)); + + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, + m_nodenum * sizeof(GpuHidHaarTreeNode), + node, 0, NULL, NULL)); + } + else + { + gpuSetHaarClassifierCascade(cascade); + + gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade; + stage = (GpuHidHaarStageClassifier *)(gcascade + 1); + classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); + node = (GpuHidHaarTreeNode *)(classifier->node); + + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, + m_nodenum * sizeof(GpuHidHaarTreeNode), + node, 0, NULL, NULL)); + + cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount); + float *correction = (float *)malloc(sizeof(float) * m_loopcount); + double factor; + for(int i = 0; i < m_loopcount; i++) + { + factor = scalev[i]; + int equRect_x = (int)(factor * gcascade->p0 + 0.5); + int equRect_y = (int)(factor * gcascade->p1 + 0.5); + int equRect_w = (int)(factor * gcascade->p3 + 0.5); + int equRect_h = (int)(factor * gcascade->p2 + 0.5); + p[i].s[0] = equRect_x; + p[i].s[1] = equRect_y; + p[i].s[2] = equRect_x + equRect_w; + p[i].s[3] = equRect_y + equRect_h; + correction[i] = 1. 
/ (equRect_w * equRect_h); + int startnodenum = m_nodenum * i; + float factor2 = (float)factor; + + vector > args1; + args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer )); + args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer )); + args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 )); + args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] )); + args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum )); + + size_t globalThreads2[3] = {m_nodenum, 1, 1}; + + openCLExecuteKernel(Context::getContext(), &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); + } + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL)); + + free(p); + free(correction); + } initialized = true; } @@ -1642,6 +1502,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs( CvSize sz; CvSize winSize0 = oldCascade->orig_window_size; detect_piramid_info *scaleinfo; + cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); if (flags & CV_HAAR_SCALE_IMAGE) { for(factor = 1.f;; factor *= scaleFactor) @@ -1743,7 +1604,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs( ((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount); } - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0, + 
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); free(scaleinfo); @@ -1755,7 +1616,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector& f const std::vector &rectList, const std::vector &rweights) { - CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) ); + MemStorage tempStorage(cvCreateMemStorage(0)); + CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage ); if( findBiggestObject && rectList.size() ) { @@ -1791,168 +1653,32 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector& f void cv::ocl::OclCascadeClassifierBuf::release() { - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer)); - - if( (m_flags & CV_HAAR_SCALE_IMAGE) ) - { - cvFree(&oldCascade->hid_cascade); - } - else + if(initialized) { - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer)); - } + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer)); + + if( (m_flags & CV_HAAR_SCALE_IMAGE) ) + { + cvFree(&oldCascade->hid_cascade); + } + else + { + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer)); + 
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer)); + } - free(buffers); - buffers = NULL; + free(buffers); + buffers = NULL; + initialized = false; + } } #ifndef _MAX_PATH #define _MAX_PATH 1024 #endif - -/****************************************************************************************\ -* Persistence functions * -\****************************************************************************************/ - -/* field names */ - -#define ICV_HAAR_SIZE_NAME "size" -#define ICV_HAAR_STAGES_NAME "stages" -#define ICV_HAAR_TREES_NAME "trees" -#define ICV_HAAR_FEATURE_NAME "feature" -#define ICV_HAAR_RECTS_NAME "rects" -#define ICV_HAAR_TILTED_NAME "tilted" -#define ICV_HAAR_THRESHOLD_NAME "threshold" -#define ICV_HAAR_LEFT_NODE_NAME "left_node" -#define ICV_HAAR_LEFT_VAL_NAME "left_val" -#define ICV_HAAR_RIGHT_NODE_NAME "right_node" -#define ICV_HAAR_RIGHT_VAL_NAME "right_val" -#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold" -#define ICV_HAAR_PARENT_NAME "parent" -#define ICV_HAAR_NEXT_NAME "next" - -static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */) -{ - return 1; -} - -namespace cv -{ -namespace ocl -{ - -struct gpuHaarDetectObjects_ScaleImage_Invoker -{ - gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade, - int _stripSize, double _factor, - const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1, - Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec ) - { - cascade = _cascade; - stripSize = _stripSize; - factor = _factor; - sum1 = _sum1; - sqsum1 = _sqsum1; - norm1 = _norm1; - mask1 = _mask1; - equRect = _equRect; - vec = &_vec; - } - - void operator()( const BlockedRange &range ) const - { - Size winSize0 = cascade->orig_window_size; - Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * factor)); - int y1 = range.begin() * stripSize, y2 = std::min(range.end() * stripSize, sum1.rows - 1 - winSize0.height); - Size 
ssz(sum1.cols - 1 - winSize0.width, y2 - y1); - int x, y, ystep = factor > 2 ? 1 : 2; - - for( y = y1; y < y2; y += ystep ) - for( x = 0; x < ssz.width; x += ystep ) - { - if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 ) - vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor), - winSize.width, winSize.height)); - } - } - - const CvHaarClassifierCascade *cascade; - int stripSize; - double factor; - Mat sum1, sqsum1, *norm1, *mask1; - Rect equRect; - ConcurrentRectVector *vec; -}; - - -struct gpuHaarDetectObjects_ScaleCascade_Invoker -{ - gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade, - Size _winsize, const Range &_xrange, double _ystep, - size_t _sumstep, const int **_p, const int **_pq, - ConcurrentRectVector &_vec ) - { - cascade = _cascade; - winsize = _winsize; - xrange = _xrange; - ystep = _ystep; - sumstep = _sumstep; - p = _p; - pq = _pq; - vec = &_vec; - } - - void operator()( const BlockedRange &range ) const - { - int iy, startY = range.begin(), endY = range.end(); - const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3]; - const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3]; - bool doCannyPruning = p0 != 0; - int sstep = (int)(sumstep / sizeof(p0[0])); - - for( iy = startY; iy < endY; iy++ ) - { - int ix, y = cvRound(iy * ystep), ixstep = 1; - for( ix = xrange.start; ix < xrange.end; ix += ixstep ) - { - int x = cvRound(ix * ystep); // it should really be ystep, not ixstep - - if( doCannyPruning ) - { - int offset = y * sstep + x; - int s = p0[offset] - p1[offset] - p2[offset] + p3[offset]; - int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset]; - if( s < 100 || sq < 20 ) - { - ixstep = 2; - continue; - } - } - - int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */); - if( result > 0 ) - vec->push_back(Rect(x, y, winsize.width, winsize.height)); - ixstep = result != 0 ? 
1 : 2; - } - } - } - - const CvHaarClassifierCascade *cascade; - double ystep; - size_t sumstep; - Size winsize; - Range xrange; - const int **p; - const int **pq; - ConcurrentRectVector *vec; -}; - -} -} #endif diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index d703a61b20..4b8fe58b89 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -270,7 +270,7 @@ namespace cv size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; - + float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; std::vector< std::pair > args; if(map1.channels() == 2) { @@ -292,7 +292,7 @@ namespace cv args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols)); float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) { args.push_back( std::make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -326,7 +326,6 @@ namespace cv } else { - float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; args.push_back( std::make_pair(sizeof(cl_float4), (void *)&borderFloat)); } } @@ -1209,31 +1208,42 @@ namespace cv void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int borderType) + { + oclMat dx, dy; + cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType); + } + + void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, + double k, int borderType) { if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(Error::GpuNotSupported, "select device don't support double"); } CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); - oclMat Dx, Dy; CV_Assert(borderType == cv::BORDER_CONSTANT || 
borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); - extractCovData(src, Dx, Dy, blockSize, ksize, borderType); + extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), Dx, Dy, dst, borderType); + corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); } void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) + { + oclMat dx, dy; + cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType); + } + + void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType) { if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(Error::GpuNotSupported, "select device don't support double"); } CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); - oclMat Dx, Dy; CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); - extractCovData(src, Dx, Dy, blockSize, ksize, borderType); + extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType); + corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); } /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index 0bdaf0d36c..27f8d26ecf 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -43,9 +43,28 @@ // //M*/ -#define CL_USE_DEPRECATED_OPENCL_1_1_APIS 
#include "precomp.hpp" +#ifdef __GNUC__ +#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402 +#define GCC_DIAG_STR(s) #s +#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y) +# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x) +# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x) +# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406 +# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \ +GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x)) +# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop) +# else +# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x)) +# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x)) +# endif +#else +# define GCC_DIAG_OFF(x) +# define GCC_DIAG_ON(x) +#endif +#endif /* __GNUC__ */ + using namespace std; namespace cv @@ -121,6 +140,9 @@ namespace cv build_options, finish_mode); } +#ifdef __GNUC__ + GCC_DIAG_OFF(deprecated-declarations) +#endif cl_mem bindTexture(const oclMat &mat) { cl_mem texture; @@ -156,7 +178,7 @@ namespace cv format.image_channel_order = CL_RGBA; break; default: - CV_Error(-1, "Image forma is not supported"); + CV_Error(-1, "Image format is not supported"); break; } #ifdef CL_VERSION_1_2 @@ -180,10 +202,6 @@ namespace cv else #endif { -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif texture = clCreateImage2D( (cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, @@ -193,9 +211,6 @@ namespace cv 0, NULL, &err); -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif } size_t origin[] = { 0, 0, 0 }; size_t region[] = { mat.cols, mat.rows, 1 }; @@ -225,6 +240,14 @@ namespace cv openCLSafeCall(err); return texture; } +#ifdef __GNUC__ + GCC_DIAG_ON(deprecated-declarations) +#endif + + Ptr bindTexturePtr(const oclMat &mat) + { + return Ptr(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type())); + } void releaseTexture(cl_mem& texture) { openCLFree(texture); diff --git a/modules/ocl/src/opencl/arithm_add.cl 
b/modules/ocl/src/opencl/arithm_add.cl index 7d4b0a7653..070ced4731 100644 --- a/modules/ocl/src/opencl/arithm_add.cl +++ b/modules/ocl/src/opencl/arithm_add.cl @@ -127,7 +127,7 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 3) +#define dst_align ((dst_offset / 2) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -165,7 +165,7 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 3) +#define dst_align ((dst_offset / 2) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -335,7 +335,7 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step, #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -375,7 +375,7 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -507,7 +507,7 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i #ifdef dst_align #undef dst_align #endif 
-#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl index fdf65923cd..3dbd376ecf 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl @@ -126,7 +126,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -164,7 +164,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -288,7 +288,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); diff --git a/modules/ocl/src/opencl/filtering_morph.cl b/modules/ocl/src/opencl/filtering_morph.cl index 49640008f4..e659a59f51 100644 --- a/modules/ocl/src/opencl/filtering_morph.cl +++ b/modules/ocl/src/opencl/filtering_morph.cl @@ -120,7 +120,7 @@ __kernel void morph_C1_D0(__global const uchar * 
restrict src, int gidy = get_global_id(1); int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel); - if(gidx+3p[1][0])); int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); + float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0])); + float nodethreshold = w.w * variance_norm_factor; info1.x +=lcl_off; @@ -261,8 +251,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; - stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; + bool passThres = classsum >= nodethreshold; +#if STUMP_BASED + stage_sum += passThres ? alpha3.y : alpha3.x; nodecounter++; + nodeloop++; +#else + bool isRootNode = (nodecounter & 1) == 0; + if(isRootNode) + { + if( (passThres && currentnodeptr->right) || + (!passThres && currentnodeptr->left)) + { + nodecounter ++; + } + else + { + stage_sum += alpha3.x; + nodecounter += 2; + nodeloop ++; + } + } + else + { + stage_sum += passThres ? 
alpha3.z : alpha3.y; + nodecounter ++; + nodeloop ++; + } +#endif } result = (stage_sum >= stagethreshold); @@ -301,18 +317,20 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa if(lcl_compute_win_id < queuecount) { - int tempnodecounter = lcl_compute_id; float part_sum = 0.f; - for(int lcl_loop=0; lcl_loopp[0][0])); int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); + float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0])); float nodethreshold = w.w * variance_norm_factor; info1.x +=queue_pixel; @@ -332,8 +350,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; - part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; - tempnodecounter +=lcl_compute_win; + bool passThres = classsum >= nodethreshold; +#if STUMP_BASED + part_sum += passThres ? alpha3.y : alpha3.x; + tempnodecounter += lcl_compute_win; + lcl_loop++; +#else + if(root_offset == 0) + { + if( (passThres && currentnodeptr->right) || + (!passThres && currentnodeptr->left)) + { + root_offset = 1; + } + else + { + part_sum += alpha3.x; + tempnodecounter += lcl_compute_win; + lcl_loop++; + } + } + else + { + part_sum += passThres ? 
alpha3.z : alpha3.y; + tempnodecounter += lcl_compute_win; + lcl_loop++; + root_offset = 0; + } +#endif }//end for(int lcl_loop=0;lcl_looptwo_rects) -{ - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1,p_offset,0) * t1.weight[0]; - - classsum += calc_sum1(t1, p_offset,1) * t1.weight[1]; - stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0]; - - counter++; - } -} -else -{ - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t = node[counter].threshold*variance_norm_factor; - classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0]; - classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1]; - - if( node[counter].p0[2] ) - classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2]; - - stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify - - counter++; - } -} -*/ -/* -__kernel void gpuRunHaarClassifierCascade_ScaleWindow( - constant GpuHidHaarClassifierCascade * _cascade, - global GpuHidHaarStageClassifier * stagecascadeptr, - //global GpuHidHaarClassifier * classifierptr, - global GpuHidHaarTreeNode * nodeptr, - global int * sum, - global float * sqsum, - global int * _candidate, - int pixel_step, - int cols, - int rows, - int start_stage, - int end_stage, - //int counts, - int nodenum, - int ystep, - int detect_width, - //int detect_height, - int loopcount, - int outputstep) - //float scalefactor) -{ -unsigned int x1 = get_global_id(0); -unsigned int y1 = get_global_id(1); -int p_offset; -int m, n; -int result; -int counter; -float mean, variance_norm_factor; -for(int i=0;ip1 - cascade->p0; -int window_height = window_width; -result = 1; -counter = 0; -unsigned int x = mul24(x1,ystep); -unsigned int y = mul24(y1,ystep); -if((x < cols - window_width - 1) && (y < rows - window_height -1)) -{ -global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr 
+cascade->count*i+ start_stage; -//global GpuHidHaarClassifier *classifier = classifierptr; -global GpuHidHaarTreeNode *node = nodeptr + nodenum*i; - -p_offset = mad24(y, pixel_step, x);// modify - -mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) - - *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3)) - *cascade->inv_window_area; - -variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) - - *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset); -variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean; -variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify - -// if( cascade->is_stump_based ) -//{ -for( m = start_stage; m < end_stage; m++ ) -{ -float stage_sum = 0.f; -float t, classsum; -GpuHidHaarTreeNode t1; - -//#pragma unroll -for( n = 0; n < stagecascade->count; n++ ) -{ - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1]; - - if((t1.p0[2]) && (!stagecascade->two_rects)) - classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2]; - - stage_sum += classsum >= t ? 
t1.alpha[1] : t1.alpha[0];// modify - counter++; -} - -if (stage_sum < stagecascade->threshold) -{ - result = 0; - break; -} - -stagecascade++; - -} -if(result) -{ - candidate[4 * (y1 * detect_width + x1)] = x; - candidate[4 * (y1 * detect_width + x1) + 1] = y; - candidate[4 * (y1 * detect_width + x1)+2] = window_width; - candidate[4 * (y1 * detect_width + x1) + 3] = window_height; -} -//} -} -} -} -*/ diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl index 44877f3860..8507972ff2 100644 --- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl +++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl @@ -17,7 +17,7 @@ // @Authors // Wu Xinglong, wxl370@126.com // Sen Liu, swjtuls1987@126.com -// +// Peng Xiao, pengxiao@outlook.com // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -49,25 +49,13 @@ #define CV_HAAR_FEATURE_MAX 3 typedef int sumtype; typedef float sqsumtype; -typedef struct __attribute__((aligned(128))) GpuHidHaarFeature -{ - struct __attribute__((aligned(32))) -{ - int p0 __attribute__((aligned(4))); - int p1 __attribute__((aligned(4))); - int p2 __attribute__((aligned(4))); - int p3 __attribute__((aligned(4))); - float weight __attribute__((aligned(4))); -} -rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32))); -} -GpuHidHaarFeature; + typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode { int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64))); float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/; float threshold /*__attribute__((aligned (4)))*/; - float alpha[2] __attribute__((aligned(8))); + float alpha[3] __attribute__((aligned(16))); int left __attribute__((aligned(4))); int right __attribute__((aligned(4))); } @@ -174,45 +162,83 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( const int p_offset = mad24(y, step, x); cascadeinfo.x += p_offset; 
cascadeinfo.z += p_offset; - mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - - sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]) + mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] + - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]) * correction_t; - variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - - sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]; + variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] + - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]; variance_norm_factor = variance_norm_factor * correction_t - mean * mean; variance_norm_factor = variance_norm_factor >= 0.f ? 
sqrt(variance_norm_factor) : 1.f; bool result = true; nodecounter = startnode + nodecount * scalei; - for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++) { float stage_sum = 0.f; int stagecount = stagecascadeptr[stageloop].count; - for (int nodeloop = 0; nodeloop < stagecount; nodeloop++) + for (int nodeloop = 0; nodeloop < stagecount;) { __global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter); int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0])); int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0])); int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0])); float4 w = *(__global float4 *)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0])); + float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0])); float nodethreshold = w.w * variance_norm_factor; + info1.x += p_offset; info1.z += p_offset; info2.x += p_offset; info2.z += p_offset; - float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] - - sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x; - classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] - - sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y; info3.x += p_offset; info3.z += p_offset; - classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] - - sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z; - stage_sum += classsum >= nodethreshold ? 
alpha2.y : alpha2.x; + float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] + - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] - + sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x; + classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] + - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] - + sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y; + classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] + - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] - + sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z; + + bool passThres = classsum >= nodethreshold; + +#if STUMP_BASED + stage_sum += passThres ? alpha3.y : alpha3.x; nodecounter++; + nodeloop++; +#else + bool isRootNode = (nodecounter & 1) == 0; + if(isRootNode) + { + if( (passThres && currentnodeptr->right) || + (!passThres && currentnodeptr->left)) + { + nodecounter ++; + } + else + { + stage_sum += alpha3.x; + nodecounter += 2; + nodeloop ++; + } + } + else + { + stage_sum += (passThres ? 
alpha3.z : alpha3.y); + nodecounter ++; + nodeloop ++; + } +#endif } - result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold); + result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold); } barrier(CLK_LOCAL_MEM_FENCE); @@ -222,7 +248,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( int queueindex = atomic_inc(lclcount); lcloutindex[queueindex] = (y << 16) | x; } - barrier(CLK_LOCAL_MEM_FENCE); int queuecount = lclcount[0]; @@ -277,5 +302,6 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH newnode[counter].threshold = t1.threshold; newnode[counter].alpha[0] = t1.alpha[0]; newnode[counter].alpha[1] = t1.alpha[1]; + newnode[counter].alpha[2] = t1.alpha[2]; } diff --git a/modules/ocl/src/opencl/imgproc_canny.cl b/modules/ocl/src/opencl/imgproc_canny.cl index ceaaed1eb6..5402759e3c 100644 --- a/modules/ocl/src/opencl/imgproc_canny.cl +++ b/modules/ocl/src/opencl/imgproc_canny.cl @@ -297,6 +297,9 @@ calcMap map_step /= sizeof(*map); map_offset /= sizeof(*map); + mag += mag_offset; + map += map_offset; + __local float smem[18][18]; int gidx = get_global_id(0); @@ -389,7 +392,7 @@ edgesHysteresisLocal ( __global int * map, __global ushort2 * st, - volatile __global unsigned int * counter, + __global unsigned int * counter, int rows, int cols, int map_step, @@ -399,6 +402,8 @@ edgesHysteresisLocal map_step /= sizeof(*map); map_offset /= sizeof(*map); + map += map_offset; + __local int smem[18][18]; int gidx = get_global_id(0); @@ -416,12 +421,12 @@ edgesHysteresisLocal if(ly < 14) { smem[ly][lx] = - map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset]; + map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step]; } if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols) { smem[ly + 14][lx] = - map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset]; + map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step]; } barrier(CLK_LOCAL_MEM_FENCE); @@ -482,14 +487,17 
@@ edgesHysteresisLocal __constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; __constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; + #define stack_size 512 __kernel -void edgesHysteresisGlobal +void +__attribute__((reqd_work_group_size(128,1,1))) +edgesHysteresisGlobal ( __global int * map, __global ushort2 * st1, __global ushort2 * st2, - volatile __global int * counter, + __global int * counter, int rows, int cols, int count, @@ -501,6 +509,8 @@ void edgesHysteresisGlobal map_step /= sizeof(*map); map_offset /= sizeof(*map); + map += map_offset; + int gidx = get_global_id(0); int gidy = get_global_id(1); @@ -510,7 +520,7 @@ void edgesHysteresisGlobal int grp_idx = get_group_id(0); int grp_idy = get_group_id(1); - volatile __local unsigned int s_counter; + __local unsigned int s_counter; __local unsigned int s_ind; __local ushort2 s_st[stack_size]; @@ -564,9 +574,9 @@ void edgesHysteresisGlobal pos.x += c_dx[lidx & 7]; pos.y += c_dy[lidx & 7]; - if (map[pos.x + map_offset + pos.y * map_step] == 1) + if (map[pos.x + pos.y * map_step] == 1) { - map[pos.x + map_offset + pos.y * map_step] = 2; + map[pos.x + pos.y * map_step] = 2; ind = atomic_inc(&s_counter); @@ -621,6 +631,6 @@ void getEdges if(gidy < rows && gidx < cols) { - dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1)); + dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1)); } } diff --git a/modules/ocl/src/opencl/imgproc_gfft.cl b/modules/ocl/src/opencl/imgproc_gfft.cl new file mode 100644 index 0000000000..5fa27ffc1b --- /dev/null +++ b/modules/ocl/src/opencl/imgproc_gfft.cl @@ -0,0 +1,276 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef WITH_MASK +#define WITH_MASK 0 +#endif + +__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + +inline float ELEM_INT2(image2d_t _eig, int _x, int _y) +{ + return read_imagef(_eig, sampler, (int2)(_x, _y)).x; +} + +inline float ELEM_FLT2(image2d_t _eig, float2 pt) +{ + return read_imagef(_eig, sampler, pt).x; +} + +__kernel + void findCorners + ( + image2d_t eig, + __global const char * mask, + __global float2 * corners, + const int mask_strip,// in pixels + const float threshold, + const int rows, + const int cols, + const int max_count, + __global int * g_counter + ) +{ + const int j = get_global_id(0); + const int i = get_global_id(1); + + if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 +#if WITH_MASK + && mask[i * mask_strip + j] != 0 +#endif + ) + { + const float val = ELEM_INT2(eig, j, i); + + if (val > threshold) + { + float maxVal = val; + + maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal); + + maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal); + + maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal); + + if (val == maxVal) + { + const int ind = atomic_inc(g_counter); + + if (ind < max_count) + corners[ind] = (float2)(j, i); + } + } + } +} + +//bitonic sort +__kernel + void sortCorners_bitonicSort + ( + image2d_t eig, + __global float2 * corners, + const int count, + const int stage, + const int passOfStage + ) +{ + const int threadId = get_global_id(0); + if(threadId >= count / 2) + { + return; + } + + const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 
1 : 0; // 0 is descent + + const int pairDistance = 1 << (stage - passOfStage); + const int blockWidth = 2 * pairDistance; + + const int leftId = min( (threadId % pairDistance) + + (threadId / pairDistance) * blockWidth, count ); + + const int rightId = min( leftId + pairDistance, count ); + + const float2 leftPt = corners[leftId]; + const float2 rightPt = corners[rightId]; + + const float leftVal = ELEM_FLT2(eig, leftPt); + const float rightVal = ELEM_FLT2(eig, rightPt); + + const bool compareResult = leftVal > rightVal; + + float2 greater = compareResult ? leftPt:rightPt; + float2 lesser = compareResult ? rightPt:leftPt; + + corners[leftId] = sortOrder ? lesser : greater; + corners[rightId] = sortOrder ? greater : lesser; +} + +//selection sort for gfft +//kernel is ported from Bolt library: +//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl +// Local sort will firstly sort elements of each workgroup using selection sort +// its performance is O(n) +__kernel + void sortCorners_selectionSortLocal + ( + image2d_t eig, + __global float2 * corners, + const int count, + __local float2 * scratch + ) +{ + int i = get_local_id(0); // index in workgroup + int numOfGroups = get_num_groups(0); // index in workgroup + int groupID = get_group_id(0); + int wg = get_local_size(0); // workgroup size = block size + int n; // number of elements to be processed for this work group + + int offset = groupID * wg; + int same = 0; + corners += offset; + n = (groupID == (numOfGroups-1))? 
(count - wg*(numOfGroups-1)) : wg; + float2 pt1, pt2; + + pt1 = corners[min(i, n)]; + scratch[i] = pt1; + barrier(CLK_LOCAL_MEM_FENCE); + + if(i >= n) + { + return; + } + + float val1 = ELEM_FLT2(eig, pt1); + float val2; + + int pos = 0; + for (int j=0;j val1) + pos++;//calculate the rank of this element in this work group + else + { + if(val1 > val2) + continue; + else + { + // val1 and val2 are same + same++; + } + } + } + for (int j=0; j< same; j++) + corners[pos + j] = pt1; +} +__kernel + void sortCorners_selectionSortFinal + ( + image2d_t eig, + __global float2 * corners, + const int count + ) +{ + const int i = get_local_id(0); // index in workgroup + const int numOfGroups = get_num_groups(0); // index in workgroup + const int groupID = get_group_id(0); + const int wg = get_local_size(0); // workgroup size = block size + int pos = 0, same = 0; + const int offset = get_group_id(0) * wg; + const int remainder = count - wg*(numOfGroups-1); + + if((offset + i ) >= count) + return; + float2 pt1, pt2; + pt1 = corners[groupID*wg + i]; + + float val1 = ELEM_FLT2(eig, pt1); + float val2; + + for(int j=0; j val2) + break; + else + { + //Increment only if the value is not the same. + if( val2 > val1 ) + pos++; + else + same++; + } + } + } + + for(int k=0; k val2) + break; + else + { + //Don't increment if the value is the same. 
+ //Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false + if(val2 > val1) + pos++; + else + same++; + } + } + for (int j=0; j< same; j++) + corners[pos + j] = pt1; +} + diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl index 8ad501f7c1..9162abb7ef 100644 --- a/modules/ocl/src/opencl/imgproc_threshold.cl +++ b/modules/ocl/src/opencl/imgproc_threshold.cl @@ -143,7 +143,7 @@ __kernel void threshold_C1_D5(__global const float * restrict src, __global floa int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3); float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart); int4 con = dpos >= 0 && dpos < dst_cols; - ddata = convert_float4(con) != 0 ? ddata : dVal; + ddata = convert_float4(con) != (float4)(0) ? ddata : dVal; if(dstart < dst_cols) { *(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata; diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl index 1043b8410b..40a1993952 100644 --- a/modules/ocl/src/opencl/pyrlk.cl +++ b/modules/ocl/src/opencl/pyrlk.cl @@ -46,145 +46,10 @@ //#pragma OPENCL EXTENSION cl_amd_printf : enable -__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - if (y < rows && x < cols * cn) - { - const uchar src_val0 = (src + (y > 0 ? y-1 : rows > 1 ? 1 : 0) * srcStep)[x]; - const uchar src_val1 = (src + y * srcStep)[x]; - const uchar src_val2 = (src + (y < rows-1 ? y+1 : rows > 1 ? 
rows-2 : 0) * srcStep)[x]; - - ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10; - ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0; - } -} - -__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - if (y < rows && x < cols * cn) - { - const uchar src_val0 = (src + (y > 0 ? y - 1 : 1) * srcStep)[x]; - const uchar src_val1 = (src + y * srcStep)[x]; - const uchar src_val2 = (src + (y < rows - 1 ? y + 1 : rows - 2) * srcStep)[x]; - - ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10; - ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0; - } -} - -__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - const int colsn = cols * cn; - - if (y < rows && x < colsn) - { - __global const short* dx_buf_row = dx_buf + y * dx_bufStep; - __global const short* dy_buf_row = dy_buf + y * dy_bufStep; - - const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn; - const int xl = x - cn >= 0 ? 
x - cn : cn + x; - - ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl]; - ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10; - } -} - -__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - const int colsn = cols * cn; - - if (y < rows && x < colsn) - { - __global const short* dx_buf_row = dx_buf + y * dx_bufStep; - __global const short* dy_buf_row = dy_buf + y * dy_bufStep; - - const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn; - const int xl = x - cn >= 0 ? x - cn : cn + x; - - ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl]; - ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10; - } -} - -#define W_BITS 14 -#define W_BITS1 14 - -#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) - -int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y) -{ - int2 ipt; - ipt.x = convert_int_sat_rtn(pt.x); - ipt.y = convert_int_sat_rtn(pt.y); - - float a = pt.x - ipt.x; - float b = pt.y - ipt.y; - - int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS)); - int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS)); - int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS)); - int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; - - __global const uchar* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn; - __global const uchar* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn; - - return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, 
W_BITS1 - 5); -} - -int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y) -{ - int2 ipt; - ipt.x = convert_int_sat_rtn(pt.x); - ipt.y = convert_int_sat_rtn(pt.y); - - float a = pt.x - ipt.x; - float b = pt.y - ipt.y; - - int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS)); - int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS)); - int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS)); - int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; - - __global const short* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn; - __global const short* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn; - - return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1); -} - -float linearFilter_float(__global const float* src, int srcStep, int cn, float2 pt, float x, float y) -{ - int2 ipt; - ipt.x = convert_int_sat_rtn(pt.x); - ipt.y = convert_int_sat_rtn(pt.y); - - float a = pt.x - ipt.x; - float b = pt.y - ipt.y; - - float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS)); - float iw01 = (a * (1.0f - b) * (1 << W_BITS)); - float iw10 = ((1.0f - a) * b * (1 << W_BITS)); - float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; - - __global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn; - __global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn; - - return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5; -} - #define BUFFER 64 - +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif #ifdef CPU void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) { @@ -193,71 +58,51 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local smem3[tid] = val3; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = val1 += 
smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - smem3[tid] = val3 += smem3[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - smem3[tid] = val3 += smem3[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - smem1[tid] = val1 += smem1[tid + 32]; - smem2[tid] = val2 += smem2[tid + 32]; - smem3[tid] = val3 += smem3[tid + 32]; + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; + smem3[tid] += smem3[tid + 32]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) { - smem1[tid] = val1 += smem1[tid + 16]; - smem2[tid] = val2 += smem2[tid + 16]; - smem3[tid] = val3 += smem3[tid + 16]; + smem1[tid] += smem1[tid + 16]; + smem2[tid] += smem2[tid + 16]; + smem3[tid] += smem3[tid + 16]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) { - smem1[tid] = val1 += smem1[tid + 8]; - smem2[tid] = val2 += smem2[tid + 8]; - smem3[tid] = val3 += smem3[tid + 8]; + smem1[tid] += smem1[tid + 8]; + smem2[tid] += smem2[tid + 8]; + smem3[tid] += smem3[tid + 8]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) { - smem1[tid] = val1 += smem1[tid + 4]; - smem2[tid] = val2 += smem2[tid + 4]; - smem3[tid] = val3 += smem3[tid + 4]; + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; + smem3[tid] += smem3[tid + 4]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) { - smem1[tid] = val1 += smem1[tid + 2]; - smem2[tid] = val2 += smem2[tid + 2]; - smem3[tid] = val3 += smem3[tid + 2]; + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; + smem3[tid] += smem3[tid + 2]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) { - smem1[BUFFER] = val1 += smem1[tid + 1]; - smem2[BUFFER] = val2 += smem2[tid + 1]; - smem3[BUFFER] = val3 += smem3[tid + 1]; + smem1[BUFFER] = smem1[tid] + smem1[tid + 1]; + smem2[BUFFER] = smem2[tid] + smem2[tid + 1]; + smem3[BUFFER] = smem3[tid] + smem3[tid + 1]; } barrier(CLK_LOCAL_MEM_FENCE); } @@ -268,63 +113,45 
@@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l smem2[tid] = val2; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = (val1 += smem1[tid + 128]); - smem2[tid] = (val2 += smem2[tid + 128]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = (val1 += smem1[tid + 64]); - smem2[tid] = (val2 += smem2[tid + 64]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - smem1[tid] = (val1 += smem1[tid + 32]); - smem2[tid] = (val2 += smem2[tid + 32]); + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) { - smem1[tid] = (val1 += smem1[tid + 16]); - smem2[tid] = (val2 += smem2[tid + 16]); + smem1[tid] += smem1[tid + 16]; + smem2[tid] += smem2[tid + 16]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) { - smem1[tid] = (val1 += smem1[tid + 8]); - smem2[tid] = (val2 += smem2[tid + 8]); + smem1[tid] += smem1[tid + 8]; + smem2[tid] += smem2[tid + 8]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) { - smem1[tid] = (val1 += smem1[tid + 4]); - smem2[tid] = (val2 += smem2[tid + 4]); + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) { - smem1[tid] = (val1 += smem1[tid + 2]); - smem2[tid] = (val2 += smem2[tid + 2]); + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) { - smem1[BUFFER] = (val1 += smem1[tid + 1]); - smem2[BUFFER] = (val2 += smem2[tid + 1]); + smem1[BUFFER] = smem1[tid] + smem1[tid + 1]; + smem2[BUFFER] = smem2[tid] + smem2[tid + 1]; } barrier(CLK_LOCAL_MEM_FENCE); } @@ -334,205 +161,146 @@ void reduce1(float val1, volatile __local float* smem1, int tid) smem1[tid] = val1; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = (val1 += smem1[tid + 128]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = 
(val1 += smem1[tid + 64]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - smem1[tid] = (val1 += smem1[tid + 32]); + smem1[tid] += smem1[tid + 32]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) { - smem1[tid] = (val1 += smem1[tid + 16]); + smem1[tid] += smem1[tid + 16]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) { - smem1[tid] = (val1 += smem1[tid + 8]); + smem1[tid] += smem1[tid + 8]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) { - smem1[tid] = (val1 += smem1[tid + 4]); + smem1[tid] += smem1[tid + 4]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) { - smem1[tid] = (val1 += smem1[tid + 2]); + smem1[tid] += smem1[tid + 2]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) { - smem1[BUFFER] = (val1 += smem1[tid + 1]); + smem1[BUFFER] = smem1[tid] + smem1[tid + 1]; } barrier(CLK_LOCAL_MEM_FENCE); } #else -void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) +void reduce3(float val1, float val2, float val3, +__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid) { smem1[tid] = val1; smem2[tid] = val2; smem3[tid] = val3; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) + if (tid < 32) { - smem1[tid] = val1 += smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - smem3[tid] = val3 += smem3[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; + smem3[tid] += smem3[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { #endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - smem3[tid] = val3 += smem3[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); + smem1[tid] += smem1[tid + 16]; + smem2[tid] += smem2[tid + 16]; + smem3[tid] += smem3[tid + 16]; +#if WAVE_SIZE <16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { #endif + smem1[tid] += smem1[tid + 8]; + smem2[tid] 
+= smem2[tid + 8]; + smem3[tid] += smem3[tid + 8]; - if (tid < 32) - { - volatile __local float* vmem1 = smem1; - volatile __local float* vmem2 = smem2; - volatile __local float* vmem3 = smem3; - - vmem1[tid] = val1 += vmem1[tid + 32]; - vmem2[tid] = val2 += vmem2[tid + 32]; - vmem3[tid] = val3 += vmem3[tid + 32]; - - vmem1[tid] = val1 += vmem1[tid + 16]; - vmem2[tid] = val2 += vmem2[tid + 16]; - vmem3[tid] = val3 += vmem3[tid + 16]; - - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem2[tid] = val2 += vmem2[tid + 8]; - vmem3[tid] = val3 += vmem3[tid + 8]; + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; + smem3[tid] += smem3[tid + 4]; - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem2[tid] = val2 += vmem2[tid + 4]; - vmem3[tid] = val3 += vmem3[tid + 4]; + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; + smem3[tid] += smem3[tid + 2]; - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem2[tid] = val2 += vmem2[tid + 2]; - vmem3[tid] = val3 += vmem3[tid + 2]; - - vmem1[tid] = val1 += vmem1[tid + 1]; - vmem2[tid] = val2 += vmem2[tid + 1]; - vmem3[tid] = val3 += vmem3[tid + 1]; + smem1[tid] += smem1[tid + 1]; + smem2[tid] += smem2[tid + 1]; + smem3[tid] += smem3[tid + 1]; } } -void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid) +void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid) { smem1[tid] = val1; smem2[tid] = val2; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) + if (tid < 32) { - smem1[tid] = val1 += smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { #endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); + smem1[tid] += smem1[tid + 16]; + smem2[tid] += 
smem2[tid + 16]; +#if WAVE_SIZE <16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { #endif + smem1[tid] += smem1[tid + 8]; + smem2[tid] += smem2[tid + 8]; - if (tid < 32) - { - volatile __local float* vmem1 = smem1; - volatile __local float* vmem2 = smem2; - - vmem1[tid] = val1 += vmem1[tid + 32]; - vmem2[tid] = val2 += vmem2[tid + 32]; - - vmem1[tid] = val1 += vmem1[tid + 16]; - vmem2[tid] = val2 += vmem2[tid + 16]; + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem2[tid] = val2 += vmem2[tid + 8]; + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem2[tid] = val2 += vmem2[tid + 4]; - - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem2[tid] = val2 += vmem2[tid + 2]; - - vmem1[tid] = val1 += vmem1[tid + 1]; - vmem2[tid] = val2 += vmem2[tid + 1]; + smem1[tid] += smem1[tid + 1]; + smem2[tid] += smem2[tid + 1]; } } -void reduce1(float val1, __local float* smem1, int tid) +void reduce1(float val1, __local volatile float* smem1, int tid) { smem1[tid] = val1; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) + if (tid < 32) { - smem1[tid] = val1 += smem1[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); + smem1[tid] += smem1[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { #endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); + smem1[tid] += smem1[tid + 16]; +#if WAVE_SIZE <16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { #endif - - if (tid < 32) - { - volatile __local float* vmem1 = smem1; - - vmem1[tid] = val1 += vmem1[tid + 32]; - vmem1[tid] = val1 += vmem1[tid + 16]; - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem1[tid] = val1 += vmem1[tid + 1]; + smem1[tid] += smem1[tid + 8]; + smem1[tid] += smem1[tid + 4]; + smem1[tid] += smem1[tid + 2]; + smem1[tid] += smem1[tid 
+ 1]; } } #endif #define SCALE (1.0f / (1 << 20)) #define THRESHOLD 0.01f -#define DIMENSION 21 // Image read mode __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index 76d15513af..3b75f303d4 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -61,6 +61,8 @@ #include #include +#undef OPENCV_NOSTL + #include "opencv2/imgproc.hpp" #include "opencv2/objdetect.hpp" #include "opencv2/ocl.hpp" @@ -74,6 +76,7 @@ #if defined (HAVE_OPENCL) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS #include "opencv2/ocl/private/util.hpp" #include "safe_call.hpp" diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index feb84c7b6f..e95729c643 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Dachuan Zhao, dachuan@multicorewareinc.com -// Yao Wang, yao@multicorewareinc.com +// Dachuan Zhao, dachuan@multicorewareinc.com +// Yao Wang, yao@multicorewareinc.com // Nathan, liujun@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, @@ -54,35 +54,20 @@ namespace cv { namespace ocl { -///////////////////////////OpenCL kernel strings/////////////////////////// extern const char *pyrlk; extern const char *pyrlk_no_image; extern const char *operator_setTo; extern const char *operator_convertTo; extern const char *operator_copyToM; -extern const char *arithm_mul; extern const char *pyr_down; } } - struct dim3 { unsigned int x, y, z; }; -struct float2 -{ - float x, y; -}; - -struct int2 -{ - int x, y; -}; - -namespace -{ -void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11) +static void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11) { winSize.width *= cn; @@ -102,12 +87,6 @@ void 
calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDe block.z = patch.z = 1; } -} - -inline int divUp(int total, int grain) -{ - return (total + grain - 1) / grain; -} /////////////////////////////////////////////////////////////////////////// //////////////////////////////// ConvertTo //////////////////////////////// @@ -448,89 +427,6 @@ static void copyTo(const oclMat &src, oclMat &m ) src.data, src.step, src.cols * src.elemSize(), src.rows, src.offset); } -// static void copyTo(const oclMat &src, oclMat &mat, const oclMat &mask) -// { -// if (mask.empty()) -// { -// copyTo(src, mat); -// } -// else -// { -// mat.create(src.size(), src.type()); -// copy_to_with_mask_cus(src, mat, mask, "copy_to_with_mask"); -// } -// } - -static void arithmetic_run(const oclMat &src1, oclMat &dst, String kernelName, const char **kernelString, void *_scalar) -{ - if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) - { - CV_Error(Error::GpuNotSupported, "Selected device don't support double\r\n"); - return; - } - - //dst.create(src1.size(), src1.type()); - //CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && - // src1.rows == src2.rows && src2.rows == dst.rows); - CV_Assert(src1.cols == dst.cols && - src1.rows == dst.rows); - - CV_Assert(src1.type() == dst.type()); - CV_Assert(src1.depth() != CV_8S); - - Context *clCxt = src1.clCxt; - //int channels = dst.channels(); - //int depth = dst.depth(); - - //int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1}, - // {4, 0, 4, 4, 1, 1, 1}, - // {4, 0, 4, 4, 1, 1, 1}, - // {4, 0, 4, 4, 1, 1, 1} - //}; - - //size_t vector_length = vector_lengths[channels-1][depth]; - //int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); - //int cols = divUp(dst.cols * channels + offset_cols, vector_length); - - size_t localThreads[3] = { 16, 16, 1 }; - //size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - // divUp(dst.rows, localThreads[1]) * localThreads[1], 
- // 1 - // }; - size_t globalThreads[3] = { src1.cols, - src1.rows, - 1 - }; - - int dst_step1 = dst.cols * dst.elemSize(); - std::vector > args; - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset )); - //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data )); - //args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step )); - //args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset )); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 )); - - //if(_scalar != NULL) - //{ - float scalar1 = *((float *)_scalar); - args.push_back( std::make_pair( sizeof(float), (float *)&scalar1 )); - //} - - openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, src1.depth(), CLFLUSH); -} - -static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar) -{ - arithmetic_run(src1, dst, "arithm_muls", &arithm_mul, (void *)(&scalar)); -} - static void pyrdown_run_cus(const oclMat &src, const oclMat &dst) { @@ -576,15 +472,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 }; size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 
8 : 32, 1}; int cn = I.oclchannels(); - char calcErr; - if (level == 0) - { - calcErr = 1; - } - else - { - calcErr = 0; - } + char calcErr = level==0?1:0; std::vector > args; @@ -614,7 +502,16 @@ static void lkSparse_run(oclMat &I, oclMat &J, if(isImageSupported) { - openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + std::stringstream idxStr; + idxStr << kernelName.c_str() << "_C" << I.oclchannels() << "_D" << I.depth(); + cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str().c_str()); + int wave_size = queryDeviceInfo(kernel); + openCLSafeCall(clReleaseKernel(kernel)); + + static char opt[16] = {0}; + sprintf(opt, " -D WAVE_SIZE=%d", wave_size); + + openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt, CLFLUSH); releaseTexture(ITex); releaseTexture(JTex); } @@ -656,9 +553,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next oclMat temp1 = (useInitialFlow ? 
nextPts : prevPts).reshape(1); oclMat temp2 = nextPts.reshape(1); - //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f)); - multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f); - //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2); + multiply(1.0f/(1<= 0; level--) { lkSparse_run(prevPyr_[level], nextPyr_[level], diff --git a/modules/ocl/src/safe_call.hpp b/modules/ocl/src/safe_call.hpp index e187272e5c..4cf39b85e8 100644 --- a/modules/ocl/src/safe_call.hpp +++ b/modules/ocl/src/safe_call.hpp @@ -47,7 +47,7 @@ #define __OPENCV_OPENCL_SAFE_CALL_HPP__ #if defined __APPLE__ -#include +#include #else #include #endif diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp index e7b9316d8e..10032e897c 100644 --- a/modules/ocl/test/test_canny.cpp +++ b/modules/ocl/test/test_canny.cpp @@ -73,7 +73,6 @@ TEST_P(Canny, Accuracy) double low_thresh = 50.0; double high_thresh = 100.0; - cv::resize(img, img, cv::Size(512, 384)); cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img); cv::ocl::oclMat edges; diff --git a/modules/ocl/test/test_haar.cpp b/modules/ocl/test/test_haar.cpp index 652109d75b..fa6dd68073 100644 --- a/modules/ocl/test/test_haar.cpp +++ b/modules/ocl/test/test_haar.cpp @@ -55,6 +55,12 @@ using namespace testing; using namespace std; using namespace cv; extern string workdir; + +namespace +{ +IMPLEMENT_PARAM_CLASS(CascadeName, std::string); +CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml")); +CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml")); struct getRect { Rect operator ()(const CvAvgComp &e) const @@ -62,23 +68,24 @@ struct getRect return e.rect; } }; +} -PARAM_TEST_CASE(Haar, double, int) +PARAM_TEST_CASE(Haar, double, int, CascadeName) { cv::ocl::OclCascadeClassifier cascade, nestedCascade; - cv::ocl::OclCascadeClassifierBuf cascadebuf; cv::CascadeClassifier cpucascade, cpunestedCascade; double scale; int flags; + 
std::string cascadeName; virtual void SetUp() { scale = GET_PARAM(0); flags = GET_PARAM(1); - string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml"; + cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(2)); - if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) || (!cascadebuf.load( cascadeName ))) + if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) ) { cout << "ERROR: Could not load classifier cascade" << endl; return; @@ -115,7 +122,7 @@ TEST_P(Haar, FaceDetect) Seq(_objects).copyTo(vecAvgComp); oclfaces.resize(vecAvgComp.size()); std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - + cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, flags, Size(30, 30), Size(0, 0) ); @@ -136,7 +143,6 @@ TEST_P(Haar, FaceDetectUseBuf) vector faces, oclfaces; Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); cvtColor( img, gray, CV_BGR2GRAY ); resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); equalizeHist( smallImg, smallImg ); @@ -144,19 +150,31 @@ TEST_P(Haar, FaceDetectUseBuf) cv::ocl::oclMat image; image.upload(smallImg); + cv::ocl::OclCascadeClassifierBuf cascadebuf; + if( !cascadebuf.load( cascadeName ) ) + { + cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" 
<< endl; + return; + } cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3, flags, Size(30, 30), Size(0, 0) ); - cascadebuf.release(); cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, flags, Size(30, 30), Size(0, 0) ); EXPECT_EQ(faces.size(), oclfaces.size()); + + // intentionally run ocl facedetect again and check if it still works after the first run + cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3, + flags, + Size(30, 30)); + cascadebuf.release(); + EXPECT_EQ(faces.size(), oclfaces.size()); } INSTANTIATE_TEST_CASE_P(FaceDetect, Haar, Combine(Values(1.0), - Values(CV_HAAR_SCALE_IMAGE, 0))); + Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2))); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp index b08d33a08f..0121be8f9e 100644 --- a/modules/ocl/test/test_optflow.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -55,6 +55,83 @@ using namespace testing; using namespace std; extern string workdir; + + +////////////////////////////////////////////////////// +// GoodFeaturesToTrack +namespace +{ + IMPLEMENT_PARAM_CLASS(MinDistance, double) +} +PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance) +{ + double minDistance; + + virtual void SetUp() + { + minDistance = GET_PARAM(0); + } +}; + +TEST_P(GoodFeaturesToTrack, Accuracy) +{ + cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame.empty()); + + int maxCorners = 1000; + double qualityLevel = 0.01; + + cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); + + cv::ocl::oclMat d_pts; + detector(oclMat(frame), d_pts); + + ASSERT_FALSE(d_pts.empty()); + + std::vector pts(d_pts.cols); + + detector.downloadPoints(d_pts, pts); + + std::vector pts_gold; + cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance); + + ASSERT_EQ(pts_gold.size(), pts.size()); + + size_t mistmatch = 0; + for (size_t i = 0; i < 
pts.size(); ++i) + { + cv::Point2i a = pts_gold[i]; + cv::Point2i b = pts[i]; + + bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; + + if (!eq) + ++mistmatch; + } + + double bad_ratio = static_cast(mistmatch) / pts.size(); + + ASSERT_LE(bad_ratio, 0.01); +} + +TEST_P(GoodFeaturesToTrack, EmptyCorners) +{ + int maxCorners = 1000; + double qualityLevel = 0.01; + + cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); + + cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0)); + cv::ocl::oclMat corners(1, maxCorners, CV_32FC2); + + detector(src, corners); + + ASSERT_TRUE(corners.empty()); +} + +INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack, + testing::Values(MinDistance(0.0), MinDistance(3.0))); + ////////////////////////////////////////////////////////////////////////// PARAM_TEST_CASE(TVL1, bool) { diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 4d3e6c8f94..d61a05f7e6 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -59,17 +59,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; @@ -159,19 +159,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds switch (srcImgs[0].type()) { case CV_8U: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), 
FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index 232dba88da..3e9cc008bc 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -51,12 +51,12 @@ using namespace cv; template -struct FastNlMeansDenoisingInvoker { +struct FastNlMeansDenoisingInvoker : ParallelLoopBody { public: FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst, int template_window_size, int search_window_size, const float h); - void operator() (const BlockedRange& range) const; + void operator() (const Range& range) const; private: void operator= (const FastNlMeansDenoisingInvoker&); @@ -152,9 +152,9 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( } template -void FastNlMeansDenoisingInvoker::operator() (const BlockedRange& range) const { - int row_from = range.begin(); - int row_to = range.end() - 1; +void FastNlMeansDenoisingInvoker::operator() (const Range& range) const { + int row_from = range.start; + int row_to = range.end - 1; Array2d dist_sums(search_window_size_, search_window_size_); diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index ee7d3bc7fa..e2351a23c0 100644 --- 
a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -51,13 +51,13 @@ using namespace cv; template -struct FastNlMeansMultiDenoisingInvoker { +struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody { public: FastNlMeansMultiDenoisingInvoker( const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, Mat& dst, int template_window_size, int search_window_size, const float h); - void operator() (const BlockedRange& range) const; + void operator() (const Range& range) const; private: void operator= (const FastNlMeansMultiDenoisingInvoker&); @@ -171,9 +171,9 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( } template -void FastNlMeansMultiDenoisingInvoker::operator() (const BlockedRange& range) const { - int row_from = range.begin(); - int row_to = range.end() - 1; +void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { + int row_from = range.start; + int row_to = range.end - 1; Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); diff --git a/modules/photo/src/precomp.hpp b/modules/photo/src/precomp.hpp index 60cc99b19d..38ac3ffcda 100644 --- a/modules/photo/src/precomp.hpp +++ b/modules/photo/src/precomp.hpp @@ -43,8 +43,9 @@ #ifndef __OPENCV_PRECOMP_H__ #define __OPENCV_PRECOMP_H__ -#include "opencv2/photo.hpp" #include "opencv2/core/private.hpp" +#include "opencv2/core/utility.hpp" +#include "opencv2/photo.hpp" #ifdef HAVE_TEGRA_OPTIMIZATION #include "opencv2/photo/photo_tegra.hpp" diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 3a48711a8b..35eb4738b4 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -65,7 +65,7 @@ struct DistIdxPair }; -struct MatchPairsBody +struct MatchPairsBody : ParallelLoopBody { MatchPairsBody(const MatchPairsBody& other) : matcher(other.matcher), features(other.features), @@ -76,10 +76,10 
@@ struct MatchPairsBody : matcher(_matcher), features(_features), pairwise_matches(_pairwise_matches), near_pairs(_near_pairs) {} - void operator ()(const BlockedRange &r) const + void operator ()(const Range &r) const { const int num_images = static_cast(features.size()); - for (int i = r.begin(); i < r.end(); ++i) + for (int i = r.start; i < r.end; ++i) { int from = near_pairs[i].first; int to = near_pairs[i].second; @@ -525,9 +525,9 @@ void FeaturesMatcher::operator ()(const std::vector &features, st MatchPairsBody body(*this, features, pairwise_matches, near_pairs); if (is_thread_safe_) - parallel_for(BlockedRange(0, static_cast(near_pairs.size())), body); + parallel_for_(Range(0, static_cast(near_pairs.size())), body); else - body(BlockedRange(0, static_cast(near_pairs.size()))); + body(Range(0, static_cast(near_pairs.size()))); LOGLN_CHAT(""); } diff --git a/modules/stitching/src/precomp.hpp b/modules/stitching/src/precomp.hpp index 9e056bc0fa..b0fa2b1577 100644 --- a/modules/stitching/src/precomp.hpp +++ b/modules/stitching/src/precomp.hpp @@ -53,6 +53,7 @@ #include #include #include "opencv2/core.hpp" +#include "opencv2/core/utility.hpp" #include "opencv2/stitching.hpp" #include "opencv2/stitching/detail/autocalib.hpp" #include "opencv2/stitching/detail/blenders.hpp" diff --git a/modules/video/src/bgfg_gaussmix2.cpp b/modules/video/src/bgfg_gaussmix2.cpp index 35f5d0678a..2c99c01b05 100644 --- a/modules/video/src/bgfg_gaussmix2.cpp +++ b/modules/video/src/bgfg_gaussmix2.cpp @@ -702,14 +702,14 @@ void BackgroundSubtractorMOG2Impl::apply(InputArray _image, OutputArray _fgmask, parallel_for_(Range(0, image.rows), MOG2Invoker(image, fgmask, - (GMM*)bgmodel.data, - (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols), - bgmodelUsedModes.data, nmixtures, (float)learningRate, - (float)varThreshold, - backgroundRatio, varThresholdGen, - fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau, - bShadowDetection, nShadowDetection), - 
image.total()/(double)(1 << 16)); + (GMM*)bgmodel.data, + (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols), + bgmodelUsedModes.data, nmixtures, (float)learningRate, + (float)varThreshold, + backgroundRatio, varThresholdGen, + fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau, + bShadowDetection, nShadowDetection), + image.total()/(double)(1 << 16)); } void BackgroundSubtractorMOG2Impl::getBackgroundImage(OutputArray backgroundImage) const diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index e465172688..155737ba79 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -167,7 +167,7 @@ typedef float acctype; typedef float itemtype; #endif -void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const +void cv::detail::LKTrackerInvoker::operator()(const Range& range) const { Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f); const Mat& I = *prevImg; @@ -181,7 +181,7 @@ void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf); Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn); - for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ ) + for( int ptidx = range.start; ptidx < range.end; ptidx++ ) { Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level)); Point2f nextPt; @@ -746,11 +746,11 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg, typedef cv::detail::LKTrackerInvoker LKTrackerInvoker; #endif - parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI, - nextPyr[level * lvlStep2], prevPts, nextPts, - status, err, - winSize, criteria, level, maxLevel, - flags, (float)minEigThreshold)); + parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI, + nextPyr[level * lvlStep2], prevPts, nextPts, + status, err, + winSize, 
criteria, level, maxLevel, + flags, (float)minEigThreshold)); } } diff --git a/modules/video/src/lkpyramid.hpp b/modules/video/src/lkpyramid.hpp index 390e46bf99..4aff37ef84 100644 --- a/modules/video/src/lkpyramid.hpp +++ b/modules/video/src/lkpyramid.hpp @@ -7,7 +7,7 @@ namespace detail typedef short deriv_type; - struct LKTrackerInvoker + struct LKTrackerInvoker : ParallelLoopBody { LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg, const Point2f* _prevPts, Point2f* _nextPts, @@ -15,7 +15,7 @@ namespace detail Size _winSize, TermCriteria _criteria, int _level, int _maxLevel, int _flags, float _minEigThreshold ); - void operator()(const BlockedRange& range) const; + void operator()(const Range& range) const; const Mat* prevImg; const Mat* nextImg; diff --git a/android/README.android b/platforms/android/README.android similarity index 100% rename from android/README.android rename to platforms/android/README.android diff --git a/platforms/android/android.toolchain.cmake b/platforms/android/android.toolchain.cmake new file mode 100644 index 0000000000..0f7e340678 --- /dev/null +++ b/platforms/android/android.toolchain.cmake @@ -0,0 +1,1632 @@ +# Copyright (c) 2010-2011, Ethan Rublee +# Copyright (c) 2011-2013, Andrey Kamaev +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. The name of the copyright holders may be used to endorse or promote +# products derived from this software without specific prior written +# permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +# ------------------------------------------------------------------------------ +# Android CMake toolchain file, for use with the Android NDK r5-r8 +# Requires cmake 2.6.3 or newer (2.8.5 or newer is recommended). +# See home page: https://github.com/taka-no-me/android-cmake +# +# The file is maintained by the OpenCV project. The latest version can be obtained at +# http://code.opencv.org/projects/opencv/repository/revisions/master/changes/android/android.toolchain.cmake +# +# Usage Linux: +# $ export ANDROID_NDK=/absolute/path/to/the/android-ndk +# $ mkdir build && cd build +# $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake .. +# $ make -j8 +# +# Usage Linux (using standalone toolchain): +# $ export ANDROID_STANDALONE_TOOLCHAIN=/absolute/path/to/android-toolchain +# $ mkdir build && cd build +# $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake .. +# $ make -j8 +# +# Usage Windows: +# You need a native port of make to build your project. +# Android NDK r7 (or newer) already has make.exe on board. +# For older NDK you have to install it separately. 
+# For example, this one: http://gnuwin32.sourceforge.net/packages/make.htm +# +# $ SET ANDROID_NDK=C:\absolute\path\to\the\android-ndk +# $ mkdir build && cd build +# $ cmake.exe -G"MinGW Makefiles" +# -DCMAKE_TOOLCHAIN_FILE=path\to\the\android.toolchain.cmake +# -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%\prebuilt\windows\bin\make.exe" .. +# $ cmake.exe --build . +# +# +# Options (can be set as cmake parameters: -D<option_name>=<value>): +# ANDROID_NDK=/opt/android-ndk - path to the NDK root. +# Can be set as environment variable. Can be set only at first cmake run. +# +# ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain - path to the +# standalone toolchain. This option is not used if full NDK is found +# (ignored if ANDROID_NDK is set). +# Can be set as environment variable. Can be set only at first cmake run. +# +# ANDROID_ABI=armeabi-v7a - specifies the target Application Binary +# Interface (ABI). This option nearly matches to the APP_ABI variable +# used by ndk-build tool from Android NDK. +# +# Possible targets are: +# "armeabi" - matches to the NDK ABI with the same name. +# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation. +# "armeabi-v7a" - matches to the NDK ABI with the same name. +# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation. +# "armeabi-v7a with NEON" - same as armeabi-v7a, but +# sets NEON as floating-point unit +# "armeabi-v7a with VFPV3" - same as armeabi-v7a, but +# sets VFPV3 as floating-point unit (has 32 registers instead of 16). +# "armeabi-v6 with VFP" - tuned for ARMv6 processors having VFP. +# "x86" - matches to the NDK ABI with the same name. +# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation. +# "mips" - matches to the NDK ABI with the same name +# (It is not tested on real devices by the authors of this toolchain) +# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation. +# +# ANDROID_NATIVE_API_LEVEL=android-8 - level of Android API to compile for. 
+# Option is read-only when standalone toolchain is used. +# +# ANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.6 - the name of compiler +# toolchain to be used. The list of possible values depends on the NDK +# version. For NDK r8c the possible values are: +# +# * arm-linux-androideabi-4.4.3 +# * arm-linux-androideabi-4.6 +# * arm-linux-androideabi-clang3.1 +# * mipsel-linux-android-4.4.3 +# * mipsel-linux-android-4.6 +# * mipsel-linux-android-clang3.1 +# * x86-4.4.3 +# * x86-4.6 +# * x86-clang3.1 +# +# ANDROID_FORCE_ARM_BUILD=OFF - set ON to generate 32-bit ARM instructions +# instead of Thumb. Is not available for "x86" (inapplicable) and +# "armeabi-v6 with VFP" (is forced to be ON) ABIs. +# +# ANDROID_NO_UNDEFINED=ON - set ON to show all undefined symbols as linker +# errors even if they are not used. +# +# ANDROID_SO_UNDEFINED=OFF - set ON to allow undefined symbols in shared +# libraries. Automatically turned for NDK r5x and r6x due to GLESv2 +# problems. +# +# LIBRARY_OUTPUT_PATH_ROOT=${CMAKE_SOURCE_DIR} - where to output binary +# files. See additional details below. +# +# ANDROID_SET_OBSOLETE_VARIABLES=ON - if set, then toolchain defines some +# obsolete variables which were used by previous versions of this file for +# backward compatibility. +# +# ANDROID_STL=gnustl_static - specify the runtime to use. +# +# Possible values are: +# none -> Do not configure the runtime. +# system -> Use the default minimal system C++ runtime library. +# Implies -fno-rtti -fno-exceptions. +# Is not available for standalone toolchain. +# system_re -> Use the default minimal system C++ runtime library. +# Implies -frtti -fexceptions. +# Is not available for standalone toolchain. +# gabi++_static -> Use the GAbi++ runtime as a static library. +# Implies -frtti -fno-exceptions. +# Available for NDK r7 and newer. +# Is not available for standalone toolchain. +# gabi++_shared -> Use the GAbi++ runtime as a shared library. +# Implies -frtti -fno-exceptions. 
+# Available for NDK r7 and newer. +# Is not available for standalone toolchain. +# stlport_static -> Use the STLport runtime as a static library. +# Implies -fno-rtti -fno-exceptions for NDK before r7. +# Implies -frtti -fno-exceptions for NDK r7 and newer. +# Is not available for standalone toolchain. +# stlport_shared -> Use the STLport runtime as a shared library. +# Implies -fno-rtti -fno-exceptions for NDK before r7. +# Implies -frtti -fno-exceptions for NDK r7 and newer. +# Is not available for standalone toolchain. +# gnustl_static -> Use the GNU STL as a static library. +# Implies -frtti -fexceptions. +# gnustl_shared -> Use the GNU STL as a shared library. +# Implies -frtti -fno-exceptions. +# Available for NDK r7b and newer. +# Silently degrades to gnustl_static if not available. +# +# ANDROID_STL_FORCE_FEATURES=ON - turn rtti and exceptions support based on +# chosen runtime. If disabled, then the user is responsible for setting +# these options. +# +# What?: +# android-cmake toolchain searches for NDK/toolchain in the following order: +# ANDROID_NDK - cmake parameter +# ANDROID_NDK - environment variable +# ANDROID_STANDALONE_TOOLCHAIN - cmake parameter +# ANDROID_STANDALONE_TOOLCHAIN - environment variable +# ANDROID_NDK - default locations +# ANDROID_STANDALONE_TOOLCHAIN - default locations +# +# Make sure to do the following in your scripts: +# SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${my_cxx_flags}" ) +# SET( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${my_cxx_flags}" ) +# The flags will be prepopulated with critical flags, so don't lose them. +# Also be aware that toolchain also sets configuration-specific compiler +# flags and linker flags. +# +# ANDROID and BUILD_ANDROID will be set to true, you may test any of these +# variables to make necessary Android-specific configuration changes. +# +# Also ARMEABI or ARMEABI_V7A or X86 or MIPS will be set true, mutually +# exclusive. NEON option will be set true if VFP is set to NEON. 
+# +# LIBRARY_OUTPUT_PATH_ROOT should be set in cache to determine where Android +# libraries will be installed. +# Default is ${CMAKE_SOURCE_DIR}, and the android libs will always be +# under the ${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME} +# (depending on the target ABI). This is convenient for Android packaging. +# +# Change Log: +# - initial version December 2010 +# - April 2011 +# [+] added possibility to build with NDK (without standalone toolchain) +# [+] support cross-compilation on Windows (native, no cygwin support) +# [+] added compiler option to force "char" type to be signed +# [+] added toolchain option to compile to 32-bit ARM instructions +# [+] added toolchain option to disable SWIG search +# [+] added platform "armeabi-v7a with VFPV3" +# [~] ARM_TARGETS renamed to ARM_TARGET +# [+] EXECUTABLE_OUTPUT_PATH is set by toolchain (required on Windows) +# [~] Fixed bug with ANDROID_API_LEVEL variable +# [~] turn off SWIG search if it is not found first time +# - May 2011 +# [~] ANDROID_LEVEL is renamed to ANDROID_API_LEVEL +# [+] ANDROID_API_LEVEL is detected by toolchain if not specified +# [~] added guard to prevent changing of output directories on the first +# cmake pass +# [~] toolchain exits with error if ARM_TARGET is not recognized +# - June 2011 +# [~] default NDK path is updated for version r5c +# [+] variable CMAKE_SYSTEM_PROCESSOR is set based on ARM_TARGET +# [~] toolchain install directory is added to linker paths +# [-] removed SWIG-related stuff from toolchain +# [+] added macro find_host_package, find_host_program to search +# packages/programs on the host system +# [~] fixed path to STL library +# - July 2011 +# [~] fixed options caching +# [~] search for all supported NDK versions +# [~] allowed spaces in NDK path +# - September 2011 +# [~] updated for NDK r6b +# - November 2011 +# [*] rewritten for NDK r7 +# [+] x86 toolchain support (experimental) +# [+] added "armeabi-v6 with VFP" ABI for ARMv6 processors. 
+# [~] improved compiler and linker flags management +# [+] support different build flags for Release and Debug configurations +# [~] by default compiler flags are the same as those used by ndk-build (but only +# where reasonable) +# [~] ANDROID_NDK_TOOLCHAIN_ROOT is split into ANDROID_STANDALONE_TOOLCHAIN +# and ANDROID_TOOLCHAIN_ROOT +# [~] ARM_TARGET is renamed to ANDROID_ABI +# [~] ARMEABI_NDK_NAME is renamed to ANDROID_NDK_ABI_NAME +# [~] ANDROID_API_LEVEL is renamed to ANDROID_NATIVE_API_LEVEL +# - January 2012 +# [+] added stlport_static support (experimental) +# [+] added special check for cygwin +# [+] filtered out hidden files (starting with .) while globbing inside NDK +# [+] automatically applied GLESv2 linkage fix for NDK revisions 5-6 +# [+] added ANDROID_GET_ABI_RAWNAME to get NDK ABI names by CMake flags +# - February 2012 +# [+] updated for NDK r7b +# [~] fixed cmake try_compile() command +# [~] Fix for missing install_name_tool on OS X +# - March 2012 +# [~] fixed incorrect C compiler flags +# [~] fixed CMAKE_SYSTEM_PROCESSOR change on ANDROID_ABI change +# [+] improved toolchain loading speed +# [+] added assembler language support (.S) +# [+] allowed preset search paths and extra search suffixes +# - April 2012 +# [+] updated for NDK r7c +# [~] fixed most of the problems with compiler/linker flags and caching +# [+] added option ANDROID_FUNCTION_LEVEL_LINKING +# - May 2012 +# [+] updated for NDK r8 +# [+] added mips architecture support +# - August 2012 +# [+] updated for NDK r8b +# [~] all intermediate files generated by toolchain are moved to CMakeFiles +# [~] libstdc++ and libsupc are removed from explicit link libraries +# [+] added CCache support (via NDK_CCACHE environment or cmake variable) +# [+] added gold linker support for NDK r8b +# [~] fixed mips linker flags for NDK r8b +# - September 2012 +# [+] added NDK release name detection (see ANDROID_NDK_RELEASE) +# [+] added support for all C++ runtimes from NDK +# (system, gabi++, stlport, gnustl) +# 
[+] improved warnings on known issues of NDKs +# [~] use gold linker as default if available (NDK r8b) +# [~] globally turned off rpath +# [~] compiler options are aligned with NDK r8b +# - October 2012 +# [~] fixed C++ linking: explicitly link with math library (OpenCV #2426) +# - November 2012 +# [+] updated for NDK r8c +# [+] added support for clang compiler +# - December 2012 +# [+] suppress warning about unused CMAKE_TOOLCHAIN_FILE variable +# [+] adjust API level to closest compatible as NDK does +# [~] fixed ccache full path search +# [+] updated for NDK r8d +# [~] compiler options are aligned with NDK r8d +# - March 2013 +# [+] updated for NDK r8e (x86 version) +# [+] support x86_64 version of NDK +# ------------------------------------------------------------------------------ + +cmake_minimum_required( VERSION 2.6.3 ) + +if( DEFINED CMAKE_CROSSCOMPILING ) + # subsequent toolchain loading is not really needed + return() +endif() + +if( CMAKE_TOOLCHAIN_FILE ) + # touch toolchain variable only to suppress "unused variable" warning +endif() + +get_property( _CMAKE_IN_TRY_COMPILE GLOBAL PROPERTY IN_TRY_COMPILE ) +if( _CMAKE_IN_TRY_COMPILE ) + include( "${CMAKE_CURRENT_SOURCE_DIR}/../android.toolchain.config.cmake" OPTIONAL ) +endif() + +# this one is important +set( CMAKE_SYSTEM_NAME Linux ) +# this one not so much +set( CMAKE_SYSTEM_VERSION 1 ) + +# rpath makes little sense for Android +set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." 
) + +set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" ) +if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS) + if( CMAKE_HOST_WIN32 ) + file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS ) + set( ANDROID_NDK_SEARCH_PATHS "${ANDROID_NDK_SEARCH_PATHS}/android-ndk" "$ENV{SystemDrive}/NVPACK/android-ndk" ) + else() + file( TO_CMAKE_PATH "$ENV{HOME}" ANDROID_NDK_SEARCH_PATHS ) + set( ANDROID_NDK_SEARCH_PATHS /opt/android-ndk "${ANDROID_NDK_SEARCH_PATHS}/NVPACK/android-ndk" ) + endif() +endif() +if(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH) + set( ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH /opt/android-toolchain ) +endif() + +set( ANDROID_SUPPORTED_ABIS_arm "armeabi-v7a;armeabi;armeabi-v7a with NEON;armeabi-v7a with VFPV3;armeabi-v6 with VFP" ) +set( ANDROID_SUPPORTED_ABIS_x86 "x86" ) +set( ANDROID_SUPPORTED_ABIS_mipsel "mips" ) + +set( ANDROID_DEFAULT_NDK_API_LEVEL 8 ) +set( ANDROID_DEFAULT_NDK_API_LEVEL_x86 9 ) +set( ANDROID_DEFAULT_NDK_API_LEVEL_mips 9 ) + + +# __LIST_FILTER( listvar regex ) +# Removes from the list variable named <listvar> every element that matches <regex>. +macro( __LIST_FILTER listvar regex ) + if( ${listvar} ) + foreach( __val ${${listvar}} ) + if( __val MATCHES "${regex}" ) + list( REMOVE_ITEM ${listvar} "${__val}" ) + endif() + endforeach() + endif() +endmacro() + +# __INIT_VARIABLE( var_name [PATH] candidate... [VALUES literal...] ) +# Gives <var_name> an initial value when it is currently empty, using the first candidate that yields a non-empty value. +# A candidate is a variable name; an ENV_ prefix reads the environment variable of that name instead, and an +# OBSOLETE_ prefix additionally prints a deprecation warning when that candidate is used (not during try_compile). +# After the VALUES keyword, candidates that are not defined variables are taken as literal values. +# With PATH: the cache entry of <var_name> is unset first if its value is not an existing path, only candidates that +# exist on disk are accepted, and the resulting value is normalized with file( TO_CMAKE_PATH ). +macro( __INIT_VARIABLE var_name ) + set( __test_path 0 ) + foreach( __var ${ARGN} ) + if( __var STREQUAL "PATH" ) + set( __test_path 1 ) + break() + endif() + endforeach() + if( __test_path AND NOT EXISTS "${${var_name}}" ) + unset( ${var_name} CACHE ) + endif() + if( "${${var_name}}" STREQUAL "" ) + set( __values 0 ) + foreach( __var ${ARGN} ) + if( __var STREQUAL "VALUES" ) + set( __values 1 ) + elseif( NOT __var STREQUAL "PATH" ) + set( __obsolete 0 ) + if( __var MATCHES "^OBSOLETE_.*$" ) + string( REPLACE "OBSOLETE_" "" __var "${__var}" ) + set( __obsolete 1 ) + endif() + if( __var MATCHES "^ENV_.*$" ) + string( REPLACE "ENV_" "" __var "${__var}" ) + set( __value "$ENV{${__var}}" ) + elseif( DEFINED
${__var} ) + set( __value "${${__var}}" ) + else() + if( __values ) + set( __value "${__var}" ) + else() + set( __value "" ) + endif() + endif() + if( NOT "${__value}" STREQUAL "" ) + if( __test_path ) + if( EXISTS "${__value}" ) + file( TO_CMAKE_PATH "${__value}" ${var_name} ) + if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE ) + message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." ) + endif() + break() + endif() + else() + set( ${var_name} "${__value}" ) + if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE ) + message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." ) + endif() + break() + endif() + endif() + endif() + endforeach() + unset( __value ) + unset( __values ) + unset( __obsolete ) + elseif( __test_path ) + file( TO_CMAKE_PATH "${${var_name}}" ${var_name} ) + endif() + unset( __test_path ) +endmacro() + +# __DETECT_NATIVE_API_LEVEL( _var _path ) +# Reads the "#define __ANDROID_API__ <n>" line from the header file <_path> and stores <n> in <_var>. +# Reports SEND_ERROR when the file contains no such line. +macro( __DETECT_NATIVE_API_LEVEL _var _path ) + SET( __ndkApiLevelRegex "^[\t ]*#define[\t ]+__ANDROID_API__[\t ]+([0-9]+)[\t ]*$" ) + FILE( STRINGS ${_path} __apiFileContent REGEX "${__ndkApiLevelRegex}" ) + if( NOT __apiFileContent ) + message( SEND_ERROR "Could not get Android native API level. Probably you have specified invalid level value, or your copy of NDK/toolchain is broken."
) + endif() + string( REGEX REPLACE "${__ndkApiLevelRegex}" "\\1" ${_var} "${__apiFileContent}" ) + unset( __apiFileContent ) + unset( __ndkApiLevelRegex ) +endmacro() + +# __DETECT_TOOLCHAIN_MACHINE_NAME( _var _root ) +# Stores in <_var> the name of the "*-gcc" executable found in <_root>/bin with its "-gcc" part removed. +# <_var> is set to "" when <_root> does not exist, or (outside try_compile, with a warning) when the number of +# matching executables is not exactly one. +macro( __DETECT_TOOLCHAIN_MACHINE_NAME _var _root ) + if( EXISTS "${_root}" ) + file( GLOB __gccExePath RELATIVE "${_root}/bin/" "${_root}/bin/*-gcc${TOOL_OS_SUFFIX}" ) + __LIST_FILTER( __gccExePath "^[.].*" ) + list( LENGTH __gccExePath __gccExePathsCount ) + if( NOT __gccExePathsCount EQUAL 1 AND NOT _CMAKE_IN_TRY_COMPILE ) + message( WARNING "Could not determine machine name for compiler from ${_root}" ) + set( ${_var} "" ) + else() + get_filename_component( __gccExeName "${__gccExePath}" NAME_WE ) + string( REPLACE "-gcc" "" ${_var} "${__gccExeName}" ) + endif() + unset( __gccExePath ) + unset( __gccExePathsCount ) + unset( __gccExeName ) + else() + set( ${_var} "" ) + endif() +endmacro() + + +# fight against cygwin +set( ANDROID_FORBID_SYGWIN TRUE CACHE BOOL "Prevent cmake from working under cygwin and using cygwin tools") +mark_as_advanced( ANDROID_FORBID_SYGWIN ) +if( ANDROID_FORBID_SYGWIN ) + if( CYGWIN ) + message( FATAL_ERROR "Android NDK and android-cmake toolchain are not welcome Cygwin. It is unlikely that this cmake toolchain will work under cygwin. But if you want to try then you can set cmake variable ANDROID_FORBID_SYGWIN to FALSE and rerun cmake."
) + endif() + + if( CMAKE_HOST_WIN32 ) + # remove cygwin from PATH + set( __new_path "$ENV{PATH}") + __LIST_FILTER( __new_path "cygwin" ) + set(ENV{PATH} "${__new_path}") + unset(__new_path) + endif() +endif() + + +# detect current host platform +if( NOT DEFINED ANDROID_NDK_HOST_X64 AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64") + set( ANDROID_NDK_HOST_X64 1 CACHE BOOL "Try to use 64-bit compiler toolchain" ) + mark_as_advanced( ANDROID_NDK_HOST_X64 ) +endif() + +set( TOOL_OS_SUFFIX "" ) +if( CMAKE_HOST_APPLE ) + set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86_64" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME2 "darwin-x86" ) +elseif( CMAKE_HOST_WIN32 ) + set( ANDROID_NDK_HOST_SYSTEM_NAME "windows-x86_64" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME2 "windows" ) + set( TOOL_OS_SUFFIX ".exe" ) +elseif( CMAKE_HOST_UNIX ) + set( ANDROID_NDK_HOST_SYSTEM_NAME "linux-x86_64" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME2 "linux-x86" ) +else() + message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" ) +endif() + +if( NOT ANDROID_NDK_HOST_X64 ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) +endif() + +# see if we have path to Android NDK +__INIT_VARIABLE( ANDROID_NDK PATH ENV_ANDROID_NDK ) +if( NOT ANDROID_NDK ) + # see if we have path to Android standalone toolchain + __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ENV_ANDROID_STANDALONE_TOOLCHAIN OBSOLETE_ANDROID_NDK_TOOLCHAIN_ROOT OBSOLETE_ENV_ANDROID_NDK_TOOLCHAIN_ROOT ) + + if( NOT ANDROID_STANDALONE_TOOLCHAIN ) + #try to find Android NDK in one of the the default locations + set( __ndkSearchPaths ) + foreach( __ndkSearchPath ${ANDROID_NDK_SEARCH_PATHS} ) + foreach( suffix ${ANDROID_SUPPORTED_NDK_VERSIONS} ) + list( APPEND __ndkSearchPaths "${__ndkSearchPath}${suffix}" ) + endforeach() + endforeach() + __INIT_VARIABLE( ANDROID_NDK PATH VALUES ${__ndkSearchPaths} ) + unset( __ndkSearchPaths ) + + if( ANDROID_NDK ) + message( STATUS "Using default path for 
Android NDK: ${ANDROID_NDK}" ) + message( STATUS " If you prefer to use a different location, please define a cmake or environment variable: ANDROID_NDK" ) + else() + #try to find Android standalone toolchain in one of the the default locations + __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH ) + + if( ANDROID_STANDALONE_TOOLCHAIN ) + message( STATUS "Using default path for standalone toolchain ${ANDROID_STANDALONE_TOOLCHAIN}" ) + message( STATUS " If you prefer to use a different location, please define the variable: ANDROID_STANDALONE_TOOLCHAIN" ) + endif( ANDROID_STANDALONE_TOOLCHAIN ) + endif( ANDROID_NDK ) + endif( NOT ANDROID_STANDALONE_TOOLCHAIN ) +endif( NOT ANDROID_NDK ) +# remember found paths +if( ANDROID_NDK ) + get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE ) + # try to detect change + if( CMAKE_AR ) + string( LENGTH "${ANDROID_NDK}" __length ) + string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath ) + if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK ) + message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first. + " ) + endif() + unset( __androidNdkPreviousPath ) + unset( __length ) + endif() + set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE ) + set( BUILD_WITH_ANDROID_NDK True ) + file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) + string( REGEX MATCH r[0-9]+[a-z]? 
ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) +elseif( ANDROID_STANDALONE_TOOLCHAIN ) + get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE ) + # try to detect change + if( CMAKE_AR ) + string( LENGTH "${ANDROID_STANDALONE_TOOLCHAIN}" __length ) + string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidStandaloneToolchainPreviousPath ) + if( NOT __androidStandaloneToolchainPreviousPath STREQUAL ANDROID_STANDALONE_TOOLCHAIN ) + message( FATAL_ERROR "It is not possible to change path to the Android standalone toolchain on subsequent run." ) + endif() + unset( __androidStandaloneToolchainPreviousPath ) + unset( __length ) + endif() + set( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" CACHE INTERNAL "Path of the Android standalone toolchain" FORCE ) + set( BUILD_WITH_STANDALONE_TOOLCHAIN True ) +else() + list(GET ANDROID_NDK_SEARCH_PATHS 0 ANDROID_NDK_SEARCH_PATH) + message( FATAL_ERROR "Could not find neither Android NDK nor Android standalone toolchain. + You should either set an environment variable: + export ANDROID_NDK=~/my-android-ndk + or + export ANDROID_STANDALONE_TOOLCHAIN=~/my-android-toolchain + or put the toolchain or NDK in the default path: + sudo ln -s ~/my-android-ndk ${ANDROID_NDK_SEARCH_PATH} + sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" ) +endif() + +# get all the details about standalone toolchain +if( BUILD_WITH_STANDALONE_TOOLCHAIN ) + __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" ) + set( ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} ) + set( __availableToolchains "standalone" ) + __DETECT_TOOLCHAIN_MACHINE_NAME( __availableToolchainMachines "${ANDROID_STANDALONE_TOOLCHAIN}" ) + if( NOT __availableToolchainMachines ) + message( FATAL_ERROR "Could not determine machine name of your toolchain. 
Probably your Android standalone toolchain is broken." ) + endif() + if( __availableToolchainMachines MATCHES i686 ) + set( __availableToolchainArchs "x86" ) + elseif( __availableToolchainMachines MATCHES arm ) + set( __availableToolchainArchs "arm" ) + elseif( __availableToolchainMachines MATCHES mipsel ) + set( __availableToolchainArchs "mipsel" ) + endif() + execute_process( COMMAND "${ANDROID_STANDALONE_TOOLCHAIN}/bin/${__availableToolchainMachines}-gcc${TOOL_OS_SUFFIX}" -dumpversion + OUTPUT_VARIABLE __availableToolchainCompilerVersions OUTPUT_STRIP_TRAILING_WHITESPACE ) + string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?" __availableToolchainCompilerVersions "${__availableToolchainCompilerVersions}" ) + if( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/bin/clang${TOOL_OS_SUFFIX}" ) + list( APPEND __availableToolchains "standalone-clang" ) + list( APPEND __availableToolchainMachines ${__availableToolchainMachines} ) + list( APPEND __availableToolchainArchs ${__availableToolchainArchs} ) + list( APPEND __availableToolchainCompilerVersions ${__availableToolchainCompilerVersions} ) + endif() +endif() + +# __GLOB_NDK_TOOLCHAINS( __availableToolchainsVar __availableToolchainsLst __host_system_name ) +# For every toolchain name in the list variable <__availableToolchainsLst>, detects the gcc machine name under +# ${ANDROID_NDK}/toolchains/<name>/prebuilt/<__host_system_name>. When detection succeeds the name is appended to +# <__availableToolchainsVar>, and its machine name, arch (text before the first "-") and trailing version number are +# appended to the parallel __availableToolchainMachines / Archs / CompilerVersions lists. +# A "-clang3.N" name that has no prebuilt/ directory of its own is probed through the matching "-4.6" gcc toolchain. +macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name ) + foreach( __toolchain ${${__availableToolchainsLst}} ) + if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" ) + string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" ) + else() + set( __gcc_toolchain "${__toolchain}" ) + endif() + __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" ) + if( __machine ) + string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" ) + string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" ) + list( APPEND __availableToolchainMachines "${__machine}" ) + list( APPEND __availableToolchainArchs "${__arch}" ) + list( APPEND __availableToolchainCompilerVersions
"${__version}" ) + list( APPEND ${__availableToolchainsVar} "${__toolchain}" ) + endif() + unset( __gcc_toolchain ) + endforeach() +endmacro() + +# get all the details about NDK +if( BUILD_WITH_ANDROID_NDK ) + file( GLOB ANDROID_SUPPORTED_NATIVE_API_LEVELS RELATIVE "${ANDROID_NDK}/platforms" "${ANDROID_NDK}/platforms/android-*" ) + string( REPLACE "android-" "" ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_SUPPORTED_NATIVE_API_LEVELS}" ) + set( __availableToolchains "" ) + set( __availableToolchainMachines "" ) + set( __availableToolchainArchs "" ) + set( __availableToolchainCompilerVersions "" ) + if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" ) + # do not go through all toolchains if we know the name + set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + if( __availableToolchains ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + endif() + endif() + endif() + if( NOT __availableToolchains ) + file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" ) + # guard on the globbed list itself: __availableToolchains is always empty inside this branch + if( __availableToolchainsLst ) + list(SORT __availableToolchainsLst) # we need clang to go after gcc + endif() + __LIST_FILTER( __availableToolchainsLst "^[.]" ) + __LIST_FILTER( __availableToolchainsLst "llvm" ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + if( __availableToolchains ) + set(
ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + endif() + endif() + endif() + if( NOT __availableToolchains ) + message( FATAL_ERROR "Could not find any working toolchain in the NDK. Probably your Android NDK is broken." ) + endif() +endif() + +# build list of available ABIs +# (union of the ANDROID_SUPPORTED_ABIS_<arch> lists for every architecture that has at least one detected toolchain) +set( ANDROID_SUPPORTED_ABIS "" ) +set( __uniqToolchainArchNames ${__availableToolchainArchs} ) +list( REMOVE_DUPLICATES __uniqToolchainArchNames ) +list( SORT __uniqToolchainArchNames ) +foreach( __arch ${__uniqToolchainArchNames} ) + list( APPEND ANDROID_SUPPORTED_ABIS ${ANDROID_SUPPORTED_ABIS_${__arch}} ) +endforeach() +unset( __uniqToolchainArchNames ) +if( NOT ANDROID_SUPPORTED_ABIS ) + message( FATAL_ERROR "No one of known Android ABIs is supported by this cmake toolchain." ) +endif() + +# choose target ABI +__INIT_VARIABLE( ANDROID_ABI OBSOLETE_ARM_TARGET OBSOLETE_ARM_TARGETS VALUES ${ANDROID_SUPPORTED_ABIS} ) +# verify that target ABI is supported +list( FIND ANDROID_SUPPORTED_ABIS "${ANDROID_ABI}" __androidAbiIdx ) +if( __androidAbiIdx EQUAL -1 ) + # join the ABI names with '", "' for the error text; the outermost quotes are supplied by the message itself + string( REPLACE ";" "\", \"" PRINTABLE_ANDROID_SUPPORTED_ABIS "${ANDROID_SUPPORTED_ABIS}" ) + message( FATAL_ERROR "Specified ANDROID_ABI = \"${ANDROID_ABI}\" is not supported by this cmake toolchain or your NDK/toolchain.
+ Supported values are: \"${PRINTABLE_ANDROID_SUPPORTED_ABIS}\" + " ) +endif() +unset( __androidAbiIdx ) + +# set target ABI options +if( ANDROID_ABI STREQUAL "x86" ) + set( X86 true ) + set( ANDROID_NDK_ABI_NAME "x86" ) + set( ANDROID_ARCH_NAME "x86" ) + set( ANDROID_ARCH_FULLNAME "x86" ) + set( ANDROID_LLVM_TRIPLE "i686-none-linux-android" ) + set( CMAKE_SYSTEM_PROCESSOR "i686" ) +elseif( ANDROID_ABI STREQUAL "mips" ) + set( MIPS true ) + set( ANDROID_NDK_ABI_NAME "mips" ) + set( ANDROID_ARCH_NAME "mips" ) + set( ANDROID_ARCH_FULLNAME "mipsel" ) + set( ANDROID_LLVM_TRIPLE "mipsel-none-linux-android" ) + set( CMAKE_SYSTEM_PROCESSOR "mips" ) +elseif( ANDROID_ABI STREQUAL "armeabi" ) + set( ARMEABI true ) + set( ANDROID_NDK_ABI_NAME "armeabi" ) + set( ANDROID_ARCH_NAME "arm" ) + set( ANDROID_ARCH_FULLNAME "arm" ) + set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" ) + set( CMAKE_SYSTEM_PROCESSOR "armv5te" ) +elseif( ANDROID_ABI STREQUAL "armeabi-v6 with VFP" ) + set( ARMEABI_V6 true ) + set( ANDROID_NDK_ABI_NAME "armeabi" ) + set( ANDROID_ARCH_NAME "arm" ) + set( ANDROID_ARCH_FULLNAME "arm" ) + set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" ) + set( CMAKE_SYSTEM_PROCESSOR "armv6" ) + # need always fallback to older platform + set( ARMEABI true ) +elseif( ANDROID_ABI STREQUAL "armeabi-v7a") + set( ARMEABI_V7A true ) + set( ANDROID_NDK_ABI_NAME "armeabi-v7a" ) + set( ANDROID_ARCH_NAME "arm" ) + set( ANDROID_ARCH_FULLNAME "arm" ) + set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" ) + set( CMAKE_SYSTEM_PROCESSOR "armv7-a" ) +elseif( ANDROID_ABI STREQUAL "armeabi-v7a with VFPV3" ) + set( ARMEABI_V7A true ) + set( ANDROID_NDK_ABI_NAME "armeabi-v7a" ) + set( ANDROID_ARCH_NAME "arm" ) + set( ANDROID_ARCH_FULLNAME "arm" ) + set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" ) + set( CMAKE_SYSTEM_PROCESSOR "armv7-a" ) + set( VFPV3 true ) +elseif( ANDROID_ABI STREQUAL "armeabi-v7a with NEON" ) + set( ARMEABI_V7A true ) + set( ANDROID_NDK_ABI_NAME 
"armeabi-v7a" ) + set( ANDROID_ARCH_NAME "arm" ) + set( ANDROID_ARCH_FULLNAME "arm" ) + set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" ) + set( CMAKE_SYSTEM_PROCESSOR "armv7-a" ) + set( VFPV3 true ) + set( NEON true ) +else() + message( SEND_ERROR "Unknown ANDROID_ABI=\"${ANDROID_ABI}\" is specified." ) +endif() + +if( CMAKE_BINARY_DIR AND EXISTS "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" ) + # really dirty hack + # it is not possible to change CMAKE_SYSTEM_PROCESSOR after the first run... + file( APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" "SET(CMAKE_SYSTEM_PROCESSOR \"${CMAKE_SYSTEM_PROCESSOR}\")\n" ) +endif() + +if( ANDROID_ARCH_NAME STREQUAL "arm" AND NOT ARMEABI_V6 ) + __INIT_VARIABLE( ANDROID_FORCE_ARM_BUILD OBSOLETE_FORCE_ARM VALUES OFF ) + set( ANDROID_FORCE_ARM_BUILD ${ANDROID_FORCE_ARM_BUILD} CACHE BOOL "Use 32-bit ARM instructions instead of Thumb-1" FORCE ) + mark_as_advanced( ANDROID_FORCE_ARM_BUILD ) +else() + unset( ANDROID_FORCE_ARM_BUILD CACHE ) +endif() + +# choose toolchain +if( ANDROID_TOOLCHAIN_NAME ) + list( FIND __availableToolchains "${ANDROID_TOOLCHAIN_NAME}" __toolchainIdx ) + if( __toolchainIdx EQUAL -1 ) + list( SORT __availableToolchains ) + string( REPLACE ";" "\n * " toolchains_list "${__availableToolchains}" ) + set( toolchains_list " * ${toolchains_list}") + message( FATAL_ERROR "Specified toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is missing in your NDK or broken. Please verify that your NDK is working or select another compiler toolchain. +To configure the toolchain set CMake variable ANDROID_TOOLCHAIN_NAME to one of the following values:\n${toolchains_list}\n" ) + endif() + list( GET __availableToolchainArchs ${__toolchainIdx} __toolchainArch ) + if( NOT __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME ) + message( SEND_ERROR "Selected toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is not able to compile binaries for the \"${ANDROID_ARCH_NAME}\" platform." 
) + endif() +else() + set( __toolchainIdx -1 ) + set( __applicableToolchains "" ) + set( __toolchainMaxVersion "0.0.0" ) + list( LENGTH __availableToolchains __availableToolchainsCount ) + math( EXPR __availableToolchainsCount "${__availableToolchainsCount}-1" ) + foreach( __idx RANGE ${__availableToolchainsCount} ) + list( GET __availableToolchainArchs ${__idx} __toolchainArch ) + if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME ) + list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion ) + if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion ) + set( __toolchainMaxVersion "${__toolchainVersion}" ) + set( __toolchainIdx ${__idx} ) + endif() + endif() + endforeach() + unset( __availableToolchainsCount ) + unset( __toolchainMaxVersion ) + unset( __toolchainVersion ) +endif() +unset( __toolchainArch ) +if( __toolchainIdx EQUAL -1 ) + message( FATAL_ERROR "No one of available compiler toolchains is able to compile for ${ANDROID_ARCH_NAME} platform." ) +endif() +list( GET __availableToolchains ${__toolchainIdx} ANDROID_TOOLCHAIN_NAME ) +list( GET __availableToolchainMachines ${__toolchainIdx} ANDROID_TOOLCHAIN_MACHINE_NAME ) +list( GET __availableToolchainCompilerVersions ${__toolchainIdx} ANDROID_COMPILER_VERSION ) + +unset( __toolchainIdx ) +unset( __availableToolchains ) +unset( __availableToolchainMachines ) +unset( __availableToolchainArchs ) +unset( __availableToolchainCompilerVersions ) + +# choose native API level +__INIT_VARIABLE( ANDROID_NATIVE_API_LEVEL ENV_ANDROID_NATIVE_API_LEVEL ANDROID_API_LEVEL ENV_ANDROID_API_LEVEL ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME} ANDROID_DEFAULT_NDK_API_LEVEL ) +string( REGEX MATCH "[0-9]+" ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" ) +# adjust API level +set( __real_api_level ${ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME}} ) +foreach( __level ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} ) + if( NOT __level GREATER 
ANDROID_NATIVE_API_LEVEL AND NOT __level LESS __real_api_level ) + set( __real_api_level ${__level} ) + endif() +endforeach() +if( __real_api_level AND NOT ANDROID_NATIVE_API_LEVEL EQUAL __real_api_level ) + message( STATUS "Adjusting Android API level 'android-${ANDROID_NATIVE_API_LEVEL}' to 'android-${__real_api_level}'") + set( ANDROID_NATIVE_API_LEVEL ${__real_api_level} ) +endif() +unset(__real_api_level) +# validate +list( FIND ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_NATIVE_API_LEVEL}" __levelIdx ) +if( __levelIdx EQUAL -1 ) + message( SEND_ERROR "Specified Android native API level 'android-${ANDROID_NATIVE_API_LEVEL}' is not supported by your NDK/toolchain." ) +else() + if( BUILD_WITH_ANDROID_NDK ) + __DETECT_NATIVE_API_LEVEL( __realApiLevel "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}/usr/include/android/api-level.h" ) + if( NOT __realApiLevel EQUAL ANDROID_NATIVE_API_LEVEL ) + message( SEND_ERROR "Specified Android API level (${ANDROID_NATIVE_API_LEVEL}) does not match to the level found (${__realApiLevel}). Probably your copy of NDK is broken." ) + endif() + unset( __realApiLevel ) + endif() + set( ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" CACHE STRING "Android API level for native code" FORCE ) + if( CMAKE_VERSION VERSION_GREATER "2.8" ) + list( SORT ANDROID_SUPPORTED_NATIVE_API_LEVELS ) + set_property( CACHE ANDROID_NATIVE_API_LEVEL PROPERTY STRINGS ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} ) + endif() +endif() +unset( __levelIdx ) + + +# remember target ABI +set( ANDROID_ABI "${ANDROID_ABI}" CACHE STRING "The target ABI for Android. If arm, then armeabi-v7a is recommended for hardware floating point." 
FORCE ) +if( CMAKE_VERSION VERSION_GREATER "2.8" ) + list( SORT ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME} ) + set_property( CACHE ANDROID_ABI PROPERTY STRINGS ${ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME}} ) +endif() + + +# runtime choice (STL, rtti, exceptions) +if( NOT ANDROID_STL ) + # honor legacy ANDROID_USE_STLPORT + if( DEFINED ANDROID_USE_STLPORT ) + if( ANDROID_USE_STLPORT ) + set( ANDROID_STL stlport_static ) + endif() + message( WARNING "You are using an obsolete variable ANDROID_USE_STLPORT to select the STL variant. Use -DANDROID_STL=stlport_static instead." ) + endif() + if( NOT ANDROID_STL ) + set( ANDROID_STL gnustl_static ) + endif() +endif() +set( ANDROID_STL "${ANDROID_STL}" CACHE STRING "C++ runtime" ) +set( ANDROID_STL_FORCE_FEATURES ON CACHE BOOL "automatically configure rtti and exceptions support based on C++ runtime" ) +mark_as_advanced( ANDROID_STL ANDROID_STL_FORCE_FEATURES ) + +if( BUILD_WITH_ANDROID_NDK ) + if( NOT "${ANDROID_STL}" MATCHES "^(none|system|system_re|gabi\\+\\+_static|gabi\\+\\+_shared|stlport_static|stlport_shared|gnustl_static|gnustl_shared)$") + message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\". +The possible values are: + none -> Do not configure the runtime. + system -> Use the default minimal system C++ runtime library. + system_re -> Same as system but with rtti and exceptions. + gabi++_static -> Use the GAbi++ runtime as a static library. + gabi++_shared -> Use the GAbi++ runtime as a shared library. + stlport_static -> Use the STLport runtime as a static library. + stlport_shared -> Use the STLport runtime as a shared library. + gnustl_static -> (default) Use the GNU STL as a static library. + gnustl_shared -> Use the GNU STL as a shared library. +" ) + endif() +elseif( BUILD_WITH_STANDALONE_TOOLCHAIN ) + if( NOT "${ANDROID_STL}" MATCHES "^(none|gnustl_static|gnustl_shared)$") + message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\". 
+The possible values are: + none -> Do not configure the runtime. + gnustl_static -> (default) Use the GNU STL as a static library. + gnustl_shared -> Use the GNU STL as a shared library. +" ) + endif() +endif() + +unset( ANDROID_RTTI ) +unset( ANDROID_EXCEPTIONS ) +unset( ANDROID_STL_INCLUDE_DIRS ) +unset( __libstl ) +unset( __libsupcxx ) + +if( NOT _CMAKE_IN_TRY_COMPILE AND ANDROID_NDK_RELEASE STREQUAL "r7b" AND ARMEABI_V7A AND NOT VFPV3 AND ANDROID_STL MATCHES "gnustl" ) + message( WARNING "The GNU STL armeabi-v7a binaries from NDK r7b can crash non-NEON devices. The files provided with NDK r7b were not configured properly, resulting in crashes on Tegra2-based devices and others when trying to use certain floating-point functions (e.g., cosf, sinf, expf). +You are strongly recommended to switch to another NDK release. +" ) +endif() + +if( NOT _CMAKE_IN_TRY_COMPILE AND X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" ) + message( WARNING "The x86 system header file from NDK r6 has incorrect definition for ptrdiff_t. 
You are recommended to upgrade to a newer NDK release or manually patch the header: +See https://android.googlesource.com/platform/development.git f907f4f9d4e56ccc8093df6fee54454b8bcab6c2 + diff --git a/ndk/platforms/android-9/arch-x86/include/machine/_types.h b/ndk/platforms/android-9/arch-x86/include/machine/_types.h + index 5e28c64..65892a1 100644 + --- a/ndk/platforms/android-9/arch-x86/include/machine/_types.h + +++ b/ndk/platforms/android-9/arch-x86/include/machine/_types.h + @@ -51,7 +51,11 @@ typedef long int ssize_t; + #endif + #ifndef _PTRDIFF_T + #define _PTRDIFF_T + -typedef long ptrdiff_t; + +# ifdef __ANDROID__ + + typedef int ptrdiff_t; + +# else + + typedef long ptrdiff_t; + +# endif + #endif +" ) +endif() + + +# setup paths and STL for standalone toolchain +if( BUILD_WITH_STANDALONE_TOOLCHAIN ) + set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" ) + set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" ) + set( ANDROID_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot" ) + + if( NOT ANDROID_STL STREQUAL "none" ) + set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/include/c++/${ANDROID_COMPILER_VERSION}" ) + if( ARMEABI_V7A AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}/bits" ) + list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}" ) + elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb/bits" ) + list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb" ) + else() + list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" ) + endif() + # always search static GNU STL to get the location of libsupc++.a + if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS 
"${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" ) + set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb" ) + elseif( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" ) + set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}" ) + elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libstdc++.a" ) + set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb" ) + elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libstdc++.a" ) + set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib" ) + endif() + if( __libstl ) + set( __libsupcxx "${__libstl}/libsupc++.a" ) + set( __libstl "${__libstl}/libstdc++.a" ) + endif() + if( NOT EXISTS "${__libsupcxx}" ) + message( FATAL_ERROR "The required libstdsupc++.a is missing in your standalone toolchain. + Usually it happens because of bug in make-standalone-toolchain.sh script from NDK r7, r7b and r7c. 
+ You need to either upgrade to newer NDK or manually copy + $ANDROID_NDK/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a + to + ${__libsupcxx} + " ) + endif() + if( ANDROID_STL STREQUAL "gnustl_shared" ) + if( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" ) + set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" ) + elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" ) + set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" ) + elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" ) + set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" ) + endif() + endif() + endif() +endif() + +# clang +if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" ) + set( ANDROID_COMPILER_IS_CLANG 1 ) + execute_process( COMMAND "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/clang${TOOL_OS_SUFFIX}" --version OUTPUT_VARIABLE ANDROID_CLANG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE ) + string( REGEX MATCH "[0-9]+[.][0-9]+" ANDROID_CLANG_VERSION "${ANDROID_CLANG_VERSION}") +elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" ) + string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}") + string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) + if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" ) + message( FATAL_ERROR "Could not find the Clang compiler driver" ) + endif() + set( ANDROID_COMPILER_IS_CLANG 1 ) + set( ANDROID_CLANG_TOOLCHAIN_ROOT 
"${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) +else() + set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) + unset( ANDROID_COMPILER_IS_CLANG CACHE ) +endif() + +string( REPLACE "." "" _clang_name "clang${ANDROID_CLANG_VERSION}" ) +if( NOT EXISTS "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" ) + set( _clang_name "clang" ) +endif() + + +# setup paths and STL for NDK +if( BUILD_WITH_ANDROID_NDK ) + set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" ) + + if( ANDROID_STL STREQUAL "none" ) + # do nothing + elseif( ANDROID_STL STREQUAL "system" ) + set( ANDROID_RTTI OFF ) + set( ANDROID_EXCEPTIONS OFF ) + set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" ) + elseif( ANDROID_STL STREQUAL "system_re" ) + set( ANDROID_RTTI ON ) + set( ANDROID_EXCEPTIONS ON ) + set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" ) + elseif( ANDROID_STL MATCHES "gabi" ) + if( ANDROID_NDK_RELEASE STRLESS "r7" ) + message( FATAL_ERROR "gabi++ is not awailable in your NDK. 
You have to upgrade to NDK r7 or newer to use gabi++.") + endif() + set( ANDROID_RTTI ON ) + set( ANDROID_EXCEPTIONS OFF ) + set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/gabi++/include" ) + set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gabi++/libs/${ANDROID_NDK_ABI_NAME}/libgabi++_static.a" ) + elseif( ANDROID_STL MATCHES "stlport" ) + if( NOT ANDROID_NDK_RELEASE STRLESS "r8d" ) + set( ANDROID_EXCEPTIONS ON ) + else() + set( ANDROID_EXCEPTIONS OFF ) + endif() + if( ANDROID_NDK_RELEASE STRLESS "r7" ) + set( ANDROID_RTTI OFF ) + else() + set( ANDROID_RTTI ON ) + endif() + set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/stlport/stlport" ) + set( __libstl "${ANDROID_NDK}/sources/cxx-stl/stlport/libs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" ) + elseif( ANDROID_STL MATCHES "gnustl" ) + set( ANDROID_EXCEPTIONS ON ) + set( ANDROID_RTTI ON ) + if( EXISTS "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" ) + if( ARMEABI_V7A AND ANDROID_COMPILER_VERSION VERSION_EQUAL "4.7" AND ANDROID_NDK_RELEASE STREQUAL "r8d" ) + # gnustl binary for 4.7 compiler is buggy :( + # TODO: look for right fix + set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.6" ) + else() + set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" ) + endif() + else() + set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++" ) + endif() + set( ANDROID_STL_INCLUDE_DIRS "${__libstl}/include" "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/include" ) + if( EXISTS "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" ) + set( __libstl "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" ) + else() + set( __libstl "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" ) + endif() + else() + message( FATAL_ERROR "Unknown runtime: ${ANDROID_STL}" ) + endif() + # find libsupc++.a - rtti & exceptions + if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" ) + if( ANDROID_NDK_RELEASE 
STRGREATER "r8" ) # r8b + set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + elseif( NOT ANDROID_NDK_RELEASE STRLESS "r7" AND ANDROID_NDK_RELEASE STRLESS "r8b") + set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) + else( ANDROID_NDK_RELEASE STRLESS "r7" ) + if( ARMEABI_V7A ) + if( ANDROID_FORCE_ARM_BUILD ) + set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" ) + else() + set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" ) + endif() + elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD ) + set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" ) + else() + set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" ) + endif() + endif() + if( NOT EXISTS "${__libsupcxx}") + # a bare ERROR keyword is not a message() mode and would be printed as plain text; SEND_ERROR actually fails the configure step + message( SEND_ERROR "Could not find libsupc++.a for a chosen platform. 
Either your NDK is not supported or is broken.") + endif() + endif() +endif() + + +# case of shared STL linkage +if( ANDROID_STL MATCHES "shared" AND DEFINED __libstl ) + string( REPLACE "_static.a" "_shared.so" __libstl "${__libstl}" ) + if( NOT _CMAKE_IN_TRY_COMPILE AND __libstl MATCHES "[.]so$" ) + get_filename_component( __libstlname "${__libstl}" NAME ) + execute_process( COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__libstl}" "${LIBRARY_OUTPUT_PATH}/${__libstlname}" RESULT_VARIABLE __fileCopyProcess ) + if( NOT __fileCopyProcess EQUAL 0 OR NOT EXISTS "${LIBRARY_OUTPUT_PATH}/${__libstlname}") + message( SEND_ERROR "Failed copying of ${__libstl} to the ${LIBRARY_OUTPUT_PATH}/${__libstlname}" ) + endif() + unset( __fileCopyProcess ) + unset( __libstlname ) + endif() +endif() + + +# ccache support +__INIT_VARIABLE( _ndk_ccache NDK_CCACHE ENV_NDK_CCACHE ) +if( _ndk_ccache ) + # test the cached path itself; the bare name NDK_CCACHE would be treated as a literal file name + if( DEFINED NDK_CCACHE AND NOT EXISTS "${NDK_CCACHE}" ) + unset( NDK_CCACHE CACHE ) + endif() + find_program( NDK_CCACHE "${_ndk_ccache}" DOC "The path to ccache binary") +else() + unset( NDK_CCACHE CACHE ) +endif() +unset( _ndk_ccache ) + + +# setup the cross-compiler +if( NOT CMAKE_C_COMPILER ) + if( NDK_CCACHE ) + set( CMAKE_C_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" ) + set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" ) + if( ANDROID_COMPILER_IS_CLANG ) + set( CMAKE_C_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" CACHE PATH "C compiler") + set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler") + else() + set( CMAKE_C_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler") + set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler") + endif() + else() + if( ANDROID_COMPILER_IS_CLANG ) 
+ set( CMAKE_C_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" CACHE PATH "C compiler") + set( CMAKE_CXX_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler") + else() + set( CMAKE_C_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler" ) + set( CMAKE_CXX_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler" ) + endif() + endif() + set( CMAKE_ASM_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "assembler" ) + set( CMAKE_STRIP "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-strip${TOOL_OS_SUFFIX}" CACHE PATH "strip" ) + set( CMAKE_AR "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ar${TOOL_OS_SUFFIX}" CACHE PATH "archive" ) + set( CMAKE_LINKER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ld${TOOL_OS_SUFFIX}" CACHE PATH "linker" ) + set( CMAKE_NM "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-nm${TOOL_OS_SUFFIX}" CACHE PATH "nm" ) + set( CMAKE_OBJCOPY "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objcopy${TOOL_OS_SUFFIX}" CACHE PATH "objcopy" ) + set( CMAKE_OBJDUMP "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objdump${TOOL_OS_SUFFIX}" CACHE PATH "objdump" ) + set( CMAKE_RANLIB "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ranlib${TOOL_OS_SUFFIX}" CACHE PATH "ranlib" ) +endif() + +set( _CMAKE_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_MACHINE_NAME}-" ) +if( CMAKE_VERSION VERSION_LESS 2.8.5 ) + set( CMAKE_ASM_COMPILER_ARG1 "-c" ) +endif() +if( APPLE ) + find_program( CMAKE_INSTALL_NAME_TOOL NAMES install_name_tool ) + if( NOT CMAKE_INSTALL_NAME_TOOL ) + message( FATAL_ERROR "Could not find install_name_tool, please check your installation." 
) + endif() + mark_as_advanced( CMAKE_INSTALL_NAME_TOOL ) +endif() + +# Force set compilers because standard identification works badly for us +include( CMakeForceCompiler ) +CMAKE_FORCE_C_COMPILER( "${CMAKE_C_COMPILER}" GNU ) +if( ANDROID_COMPILER_IS_CLANG ) + set( CMAKE_C_COMPILER_ID Clang) +endif() +set( CMAKE_C_PLATFORM_ID Linux ) +set( CMAKE_C_SIZEOF_DATA_PTR 4 ) +set( CMAKE_C_HAS_ISYSROOT 1 ) +set( CMAKE_C_COMPILER_ABI ELF ) +CMAKE_FORCE_CXX_COMPILER( "${CMAKE_CXX_COMPILER}" GNU ) +if( ANDROID_COMPILER_IS_CLANG ) + set( CMAKE_CXX_COMPILER_ID Clang) +endif() +set( CMAKE_CXX_PLATFORM_ID Linux ) +set( CMAKE_CXX_SIZEOF_DATA_PTR 4 ) +set( CMAKE_CXX_HAS_ISYSROOT 1 ) +set( CMAKE_CXX_COMPILER_ABI ELF ) +set( CMAKE_CXX_SOURCE_FILE_EXTENSIONS cc cp cxx cpp CPP c++ C ) +# force ASM compiler (required for CMake < 2.8.5) +set( CMAKE_ASM_COMPILER_ID_RUN TRUE ) +set( CMAKE_ASM_COMPILER_ID GNU ) +set( CMAKE_ASM_COMPILER_WORKS TRUE ) +set( CMAKE_ASM_COMPILER_FORCED TRUE ) +set( CMAKE_COMPILER_IS_GNUASM 1) +set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm ) + +# flags and definitions +remove_definitions( -DANDROID ) +add_definitions( -DANDROID ) + +if(ANDROID_SYSROOT MATCHES "[ ;\"]") + set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" ) + if( NOT _CMAKE_IN_TRY_COMPILE ) + # quotes will break try_compile and compiler identification + message(WARNING "Your Android system root has non-alphanumeric symbols. 
It can break compiler features detection and the whole build.") + endif() +else() + set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" ) +endif() + +# NDK flags +if( ARMEABI OR ARMEABI_V7A ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -funwind-tables" ) + if( NOT ANDROID_FORCE_ARM_BUILD AND NOT ARMEABI_V6 ) + set( ANDROID_CXX_FLAGS_RELEASE "-mthumb -fomit-frame-pointer -fno-strict-aliasing" ) + set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -finline-limit=64" ) + endif() + else() + # always compile ARMEABI_V6 in arm mode; otherwise there is no difference from ARMEABI + set( ANDROID_CXX_FLAGS_RELEASE "-marm -fomit-frame-pointer -fstrict-aliasing" ) + set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" ) + endif() + endif() +elseif( X86 ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funwind-tables" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" ) + else() + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fPIC" ) + endif() + set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer -fstrict-aliasing" ) + set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer -fno-strict-aliasing" ) +elseif( MIPS ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -fno-strict-aliasing -finline-functions -ffunction-sections -funwind-tables -fmessage-length=0" ) + set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer" ) + set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fno-inline-functions-called-once -fgcse-after-reload -frerun-cse-after-loop -frename-registers" ) + set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} 
-funswitch-loops -finline-limit=300" ) + endif() +else() + # no ABI branch matched: clear the per-configuration flags + set( ANDROID_CXX_FLAGS_RELEASE "" ) + set( ANDROID_CXX_FLAGS_DEBUG "" ) +endif() + +set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fsigned-char" ) # good/necessary when porting desktop libraries + +if( NOT X86 AND NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "-Wno-psabi ${ANDROID_CXX_FLAGS}" ) +endif() + +if( NOT ANDROID_COMPILER_VERSION VERSION_LESS "4.6" ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -no-canonical-prefixes" ) # see https://android-review.googlesource.com/#/c/47564/ +endif() + +# ABI-specific flags +if( ARMEABI_V7A ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv7-a -mfloat-abi=softfp" ) + if( NEON ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=neon" ) + elseif( VFPV3 ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3" ) + else() + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -mfpu=vfpv3-d16" ) + endif() +elseif( ARMEABI_V6 ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv6 -mfloat-abi=softfp -mfpu=vfp" ) # vfp == vfpv2 +elseif( ARMEABI ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" ) +endif() + +# STL +if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" ) + # NOTE(review): the rule templates below are identical in both branches and look like they lost their placeholder tokens; verify against the upstream toolchain file + if( ANDROID_STL MATCHES "gnustl" ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) + else() + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) + endif() + if ( X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" ) + # workaround "undefined reference to `__dso_handle'" problem + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) + endif() + if( EXISTS 
"${__libstl}" ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" ) + set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libstl}\"" ) + endif() + if( EXISTS "${__libsupcxx}" ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" ) + set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libsupcxx}\"" ) + # C objects: + set( CMAKE_C_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_C_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_C_LINK_EXECUTABLE " -o " ) + set( CMAKE_C_CREATE_SHARED_LIBRARY "${CMAKE_C_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" ) + set( CMAKE_C_CREATE_SHARED_MODULE "${CMAKE_C_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" ) + set( CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" ) + endif() + if( ANDROID_STL MATCHES "gnustl" ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} -lm" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} -lm" ) + set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lm" ) + endif() +endif() + +# variables controlling optional build flags +if (ANDROID_NDK_RELEASE STRLESS "r7") + # libGLESv2.so in NDK's prior to r7 refers to missing external symbols. + # So this flag option is required for all projects using OpenGL from native. 
+ __INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES ON ) +else() + __INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES OFF ) +endif() +__INIT_VARIABLE( ANDROID_NO_UNDEFINED OBSOLETE_NO_UNDEFINED VALUES ON ) +__INIT_VARIABLE( ANDROID_FUNCTION_LEVEL_LINKING VALUES ON ) +__INIT_VARIABLE( ANDROID_GOLD_LINKER VALUES ON ) +__INIT_VARIABLE( ANDROID_NOEXECSTACK VALUES ON ) +__INIT_VARIABLE( ANDROID_RELRO VALUES ON ) + +# cache entries for the optional flags; each description states what its own option does +set( ANDROID_NO_UNDEFINED ${ANDROID_NO_UNDEFINED} CACHE BOOL "Show all undefined symbols as linker errors" ) +set( ANDROID_SO_UNDEFINED ${ANDROID_SO_UNDEFINED} CACHE BOOL "Allows or disallows undefined symbols in shared libraries" ) +set( ANDROID_FUNCTION_LEVEL_LINKING ${ANDROID_FUNCTION_LEVEL_LINKING} CACHE BOOL "Puts each function and data item in its own section and lets the linker discard unused sections" ) +set( ANDROID_GOLD_LINKER ${ANDROID_GOLD_LINKER} CACHE BOOL "Enables gold linker (only avaialble for NDK r8b for ARM and x86 architectures on linux-86 and darwin-x86 hosts)" ) +set( ANDROID_NOEXECSTACK ${ANDROID_NOEXECSTACK} CACHE BOOL "Marks the stack of the produced binaries as non-executable" ) +set( ANDROID_RELRO ${ANDROID_RELRO} CACHE BOOL "Enables RELRO - a memory corruption mitigation technique" ) +mark_as_advanced( ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_FUNCTION_LEVEL_LINKING ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO ) + +# linker flags +set( ANDROID_LINKER_FLAGS "" ) + +if( ARMEABI_V7A ) + # this is *required* to use the following linker flags that routes around + # a CPU bug in some Cortex-A8 implementations: + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--fix-cortex-a8" ) +endif() + +if( ANDROID_NO_UNDEFINED ) + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" ) +endif() + +if( ANDROID_SO_UNDEFINED ) + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-allow-shlib-undefined" ) +endif() + +if( ANDROID_FUNCTION_LEVEL_LINKING ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fdata-sections -ffunction-sections" ) + set( 
ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--gc-sections" ) +endif() + +if( ANDROID_COMPILER_VERSION VERSION_EQUAL "4.6" ) + if( ANDROID_GOLD_LINKER AND (CMAKE_HOST_UNIX OR ANDROID_NDK_RELEASE STRGREATER "r8b") AND (ARMEABI OR ARMEABI_V7A OR X86) ) + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=gold" ) + elseif( ANDROID_NDK_RELEASE STRGREATER "r8b") + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=bfd" ) + elseif( ANDROID_NDK_RELEASE STREQUAL "r8b" AND ARMEABI AND NOT _CMAKE_IN_TRY_COMPILE ) + message( WARNING "The default bfd linker from arm GCC 4.6 toolchain can fail with 'unresolvable R_ARM_THM_CALL relocation' error message. See https://code.google.com/p/android/issues/detail?id=35342 + On Linux and OS X host platform you can workaround this problem using gold linker (default). + Rerun cmake with -DANDROID_GOLD_LINKER=ON option in case of problems. +" ) + endif() +endif() # version 4.6 + +if( ANDROID_NOEXECSTACK ) + if( ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -Xclang -mnoexecstack" ) + else() + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -Wa,--noexecstack" ) + endif() + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,noexecstack" ) +endif() + +if( ANDROID_RELRO ) + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now" ) +endif() + +if( ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "-Qunused-arguments ${ANDROID_CXX_FLAGS}" ) + if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD ) + set( ANDROID_CXX_FLAGS_RELEASE "-target thumbv7-none-linux-androideabi ${ANDROID_CXX_FLAGS_RELEASE}" ) + set( ANDROID_CXX_FLAGS_DEBUG "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS_DEBUG}" ) + else() + set( ANDROID_CXX_FLAGS "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS}" ) + endif() + if( BUILD_WITH_ANDROID_NDK ) + set( ANDROID_CXX_FLAGS "-gcc-toolchain ${ANDROID_TOOLCHAIN_ROOT} ${ANDROID_CXX_FLAGS}" ) + endif() +endif() + +# cache flags +set( CMAKE_CXX_FLAGS "" CACHE 
STRING "c++ flags" ) +set( CMAKE_C_FLAGS "" CACHE STRING "c flags" ) +set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "c++ Release flags" ) +set( CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "c Release flags" ) +set( CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c++ Debug flags" ) +set( CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c Debug flags" ) +set( CMAKE_SHARED_LINKER_FLAGS "" CACHE STRING "shared linker flags" ) +set( CMAKE_MODULE_LINKER_FLAGS "" CACHE STRING "module linker flags" ) +set( CMAKE_EXE_LINKER_FLAGS "-Wl,-z,nocopyreloc" CACHE STRING "executable linker flags" ) + +# put flags to cache (for debug purpose only) +set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS}" CACHE INTERNAL "Android specific c/c++ flags" ) +set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE}" CACHE INTERNAL "Android specific c/c++ Release flags" ) +set( ANDROID_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG}" CACHE INTERNAL "Android specific c/c++ Debug flags" ) +set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}" CACHE INTERNAL "Android specific c/c++ linker flags" ) + +# finish flags +set( CMAKE_CXX_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_CXX_FLAGS}" ) +set( CMAKE_C_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_C_FLAGS}" ) +set( CMAKE_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_RELEASE}" ) +set( CMAKE_C_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_C_FLAGS_RELEASE}" ) +set( CMAKE_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_CXX_FLAGS_DEBUG}" ) +set( CMAKE_C_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_C_FLAGS_DEBUG}" ) +set( CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" ) +set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}" ) +set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" ) + +if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" ) + set( CMAKE_SHARED_LINKER_FLAGS 
"-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" ) + set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" ) + set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" ) +endif() + +# configure rtti +if( DEFINED ANDROID_RTTI AND ANDROID_STL_FORCE_FEATURES ) + if( ANDROID_RTTI ) + set( CMAKE_CXX_FLAGS "-frtti ${CMAKE_CXX_FLAGS}" ) + else() + set( CMAKE_CXX_FLAGS "-fno-rtti ${CMAKE_CXX_FLAGS}" ) + endif() +endif() + +# configure exceptios +if( DEFINED ANDROID_EXCEPTIONS AND ANDROID_STL_FORCE_FEATURES ) + if( ANDROID_EXCEPTIONS ) + set( CMAKE_CXX_FLAGS "-fexceptions ${CMAKE_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "-fexceptions ${CMAKE_C_FLAGS}" ) + else() + set( CMAKE_CXX_FLAGS "-fno-exceptions ${CMAKE_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "-fno-exceptions ${CMAKE_C_FLAGS}" ) + endif() +endif() + +# global includes and link directories +include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} ) +link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" ) + +# setup output directories +set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" ) +set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" ) + +if(NOT _CMAKE_IN_TRY_COMPILE) + if( EXISTS "${CMAKE_SOURCE_DIR}/jni/CMakeLists.txt" ) + set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin/${ANDROID_NDK_ABI_NAME}" CACHE PATH "Output directory for applications" ) + else() + set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin" CACHE PATH "Output directory for applications" ) + endif() + set( LIBRARY_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}" CACHE PATH "path for android libs" ) +endif() + +# set these 
global flags for cmake client scripts to change behavior +set( ANDROID True ) +set( BUILD_ANDROID True ) + +# where is the target environment +set( CMAKE_FIND_ROOT_PATH "${ANDROID_TOOLCHAIN_ROOT}/bin" "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" "${ANDROID_SYSROOT}" "${CMAKE_INSTALL_PREFIX}" "${CMAKE_INSTALL_PREFIX}/share" ) + +# only search for libraries and includes in the ndk toolchain +set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY ) +set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) +set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY ) + + +# macro to find packages on the host OS +macro( find_host_package ) + set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER ) + set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER ) + set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER ) + if( CMAKE_HOST_WIN32 ) + SET( WIN32 1 ) + SET( UNIX ) + elseif( CMAKE_HOST_APPLE ) + SET( APPLE 1 ) + SET( UNIX ) + endif() + find_package( ${ARGN} ) + SET( WIN32 ) + SET( APPLE ) + SET( UNIX 1 ) + set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY ) + set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) + set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY ) +endmacro() + + +# macro to find programs on the host OS +macro( find_host_program ) + set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER ) + set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER ) + set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER ) + if( CMAKE_HOST_WIN32 ) + SET( WIN32 1 ) + SET( UNIX ) + elseif( CMAKE_HOST_APPLE ) + SET( APPLE 1 ) + SET( UNIX ) + endif() + find_program( ${ARGN} ) + SET( WIN32 ) + SET( APPLE ) + SET( UNIX 1 ) + set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY ) + set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) + set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY ) +endmacro() + + +macro( ANDROID_GET_ABI_RAWNAME TOOLCHAIN_FLAG VAR ) + if( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI" ) + set( ${VAR} "armeabi" ) + elseif( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI_V7A" ) + set( ${VAR} "armeabi-v7a" ) + elseif( "${TOOLCHAIN_FLAG}" STREQUAL "X86" ) + set( ${VAR} "x86" ) + elseif( 
"${TOOLCHAIN_FLAG}" STREQUAL "MIPS" ) + set( ${VAR} "mips" ) + else() + set( ${VAR} "unknown" ) + endif() +endmacro() + + +# export toolchain settings for the try_compile() command +if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) + set( __toolchain_config "") + foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES + ANDROID_NDK_HOST_X64 + ANDROID_NDK + ANDROID_STANDALONE_TOOLCHAIN + ANDROID_TOOLCHAIN_NAME + ANDROID_ABI + ANDROID_NATIVE_API_LEVEL + ANDROID_STL + ANDROID_STL_FORCE_FEATURES + ANDROID_FORCE_ARM_BUILD + ANDROID_NO_UNDEFINED + ANDROID_SO_UNDEFINED + ANDROID_FUNCTION_LEVEL_LINKING + ANDROID_GOLD_LINKER + ANDROID_NOEXECSTACK + ANDROID_RELRO + ) + if( DEFINED ${__var} ) + # inspect the value, not the variable name: values with spaces (e.g. an ABI such as "armeabi-v7a with NEON") must be written back quoted to stay a single argument + if( "${${__var}}" MATCHES " ") + set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" CACHE INTERNAL \"\" )\n" ) + else() + set( __toolchain_config "${__toolchain_config}set( ${__var} ${${__var}} CACHE INTERNAL \"\" )\n" ) + endif() + endif() + endforeach() + file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" ) + unset( __toolchain_config ) +endif() + + +# set some obsolete variables for backward compatibility +set( ANDROID_SET_OBSOLETE_VARIABLES ON CACHE BOOL "Define obsolete Andrid-specific cmake variables" ) +mark_as_advanced( ANDROID_SET_OBSOLETE_VARIABLES ) +if( ANDROID_SET_OBSOLETE_VARIABLES ) + set( ANDROID_API_LEVEL ${ANDROID_NATIVE_API_LEVEL} ) + set( ARM_TARGET "${ANDROID_ABI}" ) + set( ARMEABI_NDK_NAME "${ANDROID_NDK_ABI_NAME}" ) +endif() + + +# Variables controlling behavior or set by cmake toolchain: +# ANDROID_ABI : "armeabi-v7a" (default), "armeabi", "armeabi-v7a with NEON", "armeabi-v7a with VFPV3", "armeabi-v6 with VFP", "x86", "mips" +# ANDROID_NATIVE_API_LEVEL : 3,4,5,8,9,14 (depends on NDK version) +# ANDROID_STL : gnustl_static/gnustl_shared/stlport_static/stlport_shared/gabi++_static/gabi++_shared/system_re/system/none +# 
ANDROID_FORBID_SYGWIN : ON/OFF +# ANDROID_NO_UNDEFINED : ON/OFF +# ANDROID_SO_UNDEFINED : OFF/ON (default depends on NDK version) +# ANDROID_FUNCTION_LEVEL_LINKING : ON/OFF +# ANDROID_GOLD_LINKER : ON/OFF +# ANDROID_NOEXECSTACK : ON/OFF +# ANDROID_RELRO : ON/OFF +# ANDROID_FORCE_ARM_BUILD : ON/OFF +# ANDROID_STL_FORCE_FEATURES : ON/OFF +# ANDROID_SET_OBSOLETE_VARIABLES : ON/OFF +# Can be set only at the first run: +# ANDROID_NDK +# ANDROID_STANDALONE_TOOLCHAIN +# ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain +# ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems) +# LIBRARY_OUTPUT_PATH_ROOT : +# NDK_CCACHE : +# Obsolete: +# ANDROID_API_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL +# ARM_TARGET : superseded by ANDROID_ABI +# ARM_TARGETS : superseded by ANDROID_ABI (can be set only) +# ANDROID_NDK_TOOLCHAIN_ROOT : superseded by ANDROID_STANDALONE_TOOLCHAIN (can be set only) +# ANDROID_USE_STLPORT : superseded by ANDROID_STL=stlport_static +# ANDROID_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL (completely removed) +# +# Primary read-only variables: +# ANDROID : always TRUE +# ARMEABI : TRUE for arm v6 and older devices +# ARMEABI_V6 : TRUE for arm v6 +# ARMEABI_V7A : TRUE for arm v7a +# NEON : TRUE if NEON unit is enabled +# VFPV3 : TRUE if VFP version 3 is enabled +# X86 : TRUE if configured for x86 +# MIPS : TRUE if configured for mips +# BUILD_ANDROID : always TRUE +# BUILD_WITH_ANDROID_NDK : TRUE if NDK is used +# BUILD_WITH_STANDALONE_TOOLCHAIN : TRUE if standalone toolchain is used +# ANDROID_NDK_HOST_SYSTEM_NAME : "windows", "linux-x86" or "darwin-x86" depending on host platform +# ANDROID_NDK_ABI_NAME : "armeabi", "armeabi-v7a", "x86" or "mips" depending on ANDROID_ABI +# ANDROID_NDK_RELEASE : one of r5, r5b, r5c, r6, r6b, r7, r7b, r7c, r8, r8b, r8c, r8d, r8e; set only for NDK +# ANDROID_ARCH_NAME : "arm" or "x86" or "mips" depending on ANDROID_ABI +# ANDROID_SYSROOT : path to the compiler sysroot +# 
TOOL_OS_SUFFIX : "" or ".exe" depending on host platform +# ANDROID_COMPILER_IS_CLANG : TRUE if clang compiler is used +# Obsolete: +# ARMEABI_NDK_NAME : superseded by ANDROID_NDK_ABI_NAME +# +# Secondary (less stable) read-only variables: +# ANDROID_COMPILER_VERSION : GCC version used +# ANDROID_CXX_FLAGS : C/C++ compiler flags required by Android platform +# ANDROID_SUPPORTED_ABIS : list of currently allowed values for ANDROID_ABI +# ANDROID_TOOLCHAIN_MACHINE_NAME : "arm-linux-androideabi", "arm-eabi" or "i686-android-linux" +# ANDROID_TOOLCHAIN_ROOT : path to the top level of toolchain (standalone or placed inside NDK) +# ANDROID_CLANG_TOOLCHAIN_ROOT : path to clang tools +# ANDROID_SUPPORTED_NATIVE_API_LEVELS : list of native API levels found inside NDK +# ANDROID_STL_INCLUDE_DIRS : stl include paths +# ANDROID_RTTI : if rtti is enabled by the runtime +# ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime +# ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used +# ANDROID_CLANG_VERSION : version of clang compiler if clang is used +# +# Defaults: +# ANDROID_DEFAULT_NDK_API_LEVEL +# ANDROID_DEFAULT_NDK_API_LEVEL_${ARCH} +# ANDROID_NDK_SEARCH_PATHS +# ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH +# ANDROID_SUPPORTED_ABIS_${ARCH} +# ANDROID_SUPPORTED_NDK_VERSIONS diff --git a/android/java.rst b/platforms/android/java.rst similarity index 100% rename from android/java.rst rename to platforms/android/java.rst diff --git a/android/libinfo/CMakeLists.txt b/platforms/android/libinfo/CMakeLists.txt similarity index 100% rename from android/libinfo/CMakeLists.txt rename to platforms/android/libinfo/CMakeLists.txt diff --git a/android/libinfo/info.c b/platforms/android/libinfo/info.c similarity index 100% rename from android/libinfo/info.c rename to platforms/android/libinfo/info.c diff --git a/android/package/AndroidManifest.xml b/platforms/android/package/AndroidManifest.xml similarity index 100% rename from 
android/package/AndroidManifest.xml rename to platforms/android/package/AndroidManifest.xml diff --git a/android/package/CMakeLists.txt b/platforms/android/package/CMakeLists.txt similarity index 100% rename from android/package/CMakeLists.txt rename to platforms/android/package/CMakeLists.txt diff --git a/android/package/res/drawable/icon.png b/platforms/android/package/res/drawable/icon.png similarity index 100% rename from android/package/res/drawable/icon.png rename to platforms/android/package/res/drawable/icon.png diff --git a/android/package/res/values/strings.xml b/platforms/android/package/res/values/strings.xml similarity index 100% rename from android/package/res/values/strings.xml rename to platforms/android/package/res/values/strings.xml diff --git a/android/refman.rst b/platforms/android/refman.rst similarity index 100% rename from android/refman.rst rename to platforms/android/refman.rst diff --git a/android/service/CMakeLists.txt b/platforms/android/service/CMakeLists.txt similarity index 100% rename from android/service/CMakeLists.txt rename to platforms/android/service/CMakeLists.txt diff --git a/android/service/all.py b/platforms/android/service/all.py similarity index 100% rename from android/service/all.py rename to platforms/android/service/all.py diff --git a/android/service/device.conf b/platforms/android/service/device.conf similarity index 100% rename from android/service/device.conf rename to platforms/android/service/device.conf diff --git a/android/service/doc/AndroidAppUsageModel.dia b/platforms/android/service/doc/AndroidAppUsageModel.dia similarity index 100% rename from android/service/doc/AndroidAppUsageModel.dia rename to platforms/android/service/doc/AndroidAppUsageModel.dia diff --git a/android/service/doc/BaseLoaderCallback.rst b/platforms/android/service/doc/BaseLoaderCallback.rst similarity index 100% rename from android/service/doc/BaseLoaderCallback.rst rename to platforms/android/service/doc/BaseLoaderCallback.rst diff 
--git a/android/service/doc/InstallCallbackInterface.rst b/platforms/android/service/doc/InstallCallbackInterface.rst similarity index 100% rename from android/service/doc/InstallCallbackInterface.rst rename to platforms/android/service/doc/InstallCallbackInterface.rst diff --git a/android/service/doc/Intro.rst b/platforms/android/service/doc/Intro.rst similarity index 100% rename from android/service/doc/Intro.rst rename to platforms/android/service/doc/Intro.rst diff --git a/android/service/doc/JavaHelper.rst b/platforms/android/service/doc/JavaHelper.rst similarity index 100% rename from android/service/doc/JavaHelper.rst rename to platforms/android/service/doc/JavaHelper.rst diff --git a/android/service/doc/LibInstallAproved.dia b/platforms/android/service/doc/LibInstallAproved.dia similarity index 100% rename from android/service/doc/LibInstallAproved.dia rename to platforms/android/service/doc/LibInstallAproved.dia diff --git a/android/service/doc/LibInstallCanceled.dia b/platforms/android/service/doc/LibInstallCanceled.dia similarity index 100% rename from android/service/doc/LibInstallCanceled.dia rename to platforms/android/service/doc/LibInstallCanceled.dia diff --git a/android/service/doc/LibInstalled.dia b/platforms/android/service/doc/LibInstalled.dia similarity index 100% rename from android/service/doc/LibInstalled.dia rename to platforms/android/service/doc/LibInstalled.dia diff --git a/android/service/doc/LoaderCallbackInterface.rst b/platforms/android/service/doc/LoaderCallbackInterface.rst similarity index 100% rename from android/service/doc/LoaderCallbackInterface.rst rename to platforms/android/service/doc/LoaderCallbackInterface.rst diff --git a/android/service/doc/NoService.dia b/platforms/android/service/doc/NoService.dia similarity index 100% rename from android/service/doc/NoService.dia rename to platforms/android/service/doc/NoService.dia diff --git a/android/service/doc/Structure.dia b/platforms/android/service/doc/Structure.dia 
similarity index 100% rename from android/service/doc/Structure.dia rename to platforms/android/service/doc/Structure.dia diff --git a/android/service/doc/UseCases.rst b/platforms/android/service/doc/UseCases.rst similarity index 100% rename from android/service/doc/UseCases.rst rename to platforms/android/service/doc/UseCases.rst diff --git a/android/service/doc/build_uml.py b/platforms/android/service/doc/build_uml.py similarity index 100% rename from android/service/doc/build_uml.py rename to platforms/android/service/doc/build_uml.py diff --git a/android/service/doc/img/AndroidAppUsageModel.png b/platforms/android/service/doc/img/AndroidAppUsageModel.png similarity index 100% rename from android/service/doc/img/AndroidAppUsageModel.png rename to platforms/android/service/doc/img/AndroidAppUsageModel.png diff --git a/android/service/doc/img/LibInstallAproved.png b/platforms/android/service/doc/img/LibInstallAproved.png similarity index 100% rename from android/service/doc/img/LibInstallAproved.png rename to platforms/android/service/doc/img/LibInstallAproved.png diff --git a/android/service/doc/img/LibInstallCanceled.png b/platforms/android/service/doc/img/LibInstallCanceled.png similarity index 100% rename from android/service/doc/img/LibInstallCanceled.png rename to platforms/android/service/doc/img/LibInstallCanceled.png diff --git a/android/service/doc/img/LibInstalled.png b/platforms/android/service/doc/img/LibInstalled.png similarity index 100% rename from android/service/doc/img/LibInstalled.png rename to platforms/android/service/doc/img/LibInstalled.png diff --git a/android/service/doc/img/NoService.png b/platforms/android/service/doc/img/NoService.png similarity index 100% rename from android/service/doc/img/NoService.png rename to platforms/android/service/doc/img/NoService.png diff --git a/android/service/doc/img/Structure.png b/platforms/android/service/doc/img/Structure.png similarity index 100% rename from android/service/doc/img/Structure.png 
rename to platforms/android/service/doc/img/Structure.png diff --git a/android/service/doc/index.rst b/platforms/android/service/doc/index.rst similarity index 100% rename from android/service/doc/index.rst rename to platforms/android/service/doc/index.rst diff --git a/android/service/engine/.classpath b/platforms/android/service/engine/.classpath similarity index 100% rename from android/service/engine/.classpath rename to platforms/android/service/engine/.classpath diff --git a/android/service/engine/.project b/platforms/android/service/engine/.project similarity index 100% rename from android/service/engine/.project rename to platforms/android/service/engine/.project diff --git a/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml similarity index 100% rename from android/service/engine/AndroidManifest.xml rename to platforms/android/service/engine/AndroidManifest.xml diff --git a/android/service/engine/CMakeLists.txt b/platforms/android/service/engine/CMakeLists.txt similarity index 97% rename from android/service/engine/CMakeLists.txt rename to platforms/android/service/engine/CMakeLists.txt index 8b88393942..852a028cab 100644 --- a/android/service/engine/CMakeLists.txt +++ b/platforms/android/service/engine/CMakeLists.txt @@ -24,7 +24,7 @@ else() message(WARNING "Can not automatically determine the value for ANDROID_PLATFORM_VERSION_CODE") endif() -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY) +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/platforms/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY) link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}") @@ -72,4 +72,3 @@ file(GLOB engine_test_files 
"jni/Tests/*.cpp") add_executable(opencv_test_engine ${engine_test_files} jni/Tests/gtest/gtest-all.cpp) target_link_libraries(opencv_test_engine z binder log utils android_runtime ${engine} ${engine}_jni) - diff --git a/android/service/engine/build.xml b/platforms/android/service/engine/build.xml similarity index 100% rename from android/service/engine/build.xml rename to platforms/android/service/engine/build.xml diff --git a/android/service/engine/jni/Android.mk b/platforms/android/service/engine/jni/Android.mk similarity index 100% rename from android/service/engine/jni/Android.mk rename to platforms/android/service/engine/jni/Android.mk diff --git a/android/service/engine/jni/Application.mk b/platforms/android/service/engine/jni/Application.mk similarity index 100% rename from android/service/engine/jni/Application.mk rename to platforms/android/service/engine/jni/Application.mk diff --git a/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp rename to platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp diff --git a/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h similarity index 100% rename from android/service/engine/jni/BinderComponent/BnOpenCVEngine.h rename to platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h diff --git a/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp rename to platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp diff --git a/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h 
b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h similarity index 100% rename from android/service/engine/jni/BinderComponent/BpOpenCVEngine.h rename to platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h diff --git a/android/service/engine/jni/BinderComponent/HardwareDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/HardwareDetector.cpp rename to platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp diff --git a/android/service/engine/jni/BinderComponent/HardwareDetector.h b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h similarity index 100% rename from android/service/engine/jni/BinderComponent/HardwareDetector.h rename to platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h diff --git a/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/OpenCVEngine.cpp rename to platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp diff --git a/android/service/engine/jni/BinderComponent/OpenCVEngine.h b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.h similarity index 100% rename from android/service/engine/jni/BinderComponent/OpenCVEngine.h rename to platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.h diff --git a/android/service/engine/jni/BinderComponent/ProcReader.cpp b/platforms/android/service/engine/jni/BinderComponent/ProcReader.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/ProcReader.cpp rename to platforms/android/service/engine/jni/BinderComponent/ProcReader.cpp diff --git a/android/service/engine/jni/BinderComponent/ProcReader.h b/platforms/android/service/engine/jni/BinderComponent/ProcReader.h similarity 
index 100% rename from android/service/engine/jni/BinderComponent/ProcReader.h rename to platforms/android/service/engine/jni/BinderComponent/ProcReader.h diff --git a/android/service/engine/jni/BinderComponent/StringUtils.cpp b/platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/StringUtils.cpp rename to platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp diff --git a/android/service/engine/jni/BinderComponent/StringUtils.h b/platforms/android/service/engine/jni/BinderComponent/StringUtils.h similarity index 100% rename from android/service/engine/jni/BinderComponent/StringUtils.h rename to platforms/android/service/engine/jni/BinderComponent/StringUtils.h diff --git a/android/service/engine/jni/BinderComponent/TegraDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/TegraDetector.cpp rename to platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp diff --git a/android/service/engine/jni/BinderComponent/TegraDetector.h b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h similarity index 100% rename from android/service/engine/jni/BinderComponent/TegraDetector.h rename to platforms/android/service/engine/jni/BinderComponent/TegraDetector.h diff --git a/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp b/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp rename to platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp diff --git a/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h b/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h rename to 
platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h diff --git a/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp b/platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp rename to platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp diff --git a/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h b/platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h rename to platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h diff --git a/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp b/platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp rename to platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp diff --git a/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h b/platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h rename to platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h diff --git a/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp b/platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp rename to platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp diff --git a/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h b/platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h rename to 
platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h diff --git a/android/service/engine/jni/NativeClient/ClientMain.cpp b/platforms/android/service/engine/jni/NativeClient/ClientMain.cpp similarity index 100% rename from android/service/engine/jni/NativeClient/ClientMain.cpp rename to platforms/android/service/engine/jni/NativeClient/ClientMain.cpp diff --git a/android/service/engine/jni/NativeService/CommonPackageManager.cpp b/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp similarity index 100% rename from android/service/engine/jni/NativeService/CommonPackageManager.cpp rename to platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp diff --git a/android/service/engine/jni/NativeService/CommonPackageManager.h b/platforms/android/service/engine/jni/NativeService/CommonPackageManager.h similarity index 100% rename from android/service/engine/jni/NativeService/CommonPackageManager.h rename to platforms/android/service/engine/jni/NativeService/CommonPackageManager.h diff --git a/android/service/engine/jni/NativeService/NativePackageManager.cpp b/platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp similarity index 100% rename from android/service/engine/jni/NativeService/NativePackageManager.cpp rename to platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp diff --git a/android/service/engine/jni/NativeService/NativePackageManager.h b/platforms/android/service/engine/jni/NativeService/NativePackageManager.h similarity index 100% rename from android/service/engine/jni/NativeService/NativePackageManager.h rename to platforms/android/service/engine/jni/NativeService/NativePackageManager.h diff --git a/android/service/engine/jni/NativeService/PackageInfo.cpp b/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp similarity index 100% rename from android/service/engine/jni/NativeService/PackageInfo.cpp rename to 
platforms/android/service/engine/jni/NativeService/PackageInfo.cpp diff --git a/android/service/engine/jni/NativeService/PackageInfo.h b/platforms/android/service/engine/jni/NativeService/PackageInfo.h similarity index 100% rename from android/service/engine/jni/NativeService/PackageInfo.h rename to platforms/android/service/engine/jni/NativeService/PackageInfo.h diff --git a/android/service/engine/jni/NativeService/ServiceMain.cpp b/platforms/android/service/engine/jni/NativeService/ServiceMain.cpp similarity index 100% rename from android/service/engine/jni/NativeService/ServiceMain.cpp rename to platforms/android/service/engine/jni/NativeService/ServiceMain.cpp diff --git a/android/service/engine/jni/Tests/HardwareDetectionTest.cpp b/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp similarity index 100% rename from android/service/engine/jni/Tests/HardwareDetectionTest.cpp rename to platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp diff --git a/android/service/engine/jni/Tests/OpenCVEngineTest.cpp b/platforms/android/service/engine/jni/Tests/OpenCVEngineTest.cpp similarity index 100% rename from android/service/engine/jni/Tests/OpenCVEngineTest.cpp rename to platforms/android/service/engine/jni/Tests/OpenCVEngineTest.cpp diff --git a/android/service/engine/jni/Tests/PackageInfoTest.cpp b/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp similarity index 99% rename from android/service/engine/jni/Tests/PackageInfoTest.cpp rename to platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp index 6cbb069431..36fdae764f 100644 --- a/android/service/engine/jni/Tests/PackageInfoTest.cpp +++ b/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp @@ -222,4 +222,3 @@ TEST(PackageInfo, Comparator3) EXPECT_EQ(info1, info2); } #endif - diff --git a/android/service/engine/jni/Tests/PackageManagerStub.cpp b/platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp similarity index 100% rename from 
android/service/engine/jni/Tests/PackageManagerStub.cpp rename to platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp diff --git a/android/service/engine/jni/Tests/PackageManagerStub.h b/platforms/android/service/engine/jni/Tests/PackageManagerStub.h similarity index 100% rename from android/service/engine/jni/Tests/PackageManagerStub.h rename to platforms/android/service/engine/jni/Tests/PackageManagerStub.h diff --git a/android/service/engine/jni/Tests/PackageManagmentTest.cpp b/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp similarity index 99% rename from android/service/engine/jni/Tests/PackageManagmentTest.cpp rename to platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp index e21dcf7604..61d6e01c24 100644 --- a/android/service/engine/jni/Tests/PackageManagmentTest.cpp +++ b/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp @@ -137,5 +137,3 @@ TEST(PackageManager, GetPackagePathForMips) // string path = pm.GetPackagePathByVersion("240", PLATFORM_TEGRA2, 0); // EXPECT_STREQ("/data/data/org.opencv.lib_v24_tegra2/lib", path.c_str()); // } - - diff --git a/android/service/engine/jni/Tests/TestMain.cpp b/platforms/android/service/engine/jni/Tests/TestMain.cpp similarity index 100% rename from android/service/engine/jni/Tests/TestMain.cpp rename to platforms/android/service/engine/jni/Tests/TestMain.cpp diff --git a/android/service/engine/jni/Tests/Tests.mk b/platforms/android/service/engine/jni/Tests/Tests.mk similarity index 100% rename from android/service/engine/jni/Tests/Tests.mk rename to platforms/android/service/engine/jni/Tests/Tests.mk diff --git a/android/service/engine/jni/Tests/gtest/gtest-all.cpp b/platforms/android/service/engine/jni/Tests/gtest/gtest-all.cpp similarity index 100% rename from android/service/engine/jni/Tests/gtest/gtest-all.cpp rename to platforms/android/service/engine/jni/Tests/gtest/gtest-all.cpp diff --git a/android/service/engine/jni/Tests/gtest/gtest.h 
b/platforms/android/service/engine/jni/Tests/gtest/gtest.h similarity index 100% rename from android/service/engine/jni/Tests/gtest/gtest.h rename to platforms/android/service/engine/jni/Tests/gtest/gtest.h diff --git a/android/service/engine/jni/include/EngineCommon.h b/platforms/android/service/engine/jni/include/EngineCommon.h similarity index 100% rename from android/service/engine/jni/include/EngineCommon.h rename to platforms/android/service/engine/jni/include/EngineCommon.h diff --git a/android/service/engine/jni/include/IOpenCVEngine.h b/platforms/android/service/engine/jni/include/IOpenCVEngine.h similarity index 100% rename from android/service/engine/jni/include/IOpenCVEngine.h rename to platforms/android/service/engine/jni/include/IOpenCVEngine.h diff --git a/android/service/engine/jni/include/IPackageManager.h b/platforms/android/service/engine/jni/include/IPackageManager.h similarity index 100% rename from android/service/engine/jni/include/IPackageManager.h rename to platforms/android/service/engine/jni/include/IPackageManager.h diff --git a/android/service/engine/jni/include/OpenCVEngineHelper.h b/platforms/android/service/engine/jni/include/OpenCVEngineHelper.h similarity index 100% rename from android/service/engine/jni/include/OpenCVEngineHelper.h rename to platforms/android/service/engine/jni/include/OpenCVEngineHelper.h diff --git a/android/service/engine/project.properties b/platforms/android/service/engine/project.properties similarity index 100% rename from android/service/engine/project.properties rename to platforms/android/service/engine/project.properties diff --git a/android/service/engine/res/drawable/icon.png b/platforms/android/service/engine/res/drawable/icon.png similarity index 100% rename from android/service/engine/res/drawable/icon.png rename to platforms/android/service/engine/res/drawable/icon.png diff --git a/android/service/engine/res/layout-small/info.xml b/platforms/android/service/engine/res/layout-small/info.xml 
similarity index 100% rename from android/service/engine/res/layout-small/info.xml rename to platforms/android/service/engine/res/layout-small/info.xml diff --git a/android/service/engine/res/layout-small/main.xml b/platforms/android/service/engine/res/layout-small/main.xml similarity index 100% rename from android/service/engine/res/layout-small/main.xml rename to platforms/android/service/engine/res/layout-small/main.xml diff --git a/android/service/engine/res/layout/info.xml b/platforms/android/service/engine/res/layout/info.xml similarity index 100% rename from android/service/engine/res/layout/info.xml rename to platforms/android/service/engine/res/layout/info.xml diff --git a/android/service/engine/res/layout/main.xml b/platforms/android/service/engine/res/layout/main.xml similarity index 100% rename from android/service/engine/res/layout/main.xml rename to platforms/android/service/engine/res/layout/main.xml diff --git a/android/service/engine/res/values/strings.xml b/platforms/android/service/engine/res/values/strings.xml similarity index 100% rename from android/service/engine/res/values/strings.xml rename to platforms/android/service/engine/res/values/strings.xml diff --git a/android/service/engine/src/org/opencv/engine/BinderConnector.java b/platforms/android/service/engine/src/org/opencv/engine/BinderConnector.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/BinderConnector.java rename to platforms/android/service/engine/src/org/opencv/engine/BinderConnector.java diff --git a/android/service/engine/src/org/opencv/engine/HardwareDetector.java b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/HardwareDetector.java rename to platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java diff --git a/android/service/engine/src/org/opencv/engine/MarketConnector.java 
b/platforms/android/service/engine/src/org/opencv/engine/MarketConnector.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/MarketConnector.java rename to platforms/android/service/engine/src/org/opencv/engine/MarketConnector.java diff --git a/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl similarity index 100% rename from android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl rename to platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl diff --git a/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/OpenCVEngineService.java rename to platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java diff --git a/android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java b/platforms/android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java rename to platforms/android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java diff --git a/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java rename to platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java diff --git a/android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java b/platforms/android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java similarity index 100% rename from 
android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java rename to platforms/android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java diff --git a/android/service/engine_test/.classpath b/platforms/android/service/engine_test/.classpath similarity index 100% rename from android/service/engine_test/.classpath rename to platforms/android/service/engine_test/.classpath diff --git a/android/service/engine_test/.project b/platforms/android/service/engine_test/.project similarity index 100% rename from android/service/engine_test/.project rename to platforms/android/service/engine_test/.project diff --git a/android/service/engine_test/AndroidManifest.xml b/platforms/android/service/engine_test/AndroidManifest.xml similarity index 100% rename from android/service/engine_test/AndroidManifest.xml rename to platforms/android/service/engine_test/AndroidManifest.xml diff --git a/android/service/engine_test/build.xml b/platforms/android/service/engine_test/build.xml similarity index 100% rename from android/service/engine_test/build.xml rename to platforms/android/service/engine_test/build.xml diff --git a/android/service/engine_test/project.properties b/platforms/android/service/engine_test/project.properties similarity index 100% rename from android/service/engine_test/project.properties rename to platforms/android/service/engine_test/project.properties diff --git a/android/service/engine_test/res/drawable-hdpi/ic_launcher.png b/platforms/android/service/engine_test/res/drawable-hdpi/ic_launcher.png similarity index 100% rename from android/service/engine_test/res/drawable-hdpi/ic_launcher.png rename to platforms/android/service/engine_test/res/drawable-hdpi/ic_launcher.png diff --git a/android/service/engine_test/res/drawable-ldpi/ic_launcher.png b/platforms/android/service/engine_test/res/drawable-ldpi/ic_launcher.png similarity index 100% rename from android/service/engine_test/res/drawable-ldpi/ic_launcher.png rename to 
platforms/android/service/engine_test/res/drawable-ldpi/ic_launcher.png diff --git a/android/service/engine_test/res/drawable-mdpi/ic_launcher.png b/platforms/android/service/engine_test/res/drawable-mdpi/ic_launcher.png similarity index 100% rename from android/service/engine_test/res/drawable-mdpi/ic_launcher.png rename to platforms/android/service/engine_test/res/drawable-mdpi/ic_launcher.png diff --git a/android/service/engine_test/res/layout/main.xml b/platforms/android/service/engine_test/res/layout/main.xml similarity index 100% rename from android/service/engine_test/res/layout/main.xml rename to platforms/android/service/engine_test/res/layout/main.xml diff --git a/android/service/engine_test/res/values/strings.xml b/platforms/android/service/engine_test/res/values/strings.xml similarity index 100% rename from android/service/engine_test/res/values/strings.xml rename to platforms/android/service/engine_test/res/values/strings.xml diff --git a/android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java b/platforms/android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java similarity index 100% rename from android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java rename to platforms/android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java diff --git a/android/service/push_native.py b/platforms/android/service/push_native.py similarity index 100% rename from android/service/push_native.py rename to platforms/android/service/push_native.py diff --git a/android/service/readme.txt b/platforms/android/service/readme.txt similarity index 100% rename from android/service/readme.txt rename to platforms/android/service/readme.txt diff --git a/android/service/test_native.py b/platforms/android/service/test_native.py similarity index 99% rename from android/service/test_native.py rename to platforms/android/service/test_native.py index 9a39032b18..328b9a8a51 100755 --- 
a/android/service/test_native.py +++ b/platforms/android/service/test_native.py @@ -34,4 +34,3 @@ if (__name__ == "__main__"): os.system("adb %s shell mkdir -p \"%s\"" % (DEVICE_STR, DEVICE_LOG_PATH)) RunTestApp("OpenCVEngineTestApp") - diff --git a/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh b/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh deleted file mode 100755 index 1d2153a384..0000000000 --- a/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_hardfp -cd build_hardfp - -cmake -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../.. diff --git a/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh b/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh deleted file mode 100755 index 5caf5a4e1b..0000000000 --- a/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_softfp -cd build_softfp - -cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../.. 
diff --git a/android/scripts/ABI_compat_generator.py b/platforms/scripts/ABI_compat_generator.py similarity index 98% rename from android/scripts/ABI_compat_generator.py rename to platforms/scripts/ABI_compat_generator.py index 39253bbdec..fdabf00611 100755 --- a/android/scripts/ABI_compat_generator.py +++ b/platforms/scripts/ABI_compat_generator.py @@ -6,9 +6,7 @@ import os architecture = 'armeabi' -excludedHeaders = set(['hdf5.h', 'cap_ios.h', - 'eigen.hpp', 'cxeigen.hpp' #TOREMOVE - ]) +excludedHeaders = set(['hdf5.h', 'cap_ios.h', 'eigen.hpp', 'cxeigen.hpp']) #TOREMOVE systemIncludes = ['sources/cxx-stl/gnu-libstdc++/4.6/include', \ '/opt/android-ndk-r8c/platforms/android-8/arch-arm', # TODO: check if this one could be passed as command line arg 'sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include'] diff --git a/android/scripts/camera_build.conf b/platforms/scripts/camera_build.conf similarity index 100% rename from android/scripts/camera_build.conf rename to platforms/scripts/camera_build.conf diff --git a/android/scripts/cmake_android_all_cameras.py b/platforms/scripts/cmake_android_all_cameras.py similarity index 90% rename from android/scripts/cmake_android_all_cameras.py rename to platforms/scripts/cmake_android_all_cameras.py index 0ef430a3d4..0739004776 100755 --- a/android/scripts/cmake_android_all_cameras.py +++ b/platforms/scripts/cmake_android_all_cameras.py @@ -60,7 +60,7 @@ for s in ConfFile.readlines(): os.chdir(BuildDir) BuildLog = os.path.join(BuildDir, "build.log") - CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../../ > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog) + CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../.. 
> \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog) MakeCmdLine = "make %s >> \"%s\" 2>&1" % (MakeTarget, BuildLog); #print(CmakeCmdLine) os.system(CmakeCmdLine) diff --git a/android/scripts/cmake_android.sh b/platforms/scripts/cmake_android_arm.sh similarity index 50% rename from android/scripts/cmake_android.sh rename to platforms/scripts/cmake_android_arm.sh index 101ba3cee8..84c88a8159 100755 --- a/android/scripts/cmake_android.sh +++ b/platforms/scripts/cmake_android_arm.sh @@ -1,8 +1,7 @@ #!/bin/sh cd `dirname $0`/.. -mkdir -p build -cd build - -cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. +mkdir -p build_android_arm +cd build_android_arm +cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../.. diff --git a/platforms/scripts/cmake_android_mips.sh b/platforms/scripts/cmake_android_mips.sh new file mode 100755 index 0000000000..6bc7944b6d --- /dev/null +++ b/platforms/scripts/cmake_android_mips.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_android_mips +cd build_android_mips + +cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../.. diff --git a/platforms/scripts/cmake_android_service.sh b/platforms/scripts/cmake_android_service.sh new file mode 100755 index 0000000000..7ba8865b2a --- /dev/null +++ b/platforms/scripts/cmake_android_service.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_android_service +cd build_android_service + +cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../.. 
diff --git a/platforms/scripts/cmake_android_x86.sh b/platforms/scripts/cmake_android_x86.sh new file mode 100755 index 0000000000..8fb8abda7e --- /dev/null +++ b/platforms/scripts/cmake_android_x86.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +cd `dirname $0`/.. + +mkdir -p build_android_x86 +cd build_android_x86 + +cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../.. diff --git a/platforms/scripts/cmake_arm_gnueabi_hardfp.sh b/platforms/scripts/cmake_arm_gnueabi_hardfp.sh new file mode 100755 index 0000000000..1fce4f9dc1 --- /dev/null +++ b/platforms/scripts/cmake_arm_gnueabi_hardfp.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_linux_arm_hardfp +cd build_linux_arm_hardfp + +cmake -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../.. diff --git a/platforms/scripts/cmake_arm_gnueabi_softfp.sh b/platforms/scripts/cmake_arm_gnueabi_softfp.sh new file mode 100755 index 0000000000..734348907c --- /dev/null +++ b/platforms/scripts/cmake_arm_gnueabi_softfp.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_linux_arm_softfp +cd build_linux_arm_softfp + +cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../.. 
diff --git a/platforms/linux/scripts/cmake_carma.sh b/platforms/scripts/cmake_carma.sh similarity index 100% rename from platforms/linux/scripts/cmake_carma.sh rename to platforms/scripts/cmake_carma.sh diff --git a/platforms/winrt/scripts/cmake_winrt.cmd b/platforms/scripts/cmake_winrt.cmd similarity index 100% rename from platforms/winrt/scripts/cmake_winrt.cmd rename to platforms/scripts/cmake_winrt.cmd diff --git a/samples/ocl/aloe-L.png b/samples/ocl/aloe-L.png deleted file mode 100644 index 47587668e2..0000000000 Binary files a/samples/ocl/aloe-L.png and /dev/null differ diff --git a/samples/ocl/aloe-R.png b/samples/ocl/aloe-R.png deleted file mode 100644 index 5d11c57a9e..0000000000 Binary files a/samples/ocl/aloe-R.png and /dev/null differ diff --git a/samples/ocl/aloe-disp.png b/samples/ocl/aloe-disp.png deleted file mode 100644 index dd4a499bed..0000000000 Binary files a/samples/ocl/aloe-disp.png and /dev/null differ diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index dc2f8b23ba..5ffed2e40b 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -1,5 +1,3 @@ -//This sample is inherited from facedetect.cpp in smaple/c - #include "opencv2/objdetect/objdetect.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -16,80 +14,86 @@ int main( int, const char** ) { return 0; } using namespace std; using namespace cv; +#define LOOP_NUM 10 + +const static Scalar colors[] = { CV_RGB(0,0,255), + CV_RGB(0,128,255), + CV_RGB(0,255,255), + CV_RGB(0,255,0), + CV_RGB(255,128,0), + CV_RGB(255,255,0), + CV_RGB(255,0,0), + CV_RGB(255,0,255)} ; -static void help() +int64 work_begin = 0; +int64 work_end = 0; + +static void workBegin() +{ + work_begin = getTickCount(); +} +static void workEnd() { - cout << "\nThis program demonstrates the cascade recognizer.\n" - "This classifier can recognize many ~rigid objects, it's most known use is for faces.\n" - "Usage:\n" - "./facedetect [--cascade= this is the 
primary trained classifier such as frontal face]\n" - " [--scale=\n" - " [filename|camera_index]\n\n" - "see facedetect.cmd for one call:\n" - "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --scale=1.3 \n" - "Hit any key to quit.\n" - "Using OpenCV version " << CV_VERSION << "\n" << endl; + work_end += (getTickCount() - work_begin); } -struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; -void detectAndDraw( Mat& img, - cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier& nestedCascade, - double scale); +static double getTime(){ + return work_end /((double)cvGetTickFrequency() * 1000.); +} + +void detect( Mat& img, vector& faces, + cv::ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime); -string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml"; +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime); + +void Draw(Mat& img, vector& faces, double scale); + +// This function test if gpu_rst matches cpu_rst. 
+// If the two vectors are not equal, it will return the difference in vector size +// Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) +double checkRectSimilarity(Size sz, std::vector& cpu_rst, std::vector& gpu_rst); int main( int argc, const char** argv ) { - CvCapture* capture = 0; - Mat frame, frameCopy, image; - const string scaleOpt = "--scale="; - size_t scaleOptLen = scaleOpt.length(); - const string cascadeOpt = "--cascade="; - size_t cascadeOptLen = cascadeOpt.length(); - string inputName; - - help(); - cv::ocl::OclCascadeClassifier cascade; - CascadeClassifier nestedCascade; - double scale = 1; - - for( int i = 1; i < argc; i++ ) + const char* keys = + "{ h | help | false | print help message }" + "{ i | input | | specify input image }" + "{ t | template | ../../../data/haarcascades/haarcascade_frontalface_alt.xml | specify template file }" + "{ c | scale | 1.0 | scale image }" + "{ s | use_cpu | false | use cpu or gpu to process the image }"; + + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) { - cout << "Processing " << i << " " << argv[i] << endl; - if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 ) - { - cascadeName.assign( argv[i] + cascadeOptLen ); - cout << " from which we have cascadeName= " << cascadeName << endl; - } - else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 ) - { - if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 ) - scale = 1; - cout << " from which we read scale = " << scale << endl; - } - else if( argv[i][0] == '-' ) - { - cerr << "WARNING: Unknown option %s" << argv[i] << endl; - } - else - inputName.assign( argv[i] ); + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; } + CvCapture* capture = 0; + Mat frame, frameCopy, image; - if( !cascade.load( cascadeName ) ) + bool useCPU = cmd.get("s"); + string inputName = cmd.get("i"); + string cascadeName = cmd.get("t"); + double 
scale = cmd.get("c"); + cv::ocl::OclCascadeClassifierBuf cascade; + CascadeClassifier cpu_cascade; + + if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) ) { cerr << "ERROR: Could not load classifier cascade" << endl; - cerr << "Usage: facedetect [--cascade=]\n" - " [--scale[=\n" - " [filename|camera_index]\n" << endl ; return -1; } - if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') ) + if( inputName.empty() ) { - capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' ); - int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ; - if(!capture) cout << "Capture from CAM " << c << " didn't work" << endl; + capture = cvCaptureFromCAM(0); + if(!capture) + cout << "Capture from CAM 0 didn't work" << endl; } else if( inputName.size() ) { @@ -97,26 +101,30 @@ int main( int argc, const char** argv ) if( image.empty() ) { capture = cvCaptureFromAVI( inputName.c_str() ); - if(!capture) cout << "Capture from AVI didn't work" << endl; + if(!capture) + cout << "Capture from AVI didn't work" << endl; + return -1; } } else { image = imread( "lena.jpg", 1 ); - if(image.empty()) cout << "Couldn't read lena.jpg" << endl; + if(image.empty()) + cout << "Couldn't read lena.jpg" << endl; + return -1; } cvNamedWindow( "result", 1 ); std::vector oclinfo; int devnums = cv::ocl::getDevice(oclinfo); - if(devnums<1) + if( devnums < 1 ) { std::cout << "no device found\n"; return -1; } //if you want to use undefault device, set it here //setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); + ocl::setBinpath("./"); if( capture ) { cout << "In capture ..." 
<< endl; @@ -124,15 +132,20 @@ int main( int argc, const char** argv ) { IplImage* iplImg = cvQueryFrame( capture ); frame = cv::cvarrToMat(iplImg); + vector faces; if( frame.empty() ) break; if( iplImg->origin == IPL_ORIGIN_TL ) frame.copyTo( frameCopy ); else flip( frame, frameCopy, 0 ); - - detectAndDraw( frameCopy, cascade, nestedCascade, scale ); - + if(useCPU){ + detectCPU(frameCopy, faces, cpu_cascade, scale, false); + } + else{ + detect(frameCopy, faces, cascade, scale, false); + } + Draw(frameCopy, faces, scale); if( waitKey( 10 ) >= 0 ) goto _cleanup_; } @@ -145,42 +158,34 @@ _cleanup_: else { cout << "In image read" << endl; - if( !image.empty() ) - { - detectAndDraw( image, cascade, nestedCascade, scale ); - waitKey(0); - } - else if( !inputName.empty() ) + vector faces; + vector ref_rst; + double accuracy = 0.; + for(int i = 0; i <= LOOP_NUM;i ++) { - /* assume it is a text file containing the - list of the image filenames to be processed - one per line */ - FILE* f = fopen( inputName.c_str(), "rt" ); - if( f ) - { - char buf[1000+1]; - while( fgets( buf, 1000, f ) ) - { - int len = (int)strlen(buf), c; - while( len > 0 && isspace(buf[len-1]) ) - len--; - buf[len] = '\0'; - cout << "file " << buf << endl; - image = imread( buf, 1 ); - if( !image.empty() ) - { - detectAndDraw( image, cascade, nestedCascade, scale ); - c = waitKey(0); - if( c == 27 || c == 'q' || c == 'Q' ) - break; - } - else - { - cerr << "Aw snap, couldn't read image " << buf << endl; - } + cout << "loop" << i << endl; + if(useCPU){ + detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); + } + else{ + detect(image, faces, cascade, scale, i==0?false:true); + if(i == 0){ + detectCPU(image, ref_rst, cpu_cascade, scale, false); + accuracy = checkRectSimilarity(image.size(), ref_rst, faces); } - fclose(f); + } + if (i == LOOP_NUM) + { + if (useCPU) + cout << "average CPU time (noCamera) : "; + else + cout << "average GPU time (noCamera) : "; + cout << getTime() / LOOP_NUM << " ms" 
<< endl; + cout << "accuracy value: " << accuracy <& faces, + cv::ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime) { - int i = 0; - double t = 0; - vector faces; - const static Scalar colors[] = { CV_RGB(0,0,255), - CV_RGB(0,128,255), - CV_RGB(0,255,255), - CV_RGB(0,255,0), - CV_RGB(255,128,0), - CV_RGB(255,255,0), - CV_RGB(255,0,0), - CV_RGB(255,0,255)} ; cv::ocl::oclMat image(img); cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); - + if(calTime) workBegin(); cv::ocl::cvtColor( image, gray, COLOR_BGR2GRAY ); cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); cv::ocl::equalizeHist( smallImg, smallImg ); - CvSeq* _objects; - MemStorage storage(cvCreateMemStorage(0)); - t = (double)cvGetTickCount(); - _objects = cascade.oclHaarDetectObjects( smallImg, storage, 1.1, + cascade.detectMultiScale( smallImg, faces, 1.1, 3, 0 |CV_HAAR_SCALE_IMAGE , Size(30,30), Size(0, 0) ); - vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - faces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); - t = (double)cvGetTickCount() - t; - printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) 
); + if(calTime) workEnd(); +} + +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime) +{ + if(calTime) workBegin(); + Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); + cvtColor(img, cpu_gray, CV_BGR2GRAY); + resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR); + equalizeHist(cpu_smallImg, cpu_smallImg); + cascade.detectMultiScale(cpu_smallImg, faces, 1.1, + 3, 0 | CV_HAAR_SCALE_IMAGE, + Size(30, 30), Size(0, 0)); + if(calTime) workEnd(); +} + +void Draw(Mat& img, vector& faces, double scale) +{ + int i = 0; for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) { - Mat smallImgROI; Point center; Scalar color = colors[i%8]; int radius; @@ -236,4 +241,43 @@ void detectAndDraw( Mat& img, } cv::imshow( "result", img ); } + +double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& ob2) +{ + double final_test_result = 0.0; + size_t sz1 = ob1.size(); + size_t sz2 = ob2.size(); + + if(sz1 != sz2) + return sz1 > sz2 ? 
(double)(sz1 - sz2) : (double)(sz2 - sz1); + else + { + cv::Mat cpu_result(sz, CV_8UC1); + cpu_result.setTo(0); + + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) + { + cv::Mat cpu_result_roi(cpu_result, *r); + cpu_result_roi.setTo(1); + cpu_result.copyTo(cpu_result); + } + int cpu_area = cv::countNonZero(cpu_result > 0); + + cv::Mat gpu_result(sz, CV_8UC1); + gpu_result.setTo(0); + for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) + { + cv::Mat gpu_result_roi(gpu_result, *r2); + gpu_result_roi.setTo(1); + gpu_result.copyTo(gpu_result); + } + + cv::Mat result_; + multiply(cpu_result, gpu_result, result_); + int result = cv::countNonZero(result_ > 0); + + final_test_result = 1.0 - (double)result/(double)cpu_area; + } + return final_test_result; +} #endif diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp index 8a310e4e3e..daff267718 100644 --- a/samples/ocl/hog.cpp +++ b/samples/ocl/hog.cpp @@ -46,7 +46,6 @@ public: bool gamma_corr; }; - class App { public: @@ -65,6 +64,13 @@ public: string message() const; +// This function test if gpu_rst matches cpu_rst. 
+// If the two vectors are not equal, it will return the difference in vector size +// Else if will return +// (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) + double checkRectSimilarity(Size sz, + std::vector& cpu_rst, + std::vector& gpu_rst); private: App operator=(App&); @@ -291,6 +297,7 @@ void App::run() ocl::oclMat gpu_img; // Iterate over all frames + bool verify = false; while (running && !frame.empty()) { workBegin(); @@ -317,7 +324,18 @@ void App::run() gpu_img.upload(img); gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); - } + if (!verify) + { + // verify if GPU output same objects with CPU at 1st run + verify = true; + vector ref_rst; + cvtColor(img, img, COLOR_BGRA2BGR); + cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride, + Size(0, 0), scale, gr_threshold-2); + double accuracy = checkRectSimilarity(img.size(), ref_rst, found); + cout << "\naccuracy value: " << accuracy << endl; + } + } else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); hogWorkEnd(); @@ -458,3 +476,45 @@ inline string App::workFps() const return ss.str(); } +double App::checkRectSimilarity(Size sz, + std::vector& ob1, + std::vector& ob2) +{ + double final_test_result = 0.0; + size_t sz1 = ob1.size(); + size_t sz2 = ob2.size(); + + if(sz1 != sz2) + return sz1 > sz2 ? 
(double)(sz1 - sz2) : (double)(sz2 - sz1); + else + { + cv::Mat cpu_result(sz, CV_8UC1); + cpu_result.setTo(0); + + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) + { + cv::Mat cpu_result_roi(cpu_result, *r); + cpu_result_roi.setTo(1); + cpu_result.copyTo(cpu_result); + } + int cpu_area = cv::countNonZero(cpu_result > 0); + + cv::Mat gpu_result(sz, CV_8UC1); + gpu_result.setTo(0); + for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) + { + cv::Mat gpu_result_roi(gpu_result, *r2); + gpu_result_roi.setTo(1); + gpu_result.copyTo(gpu_result); + } + + cv::Mat result_; + multiply(cpu_result, gpu_result, result_); + int result = cv::countNonZero(result_ > 0); + + final_test_result = 1.0 - (double)result/(double)cpu_area; + } + return final_test_result; + +} + diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp new file mode 100644 index 0000000000..392d455851 --- /dev/null +++ b/samples/ocl/pyrlk_optical_flow.cpp @@ -0,0 +1,287 @@ +#include +#include +#include + +#include "opencv2/core/utility.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/video/video.hpp" + +using namespace std; +using namespace cv; +using namespace cv::ocl; + +typedef unsigned char uchar; +#define LOOP_NUM 10 +int64 work_begin = 0; +int64 work_end = 0; + +static void workBegin() +{ + work_begin = getTickCount(); +} +static void workEnd() +{ + work_end += (getTickCount() - work_begin); +} +static double getTime(){ + return work_end * 1000. 
/ getTickFrequency(); +} + +static void download(const oclMat& d_mat, vector& vec) +{ + vec.resize(d_mat.cols); + Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]); + d_mat.download(mat); +} + +static void download(const oclMat& d_mat, vector& vec) +{ + vec.resize(d_mat.cols); + Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]); + d_mat.download(mat); +} + +static void drawArrows(Mat& frame, const vector& prevPts, const vector& nextPts, const vector& status, Scalar line_color = Scalar(0, 0, 255)) +{ + for (size_t i = 0; i < prevPts.size(); ++i) + { + if (status[i]) + { + int line_thickness = 1; + + Point p = prevPts[i]; + Point q = nextPts[i]; + + double angle = atan2((double) p.y - q.y, (double) p.x - q.x); + + double hypotenuse = sqrt( (double)(p.y - q.y)*(p.y - q.y) + (double)(p.x - q.x)*(p.x - q.x) ); + + if (hypotenuse < 1.0) + continue; + + // Here we lengthen the arrow by a factor of three. + q.x = (int) (p.x - 3 * hypotenuse * cos(angle)); + q.y = (int) (p.y - 3 * hypotenuse * sin(angle)); + + // Now we draw the main line of the arrow. + line(frame, p, q, line_color, line_thickness); + + // Now draw the tips of the arrow. I do some scaling so that the + // tips look proportional to the main line of the arrow. 
+ + p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4)); + p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4)); + line(frame, p, q, line_color, line_thickness); + + p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4)); + p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4)); + line(frame, p, q, line_color, line_thickness); + } + } +} + + +int main(int argc, const char* argv[]) +{ + static std::vector ocl_info; + ocl::getDevice(ocl_info); + //if you want to use undefault device, set it here + setDevice(ocl_info[0]); + + //set this to save kernel compile time from second time you run + ocl::setBinpath("./"); + const char* keys = + "{ help h | false | print help message }" + "{ left l | | specify left image }" + "{ right r | | specify right image }" + "{ camera c | 0 | enable camera capturing }" + "{ use_cpu s | false | use cpu or gpu to process the image }" + "{ video v | | use video as input }" + "{ points | 1000 | specify points count [GoodFeatureToTrack] }" + "{ min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"; + + CommandLineParser cmd(argc, argv, keys); + + if (cmd.has("help")) + { + cmd.printMessage(); + return 0; + } + + bool defaultPicturesFail = false; + string fname0 = cmd.get("left"); + string fname1 = cmd.get("right"); + string vdofile = cmd.get("video"); + int points = cmd.get("points"); + double minDist = cmd.get("min_dist"); + bool useCPU = cmd.has("s"); + bool useCamera = cmd.has("c"); + int inputName = cmd.get("c"); + oclMat d_nextPts, d_status; + + Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE); + Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE); + PyrLKOpticalFlow d_pyrLK; + vector pts; + vector nextPts; + vector status; + vector err; + + if (frame0.empty() || frame1.empty()) + { + useCamera = true; + defaultPicturesFail = true; + VideoCapture capture(inputName); + if (!capture.isOpened()) + { + cout << "Can't load input images" << endl; + return -1; + } + } + + cout << "Points count : " << points << endl << endl; + + if (useCamera) + { 
+ VideoCapture capture; + Mat frame, frameCopy; + Mat frame0Gray, frame1Gray; + Mat ptr0, ptr1; + + if(vdofile == "") + capture.open( inputName ); + else + capture.open(vdofile.c_str()); + + int c = inputName ; + if(!capture.isOpened()) + { + if(vdofile == "") + cout << "Capture from CAM " << c << " didn't work" << endl; + else + cout << "Capture from file " << vdofile << " failed" <= 0 ) + goto _cleanup_; + } + + waitKey(0); + +_cleanup_: + capture.release(); + } + else + { +nocamera: + for(int i = 0; i <= LOOP_NUM;i ++) + { + cout << "loop" << i << endl; + if (i > 0) workBegin(); + + cv::goodFeaturesToTrack(frame0, pts, points, 0.01, minDist); + + if (useCPU) + { + cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); + } + else + { + oclMat d_prevPts(1, points, CV_32FC2, (void*)&pts[0]); + + d_pyrLK.sparse(oclMat(frame0), oclMat(frame1), d_prevPts, d_nextPts, d_status); + + download(d_prevPts, pts); + download(d_nextPts, nextPts); + download(d_status, status); + } + + if (i > 0 && i <= LOOP_NUM) + workEnd(); + + if (i == LOOP_NUM) + { + if (useCPU) + cout << "average CPU time (noCamera) : "; + else + cout << "average GPU time (noCamera) : "; + + cout << getTime() / LOOP_NUM << " ms" << endl; + + drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0)); + + imshow("PyrLK [Sparse]", frame0); + } + } + } + + waitKey(); + + return 0; +} diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp new file mode 100644 index 0000000000..8cc6530d50 --- /dev/null +++ b/samples/ocl/stereo_match.cpp @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include + +#include "opencv2/core/utility.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/highgui/highgui.hpp" + +using namespace cv; +using namespace std; +using namespace ocl; + +bool help_showed = false; + +struct Params +{ + Params(); + static Params read(int argc, char** argv); + + string left; + string right; + + string method_str() const + { + switch (method) + { + case BM: 
return "BM"; + case BP: return "BP"; + case CSBP: return "CSBP"; + } + return ""; + } + enum {BM, BP, CSBP} method; + int ndisp; // Max disparity + 1 + enum {GPU, CPU} type; +}; + + +struct App +{ + App(const Params& p); + void run(); + void handleKey(char key); + void printParams() const; + + void workBegin() { work_begin = getTickCount(); } + void workEnd() + { + int64 d = getTickCount() - work_begin; + double f = getTickFrequency(); + work_fps = f / d; + } + + string text() const + { + stringstream ss; + ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left) + << setprecision(4) << work_fps; + return ss.str(); + } +private: + Params p; + bool running; + + Mat left_src, right_src; + Mat left, right; + oclMat d_left, d_right; + + StereoBM_OCL bm; + StereoBeliefPropagation bp; + StereoConstantSpaceBP csbp; + + int64 work_begin; + double work_fps; +}; + +static void printHelp() +{ + cout << "Usage: stereo_match_gpu\n" + << "\t--left --right # must be rectified\n" + << "\t--method # BM | BP | CSBP\n" + << "\t--ndisp # number of disparity levels\n" + << "\t--type # cpu | CPU | gpu | GPU\n"; + help_showed = true; +} + +int main(int argc, char** argv) +{ + try + { + if (argc < 2) + { + printHelp(); + return 1; + } + + Params args = Params::read(argc, argv); + if (help_showed) + return -1; + + int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU }; + vector info; + + if(getDevice(info, flags[args.type]) == 0) + { + throw runtime_error("Error: Did not find a valid OpenCL device!"); + } + cout << "Device name:" << info[0].DeviceName[0] << endl; + + App app(args); + app.run(); + } + catch (const exception& e) + { + cout << "error: " << e.what() << endl; + } + return 0; +} + + +Params::Params() +{ + method = BM; + ndisp = 64; + type = GPU; +} + + +Params Params::read(int argc, char** argv) +{ + Params p; + + for (int i = 1; i < argc; i++) + { + if (string(argv[i]) == "--left") p.left = argv[++i]; + else if (string(argv[i]) == "--right") p.right = 
argv[++i]; + else if (string(argv[i]) == "--method") + { + if (string(argv[i + 1]) == "BM") p.method = BM; + else if (string(argv[i + 1]) == "BP") p.method = BP; + else if (string(argv[i + 1]) == "CSBP") p.method = CSBP; + else throw runtime_error("unknown stereo match method: " + string(argv[i + 1])); + i++; + } + else if (string(argv[i]) == "--ndisp") p.ndisp = atoi(argv[++i]); + else if (string(argv[i]) == "--type") + { + string t(argv[++i]); + if (t == "cpu" || t == "CPU") + { + p.type = CPU; + } + else if (t == "gpu" || t == "GPU") + { + p.type = GPU; + } + else throw runtime_error("unknown device type: " + t); + } + else if (string(argv[i]) == "--help") printHelp(); + else throw runtime_error("unknown key: " + string(argv[i])); + } + + return p; +} + + +App::App(const Params& params) + : p(params), running(false) +{ + cout << "stereo_match_ocl sample\n"; + cout << "\nControls:\n" + << "\tesc - exit\n" + << "\tp - print current parameters\n" + << "\tg - convert source images into gray\n" + << "\tm - change stereo match method\n" + << "\ts - change Sobel prefiltering flag (for BM only)\n" + << "\t1/q - increase/decrease maximum disparity\n" + << "\t2/w - increase/decrease window size (for BM only)\n" + << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n" + << "\t4/r - increase/decrease level count (for BP and CSBP only)\n"; +} + + +void App::run() +{ + // Load images + left_src = imread(p.left); + right_src = imread(p.right); + if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\""); + if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\""); + + cvtColor(left_src, left, COLOR_BGR2GRAY); + cvtColor(right_src, right, COLOR_BGR2GRAY); + + d_left.upload(left); + d_right.upload(right); + + imshow("left", left); + imshow("right", right); + + // Set common parameters + bm.ndisp = p.ndisp; + bp.ndisp = p.ndisp; + csbp.ndisp = p.ndisp; + + cout << endl; + printParams(); + + running = true; + while 
(running) + { + + // Prepare disparity map of specified type + Mat disp; + oclMat d_disp; + workBegin(); + switch (p.method) + { + case Params::BM: + if (d_left.channels() > 1 || d_right.channels() > 1) + { + cout << "BM doesn't support color images\n"; + cvtColor(left_src, left, COLOR_BGR2GRAY); + cvtColor(right_src, right, COLOR_BGR2GRAY); + cout << "image_channels: " << left.channels() << endl; + d_left.upload(left); + d_right.upload(right); + imshow("left", left); + imshow("right", right); + } + bm(d_left, d_right, d_disp); + break; + case Params::BP: + bp(d_left, d_right, d_disp); + break; + case Params::CSBP: + csbp(d_left, d_right, d_disp); + break; + } + ocl::finish(); + workEnd(); + + // Show results + d_disp.download(disp); + if (p.method != Params::BM) + { + disp.convertTo(disp, 0); + } + putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255)); + imshow("disparity", disp); + + handleKey((char)waitKey(3)); + } +} + + +void App::printParams() const +{ + cout << "--- Parameters ---\n"; + cout << "image_size: (" << left.cols << ", " << left.rows << ")\n"; + cout << "image_channels: " << left.channels() << endl; + cout << "method: " << p.method_str() << endl + << "ndisp: " << p.ndisp << endl; + switch (p.method) + { + case Params::BM: + cout << "win_size: " << bm.winSize << endl; + cout << "prefilter_sobel: " << bm.preset << endl; + break; + case Params::BP: + cout << "iter_count: " << bp.iters << endl; + cout << "level_count: " << bp.levels << endl; + break; + case Params::CSBP: + cout << "iter_count: " << csbp.iters << endl; + cout << "level_count: " << csbp.levels << endl; + break; + } + cout << endl; +} + + +void App::handleKey(char key) +{ + switch (key) + { + case 27: + running = false; + break; + case 'p': case 'P': + printParams(); + break; + case 'g': case 'G': + if (left.channels() == 1 && p.method != Params::BM) + { + left = left_src; + right = right_src; + } + else + { + cvtColor(left_src, left, COLOR_BGR2GRAY); + 
cvtColor(right_src, right, COLOR_BGR2GRAY); + } + d_left.upload(left); + d_right.upload(right); + cout << "image_channels: " << left.channels() << endl; + imshow("left", left); + imshow("right", right); + break; + case 'm': case 'M': + switch (p.method) + { + case Params::BM: + p.method = Params::BP; + break; + case Params::BP: + p.method = Params::CSBP; + break; + case Params::CSBP: + p.method = Params::BM; + break; + } + cout << "method: " << p.method_str() << endl; + break; + case 's': case 'S': + if (p.method == Params::BM) + { + switch (bm.preset) + { + case StereoBM_OCL::BASIC_PRESET: + bm.preset = StereoBM_OCL::PREFILTER_XSOBEL; + break; + case StereoBM_OCL::PREFILTER_XSOBEL: + bm.preset = StereoBM_OCL::BASIC_PRESET; + break; + } + cout << "prefilter_sobel: " << bm.preset << endl; + } + break; + case '1': + p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8; + cout << "ndisp: " << p.ndisp << endl; + bm.ndisp = p.ndisp; + bp.ndisp = p.ndisp; + csbp.ndisp = p.ndisp; + break; + case 'q': case 'Q': + p.ndisp = max(p.ndisp - 8, 1); + cout << "ndisp: " << p.ndisp << endl; + bm.ndisp = p.ndisp; + bp.ndisp = p.ndisp; + csbp.ndisp = p.ndisp; + break; + case '2': + if (p.method == Params::BM) + { + bm.winSize = min(bm.winSize + 1, 51); + cout << "win_size: " << bm.winSize << endl; + } + break; + case 'w': case 'W': + if (p.method == Params::BM) + { + bm.winSize = max(bm.winSize - 1, 2); + cout << "win_size: " << bm.winSize << endl; + } + break; + case '3': + if (p.method == Params::BP) + { + bp.iters += 1; + cout << "iter_count: " << bp.iters << endl; + } + else if (p.method == Params::CSBP) + { + csbp.iters += 1; + cout << "iter_count: " << csbp.iters << endl; + } + break; + case 'e': case 'E': + if (p.method == Params::BP) + { + bp.iters = max(bp.iters - 1, 1); + cout << "iter_count: " << bp.iters << endl; + } + else if (p.method == Params::CSBP) + { + csbp.iters = max(csbp.iters - 1, 1); + cout << "iter_count: " << csbp.iters << endl; + } + break; + case '4': + if (p.method 
== Params::BP) + { + bp.levels += 1; + cout << "level_count: " << bp.levels << endl; + } + else if (p.method == Params::CSBP) + { + csbp.levels += 1; + cout << "level_count: " << csbp.levels << endl; + } + break; + case 'r': case 'R': + if (p.method == Params::BP) + { + bp.levels = max(bp.levels - 1, 1); + cout << "level_count: " << bp.levels << endl; + } + else if (p.method == Params::CSBP) + { + csbp.levels = max(csbp.levels - 1, 1); + cout << "level_count: " << csbp.levels << endl; + } + break; + } +} + + diff --git a/samples/python2/grabcut.py b/samples/python2/grabcut.py new file mode 100644 index 0000000000..9fc1280acf --- /dev/null +++ b/samples/python2/grabcut.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +''' +=============================================================================== +Interactive Image Segmentation using GrabCut algorithm. + +This sample shows interactive image segmentation using grabcut algorithm. + +USAGE : + python grabcut.py + +README FIRST: + Two windows will show up, one for input and one for output. + + At first, in input window, draw a rectangle around the object using +mouse right button. Then press 'n' to segment the object (once or a few times) +For any finer touch-ups, you can press any of the keys below and draw lines on +the areas you want. Then again press 'n' for updating the output. 
+ +Key '0' - To select areas of sure background +Key '1' - To select areas of sure foreground +Key '2' - To select areas of probable background +Key '3' - To select areas of probable foreground + +Key 'n' - To update the segmentation +Key 'r' - To reset the setup +Key 's' - To save the results +=============================================================================== +''' + +import numpy as np +import cv2 +import sys + +BLUE = [255,0,0] # rectangle color +RED = [0,0,255] # PR BG +GREEN = [0,255,0] # PR FG +BLACK = [0,0,0] # sure BG +WHITE = [255,255,255] # sure FG + +DRAW_BG = {'color' : BLACK, 'val' : 0} +DRAW_FG = {'color' : WHITE, 'val' : 1} +DRAW_PR_FG = {'color' : GREEN, 'val' : 3} +DRAW_PR_BG = {'color' : RED, 'val' : 2} + +# setting up flags +rect = (0,0,1,1) +drawing = False # flag for drawing curves +rectangle = False # flag for drawing rect +rect_over = False # flag to check if rect drawn +rect_or_mask = 100 # flag for selecting rect or mask mode +value = DRAW_FG # drawing initialized to FG +thickness = 3 # brush thickness + +def onmouse(event,x,y,flags,param): + global img,img2,drawing,value,mask,rectangle,rect,rect_or_mask,ix,iy,rect_over + + # Draw Rectangle + if event == cv2.EVENT_RBUTTONDOWN: + rectangle = True + ix,iy = x,y + + elif event == cv2.EVENT_MOUSEMOVE: + if rectangle == True: + img = img2.copy() + cv2.rectangle(img,(ix,iy),(x,y),BLUE,2) + rect = (ix,iy,abs(ix-x),abs(iy-y)) + rect_or_mask = 0 + + elif event == cv2.EVENT_RBUTTONUP: + rectangle = False + rect_over = True + cv2.rectangle(img,(ix,iy),(x,y),BLUE,2) + rect = (ix,iy,abs(ix-x),abs(iy-y)) + rect_or_mask = 0 + print " Now press the key 'n' a few times until no further change \n" + + # draw touchup curves + + if event == cv2.EVENT_LBUTTONDOWN: + if rect_over == False: + print "first draw rectangle \n" + else: + drawing = True + cv2.circle(img,(x,y),thickness,value['color'],-1) + cv2.circle(mask,(x,y),thickness,value['val'],-1) + + elif event == cv2.EVENT_MOUSEMOVE: + if drawing 
== True: + cv2.circle(img,(x,y),thickness,value['color'],-1) + cv2.circle(mask,(x,y),thickness,value['val'],-1) + + elif event == cv2.EVENT_LBUTTONUP: + if drawing == True: + drawing = False + cv2.circle(img,(x,y),thickness,value['color'],-1) + cv2.circle(mask,(x,y),thickness,value['val'],-1) + +# print documentation +print __doc__ + +# Loading images +if len(sys.argv) == 2: + filename = sys.argv[1] # for drawing purposes +else: + print "No input image given, so loading default image, lena.jpg \n" + print "Correct Usage : python grabcut.py \n" + filename = '../cpp/lena.jpg' + +img = cv2.imread(filename) +img2 = img.copy() # a copy of original image +mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG +output = np.zeros(img.shape,np.uint8) # output image to be shown + +# input and output windows +cv2.namedWindow('output') +cv2.namedWindow('input') +cv2.setMouseCallback('input',onmouse) +cv2.moveWindow('input',img.shape[1]+10,90) + +print " Instructions : \n" +print " Draw a rectangle around the object using right mouse button \n" + +while(1): + + cv2.imshow('output',output) + cv2.imshow('input',img) + k = 0xFF & cv2.waitKey(1) + + # key bindings + if k == 27: # esc to exit + break + elif k == ord('0'): # BG drawing + print " mark background regions with left mouse button \n" + value = DRAW_BG + elif k == ord('1'): # FG drawing + print " mark foreground regions with left mouse button \n" + value = DRAW_FG + elif k == ord('2'): # PR_BG drawing + value = DRAW_PR_BG + elif k == ord('3'): # PR_FG drawing + value = DRAW_PR_FG + elif k == ord('s'): # save image + bar = np.zeros((img.shape[0],5,3),np.uint8) + res = np.hstack((img2,bar,img,bar,output)) + cv2.imwrite('grabcut_output.png',res) + print " Result saved as image \n" + elif k == ord('r'): # reset everything + print "resetting \n" + rect = (0,0,1,1) + drawing = False + rectangle = False + rect_or_mask = 100 + rect_over = False + value = DRAW_FG + img = img2.copy() + mask = 
np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG + output = np.zeros(img.shape,np.uint8) # output image to be shown + elif k == ord('n'): # segment the image + print """ For finer touchups, mark foreground and background after pressing keys 0-3 + and again press 'n' \n""" + if (rect_or_mask == 0): # grabcut with rect + bgdmodel = np.zeros((1,65),np.float64) + fgdmodel = np.zeros((1,65),np.float64) + cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_RECT) + rect_or_mask = 1 + elif rect_or_mask == 1: # grabcut with mask + bgdmodel = np.zeros((1,65),np.float64) + fgdmodel = np.zeros((1,65),np.float64) + cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_MASK) + + mask2 = np.where((mask==1) + (mask==3),255,0).astype('uint8') + output = cv2.bitwise_and(img2,img2,mask=mask2) + +cv2.destroyAllWindows()