* #1538 from StevenPuttemans:bugfix_3283 * #1545 from alalek:ocl_test_fix_rng * #1551 from alalek:cmake_install_win * #1570 from ilya-lavrenov:ipp_warn_fix * #1573 from alalek:perf_simple_strategy * #1574 from alalek:svm_workaround * #1576 from alalek:ocl_fix_cl_double * #1577 from ilya-lavrenov:ocl_setto_opencl12 * #1578 from asmorkalov:android_fd_cp_fix * #1579 from ilya-lavrenov:ocl_norm * #1582 from sperrholz:ocl-arithm-additions * #1586 from ilya-lavrenov:ocl_setto_win_fix * #1589 from ilya-lavrenov:pr1582_fix * #1591 from alalek:ocl_remove_cl_hpp_h * #1592 from alalek:ocl_program_cache_update * #1593 from ilya-lavrenov:ocl_war_on_double * #1594 from ilya-lavrenov:ocl_perf * #1595 from alalek:cl_code_cleanup * #1596 from alalek:test_fix_run_py * #1597 from alalek:ocl_fix_cleanup * #1598 from alalek:ocl_fix_build_mac * #1599 from ilya-lavrenov:ocl_mac_kernel_warnings * #1601 from ilya-lavrenov:ocl_fix_tvl1_and_sparse * #1602 from alalek:ocl_test_dump_info * #1603 from ilya-lavrenov:ocl_disable_svm_noblas * #1605 from alalek:ocl_fixes * #1606 from ilya-lavrenov:ocl_imgproc * #1607 from ilya-lavrenov:ocl_fft_cleanup * #1608 from alalek:fix_warn_upd_haar * #1609 from ilya-lavrenov:ocl_some_optimization * #1610 from alalek:ocl_fix_perf_kalman * #1612 from alalek:ocl_fix_string_info * #1614 from ilya-lavrenov:ocl_svm_misprint * #1616 from ilya-lavrenov:ocl_cvtColor * #1617 from ilya-lavrenov:ocl_info * #1622 from a0byte:2.4 * #1625 from ilya-lavrenov:to_string Conflicts: cmake/OpenCVConfig.cmake cmake/OpenCVDetectPython.cmake cmake/OpenCVGenConfig.cmake modules/core/CMakeLists.txt modules/nonfree/src/surf.ocl.cpp modules/ocl/include/opencv2/ocl/ocl.hpp modules/ocl/include/opencv2/ocl/private/util.hpp modules/ocl/perf/main.cpp modules/ocl/src/arithm.cpp modules/ocl/src/cl_operations.cpp modules/ocl/src/cl_programcache.cpp modules/ocl/src/color.cpp modules/ocl/src/fft.cpp modules/ocl/src/filtering.cpp modules/ocl/src/gemm.cpp modules/ocl/src/haar.cpp 
modules/ocl/src/imgproc.cpp modules/ocl/src/matrix_operations.cpp modules/ocl/src/pyrlk.cpp modules/ocl/src/split_merge.cpp modules/ocl/src/svm.cpp modules/ocl/test/main.cpp modules/ocl/test/test_fft.cpp modules/ocl/test/test_moments.cpp modules/ocl/test/test_objdetect.cpp modules/ocl/test/test_optflow.cpp modules/ocl/test/utility.hpp modules/python/CMakeLists.txt modules/ts/include/opencv2/ts.hpp modules/ts/src/ts_perf.cpp samples/android/face-detection/jni/DetectionBasedTracker_jni.cpppull/1631/head
commit
e845184843
124 changed files with 3137 additions and 3188 deletions
@ -0,0 +1,166 @@ |
||||
# =================================================================================== |
||||
# The OpenCV CMake configuration file |
||||
# |
||||
# ** File generated automatically, do not modify ** |
||||
# |
||||
# Usage from an external project: |
||||
# In your CMakeLists.txt, add these lines: |
||||
# |
||||
# FIND_PACKAGE(OpenCV REQUIRED) |
||||
# TARGET_LINK_LIBRARIES(MY_TARGET_NAME ${OpenCV_LIBS}) |
||||
# |
||||
# Or you can search for specific OpenCV modules: |
||||
# |
||||
# FIND_PACKAGE(OpenCV REQUIRED core highgui) |
||||
# |
||||
# If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE. |
||||
# |
||||
# This file will define the following variables: |
||||
# - OpenCV_LIBS : The list of libraries to link against. |
||||
# - OpenCV_LIB_DIR : The directory(es) where lib files are. Calling LINK_DIRECTORIES |
||||
# with this path is NOT needed. |
||||
# - OpenCV_INCLUDE_DIRS : The OpenCV include directories. |
||||
# - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability |
||||
# - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API |
||||
# - OpenCV_VERSION : The version of this OpenCV build. Example: "2.4.0" |
||||
# - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION. Example: "2" |
||||
# - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION. Example: "4" |
||||
# - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION. Example: "0" |
||||
# |
||||
# Advanced variables: |
||||
# - OpenCV_SHARED |
||||
# - OpenCV_CONFIG_PATH |
||||
# - OpenCV_LIB_COMPONENTS |
||||
# |
||||
# =================================================================================== |
||||
# |
||||
# Windows pack specific options: |
||||
# - OpenCV_STATIC |
||||
# - OpenCV_CUDA |
||||
|
||||
# -----------------------------------------------------------------------------
# Top-level dispatcher of the OpenCV Windows pack.
#
# It works out the compiler runtime (vc8..vc11 / mingw) and the architecture
# (x86 / x64) of the consuming project, picks the matching sub-directory of
# the pack (optionally the CUDA-enabled "gpu" one) and includes the real
# OpenCVConfig.cmake found there.
# -----------------------------------------------------------------------------

# The compiler checks below need the C++ language to be enabled.
if(CMAKE_VERSION VERSION_GREATER 2.6)
  get_property(OpenCV_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
  if(NOT ";${OpenCV_LANGUAGES};" MATCHES ";CXX;")
    enable_language(CXX)
  endif()
endif()

# Static vs. shared: honour an explicit OpenCV_STATIC, otherwise follow the
# project-wide BUILD_SHARED_LIBS (shared when it is undefined or ON).
if(NOT DEFINED OpenCV_STATIC)
  # look for global setting
  if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS)
    set(OpenCV_STATIC OFF)
  else()
    set(OpenCV_STATIC ON)
  endif()
endif()

if(NOT DEFINED OpenCV_CUDA)
  # if the user's app uses CUDA, then it probably wants CUDA-enabled OpenCV binaries
  if(CUDA_FOUND)
    set(OpenCV_CUDA ON)
  endif()
endif()

# Detect OpenCV_ARCH / OpenCV_RUNTIME from the active compiler.
if(MSVC)
  if(CMAKE_CL_64)
    set(OpenCV_ARCH x64)
    set(OpenCV_TBB_ARCH intel64)
  else()
    set(OpenCV_ARCH x86)
    set(OpenCV_TBB_ARCH ia32)
  endif()
  if(MSVC_VERSION EQUAL 1400)
    set(OpenCV_RUNTIME vc8)
  elseif(MSVC_VERSION EQUAL 1500)
    set(OpenCV_RUNTIME vc9)
  elseif(MSVC_VERSION EQUAL 1600)
    set(OpenCV_RUNTIME vc10)
  elseif(MSVC_VERSION EQUAL 1700)
    set(OpenCV_RUNTIME vc11)
  endif()
elseif(MINGW)
  set(OpenCV_RUNTIME mingw)

  execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
                  OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Test the variable that execute_process() actually filled in above; the
  # previous check used a differently named (never set) variable, so 64-bit
  # MinGW toolchains were always classified as x86.
  if(OPENCV_GCC_TARGET_MACHINE MATCHES "64")
    set(MINGW64 1)
    set(OpenCV_ARCH x64)
  else()
    set(OpenCV_ARCH x86)
  endif()
endif()

# Drop a stale cached OpenCV_CONFIG_PATH so it is recomputed below.
if(CMAKE_VERSION VERSION_GREATER 2.6.2)
  unset(OpenCV_CONFIG_PATH CACHE)
endif()

if(NOT OpenCV_FIND_QUIETLY)
  message(STATUS "OpenCV ARCH: ${OpenCV_ARCH}")
  message(STATUS "OpenCV RUNTIME: ${OpenCV_RUNTIME}")
  message(STATUS "OpenCV STATIC: ${OpenCV_STATIC}")
endif()

# Locate the per-configuration directory relative to this file. The "gpu"
# variant is used only when CUDA is requested and that variant exists.
get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH CACHE)
if(OpenCV_RUNTIME AND OpenCV_ARCH)
  if(OpenCV_STATIC AND EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
    else()
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
    endif()
  elseif(EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
    else()
      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
    endif()
  endif()
endif()

if(OpenCV_LIB_PATH AND EXISTS "${OpenCV_LIB_PATH}/OpenCVConfig.cmake")
  set(OpenCV_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release OpenCV libraries are located" FORCE)
  set(OpenCV_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug OpenCV libraries are located" FORCE)
  set(OpenCV_3RDPARTY_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release 3rdpaty OpenCV dependencies are located" FORCE)
  set(OpenCV_3RDPARTY_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug 3rdpaty OpenCV dependencies are located" FORCE)

  include("${OpenCV_LIB_PATH}/OpenCVConfig.cmake")

  if(OpenCV_CUDA)
    # A library that does not exist under the "gpu" tree is taken from the
    # regular (non-gpu) tree when it exists there.
    set(_OpenCV_LIBS "")
    foreach(_lib ${OpenCV_LIBS})
      string(REPLACE "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}" "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}" _lib2 "${_lib}")
      if(NOT EXISTS "${_lib}" AND EXISTS "${_lib2}")
        list(APPEND _OpenCV_LIBS "${_lib2}")
      else()
        list(APPEND _OpenCV_LIBS "${_lib}")
      endif()
    endforeach()
    set(OpenCV_LIBS ${_OpenCV_LIBS})
  endif()
  set(OpenCV_FOUND TRUE CACHE BOOL "" FORCE)
  set(OPENCV_FOUND TRUE CACHE BOOL "" FORCE)

  if(NOT OpenCV_FIND_QUIETLY)
    message(STATUS "Found OpenCV ${OpenCV_VERSION} in ${OpenCV_LIB_PATH}")
    if(NOT OpenCV_LIB_PATH MATCHES "/staticlib")
      # Shared build: tell the user where the DLLs live.
      get_filename_component(_OpenCV_LIB_PATH "${OpenCV_LIB_PATH}/../bin" ABSOLUTE)
      file(TO_NATIVE_PATH "${_OpenCV_LIB_PATH}" _OpenCV_LIB_PATH)
      message(STATUS "You might need to add ${_OpenCV_LIB_PATH} to your PATH to be able to run your applications.")
      if(OpenCV_LIB_PATH MATCHES "/gpu/")
        string(REPLACE "\\gpu" "" _OpenCV_LIB_PATH2 "${_OpenCV_LIB_PATH}")
        message(STATUS "GPU support is enabled so you might also need ${_OpenCV_LIB_PATH2} in your PATH (it must go after the ${_OpenCV_LIB_PATH}).")
      endif()
    endif()
  endif()
else()
  if(NOT OpenCV_FIND_QUIETLY)
    message(WARNING
"Found OpenCV Windows Pack but it has not binaries compatible with your configuration.
You should manually point CMake variable OpenCV_DIR to your build of OpenCV library."
    )
  endif()
  set(OpenCV_FOUND FALSE CACHE BOOL "" FORCE)
  set(OPENCV_FOUND FALSE CACHE BOOL "" FORCE)
endif()
@ -1,7 +1,8 @@ |
||||
if(NOT HAVE_OPENCL) |
||||
ocv_module_disable(ocl) |
||||
return() |
||||
endif() |
||||
|
||||
set(the_description "OpenCL-accelerated Computer Vision") |
||||
ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml) |
||||
ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml "${OPENCL_LIBRARIES}") |
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow) |
||||
|
@ -0,0 +1,135 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined(DUMP_INFO_STDOUT) && !defined(DUMP_INFO_XML) |
||||
#error Invalid usage |
||||
#endif |
||||
|
||||
#if !defined(DUMP_INFO_STDOUT) |
||||
#define DUMP_INFO_STDOUT(...) |
||||
#endif |
||||
|
||||
#if !defined(DUMP_INFO_XML) |
||||
#define DUMP_INFO_XML(...) |
||||
#endif |
||||
|
||||
#include <sstream> |
||||
|
||||
// Formats a byte count as a human-readable string built from its non-zero
// 1024-based components, largest first, separated by single spaces
// (e.g. 1536 -> "1 kB 512 B"). A count of zero yields "0 B" instead of an
// empty string, and the result never ends with a trailing space.
static std::string bytesToStringRepr(size_t value)
{
    if (value == 0)
        return "0 B";

    const size_t b = value % 1024;
    value /= 1024;

    const size_t kb = value % 1024;
    value /= 1024;

    const size_t mb = value % 1024;
    value /= 1024;

    const size_t gb = value;

    std::ostringstream stream;
    const char* separator = ""; // becomes " " once the first component is written

    if (gb > 0)
    {
        stream << separator << gb << " GB";
        separator = " ";
    }
    if (mb > 0)
    {
        stream << separator << mb << " MB";
        separator = " ";
    }
    if (kb > 0)
    {
        stream << separator << kb << " kB";
        separator = " ";
    }
    if (b > 0)
        stream << separator << b << " B";

    return stream.str();
}
||||
|
||||
static void dumpOpenCLDevice() |
||||
{ |
||||
using namespace cv::ocl; |
||||
try |
||||
{ |
||||
const cv::ocl::DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); |
||||
|
||||
const char* deviceTypeStr = deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU |
||||
? "CPU" : |
||||
(deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown"); |
||||
DUMP_INFO_STDOUT("Device type", deviceTypeStr); |
||||
DUMP_INFO_XML("cv_ocl_deviceType", deviceTypeStr); |
||||
|
||||
DUMP_INFO_STDOUT("Platform name", deviceInfo.platform->platformName); |
||||
DUMP_INFO_XML("cv_ocl_platformName", deviceInfo.platform->platformName); |
||||
|
||||
DUMP_INFO_STDOUT("Device name", deviceInfo.deviceName); |
||||
DUMP_INFO_XML("cv_ocl_deviceName", deviceInfo.deviceName); |
||||
|
||||
DUMP_INFO_STDOUT("Device version", deviceInfo.deviceVersion); |
||||
DUMP_INFO_XML("cv_ocl_deviceVersion", deviceInfo.deviceVersion); |
||||
|
||||
DUMP_INFO_STDOUT("Compute units", deviceInfo.maxComputeUnits); |
||||
DUMP_INFO_XML("cv_ocl_maxComputeUnits", deviceInfo.maxComputeUnits); |
||||
|
||||
DUMP_INFO_STDOUT("Max work group size", deviceInfo.maxWorkGroupSize); |
||||
DUMP_INFO_XML("cv_ocl_maxWorkGroupSize", deviceInfo.maxWorkGroupSize); |
||||
|
||||
std::string localMemorySizeStr = bytesToStringRepr(deviceInfo.localMemorySize); |
||||
DUMP_INFO_STDOUT("Local memory size", localMemorySizeStr.c_str()); |
||||
DUMP_INFO_XML("cv_ocl_localMemorySize", deviceInfo.localMemorySize); |
||||
|
||||
std::string maxMemAllocSizeStr = bytesToStringRepr(deviceInfo.maxMemAllocSize); |
||||
DUMP_INFO_STDOUT("Max memory allocation size", maxMemAllocSizeStr.c_str()); |
||||
DUMP_INFO_XML("cv_ocl_maxMemAllocSize", deviceInfo.maxMemAllocSize); |
||||
|
||||
const char* doubleSupportStr = deviceInfo.haveDoubleSupport ? "Yes" : "No"; |
||||
DUMP_INFO_STDOUT("Double support", doubleSupportStr); |
||||
DUMP_INFO_XML("cv_ocl_haveDoubleSupport", deviceInfo.haveDoubleSupport); |
||||
|
||||
const char* isUnifiedMemoryStr = deviceInfo.isUnifiedMemory ? "Yes" : "No"; |
||||
DUMP_INFO_STDOUT("Unified memory", isUnifiedMemoryStr); |
||||
DUMP_INFO_XML("cv_ocl_isUnifiedMemory", deviceInfo.isUnifiedMemory); |
||||
} |
||||
catch (...) |
||||
{ |
||||
DUMP_INFO_STDOUT("OpenCL device", "not available"); |
||||
DUMP_INFO_XML("cv_ocl", "not available"); |
||||
} |
||||
} |
||||
|
||||
#undef DUMP_INFO_STDOUT |
||||
#undef DUMP_INFO_XML |
@ -0,0 +1,115 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__ |
||||
#define __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__ |
||||
|
||||
#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" |
||||
#include <vector> |
||||
#include <string> |
||||
|
||||
namespace cl_utils { |
||||
|
||||
// Fills `platforms` with the ids of all available OpenCL platforms.
//
// Returns CL_SUCCESS or the error code of the failing clGetPlatformIDs call.
// `platforms` is left untouched when the initial count query fails. When the
// runtime reports zero platforms the vector is emptied and CL_SUCCESS is
// returned without a second query (taking &platforms[0] of an empty vector
// is undefined behaviour).
inline cl_int getPlatforms(std::vector<cl_platform_id>& platforms)
{
    cl_uint n = 0;

    cl_int err = ::clGetPlatformIDs(0, NULL, &n);
    if (err != CL_SUCCESS)
        return err;

    platforms.clear();
    if (n == 0)
        return CL_SUCCESS;

    platforms.resize(n);
    err = ::clGetPlatformIDs(n, &platforms[0], NULL);
    if (err != CL_SUCCESS)
        return err;

    return CL_SUCCESS;
}
||||
|
||||
// Fills `devices` with the ids of all devices of the given `type` on
// `platform`.
//
// Returns CL_SUCCESS or the error code of the failing clGetDeviceIDs call.
// `devices` is left untouched when the initial count query fails. When the
// runtime reports zero devices the vector is emptied and CL_SUCCESS is
// returned without a second query (taking &devices[0] of an empty vector is
// undefined behaviour).
inline cl_int getDevices(cl_platform_id platform, cl_device_type type, std::vector<cl_device_id>& devices)
{
    cl_uint n = 0;

    cl_int err = ::clGetDeviceIDs(platform, type, 0, NULL, &n);
    if (err != CL_SUCCESS)
        return err;

    devices.clear();
    if (n == 0)
        return CL_SUCCESS;

    devices.resize(n);
    err = ::clGetDeviceIDs(platform, type, n, &devices[0], NULL);
    if (err != CL_SUCCESS)
        return err;

    return CL_SUCCESS;
}
||||
|
||||
|
||||
|
||||
|
||||
template <typename Functor, typename ObjectType, typename T> |
||||
inline cl_int getScalarInfo(Functor f, ObjectType obj, cl_uint name, T& param) |
||||
{ |
||||
return f(obj, name, sizeof(T), ¶m, NULL); |
||||
} |
||||
|
||||
template <typename Functor, typename ObjectType> |
||||
inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param) |
||||
{ |
||||
::size_t required; |
||||
cl_int err = f(obj, name, 0, NULL, &required); |
||||
if (err != CL_SUCCESS) |
||||
return err; |
||||
|
||||
param.clear(); |
||||
if (required > 0) |
||||
{ |
||||
std::vector<char> buf(required + 1, char(0)); |
||||
err = f(obj, name, required, &buf[0], NULL); |
||||
if (err != CL_SUCCESS) |
||||
return err; |
||||
param = &buf[0]; |
||||
} |
||||
|
||||
return CL_SUCCESS; |
||||
}; |
||||
|
||||
} // namespace cl_utils
|
||||
|
||||
#endif // __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__
|
File diff suppressed because it is too large
Load Diff
@ -1,764 +0,0 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// @Authors |
||||
// Sen Liu, sen@multicorewareinc.com |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors as is and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#define BUFFER 256 |
||||
// Work-group sum of three per-work-item values.
//
// Each work-item stores its values at index `tid` of the three local buffers;
// the buffers are then folded in halves until the totals sit in smem1[0],
// smem2[0] and smem3[0]. Only element 0 of each buffer is meaningful on
// return.
//
// Every step is separated by a barrier, so the result does not depend on
// work-items executing in lock-step. Must be called by all work-items of the
// group (it contains barriers).
// NOTE(review): assumes BUFFER is a power of two and that every slot in
// [0, BUFFER) is written, i.e. the work-group has BUFFER work-items. Confirm
// against the host launch configuration.
void reduce3(float val1, float val2, float val3, __local float *smem1, __local float *smem2, __local float *smem3, int tid)
{
    smem1[tid] = val1;
    smem2[tid] = val2;
    smem3[tid] = val3;
    barrier(CLK_LOCAL_MEM_FENCE);

    for (int stride = BUFFER / 2; stride > 0; stride >>= 1)
    {
        if (tid < stride)
        {
            smem1[tid] += smem1[tid + stride];
            smem2[tid] += smem2[tid + stride];
            smem3[tid] += smem3[tid + stride];
        }

        // Outside the `if`: every work-item has to reach the barrier.
        barrier(CLK_LOCAL_MEM_FENCE);
    }
}
||||
|
||||
// Work-group sum of two per-work-item values.
//
// Each work-item stores its values at index `tid` of the two local buffers;
// the buffers are then folded in halves until the totals sit in smem1[0] and
// smem2[0]. Only element 0 of each buffer is meaningful on return.
//
// Every step is separated by a barrier, so the result does not depend on
// work-items executing in lock-step. Must be called by all work-items of the
// group (it contains barriers).
// NOTE(review): assumes BUFFER is a power of two and that every slot in
// [0, BUFFER) is written, i.e. the work-group has BUFFER work-items. Confirm
// against the host launch configuration.
void reduce2(float val1, float val2, __local float *smem1, __local float *smem2, int tid)
{
    smem1[tid] = val1;
    smem2[tid] = val2;
    barrier(CLK_LOCAL_MEM_FENCE);

    for (int stride = BUFFER / 2; stride > 0; stride >>= 1)
    {
        if (tid < stride)
        {
            smem1[tid] += smem1[tid + stride];
            smem2[tid] += smem2[tid + stride];
        }

        // Outside the `if`: every work-item has to reach the barrier.
        barrier(CLK_LOCAL_MEM_FENCE);
    }
}
||||
|
||||
// Work-group sum of one per-work-item value.
//
// Each work-item stores its value at smem1[tid]; the buffer is then folded in
// halves until the total sits in smem1[0]. Only smem1[0] is meaningful on
// return.
//
// Every step is separated by a barrier, so the result does not depend on
// work-items executing in lock-step. Must be called by all work-items of the
// group (it contains barriers).
// NOTE(review): assumes BUFFER is a power of two and that every slot in
// [0, BUFFER) is written, i.e. the work-group has BUFFER work-items. Confirm
// against the host launch configuration.
void reduce1(float val1, __local float *smem1, int tid)
{
    smem1[tid] = val1;
    barrier(CLK_LOCAL_MEM_FENCE);

    for (int stride = BUFFER / 2; stride > 0; stride >>= 1)
    {
        if (tid < stride)
        {
            smem1[tid] += smem1[tid + stride];
        }

        // Outside the `if`: every work-item has to reach the barrier.
        barrier(CLK_LOCAL_MEM_FENCE);
    }
}
||||
|
||||
#define SCALE (1.0f / (1 << 20)) |
||||
#define THRESHOLD 0.01f |
||||
#define DIMENSION 21 |
||||
|
||||
// Samples a single-channel float image at the fractional position (x, y)
// with bilinear interpolation.
//
// The four neighbouring pixel coordinates are clamped to
// [0, cols - 1] x [0, rows - 1], so positions outside the image reuse the
// nearest border pixels. `elemCntPerRow` is the distance between consecutive
// rows, counted in elements.
float readImage2Df_C1(__global const float *image, const float x, const float y, const int rows, const int cols, const int elemCntPerRow)
{
    const float fx = floor(x);
    const float fy = floor(y);
    const int ix = (int)fx;
    const int iy = (int)fy;

    // Clamped column / row indices of the 2x2 neighbourhood.
    const int left = clamp(ix, 0, cols - 1);
    const int top = clamp(iy, 0, rows - 1);
    const int right = clamp(ix + 1, 0, cols - 1);
    const int bottom = clamp(iy + 1, 0, rows - 1);

    // Fractional offsets inside the cell.
    const float a = x - fx;
    const float b = y - fy;

    const float topLeft = image[mad24(top, elemCntPerRow, left)];
    const float topRight = image[mad24(top, elemCntPerRow, right)];
    const float bottomLeft = image[mad24(bottom, elemCntPerRow, left)];
    const float bottomRight = image[mad24(bottom, elemCntPerRow, right)];

    return (1 - a) * (1 - b) * topLeft
           + a * (1 - b) * topRight
           + (1 - a) * b * bottomLeft
           + a * b * bottomRight;
}
||||
|
||||
// Sparse Lucas-Kanade tracking step for single-channel float images.
//
// One work-group handles one point (indexed by get_group_id(0)); the
// work-items of the group share the pixels of the c_winSize_x x c_winSize_y
// window around it.
//
//   I, J          previous / next image, `elemCntPerRow` elements per row
//   prevPts       point positions in the previous image (scaled by 1 / 2^level here)
//   nextPts       in: current estimate (doubled before use); out: refined position
//   status        set to 0 (only when level == 0) if the point cannot be tracked
//   err           out: mean absolute window difference, written only if calcErr != 0
//   c_iters       number of refinement iterations
//
// prevPtsStep, nextPtsStep, PATCH_X, PATCH_Y and cn are not used by this kernel.
//
// NOTE(review): the per-work-item patch arrays are [1][3], so this assumes a
// work-item visits at most 1 window row and 3 window columns, and the
// reductions assume BUFFER work-items per group. Confirm against the host
// launch configuration.
__kernel void lkSparse_C1_D5(__global const float *I, __global const float *J,
                             __global const float2 *prevPts, int prevPtsStep, __global float2 *nextPts, int nextPtsStep, __global uchar *status, __global float *err,
                             const int level, const int rows, const int cols, const int elemCntPerRow,
                             int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
    __local float smem1[BUFFER];
    __local float smem2[BUFFER];
    __local float smem3[BUFFER];

    float2 c_halfWin = (float2)((c_winSize_x - 1) >> 1, (c_winSize_y - 1) >> 1);

    const int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0));

    float2 prevPt = prevPts[get_group_id(0)] * (1.0f / (1 << level));

    // The exit conditions in this kernel depend only on per-group values, so
    // the whole work-group leaves together and no barrier is left waiting.
    if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
    {
        if (tid == 0 && level == 0)
        {
            status[get_group_id(0)] = 0;
        }

        return;
    }

    prevPt -= c_halfWin;

    // extract the patch from the first image, compute covariation matrix of derivatives

    float A11 = 0;
    float A12 = 0;
    float A22 = 0;

    float I_patch[1][3];
    float dIdx_patch[1][3];
    float dIdy_patch[1][3];

    for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i)
    {
        for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j)
        {
            float x = (prevPt.x + xBase);
            float y = (prevPt.y + yBase);

            I_patch[i][j] = readImage2Df_C1(I, x, y, rows, cols, elemCntPerRow);

            // Central differences with 3-10-3 (Scharr-style) weights.
            float dIdx = 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) -
                         (3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow));

            float dIdy = 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) -
                         (3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow));

            dIdx_patch[i][j] = dIdx;
            dIdy_patch[i][j] = dIdy;

            A11 += dIdx * dIdx;
            A12 += dIdx * dIdy;
            A22 += dIdy * dIdy;
        }
    }

    reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
    barrier(CLK_LOCAL_MEM_FENCE);

    A11 = smem1[0];
    A12 = smem2[0];
    A22 = smem3[0];

    // All work-items must have read the totals before the buffers are reused
    // by the next reduction; otherwise a fast work-item may overwrite slot 0
    // while a slow one is still reading it.
    barrier(CLK_LOCAL_MEM_FENCE);

    float D = A11 * A22 - A12 * A12;

    // Determinant too small: the 2x2 system cannot be inverted reliably.
    if (D < 1.192092896e-07f)
    {
        if (tid == 0 && level == 0)
        {
            status[get_group_id(0)] = 0;
        }

        return;
    }

    D = 1.f / D;

    A11 *= D;
    A12 *= D;
    A22 *= D;

    float2 nextPt = nextPts[get_group_id(0)];
    nextPt = nextPt * 2.0f - c_halfWin;

    for (int k = 0; k < c_iters; ++k)
    {
        if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows)
        {
            if (tid == 0 && level == 0)
            {
                status[get_group_id(0)] = 0;
            }

            return;
        }

        float b1 = 0;
        float b2 = 0;

        for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
        {
            for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
            {
                float diff = (readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f;

                b1 += diff * dIdx_patch[i][j];
                b2 += diff * dIdy_patch[i][j];
            }
        }

        reduce2(b1, b2, smem1, smem2, tid);
        barrier(CLK_LOCAL_MEM_FENCE);

        b1 = smem1[0];
        b2 = smem2[0];

        // Same hazard as above: finish reading before the next iteration (or
        // the error reduction) writes the buffers again.
        barrier(CLK_LOCAL_MEM_FENCE);

        float2 delta;
        delta.x = A12 * b2 - A22 * b1;
        delta.y = A12 * b1 - A11 * b2;

        nextPt += delta;

        //if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
        //    break;
    }

    float errval = 0.0f;

    if (calcErr)
    {
        for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
        {
            for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
            {
                float diff = readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j];

                errval += fabs(diff);
            }
        }

        reduce1(errval, smem1, tid);
    }

    // Work-item 0 produced smem1[0] itself, so it can read it without
    // another barrier.
    if (tid == 0)
    {
        nextPt += c_halfWin;

        nextPts[get_group_id(0)] = nextPt;

        if (calcErr)
        {
            err[get_group_id(0)] = smem1[0] / (c_winSize_x * c_winSize_y);
        }
    }
}
||||
|
||||
/*
 * Bilinearly interpolated read of a 4-channel float image stored in global memory.
 *
 *   image          pointer to the first float4 element
 *   x, y           sampling position (column, row); may be fractional
 *   rows, cols     image extent, used to clamp the neighbour indices
 *   elemCntPerRow  row pitch expressed in float4 elements
 *
 * The four neighbour indices are clamped to [0, cols-1] x [0, rows-1], so positions
 * outside the image replicate the border elements. Returns the weighted sum of the
 * four neighbours.
 */
float4 readImage2Df_C4(__global const float4 *image, const float x, const float y, const int rows, const int cols, const int elemCntPerRow)
{
    // Split the position into its integer cell and the fractional offset inside it.
    const float fx = floor(x);
    const float fy = floor(y);
    const int ix = (int)fx;
    const int iy = (int)fy;

    // Neighbour columns / rows, clamped to the image.
    const int col0 = clamp(ix, 0, cols - 1);
    const int row0 = clamp(iy, 0, rows - 1);
    const int col1 = clamp(ix + 1, 0, cols - 1);
    const int row1 = clamp(iy + 1, 0, rows - 1);

    const float a = x - fx;   // horizontal weight of the right-hand column
    const float b = y - fy;   // vertical weight of the lower row

    const float w00 = (1 - a) * (1 - b);
    const float w01 = a * (1 - b);
    const float w10 = (1 - a) * b;
    const float w11 = a * b;

    const float4 p00 = image[mad24(row0, elemCntPerRow, col0)];
    const float4 p01 = image[mad24(row0, elemCntPerRow, col1)];
    const float4 p10 = image[mad24(row1, elemCntPerRow, col0)];
    const float4 p11 = image[mad24(row1, elemCntPerRow, col1)];

    return w00 * p00
           + w01 * p01
           + w10 * p10
           + w11 * p11;
}
||||
|
||||
/*
 * Sparse Lucas-Kanade tracking step for 4-channel float images (only the .x/.y/.z
 * components contribute to the sums below; .w is ignored).
 *
 * One work-group processes one point, selected by get_group_id(0); the work-items of
 * the group share the window pixels and combine their partial sums through the
 * reduce1/reduce2/reduce3 helpers (defined elsewhere in this file) using the
 * __local buffers smem1..smem3.
 *
 *   I, J                  previous / next image, reinterpreted as float4 elements
 *   prevPts               input points; scaled by 1 / (1 << level) before use
 *   nextPts               in/out: current estimate, read as nextPts * 2 and overwritten
 *                         with the refined position
 *   status                set to 0 for a point that cannot be tracked (written only
 *                         when level == 0)
 *   err                   mean absolute difference over the window (written only when
 *                         calcErr is non-zero)
 *   rows, cols            image extent
 *   elemCntPerRow         row pitch in float4 elements
 *   c_winSize_x/y         search window size
 *   c_iters               number of refinement iterations
 *   prevPtsStep, nextPtsStep, PATCH_X, PATCH_Y, cn
 *                         not used by this kernel body
 *
 * NOTE(review): the per-work-item patch arrays are sized [1][3]; this presumably
 * relies on the host choosing a work-group shape such that each work-item visits at
 * most one window row and three window columns - confirm against the launch code.
 */
__kernel void lkSparse_C4_D5(__global const float *I, __global const float *J,
                             __global const float2 *prevPts, int prevPtsStep, __global float2 *nextPts, int nextPtsStep, __global uchar *status, __global float *err,
                             const int level, const int rows, const int cols, const int elemCntPerRow,
                             int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
    __local float smem1[BUFFER];
    __local float smem2[BUFFER];
    __local float smem3[BUFFER];

    float2 c_halfWin = (float2)((c_winSize_x - 1) >> 1, (c_winSize_y - 1) >> 1);

    // Linear index of this work-item inside its work-group.
    const int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0));

    float2 prevPt = prevPts[get_group_id(0)] * (1.0f / (1 << level));

    // Point outside the image: nothing to track. The test is identical for every
    // work-item of the group, so the whole group leaves together.
    if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
    {
        if (tid == 0 && level == 0)
        {
            status[get_group_id(0)] = 0;
        }

        return;
    }

    // From here on prevPt is the top-left corner of the window.
    prevPt -= c_halfWin;

    // extract the patch from the first image, compute covariance matrix of derivatives

    float A11 = 0;
    float A12 = 0;
    float A22 = 0;

    float4 I_patch[1][3];
    float4 dIdx_patch[1][3];
    float4 dIdy_patch[1][3];

    __global const float4 *ptrI = (__global const float4 *)I;

    for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i)
    {
        for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j)
        {
            float x = (prevPt.x + xBase);
            float y = (prevPt.y + yBase);

            I_patch[i][j] = readImage2Df_C4(ptrI, x, y, rows, cols, elemCntPerRow);

            // The four diagonal neighbours are shared by both derivative filters.
            const float4 tl = readImage2Df_C4(ptrI, x - 1, y - 1, rows, cols, elemCntPerRow);
            const float4 tr = readImage2Df_C4(ptrI, x + 1, y - 1, rows, cols, elemCntPerRow);
            const float4 bl = readImage2Df_C4(ptrI, x - 1, y + 1, rows, cols, elemCntPerRow);
            const float4 br = readImage2Df_C4(ptrI, x + 1, y + 1, rows, cols, elemCntPerRow);

            // 3-10-3 (Scharr-weighted) horizontal and vertical derivatives.
            float4 dIdx = 3.0f * tr + 10.0f * readImage2Df_C4(ptrI, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * br -
                          (3.0f * tl + 10.0f * readImage2Df_C4(ptrI, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * bl);

            float4 dIdy = 3.0f * bl + 10.0f * readImage2Df_C4(ptrI, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * br -
                          (3.0f * tl + 10.0f * readImage2Df_C4(ptrI, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * tr);

            dIdx_patch[i][j] = dIdx;
            dIdy_patch[i][j] = dIdy;

            const float4 xx = dIdx * dIdx;
            const float4 xy = dIdx * dIdy;
            const float4 yy = dIdy * dIdy;

            A11 += xx.x + xx.y + xx.z;
            A12 += xy.x + xy.y + xy.z;
            A22 += yy.x + yy.y + yy.z;
        }
    }

    reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
    barrier(CLK_LOCAL_MEM_FENCE);

    A11 = smem1[0];
    A12 = smem2[0];
    A22 = smem3[0];

    // Every work-item must have fetched the broadcast sums before the buffers are
    // reused by the next reduction; without this barrier a fast work-item could
    // overwrite slot 0 while a slower one is still reading it.
    barrier(CLK_LOCAL_MEM_FENCE);

    float D = A11 * A22 - A12 * A12;
    //pD[get_group_id(0)] = D;

    // Determinant below FLT_EPSILON: the 2x2 system is treated as singular.
    if (D < 1.192092896e-07f)
    {
        if (tid == 0 && level == 0)
        {
            status[get_group_id(0)] = 0;
        }

        return;
    }

    D = 1.f / D;

    A11 *= D;
    A12 *= D;
    A22 *= D;

    float2 nextPt = nextPts[get_group_id(0)];

    // The stored estimate is doubled and moved to the window's top-left corner.
    nextPt = nextPt * 2.0f - c_halfWin;

    __global const float4 *ptrJ = (__global const float4 *)J;

    for (int k = 0; k < c_iters; ++k)
    {
        // Window drifted out of the image; uniform across the group (see above).
        if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows)
        {
            if (tid == 0 && level == 0)
            {
                status[get_group_id(0)] = 0;
            }

            return;
        }

        float b1 = 0;
        float b2 = 0;

        for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
        {
            for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
            {
                float4 diff = (readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f;

                const float4 gx = diff * dIdx_patch[i][j];
                const float4 gy = diff * dIdy_patch[i][j];

                b1 += gx.x + gx.y + gx.z;
                b2 += gy.x + gy.y + gy.z;
            }
        }

        reduce2(b1, b2, smem1, smem2, tid);
        barrier(CLK_LOCAL_MEM_FENCE);

        b1 = smem1[0];
        b2 = smem2[0];

        // Same hazard as after reduce3: finish all reads of slot 0 before the next
        // iteration (or the error reduction) starts writing the buffers again.
        barrier(CLK_LOCAL_MEM_FENCE);

        float2 delta;
        delta.x = A12 * b2 - A22 * b1;
        delta.y = A12 * b1 - A11 * b2;

        nextPt += delta;

        //if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)
        //    break;
    }

    float errval = 0.0f;

    if (calcErr)
    {
        for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i)
        {
            for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j)
            {
                float4 diff = readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j];

                errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
            }
        }

        // NOTE(review): smem1[0] is read below by work-item 0 only and without a
        // barrier; presumably reduce1 leaves the final sum written by work-item 0 -
        // confirm against reduce1.
        reduce1(errval, smem1, tid);
    }

    if (tid == 0)
    {
        // Convert back from window corner to window centre before storing.
        nextPt += c_halfWin;
        nextPts[get_group_id(0)] = nextPt;

        if (calcErr)
        {
            // Average over the window pixels and the three channels summed above.
            err[get_group_id(0)] = smem1[0] / (3 * c_winSize_x * c_winSize_y);
        }
    }
}
||||
|
||||
/*
 * Nearest-lower-neighbour read of a single-channel int image in global memory.
 *
 *   image          pointer to the first element
 *   coor           sampling position (x = column, y = row); floored to an integer cell
 *   size           image extent as (width, height); the cell is clamped into it
 *   elemCntPerRow  row pitch in int elements
 *
 * Returns the element at the clamped cell, so out-of-range positions replicate the
 * border.
 */
int readImage2Di_C1(__global const int *image, float2 coor, int2 size, const int elemCntPerRow)
{
    const int col = clamp((int)floor(coor.x), 0, size.x - 1);
    const int row = clamp((int)floor(coor.y), 0, size.y - 1);
    const int offset = mad24(row, elemCntPerRow, col);

    return image[offset];
}
||||
|
||||
/*
 * Dense Lucas-Kanade step on single-channel images passed as int elements.
 *
 * Each work-item refines the flow of one pixel (get_global_id(0), get_global_id(1)).
 * The work-group first caches, in __local memory, a tile of I plus its two
 * derivative images; the tile is the work-group footprint enlarged by half a window
 * on every side.
 *
 *   I, J                 previous / next image
 *   u, v                 output flow components; uStep / vStep are divided by 4 when
 *                        indexing (presumably byte pitches - confirm with the host)
 *   prevU, prevV         incoming flow, sampled at (x / 2, y / 2) and doubled
 *   rows, cols           image extent
 *   elemCntPerRow        row pitch of I and J in int elements
 *   c_winSize_x/y        window size
 *   c_iters              maximum number of iterations
 *   calcErr              enables the error accumulation at the end (see note there)
 *
 * The three tiles share the fixed 8192-int buffer; nothing here checks that
 * 3 * patchWidth * patchHeight fits into it.
 */
__kernel void lkDense_C1_D0(__global const int *I, __global const int *J, __global float *u, int uStep, __global float *v, int vStep, __global const float *prevU, int prevUStep, __global const float *prevV, int prevVStep,
                            const int rows, const int cols, /*__global float* err, int errStep, int cn,*/
                            const int elemCntPerRow, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
    const int c_halfWin_x = (c_winSize_x - 1) / 2;
    const int c_halfWin_y = (c_winSize_y - 1) / 2;

    // Work-item position and work-group shape, fetched once.
    const int lx = get_local_id(0);
    const int ly = get_local_id(1);
    const int lw = get_local_size(0);
    const int lh = get_local_size(1);

    const int patchWidth = lw + 2 * c_halfWin_x;
    const int patchHeight = lh + 2 * c_halfWin_y;
    const int patchArea = patchWidth * patchHeight;

    __local int smem[8192];

    // Three consecutive tiles: intensities, d/dx, d/dy.
    __local int *I_patch = smem;
    __local int *dIdx_patch = I_patch + patchArea;
    __local int *dIdy_patch = dIdx_patch + patchArea;

    const int xBase = get_group_id(0) * get_local_size(0);
    const int yBase = get_group_id(1) * get_local_size(1);
    int2 size = (int2)(cols, rows);

    // Cooperative tile fill: the work-items stride over the tile in steps of the
    // work-group size.
    for (int i = ly; i < patchHeight; i += lh)
    {
        const float sy = yBase - c_halfWin_y + i + 0.5f;

        for (int j = lx; j < patchWidth; j += lw)
        {
            const float sx = xBase - c_halfWin_x + j + 0.5f;
            const int cell = i * patchWidth + j;

            I_patch[cell] = readImage2Di_C1(I, (float2)(sx, sy), size, elemCntPerRow);

            // Scharr derivative: gather the eight neighbours once.
            const int tl = readImage2Di_C1(I, (float2)(sx - 1, sy - 1), size, elemCntPerRow);
            const int tc = readImage2Di_C1(I, (float2)(sx, sy - 1), size, elemCntPerRow);
            const int tr = readImage2Di_C1(I, (float2)(sx + 1, sy - 1), size, elemCntPerRow);
            const int ml = readImage2Di_C1(I, (float2)(sx - 1, sy), size, elemCntPerRow);
            const int mr = readImage2Di_C1(I, (float2)(sx + 1, sy), size, elemCntPerRow);
            const int bl = readImage2Di_C1(I, (float2)(sx - 1, sy + 1), size, elemCntPerRow);
            const int bc = readImage2Di_C1(I, (float2)(sx, sy + 1), size, elemCntPerRow);
            const int br = readImage2Di_C1(I, (float2)(sx + 1, sy + 1), size, elemCntPerRow);

            dIdx_patch[cell] = 3 * tr + 10 * mr + 3 * br -
                               (3 * tl + 10 * ml + 3 * bl);

            dIdy_patch[cell] = 3 * bl + 10 * bc + 3 * br -
                               (3 * tl + 10 * tc + 3 * tr);
        }
    }

    // The tile must be complete before any work-item reads its window from it.
    barrier(CLK_LOCAL_MEM_FENCE);

    // extract the patch from the first image, compute covariance matrix of derivatives

    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Padding work-items (outside the image) only helped with the tile fill.
    if (x >= cols || y >= rows)
    {
        return;
    }

    int A11i = 0;
    int A12i = 0;
    int A22i = 0;

    for (int i = 0; i < c_winSize_y; ++i)
    {
        for (int j = 0; j < c_winSize_x; ++j)
        {
            const int cell = (ly + i) * patchWidth + (lx + j);
            const int gx = dIdx_patch[cell];
            const int gy = dIdy_patch[cell];

            A11i += gx * gx;
            A12i += gx * gy;
            A22i += gy * gy;
        }
    }

    float A11 = A11i;
    float A12 = A12i;
    float A22 = A22i;

    float D = A11 * A22 - A12 * A12;

    //if (calcErr && GET_MIN_EIGENVALS)
    //    (err + y * errStep)[x] = minEig;

    // Determinant below FLT_EPSILON: leave u/v untouched for this pixel.
    if (D < 1.192092896e-07f)
    {
        //if (calcErr)
        //    err(y, x) = 3.402823466e+38f;

        return;
    }

    D = 1.f / D;

    A11 *= D;
    A12 *= D;
    A22 *= D;

    // Start from the incoming flow, sampled at half resolution and doubled.
    float2 nextPt;
    nextPt.x = x + prevU[y / 2 * prevUStep / 4 + x / 2] * 2.0f;
    nextPt.y = y + prevV[y / 2 * prevVStep / 4 + x / 2] * 2.0f;

    for (int k = 0; k < c_iters; ++k)
    {
        if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
        {
            //if (calcErr)
            //    err(y, x) = 3.402823466e+38f;

            return;
        }

        int b1 = 0;
        int b2 = 0;

        for (int i = 0; i < c_winSize_y; ++i)
        {
            for (int j = 0; j < c_winSize_x; ++j)
            {
                const int cell = (ly + i) * patchWidth + lx + j;

                const int iI = I_patch[cell];
                const int iJ = readImage2Di_C1(J, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f), size, elemCntPerRow);

                const int diff = (iJ - iI) * 32;

                b1 += diff * dIdx_patch[cell];
                b2 += diff * dIdy_patch[cell];
            }
        }

        float2 delta;
        delta.x = A12 * b2 - A22 * b1;
        delta.y = A12 * b1 - A11 * b2;

        nextPt.x += delta.x;
        nextPt.y += delta.y;

        // Converged: the last update moved the point by less than 0.01 in both axes.
        if (fabs(delta.x) < 0.01f && fabs(delta.y) < 0.01f)
        {
            break;
        }
    }

    u[y * uStep / 4 + x] = nextPt.x - x;
    v[y * vStep / 4 + x] = nextPt.y - y;

    if (calcErr)
    {
        // The sum is accumulated but not stored: the err output is commented out of
        // the signature and the store below is disabled.
        int errval = 0;

        for (int i = 0; i < c_winSize_y; ++i)
        {
            for (int j = 0; j < c_winSize_x; ++j)
            {
                const int iI = I_patch[(ly + i) * patchWidth + lx + j];
                const int iJ = readImage2Di_C1(J, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f), size, elemCntPerRow);

                errval += abs(iJ - iI);
            }
        }

        //err[y * errStep / 4 + x] = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
    }
}
@ -0,0 +1,80 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" // for OpenCL types: cl_mem |
||||
|
||||
// Checks that a user-supplied OpenCL program can be run through
// cv::ocl::openCLExecuteKernelInterop on buffers owned by oclMat objects:
// a trivial copy kernel is executed and the downloaded result is compared
// with the host-side source matrix.
TEST(TestAPI, openCLExecuteKernelInterop)
{
    cv::RNG rng;
    Size sz(10000, 1);
    cv::Mat cpuMat = cvtest::randomMat(rng, sz, CV_32FC4, -10, 10, false);

    cv::ocl::oclMat gpuMat(cpuMat);
    cv::ocl::oclMat gpuMatDst(sz, CV_32FC4);

    // One work-item per float4 element; the kernel has no bounds check, so the
    // global size below must equal the element count.
    const char* kernelStr =
        "__kernel void test_kernel(__global float4* src, __global float4* dst) {\n"
        " int x = get_global_id(0);\n"
        " dst[x] = src[x];\n"
        "}\n";

    cv::ocl::ProgramSource program("test_interop", kernelStr);

    // Kernel arguments are passed as (size, pointer-to-value) pairs; here the
    // values are the data members of the two device matrices, passed with
    // sizeof(cl_mem).
    using namespace std;
    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMat.data ));
    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMatDst.data ));

    // Explicit cast: int -> size_t inside a braced initializer is a narrowing
    // conversion and is rejected by C++11 (and later) compilers.
    size_t globalThreads[3] = { static_cast<size_t>(sz.width), 1, 1 };
    cv::ocl::openCLExecuteKernelInterop(
        gpuMat.clCxt,
        program,
        "test_kernel",
        globalThreads, NULL, args,
        -1, -1,
        "");

    cv::Mat dst;
    gpuMatDst.download(dst);

    EXPECT_LE(checkNorm(cpuMat, dst), 1e-3);
}
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue