mirror of https://github.com/opencv/opencv.git
commit
1ca8f33b4e
276 changed files with 11822 additions and 5158 deletions
@ -1,154 +1,104 @@ |
||||
if(APPLE) |
||||
set(OPENCL_FOUND YES) |
||||
set(OPENCL_LIBRARIES "-framework OpenCL") |
||||
else() |
||||
set(OPENCL_LIBRARY "-framework OpenCL" CACHE STRING "OpenCL library") |
||||
set(OPENCL_INCLUDE_DIR "" CACHE STRING "OpenCL include directory") |
||||
mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY) |
||||
else(APPLE) |
||||
find_package(OpenCL QUIET) |
||||
if(WITH_OPENCLAMDFFT) |
||||
set(CLAMDFFT_SEARCH_PATH $ENV{CLAMDFFT_PATH}) |
||||
if(NOT CLAMDFFT_SEARCH_PATH) |
||||
if(WIN32) |
||||
set( CLAMDFFT_SEARCH_PATH "C:\\Program Files (x86)\\AMD\\clAmdFft" ) |
||||
endif() |
||||
endif() |
||||
set( CLAMDFFT_INCLUDE_SEARCH_PATH ${CLAMDFFT_SEARCH_PATH}/include ) |
||||
if(UNIX) |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(CLAMDFFT_LIB_SEARCH_PATH /usr/lib) |
||||
else() |
||||
set(CLAMDFFT_LIB_SEARCH_PATH /usr/lib64) |
||||
|
||||
if (NOT OPENCL_FOUND) |
||||
find_path(OPENCL_ROOT_DIR |
||||
NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h include/nvidia-current/CL/cl.h |
||||
PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV INTELOCLSDKROOT |
||||
DOC "OpenCL root directory" |
||||
NO_DEFAULT_PATH) |
||||
|
||||
find_path(OPENCL_INCLUDE_DIR |
||||
NAMES OpenCL/cl.h CL/cl.h |
||||
HINTS ${OPENCL_ROOT_DIR} |
||||
PATH_SUFFIXES include include/nvidia-current |
||||
DOC "OpenCL include directory") |
||||
|
||||
if (X86_64) |
||||
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64) |
||||
elseif (X86) |
||||
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86) |
||||
endif() |
||||
else() |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(CLAMDFFT_LIB_SEARCH_PATH ${CLAMDFFT_SEARCH_PATH}\\lib32\\import) |
||||
else() |
||||
set(CLAMDFFT_LIB_SEARCH_PATH ${CLAMDFFT_SEARCH_PATH}\\lib64\\import) |
||||
|
||||
find_library(OPENCL_LIBRARY |
||||
NAMES OpenCL |
||||
HINTS ${OPENCL_ROOT_DIR} |
||||
PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES} |
||||
DOC "OpenCL library") |
||||
|
||||
mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY) |
||||
include(FindPackageHandleStandardArgs) |
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(OPENCL DEFAULT_MSG OPENCL_LIBRARY OPENCL_INCLUDE_DIR ) |
||||
endif() |
||||
endif(APPLE) |
||||
|
||||
if(OPENCL_FOUND) |
||||
set(HAVE_OPENCL 1) |
||||
set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) |
||||
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) |
||||
|
||||
# Sub-directory (relative to the clAmdFft / clAmdBlas root) that holds the
# import libraries. The AMD packages ship separate lib32/import and
# lib64/import folders, so the suffix has to follow the target bitness.
if(X86_64)
  set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import)
elseif(X86)
  set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
endif()
||||
|
||||
if(WITH_OPENCLAMDFFT) |
||||
find_path(CLAMDFFT_ROOT_DIR |
||||
NAMES include/clAmdFft.h |
||||
PATHS ENV CLAMDFFT_PATH ENV ProgramFiles |
||||
PATH_SUFFIXES clAmdFft AMD/clAmdFft |
||||
DOC "AMD FFT root directory" |
||||
NO_DEFAULT_PATH) |
||||
|
||||
find_path(CLAMDFFT_INCLUDE_DIR |
||||
NAMES clAmdFft.h |
||||
PATHS ${CLAMDFFT_INCLUDE_SEARCH_PATH} |
||||
PATH_SUFFIXES clAmdFft |
||||
NO_DEFAULT_PATH) |
||||
HINTS ${CLAMDFFT_ROOT_DIR} |
||||
PATH_SUFFIXES include |
||||
DOC "clAmdFft include directory") |
||||
|
||||
find_library(CLAMDFFT_LIBRARY |
||||
NAMES clAmdFft.Runtime |
||||
PATHS ${CLAMDFFT_LIB_SEARCH_PATH} |
||||
NO_DEFAULT_PATH) |
||||
if(CLAMDFFT_LIBRARY) |
||||
set(CLAMDFFT_LIBRARIES ${CLAMDFFT_LIBRARY}) |
||||
else() |
||||
set(CLAMDFFT_LIBRARIES "") |
||||
HINTS ${CLAMDFFT_ROOT_DIR} |
||||
PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES} |
||||
DOC "clAmdFft library") |
||||
|
||||
if(CLAMDFFT_LIBRARY AND CLAMDFFT_INCLUDE_DIR) |
||||
set(HAVE_CLAMDFFT 1) |
||||
list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDFFT_INCLUDE_DIR}") |
||||
list(APPEND OPENCL_LIBRARIES "${CLAMDFFT_LIBRARY}") |
||||
endif() |
||||
endif() |
||||
|
||||
if(WITH_OPENCLAMDBLAS) |
||||
set(CLAMDBLAS_SEARCH_PATH $ENV{CLAMDBLAS_PATH}) |
||||
if(NOT CLAMDBLAS_SEARCH_PATH) |
||||
if(WIN32) |
||||
set( CLAMDBLAS_SEARCH_PATH "C:\\Program Files (x86)\\AMD\\clAmdBlas" ) |
||||
endif() |
||||
endif() |
||||
set( CLAMDBLAS_INCLUDE_SEARCH_PATH ${CLAMDBLAS_SEARCH_PATH}/include ) |
||||
if(UNIX) |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(CLAMDBLAS_LIB_SEARCH_PATH /usr/lib) |
||||
else() |
||||
set(CLAMDBLAS_LIB_SEARCH_PATH /usr/lib64) |
||||
endif() |
||||
else() |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(CLAMDBLAS_LIB_SEARCH_PATH ${CLAMDBLAS_SEARCH_PATH}\\lib32\\import) |
||||
else() |
||||
set(CLAMDBLAS_LIB_SEARCH_PATH ${CLAMDBLAS_SEARCH_PATH}\\lib64\\import) |
||||
endif() |
||||
endif() |
||||
# Locate the clAmdBlas installation root: the directory that contains
# include/clAmdBlas.h. Only the CLAMDBLAS_PATH and ProgramFiles environment
# locations (plus the listed suffixes) are probed; NO_DEFAULT_PATH keeps
# CMake from looking anywhere else.
find_path(CLAMDBLAS_ROOT_DIR
          NAMES include/clAmdBlas.h
          PATHS ENV CLAMDBLAS_PATH ENV ProgramFiles
          PATH_SUFFIXES clAmdBlas AMD/clAmdBlas
          DOC "AMD BLAS root directory"
          NO_DEFAULT_PATH)
||||
|
||||
find_path(CLAMDBLAS_INCLUDE_DIR |
||||
NAMES clAmdBlas.h |
||||
PATHS ${CLAMDBLAS_INCLUDE_SEARCH_PATH} |
||||
PATH_SUFFIXES clAmdBlas |
||||
NO_DEFAULT_PATH) |
||||
HINTS ${CLAMDBLAS_ROOT_DIR} |
||||
PATH_SUFFIXES include |
||||
DOC "clAmdFft include directory") |
||||
|
||||
find_library(CLAMDBLAS_LIBRARY |
||||
NAMES clAmdBlas |
||||
PATHS ${CLAMDBLAS_LIB_SEARCH_PATH} |
||||
NO_DEFAULT_PATH) |
||||
if(CLAMDBLAS_LIBRARY) |
||||
set(CLAMDBLAS_LIBRARIES ${CLAMDBLAS_LIBRARY}) |
||||
else() |
||||
set(CLAMDBLAS_LIBRARIES "") |
||||
endif() |
||||
endif() |
||||
# Try AMD/ATI Stream SDK |
||||
if (NOT OPENCL_FOUND) |
||||
set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT}) |
||||
set(ENV_AMDAPPSDKROOT $ENV{AMDAPPSDKROOT}) |
||||
set(ENV_OPENCLROOT $ENV{OPENCLROOT}) |
||||
set(ENV_CUDA_PATH $ENV{CUDA_PATH}) |
||||
set(ENV_INTELOCLSDKROOT $ENV{INTELOCLSDKROOT}) |
||||
if(ENV_AMDSTREAMSDKROOT) |
||||
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_AMDAPPSDKROOT}/include) |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDAPPSDKROOT}/lib/x86) |
||||
else() |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDAPPSDKROOT}/lib/x86_64) |
||||
endif() |
||||
elseif(ENV_AMDSTREAMSDKROOT) |
||||
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_AMDSTREAMSDKROOT}/include) |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDSTREAMSDKROOT}/lib/x86) |
||||
else() |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDSTREAMSDKROOT}/lib/x86_64) |
||||
endif() |
||||
elseif(ENV_CUDA_PATH AND WIN32) |
||||
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_CUDA_PATH}/include) |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_CUDA_PATH}/lib/Win32) |
||||
else() |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_CUDA_PATH}/lib/x64) |
||||
endif() |
||||
elseif(ENV_OPENCLROOT AND UNIX) |
||||
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_OPENCLROOT}/inc) |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} /usr/lib) |
||||
else() |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} /usr/lib64) |
||||
endif() |
||||
elseif(ENV_INTELOCLSDKROOT) |
||||
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_INTELOCLSDKROOT}/include) |
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4) |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x86) |
||||
else() |
||||
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x64) |
||||
endif() |
||||
endif() |
||||
|
||||
if(OPENCL_INCLUDE_SEARCH_PATH) |
||||
find_path(OPENCL_INCLUDE_DIR |
||||
NAMES CL/cl.h OpenCL/cl.h |
||||
PATHS ${OPENCL_INCLUDE_SEARCH_PATH} |
||||
NO_DEFAULT_PATH) |
||||
else() |
||||
find_path(OPENCL_INCLUDE_DIR |
||||
NAMES CL/cl.h OpenCL/cl.h) |
||||
endif() |
||||
|
||||
if(OPENCL_LIB_SEARCH_PATH) |
||||
find_library(OPENCL_LIBRARY NAMES OpenCL PATHS ${OPENCL_LIB_SEARCH_PATH} NO_DEFAULT_PATH) |
||||
else() |
||||
find_library(OPENCL_LIBRARY NAMES OpenCL) |
||||
endif() |
||||
|
||||
include(FindPackageHandleStandardArgs) |
||||
find_package_handle_standard_args( |
||||
OPENCL |
||||
DEFAULT_MSG |
||||
OPENCL_LIBRARY OPENCL_INCLUDE_DIR |
||||
) |
||||
HINTS ${CLAMDBLAS_ROOT_DIR} |
||||
PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES} |
||||
DOC "clAmdBlas library") |
||||
|
||||
if(OPENCL_FOUND) |
||||
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) |
||||
set(HAVE_OPENCL 1) |
||||
else() |
||||
set(OPENCL_LIBRARIES) |
||||
if(CLAMDBLAS_LIBRARY AND CLAMDBLAS_INCLUDE_DIR) |
||||
set(HAVE_CLAMDBLAS 1) |
||||
list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDBLAS_INCLUDE_DIR}") |
||||
list(APPEND OPENCL_LIBRARIES "${CLAMDBLAS_LIBRARY}") |
||||
endif() |
||||
else() |
||||
set(HAVE_OPENCL 1) |
||||
endif() |
||||
endif() |
||||
|
@ -0,0 +1,910 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2013, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* |
||||
* Redistributions of source code must retain the above copyright notice, |
||||
* this list of conditions and the following disclaimer. |
||||
* |
||||
* Redistributions in binary form must reproduce the above copyright notice, |
||||
* this list of conditions and the following disclaimer in the documentation |
||||
* and/or other materials provided with the distribution. |
||||
* |
||||
* Neither the name of NVIDIA Corporation nor the names of its contributors |
||||
* may be used to endorse or promote products derived from this software |
||||
* without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
#ifndef __OPENCV_GPU_SIMD_FUNCTIONS_HPP__ |
||||
#define __OPENCV_GPU_SIMD_FUNCTIONS_HPP__ |
||||
|
||||
#include "common.hpp" |
||||
|
||||
/*
|
||||
This header file contains inline functions that implement intra-word SIMD |
||||
operations, that are hardware accelerated on sm_3x (Kepler) GPUs. Efficient |
||||
emulation code paths are provided for earlier architectures (sm_1x, sm_2x) |
||||
to make the code portable across all GPUs supported by CUDA. The following |
||||
functions are currently implemented: |
||||
|
||||
vadd2(a,b) per-halfword unsigned addition, with wrap-around: a + b |
||||
vsub2(a,b) per-halfword unsigned subtraction, with wrap-around: a - b |
||||
vabsdiff2(a,b) per-halfword unsigned absolute difference: |a - b| |
||||
vavg2(a,b) per-halfword unsigned average: (a + b) / 2 |
||||
vavrg2(a,b) per-halfword unsigned rounded average: (a + b + 1) / 2 |
||||
vseteq2(a,b) per-halfword unsigned comparison: a == b ? 1 : 0 |
||||
vcmpeq2(a,b) per-halfword unsigned comparison: a == b ? 0xffff : 0 |
||||
vsetge2(a,b) per-halfword unsigned comparison: a >= b ? 1 : 0 |
||||
vcmpge2(a,b) per-halfword unsigned comparison: a >= b ? 0xffff : 0 |
||||
vsetgt2(a,b) per-halfword unsigned comparison: a > b ? 1 : 0 |
||||
vcmpgt2(a,b) per-halfword unsigned comparison: a > b ? 0xffff : 0 |
||||
vsetle2(a,b) per-halfword unsigned comparison: a <= b ? 1 : 0 |
||||
vcmple2(a,b) per-halfword unsigned comparison: a <= b ? 0xffff : 0 |
||||
vsetlt2(a,b) per-halfword unsigned comparison: a < b ? 1 : 0 |
||||
vcmplt2(a,b) per-halfword unsigned comparison: a < b ? 0xffff : 0 |
||||
vsetne2(a,b) per-halfword unsigned comparison: a != b ? 1 : 0 |
||||
vcmpne2(a,b) per-halfword unsigned comparison: a != b ? 0xffff : 0 |
||||
vmax2(a,b) per-halfword unsigned maximum: max(a, b) |
||||
vmin2(a,b) per-halfword unsigned minimum: min(a, b) |
||||
|
||||
vadd4(a,b) per-byte unsigned addition, with wrap-around: a + b |
||||
vsub4(a,b) per-byte unsigned subtraction, with wrap-around: a - b |
||||
vabsdiff4(a,b) per-byte unsigned absolute difference: |a - b| |
||||
vavg4(a,b) per-byte unsigned average: (a + b) / 2 |
||||
vavrg4(a,b) per-byte unsigned rounded average: (a + b + 1) / 2 |
||||
vseteq4(a,b) per-byte unsigned comparison: a == b ? 1 : 0 |
||||
vcmpeq4(a,b) per-byte unsigned comparison: a == b ? 0xff : 0 |
||||
vsetge4(a,b) per-byte unsigned comparison: a >= b ? 1 : 0 |
||||
vcmpge4(a,b) per-byte unsigned comparison: a >= b ? 0xff : 0 |
||||
vsetgt4(a,b) per-byte unsigned comparison: a > b ? 1 : 0 |
||||
vcmpgt4(a,b) per-byte unsigned comparison: a > b ? 0xff : 0 |
||||
vsetle4(a,b) per-byte unsigned comparison: a <= b ? 1 : 0 |
||||
vcmple4(a,b) per-byte unsigned comparison: a <= b ? 0xff : 0 |
||||
vsetlt4(a,b) per-byte unsigned comparison: a < b ? 1 : 0 |
||||
vcmplt4(a,b) per-byte unsigned comparison: a < b ? 0xff : 0 |
||||
vsetne4(a,b) per-byte unsigned comparison: a != b ? 1: 0 |
||||
vcmpne4(a,b) per-byte unsigned comparison: a != b ? 0xff: 0 |
||||
vmax4(a,b) per-byte unsigned maximum: max(a, b) |
||||
vmin4(a,b) per-byte unsigned minimum: min(a, b) |
||||
*/ |
||||
|
||||
namespace cv { namespace gpu { namespace device |
||||
{ |
||||
// 2
|
||||
|
||||
// Per-halfword unsigned addition of the two 16-bit lanes packed in a and b.
// sm_3x and newer use the vadd2 video instruction, sm_2x issues one scalar
// vadd per lane, everything else uses plain integer arithmetic.
// NOTE(review): both PTX paths carry the .sat modifier, whereas the generic
// path below wraps around on lane overflow - confirm which one callers expect.
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    const unsigned int full_sum = a + b;
    // (a ^ b) ^ (a + b) has a 1 wherever a carry entered that bit position;
    // bit 16 is the carry that leaked from the low lane into the high lane.
    const unsigned int leaked_carry = (a ^ b ^ full_sum) & 0x00010000;
    result = full_sum - leaked_carry;   // undo the leak so the lanes stay independent
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned subtraction (a - b) of the two 16-bit lanes.
// sm_3x and newer use vsub2, sm_2x one scalar vsub per lane, otherwise
// plain integer arithmetic is used.
// NOTE(review): both PTX paths carry the .sat modifier, whereas the generic
// path below wraps around on lane underflow - confirm which one callers expect.
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    const unsigned int full_diff = a - b;
    // (a ^ b) ^ (a - b) has a 1 wherever a borrow entered that bit position;
    // bit 16 is the borrow the low lane took from the high lane.
    const unsigned int leaked_borrow = (a ^ b ^ full_diff) & 0x00010000;
    result = full_diff + leaked_borrow; // give the borrowed unit back to the high lane
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned absolute difference |a - b| of the two 16-bit lanes.
// sm_3x and newer use vabsdiff2, sm_2x one scalar vabsdiff per lane; the
// generic path computes max - min for each lane.
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Split both operands into their lanes (kept at their original bit positions).
    const unsigned int a_lo = a & 0x0000ffff;
    const unsigned int b_lo = b & 0x0000ffff;
    const unsigned int a_hi = a & 0xffff0000;
    const unsigned int b_hi = b & 0xffff0000;

    // Re-pack the per-lane maxima and minima into one word each.
    const unsigned int larger  = ::max(b_lo, a_lo) | ::max(b_hi, a_hi);
    const unsigned int smaller = ::min(b_lo, a_lo) | ::min(b_hi, a_hi);

    // Every lane of 'larger' is >= the matching lane of 'smaller', so the
    // word-wide subtraction never borrows across the lane boundary.
    result = larger - smaller;
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned average, rounded down: (a + b) / 2 for each 16-bit lane.
// Uses the same integer code on every architecture.
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
{
    // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b), hence
    //             (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
    const unsigned int common_bits = a & b;

    // Clear bit 0 of each lane first so the shift cannot move a bit from the
    // high lane into the low lane.
    const unsigned int half_of_differing = ((a ^ b) & 0xfffefffe) >> 1;

    return common_bits + half_of_differing;
}
||||
|
||||
// Per-halfword unsigned average, rounded up: (a + b + 1) / 2 for each 16-bit lane.
// sm_3x and newer use the vavrg2 instruction; older targets use integer code.
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b), hence
    //             (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
    // Bit 0 of each lane is cleared before shifting so nothing crosses the
    // halfword boundary.
    const unsigned int half_of_differing = ((a ^ b) & 0xfffefffe) >> 1;
    result = (a | b) - half_of_differing;
#endif

    return result;
}
||||
|
||||
// Per-halfword equality test: each 16-bit lane of the result is 1 where the
// lanes of a and b are equal and 0 otherwise.
// sm_3x and newer use vset2 with the eq comparison; older targets use integer code.
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Modelled on Alan Mycroft's null-byte detector:
    //   null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                        // lane is 0x0000 where a == b
    const unsigned int guarded = diff | 0x80008000;         // force lane msbs on so the decrement cannot borrow across lanes
    const unsigned int msb_was_clear = diff ^ guarded;      // lane msb = 1 where diff < 0x8000
    const unsigned int decremented = guarded - 0x00010001;  // lane msb = 0 only where diff was 0x0000 or 0x8000
    const unsigned int equal_msbs = msb_was_clear & ~decremented; // lane msb = 1 only where diff was 0x0000
    result = equal_msbs >> 15;                              // move each lane msb down to bit 0 of its lane
#endif

    return result;
}
||||
|
||||
// Per-halfword equality mask: each 16-bit lane of the result is 0xffff where
// the lanes of a and b are equal and 0x0000 otherwise.
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
{
    unsigned int mask;

#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vseteq2(a, b);   // 0 or 1 in each lane
    mask = (flags << 16) - flags;               // widen each 1 to 0xffff
#else
    // Modelled on Alan Mycroft's null-byte detector:
    //   null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                        // lane is 0x0000 where a == b
    const unsigned int guarded = diff | 0x80008000;         // force lane msbs on to absorb the borrow
    const unsigned int msb_was_clear = diff ^ guarded;      // lane msb = 1 where diff < 0x8000
    const unsigned int decremented = guarded - 0x00010001;  // lane msb = 0 only where diff was 0x0000 or 0x8000
    const unsigned int equal_msbs = msb_was_clear & ~decremented; // lane msb = 1 only where diff was 0x0000
    // 0x8000 - 0x0001 = 0x7fff; OR-ing the msb back in yields 0xffff.
    mask = equal_msbs | (equal_msbs - (equal_msbs >> 15));
#endif

    return mask;
}
||||
|
||||
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 where
// a >= b and 0 otherwise.
// sm_3x and newer use vset2 with the ge comparison; older targets derive the
// answer from the rounded-up average of a and ~b.
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    asm("not.b32 %0, %0;" : "+r"(b));           // b = ~b
    // (a + ~b + 1) / 2 = (a - b) / 2; the lane msb of that average is the
    // carry-out of a - b, which is set exactly when a >= b.
    result = (vavrg2(a, b) & 0x80008000) >> 15;
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned comparison mask: each 16-bit lane of the result is
// 0xffff where a >= b and 0x0000 otherwise.
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
{
    unsigned int mask;

#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetge2(a, b);   // 0 or 1 in each lane
    mask = (flags << 16) - flags;               // widen each 1 to 0xffff
#else
    asm("not.b32 %0, %0;" : "+r"(b));           // b = ~b
    // (a + ~b + 1) / 2 = (a - b) / 2; the lane msbs are the carry-outs.
    const unsigned int carry_msbs = vavrg2(a, b) & 0x80008000;
    // 0x8000 - 0x0001 = 0x7fff; OR-ing the msb back in yields 0xffff.
    mask = carry_msbs | (carry_msbs - (carry_msbs >> 15));
#endif

    return mask;
}
||||
|
||||
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 where
// a > b and 0 otherwise.
// sm_3x and newer use vset2 with the gt comparison; older targets derive the
// answer from the rounded-down average of a and ~b.
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    asm("not.b32 %0, %0;" : "+r"(b));           // b = ~b
    // (a + ~b) / 2 = (a - b) / 2 rounded down; the lane msb of that average
    // is the carry-out, which is set exactly when a > b.
    result = (vavg2(a, b) & 0x80008000) >> 15;
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned comparison mask: each 16-bit lane of the result is
// 0xffff where a > b and 0x0000 otherwise.
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
{
    unsigned int mask;

#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetgt2(a, b);   // 0 or 1 in each lane
    mask = (flags << 16) - flags;               // widen each 1 to 0xffff
#else
    asm("not.b32 %0, %0;" : "+r"(b));           // b = ~b
    // (a + ~b) / 2 = (a - b) / 2 rounded down; the lane msbs are the carry-outs.
    const unsigned int carry_msbs = vavg2(a, b) & 0x80008000;
    // 0x8000 - 0x0001 = 0x7fff; OR-ing the msb back in yields 0xffff.
    mask = carry_msbs | (carry_msbs - (carry_msbs >> 15));
#endif

    return mask;
}
||||
|
||||
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 where
// a <= b and 0 otherwise.
// sm_3x and newer use vset2 with the le comparison; older targets derive the
// answer from the rounded-up average of ~a and b.
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    asm("not.b32 %0, %0;" : "+r"(a));           // a = ~a
    // (b + ~a + 1) / 2 = (b - a) / 2; the lane msb of that average is the
    // carry-out of b - a, which is set exactly when a <= b.
    result = (vavrg2(a, b) & 0x80008000) >> 15;
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned comparison mask: each 16-bit lane of the result is
// 0xffff where a <= b and 0x0000 otherwise.
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
{
    unsigned int mask;

#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetle2(a, b);   // 0 or 1 in each lane
    mask = (flags << 16) - flags;               // widen each 1 to 0xffff
#else
    asm("not.b32 %0, %0;" : "+r"(a));           // a = ~a
    // (b + ~a + 1) / 2 = (b - a) / 2; the lane msbs are the carry-outs.
    const unsigned int carry_msbs = vavrg2(a, b) & 0x80008000;
    // 0x8000 - 0x0001 = 0x7fff; OR-ing the msb back in yields 0xffff.
    mask = carry_msbs | (carry_msbs - (carry_msbs >> 15));
#endif

    return mask;
}
||||
|
||||
// Per-halfword unsigned comparison: each 16-bit lane of the result is 1 where
// a < b and 0 otherwise.
// sm_3x and newer use vset2 with the lt comparison; older targets derive the
// answer from the rounded-down average of ~a and b.
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    asm("not.b32 %0, %0;" : "+r"(a));           // a = ~a
    // (b + ~a) / 2 = (b - a) / 2 rounded down; the lane msb of that average
    // is the carry-out, which is set exactly when a < b.
    result = (vavg2(a, b) & 0x80008000) >> 15;
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned comparison mask: each 16-bit lane of the result is
// 0xffff where a < b and 0x0000 otherwise.
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
{
    unsigned int mask;

#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetlt2(a, b);   // 0 or 1 in each lane
    mask = (flags << 16) - flags;               // widen each 1 to 0xffff
#else
    asm("not.b32 %0, %0;" : "+r"(a));           // a = ~a
    // (b + ~a) / 2 = (b - a) / 2 rounded down; the lane msbs are the carry-outs.
    const unsigned int carry_msbs = vavg2(a, b) & 0x80008000;
    // 0x8000 - 0x0001 = 0x7fff; OR-ing the msb back in yields 0xffff.
    mask = carry_msbs | (carry_msbs - (carry_msbs >> 15));
#endif

    return mask;
}
||||
|
||||
// Per-halfword inequality test: each 16-bit lane of the result is 1 where the
// lanes of a and b differ and 0 otherwise.
// sm_3x and newer use vset2 with the ne comparison; older targets use integer code.
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Modelled on Alan Mycroft's null-byte detector:
    //   null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                                  // lane is 0x0000 where a == b
    const unsigned int decremented = (diff | 0x80008000) - 0x00010001; // lane msb = 0 only where diff was 0x0000 or 0x8000
    // OR-ing diff back in restores the msb for the 0x8000 case, so the lane
    // msb ends up 1 exactly where diff was non-zero.
    const unsigned int differ_msbs = (diff | decremented) & 0x80008000;
    result = differ_msbs >> 15;                                       // move each lane msb down to bit 0 of its lane
#endif

    return result;
}
||||
|
||||
// Per-halfword inequality mask: each 16-bit lane of the result is 0xffff where
// the lanes of a and b differ and 0x0000 otherwise.
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
{
    unsigned int mask;

#if __CUDA_ARCH__ >= 300
    const unsigned int flags = vsetne2(a, b);   // 0 or 1 in each lane
    mask = (flags << 16) - flags;               // widen each 1 to 0xffff
#else
    // Modelled on Alan Mycroft's null-byte detector:
    //   null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff = a ^ b;                                  // lane is 0x0000 where a == b
    const unsigned int decremented = (diff | 0x80008000) - 0x00010001; // lane msb = 0 only where diff was 0x0000 or 0x8000
    const unsigned int differ_msbs = (diff | decremented) & 0x80008000; // lane msb = 1 where diff was non-zero
    // 0x8000 - 0x0001 = 0x7fff; OR-ing the msb back in yields 0xffff.
    mask = differ_msbs | (differ_msbs - (differ_msbs >> 15));
#endif

    return mask;
}
||||
|
||||
// Per-halfword unsigned maximum of the two 16-bit lanes packed in a and b.
// sm_3x and newer use vmax2, sm_2x one scalar vmax per lane; the generic
// path compares the lanes separately.
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Each lane is masked out in place, so the two maxima occupy disjoint
    // bits and can simply be OR-ed together.
    const unsigned int low_max  = ::max(a & 0x0000ffff, b & 0x0000ffff);
    const unsigned int high_max = ::max(a & 0xffff0000, b & 0xffff0000);
    result = low_max | high_max;
#endif

    return result;
}
||||
|
||||
// Per-halfword unsigned minimum of the two 16-bit lanes packed in a and b.
// sm_3x and newer use vmin2, sm_2x one scalar vmin per lane; the generic
// path compares the lanes separately.
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Each lane is masked out in place, so the two minima occupy disjoint
    // bits and can simply be OR-ed together.
    const unsigned int low_min  = ::min(a & 0x0000ffff, b & 0x0000ffff);
    const unsigned int high_min = ::min(a & 0xffff0000, b & 0xffff0000);
    result = low_min | high_min;
#endif

    return result;
}
||||
|
||||
// 4
|
||||
|
||||
// Per-byte unsigned addition of the four 8-bit lanes packed in a and b.
// sm_3x and newer use the vadd4 video instruction, sm_2x issues one scalar
// vadd per byte, everything else uses plain integer arithmetic.
// NOTE(review): all PTX paths carry the .sat modifier, whereas the generic
// path below wraps around on lane overflow - confirm which one callers expect.
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Add the low 7 bits of every byte; with the msbs cleared the carry out of
    // bit 6 lands in bit 7 of the same byte and cannot reach the next lane.
    const unsigned int low7_sum = (a & 0x7f7f7f7f) + (b & 0x7f7f7f7f);
    // Sum of the two msbs of every byte, ignoring carries.
    const unsigned int msb_sum = (a ^ b) & 0x80808080;
    // Fold the carry-in recorded in bit 7 together with the msb sum; the
    // carry out of bit 7 is dropped.
    result = low7_sum ^ msb_sum;
#endif

    return result;
}
||||
|
||||
// Per-byte unsigned subtraction (a - b) of the four 8-bit lanes.
// sm_3x and newer use vsub4, sm_2x one scalar vsub per byte, otherwise
// plain integer arithmetic is used.
// NOTE(review): all PTX paths carry the .sat modifier, whereas the generic
// path below wraps around on lane underflow - confirm which one callers expect.
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#elif __CUDA_ARCH__ >= 200
    asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
    asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // Subtract the low 7 bits of every byte with the minuend's msbs forced on:
    // a borrow out of bit 6 clears bit 7 of the same byte instead of reaching
    // the next lane, so bit 7 ends up holding the inverted borrow.
    const unsigned int low7_diff = (a | 0x80808080) - (b & 0x7f7f7f7f);
    // Inverted difference of the two msbs of every byte, ignoring borrows.
    const unsigned int inv_msb_diff = (a ^ ~b) & 0x80808080;
    // The two inversions cancel, leaving the true msb of each lane.
    result = low7_diff ^ inv_msb_diff;
#endif

    return result;
}
||||
|
||||
// Per-byte unsigned average, rounded down: (a + b) / 2 for each 8-bit lane.
// Uses the same integer code on every architecture.
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
{
    // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b), hence
    //             (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
    const unsigned int common_bits = a & b;

    // Clear bit 0 of each byte first so the shift cannot move a bit into the
    // neighbouring lane.
    const unsigned int half_of_differing = ((a ^ b) & 0xfefefefe) >> 1;

    return common_bits + half_of_differing;
}
||||
|
||||
// Per-byte unsigned average, rounded up: (a + b + 1) / 2 for each 8-bit lane.
// sm_3x and newer use the vavrg4 instruction; older targets use integer code.
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
{
    unsigned int result = 0;

#if __CUDA_ARCH__ >= 300
    asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#else
    // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b), hence
    //             (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
    // Bit 0 of each byte is cleared before shifting so nothing crosses a
    // byte boundary.
    const unsigned int half_of_differing = ((a ^ b) & 0xfefefefe) >> 1;
    result = (a | b) - half_of_differing;
#endif

    return result;
}
||||
|
||||
// Per-byte equality test: every byte of the result is 1 where the matching
// bytes of a and b are equal, and 0 otherwise.
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff    = a ^ b;               // byte is 0x00 where a == b
    const unsigned int guarded = diff | 0x80808080;   // msbs set to absorb the borrow
    const unsigned int below80 = diff ^ guarded;      // msb = 1 where diff byte < 0x80
    const unsigned int stepped = guarded - 0x01010101; // msb = 0 where diff was 0x00 or 0x80

    // msb survives only where diff was exactly 0x00; move it to bit 0.
    r = (below80 & ~stepped) >> 7;
#endif

    return r;
}
||||
|
||||
// Per-byte equality mask: every byte of the result is 0xff where the matching
// bytes of a and b are equal, and 0x00 otherwise.
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
{
#if __CUDA_ARCH__ >= 300
    // Widen each 0/1 flag to 0x00/0xff: (flags << 8) - flags.
    const unsigned int flags = vseteq4(a, b);
    return (flags << 8) - flags;
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff    = a ^ b;              // byte is 0x00 where a == b
    const unsigned int guarded = diff | 0x80808080;  // msbs set to absorb the borrow
    const unsigned int below80 = diff ^ guarded;     // msb = 1 where diff byte < 0x80

    // msb = 1 only where diff was exactly 0x00.
    const unsigned int hitMsb = below80 & ~(guarded - 0x01010101);

    // 0x80 - 0x01 = 0x7f, OR-ed with 0x80 gives 0xff in every matching lane.
    return (hitMsb - (hitMsb >> 7)) | hitMsb;
#endif
}
||||
|
||||
// Per-byte unsigned "a <= b" test producing 1 or 0 in every byte.
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // Complement a in place, then (b + ~a + 1) / 2 = (b - a) / 2 leaves the
    // carry-out of every lane in that lane's msb.
    asm("not.b32 %0, %0;" : "+r"(a));
    const unsigned int carryMsb = vavrg4(a, b) & 0x80808080;
    r = carryMsb >> 7; // msb -> bool
#endif

    return r;
}
||||
|
||||
// Per-byte unsigned "a <= b" mask producing 0xff or 0x00 in every byte.
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
{
#if __CUDA_ARCH__ >= 300
    // Widen each 0/1 flag to 0x00/0xff.
    const unsigned int flags = vsetle4(a, b);
    return (flags << 8) - flags;
#else
    // (b + ~a + 1) / 2 = (b - a) / 2: the msb of each lane is its carry-out.
    asm("not.b32 %0, %0;" : "+r"(a));
    const unsigned int carryMsb = vavrg4(a, b) & 0x80808080;

    // Stretch every set msb into a full 0xff byte.
    return carryMsb | (carryMsb - (carryMsb >> 7));
#endif
}
||||
|
||||
// Per-byte unsigned "a < b" test producing 1 or 0 in every byte.
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // Complement a in place, then (b + ~a) / 2 = (b - a) / 2 rounded down
    // leaves the carry-out of every lane in that lane's msb.
    asm("not.b32 %0, %0;" : "+r"(a));
    const unsigned int carryMsb = vavg4(a, b) & 0x80808080;
    r = carryMsb >> 7; // msb -> bool
#endif

    return r;
}
||||
|
||||
// Per-byte unsigned "a < b" mask producing 0xff or 0x00 in every byte.
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
{
#if __CUDA_ARCH__ >= 300
    // Widen each 0/1 flag to 0x00/0xff.
    const unsigned int flags = vsetlt4(a, b);
    return (flags << 8) - flags;
#else
    // (b + ~a) / 2 = (b - a) / 2 rounded down: msb of each lane is its carry-out.
    asm("not.b32 %0, %0;" : "+r"(a));
    const unsigned int carryMsb = vavg4(a, b) & 0x80808080;

    // Stretch every set msb into a full 0xff byte.
    return carryMsb | (carryMsb - (carryMsb >> 7));
#endif
}
||||
|
||||
// Per-byte unsigned "a >= b" test producing 1 or 0 in every byte.
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // Complement b in place, then (a + ~b + 1) / 2 = (a - b) / 2 leaves the
    // carry-out of every lane in that lane's msb.
    asm("not.b32 %0, %0;" : "+r"(b));
    const unsigned int carryMsb = vavrg4(a, b) & 0x80808080;
    r = carryMsb >> 7; // msb -> bool
#endif

    return r;
}
||||
|
||||
// Per-byte unsigned "a >= b" mask producing 0xff or 0x00 in every byte.
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
{
#if __CUDA_ARCH__ >= 300
    // Widen each 0/1 flag to 0x00/0xff.
    const unsigned int flags = vsetge4(a, b);
    return (flags << 8) - flags;
#else
    // (a + ~b + 1) / 2 = (a - b) / 2: the msb of each lane is its carry-out.
    asm ("not.b32 %0,%0;" : "+r"(b));
    const unsigned int carryMsb = vavrg4(a, b) & 0x80808080;

    // Stretch every set msb into a full 0xff byte.
    return (carryMsb - (carryMsb >> 7)) | carryMsb;
#endif
}
||||
|
||||
// Per-byte unsigned "a > b" test producing 1 or 0 in every byte.
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // Complement b in place, then (a + ~b) / 2 = (a - b) / 2 rounded down
    // leaves the carry-out of every lane in that lane's msb.
    asm("not.b32 %0, %0;" : "+r"(b));
    const unsigned int carryMsb = vavg4(a, b) & 0x80808080;
    r = carryMsb >> 7; // msb -> bool
#endif

    return r;
}
||||
|
||||
// Per-byte unsigned "a > b" mask producing 0xff or 0x00 in every byte.
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
{
#if __CUDA_ARCH__ >= 300
    // Widen each 0/1 flag to 0x00/0xff.
    const unsigned int flags = vsetgt4(a, b);
    return (flags << 8) - flags;
#else
    // (a + ~b) / 2 = (a - b) / 2 rounded down: msb of each lane is its carry-out.
    asm("not.b32 %0, %0;" : "+r"(b));
    const unsigned int carryMsb = vavg4(a, b) & 0x80808080;

    // Stretch every set msb into a full 0xff byte.
    return carryMsb | (carryMsb - (carryMsb >> 7));
#endif
}
||||
|
||||
// Per-byte inequality test: every byte of the result is 1 where the matching
// bytes of a and b differ, and 0 otherwise.
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff    = a ^ b;                         // byte is 0x00 where a == b
    const unsigned int stepped = (diff | 0x80808080) - 0x01010101; // msb = 0 where diff was 0x00 or 0x80

    // OR-ing diff back in restores the msb for the 0x80 case, so the msb is 1
    // exactly where diff was non-zero; isolate it and move it to bit 0.
    r = ((diff | stepped) & 0x80808080) >> 7;
#endif

    return r;
}
||||
|
||||
// Per-byte inequality mask: every byte of the result is 0xff where the matching
// bytes of a and b differ, and 0x00 otherwise.
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
{
#if __CUDA_ARCH__ >= 300
    // Widen each 0/1 flag to 0x00/0xff.
    const unsigned int flags = vsetne4(a, b);
    return (flags << 8) - flags;
#else
    // Inspired by Alan Mycroft's null-byte detection algorithm:
    // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
    const unsigned int diff    = a ^ b;                         // byte is 0x00 where a == b
    const unsigned int stepped = (diff | 0x80808080) - 0x01010101; // msb = 0 where diff was 0x00 or 0x80

    // msb = 1 exactly where diff was non-zero.
    const unsigned int hitMsb = (diff | stepped) & 0x80808080;

    // Stretch every set msb into a full 0xff byte.
    return hitMsb | (hitMsb - (hitMsb >> 7));
#endif
}
||||
|
||||
// Per-byte absolute difference |a - b| of the four packed unsigned lanes.
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    // One scalar video instruction per byte lane, each merging into r.
    asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    const unsigned int geMask = vcmpge4(a, b);     // 0xff in lanes where a >= b
    const unsigned int mixed  = a ^ b;

    // b ^ (a ^ b) = a where the mask is set, b elsewhere => max(a, b).
    const unsigned int larger  = (mixed & geMask) ^ b;
    // XOR-ing with (a ^ b) flips every lane to the other operand => min(a, b).
    const unsigned int smaller = larger ^ mixed;

    // max >= min in every lane, so no borrow crosses a byte boundary.
    r = larger - smaller;
#endif

    return r;
}
||||
|
||||
// Per-byte unsigned maximum of the four packed lanes of a and b.
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    // One scalar video instruction per byte lane, each merging into r.
    asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    const unsigned int aWins = vcmpge4(a, b); // 0xff in lanes where a >= b

    // Take a where a >= b and b in the remaining lanes.
    r = (a & aWins) | (b & ~aWins);
#endif

    return r;
}
||||
|
||||
// Per-byte unsigned minimum of the four packed lanes of a and b.
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
{
    unsigned int r = 0;

#if __CUDA_ARCH__ >= 300
    asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
    // One scalar video instruction per byte lane, each merging into r.
    asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
    asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
    const unsigned int aWins = vcmpge4(b, a); // 0xff in lanes where b >= a

    // Take a where b >= a and b in the remaining lanes.
    r = (a & aWins) | (b & ~aWins);
#endif

    return r;
}
||||
}}} |
||||
|
||||
#endif // __OPENCV_GPU_SIMD_FUNCTIONS_HPP__
|
@ -1,70 +1,5 @@ |
||||
#include "perf_precomp.hpp" |
||||
|
||||
// Prints a banner naming the operating system family and pointer width the
// binary was compiled for. Prints nothing on platforms matched by no branch.
static void printOsInfo()
{
#if defined _WIN32
#   if defined _WIN64
    printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"), fflush(stdout);
#   else
    printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"), fflush(stdout);
#   endif
// `linux` is not predefined when the compiler runs in strict ISO mode, so
// `__linux__` is tested as well; otherwise the banner would be silently skipped.
#elif defined __linux__ || defined linux
#   if defined _LP64
    printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"), fflush(stdout);
#   else
    printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"), fflush(stdout);
#   endif
#elif defined __APPLE__
#   if defined _LP64
    printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"), fflush(stdout);
#   else
    printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"), fflush(stdout);
#   endif
#endif
}
||||
|
||||
static void printCudaInfo() |
||||
{ |
||||
printOsInfo(); |
||||
#ifndef HAVE_CUDA |
||||
printf("[----------]\n[ GPU INFO ] \tOpenCV was built without CUDA support.\n[----------]\n"), fflush(stdout); |
||||
#else |
||||
int driver; |
||||
cudaDriverGetVersion(&driver); |
||||
|
||||
printf("[----------]\n"), fflush(stdout); |
||||
printf("[ GPU INFO ] \tCUDA Driver version: %d.\n", driver), fflush(stdout); |
||||
printf("[ GPU INFO ] \tCUDA Runtime version: %d.\n", CUDART_VERSION), fflush(stdout); |
||||
printf("[----------]\n"), fflush(stdout); |
||||
|
||||
printf("[----------]\n"), fflush(stdout); |
||||
printf("[ GPU INFO ] \tGPU module was compiled for the following GPU archs.\n"), fflush(stdout); |
||||
printf("[ BIN ] \t%s.\n", CUDA_ARCH_BIN), fflush(stdout); |
||||
printf("[ PTX ] \t%s.\n", CUDA_ARCH_PTX), fflush(stdout); |
||||
printf("[----------]\n"), fflush(stdout); |
||||
|
||||
printf("[----------]\n"), fflush(stdout); |
||||
int deviceCount = cv::gpu::getCudaEnabledDeviceCount(); |
||||
printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount), fflush(stdout); |
||||
printf("[----------]\n"), fflush(stdout); |
||||
|
||||
for (int i = 0; i < deviceCount; ++i) |
||||
{ |
||||
cv::gpu::DeviceInfo info(i); |
||||
|
||||
printf("[----------]\n"), fflush(stdout); |
||||
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout); |
||||
printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout); |
||||
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout); |
||||
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout); |
||||
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout); |
||||
if (!info.isCompatible()) |
||||
printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n"); |
||||
printf("[----------]\n"), fflush(stdout); |
||||
} |
||||
|
||||
#endif |
||||
} |
||||
using namespace perf; |
||||
|
||||
CV_PERF_TEST_MAIN(gpu, printCudaInfo()) |
||||
|
@ -1,184 +0,0 @@ |
||||
#include "perf_precomp.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
|
||||
// Loads an image whose location is resolved through the perf framework's
// data-path lookup; `flags` is forwarded to imread unchanged.
Mat readImage(const string& fileName, int flags)
{
    return imread(
        perf::TestBase::getDataPath(fileName),
        flags);
}
||||
|
||||
void PrintTo(const CvtColorInfo& info, ostream* os) |
||||
{ |
||||
static const char* str[] = |
||||
{ |
||||
"BGR2BGRA", |
||||
"BGRA2BGR", |
||||
"BGR2RGBA", |
||||
"RGBA2BGR", |
||||
"BGR2RGB", |
||||
"BGRA2RGBA", |
||||
|
||||
"BGR2GRAY", |
||||
"RGB2GRAY", |
||||
"GRAY2BGR", |
||||
"GRAY2BGRA", |
||||
"BGRA2GRAY", |
||||
"RGBA2GRAY", |
||||
|
||||
"BGR2BGR565", |
||||
"RGB2BGR565", |
||||
"BGR5652BGR", |
||||
"BGR5652RGB", |
||||
"BGRA2BGR565", |
||||
"RGBA2BGR565", |
||||
"BGR5652BGRA", |
||||
"BGR5652RGBA", |
||||
|
||||
"GRAY2BGR565", |
||||
"BGR5652GRAY", |
||||
|
||||
"BGR2BGR555", |
||||
"RGB2BGR555", |
||||
"BGR5552BGR", |
||||
"BGR5552RGB", |
||||
"BGRA2BGR555", |
||||
"RGBA2BGR555", |
||||
"BGR5552BGRA", |
||||
"BGR5552RGBA", |
||||
|
||||
"GRAY2BGR555", |
||||
"BGR5552GRAY", |
||||
|
||||
"BGR2XYZ", |
||||
"RGB2XYZ", |
||||
"XYZ2BGR", |
||||
"XYZ2RGB", |
||||
|
||||
"BGR2YCrCb", |
||||
"RGB2YCrCb", |
||||
"YCrCb2BGR", |
||||
"YCrCb2RGB", |
||||
|
||||
"BGR2HSV", |
||||
"RGB2HSV", |
||||
|
||||
"", |
||||
"", |
||||
|
||||
"BGR2Lab", |
||||
"RGB2Lab", |
||||
|
||||
"BayerBG2BGR", |
||||
"BayerGB2BGR", |
||||
"BayerRG2BGR", |
||||
"BayerGR2BGR", |
||||
|
||||
"BGR2Luv", |
||||
"RGB2Luv", |
||||
|
||||
"BGR2HLS", |
||||
"RGB2HLS", |
||||
|
||||
"HSV2BGR", |
||||
"HSV2RGB", |
||||
|
||||
"Lab2BGR", |
||||
"Lab2RGB", |
||||
"Luv2BGR", |
||||
"Luv2RGB", |
||||
|
||||
"HLS2BGR", |
||||
"HLS2RGB", |
||||
|
||||
"BayerBG2BGR_VNG", |
||||
"BayerGB2BGR_VNG", |
||||
"BayerRG2BGR_VNG", |
||||
"BayerGR2BGR_VNG", |
||||
|
||||
"BGR2HSV_FULL", |
||||
"RGB2HSV_FULL", |
||||
"BGR2HLS_FULL", |
||||
"RGB2HLS_FULL", |
||||
|
||||
"HSV2BGR_FULL", |
||||
"HSV2RGB_FULL", |
||||
"HLS2BGR_FULL", |
||||
"HLS2RGB_FULL", |
||||
|
||||
"LBGR2Lab", |
||||
"LRGB2Lab", |
||||
"LBGR2Luv", |
||||
"LRGB2Luv", |
||||
|
||||
"Lab2LBGR", |
||||
"Lab2LRGB", |
||||
"Luv2LBGR", |
||||
"Luv2LRGB", |
||||
|
||||
"BGR2YUV", |
||||
"RGB2YUV", |
||||
"YUV2BGR", |
||||
"YUV2RGB", |
||||
|
||||
"BayerBG2GRAY", |
||||
"BayerGB2GRAY", |
||||
"BayerRG2GRAY", |
||||
"BayerGR2GRAY", |
||||
|
||||
//YUV 4:2:0 formats family
|
||||
"YUV2RGB_NV12", |
||||
"YUV2BGR_NV12", |
||||
"YUV2RGB_NV21", |
||||
"YUV2BGR_NV21", |
||||
|
||||
"YUV2RGBA_NV12", |
||||
"YUV2BGRA_NV12", |
||||
"YUV2RGBA_NV21", |
||||
"YUV2BGRA_NV21", |
||||
|
||||
"YUV2RGB_YV12", |
||||
"YUV2BGR_YV12", |
||||
"YUV2RGB_IYUV", |
||||
"YUV2BGR_IYUV", |
||||
|
||||
"YUV2RGBA_YV12", |
||||
"YUV2BGRA_YV12", |
||||
"YUV2RGBA_IYUV", |
||||
"YUV2BGRA_IYUV", |
||||
|
||||
"YUV2GRAY_420", |
||||
|
||||
//YUV 4:2:2 formats family
|
||||
"YUV2RGB_UYVY", |
||||
"YUV2BGR_UYVY", |
||||
"YUV2RGB_VYUY", |
||||
"YUV2BGR_VYUY", |
||||
|
||||
"YUV2RGBA_UYVY", |
||||
"YUV2BGRA_UYVY", |
||||
"YUV2RGBA_VYUY", |
||||
"YUV2BGRA_VYUY", |
||||
|
||||
"YUV2RGB_YUY2", |
||||
"YUV2BGR_YUY2", |
||||
"YUV2RGB_YVYU", |
||||
"YUV2BGR_YVYU", |
||||
|
||||
"YUV2RGBA_YUY2", |
||||
"YUV2BGRA_YUY2", |
||||
"YUV2RGBA_YVYU", |
||||
"YUV2BGRA_YVYU", |
||||
|
||||
"YUV2GRAY_UYVY", |
||||
"YUV2GRAY_YUY2", |
||||
|
||||
// alpha premultiplication
|
||||
"RGBA2mRGBA", |
||||
"mRGBA2RGBA", |
||||
|
||||
"COLORCVT_MAX" |
||||
}; |
||||
|
||||
*os << str[info.code]; |
||||
} |
@ -1,63 +0,0 @@ |
||||
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__ |
||||
#define __OPENCV_PERF_GPU_UTILITY_HPP__ |
||||
|
||||
#include "opencv2/core.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
#include "opencv2/ts/ts_perf.hpp" |
||||
|
||||
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR); |
||||
|
||||
using perf::MatType; |
||||
using perf::MatDepth; |
||||
|
||||
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP) |
||||
#define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all()) |
||||
|
||||
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA) |
||||
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all()) |
||||
|
||||
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX) |
||||
|
||||
enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 }; |
||||
CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA) |
||||
#define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA)) |
||||
#define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR)) |
||||
|
||||
// Parameter bundle describing one colour conversion test case.
struct CvtColorInfo
{
    int scn;  // presumably the source channel count -- confirm against callers
    int dcn;  // presumably the destination channel count -- confirm against callers
    int code; // conversion code; PrintTo uses it as an index into its name table

    // Zero-initialises every field so a default-constructed value is never
    // read in an indeterminate state (e.g. when it is printed).
    CvtColorInfo() : scn(0), dcn(0), code(0) {}
    explicit CvtColorInfo(int scn_, int dcn_, int code_) : scn(scn_), dcn(dcn_), code(code_) {}
};
||||
void PrintTo(const CvtColorInfo& info, std::ostream* os); |
||||
|
||||
#define GET_PARAM(k) std::tr1::get< k >(GetParam()) |
||||
|
||||
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name |
||||
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name |
||||
|
||||
DEF_PARAM_TEST_1(Sz, cv::Size); |
||||
typedef perf::Size_MatType Sz_Type; |
||||
DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth); |
||||
DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn); |
||||
|
||||
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p) |
||||
|
||||
#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy" |
||||
|
||||
#define GPU_SANITY_CHECK(mat, ...) \ |
||||
do{ \
|
||||
cv::Mat gpu_##mat(mat); \
|
||||
SANITY_CHECK(gpu_##mat, ## __VA_ARGS__); \
|
||||
} while(0) |
||||
|
||||
#define CPU_SANITY_CHECK(mat, ...) \ |
||||
do{ \
|
||||
cv::Mat cpu_##mat(mat); \
|
||||
SANITY_CHECK(cpu_##mat, ## __VA_ARGS__); \
|
||||
} while(0) |
||||
|
||||
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__
|
@ -1,407 +0,0 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
using namespace cvtest; |
||||
using namespace testing; |
||||
using namespace testing::internal; |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// random generators
|
||||
|
||||
int randomInt(int minVal, int maxVal) |
||||
{ |
||||
RNG& rng = TS::ptr()->get_rng(); |
||||
return rng.uniform(minVal, maxVal); |
||||
} |
||||
|
||||
double randomDouble(double minVal, double maxVal) |
||||
{ |
||||
RNG& rng = TS::ptr()->get_rng(); |
||||
return rng.uniform(minVal, maxVal); |
||||
} |
||||
|
||||
// Builds a Size whose width and height are each drawn with
// randomInt(minVal, maxVal).
Size randomSize(int minVal, int maxVal)
{
    // Draw in separate statements: the order in which function arguments are
    // evaluated is unspecified, so drawing both inside the constructor call
    // could assign the two RNG outputs differently from compiler to compiler.
    const int width = randomInt(minVal, maxVal);
    const int height = randomInt(minVal, maxVal);

    return Size(width, height);
}
||||
|
||||
// Builds a Scalar whose four components are each drawn with
// randomDouble(minVal, maxVal).
Scalar randomScalar(double minVal, double maxVal)
{
    // Draw in separate statements: the order in which function arguments are
    // evaluated is unspecified, so drawing all four inside the constructor call
    // could assign the RNG outputs differently from compiler to compiler.
    const double v0 = randomDouble(minVal, maxVal);
    const double v1 = randomDouble(minVal, maxVal);
    const double v2 = randomDouble(minVal, maxVal);
    const double v3 = randomDouble(minVal, maxVal);

    return Scalar(v0, v1, v2, v3);
}
||||
|
||||
// Convenience overload: forwards to the RNG-taking randomMat using the test
// system's RNG and `false` for the trailing flag.
Mat randomMat(Size size, int type, double minVal, double maxVal)
{
    RNG& sharedRng = TS::ptr()->get_rng();
    return randomMat(sharedRng, size, type, minVal, maxVal, false);
}
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// GpuMat create
|
||||
|
||||
// Allocates a GpuMat of the requested size and type. With useRoi set, a larger
// buffer is allocated (each dimension padded by randomInt(5, 15)) and the
// centred sub-rectangle of the requested size is returned instead.
GpuMat createMat(Size size, int type, bool useRoi)
{
    Size allocSize(size);

    if (useRoi)
    {
        allocSize.width += randomInt(5, 15);
        allocSize.height += randomInt(5, 15);
    }

    GpuMat buffer(allocSize, type);

    // No padding was added: hand back the whole allocation.
    if (allocSize == size)
        return buffer;

    const int offsetX = (allocSize.width - size.width) / 2;
    const int offsetY = (allocSize.height - size.height) / 2;

    buffer = buffer(Rect(offsetX, offsetY, size.width, size.height));
    return buffer;
}
||||
|
||||
// Creates a GpuMat matching m's size and type (optionally as a ROI, see
// createMat) and uploads m into it.
GpuMat loadMat(const Mat& m, bool useRoi)
{
    GpuMat device = createMat(m.size(), m.type(), useRoi);

    device.upload(m);

    return device;
}
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Image load
|
||||
|
||||
// Loads an image located at the test system's data path followed by fileName;
// `flags` is forwarded to imread unchanged.
Mat readImage(const std::string& fileName, int flags)
{
    return imread(
        TS::ptr()->get_data_path() + fileName,
        flags);
}
||||
|
||||
// Loads an image and converts it to the channel count and depth encoded in
// `type`: single-channel types are read as grayscale, everything else as
// colour; 4-channel types get an extra BGR -> BGRA conversion; CV_32F results
// are scaled by 1/255.
Mat readImageType(const std::string& fname, int type)
{
    const int channels = CV_MAT_CN(type);
    const int depth = CV_MAT_DEPTH(type);

    Mat image = readImage(fname, channels == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);

    if (channels == 4)
    {
        Mat withAlpha;
        cvtColor(image, withAlpha, COLOR_BGR2BGRA);
        swap(image, withAlpha);
    }

    const double scale = (depth == CV_32F) ? 1.0 / 255.0 : 1.0;
    image.convertTo(image, depth, scale);

    return image;
}
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Gpu devices
|
||||
|
||||
bool supportFeature(const DeviceInfo& info, FeatureSet feature) |
||||
{ |
||||
return TargetArchs::builtWith(feature) && info.supports(feature); |
||||
} |
||||
|
||||
// Returns the single DeviceManager, a function-local static created on first use.
DeviceManager& DeviceManager::instance()
{
    static DeviceManager theManager;

    return theManager;
}
||||
|
||||
void DeviceManager::load(int i) |
||||
{ |
||||
devices_.clear(); |
||||
devices_.reserve(1); |
||||
|
||||
std::ostringstream msg; |
||||
|
||||
if (i < 0 || i >= getCudaEnabledDeviceCount()) |
||||
{ |
||||
msg << "Incorrect device number - " << i; |
||||
throw runtime_error(msg.str()); |
||||
} |
||||
|
||||
DeviceInfo info(i); |
||||
|
||||
if (!info.isCompatible()) |
||||
{ |
||||
msg << "Device " << i << " [" << info.name() << "] is NOT compatible with current GPU module build"; |
||||
throw runtime_error(msg.str()); |
||||
} |
||||
|
||||
devices_.push_back(info); |
||||
} |
||||
|
||||
void DeviceManager::loadAll() |
||||
{ |
||||
int deviceCount = getCudaEnabledDeviceCount(); |
||||
|
||||
devices_.clear(); |
||||
devices_.reserve(deviceCount); |
||||
|
||||
for (int i = 0; i < deviceCount; ++i) |
||||
{ |
||||
DeviceInfo info(i); |
||||
if (info.isCompatible()) |
||||
{ |
||||
devices_.push_back(info); |
||||
} |
||||
} |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Additional assertion
|
||||
|
||||
namespace |
||||
{ |
||||
template <typename T, typename OutT> std::string printMatValImpl(const Mat& m, Point p) |
||||
{ |
||||
const int cn = m.channels(); |
||||
|
||||
std::ostringstream ostr; |
||||
ostr << "("; |
||||
|
||||
p.x /= cn; |
||||
|
||||
ostr << static_cast<OutT>(m.at<T>(p.y, p.x * cn)); |
||||
for (int c = 1; c < m.channels(); ++c) |
||||
{ |
||||
ostr << ", " << static_cast<OutT>(m.at<T>(p.y, p.x * cn + c)); |
||||
} |
||||
ostr << ")"; |
||||
|
||||
return ostr.str(); |
||||
} |
||||
|
||||
std::string printMatVal(const Mat& m, Point p) |
||||
{ |
||||
typedef std::string (*func_t)(const Mat& m, Point p); |
||||
|
||||
static const func_t funcs[] = |
||||
{ |
||||
printMatValImpl<uchar, int>, printMatValImpl<schar, int>, printMatValImpl<ushort, int>, printMatValImpl<short, int>, |
||||
printMatValImpl<int, int>, printMatValImpl<float, float>, printMatValImpl<double, double> |
||||
}; |
||||
|
||||
return funcs[m.depth()](m, p); |
||||
} |
||||
} |
||||
|
||||
void minMaxLocGold(const Mat& src, double* minVal_, double* maxVal_, Point* minLoc_, Point* maxLoc_, const Mat& mask) |
||||
{ |
||||
if (src.depth() != CV_8S) |
||||
{ |
||||
minMaxLoc(src, minVal_, maxVal_, minLoc_, maxLoc_, mask); |
||||
return; |
||||
} |
||||
|
||||
// OpenCV's minMaxLoc doesn't support CV_8S type
|
||||
double minVal = numeric_limits<double>::max(); |
||||
Point minLoc(-1, -1); |
||||
|
||||
double maxVal = -numeric_limits<double>::max(); |
||||
Point maxLoc(-1, -1); |
||||
|
||||
for (int y = 0; y < src.rows; ++y) |
||||
{ |
||||
const schar* src_row = src.ptr<schar>(y); |
||||
const uchar* mask_row = mask.empty() ? 0 : mask.ptr<uchar>(y); |
||||
|
||||
for (int x = 0; x < src.cols; ++x) |
||||
{ |
||||
if (!mask_row || mask_row[x]) |
||||
{ |
||||
schar val = src_row[x]; |
||||
|
||||
if (val < minVal) |
||||
{ |
||||
minVal = val; |
||||
minLoc = cv::Point(x, y); |
||||
} |
||||
|
||||
if (val > maxVal) |
||||
{ |
||||
maxVal = val; |
||||
maxLoc = cv::Point(x, y); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (minVal_) *minVal_ = minVal; |
||||
if (maxVal_) *maxVal_ = maxVal; |
||||
|
||||
if (minLoc_) *minLoc_ = minLoc; |
||||
if (maxLoc_) *maxLoc_ = maxLoc; |
||||
} |
||||
|
||||
// Returns the array as a host Mat, downloading it first when it wraps a GpuMat.
Mat getMat(InputArray arr)
{
    if (arr.kind() != _InputArray::GPU_MAT)
        return arr.getMat();

    Mat host;
    arr.getGpuMat().download(host);
    return host;
}
||||
|
||||
// Predicate-formatter comparing two matrices element-wise. Fails when the
// sizes differ, the types differ, or the largest absolute difference is
// greater than eps; the failure message names the offending element and both
// values. GpuMat inputs are downloaded before comparison.
AssertionResult assertMatNear(const char* expr1, const char* expr2, const char* eps_expr, InputArray m1_, InputArray m2_, double eps)
{
    const Mat m1 = getMat(m1_);
    const Mat m2 = getMat(m2_);

    if (m1.size() != m2.size())
    {
        return AssertionFailure() << "Matrices \"" << expr1 << "\" and \"" << expr2 << "\" have different sizes : \""
                                  << expr1 << "\" [" << PrintToString(m1.size()) << "] vs \""
                                  << expr2 << "\" [" << PrintToString(m2.size()) << "]";
    }

    if (m1.type() != m2.type())
    {
        return AssertionFailure() << "Matrices \"" << expr1 << "\" and \"" << expr2 << "\" have different types : \""
                                  << expr1 << "\" [" << PrintToString(MatType(m1.type())) << "] vs \""
                                  << expr2 << "\" [" << PrintToString(MatType(m2.type())) << "]";
    }

    // Compare as single-channel matrices so every channel is checked on its own.
    Mat delta;
    absdiff(m1.reshape(1), m2.reshape(1), delta);

    double worst = 0.0;
    Point worstAt;
    minMaxLocGold(delta, 0, &worst, 0, &worstAt);

    // Written as a negated "greater than" so the outcome matches the plain
    // `worst > eps` test for every input.
    if (!(worst > eps))
        return AssertionSuccess();

    // worstAt.x is a flat channel index; divide to get the pixel column.
    const int row = worstAt.y;
    const int col = worstAt.x / m1.channels();

    return AssertionFailure() << "The max difference between matrices \"" << expr1 << "\" and \"" << expr2
                              << "\" is " << worst << " at (" << row << ", " << col << ")"
                              << ", which exceeds \"" << eps_expr << "\", where \""
                              << expr1 << "\" at (" << row << ", " << col << ") evaluates to " << printMatVal(m1, worstAt) << ", \""
                              << expr2 << "\" at (" << row << ", " << col << ") evaluates to " << printMatVal(m2, worstAt) << ", \""
                              << eps_expr << "\" evaluates to " << eps;
}
||||
|
||||
// Runs matchTemplate with CV_TM_CCORR_NORMED on the two arrays and returns how
// far the score at (0, 0) is from 1, as an absolute value.
double checkSimilarity(InputArray m1, InputArray m2)
{
    Mat score;
    matchTemplate(getMat(m1), getMat(m2), score, CV_TM_CCORR_NORMED);

    const float deviation = score.at<float>(0, 0) - 1.f;
    return std::abs(deviation);
}
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Helper structs for value-parameterized tests
|
||||
|
||||
// Enumerates every matrix type whose depth lies in [depth_start, depth_end] and
// whose channel count lies in [cn_start, cn_end] (both ranges inclusive),
// ordered by depth first and channel count second.
vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
{
    const int depthCount = depth_end - depth_start + 1;
    const int cnCount = cn_end - cn_start + 1;

    vector<MatType> result;
    result.reserve(depthCount * cnCount);

    for (int depth = depth_start; depth <= depth_end; ++depth)
        for (int cn = cn_start; cn <= cn_end; ++cn)
            result.push_back(MatType(CV_MAKE_TYPE(depth, cn)));

    return result;
}
||||
|
||||
// Returns the list of all types from CV_8U to CV_64F with 1 to 4 channels,
// built once on first use and then shared.
const vector<MatType>& all_types()
{
    static vector<MatType> cached = types(CV_8U, CV_64F, 1, 4);
    return cached;
}
||||
|
||||
void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os) |
||||
{ |
||||
(*os) << info.name(); |
||||
} |
||||
|
||||
//! gtest value printer for UseRoi: "sub matrix" when set, "whole matrix" otherwise.
void PrintTo(const UseRoi& useRoi, std::ostream* os)
{
    const char* const label = useRoi ? "sub matrix" : "whole matrix";
    (*os) << label;
}
||||
|
||||
//! gtest value printer for Inverse: "inverse" when set, "direct" otherwise.
void PrintTo(const Inverse& inverse, std::ostream* os)
{
    const char* const label = inverse ? "inverse" : "direct";
    (*os) << label;
}
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Other
|
||||
|
||||
void dumpImage(const std::string& fileName, const Mat& image) |
||||
{ |
||||
imwrite(TS::ptr()->get_data_path() + fileName, image); |
||||
} |
||||
|
||||
//! Debugging aid: displays the reference image, the actual image and a binary
//! mask of their absolute difference thresholded by eps (THRESH_BINARY, max
//! value 255) in three windows, then calls waitKey().
void showDiff(InputArray gold_, InputArray actual_, double eps)
{
    Mat expected = getMat(gold_);
    Mat observed = getMat(actual_);

    // Absolute difference, binarized in place.
    Mat mask;
    absdiff(expected, observed, mask);
    threshold(mask, mask, eps, 255.0, cv::THRESH_BINARY);

    const char* const titles[] = { "gold", "actual", "diff" };
    const Mat* const images[] = { &expected, &observed, &mask };
    const int windowCount = 3;

    // Create all windows first, then fill them, in the same order as before.
    for (int i = 0; i < windowCount; ++i)
        namedWindow(titles[i], WINDOW_NORMAL);

    for (int i = 0; i < windowCount; ++i)
        imshow(titles[i], *images[i]);

    waitKey();
}
||||
|
||||
#endif // HAVE_CUDA
|
@ -1,331 +0,0 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_GPU_TEST_UTILITY_HPP__ |
||||
#define __OPENCV_GPU_TEST_UTILITY_HPP__ |
||||
|
||||
#include "opencv2/core.hpp" |
||||
#include "opencv2/core/gpumat.hpp" |
||||
#include "opencv2/highgui.hpp" |
||||
#include "opencv2/ts.hpp" |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// random generators
|
||||
|
||||
int randomInt(int minVal, int maxVal); |
||||
double randomDouble(double minVal, double maxVal); |
||||
cv::Size randomSize(int minVal, int maxVal); |
||||
cv::Scalar randomScalar(double minVal, double maxVal); |
||||
cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// GpuMat create
|
||||
|
||||
cv::gpu::GpuMat createMat(cv::Size size, int type, bool useRoi = false); |
||||
cv::gpu::GpuMat loadMat(const cv::Mat& m, bool useRoi = false); |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Image load
|
||||
|
||||
//! read image from testdata folder
|
||||
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR); |
||||
|
||||
//! read image from testdata folder and convert it to specified type
|
||||
cv::Mat readImageType(const std::string& fname, int type); |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Gpu devices
|
||||
|
||||
//! returns true if the device supports the specified feature and the gpu module was built with support for that feature.
|
||||
bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); |
||||
|
||||
class DeviceManager |
||||
{ |
||||
public: |
||||
static DeviceManager& instance(); |
||||
|
||||
void load(int i); |
||||
void loadAll(); |
||||
|
||||
const std::vector<cv::gpu::DeviceInfo>& values() const { return devices_; } |
||||
|
||||
private: |
||||
std::vector<cv::gpu::DeviceInfo> devices_; |
||||
}; |
||||
|
||||
#define ALL_DEVICES testing::ValuesIn(DeviceManager::instance().values()) |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Additional assertion
|
||||
|
||||
void minMaxLocGold(const cv::Mat& src, double* minVal_, double* maxVal_ = 0, cv::Point* minLoc_ = 0, cv::Point* maxLoc_ = 0, const cv::Mat& mask = cv::Mat()); |
||||
|
||||
cv::Mat getMat(cv::InputArray arr); |
||||
|
||||
testing::AssertionResult assertMatNear(const char* expr1, const char* expr2, const char* eps_expr, cv::InputArray m1, cv::InputArray m2, double eps); |
||||
|
||||
#define EXPECT_MAT_NEAR(m1, m2, eps) EXPECT_PRED_FORMAT3(assertMatNear, m1, m2, eps) |
||||
#define ASSERT_MAT_NEAR(m1, m2, eps) ASSERT_PRED_FORMAT3(assertMatNear, m1, m2, eps) |
||||
|
||||
// Component-wise EXPECT_NEAR / ASSERT_NEAR over elements 0..3 of two scalars.
// s1 and s2 are parenthesized so that expression arguments (e.g. a + b) are
// indexed as a whole. Each argument is evaluated several times, so avoid
// arguments with side effects.
#define EXPECT_SCALAR_NEAR(s1, s2, eps) \
    { \
        EXPECT_NEAR((s1)[0], (s2)[0], eps); \
        EXPECT_NEAR((s1)[1], (s2)[1], eps); \
        EXPECT_NEAR((s1)[2], (s2)[2], eps); \
        EXPECT_NEAR((s1)[3], (s2)[3], eps); \
    }
#define ASSERT_SCALAR_NEAR(s1, s2, eps) \
    { \
        ASSERT_NEAR((s1)[0], (s2)[0], eps); \
        ASSERT_NEAR((s1)[1], (s2)[1], eps); \
        ASSERT_NEAR((s1)[2], (s2)[2], eps); \
        ASSERT_NEAR((s1)[3], (s2)[3], eps); \
    }
||||
|
||||
// Coordinate-wise EXPECT_NEAR / ASSERT_NEAR over the x and y members of two points.
// p1 and p2 are parenthesized so that expression arguments (e.g. a + b) are
// accessed as a whole. Each argument is evaluated several times, so avoid
// arguments with side effects.
#define EXPECT_POINT2_NEAR(p1, p2, eps) \
    { \
        EXPECT_NEAR((p1).x, (p2).x, eps); \
        EXPECT_NEAR((p1).y, (p2).y, eps); \
    }
#define ASSERT_POINT2_NEAR(p1, p2, eps) \
    { \
        ASSERT_NEAR((p1).x, (p2).x, eps); \
        ASSERT_NEAR((p1).y, (p2).y, eps); \
    }
||||
|
||||
// Coordinate-wise EXPECT_NEAR / ASSERT_NEAR over the x, y and z members of two points.
// p1 and p2 are parenthesized so that expression arguments (e.g. a + b) are
// accessed as a whole. Each argument is evaluated several times, so avoid
// arguments with side effects.
#define EXPECT_POINT3_NEAR(p1, p2, eps) \
    { \
        EXPECT_NEAR((p1).x, (p2).x, eps); \
        EXPECT_NEAR((p1).y, (p2).y, eps); \
        EXPECT_NEAR((p1).z, (p2).z, eps); \
    }
#define ASSERT_POINT3_NEAR(p1, p2, eps) \
    { \
        ASSERT_NEAR((p1).x, (p2).x, eps); \
        ASSERT_NEAR((p1).y, (p2).y, eps); \
        ASSERT_NEAR((p1).z, (p2).z, eps); \
    }
||||
|
||||
double checkSimilarity(cv::InputArray m1, cv::InputArray m2); |
||||
|
||||
// Checks that two matrices have the same type and size (always a fatal
// ASSERT_EQ, in both variants) and that checkSimilarity(mat1, mat2) does not
// exceed eps (EXPECT_LE or ASSERT_LE depending on the variant).
// mat1 and mat2 are parenthesized before member access so that expression
// arguments expand correctly. Each argument is evaluated several times.
#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
    { \
        ASSERT_EQ((mat1).type(), (mat2).type()); \
        ASSERT_EQ((mat1).size(), (mat2).size()); \
        EXPECT_LE(checkSimilarity(mat1, mat2), eps); \
    }
#define ASSERT_MAT_SIMILAR(mat1, mat2, eps) \
    { \
        ASSERT_EQ((mat1).type(), (mat2).type()); \
        ASSERT_EQ((mat1).size(), (mat2).size()); \
        ASSERT_LE(checkSimilarity(mat1, mat2), eps); \
    }
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Helper structs for value-parameterized tests
|
||||
|
||||
#define GPU_TEST_P(test_case_name, test_name) \ |
||||
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
|
||||
: public test_case_name { \
|
||||
public: \
|
||||
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
|
||||
virtual void TestBody(); \
|
||||
private: \
|
||||
void UnsafeTestBody(); \
|
||||
static int AddToRegistry() { \
|
||||
::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
|
||||
GetTestCasePatternHolder<test_case_name>(\
|
||||
#test_case_name, __FILE__, __LINE__)->AddTestPattern(\ |
||||
#test_case_name, \ |
||||
#test_name, \ |
||||
new ::testing::internal::TestMetaFactory< \
|
||||
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
|
||||
return 0; \
|
||||
} \
|
||||
static int gtest_registering_dummy_; \
|
||||
GTEST_DISALLOW_COPY_AND_ASSIGN_(\
|
||||
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
|
||||
}; \
|
||||
int GTEST_TEST_CLASS_NAME_(test_case_name, \
|
||||
test_name)::gtest_registering_dummy_ = \
|
||||
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
|
||||
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() \
|
||||
{ \
|
||||
try \
|
||||
{ \
|
||||
UnsafeTestBody(); \
|
||||
} \
|
||||
catch (...) \
|
||||
{ \
|
||||
cv::gpu::resetDevice(); \
|
||||
throw; \
|
||||
} \
|
||||
} \
|
||||
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::UnsafeTestBody() |
||||
|
||||
#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > |
||||
#define GET_PARAM(k) std::tr1::get< k >(GetParam()) |
||||
|
||||
namespace cv { namespace gpu |
||||
{ |
||||
void PrintTo(const DeviceInfo& info, std::ostream* os); |
||||
}} |
||||
|
||||
#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113)) |
||||
|
||||
// Depth
|
||||
|
||||
using perf::MatDepth; |
||||
|
||||
#define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F)) |
||||
|
||||
#define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \ |
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F))) |
||||
|
||||
// Type
|
||||
|
||||
using perf::MatType; |
||||
|
||||
//! return vector with types from specified range.
|
||||
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end); |
||||
|
||||
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
|
||||
const std::vector<MatType>& all_types(); |
||||
|
||||
#define ALL_TYPES testing::ValuesIn(all_types()) |
||||
#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) |
||||
|
||||
// ROI
|
||||
|
||||
//! Boolean test parameter with its own type, so that gtest can print it
//! through a dedicated PrintTo() overload instead of as a plain bool.
//! Converts implicitly from and to bool.
class UseRoi
{
public:
    UseRoi(bool val = false) : val_(val) {}

    operator bool() const
    {
        return val_;
    }

private:
    bool val_;
};
||||
|
||||
void PrintTo(const UseRoi& useRoi, std::ostream* os); |
||||
|
||||
#define WHOLE_SUBMAT testing::Values(UseRoi(false), UseRoi(true)) |
||||
|
||||
// Direct/Inverse
|
||||
|
||||
//! Boolean test parameter selecting the direct (false) or the inverse (true)
//! variant; it has its own type so that gtest can print it through a
//! dedicated PrintTo() overload. Converts implicitly from and to bool.
class Inverse
{
public:
    Inverse(bool val = false) : val_(val) {}

    operator bool() const
    {
        return val_;
    }

private:
    bool val_;
};

//! gtest value printer for Inverse
void PrintTo(const Inverse& inverse, std::ostream* os);
||||
|
||||
#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true)) |
||||
|
||||
// Param class
|
||||
|
||||
#define IMPLEMENT_PARAM_CLASS(name, type) \ |
||||
class name \
|
||||
{ \
|
||||
public: \
|
||||
name ( type arg = type ()) : val_(arg) {} \
|
||||
operator type () const {return val_;} \
|
||||
private: \
|
||||
type val_; \
|
||||
}; \
|
||||
inline void PrintTo( name param, std::ostream* os) \
|
||||
{ \
|
||||
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
|
||||
} |
||||
|
||||
IMPLEMENT_PARAM_CLASS(Channels, int) |
||||
|
||||
#define ALL_CHANNELS testing::Values(Channels(1), Channels(2), Channels(3), Channels(4)) |
||||
#define IMAGE_CHANNELS testing::Values(Channels(1), Channels(3), Channels(4)) |
||||
|
||||
// Flags and enums
|
||||
|
||||
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX) |
||||
|
||||
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA) |
||||
|
||||
CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP) |
||||
#define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)) |
||||
|
||||
CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP) |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Other
|
||||
|
||||
void dumpImage(const std::string& fileName, const cv::Mat& image); |
||||
void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); |
||||
|
||||
#endif // __OPENCV_GPU_TEST_UTILITY_HPP__
|
@ -0,0 +1,79 @@ |
||||
Background Subtraction |
||||
====================== |
||||
|
||||
.. highlight:: cpp |
||||
|
||||
|
||||
|
||||
gpu::VIBE_GPU |
||||
------------- |
||||
.. ocv:class:: gpu::VIBE_GPU |
||||
|
||||
Class used for background/foreground segmentation. :: |
||||
|
||||
class VIBE_GPU |
||||
{ |
||||
public: |
||||
explicit VIBE_GPU(unsigned long rngSeed = 1234567); |
||||
|
||||
void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null()); |
||||
|
||||
void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null()); |
||||
|
||||
void release(); |
||||
|
||||
... |
||||
}; |
||||
|
||||
The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements the algorithm described in [VIBE2011]_. |
||||
|
||||
|
||||
|
||||
gpu::VIBE_GPU::VIBE_GPU |
||||
----------------------- |
||||
The constructor. |
||||
|
||||
.. ocv:function:: gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed = 1234567) |
||||
|
||||
:param rngSeed: Value used to initiate a random sequence. |
||||
|
||||
Default constructor sets all parameters to default values. |
||||
|
||||
|
||||
|
||||
gpu::VIBE_GPU::initialize |
||||
------------------------- |
||||
Initializes the background model and allocates all inner buffers. |
||||
|
||||
.. ocv:function:: void gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null()) |
||||
|
||||
:param firstFrame: First frame from video sequence. |
||||
|
||||
:param stream: Stream for the asynchronous version. |
||||
|
||||
|
||||
|
||||
gpu::VIBE_GPU::operator() |
||||
------------------------- |
||||
Updates the background model and returns the foreground mask. |
||||
|
||||
.. ocv:function:: void gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null()) |
||||
|
||||
:param frame: Next video frame. |
||||
|
||||
:param fgmask: The output foreground mask as an 8-bit binary image. |
||||
|
||||
:param stream: Stream for the asynchronous version. |
||||
|
||||
|
||||
|
||||
gpu::VIBE_GPU::release |
||||
---------------------- |
||||
Releases the memory of all inner buffers. |
||||
|
||||
.. ocv:function:: void gpu::VIBE_GPU::release() |
||||
|
||||
|
||||
|
||||
|
||||
.. [VIBE2011] O. Barnich and M. Van Droogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6):1709-1724, June 2011 |
@ -0,0 +1,169 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_NONFREE_GPU_HPP__ |
||||
#define __OPENCV_NONFREE_GPU_HPP__ |
||||
|
||||
#include "opencv2/opencv_modules.hpp" |
||||
|
||||
#if defined(HAVE_OPENCV_GPU) |
||||
|
||||
#include "opencv2/gpu.hpp" |
||||
|
||||
namespace cv { namespace gpu { |
||||
|
||||
class CV_EXPORTS SURF_GPU |
||||
{ |
||||
public: |
||||
enum KeypointLayout |
||||
{ |
||||
X_ROW = 0, |
||||
Y_ROW, |
||||
LAPLACIAN_ROW, |
||||
OCTAVE_ROW, |
||||
SIZE_ROW, |
||||
ANGLE_ROW, |
||||
HESSIAN_ROW, |
||||
ROWS_COUNT |
||||
}; |
||||
|
||||
//! the default constructor
|
||||
SURF_GPU(); |
||||
//! the full constructor taking all the necessary parameters
|
||||
explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4, |
||||
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false); |
||||
|
||||
//! returns the descriptor size in float's (64 or 128)
|
||||
int descriptorSize() const; |
||||
|
||||
//! upload host keypoints to device memory
|
||||
void uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU); |
||||
//! download keypoints from device to host memory
|
||||
void downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints); |
||||
|
||||
//! download descriptors from device to host memory
|
||||
void downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors); |
||||
|
||||
//! finds the keypoints using fast hessian detector used in SURF
|
||||
//! supports CV_8UC1 images
|
||||
//! keypoints will have nFeature cols and 6 rows
|
||||
//! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
|
||||
//! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
|
||||
//! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
|
||||
//! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
|
||||
//! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
|
||||
//! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
|
||||
//! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
|
||||
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints); |
||||
//! finds the keypoints and computes their descriptors.
|
||||
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
|
||||
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors, |
||||
bool useProvidedKeypoints = false); |
||||
|
||||
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints); |
||||
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors, |
||||
bool useProvidedKeypoints = false); |
||||
|
||||
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors, |
||||
bool useProvidedKeypoints = false); |
||||
|
||||
void releaseMemory(); |
||||
|
||||
// SURF parameters
|
||||
double hessianThreshold; |
||||
int nOctaves; |
||||
int nOctaveLayers; |
||||
bool extended; |
||||
bool upright; |
||||
|
||||
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
|
||||
float keypointsRatio; |
||||
|
||||
GpuMat sum, mask1, maskSum, intBuffer; |
||||
|
||||
GpuMat det, trace; |
||||
|
||||
GpuMat maxPosBuffer; |
||||
}; |
||||
|
||||
/*!
|
||||
* The class implements the following algorithm: |
||||
* "ViBe: A universal background subtraction algorithm for video sequences" |
||||
* O. Barnich and M. Van D Roogenbroeck |
||||
* IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011 |
||||
*/ |
||||
class CV_EXPORTS VIBE_GPU |
||||
{ |
||||
public: |
||||
//! the default constructor
|
||||
explicit VIBE_GPU(unsigned long rngSeed = 1234567); |
||||
|
||||
//! re-initiaization method
|
||||
void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null()); |
||||
|
||||
//! the update operator
|
||||
void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null()); |
||||
|
||||
//! releases all inner buffers
|
||||
void release(); |
||||
|
||||
int nbSamples; // number of samples per pixel
|
||||
int reqMatches; // #_min
|
||||
int radius; // R
|
||||
int subsamplingFactor; // amount of random subsampling
|
||||
|
||||
private: |
||||
Size frameSize_; |
||||
|
||||
unsigned long rngSeed_; |
||||
GpuMat randStates_; |
||||
|
||||
GpuMat samples_; |
||||
}; |
||||
|
||||
} // namespace gpu
|
||||
|
||||
} // namespace cv
|
||||
|
||||
#endif // defined(HAVE_OPENCV_GPU)
|
||||
|
||||
#endif // __OPENCV_NONFREE_GPU_HPP__
|
@ -0,0 +1,124 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_NONFREE_OCL_HPP__ |
||||
#define __OPENCV_NONFREE_OCL_HPP__ |
||||
|
||||
#include "opencv2/ocl.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace ocl |
||||
{ |
||||
//! Speeded up robust features, port from GPU module.
|
||||
////////////////////////////////// SURF //////////////////////////////////////////
|
||||
|
||||
class CV_EXPORTS SURF_OCL |
||||
{ |
||||
public: |
||||
enum KeypointLayout |
||||
{ |
||||
X_ROW = 0, |
||||
Y_ROW, |
||||
LAPLACIAN_ROW, |
||||
OCTAVE_ROW, |
||||
SIZE_ROW, |
||||
ANGLE_ROW, |
||||
HESSIAN_ROW, |
||||
ROWS_COUNT |
||||
}; |
||||
|
||||
//! the default constructor
|
||||
SURF_OCL(); |
||||
//! the full constructor taking all the necessary parameters
|
||||
explicit SURF_OCL(double _hessianThreshold, int _nOctaves = 4, |
||||
int _nOctaveLayers = 2, bool _extended = false, float _keypointsRatio = 0.01f, bool _upright = false); |
||||
|
||||
//! returns the descriptor size in float's (64 or 128)
|
||||
int descriptorSize() const; |
||||
//! upload host keypoints to device memory
|
||||
void uploadKeypoints(const std::vector<cv::KeyPoint> &keypoints, oclMat &keypointsocl); |
||||
//! download keypoints from device to host memory
|
||||
void downloadKeypoints(const oclMat &keypointsocl, std::vector<KeyPoint> &keypoints); |
||||
//! download descriptors from device to host memory
|
||||
void downloadDescriptors(const oclMat &descriptorsocl, std::vector<float> &descriptors); |
||||
//! finds the keypoints using fast hessian detector used in SURF
|
||||
//! supports CV_8UC1 images
|
||||
//! keypoints will have nFeature cols and 6 rows
|
||||
//! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
|
||||
//! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
|
||||
//! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
|
||||
//! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
|
||||
//! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
|
||||
//! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
|
||||
//! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
|
||||
void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints); |
||||
//! finds the keypoints and computes their descriptors.
|
||||
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
|
||||
void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors, |
||||
bool useProvidedKeypoints = false); |
||||
void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints); |
||||
void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, oclMat &descriptors, |
||||
bool useProvidedKeypoints = false); |
||||
void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, std::vector<float> &descriptors, |
||||
bool useProvidedKeypoints = false); |
||||
|
||||
void releaseMemory(); |
||||
|
||||
// SURF parameters
|
||||
float hessianThreshold; |
||||
int nOctaves; |
||||
int nOctaveLayers; |
||||
bool extended; |
||||
bool upright; |
||||
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
|
||||
float keypointsRatio; |
||||
oclMat sum, mask1, maskSum, intBuffer; |
||||
oclMat det, trace; |
||||
oclMat maxPosBuffer; |
||||
|
||||
}; |
||||
} |
||||
} |
||||
|
||||
#endif //__OPENCV_NONFREE_OCL_HPP__
|
@ -0,0 +1,138 @@ |
||||
#include "perf_precomp.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace testing; |
||||
using namespace perf; |
||||
|
||||
#if defined(HAVE_XINE) || \ |
||||
defined(HAVE_GSTREAMER) || \
|
||||
defined(HAVE_QUICKTIME) || \
|
||||
defined(HAVE_AVFOUNDATION) || \
|
||||
defined(HAVE_FFMPEG) || \
|
||||
defined(WIN32) /* assume that we have ffmpeg */ |
||||
|
||||
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 |
||||
#else |
||||
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0 |
||||
#endif |
||||
|
||||
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA) |
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SURF
|
||||
|
||||
DEF_PARAM_TEST_1(Image, string); |
||||
|
||||
// Times SURF keypoint detection + descriptor extraction on a grayscale image,
// on the GPU (cv::gpu::SURF_GPU) or on the CPU (cv::SURF) depending on
// PERF_RUN_GPU(). The variables handed to the SANITY_CHECK* macros keep their
// names because the macros receive the variable itself.
PERF_TEST_P(Image, GPU_SURF,
            Values<string>("gpu/perf/aloe.png"))
{
    declare.time(50.0);

    const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    if (PERF_RUN_GPU())
    {
        cv::gpu::SURF_GPU gpuSurf;

        const cv::gpu::GpuMat gpuImage(image);
        cv::gpu::GpuMat gpuKeypointsMat;
        cv::gpu::GpuMat gpuDescriptorsMat;

        // Timed region: an empty mask is passed on every iteration.
        TEST_CYCLE()
        {
            gpuSurf(gpuImage, cv::gpu::GpuMat(), gpuKeypointsMat, gpuDescriptorsMat);
        }

        // Bring the results back to the host for the sanity check.
        std::vector<cv::KeyPoint> gpu_keypoints;
        gpuSurf.downloadKeypoints(gpuKeypointsMat, gpu_keypoints);

        cv::Mat gpu_descriptors(gpuDescriptorsMat);

        sortKeyPoints(gpu_keypoints, gpu_descriptors);

        SANITY_CHECK_KEYPOINTS(gpu_keypoints);
        SANITY_CHECK(gpu_descriptors, 1e-3);
    }
    else
    {
        cv::SURF cpuSurf;

        std::vector<cv::KeyPoint> cpu_keypoints;
        cv::Mat cpu_descriptors;

        TEST_CYCLE()
        {
            cpuSurf(image, cv::noArray(), cpu_keypoints, cpu_descriptors);
        }

        SANITY_CHECK_KEYPOINTS(cpu_keypoints);
        SANITY_CHECK(cpu_descriptors);
    }
}
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// VIBE
|
||||
|
||||
#if BUILD_WITH_VIDEO_INPUT_SUPPORT |
||||
|
||||
DEF_PARAM_TEST(Video_Cn, string, int); |
||||
|
||||
PERF_TEST_P(Video_Cn, GPU_VIBE, |
||||
Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), |
||||
GPU_CHANNELS_1_3_4)) |
||||
{ |
||||
const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0)); |
||||
const int cn = GET_PARAM(1); |
||||
|
||||
cv::VideoCapture cap(inputFile); |
||||
ASSERT_TRUE(cap.isOpened()); |
||||
|
||||
cv::Mat frame; |
||||
cap >> frame; |
||||
ASSERT_FALSE(frame.empty()); |
||||
|
||||
if (cn != 3) |
||||
{ |
||||
cv::Mat temp; |
||||
if (cn == 1) |
||||
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); |
||||
else |
||||
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); |
||||
cv::swap(temp, frame); |
||||
} |
||||
|
||||
if (PERF_RUN_GPU()) |
||||
{ |
||||
cv::gpu::GpuMat d_frame(frame); |
||||
cv::gpu::VIBE_GPU vibe; |
||||
cv::gpu::GpuMat foreground; |
||||
|
||||
vibe(d_frame, foreground); |
||||
|
||||
for (int i = 0; i < 10; ++i) |
||||
{ |
||||
cap >> frame; |
||||
ASSERT_FALSE(frame.empty()); |
||||
|
||||
if (cn != 3) |
||||
{ |
||||
cv::Mat temp; |
||||
if (cn == 1) |
||||
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); |
||||
else |
||||
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); |
||||
cv::swap(temp, frame); |
||||
} |
||||
|
||||
d_frame.upload(frame); |
||||
|
||||
startTimer(); next(); |
||||
vibe(d_frame, foreground); |
||||
stopTimer(); |
||||
} |
||||
|
||||
GPU_SANITY_CHECK(foreground); |
||||
} |
||||
else |
||||
{ |
||||
FAIL_NO_CPU(); |
||||
} |
||||
} |
||||
|
||||
#endif |
||||
|
||||
#endif |
@ -1,3 +1,4 @@ |
||||
#include "perf_precomp.hpp" |
||||
#include "opencv2/ts/gpu_perf.hpp" |
||||
|
||||
CV_PERF_TEST_MAIN(nonfree) |
||||
CV_PERF_TEST_MAIN(nonfree, perf::printCudaInfo()) |
||||
|
@ -0,0 +1,285 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA) |
||||
|
||||
using namespace cvtest; |
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SURF
|
||||
|
||||
namespace |
||||
{ |
||||
IMPLEMENT_PARAM_CLASS(SURF_HessianThreshold, double) |
||||
IMPLEMENT_PARAM_CLASS(SURF_Octaves, int) |
||||
IMPLEMENT_PARAM_CLASS(SURF_OctaveLayers, int) |
||||
IMPLEMENT_PARAM_CLASS(SURF_Extended, bool) |
||||
IMPLEMENT_PARAM_CLASS(SURF_Upright, bool) |
||||
} |
||||
|
||||
PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright) |
||||
{ |
||||
cv::gpu::DeviceInfo devInfo; |
||||
double hessianThreshold; |
||||
int nOctaves; |
||||
int nOctaveLayers; |
||||
bool extended; |
||||
bool upright; |
||||
|
||||
virtual void SetUp() |
||||
{ |
||||
devInfo = GET_PARAM(0); |
||||
hessianThreshold = GET_PARAM(1); |
||||
nOctaves = GET_PARAM(2); |
||||
nOctaveLayers = GET_PARAM(3); |
||||
extended = GET_PARAM(4); |
||||
upright = GET_PARAM(5); |
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID()); |
||||
} |
||||
}; |
||||
|
||||
// Compares keypoints detected by cv::gpu::SURF_GPU with those of cv::SURF
// configured identically. On devices without GLOBAL_ATOMICS, any cv::Exception
// raised must carry CV_StsNotImplemented.
GPU_TEST_P(SURF, Detector)
{
    cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    cv::gpu::SURF_GPU surf;
    surf.hessianThreshold = hessianThreshold;
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;

    if (supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
    {
        std::vector<cv::KeyPoint> keypoints;
        surf(loadMat(image), cv::gpu::GpuMat(), keypoints);

        // Reference detector with the same settings.
        cv::SURF surf_gold;
        surf_gold.hessianThreshold = hessianThreshold;
        surf_gold.nOctaves = nOctaves;
        surf_gold.nOctaveLayers = nOctaveLayers;
        surf_gold.extended = extended;
        surf_gold.upright = upright;

        std::vector<cv::KeyPoint> keypoints_gold;
        surf_gold(image, cv::noArray(), keypoints_gold);

        ASSERT_EQ(keypoints_gold.size(), keypoints.size());

        const int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
        const double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();

        EXPECT_GT(matchedRatio, 0.95);
    }
    else
    {
        try
        {
            std::vector<cv::KeyPoint> keypoints;
            surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsNotImplemented, e.code);
        }
    }
}
||||
|
||||
// Same comparison as the Detector test, but with a mask whose top-left
// quadrant is zero and whose remaining pixels are one.
GPU_TEST_P(SURF, Detector_Masked)
{
    cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
    const cv::Range topHalfRows(0, image.rows / 2);
    const cv::Range leftHalfCols(0, image.cols / 2);
    mask(topHalfRows, leftHalfCols).setTo(cv::Scalar::all(0));

    cv::gpu::SURF_GPU surf;
    surf.hessianThreshold = hessianThreshold;
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;

    if (supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
    {
        std::vector<cv::KeyPoint> keypoints;
        surf(loadMat(image), loadMat(mask), keypoints);

        // Reference detector with the same settings and the same mask.
        cv::SURF surf_gold;
        surf_gold.hessianThreshold = hessianThreshold;
        surf_gold.nOctaves = nOctaves;
        surf_gold.nOctaveLayers = nOctaveLayers;
        surf_gold.extended = extended;
        surf_gold.upright = upright;

        std::vector<cv::KeyPoint> keypoints_gold;
        surf_gold(image, mask, keypoints_gold);

        ASSERT_EQ(keypoints_gold.size(), keypoints.size());

        const int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
        const double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();

        EXPECT_GT(matchedRatio, 0.95);
    }
    else
    {
        try
        {
            std::vector<cv::KeyPoint> keypoints;
            surf(loadMat(image), loadMat(mask), keypoints);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsNotImplemented, e.code);
        }
    }
}
||||
|
||||
// Computes descriptors for the same keypoints with cv::gpu::SURF_GPU and
// cv::SURF, brute-force matches them with the L2 norm, and expects more than
// 60% of the matches to pair a keypoint with itself.
GPU_TEST_P(SURF, Descriptor)
{
    cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    cv::gpu::SURF_GPU surf;
    surf.hessianThreshold = hessianThreshold;
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;

    cv::SURF surf_gold;
    surf_gold.hessianThreshold = hessianThreshold;
    surf_gold.nOctaves = nOctaves;
    surf_gold.nOctaveLayers = nOctaveLayers;
    surf_gold.extended = extended;
    surf_gold.upright = upright;

    if (supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
    {
        // Keypoints come from the reference detector and are reused by both
        // descriptor extractors (last argument true = use provided keypoints).
        std::vector<cv::KeyPoint> keypoints;
        surf_gold(image, cv::noArray(), keypoints);

        cv::gpu::GpuMat descriptors;
        surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);

        cv::Mat descriptors_gold;
        surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);

        cv::BFMatcher matcher(cv::NORM_L2);
        std::vector<cv::DMatch> matches;
        matcher.match(descriptors_gold, cv::Mat(descriptors), matches);

        const int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
        const double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();

        EXPECT_GT(matchedRatio, 0.6);
    }
    else
    {
        try
        {
            std::vector<cv::KeyPoint> keypoints;
            cv::gpu::GpuMat descriptors;
            surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsNotImplemented, e.code);
        }
    }
}

// Runs every SURF test for each device and each combination of settings below.
INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
    ALL_DEVICES,
    testing::Values(SURF_HessianThreshold(100.0), SURF_HessianThreshold(500.0), SURF_HessianThreshold(1000.0)),
    testing::Values(SURF_Octaves(3), SURF_Octaves(4)),
    testing::Values(SURF_OctaveLayers(2), SURF_OctaveLayers(3)),
    testing::Values(SURF_Extended(false), SURF_Extended(true)),
    testing::Values(SURF_Upright(false), SURF_Upright(true))));
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// VIBE
|
||||
|
||||
// Fixture for the ViBe tests. Parameters, in order: device, frame size,
// matrix type, and whether the GPU matrices are created as sub-matrices.
PARAM_TEST_CASE(VIBE, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
{
};

// Feeds the model 20 identical frames with values drawn from [0, 100], then
// one frame drawn from [160, 255], and expects the resulting mask to be all 255.
GPU_TEST_P(VIBE, Accuracy)
{
    const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
    cv::gpu::setDevice(devInfo.deviceID());
    const cv::Size size = GET_PARAM(1);
    const int type = GET_PARAM(2);
    const bool useRoi = GET_PARAM(3);

    // Background frame.
    cv::Mat frame = randomMat(size, type, 0.0, 100);
    cv::gpu::GpuMat d_frame = loadMat(frame, useRoi);

    cv::gpu::VIBE_GPU vibe;
    cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
    vibe.initialize(d_frame);

    for (int iter = 0; iter < 20; ++iter)
    {
        vibe(d_frame, d_fgmask);
    }

    // Frame whose value range does not overlap the background range.
    frame = randomMat(size, type, 160, 255);
    d_frame = loadMat(frame, useRoi);
    vibe(d_frame, d_fgmask);

    // now fgmask should be entirely foreground
    const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255));
    ASSERT_MAT_NEAR(fullfg, d_fgmask, 0);
}

INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)),
    WHOLE_SUBMAT));
||||
|
||||
#endif |
@ -1,3 +1,73 @@ |
||||
#include "test_precomp.hpp" |
||||
|
||||
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA) |
||||
|
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
using namespace cvtest; |
||||
using namespace testing; |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
try |
||||
{ |
||||
const char* keys = |
||||
"{ h | help ? | false | Print help}" |
||||
"{ i | info | false | Print information about system and exit }" |
||||
"{ d | device | -1 | Device on which tests will be executed (-1 means all devices) }" |
||||
; |
||||
|
||||
CommandLineParser cmd(argc, (const char**)argv, keys); |
||||
|
||||
if (cmd.get<bool>("help")) |
||||
{ |
||||
cmd.printParams(); |
||||
return 0; |
||||
} |
||||
|
||||
printCudaInfo(); |
||||
|
||||
if (cmd.get<bool>("info")) |
||||
{ |
||||
return 0; |
||||
} |
||||
|
||||
int device = cmd.get<int>("device"); |
||||
if (device < 0) |
||||
{ |
||||
DeviceManager::instance().loadAll(); |
||||
|
||||
std::cout << "Run tests on all supported devices \n" << std::endl; |
||||
} |
||||
else |
||||
{ |
||||
DeviceManager::instance().load(device); |
||||
|
||||
DeviceInfo info(device); |
||||
std::cout << "Run tests on device " << device << " [" << info.name() << "] \n" << std::endl; |
||||
} |
||||
|
||||
TS::ptr()->init("cv"); |
||||
InitGoogleTest(&argc, argv); |
||||
|
||||
return RUN_ALL_TESTS(); |
||||
} |
||||
catch (const std::exception& e) |
||||
{ |
||||
std::cerr << e.what() << std::endl; |
||||
return -1; |
||||
} |
||||
catch (...) |
||||
{ |
||||
std::cerr << "Unknown error" << std::endl; |
||||
return -1; |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
CV_TEST_MAIN("cv") |
||||
|
||||
#endif // HAVE_CUDA
|
||||
|
@ -0,0 +1,226 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
#ifdef HAVE_OPENCV_OCL |
||||
|
||||
using namespace std; |
||||
using std::tr1::get; |
||||
|
||||
// Returns true when two keypoints agree within fixed tolerances:
// position distance < 0.1, size difference < 0.1, angle difference < 0.1,
// response difference < 0.01, and identical octave and class_id.
static bool keyPointsEquals(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
{
    const double maxPtDif = 0.1;
    const double maxSizeDif = 0.1;
    const double maxAngleDif = 0.1;
    const double maxResponseDif = 0.01;

    const double dist = cv::norm(p1.pt - p2.pt);

    // fabs is used for every floating-point difference. An unqualified abs()
    // may resolve to the integer overload, which truncates sub-unit
    // differences to zero and makes the tolerance check always pass.
    return dist < maxPtDif &&
           fabs(p1.size - p2.size) < maxSizeDif &&
           fabs(p1.angle - p2.angle) < maxAngleDif &&
           fabs(p1.response - p2.response) < maxResponseDif &&
           p1.octave == p2.octave &&
           p1.class_id == p2.class_id;
}
||||
|
||||
// Sorts both keypoint lists with perf::comparators::KeypointGreater and counts
// the positions at which gold[i] and actual[i] compare equal under
// keyPointsEquals(). Both vectors are reordered in place.
//
// Only positions present in both lists are compared, so a shorter `actual`
// can no longer be indexed past its end; missing entries count as unmatched.
static int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
{
    std::sort(actual.begin(), actual.end(), perf::comparators::KeypointGreater());
    std::sort(gold.begin(), gold.end(), perf::comparators::KeypointGreater());

    const size_t comparable = std::min(gold.size(), actual.size());

    int validCount = 0;

    for (size_t i = 0; i < comparable; ++i)
    {
        if (keyPointsEquals(gold[i], actual[i]))
            ++validCount;
    }

    return validCount;
}
||||
|
||||
// Counts the matches whose query keypoint (from keypoints1) and train keypoint
// (from keypoints2) compare equal under keyPointsEquals().
static int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
{
    int matched = 0;

    for (std::vector<cv::DMatch>::const_iterator it = matches.begin(); it != matches.end(); ++it)
    {
        const cv::KeyPoint& query = keypoints1[it->queryIdx];
        const cv::KeyPoint& train = keypoints2[it->trainIdx];

        if (keyPointsEquals(query, train))
        {
            ++matched;
        }
    }

    return matched;
}
||||
|
||||
// Opens the definition of a value-parameterized fixture `name` whose parameter
// type is a std::tr1::tuple of the listed types. The caller supplies the
// struct body and the closing semicolon.
#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >

// Defines, in an anonymous namespace, a wrapper class `name` around a value of
// `type` that converts implicitly from and to `type`, plus a PrintTo overload
// that writes "<name>=<value>" to the given stream.
#define IMPLEMENT_PARAM_CLASS(name, type) \
    namespace { class name { \
    public: \
        name ( type arg = type ()) : val_(arg) {} \
        operator type () const {return val_;} \
    private: \
        type val_; \
    }; \
    inline void PrintTo( name param, std::ostream* os) {*os << #name << "=" << testing::PrintToString(static_cast< type >(param));}}
||||
|
||||
// Wrapper types for the individual SURF settings used as test parameters.
IMPLEMENT_PARAM_CLASS(HessianThreshold, double)
IMPLEMENT_PARAM_CLASS(Octaves, int)
IMPLEMENT_PARAM_CLASS(OctaveLayers, int)
IMPLEMENT_PARAM_CLASS(Extended, bool)
IMPLEMENT_PARAM_CLASS(Upright, bool)

// Fixture for the OpenCL SURF tests. SetUp() copies the tuple elements into
// plain members that the test bodies read.
PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright)
{
    double hessianThreshold;
    int nOctaves;
    int nOctaveLayers;
    bool extended;
    bool upright;

    virtual void SetUp()
    {
        // Tuple order matches the PARAM_TEST_CASE type list above.
        hessianThreshold = std::tr1::get<0>(GetParam());
        nOctaves         = std::tr1::get<1>(GetParam());
        nOctaveLayers    = std::tr1::get<2>(GetParam());
        extended         = std::tr1::get<3>(GetParam());
        upright          = std::tr1::get<4>(GetParam());
    }
};
||||
|
||||
// Compares keypoints detected by cv::ocl::SURF_OCL with those of cv::SURF
// configured identically; more than 99% must agree. The DISABLED_ prefix keeps
// Google Test from running it by default.
TEST_P(SURF, DISABLED_Detector)
{
    const string imagePath = string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png";
    cv::Mat image = cv::imread(imagePath, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    cv::ocl::SURF_OCL surf;
    surf.hessianThreshold = static_cast<float>(hessianThreshold);
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;

    std::vector<cv::KeyPoint> keypoints;
    {
        const cv::ocl::oclMat d_image(image);
        surf(d_image, cv::ocl::oclMat(), keypoints);
    }

    // Reference detector with the same settings.
    cv::SURF surf_gold;
    surf_gold.hessianThreshold = hessianThreshold;
    surf_gold.nOctaves = nOctaves;
    surf_gold.nOctaveLayers = nOctaveLayers;
    surf_gold.extended = extended;
    surf_gold.upright = upright;

    std::vector<cv::KeyPoint> keypoints_gold;
    surf_gold(image, cv::noArray(), keypoints_gold);

    ASSERT_EQ(keypoints_gold.size(), keypoints.size());

    const int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
    const double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();

    EXPECT_GT(matchedRatio, 0.99);
}
||||
|
||||
// Computes descriptors for the same keypoints with cv::ocl::SURF_OCL and
// cv::SURF, brute-force matches them with the L2 norm, and expects more than
// 35% of the matches to pair a keypoint with itself.
TEST_P(SURF, DISABLED_Descriptor)
{
    const string imagePath = string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png";
    cv::Mat image = cv::imread(imagePath, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    cv::ocl::SURF_OCL surf;
    surf.hessianThreshold = static_cast<float>(hessianThreshold);
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;

    cv::SURF surf_gold;
    surf_gold.hessianThreshold = hessianThreshold;
    surf_gold.nOctaves = nOctaves;
    surf_gold.nOctaveLayers = nOctaveLayers;
    surf_gold.extended = extended;
    surf_gold.upright = upright;

    // Keypoints come from the reference detector and are reused by both
    // descriptor extractors (last argument true = use provided keypoints).
    std::vector<cv::KeyPoint> keypoints;
    surf_gold(image, cv::noArray(), keypoints);

    cv::ocl::oclMat descriptors;
    surf(cv::ocl::oclMat(image), cv::ocl::oclMat(), keypoints, descriptors, true);

    cv::Mat descriptors_gold;
    surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);

    std::vector<cv::DMatch> matches;
    cv::BFMatcher matcher(cv::NORM_L2);
    matcher.match(descriptors_gold, cv::Mat(descriptors), matches);

    const int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
    const double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();

    EXPECT_GT(matchedRatio, 0.35);
}

// Runs both SURF tests for every combination of the settings below.
INSTANTIATE_TEST_CASE_P(OCL_Features2D, SURF, testing::Combine(
    testing::Values(HessianThreshold(500.0), HessianThreshold(1000.0)),
    testing::Values(Octaves(3), Octaves(4)),
    testing::Values(OctaveLayers(2), OctaveLayers(3)),
    testing::Values(Extended(false), Extended(true)),
    testing::Values(Upright(false), Upright(true))));
||||
|
||||
#endif // HAVE_OPENCV_OCL
|
@ -0,0 +1,130 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OCL_PRIVATE_UTIL__ |
||||
#define __OPENCV_OCL_PRIVATE_UTIL__ |
||||
|
||||
#include "opencv2/ocl.hpp" |
||||
|
||||
#if defined __APPLE__ |
||||
#include <OpenCL/OpenCL.h> |
||||
#else |
||||
#include <CL/opencl.h> |
||||
#endif |
||||
|
||||
// Private OpenCL utility declarations for the ocl module.
// NOTE(review): only declarations are visible in this header; the summaries
// added below are inferred from names and signatures and should be confirmed
// against the implementations.
namespace cv
{
    namespace ocl
    {
        // Copy-direction selector, taken by openCLMemcpy2D as its `kind` argument.
        enum openCLMemcpyKind
        {
            clMemcpyHostToDevice = 0,
            clMemcpyDeviceToHost,
            clMemcpyDeviceToDevice
        };
        ///////////////////////////OpenCL call wrappers////////////////////////////

        // Presumably allocates a pitched 2D device buffer; dev_ptr and pitch look
        // like output parameters — TODO confirm.
        void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
                                          size_t widthInBytes, size_t height);
        // Same parameters as openCLMallocPitch plus an access mode and a memory type.
        void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
                                            size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
        // Presumably a 2D copy in the direction given by `kind`; `channels`
        // defaults to -1.
        void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
                                       const void *src, size_t spitch,
                                       size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
        // Presumably a 2D buffer-to-buffer copy with separate source and
        // destination offsets.
        void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
                                           const void *src, size_t spitch,
                                           size_t width, size_t height, int src_offset);
        // Presumably releases memory obtained from the allocation wrappers above.
        void CV_EXPORTS openCLFree(void *devPtr);
        // Returns a cl_mem; `flag` presumably carries cl_mem_flags — TODO confirm.
        cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
        // Presumably reads `size` bytes from dst_buffer into host_buffer
        // (despite the "dst" in the name) — TODO confirm.
        void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
        // Obtain a kernel named kernelName from program source; the second
        // overload additionally takes compiler build options.
        cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
                const char **source, std::string kernelName);
        cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
                const char **source, std::string kernelName, const char *build_options);
        // Presumably validates localThreads against the kernel/device limits.
        void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
        // Kernel launch helpers. `args` is a list of (size, pointer) pairs,
        // presumably one per kernel argument in order.
        void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, std::vector< std::pair<size_t, const void *> > &args,
                                            int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
        void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
                                             size_t globalThreads[3], size_t localThreads[3],
                                             std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
        void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
                                            size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
        void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
                                            size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
                                            int depth, const char *build_options);

        // Returns a cl_mem, presumably holding a copy of the `size` bytes at
        // `value` — TODO confirm.
        cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
                                        const size_t size);

        // Takes a raw cl_context rather than a Context wrapper; the parameters
        // look like those of clCreateBuffer — TODO confirm.
        cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);

        // Presumably writes the program to fileName; the meaning of the int
        // return value is not visible here.
        int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);

        // Value for the finish_mode parameter of openCLExecuteKernel2, which
        // defaults to DISABLE. The names suggest clFinish / clFlush / neither
        // after the launch — TODO confirm.
        enum FLUSH_MODE
        {
            CLFINISH = 0,
            CLFLUSH,
            DISABLE
        };

        // Variants of openCLExecuteKernel that take a FLUSH_MODE.
        // NOTE(review): the second overload takes a non-const char *build_options,
        // unlike the const char * used by the other launch helpers.
        void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
                                             size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
        void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
                                             size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
                                             int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
        // Bind an oclMat to an OpenCL image texture.
        // Notes:
        // 1. There is no memory management; the user needs to release the
        //    resource explicitly.
        // 2. For faster clamping, there is no buffer padding for the constructed texture.
        cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
        void CV_EXPORTS releaseTexture(cl_mem& texture);

        // Returns whether the current context supports the image2d_t format.
        bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());

    }//namespace ocl

}//namespace cv
|
||||
|
||||
#endif //__OPENCV_OCL_PRIVATE_UTIL__
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue