diff --git a/OpenCVConfig.cmake.in b/OpenCVConfig.cmake.in index 812711fd98..e6ed61bf36 100644 --- a/OpenCVConfig.cmake.in +++ b/OpenCVConfig.cmake.in @@ -43,7 +43,7 @@ SET(OpenCV_LIB_DIR "@CMAKE_LIB_DIRS_CONFIGCMAKE@") # ==================================================================== # Link libraries: e.g. opencv_core220.so, opencv_imgproc220d.lib, etc... # ==================================================================== -set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib) +set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_gpu opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib) SET(OpenCV_LIBS "") foreach(__CVLIB ${OPENCV_LIB_COMPONENTS}) # CMake>=2.6 supports the notation "debug XXd optimized XX" diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index ed19be6974..0a6df840d7 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -22,3 +22,6 @@ endif() add_subdirectory(video) add_subdirectory(haartraining) add_subdirectory(traincascade) + + +#add_subdirectory(gpu) diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt new file mode 100644 index 0000000000..f8d12fd09b --- /dev/null +++ b/modules/gpu/CMakeLists.txt @@ -0,0 +1,101 @@ + +include(FindCUDA) + +if (CUDA_FOUND) + include_directories(${CUDA_INCLUDE_DIRS}) + link_directories(${CUDA_LIBRARIES}) + + #CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) + + #==================================================================================== + + + set(name "gpu") + set(DEPS "opencv_core") + + project(opencv_${name}) + add_definitions(-DCVAPI_EXPORTS) + + include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/cuda" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_BINARY_DIR}") + + foreach(d ${DEPS}) + if(${d} MATCHES "opencv_") + string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d}) + include_directories("${d_dir}/include") + endif() + endforeach() + + file(GLOB lib_srcs "src/*.cpp") + file(GLOB lib_int_hdrs "src/*.h*") + file(GLOB lib_cuda "cuda/*.cu") + file(GLOB lib_cuda_hdrs "cuda/*.h*") + source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) + source_group("Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs}) + + file(GLOB lib_hdrs "include/opencv2/${name}/*.h*") + source_group("Include" FILES ${lib_hdrs}) + + CUDA_COMPILE(cuda_objs ${lib_cuda}) + #CUDA_BUILD_CLEAN_TARGET() + + set(the_target "opencv_${name}") + add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${cuda_objs}) + + if(PCHSupport_FOUND) + set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp) + if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*") + if(${CMAKE_GENERATOR} MATCHES "Visual*") + set(${the_target}_pch "src/precomp.cpp") + endif() + add_native_precompiled_header(${the_target} ${pch_header}) + elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles") + add_precompiled_header(${the_target} ${pch_header}) + endif() + endif() + + # For dynamic link numbering convenions + set_target_properties(${the_target} PROPERTIES + VERSION ${OPENCV_VERSION} + SOVERSION ${OPENCV_SOVERSION} + OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}" + ) + + # Additional target properties + set_target_properties(${the_target} PROPERTIES + DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/" + INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib" + ) + + # Add the required libraries for linking: + target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} ${CUDA_LIBRARIES}) + + if(MSVC) + if(CMAKE_CROSSCOMPILING) + set_target_properties(${the_target} PROPERTIES + LINK_FLAGS "/NODEFAULTLIB:secchk" + ) + endif() + set_target_properties(${the_target} PROPERTIES + LINK_FLAGS "/NODEFAULTLIB:libc" + ) + endif() + + # Dependencies of this target: + add_dependencies(${the_target} ${DEPS}) + + install(TARGETS ${the_target} + RUNTIME DESTINATION bin COMPONENT main + LIBRARY DESTINATION lib COMPONENT main + ARCHIVE DESTINATION lib COMPONENT main) + + install(FILES ${lib_hdrs} + DESTINATION include/opencv2/${name} + COMPONENT main) + +endif() + diff --git a/modules/gpu/cuda/Stereo.cu b/modules/gpu/cuda/Stereo.cu new file mode 100644 index 0000000000..a316bb84aa --- /dev/null +++ b/modules/gpu/cuda/Stereo.cu @@ -0,0 +1,322 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "cuda_shared.hpp" + +using namespace cv::gpu; + +#define cudaSafeCall + +#define ROWSperTHREAD 21 // the number of rows a thread will process +#define BLOCK_W 128 // the thread block width (464) +#define N_DISPARITIES 8 + +#define STEREO_MIND 0 // The minimum d range to check +#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing +#define RADIUS 9 // Kernel Radius 5V & 5H = 11x11 kernel + +#define WINSZ (2 * RADIUS + 1) +#define N_DIRTY_PIXELS (2 * RADIUS) +#define COL_SSD_SIZE (BLOCK_W + N_DIRTY_PIXELS) +#define SHARED_MEM_SIZE (COL_SSD_SIZE) // amount of shared memory used + +__constant__ unsigned int* cminSSDImage; +__constant__ size_t cminSSD_step; +__constant__ int cwidth; +__constant__ int cheight; + +namespace device_code +{ + +__device__ int SQ(int a) +{ + return a * a; +} + +__device__ unsigned int CalcSSD(unsigned int *col_ssd_cache, unsigned int *col_ssd) +{ + unsigned int cache = 0; + unsigned int cache2 = 0; + + for(int i = 1; i <= RADIUS; i++) + cache += col_ssd[i]; + + col_ssd_cache[0] = cache; + + __syncthreads(); + + if (threadIdx.x < BLOCK_W - RADIUS) + cache2 = col_ssd_cache[RADIUS]; + else + for(int i = RADIUS + 1; i < WINSZ; i++) + cache2 += col_ssd[i]; + + return col_ssd[0] + cache + cache2; +} + +__device__ uint2 MinSSD(unsigned int *col_ssd_cache, unsigned int *col_ssd) +{ + unsigned int ssd[N_DISPARITIES]; + + ssd[0] = CalcSSD(col_ssd_cache, col_ssd + 0 * SHARED_MEM_SIZE); + ssd[1] = CalcSSD(col_ssd_cache, col_ssd + 1 * SHARED_MEM_SIZE); + ssd[2] = CalcSSD(col_ssd_cache, col_ssd + 2 * SHARED_MEM_SIZE); + ssd[3] = CalcSSD(col_ssd_cache, col_ssd + 3 * SHARED_MEM_SIZE); + ssd[4] = CalcSSD(col_ssd_cache, col_ssd + 4 * SHARED_MEM_SIZE); + ssd[5] = CalcSSD(col_ssd_cache, col_ssd + 5 * SHARED_MEM_SIZE); + ssd[6] = CalcSSD(col_ssd_cache, col_ssd + 6 * SHARED_MEM_SIZE); + ssd[7] = CalcSSD(col_ssd_cache, col_ssd + 7 * SHARED_MEM_SIZE); + + int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7]))); + + int bestIdx = 0; + for (int i = 0; i < N_DISPARITIES; i++) + { + if (mssd == ssd[i]) + bestIdx = i; + } + + return make_uint2(mssd, bestIdx); +} + +__device__ void StepDown(int idx1, int idx2, unsigned char* imageL, unsigned char* imageR, int d, unsigned int *col_ssd) +{ + unsigned char leftPixel1; + unsigned char leftPixel2; + unsigned char rightPixel1[8]; + unsigned char rightPixel2[8]; + unsigned int diff1, diff2; + + leftPixel1 = imageL[idx1]; + leftPixel2 = imageL[idx2]; + + idx1 = idx1 - d; + idx2 = idx2 - d; + + rightPixel1[7] = imageR[idx1 - 7]; + rightPixel1[0] = imageR[idx1 - 0]; + rightPixel1[1] = imageR[idx1 - 1]; + rightPixel1[2] = imageR[idx1 - 2]; + rightPixel1[3] = imageR[idx1 - 3]; + rightPixel1[4] = imageR[idx1 - 4]; + rightPixel1[5] = imageR[idx1 - 5]; + rightPixel1[6] = imageR[idx1 - 6]; + + rightPixel2[7] = imageR[idx2 - 7]; + rightPixel2[0] = imageR[idx2 - 0]; + rightPixel2[1] = imageR[idx2 - 1]; + rightPixel2[2] = imageR[idx2 - 2]; + rightPixel2[3] = imageR[idx2 - 3]; + rightPixel2[4] = imageR[idx2 - 4]; + rightPixel2[5] = imageR[idx2 - 5]; + rightPixel2[6] = imageR[idx2 - 6]; + + + diff1 = leftPixel1 - rightPixel1[0]; + diff2 = leftPixel2 - rightPixel2[0]; + col_ssd[0 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); + + diff1 = leftPixel1 - rightPixel1[1]; + diff2 = leftPixel2 - rightPixel2[1]; + col_ssd[1 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); + + diff1 = leftPixel1 - rightPixel1[2]; + diff2 = leftPixel2 - rightPixel2[2]; + col_ssd[2 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); + + diff1 = leftPixel1 - rightPixel1[3]; + diff2 = leftPixel2 - rightPixel2[3]; + col_ssd[3 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); + + diff1 = leftPixel1 - rightPixel1[4]; + diff2 = leftPixel2 - rightPixel2[4]; + col_ssd[4 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); + + diff1 = leftPixel1 - rightPixel1[5]; + diff2 = leftPixel2 - rightPixel2[5]; + col_ssd[5 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); + + diff1 = leftPixel1 - rightPixel1[6]; + diff2 = leftPixel2 - rightPixel2[6]; + col_ssd[6 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); + + diff1 = leftPixel1 - rightPixel1[7]; + diff2 = leftPixel2 - rightPixel2[7]; + col_ssd[7 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1); +} + +__device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* imageL, unsigned char* imageR, int d, unsigned int *col_ssd) +{ + unsigned char leftPixel1; + int idx; + unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0}; + + for(int i = 0; i < WINSZ; i++) + { + idx = y_tex * im_pitch + x_tex; + leftPixel1 = imageL[idx]; + idx = idx - d; + + diffa[0] += SQ(leftPixel1 - imageR[idx - 0]); + diffa[1] += SQ(leftPixel1 - imageR[idx - 1]); + diffa[2] += SQ(leftPixel1 - imageR[idx - 2]); + diffa[3] += SQ(leftPixel1 - imageR[idx - 3]); + diffa[4] += SQ(leftPixel1 - imageR[idx - 4]); + diffa[5] += SQ(leftPixel1 - imageR[idx - 5]); + diffa[6] += SQ(leftPixel1 - imageR[idx - 6]); + diffa[7] += SQ(leftPixel1 - imageR[idx - 7]); + + y_tex += 1; + } + + col_ssd[0 * SHARED_MEM_SIZE] = diffa[0]; + col_ssd[1 * SHARED_MEM_SIZE] = diffa[1]; + col_ssd[2 * SHARED_MEM_SIZE] = diffa[2]; + col_ssd[3 * SHARED_MEM_SIZE] = diffa[3]; + col_ssd[4 * SHARED_MEM_SIZE] = diffa[4]; + col_ssd[5 * SHARED_MEM_SIZE] = diffa[5]; + col_ssd[6 * SHARED_MEM_SIZE] = diffa[6]; + col_ssd[7 * SHARED_MEM_SIZE] = diffa[7]; +} + +extern "C" __global__ void stereoKernel(uchar *left, uchar *right, size_t img_step, uchar* disp, size_t disp_pitch, int maxdisp) +{ + extern __shared__ unsigned int col_ssd_cache[]; + unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x; + unsigned int *col_ssd_extra = threadIdx.x < N_DIRTY_PIXELS ? col_ssd + BLOCK_W : 0; + + //#define X (blockIdx.x * BLOCK_W + threadIdx.x + STEREO_MAXD) + int X = (blockIdx.x * BLOCK_W + threadIdx.x + maxdisp); + //#define Y (__mul24(blockIdx.y, ROWSperTHREAD) + RADIUS) + #define Y (blockIdx.y * ROWSperTHREAD + RADIUS) + //int Y = blockIdx.y * ROWSperTHREAD + RADIUS; + + unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step; + uchar* disparImage = disp + X + Y * disp_pitch; + /* if (X < cwidth) + { + unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step; + for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step ) + *ptr = 0xFFFFFFFF; + }*/ + int end_row = min(ROWSperTHREAD, cheight - Y); + int y_tex; + int x_tex = X - RADIUS; + for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP) + { + y_tex = Y - RADIUS; + + InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd); + + if (col_ssd_extra > 0) + InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra); + + __syncthreads(); //before MinSSD function + + if (X < cwidth - RADIUS && Y < cheight - RADIUS) + { + uint2 minSSD = MinSSD(col_ssd_cache + threadIdx.x, col_ssd); + if (minSSD.x < minSSDImage[0]) + { + disparImage[0] = (unsigned char)(d + minSSD.y); + minSSDImage[0] = minSSD.x; + } + } + + for(int row = 1; row < end_row; row++) + { + int idx1 = y_tex * img_step + x_tex; + int idx2 = (y_tex + WINSZ) * img_step + x_tex; + + __syncthreads(); + + StepDown(idx1, idx2, left, right, d, col_ssd); + + if (col_ssd_extra) + StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra); + + y_tex += 1; + + __syncthreads(); //before MinSSD function + + if (X < cwidth - RADIUS && row < cheight - RADIUS - Y) + { + int idx = row * cminSSD_step; + uint2 minSSD = MinSSD(col_ssd_cache + threadIdx.x, col_ssd); + if (minSSD.x < minSSDImage[idx]) + { + disparImage[disp_pitch * row] = (unsigned char)(d + minSSD.y); + minSSDImage[idx] = minSSD.x; + } + } + } // for row loop + } // for d loop +} + +} + +extern "C" void cv::gpu::impl::stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_& minSSD_buf) +{ + //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) ); + //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) ); + + size_t smem_size = (BLOCK_W + N_DISPARITIES * SHARED_MEM_SIZE) * sizeof(unsigned int); + + cudaSafeCall( cudaMemset2D(disp.ptr, disp.step, 0, disp.cols, disp. rows) ); + cudaSafeCall( cudaMemset2D(minSSD_buf.ptr, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp. rows) ); + + dim3 grid(1,1,1); + dim3 threads(BLOCK_W, 1, 1); + + grid.x = divUp(left.cols - maxdisp - 2 * RADIUS, BLOCK_W); + grid.y = divUp(left.rows - 2 * RADIUS, ROWSperTHREAD); + + cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof (left.cols) ) ); + cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof (left.rows) ) ); + cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.ptr, sizeof (minSSD_buf.ptr) ) ); + + size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize(); + cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof (minssd_step) ) ); + + device_code::stereoKernel<<>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp); + cudaSafeCall( cudaThreadSynchronize() ); +} \ No newline at end of file diff --git a/modules/gpu/cuda/cuda_shared.hpp b/modules/gpu/cuda/cuda_shared.hpp new file mode 100644 index 0000000000..4da7dcdfd7 --- /dev/null +++ b/modules/gpu/cuda/cuda_shared.hpp @@ -0,0 +1,65 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_CUDA_SHARED_HPP__ +#define __OPENCV_CUDA_SHARED_HPP__ + +#include "opencv2/gpu/devmem2d.hpp" + +namespace cv +{ + namespace gpu + { + typedef unsigned char uchar; + typedef unsigned short ushort; + typedef unsigned int uint; + + namespace impl + { + static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; } + + extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_& minSSD_buf); + } + } +} + +#endif /* __OPENCV_CUDA_SHARED_HPP__ */ diff --git a/modules/gpu/include/opencv2/gpu/devmem2d.hpp b/modules/gpu/include/opencv2/gpu/devmem2d.hpp new file mode 100644 index 0000000000..b1bbd4c80f --- /dev/null +++ b/modules/gpu/include/opencv2/gpu/devmem2d.hpp @@ -0,0 +1,73 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_DEVMEM2D_HPP__ +#define __OPENCV_GPU_DEVMEM2D_HPP__ + +namespace cv +{ + namespace gpu + { + // Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes. + // It is intended to pass to nvcc-compiled code. + + template + struct DevMem2D_ + { + enum { elem_size = sizeof(T) }; + + int cols; + int rows; + T* ptr; + size_t step; + + DevMem2D_(int rows_, int cols_, T *ptr_, size_t step_) + : cols(cols_), rows(rows_), ptr(ptr_), step(step_) {} + + size_t elemSize() const { return elem_size; } + }; + + typedef DevMem2D_<> DevMem2D; + } +} + +#endif /* __OPENCV_GPU_DEVMEM2D_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp new file mode 100644 index 0000000000..989aa15d3e --- /dev/null +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -0,0 +1,276 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_HPP__ +#define __OPENCV_GPU_HPP__ + +#include "opencv2/core/core.hpp" +#include "opencv2/gpu/devmem2d.hpp" + +namespace cv +{ + namespace gpu + { + //////////////////////////////// Initialization //////////////////////// + + CV_EXPORTS int getCudaEnabledDeviceCount(); + CV_EXPORTS string getDeviceName(int device); + CV_EXPORTS void setDevice(int device); + + enum { CV_GPU_CC_10, CV_GPU_CC_11, CV_GPU_CC_12, CV_GPU_CC_13, CV_GPU_CC_20 }; + + CV_EXPORTS int getComputeCapability(int device); + CV_EXPORTS int getNumberOfSMs(int device); + + //////////////////////////////// GpuMat //////////////////////////////// + + class CV_EXPORTS GpuMat + { + public: + //! default constructor + GpuMat(); + //! constructs GpuMatrix of the specified size and type + // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) + GpuMat(int _rows, int _cols, int _type); + GpuMat(Size _size, int _type); + //! constucts GpuMatrix and fills it with the specified value _s. + GpuMat(int _rows, int _cols, int _type, const Scalar& _s); + GpuMat(Size _size, int _type, const Scalar& _s); + //! copy constructor + GpuMat(const GpuMat& m); + + //! constructor for GpuMatrix headers pointing to user-allocated data + GpuMat(int _rows, int _cols, int _type, void* _data, size_t _step = Mat::AUTO_STEP); + GpuMat(Size _size, int _type, void* _data, size_t _step = Mat::AUTO_STEP); + + //! creates a matrix header for a part of the bigger matrix + GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange); + GpuMat(const GpuMat& m, const Rect& roi); + + //! builds GpuMat from Mat. Perfom blocking upload to device. + GpuMat (const Mat& m); + + //! destructor - calls release() + ~GpuMat(); + + //! assignment operators + GpuMat& operator = (const GpuMat& m); + //! assignment operator. Perfom blocking upload to device. + GpuMat& operator = (const Mat& m); + + //! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code. + // Contains just image size, data ptr and step. + template operator DevMem2D_() const; + + //! pefroms blocking upload data to GpuMat. . + void upload(const cv::Mat& m); + + //! Downloads data from device to host memory. Blocking calls. + operator Mat() const; + void download(cv::Mat& m) const; + + //! returns a new GpuMatrix header for the specified row + GpuMat row(int y) const; + //! returns a new GpuMatrix header for the specified column + GpuMat col(int x) const; + //! ... for the specified row span + GpuMat rowRange(int startrow, int endrow) const; + GpuMat rowRange(const Range& r) const; + //! ... for the specified column span + GpuMat colRange(int startcol, int endcol) const; + GpuMat colRange(const Range& r) const; + + //! returns deep copy of the GpuMatrix, i.e. the data is copied + GpuMat clone() const; + //! copies the GpuMatrix content to "m". + // It calls m.create(this->size(), this->type()). + void copyTo( GpuMat& m ) const; + //! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements. + void copyTo( GpuMat& m, const GpuMat& mask ) const; + //! converts GpuMatrix to another datatype with optional scalng. See cvConvertScale. + void convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const; + + void assignTo( GpuMat& m, int type=-1 ) const; + + //! sets every GpuMatrix element to s + GpuMat& operator = (const Scalar& s); + //! sets some of the GpuMatrix elements to s, according to the mask + GpuMat& setTo(const Scalar& s, const GpuMat& mask=GpuMat()); + //! creates alternative GpuMatrix header for the same data, with different + // number of channels and/or different number of rows. see cvReshape. + GpuMat reshape(int _cn, int _rows=0) const; + + //! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type. + // previous data is unreferenced if needed. + void create(int _rows, int _cols, int _type); + void create(Size _size, int _type); + //! decreases reference counter; + // deallocate the data when reference counter reaches 0. + void release(); + + //! swaps with other smart pointer + void swap(GpuMat& mat); + + //! locates GpuMatrix header within a parent GpuMatrix. See below + void locateROI( Size& wholeSize, Point& ofs ) const; + //! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix. + GpuMat& adjustROI( int dtop, int dbottom, int dleft, int dright ); + //! extracts a rectangular sub-GpuMatrix + // (this is a generalized form of row, rowRange etc.) + GpuMat operator()( Range rowRange, Range colRange ) const; + GpuMat operator()( const Rect& roi ) const; + + //! returns true iff the GpuMatrix data is continuous + // (i.e. when there are no gaps between successive rows). + // similar to CV_IS_GpuMat_CONT(cvGpuMat->type) + bool isContinuous() const; + //! returns element size in bytes, + // similar to CV_ELEM_SIZE(cvMat->type) + size_t elemSize() const; + //! returns the size of element channel in bytes. + size_t elemSize1() const; + //! returns element type, similar to CV_MAT_TYPE(cvMat->type) + int type() const; + //! returns element type, similar to CV_MAT_DEPTH(cvMat->type) + int depth() const; + //! returns element type, similar to CV_MAT_CN(cvMat->type) + int channels() const; + //! returns step/elemSize1() + size_t step1() const; + //! returns GpuMatrix size: + // width == number of columns, height == number of rows + Size size() const; + //! returns true if GpuMatrix data is NULL + bool empty() const; + + //! returns pointer to y-th row + uchar* ptr(int y=0); + const uchar* ptr(int y=0) const; + + //! template version of the above method + template _Tp* ptr(int y=0); + template const _Tp* ptr(int y=0) const; + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + //! the number of rows and columns + int rows, cols; + //! a distance between successive rows in bytes; includes the gap if any + size_t step; + //! pointer to the data + uchar* data; + + //! pointer to the reference counter; + // when GpuMatrix points to user-allocated data, the pointer is NULL + int* refcount; + + //! helper fields used in locateROI and adjustROI + uchar* datastart; + uchar* dataend; + }; + + //////////////////////////////// CudaStream //////////////////////////////// + + class CudaStream + { + public: + CudaStream(); + ~CudaStream(); + + bool queryIfComplete(); + void waitForCompletion(); + + //calls cudaMemcpyAsync + void enqueueDownload(const GpuMat& src, Mat& dst); + void enqueueUpload(const Mat& src, GpuMat& dst); + void enqueueCopy(const GpuMat& src, GpuMat& dst); + + // calls cudaMemset2D asynchronous for single channel. Invoke kernel for some multichannel. + void enqueueMemSet(const GpuMat& src, Scalar val); + + // invoke kernel asynchronous because of mask + void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask); + + // converts matrix type, ex from float to uchar depending on type + void enqueueConvert(const GpuMat& src, GpuMat& dst, int type); + + //CUstream_st& getStream(); + private: + void *impl; + + CudaStream(const CudaStream&); + CudaStream& operator=(const CudaStream&); + }; + + //////////////////////////////// StereoBM_GPU //////////////////////////////// + + class CV_EXPORTS StereoBM_GPU + { + public: + enum { BASIC_PRESET=0, PREFILTER_XSOBEL = 1 }; + + //! the default constructor + StereoBM_GPU(); + //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size + //! ndisparities should be multiple of 8. SSD WindowsSize is fixed to 19 now + StereoBM_GPU(int preset, int ndisparities=0); + //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair + //! Output disparity has CV_8U type. + void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const; + private: + mutable GpuMat minSSD; + int preset; + int ndisp; + }; + } +} + + + +#include "opencv2/gpu/gpumat.hpp" + +#endif /* __OPENCV_GPU_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/include/opencv2/gpu/gpumat.hpp b/modules/gpu/include/opencv2/gpu/gpumat.hpp new file mode 100644 index 0000000000..922ff8d824 --- /dev/null +++ b/modules/gpu/include/opencv2/gpu/gpumat.hpp @@ -0,0 +1,350 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ +#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ + + +namespace cv +{ + +namespace gpu +{ + +//////////////////////////////// GpuMat //////////////////////////////// + +inline GpuMat::GpuMat() + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {} + +inline GpuMat::GpuMat(int _rows, int _cols, int _type) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + if( _rows > 0 && _cols > 0 ) + create( _rows, _cols, _type ); +} + +inline GpuMat::GpuMat(Size _size, int _type) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + if( _size.height > 0 && _size.width > 0 ) + create( _size.height, _size.width, _type ); +} + +inline GpuMat::GpuMat(int _rows, int _cols, int _type, const Scalar& _s) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + if(_rows > 0 && _cols > 0) + { + create(_rows, _cols, _type); + *this = _s; + } +} + +inline GpuMat::GpuMat(Size _size, int _type, const Scalar& _s) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + if( _size.height > 0 && _size.width > 0 ) + { + create( _size.height, _size.width, _type ); + *this = _s; + } +} + +inline GpuMat::GpuMat(const GpuMat& m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend) +{ + if( refcount ) + CV_XADD(refcount, 1); +} + +inline GpuMat::GpuMat(int _rows, int _cols, int _type, void* _data, size_t _step) + : flags(Mat::MAGIC_VAL + (_type & TYPE_MASK)), rows(_rows), cols(_cols), step(_step), data((uchar*)_data), refcount(0), + datastart((uchar*)_data), dataend((uchar*)_data) +{ + size_t minstep = cols*elemSize(); + if( step == Mat::AUTO_STEP ) + { + step = minstep; + flags |= Mat::CONTINUOUS_FLAG; + } + else + { + if( rows == 1 ) step = minstep; + CV_DbgAssert( step >= minstep ); + flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0; + } + dataend += step*(rows-1) + minstep; +} + +inline GpuMat::GpuMat(Size _size, int _type, void* _data, size_t _step) + : flags(Mat::MAGIC_VAL + (_type & TYPE_MASK)), rows(_size.height), cols(_size.width), + step(_step), data((uchar*)_data), refcount(0), + datastart((uchar*)_data), dataend((uchar*)_data) +{ + size_t minstep = cols*elemSize(); + if( step == Mat::AUTO_STEP ) + { + step = minstep; + flags |= Mat::CONTINUOUS_FLAG; + } + else + { + if( rows == 1 ) step = minstep; + CV_DbgAssert( step >= minstep ); + flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0; + } + dataend += step*(rows-1) + minstep; +} + + +inline GpuMat::GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange) +{ + flags = m.flags; + step = m.step; refcount = m.refcount; + data = m.data; datastart = m.datastart; dataend = m.dataend; + + if( rowRange == Range::all() ) + rows = m.rows; + else + { + CV_Assert( 0 <= rowRange.start && rowRange.start <= rowRange.end && rowRange.end <= m.rows ); + rows = rowRange.size(); + data += step*rowRange.start; + } + + if( colRange == Range::all() ) + cols = m.cols; + else + { + CV_Assert( 0 <= colRange.start && colRange.start <= colRange.end && colRange.end <= m.cols ); + cols = colRange.size(); + data += colRange.start*elemSize(); + flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1; + } + + if( rows == 1 ) + flags |= Mat::CONTINUOUS_FLAG; + + if( refcount ) + CV_XADD(refcount, 1); + if( rows <= 0 || cols <= 0 ) + rows = cols = 0; +} + +inline GpuMat::GpuMat(const GpuMat& m, const Rect& roi) + : flags(m.flags), rows(roi.height), cols(roi.width), + step(m.step), data(m.data + roi.y*step), refcount(m.refcount), + datastart(m.datastart), dataend(m.dataend) +{ + flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1; + data += roi.x*elemSize(); + CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && + 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows ); + if( refcount ) + CV_XADD(refcount, 1); + if( rows <= 0 || cols <= 0 ) + rows = cols = 0; +} + +inline GpuMat::GpuMat(const Mat& m) +: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) { upload(m); } + +inline GpuMat::~GpuMat() { release(); } + +inline GpuMat& GpuMat::operator = (const GpuMat& m) +{ + if( this != &m ) + { + if( m.refcount ) + CV_XADD(m.refcount, 1); + release(); + flags = m.flags; + rows = m.rows; cols = m.cols; + step = m.step; data = m.data; + datastart = m.datastart; dataend = m.dataend; + refcount = m.refcount; + } + return *this; +} + +inline GpuMat& GpuMat::operator = (const Mat& m) { upload(m); return *this; } + +template inline GpuMat::operator DevMem2D_() const { return DevMem2D_(rows, cols, (T*)data, step); } + +//CPP: void GpuMat::upload(const Mat& m); + + inline GpuMat::operator Mat() const + { + Mat m; + download(m); + return m; + } + +//CPP void GpuMat::download(cv::Mat& m) const; + +inline GpuMat GpuMat::row(int y) const { return GpuMat(*this, Range(y, y+1), Range::all()); } +inline GpuMat GpuMat::col(int x) const { return GpuMat(*this, Range::all(), Range(x, x+1)); } +inline GpuMat GpuMat::rowRange(int startrow, int endrow) const { return GpuMat(*this, Range(startrow, endrow), Range::all()); } +inline GpuMat GpuMat::rowRange(const Range& r) const { return GpuMat(*this, r, Range::all()); } +inline GpuMat GpuMat::colRange(int startcol, int endcol) const { return GpuMat(*this, Range::all(), Range(startcol, endcol)); } +inline GpuMat GpuMat::colRange(const Range& r) const { return GpuMat(*this, Range::all(), r); } + +inline GpuMat GpuMat::clone() const +{ + GpuMat m; + copyTo(m); + return m; +} + +//CPP void GpuMat::copyTo( GpuMat& m ) const; +//CPP void GpuMat::copyTo( GpuMat& m, const GpuMat& mask ) const; +//CPP void GpuMat::convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const; + +inline void GpuMat::assignTo( GpuMat& m, int type ) const +{ + if( type < 0 ) + m = *this; + else + convertTo(m, type); +} + +//CPP GpuMat& GpuMat::operator = (const Scalar& s); +//CPP GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask=GpuMat()); + +//CPP GpuMat GpuMat::reshape(int _cn, int _rows=0) const; + +//CPP void GpuMat::create(int _rows, int _cols, int _type); +inline void GpuMat::create(Size _size, int _type) { create(_size.height, _size.width, _type); } + +//CPP void GpuMat::release(); + +inline void GpuMat::swap(GpuMat& b) +{ + std::swap( flags, b.flags ); + std::swap( rows, b.rows ); std::swap( cols, b.cols ); + std::swap( step, b.step ); std::swap( data, b.data ); + std::swap( datastart, b.datastart ); + std::swap( dataend, b.dataend ); + std::swap( refcount, b.refcount ); +} + +inline void GpuMat::locateROI( Size& wholeSize, Point& ofs ) const +{ + size_t esz = elemSize(), minstep; + ptrdiff_t delta1 = data - datastart, delta2 = dataend - datastart; + CV_DbgAssert( step > 0 ); + if( delta1 == 0 ) + ofs.x = ofs.y = 0; + else + { + ofs.y = (int)(delta1/step); + ofs.x = (int)((delta1 - step*ofs.y)/esz); + CV_DbgAssert( data == datastart + ofs.y*step + ofs.x*esz ); + } + minstep = (ofs.x + cols)*esz; + wholeSize.height = (int)((delta2 - minstep)/step + 1); + wholeSize.height = std::max(wholeSize.height, ofs.y + rows); + wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz); + wholeSize.width = std::max(wholeSize.width, ofs.x + cols); +} + +inline GpuMat& GpuMat::adjustROI( int dtop, int dbottom, int dleft, int dright ) +{ + Size wholeSize; Point ofs; + size_t esz = elemSize(); + locateROI( wholeSize, ofs ); + int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height); + int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width); + data += (row1 - ofs.y)*step + (col1 - ofs.x)*esz; + rows = row2 - row1; cols = col2 - col1; + if( esz*cols == step || rows == 1 ) + flags |= Mat::CONTINUOUS_FLAG; + else + flags &= ~Mat::CONTINUOUS_FLAG; + return *this; +} + +inline GpuMat GpuMat::operator()( Range rowRange, Range colRange ) const { return GpuMat(*this, rowRange, colRange); } +inline GpuMat GpuMat::operator()( const Rect& roi ) const { return GpuMat(*this, roi); } + +inline bool GpuMat::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; } +inline size_t GpuMat::elemSize() const { return CV_ELEM_SIZE(flags); } +inline size_t GpuMat::elemSize1() const { return CV_ELEM_SIZE1(flags); } +inline int GpuMat::type() const { return CV_MAT_TYPE(flags); } +inline int GpuMat::depth() const { return CV_MAT_DEPTH(flags); } +inline int GpuMat::channels() const { return CV_MAT_CN(flags); } +inline size_t GpuMat::step1() const { return step/elemSize1(); } +inline Size GpuMat::size() const { return Size(cols, rows); } +inline bool GpuMat::empty() const { return data == 0; } + +inline uchar* GpuMat::ptr(int y) +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step*y; +} + +inline const uchar* GpuMat::ptr(int y) const +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step*y; +} + +template inline _Tp* GpuMat::ptr(int y) +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return (_Tp*)(data + step*y); +} + +template inline const _Tp* GpuMat::ptr(int y) const +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return (const _Tp*)(data + step*y); +} + +static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); } + + +} /* end of namespace gpu */ + +} /* end of namespace cv */ + +#endif /* __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/include/opencv2/gpu/matpl.hpp b/modules/gpu/include/opencv2/gpu/matpl.hpp new file mode 100644 index 0000000000..cecc01d229 --- /dev/null +++ b/modules/gpu/include/opencv2/gpu/matpl.hpp @@ -0,0 +1,265 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_MATPL_HPP__ +#define __OPENCV_GPU_MATPL_HPP__ + +#include "opencv2/core/core.hpp" + +namespace cv +{ + namespace gpu + { + + //////////////////////////////// MatPL //////////////////////////////// + + //class CV_EXPORTS MatPL : private Mat + //{ + //public: + // MatPL() {} + // MatPL(int _rows, int _cols, int _type) : Mat(_rows, _cols, _type) {} + // MatPL(Size _size, int _type) : Mat(_size, _type) {} + // + // Mat(int _rows, int _cols, int _type, const Scalar& _s) : Mat + // MatPL(Size _size, int _type, const Scalar& _s); + // //! copy constructor + // MatPL(const Mat& m); + // //! constructor for matrix headers pointing to user-allocated data + // MatPL(int _rows, int _cols, int _type, void* _data, size_t _step=AUTO_STEP); + // MatPL(Size _size, int _type, void* _data, size_t _step=AUTO_STEP); + // //! creates a matrix header for a part of the bigger matrix + // MatPL(const Mat& m, const Range& rowRange, const Range& colRange); + // MatPL(const Mat& m, const Rect& roi); + // //! converts old-style CvMat to the new matrix; the data is not copied by default + // Mat(const CvMat* m, bool copyData=false); + // MatPL converts old-style IplImage to the new matrix; the data is not copied by default + // MatPL(const IplImage* img, bool copyData=false); + // //! builds matrix from std::vector with or without copying the data + // template explicit Mat(const vector<_Tp>& vec, bool copyData=false); + // //! builds matrix from cv::Vec; the data is copied by default + // template explicit Mat(const Vec<_Tp, n>& vec, + // bool copyData=true); + // //! builds matrix from cv::Matx; the data is copied by default + // template explicit Mat(const Matx<_Tp, m, n>& mtx, + // bool copyData=true); + // //! builds matrix from a 2D point + // template explicit Mat(const Point_<_Tp>& pt); + // //! builds matrix from a 3D point + // template explicit Mat(const Point3_<_Tp>& pt); + // //! builds matrix from comma initializer + // template explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer); + // //! helper constructor to compile matrix expressions + // Mat(const MatExpr_Base& expr); + // //! destructor - calls release() + // ~Mat(); + // //! assignment operators + // Mat& operator = (const Mat& m); + // Mat& operator = (const MatExpr_Base& expr); + + // operator MatExpr_() const; + + // //! returns a new matrix header for the specified row + // Mat row(int y) const; + // //! returns a new matrix header for the specified column + // Mat col(int x) const; + // //! ... for the specified row span + // Mat rowRange(int startrow, int endrow) const; + // Mat rowRange(const Range& r) const; + // //! ... for the specified column span + // Mat colRange(int startcol, int endcol) const; + // Mat colRange(const Range& r) const; + // //! ... for the specified diagonal + // // (d=0 - the main diagonal, + // // >0 - a diagonal from the lower half, + // // <0 - a diagonal from the upper half) + // Mat diag(int d=0) const; + // //! constructs a square diagonal matrix which main diagonal is vector "d" + // static Mat diag(const Mat& d); + + // //! returns deep copy of the matrix, i.e. the data is copied + // Mat clone() const; + // //! copies the matrix content to "m". + // // It calls m.create(this->size(), this->type()). + // void copyTo( Mat& m ) const; + // //! copies those matrix elements to "m" that are marked with non-zero mask elements. + // void copyTo( Mat& m, const Mat& mask ) const; + // //! converts matrix to another datatype with optional scalng. See cvConvertScale. + // void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const; + + // void assignTo( Mat& m, int type=-1 ) const; + + // //! sets every matrix element to s + // Mat& operator = (const Scalar& s); + // //! sets some of the matrix elements to s, according to the mask + // Mat& setTo(const Scalar& s, const Mat& mask=Mat()); + // //! creates alternative matrix header for the same data, with different + // // number of channels and/or different number of rows. see cvReshape. + // Mat reshape(int _cn, int _rows=0) const; + + // //! matrix transposition by means of matrix expressions + // MatExpr_ >, Mat> + // t() const; + // //! matrix inversion by means of matrix expressions + // MatExpr_ >, Mat> + // inv(int method=DECOMP_LU) const; + // MatExpr_ >, Mat> + // //! per-element matrix multiplication by means of matrix expressions + // mul(const Mat& m, double scale=1) const; + // MatExpr_ >, Mat> + // mul(const MatExpr_ >, Mat>& m, double scale=1) const; + // MatExpr_ >, Mat> + // mul(const MatExpr_ >, Mat>& m, double scale=1) const; + + // //! computes cross-product of 2 3D vectors + // Mat cross(const Mat& m) const; + // //! computes dot-product + // double dot(const Mat& m) const; + + // //! Matlab-style matrix initialization + // static MatExpr_Initializer zeros(int rows, int cols, int type); + // static MatExpr_Initializer zeros(Size size, int type); + // static MatExpr_Initializer ones(int rows, int cols, int type); + // static MatExpr_Initializer ones(Size size, int type); + // static MatExpr_Initializer eye(int rows, int cols, int type); + // static MatExpr_Initializer eye(Size size, int type); + + // //! allocates new matrix data unless the matrix already has specified size and type. + // // previous data is unreferenced if needed. + // void create(int _rows, int _cols, int _type); + // void create(Size _size, int _type); + // //! increases the reference counter; use with care to avoid memleaks + // void addref(); + // //! decreases reference counter; + // // deallocate the data when reference counter reaches 0. + // void release(); + + // //! locates matrix header within a parent matrix. See below + // void locateROI( Size& wholeSize, Point& ofs ) const; + // //! moves/resizes the current matrix ROI inside the parent matrix. + // Mat& adjustROI( int dtop, int dbottom, int dleft, int dright ); + // //! extracts a rectangular sub-matrix + // // (this is a generalized form of row, rowRange etc.) + // Mat operator()( Range rowRange, Range colRange ) const; + // Mat operator()( const Rect& roi ) const; + + // //! converts header to CvMat; no data is copied + // operator CvMat() const; + // //! converts header to IplImage; no data is copied + // operator IplImage() const; + + // //! returns true iff the matrix data is continuous + // // (i.e. when there are no gaps between successive rows). + // // similar to CV_IS_MAT_CONT(cvmat->type) + // bool isContinuous() const; + // //! returns element size in bytes, + // // similar to CV_ELEM_SIZE(cvmat->type) + // size_t elemSize() const; + // //! returns the size of element channel in bytes. + // size_t elemSize1() const; + // //! returns element type, similar to CV_MAT_TYPE(cvmat->type) + // int type() const; + // //! returns element type, similar to CV_MAT_DEPTH(cvmat->type) + // int depth() const; + // //! returns element type, similar to CV_MAT_CN(cvmat->type) + // int channels() const; + // //! returns step/elemSize1() + // size_t step1() const; + // //! returns matrix size: + // // width == number of columns, height == number of rows + // Size size() const; + // //! returns true if matrix data is NULL + // bool empty() const; + + // //! returns pointer to y-th row + // uchar* ptr(int y=0); + // const uchar* ptr(int y=0) const; + + // //! template version of the above method + // template _Tp* ptr(int y=0); + // template const _Tp* ptr(int y=0) const; + + // //! template methods for read-write or read-only element access. + // // note that _Tp must match the actual matrix type - + // // the functions do not do any on-fly type conversion + // template _Tp& at(int y, int x); + // template _Tp& at(Point pt); + // template const _Tp& at(int y, int x) const; + // template const _Tp& at(Point pt) const; + // template _Tp& at(int i); + // template const _Tp& at(int i) const; + + // //! template methods for iteration over matrix elements. + // // the iterators take care of skipping gaps in the end of rows (if any) + // template MatIterator_<_Tp> begin(); + // template MatIterator_<_Tp> end(); + // template MatConstIterator_<_Tp> begin() const; + // template MatConstIterator_<_Tp> end() const; + + // enum { MAGIC_VAL=0x42FF0000, AUTO_STEP=0, CONTINUOUS_FLAG=CV_MAT_CONT_FLAG }; + + // /*! includes several bit-fields: + // - the magic signature + // - continuity flag + // - depth + // - number of channels + // */ + // int flags; + // //! the number of rows and columns + // int rows, cols; + // //! a distance between successive rows in bytes; includes the gap if any + // size_t step; + // //! pointer to the data + // uchar* data; + + // //! pointer to the reference counter; + // // when matrix points to user-allocated data, the pointer is NULL + // int* refcount; + + // //! helper fields used in locateROI and adjustROI + // uchar* datastart; + // uchar* dataend; + //}; + } +} + + +#endif /* __OPENCV_GPU_MATPL_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp new file mode 100644 index 0000000000..d083eed703 --- /dev/null +++ b/modules/gpu/src/cudastream.cpp @@ -0,0 +1,108 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::gpu; + + +cv::gpu::CudaStream::CudaStream() : impl( fastMalloc(sizeof(cudaStream_t)) ) +{ + cudaSafeCall( cudaStreamCreate((cudaStream_t*)impl) ); +} +cv::gpu::CudaStream::~CudaStream() +{ + cudaSafeCall( cudaStreamDestroy( *(cudaStream_t*)impl ) ); + cv::fastFree( impl ); +} + +bool cv::gpu::CudaStream::queryIfComplete() +{ + cudaError_t err = cudaStreamQuery( *(cudaStream_t*)impl ); + + if (err == cudaSuccess) + return true; + + if (err == cudaErrorNotReady) + return false; + + //cudaErrorInvalidResourceHandle + cudaSafeCall( err ); + return true; +} +void cv::gpu::CudaStream::waitForCompletion() +{ + cudaSafeCall( cudaStreamSynchronize( *(cudaStream_t*)impl ) ); +} + +void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) +{ +// cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost, +} +void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) +{ + CV_Assert(!"Not implemented"); +} +void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) +{ + CV_Assert(!"Not implemented"); +} + +void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val) +{ + CV_Assert(!"Not implemented"); +} + +void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) +{ + CV_Assert(!"Not implemented"); +} + +void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type) +{ + CV_Assert(!"Not implemented"); +} + +//struct cudaStream_t& cv::gpu::CudaStream::getStream() { return stream; } + + diff --git a/modules/gpu/src/gpumat.cpp b/modules/gpu/src/gpumat.cpp new file mode 100644 index 0000000000..dbbeb690dc --- /dev/null +++ b/modules/gpu/src/gpumat.cpp @@ -0,0 +1,185 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::gpu; + +//////////////////////////////// GpuMat //////////////////////////////// + +void GpuMat::upload(const Mat& m) +{ + CV_DbgAssert(!m.empty()); + create(m.size(), m.type()); + cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); +} + +void GpuMat::download(cv::Mat& m) const +{ + CV_DbgAssert(!this->empty()); + m.create(size(), type()); + cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); +} + +void GpuMat::copyTo( GpuMat& m ) const +{ + CV_DbgAssert(!this->empty()); + m.create(size(), type()); + cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) ); + cudaSafeCall( cudaThreadSynchronize() ); +} + +void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const +{ + CV_Assert(!"Not implemented"); +} + +void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const +{ + CV_Assert(!"Not implemented"); +} + +GpuMat& GpuMat::operator = (const Scalar& /*s*/) +{ + CV_Assert(!"Not implemented"); + return *this; +} + +GpuMat& GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/) +{ + CV_Assert(!"Not implemented"); + return *this; +} + + +GpuMat GpuMat::reshape(int new_cn, int new_rows) const +{ + GpuMat hdr = *this; + + int cn = channels(); + if( new_cn == 0 ) + new_cn = cn; + + int total_width = cols * cn; + + if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 ) + new_rows = rows * total_width / new_cn; + + if( new_rows != 0 && new_rows != rows ) + { + int total_size = total_width * rows; + if( !isContinuous() ) + CV_Error( CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed" ); + + if( (unsigned)new_rows > (unsigned)total_size ) + CV_Error( CV_StsOutOfRange, "Bad new number of rows" ); + + total_width = total_size / new_rows; + + if( total_width * new_rows != total_size ) + CV_Error( CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows" ); + + hdr.rows = new_rows; + hdr.step = total_width * elemSize1(); + } + + int new_width = total_width / new_cn; + + if( new_width * new_cn != total_width ) + CV_Error( CV_BadNumChannels, "The total width is not divisible by the new number of channels" ); + + hdr.cols = new_width; + hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT); + return hdr; +} + +void GpuMat::create(int _rows, int _cols, int _type) +{ + _type &= TYPE_MASK; + if( rows == _rows && cols == _cols && type() == _type && data ) + return; + if( data ) + release(); + CV_DbgAssert( _rows >= 0 && _cols >= 0 ); + if( _rows > 0 && _cols > 0 ) + { + flags = Mat::MAGIC_VAL + _type; + rows = _rows; + cols = _cols; + + size_t esz = elemSize(); + + void *dev_ptr; + cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) ); + + if (esz * cols == step) + flags |= Mat::CONTINUOUS_FLAG; + + int64 _nettosize = (int64)step*rows; + size_t nettosize = (size_t)_nettosize; + + datastart = data = (uchar*)dev_ptr; + dataend = data + nettosize; + + refcount = (int*)fastMalloc(sizeof(*refcount)); + *refcount = 1; + } +} + +void GpuMat::release() +{ + if( refcount && CV_XADD(refcount, -1) == 1 ) + { + fastFree(refcount); + cudaSafeCall( cudaFree(datastart) ); + } + data = datastart = dataend = 0; + step = rows = cols = 0; + refcount = 0; +} + + + + + + diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp new file mode 100644 index 0000000000..c1c3b9964a --- /dev/null +++ b/modules/gpu/src/initialization.cpp @@ -0,0 +1,93 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::gpu; + +CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() +{ + int count; + cudaSafeCall( cudaGetDeviceCount( &count ) ); + return count; +} + +CV_EXPORTS string cv::gpu::getDeviceName(int device) +{ + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties( &prop, device) ); + return prop.name; +} + +CV_EXPORTS void cv::gpu::setDevice(int device) +{ + cudaSafeCall( cudaSetDevice( device ) ); +} + +CV_EXPORTS int cv::gpu::getComputeCapability(int device) +{ + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties( &prop, device) ); + + if (prop.major == 2) + return CV_GPU_CC_20; + + if (prop.major == 1) + switch (prop.minor) + { + case 0: return CV_GPU_CC_10; + case 1: return CV_GPU_CC_11; + case 2: return CV_GPU_CC_12; + case 3: return CV_GPU_CC_13; + } + + return -1; +} + + +CV_EXPORTS int cv::gpu::getNumberOfSMs(int device) +{ + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties( &prop, device ) ); + return prop.multiProcessorCount; +} \ No newline at end of file diff --git a/modules/gpu/src/precomp.cpp b/modules/gpu/src/precomp.cpp new file mode 100644 index 0000000000..b4481e4f99 --- /dev/null +++ b/modules/gpu/src/precomp.cpp @@ -0,0 +1,44 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +/* End of file. */ diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp new file mode 100644 index 0000000000..e6cfbaa395 --- /dev/null +++ b/modules/gpu/src/precomp.hpp @@ -0,0 +1,88 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#ifndef __OPENCV_PRECOMP_H__ +#define __OPENCV_PRECOMP_H__ + +#if _MSC_VER >= 1200 +#pragma warning( disable: 4251 4710 4711 4514 4996 ) +#endif + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "opencv2/gpu/gpu.hpp" +#include "opencv2/gpu/gpumat.hpp" + +#include "cuda_shared.hpp" + +#include "cuda_runtime.h" + + +#define cudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__) + +//inline void __cudaSafeCall( cudaError err, const char *file, const int line ) +//{ +// if( cudaSuccess != err) +// CV_Error_(CV_StsAssert, ("%s(%i) : Runtime API error : %s.\n", cudaGetErrorString(err))); +//} + +namespace cv +{ + namespace gpu + { + + inline void __cudaSafeCall( cudaError err, const char *file, const int line ) + { + if( cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : cudaSafeCall() Runtime API error : %s.\n", file, line, cudaGetErrorString(err) ); + exit(-1); + } + } + + template + inline DevMem2D_ getDevMem(const GpuMat& mat) { return DevMem2D_(m.rows, m.cols, m.data, m.step); } + } +} + +#endif diff --git a/modules/gpu/src/stereobm_gpu.cpp b/modules/gpu/src/stereobm_gpu.cpp new file mode 100644 index 0000000000..306e7e651c --- /dev/null +++ b/modules/gpu/src/stereobm_gpu.cpp @@ -0,0 +1,72 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include + +using namespace cv; +using namespace cv::gpu; + +StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {} +StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) +{ + CV_Assert(ndisp <= std::numeric_limits::max()); +} + +void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const +{ + CV_DbgAssert(left.rows == right.rows && left.cols == right.cols); + CV_DbgAssert(left.type() == CV_8UC1); + CV_DbgAssert(right.type() == CV_8UC1); + + disparity.create(left.size(), CV_8U); + minSSD.create(left.size(), CV_32S); + + if (preset == PREFILTER_XSOBEL) + { + CV_Assert(!"Not implemented"); + } + + DevMem2D disp = disparity; + DevMem2D_ mssd = minSSD; + impl::stereoBM_GPU(left, right, disp, ndisp, mssd); +}