added reprojectImageTo3D_GPU

15 years ago · ec4ce050f9
parent f1fa323456
commit ec4ce050f9
3 changed files with 99 additions and 7 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -351,6 +351,8 @@ namespace cv

        CV_EXPORTS void colorizeDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp);

+        CV_EXPORTS void reprojectImageTo3D_GPU(const GpuMat& disp, GpuMat& xyzw, const Mat& Q);
+
        //////////////////////////////// StereoBM_GPU ////////////////////////////////

        class CV_EXPORTS StereoBM_GPU
@ -452,10 +454,10 @@ namespace cv
        class CV_EXPORTS StereoConstantSpaceBP
        {
        public:
-            enum { DEFAULT_NDISP    = 64 };
-            enum { DEFAULT_ITERS    = 5  };
-            enum { DEFAULT_LEVELS   = 5  };
-            enum { DEFAULT_NR_PLANE = 2  };
+            enum { DEFAULT_NDISP    = 128 };
+            enum { DEFAULT_ITERS    = 8   };
+            enum { DEFAULT_LEVELS   = 4   };
+            enum { DEFAULT_NR_PLANE = 4   };

            //! the default constructor
            explicit StereoConstantSpaceBP(int ndisp    = DEFAULT_NDISP,
@ -552,7 +554,7 @@ namespace cv
    //! Speckle filtering - filters small connected components on diparity image.
    //! It sets pixel (x,y) to newVal if it coresponds to small CC with size < maxSpeckleSize.
    //! Threshold for border between CC is diffThreshold;
-    void filterSpeckles( Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf);
+    CV_EXPORTS void filterSpeckles( Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf);

 }
 #include "opencv2/gpu/matrix_operations.hpp"
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@ -296,7 +296,7 @@ namespace cv { namespace gpu { namespace impl
        grid.y = divUp(src.rows, threads.y);
         
        imgproc::colorizeDisp<<<grid, threads>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
-        cudaThreadSynchronize(); 
+        cudaSafeCall( cudaThreadSynchronize() ); 
    }

    void colorizeDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp)
@ -307,6 +307,71 @@ namespace cv { namespace gpu { namespace impl
        grid.y = divUp(src.rows, threads.y);
         
        imgproc::colorizeDisp<<<grid, threads>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
-        cudaThreadSynchronize();
+        cudaSafeCall( cudaThreadSynchronize() );
+    }
+}}}
+
+/////////////////////////////////// colorizeDisp ///////////////////////////////////////////////
+
+namespace imgproc
+{
+    __constant__ float cq[16];
+
+    template <typename T>
+    __global__ void reprojectImageTo3D(const T* disp, size_t disp_step, float* xyzw, size_t xyzw_step, int rows, int cols)
+    {        
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (y < rows && x < cols)
+        {
+
+            float qx = cq[1] * y + cq[3], qy = cq[5] * y + cq[7];
+            float qz = cq[9] * y + cq[11], qw = cq[13] * y + cq[15];
+
+            qx += x * cq[0]; 
+            qy += x * cq[4];
+            qz += x * cq[8];
+            qw += x * cq[12];
+
+            T d = *(disp + disp_step * y + x);
+
+            float iW = 1.f / (qw + cq[14] * d);
+            float4 v;
+            v.x = (qx + cq[2] * d) * iW;
+            v.y = (qy + cq[6] * d) * iW;
+            v.z = (qz + cq[10] * d) * iW;
+            v.w = 1.f;
+
+            *(float4*)(xyzw + xyzw_step * y + (x * 4)) = v;
+        }
+    }
+}
+
+namespace cv { namespace gpu { namespace impl 
+{
+    template <typename T>
+    inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q)
+    {
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(disp.cols, threads.x);
+        grid.y = divUp(disp.rows, threads.y);
+
+        cudaSafeCall( cudaMemcpyToSymbol(imgproc::cq, q, 16 * sizeof(float)) );
+
+        imgproc::reprojectImageTo3D<<<grid, threads>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);
+
+        cudaSafeCall( cudaThreadSynchronize() );
+    }
+
+    void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q)
+    {
+        reprojectImageTo3D_caller(disp, xyzw, q);
+    }
+
+    void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q)
+    {
+        reprojectImageTo3D_caller(disp, xyzw, q);
    }
 }}}
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@ -50,6 +50,7 @@ using namespace cv::gpu;
 void cv::gpu::remap(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::meanShiftFiltering_GPU(const GpuMat&, GpuMat&, int, int, TermCriteria ) { throw_nogpu(); }
 void cv::gpu::colorizeDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
+void cv::gpu::reprojectImageTo3D_GPU(const GpuMat&, GpuMat&, const Mat&) { throw_nogpu(); }

 #else /* !defined (HAVE_CUDA) */

@ -63,6 +64,9 @@ namespace cv { namespace gpu

        void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp);
        void colorizeDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp);
+
+        void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q);
+        void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q);
    }
 }}

@ -127,4 +131,25 @@ void cv::gpu::colorizeDisp(const GpuMat& src, GpuMat& dst, int ndisp)
    dst = out;
 }

+namespace
+{
+    template <typename T>
+    void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q)
+    {
+        impl::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>());
+    }
+}
+
+void cv::gpu::reprojectImageTo3D_GPU(const GpuMat& disp, GpuMat& xyzw, const Mat& Q)
+{
+    typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q);
+
+    static const reprojectImageTo3D_caller_t callers[] = {reprojectImageTo3D_caller<uchar>, 0, 0, reprojectImageTo3D_caller<short>, 0, 0, 0, 0};
+    CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);
+
+    xyzw.create(disp.rows, disp.cols, CV_32FC4);
+    
+    callers[disp.type()](disp, xyzw, Q);
+}
+
 #endif /* !defined (HAVE_CUDA) */