From 905e5f17399b205dfe04a6b05e266b031b290b48 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 8 Dec 2010 07:23:59 +0000 Subject: [PATCH] added support of 4-channels images to StereoConstantSpaceBP. refactored transpose_gpu, made it non template function. --- modules/gpu/src/arithm.cpp | 13 ++---------- modules/gpu/src/constantspacebp.cpp | 2 +- modules/gpu/src/cuda/constantspacebp.cu | 28 +++++++++++++++++++------ modules/gpu/src/cuda/mathfunc.cu | 17 ++++----------- tests/gpu/src/stereo_csbp.cpp | 3 +++ 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index 0d9131416a..90edea1b8e 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -272,20 +272,11 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst) namespace cv { namespace gpu { namespace mathfunc { - template - void transpose_gpu(const DevMem2D& src, const DevMem2D& dst); + void transpose_gpu(const DevMem2Di& src, const DevMem2Di& dst); }}} void cv::gpu::transpose(const GpuMat& src, GpuMat& dst) { - using namespace cv::gpu::mathfunc; - typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst); - static const func_t funcs[] = - { - transpose_gpu, transpose_gpu, transpose_gpu, transpose_gpu, - transpose_gpu, transpose_gpu - }; - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 || src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1); @@ -301,7 +292,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst) } else { - funcs[src.depth()](src, dst); + mathfunc::transpose_gpu(src, dst); } } diff --git a/modules/gpu/src/constantspacebp.cpp b/modules/gpu/src/constantspacebp.cpp index 29e0755730..d29c61cf96 100644 --- a/modules/gpu/src/constantspacebp.cpp +++ b/modules/gpu/src/constantspacebp.cpp @@ -141,7 +141,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2] CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane && left.rows == right.rows && left.cols == right.cols && left.type() == right.type()); - CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3)); + CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4)); const Scalar zero = Scalar::all(0); diff --git a/modules/gpu/src/cuda/constantspacebp.cu b/modules/gpu/src/cuda/constantspacebp.cu index b588b7a002..d3658d0f91 100644 --- a/modules/gpu/src/cuda/constantspacebp.cu +++ b/modules/gpu/src/cuda/constantspacebp.cu @@ -99,8 +99,15 @@ namespace cv { namespace gpu { namespace csbp /////////////////////// init data cost //////////////////////// /////////////////////////////////////////////////////////////// - template - struct DataCostPerPixel + template struct DataCostPerPixel; + template <> struct DataCostPerPixel<1> + { + static __device__ float compute(const uchar* left, const uchar* right) + { + return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term); + } + }; + template <> struct DataCostPerPixel<3> { static __device__ float compute(const uchar* left, const uchar* right) { @@ -111,13 +118,18 @@ namespace cv { namespace gpu { namespace csbp return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term); } }; - - template <> - struct DataCostPerPixel<1> + template <> struct DataCostPerPixel<4> { static __device__ float compute(const uchar* left, const uchar* right) { - return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term); + uchar4 l = *((const uchar4*)left); + uchar4 r = *((const uchar4*)right); + + float tb = 0.114f * abs((int)l.x - r.x); + float tg = 0.587f * abs((int)l.y - r.y); + float tr = 0.299f * abs((int)l.z - r.z); + + return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term); } }; @@ -327,6 +339,7 @@ namespace cv { namespace gpu { namespace csbp { case 1: init_data_cost<<>>(h, w, level); break; case 3: init_data_cost<<>>(h, w, level); break; + case 4: init_data_cost<<>>(h, w, level); break; default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); } } @@ -345,6 +358,7 @@ namespace cv { namespace gpu { namespace csbp { case 1: init_data_cost_reduce<<>>(level, rows, cols, h); break; case 3: init_data_cost_reduce<<>>(level, rows, cols, h); break; + case 4: init_data_cost_reduce<<>>(level, rows, cols, h); break; default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); } } @@ -517,6 +531,7 @@ namespace cv { namespace gpu { namespace csbp { case 1: compute_data_cost<<>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break; case 3: compute_data_cost<<>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break; + case 4: compute_data_cost<<>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break; default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); } } @@ -536,6 +551,7 @@ namespace cv { namespace gpu { namespace csbp { case 1: compute_data_cost_reduce<<>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; case 3: compute_data_cost_reduce<<>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; + case 4: compute_data_cost_reduce<<>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break; default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__); } } diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index 71b70d37e8..cfacc7751e 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -1254,10 +1254,9 @@ namespace cv { namespace gpu { namespace mathfunc ////////////////////////////////////////////////////////////////////////////////////////////////////////// // transpose - template - __global__ void transpose(const DevMem2D_ src, PtrStep_ dst) + __global__ void transpose(const DevMem2Di src, PtrStepi dst) { - __shared__ T s_mem[16 * 17]; + __shared__ int s_mem[16 * 17]; int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -1280,22 +1279,14 @@ namespace cv { namespace gpu { namespace mathfunc } } - template - void transpose_gpu(const DevMem2D& src, const DevMem2D& dst) + void transpose_gpu(const DevMem2Di& src, const DevMem2Di& dst) { dim3 threads(16, 16, 1); dim3 grid(divUp(src.cols, 16), divUp(src.rows, 16), 1); - transpose<<>>((DevMem2D_)src, (DevMem2D_)dst); + transpose<<>>(src, dst); cudaSafeCall( cudaThreadSynchronize() ); } - - template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst); - template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst); - template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst); - template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst); - template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst); - template void transpose_gpu(const DevMem2D& src, const DevMem2D& dst); ////////////////////////////////////////////////////////////////////////////////////////////////////////// // min/max diff --git a/tests/gpu/src/stereo_csbp.cpp b/tests/gpu/src/stereo_csbp.cpp index 6ea4327ff3..322a782a8e 100644 --- a/tests/gpu/src/stereo_csbp.cpp +++ b/tests/gpu/src/stereo_csbp.cpp @@ -62,6 +62,9 @@ struct CV_GpuStereoCSBPTest : public CvTest try { + {cv::Mat temp; cv::cvtColor(img_l, temp, CV_BGR2BGRA); cv::swap(temp, img_l);} + {cv::Mat temp; cv::cvtColor(img_r, temp, CV_BGR2BGRA); cv::swap(temp, img_r);} + cv::gpu::GpuMat disp; cv::gpu::StereoConstantSpaceBP bpm(128, 16, 4, 4);