reduce filter2d instantiations for tiny build

pull/3785/head
Vladislav Vinogradov 10 years ago
parent 00c36e88ef
commit 8e49ab1d3b
  1. 19
      modules/gpu/src/cuda/imgproc.cu
  2. 2
      modules/gpu/src/filtering.cpp

@ -985,6 +985,16 @@ namespace cv { namespace gpu { namespace device
int borderMode, const float* borderValue, cudaStream_t stream) int borderMode, const float* borderValue, cudaStream_t stream)
{ {
typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
#ifdef OPENCV_TINY_GPU_MODULE
static const func_t funcs[] =
{
Filter2DCaller<T, D, BrdReflect101>::call,
Filter2DCaller<T, D, BrdReplicate>::call,
Filter2DCaller<T, D, BrdConstant>::call,
Filter2DCaller<T, D, BrdReflect>::call,
0
};
#else
static const func_t funcs[] = static const func_t funcs[] =
{ {
Filter2DCaller<T, D, BrdReflect101>::call, Filter2DCaller<T, D, BrdReflect101>::call,
@ -993,19 +1003,26 @@ namespace cv { namespace gpu { namespace device
Filter2DCaller<T, D, BrdReflect>::call, Filter2DCaller<T, D, BrdReflect>::call,
Filter2DCaller<T, D, BrdWrap>::call Filter2DCaller<T, D, BrdWrap>::call
}; };
#endif
const func_t func = funcs[borderMode];
if (!func)
cv::gpu::error("Unsupported input parameters for filter2D", __FILE__, __LINE__, "");
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); func(static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
} }
template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
#ifndef OPENCV_TINY_GPU_MODULE
template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
#endif
template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
} // namespace imgproc } // namespace imgproc

@ -789,12 +789,14 @@ Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const
case CV_8UC4: case CV_8UC4:
func = filter2D_gpu<uchar4, uchar4>; func = filter2D_gpu<uchar4, uchar4>;
break; break;
#ifndef OPENCV_TINY_GPU_MODULE
case CV_16UC1: case CV_16UC1:
func = filter2D_gpu<ushort, ushort>; func = filter2D_gpu<ushort, ushort>;
break; break;
case CV_16UC4: case CV_16UC4:
func = filter2D_gpu<ushort4, ushort4>; func = filter2D_gpu<ushort4, ushort4>;
break; break;
#endif
case CV_32FC1: case CV_32FC1:
func = filter2D_gpu<float, float>; func = filter2D_gpu<float, float>;
break; break;

Loading…
Cancel
Save