diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp index ebf764f637..21a03dc209 100644 --- a/modules/gpu/include/opencv2/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu.hpp @@ -55,142 +55,6 @@ #include "opencv2/features2d.hpp" namespace cv { namespace gpu { - -//////////////////////////////// CudaMem //////////////////////////////// -// CudaMem is limited cv::Mat with page locked memory allocation. -// Page locked memory is only needed for async and faster coping to GPU. -// It is convertable to cv::Mat header without reference counting -// so you can use it with other opencv functions. - -// Page-locks the matrix m memory and maps it for the device(s) -CV_EXPORTS void registerPageLocked(Mat& m); -// Unmaps the memory of matrix m, and makes it pageable again. -CV_EXPORTS void unregisterPageLocked(Mat& m); - -class CV_EXPORTS CudaMem -{ -public: - enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 }; - - CudaMem(); - CudaMem(const CudaMem& m); - - CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED); - CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED); - - - //! creates from cv::Mat with coping data - explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED); - - ~CudaMem(); - - CudaMem& operator = (const CudaMem& m); - - //! returns deep copy of the matrix, i.e. the data is copied - CudaMem clone() const; - - //! allocates new matrix data unless the matrix already has specified size and type. - void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED); - void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED); - - //! decrements reference counter and released memory if needed. - void release(); - - //! returns matrix header with disabled reference counting for CudaMem data. - Mat createMatHeader() const; - operator Mat() const; - - //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware. - GpuMat createGpuMatHeader() const; - operator GpuMat() const; - - //returns if host memory can be mapperd to gpu address space; - static bool canMapHostMemory(); - - // Please see cv::Mat for descriptions - bool isContinuous() const; - size_t elemSize() const; - size_t elemSize1() const; - int type() const; - int depth() const; - int channels() const; - size_t step1() const; - Size size() const; - bool empty() const; - - - // Please see cv::Mat for descriptions - int flags; - int rows, cols; - size_t step; - - uchar* data; - int* refcount; - - uchar* datastart; - uchar* dataend; - - int alloc_type; -}; - -//////////////////////////////// CudaStream //////////////////////////////// -// Encapculates Cuda Stream. Provides interface for async coping. -// Passed to each function that supports async kernel execution. -// Reference counting is enabled - -class CV_EXPORTS Stream -{ -public: - Stream(); - ~Stream(); - - Stream(const Stream&); - Stream& operator =(const Stream&); - - bool queryIfComplete(); - void waitForCompletion(); - - //! downloads asynchronously - // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat) - void enqueueDownload(const GpuMat& src, CudaMem& dst); - void enqueueDownload(const GpuMat& src, Mat& dst); - - //! uploads asynchronously - // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI) - void enqueueUpload(const CudaMem& src, GpuMat& dst); - void enqueueUpload(const Mat& src, GpuMat& dst); - - //! copy asynchronously - void enqueueCopy(const GpuMat& src, GpuMat& dst); - - //! memory set asynchronously - void enqueueMemSet(GpuMat& src, Scalar val); - void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask); - - //! converts matrix type, ex from float to uchar depending on type - void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0); - - //! adds a callback to be called on the host after all currently enqueued items in the stream have completed - typedef void (*StreamCallback)(Stream& stream, int status, void* userData); - void enqueueHostCallback(StreamCallback callback, void* userData); - - static Stream& Null(); - - operator bool() const; - -private: - struct Impl; - - explicit Stream(Impl* impl); - void create(); - void release(); - - Impl *impl; - - friend struct StreamAccessor; -}; - - //////////////////////////////// Filter Engine //////////////////////////////// /*! @@ -1522,97 +1386,6 @@ private: friend class CascadeClassifier_GPU_LBP; }; -// ======================== GPU version for soft cascade ===================== // - -class CV_EXPORTS ChannelsProcessor -{ -public: - enum - { - GENERIC = 1 << 4, - SEPARABLE = 2 << 4 - }; - - // Appends specified number of HOG first-order features integrals into given vector. - // Param frame is an input 3-channel bgr image. - // Param channels is a GPU matrix of optionally shrinked channels - // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution. - virtual void apply(InputArray frame, OutputArray channels, Stream& stream = Stream::Null()) = 0; - - // Creates a specific preprocessor implementation. - // Param shrinkage is a resizing factor. Resize is applied before the computing integral sum - // Param bins is a number of HOG-like channels. - // Param flags is a channel computing extra flags. - static cv::Ptr create(const int shrinkage, const int bins, const int flags = GENERIC); - - virtual ~ChannelsProcessor(); - -protected: - ChannelsProcessor(); -}; - -// Implementation of soft (stage-less) cascaded detector. -class CV_EXPORTS SCascade : public cv::Algorithm -{ -public: - - // Representation of detectors result. - struct CV_EXPORTS Detection - { - ushort x; - ushort y; - ushort w; - ushort h; - float confidence; - int kind; - - enum {PEDESTRIAN = 0}; - }; - - enum { NO_REJECT = 1, DOLLAR = 2, /*PASCAL = 4,*/ DEFAULT = NO_REJECT, NMS_MASK = 0xF}; - - // An empty cascade will be created. - // Param minScale is a minimum scale relative to the original size of the image on which cascade will be applied. - // Param minScale is a maximum scale relative to the original size of the image on which cascade will be applied. - // Param scales is a number of scales from minScale to maxScale. - // Param flags is an extra tuning flags. - SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55, - const int flags = NO_REJECT || ChannelsProcessor::GENERIC); - - virtual ~SCascade(); - - cv::AlgorithmInfo* info() const; - - // Load cascade from FileNode. - // Param fn is a root node for cascade. Should be . - virtual bool load(const FileNode& fn); - - // Load cascade config. - virtual void read(const FileNode& fn); - - // Return the matrix of of detected objects. - // Param image is a frame on which detector will be applied. - // Param rois is a regions of interests mask generated by genRoi. - // Only the objects that fall into one of the regions will be returned. - // Param objects is an output array of Detections represented as GpuMat of detections (SCascade::Detection) - // The first element of the matrix is actually a count of detections. - // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution - virtual void detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const; - -private: - - struct Fields; - Fields* fields; - - double minScale; - double maxScale; - int scales; - - int flags; -}; - -CV_EXPORTS bool initModule_gpu(void); - ////////////////////////////////// SURF ////////////////////////////////////////// class CV_EXPORTS SURF_GPU diff --git a/modules/softcascade/CMakeLists.txt b/modules/softcascade/CMakeLists.txt index 87798d2997..d558e8d295 100644 --- a/modules/softcascade/CMakeLists.txt +++ b/modules/softcascade/CMakeLists.txt @@ -1,3 +1,3 @@ set(the_description "Soft Cascade detection and training") +ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4310 -Wundef -Wsign-promo -Wmissing-declarations -Wmissing-prototypes) ocv_define_module(softcascade opencv_core opencv_imgproc opencv_ml) -ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4310 -Wundef) diff --git a/modules/softcascade/doc/softcascade_cuda.rst b/modules/softcascade/doc/softcascade_cuda.rst index 504774898b..92b3bf6bee 100644 --- a/modules/softcascade/doc/softcascade_cuda.rst +++ b/modules/softcascade/doc/softcascade_cuda.rst @@ -30,28 +30,28 @@ Implementation of soft (stageless) cascaded detector. :: softcascade::SCascade::~SCascade ---------------------------- +--------------------------------- Destructor for SCascade. -.. ocv:function:: gpu::SCascade::~SCascade() +.. ocv:function:: softcascade::SCascade::~SCascade() softcascade::SCascade::load --------------------------- +---------------------------- Load cascade from FileNode. -.. ocv:function:: bool gpu::SCascade::load(const FileNode& fn) +.. ocv:function:: bool softcascade::SCascade::load(const FileNode& fn) :param fn: File node from which the soft cascade are read. softcascade::SCascade::detect --------------------------- +------------------------------ Apply cascade to an input frame and return the vector of Decection objcts. -.. ocv:function:: void gpu::SCascade::detect(InputArray image, InputArray rois, OutputArray objects, Stream& stream = Stream::Null()) const +.. ocv:function:: void softcascade::SCascade::detect(InputArray image, InputArray rois, OutputArray objects, cv::gpu::Stream& stream = cv::gpu::Stream::Null()) const :param image: a frame on which detector will be applied. diff --git a/modules/softcascade/include/opencv2/softcascade.hpp b/modules/softcascade/include/opencv2/softcascade.hpp index 7ce31a613f..396149c84c 100644 --- a/modules/softcascade/include/opencv2/softcascade.hpp +++ b/modules/softcascade/include/opencv2/softcascade.hpp @@ -44,6 +44,7 @@ #define __OPENCV_SOFTCASCADE_HPP__ #include "opencv2/core.hpp" +#include "opencv2/core/gpumat.hpp" namespace cv { namespace softcascade { diff --git a/modules/softcascade/src/cuda/channels.cu b/modules/softcascade/src/cuda/channels.cu index 7b153413d9..6928671481 100644 --- a/modules/softcascade/src/cuda/channels.cu +++ b/modules/softcascade/src/cuda/channels.cu @@ -42,6 +42,9 @@ #include "opencv2/core/cuda_devptrs.hpp" +namespace cv { namespace softcascade { namespace internal { +void error(const char *error_string, const char *file, const int line, const char *func); +}}} #if defined(__GNUC__) #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) #else /* defined(__CUDACC__) || defined(__MSVC__) */ @@ -50,7 +53,7 @@ static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") { - // if (cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), file, line, func); + if (cudaSuccess != err) cv::softcascade::internal::error(cudaGetErrorString(err), file, line, func); } __host__ __device__ __forceinline__ int divUp(int total, int grain) @@ -490,16 +493,30 @@ namespace cv { namespace softcascade { namespace device B2Y = 1868 }; - template static __device__ __forceinline__ unsigned char RGB2GrayConvert(uint src) + template static __device__ __forceinline__ unsigned char RGB2GrayConvert(unsigned char b, unsigned char g, unsigned char r) { - uint b = 0xffu & (src >> (bidx * 8)); - uint g = 0xffu & (src >> 8); - uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + // uint b = 0xffu & (src >> (bidx * 8)); + // uint g = 0xffu & (src >> 8); + // uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); return CV_DESCALE((uint)(b * B2Y + g * G2Y + r * R2Y), yuv_shift); } - void transform(const cv::gpu::PtrStepSz& bgr, cv::gpu::PtrStepSzb gray) + __global__ void device_transform(const cv::gpu::PtrStepSz bgr, cv::gpu::PtrStepSzb gray) { + const int y = blockIdx.y * blockDim.y + threadIdx.y; + const int x = blockIdx.x * blockDim.x + threadIdx.x; + + const uchar3 colored = (uchar3)(bgr.ptr(y))[x]; + + gray.ptr(y)[x] = RGB2GrayConvert<0>(colored.x, colored.y, colored.z); + } + /////// + void transform(const cv::gpu::PtrStepSz& bgr, cv::gpu::PtrStepSzb gray) + { + const dim3 block(32, 8); + const dim3 grid(divUp(bgr.cols, block.x), divUp(bgr.rows, block.y)); + device_transform<<>>(bgr, gray); + cudaSafeCall(cudaDeviceSynchronize()); } }}} \ No newline at end of file diff --git a/modules/softcascade/src/detector_cuda.cpp b/modules/softcascade/src/detector_cuda.cpp index 4652a2b2ac..bbadc9c54f 100644 --- a/modules/softcascade/src/detector_cuda.cpp +++ b/modules/softcascade/src/detector_cuda.cpp @@ -536,7 +536,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type()); - device::shrink(rois, flds.genRoiTmp); + device::shrink(rois, flds.mask); //cv::gpu::transpose(flds.genRoiTmp, flds.mask, s); if (type == CV_8UC3) @@ -594,15 +594,16 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor virtual void apply(InputArray _frame, OutputArray _shrunk, cv::gpu::Stream& s = cv::gpu::Stream::Null()) { - const cv::gpu::GpuMat frame = _frame.getGpuMat(); + bgr = _frame.getGpuMat(); //cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0); - _shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1); + _shrunk.create(bgr.rows * (4 + bins) / shrinkage, bgr.cols / shrinkage, CV_8UC1); cv::gpu::GpuMat shrunk = _shrunk.getGpuMat(); - channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1); + channels.create(bgr.rows * (4 + bins), bgr.cols, CV_8UC1); setZero(channels, s); + gray.create(bgr.size(), CV_8UC1); cv::softcascade::device::transform(bgr, gray); //cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY); cv::softcascade::device::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins); diff --git a/modules/softcascade/src/precomp.hpp b/modules/softcascade/src/precomp.hpp index e72b77d409..2b6be26640 100644 --- a/modules/softcascade/src/precomp.hpp +++ b/modules/softcascade/src/precomp.hpp @@ -56,6 +56,7 @@ namespace cv { namespace softcascade { namespace internal { + namespace rnd { typedef cv::RNG_MT19937 engine; diff --git a/modules/softcascade/src/softcascade_init.cpp b/modules/softcascade/src/softcascade_init.cpp index 902ad48a1d..9563ac629a 100644 --- a/modules/softcascade/src/softcascade_init.cpp +++ b/modules/softcascade/src/softcascade_init.cpp @@ -63,4 +63,22 @@ bool initModule_softcascade(void) return (sc1->info() != 0) && (sc->info() != 0); } +namespace internal { +void error(const char *error_string, const char *file, const int line, const char *func) +{ + int code = CV_GpuApiCallError; + + if (std::uncaught_exception()) + { + const char* errorStr = cvErrorStr(code); + const char* function = func ? func : "unknown function"; + + std::cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line; + std::cerr.flush(); + } + else + cv::error( cv::Exception(code, error_string, func, file, line) ); +} +} + } } \ No newline at end of file diff --git a/modules/softcascade/test/test_precomp.hpp b/modules/softcascade/test/test_precomp.hpp index 03d049b94a..80bff6536d 100644 --- a/modules/softcascade/test/test_precomp.hpp +++ b/modules/softcascade/test/test_precomp.hpp @@ -55,5 +55,6 @@ # include "opencv2/softcascade.hpp" # include "opencv2/imgproc.hpp" # include "opencv2/highgui.hpp" +# include "utility.hpp" #endif diff --git a/modules/softcascade/test/utility.hpp b/modules/softcascade/test/utility.hpp index 2018a156eb..9849b525e2 100644 --- a/modules/softcascade/test/utility.hpp +++ b/modules/softcascade/test/utility.hpp @@ -42,10 +42,9 @@ #ifndef __OPENCV_SOFTCASCADE_TEST_UTILITY_HPP__ #define __OPENCV_SOFTCASCADE_TEST_UTILITY_HPP__ -#include "opencv2/core/core.hpp" +#include "opencv2/core.hpp" #include "opencv2/core/gpumat.hpp" -#include "opencv2/ts/ts.hpp" -#include "opencv2/ts/ts_perf.hpp" +#include "opencv2/ts.hpp" ////////////////////////////////////////////////////////////////////// // Gpu devices diff --git a/samples/cpp/peopledetect.cpp b/samples/cpp/peopledetect.cpp index 893f8cb732..85d77b851e 100644 --- a/samples/cpp/peopledetect.cpp +++ b/samples/cpp/peopledetect.cpp @@ -1,7 +1,7 @@ -#include "opencv2/imgproc/imgproc.hpp" -#include "opencv2/objdetect/objdetect.hpp" -#include "opencv2/highgui/highgui.hpp" -#include +#include "opencv2/imgproc.hpp" +#include "opencv2/objdetect.hpp" +#include "opencv2/highgui.hpp" +#include #include #include diff --git a/samples/gpu/softcascade.cpp b/samples/gpu/softcascade.cpp index e3683583a9..9313a5ab04 100644 --- a/samples/gpu/softcascade.cpp +++ b/samples/gpu/softcascade.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include int main(int argc, char** argv)