diff --git a/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/CMakeLists.txt b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/CMakeLists.txt new file mode 100644 index 0000000000..037d508569 --- /dev/null +++ b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/CMakeLists.txt @@ -0,0 +1,10 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.8) + +FIND_PACKAGE(CUDA REQUIRED) +INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) + +FIND_PACKAGE(OpenCV REQUIRED COMPONENTS core) +INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS}) + +CUDA_ADD_EXECUTABLE(opencv_thrust main.cu) +TARGET_LINK_LIBRARIES(opencv_thrust ${OpenCV_LIBS}) \ No newline at end of file diff --git a/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp new file mode 100644 index 0000000000..b4d0d7c4d2 --- /dev/null +++ b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp @@ -0,0 +1,74 @@ +#pragma once +#include + +#include +#include +#include +#include + +template struct +CV_TYPE +{ + static const int DEPTH; +}; + +template<> static const int CV_TYPE::DEPTH = CV_32F; +template<> static const int CV_TYPE::DEPTH = CV_64F; +template<> static const int CV_TYPE::DEPTH = CV_32S; +template<> static const int CV_TYPE::DEPTH = CV_8U; +template<> static const int CV_TYPE::DEPTH = CV_8S; +template<> static const int CV_TYPE::DEPTH = CV_16U; +template<> static const int CV_TYPE::DEPTH = CV_16S; + +template struct step_functor : public thrust::unary_function +{ + int columns; + int step; + int channels; + __host__ __device__ step_functor(int columns_, int step_, int channels_ = 1) : columns(columns_), step(step_), channels(channels_) { }; + __host__ step_functor(cv::cuda::GpuMat& mat) + { + CV_Assert(mat.depth() == CV_TYPE::DEPTH); + columns = mat.cols; + step = mat.step / sizeof(T); + channels = mat.channels(); + } + __host__ __device__ + int operator()(int x) const + { + int row = x / columns; + int idx = (row * step) + (x % columns)*channels; + return idx; + } +}; + +/* + @Brief GpuMatBeginItr returns a thrust compatible iterator to the beginning of a GPU mat's memory. + @Param mat is the input matrix + @Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order +*/ +template +thrust::permutation_iterator, thrust::transform_iterator, thrust::counting_iterator>> GpuMatBeginItr(cv::cuda::GpuMat mat, int channel = 0) +{ + if (channel == -1) + mat = mat.reshape(1); + CV_Assert(mat.depth() == CV_TYPE::DEPTH); + CV_Assert(channel < mat.channels()); + return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr(0) + channel), + thrust::make_transform_iterator(thrust::make_counting_iterator(0), step_functor(mat.cols, mat.step / sizeof(T), mat.channels()))); +} +/* +@Brief GpuMatEndItr returns a thrust compatible iterator to the end of a GPU mat's memory. +@Param mat is the input matrix +@Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order +*/ +template +thrust::permutation_iterator, thrust::transform_iterator, thrust::counting_iterator>> GpuMatEndItr(cv::cuda::GpuMat mat, int channel = 0) +{ + if (channel == -1) + mat = mat.reshape(1); + CV_Assert(mat.depth() == CV_TYPE::DEPTH); + CV_Assert(channel < mat.channels()); + return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr(0) + channel), + thrust::make_transform_iterator(thrust::make_counting_iterator(mat.rows*mat.cols), step_functor(mat.cols, mat.step / sizeof(T), mat.channels()))); +} \ No newline at end of file diff --git a/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu new file mode 100644 index 0000000000..c6784fef0b --- /dev/null +++ b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu @@ -0,0 +1,88 @@ +#include "Thrust_interop.hpp" + +#include +#include +#include +struct prg +{ + float a, b; + + __host__ __device__ + prg(float _a = 0.f, float _b = 1.f) : a(_a), b(_b) {}; + + __host__ __device__ + float operator()(const unsigned int n) const + { + thrust::default_random_engine rng; + thrust::uniform_real_distribution dist(a, b); + rng.discard(n); + + return dist(rng); + } +}; + +template struct pred_eq +{ + T value; + int channel; + __host__ __device__ + pred_eq(T value_, int channel_ = 0) :value(value_), channel(channel_){} + __host__ __device__ + bool operator()(const T val) const + { + return val == value; + } + template + __host__ __device__ bool operator()(const cv::Vec& val) + { + if (channel < N) + return val.val[channel] == value; + return false; + } +}; + + +int main(void) +{ + // Generate a 2 channel row matrix with 100 elements. Set the first channel to be the element index, and the second to be a randomly + // generated value. Sort by the randomly generated value while maintaining index association. + { + cv::cuda::GpuMat d_idx(1, 100, CV_32SC2); + + auto keyBegin = GpuMatBeginItr(d_idx, 1); + auto keyEnd = GpuMatEndItr(d_idx, 1); + + auto idxBegin = GpuMatBeginItr(d_idx, 0); + auto idxEnd = GpuMatEndItr(d_idx, 0); + + thrust::sequence(idxBegin, idxEnd); + thrust::transform(idxBegin, idxEnd, keyBegin, prg(0, 10)); + thrust::sort_by_key(keyBegin, keyEnd, idxBegin); + + cv::Mat h_idx(d_idx); + } + + // Randomly fill a row matrix with 100 elements between -1 and 1 + { + cv::cuda::GpuMat d_value(1, 100, CV_32F); + auto valueBegin = GpuMatBeginItr(d_value); + auto valueEnd = GpuMatEndItr(d_value); + thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_value.cols), valueBegin, prg(-1, 1)); + + cv::Mat h_value(d_value); + } + + // OpenCV has count non zero, but what if you want to count a specific value? + { + cv::cuda::GpuMat d_value(1, 100, CV_32S); + d_value.setTo(cv::Scalar(0)); + d_value.colRange(10, 50).setTo(cv::Scalar(15)); + auto count = thrust::count(GpuMatBeginItr(d_value), GpuMatEndItr(d_value), 15); + std::cout << count << std::endl; + } + + + + + return 0; +}