mirror of https://github.com/opencv/opencv.git
parent
9633e2c93e
commit
a80e0cf8bd
3 changed files with 172 additions and 0 deletions
@ -0,0 +1,10 @@ |
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8) |
||||
|
||||
FIND_PACKAGE(CUDA REQUIRED) |
||||
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) |
||||
|
||||
FIND_PACKAGE(OpenCV REQUIRED COMPONENTS core) |
||||
INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS}) |
||||
|
||||
CUDA_ADD_EXECUTABLE(opencv_thrust main.cu) |
||||
TARGET_LINK_LIBRARIES(opencv_thrust ${OpenCV_LIBS}) |
@ -0,0 +1,74 @@ |
||||
#pragma once |
||||
#include <opencv2/core/cuda.hpp> |
||||
|
||||
#include <thrust/iterator/permutation_iterator.h> |
||||
#include <thrust/iterator/transform_iterator.h> |
||||
#include <thrust/iterator/counting_iterator.h> |
||||
#include <thrust/device_ptr.h> |
||||
|
||||
template<typename T> struct
|
||||
CV_TYPE |
||||
{ |
||||
static const int DEPTH; |
||||
}; |
||||
|
||||
template<> static const int CV_TYPE<float>::DEPTH = CV_32F; |
||||
template<> static const int CV_TYPE<double>::DEPTH = CV_64F; |
||||
template<> static const int CV_TYPE<int>::DEPTH = CV_32S; |
||||
template<> static const int CV_TYPE<uchar>::DEPTH = CV_8U; |
||||
template<> static const int CV_TYPE<char>::DEPTH = CV_8S; |
||||
template<> static const int CV_TYPE<ushort>::DEPTH = CV_16U; |
||||
template<> static const int CV_TYPE<short>::DEPTH = CV_16S; |
||||
|
||||
template<typename T> struct step_functor : public thrust::unary_function<int, int> |
||||
{ |
||||
int columns; |
||||
int step; |
||||
int channels; |
||||
__host__ __device__ step_functor(int columns_, int step_, int channels_ = 1) : columns(columns_), step(step_), channels(channels_) { }; |
||||
__host__ step_functor(cv::cuda::GpuMat& mat) |
||||
{ |
||||
CV_Assert(mat.depth() == CV_TYPE<T>::DEPTH); |
||||
columns = mat.cols; |
||||
step = mat.step / sizeof(T); |
||||
channels = mat.channels();
|
||||
} |
||||
__host__ __device__ |
||||
int operator()(int x) const |
||||
{ |
||||
int row = x / columns; |
||||
int idx = (row * step) + (x % columns)*channels; |
||||
return idx; |
||||
} |
||||
}; |
||||
|
||||
/*
|
||||
@Brief GpuMatBeginItr returns a thrust compatible iterator to the beginning of a GPU mat's memory.
|
||||
@Param mat is the input matrix |
||||
@Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order |
||||
*/ |
||||
template<typename T> |
||||
thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>> GpuMatBeginItr(cv::cuda::GpuMat mat, int channel = 0) |
||||
{ |
||||
if (channel == -1) |
||||
mat = mat.reshape(1); |
||||
CV_Assert(mat.depth() == CV_TYPE<T>::DEPTH); |
||||
CV_Assert(channel < mat.channels()); |
||||
return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel), |
||||
thrust::make_transform_iterator(thrust::make_counting_iterator(0), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels()))); |
||||
} |
||||
/*
|
||||
@Brief GpuMatEndItr returns a thrust compatible iterator to the end of a GPU mat's memory. |
||||
@Param mat is the input matrix |
||||
@Param channel is the channel of the matrix that the iterator is accessing. If set to -1, the iterator will access every element in sequential order |
||||
*/ |
||||
template<typename T> |
||||
thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>> GpuMatEndItr(cv::cuda::GpuMat mat, int channel = 0) |
||||
{ |
||||
if (channel == -1) |
||||
mat = mat.reshape(1); |
||||
CV_Assert(mat.depth() == CV_TYPE<T>::DEPTH); |
||||
CV_Assert(channel < mat.channels()); |
||||
return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel), |
||||
thrust::make_transform_iterator(thrust::make_counting_iterator(mat.rows*mat.cols), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels()))); |
||||
} |
@ -0,0 +1,88 @@ |
||||
#include "Thrust_interop.hpp" |
||||
|
||||
#include <thrust/transform.h> |
||||
#include <thrust/random.h> |
||||
#include <thrust/sort.h> |
||||
struct prg |
||||
{ |
||||
float a, b; |
||||
|
||||
__host__ __device__ |
||||
prg(float _a = 0.f, float _b = 1.f) : a(_a), b(_b) {}; |
||||
|
||||
__host__ __device__ |
||||
float operator()(const unsigned int n) const |
||||
{ |
||||
thrust::default_random_engine rng; |
||||
thrust::uniform_real_distribution<float> dist(a, b); |
||||
rng.discard(n); |
||||
|
||||
return dist(rng); |
||||
} |
||||
}; |
||||
|
||||
template<typename T> struct pred_eq |
||||
{ |
||||
T value; |
||||
int channel; |
||||
__host__ __device__ |
||||
pred_eq(T value_, int channel_ = 0) :value(value_), channel(channel_){} |
||||
__host__ __device__ |
||||
bool operator()(const T val) const |
||||
{ |
||||
return val == value; |
||||
} |
||||
template<int N> |
||||
__host__ __device__ bool operator()(const cv::Vec<T, N>& val) |
||||
{ |
||||
if (channel < N) |
||||
return val.val[channel] == value; |
||||
return false; |
||||
} |
||||
}; |
||||
|
||||
|
||||
int main(void) |
||||
{ |
||||
// Generate a 2 channel row matrix with 100 elements. Set the first channel to be the element index, and the second to be a randomly |
||||
// generated value. Sort by the randomly generated value while maintaining index association. |
||||
{ |
||||
cv::cuda::GpuMat d_idx(1, 100, CV_32SC2); |
||||
|
||||
auto keyBegin = GpuMatBeginItr<int>(d_idx, 1); |
||||
auto keyEnd = GpuMatEndItr<int>(d_idx, 1); |
||||
|
||||
auto idxBegin = GpuMatBeginItr<int>(d_idx, 0); |
||||
auto idxEnd = GpuMatEndItr<int>(d_idx, 0); |
||||
|
||||
thrust::sequence(idxBegin, idxEnd); |
||||
thrust::transform(idxBegin, idxEnd, keyBegin, prg(0, 10)); |
||||
thrust::sort_by_key(keyBegin, keyEnd, idxBegin); |
||||
|
||||
cv::Mat h_idx(d_idx); |
||||
} |
||||
|
||||
// Randomly fill a row matrix with 100 elements between -1 and 1 |
||||
{ |
||||
cv::cuda::GpuMat d_value(1, 100, CV_32F); |
||||
auto valueBegin = GpuMatBeginItr<float>(d_value); |
||||
auto valueEnd = GpuMatEndItr<float>(d_value); |
||||
thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_value.cols), valueBegin, prg(-1, 1)); |
||||
|
||||
cv::Mat h_value(d_value); |
||||
} |
||||
|
||||
// OpenCV has count non zero, but what if you want to count a specific value? |
||||
{ |
||||
cv::cuda::GpuMat d_value(1, 100, CV_32S); |
||||
d_value.setTo(cv::Scalar(0)); |
||||
d_value.colRange(10, 50).setTo(cv::Scalar(15)); |
||||
auto count = thrust::count(GpuMatBeginItr<int>(d_value), GpuMatEndItr<int>(d_value), 15); |
||||
std::cout << count << std::endl; |
||||
} |
||||
|
||||
|
||||
|
||||
|
||||
return 0; |
||||
} |
Loading…
Reference in new issue