diff --git a/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/CMakeLists.txt b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/CMakeLists.txt
new file mode 100644
index 0000000000..037d508569
--- /dev/null
+++ b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/CMakeLists.txt
@@ -0,0 +1,10 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+
+FIND_PACKAGE(CUDA REQUIRED)
+INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
+
+FIND_PACKAGE(OpenCV REQUIRED COMPONENTS core)
+INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS})
+
+CUDA_ADD_EXECUTABLE(opencv_thrust main.cu)
+TARGET_LINK_LIBRARIES(opencv_thrust ${OpenCV_LIBS})
\ No newline at end of file
diff --git a/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp
new file mode 100644
index 0000000000..b4d0d7c4d2
--- /dev/null
+++ b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/Thrust_interop.hpp
@@ -0,0 +1,74 @@
+#pragma once
+#include <opencv2/core/cuda.hpp>
+
+#include <thrust/iterator/permutation_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/device_ptr.h>
+
+template<typename T> struct
+CV_TYPE
+{
+	static const int DEPTH;
+};
+
+template<> static const int CV_TYPE<float>::DEPTH = CV_32F;
+template<> static const int CV_TYPE<double>::DEPTH = CV_64F;
+template<> static const int CV_TYPE<int>::DEPTH = CV_32S;
+template<> static const int CV_TYPE<uchar>::DEPTH = CV_8U;
+template<> static const int CV_TYPE<char>::DEPTH = CV_8S;
+template<> static const int CV_TYPE<ushort>::DEPTH = CV_16U;
+template<> static const int CV_TYPE<short>::DEPTH = CV_16S;
+
+template<typename T> struct step_functor : public thrust::unary_function<int, int>
+{
+	int columns;
+	int step;
+	int channels;
+	__host__ __device__ step_functor(int columns_, int step_, int channels_ = 1) : columns(columns_), step(step_), channels(channels_)	{	};
+	__host__ step_functor(cv::cuda::GpuMat& mat)
+	{
+		CV_Assert(mat.depth() == CV_TYPE<T>::DEPTH);
+		columns = mat.cols;
+		step = mat.step / sizeof(T);
+		channels = mat.channels();		
+	}
+	__host__ __device__
+		int operator()(int x) const
+	{
+		int row = x / columns;
+		int idx = (row * step) + (x % columns)*channels;
+		return idx;
+	}
+};
+
+/*
+	@Brief GpuMatBeginItr returns a thrust compatible iterator to the beginning of a GPU mat's memory.  
+	@Param mat is the input matrix
+	@Param channel is the channel of the matrix that the iterator is accessing.  If set to -1, the iterator will access every element in sequential order
+*/
+template<typename T>
+thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>>  GpuMatBeginItr(cv::cuda::GpuMat mat, int channel = 0)
+{
+	if (channel == -1)
+		mat = mat.reshape(1);
+	CV_Assert(mat.depth() == CV_TYPE<T>::DEPTH);
+	CV_Assert(channel < mat.channels());
+	return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel),
+		thrust::make_transform_iterator(thrust::make_counting_iterator(0), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels())));
+}
+/*
+@Brief GpuMatEndItr returns a thrust compatible iterator to the end of a GPU mat's memory.
+@Param mat is the input matrix
+@Param channel is the channel of the matrix that the iterator is accessing.  If set to -1, the iterator will access every element in sequential order
+*/
+template<typename T>
+thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor<T>, thrust::counting_iterator<int>>>  GpuMatEndItr(cv::cuda::GpuMat mat, int channel = 0)
+{
+	if (channel == -1)
+		mat = mat.reshape(1);
+	CV_Assert(mat.depth() == CV_TYPE<T>::DEPTH);
+	CV_Assert(channel < mat.channels());
+	return thrust::make_permutation_iterator(thrust::device_pointer_cast(mat.ptr<T>(0) + channel),
+		thrust::make_transform_iterator(thrust::make_counting_iterator(mat.rows*mat.cols), step_functor<T>(mat.cols, mat.step / sizeof(T), mat.channels())));
+}
\ No newline at end of file
diff --git a/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu
new file mode 100644
index 0000000000..c6784fef0b
--- /dev/null
+++ b/samples/cpp/tutorial_code/gpu/gpu-thrust-interop/main.cu
@@ -0,0 +1,88 @@
+#include "Thrust_interop.hpp"
+
+#include <thrust/transform.h>
+#include <thrust/random.h>
+#include <thrust/sort.h>
+struct prg
+{
+	float a, b;
+	
+	__host__ __device__
+		prg(float _a = 0.f, float _b = 1.f) : a(_a), b(_b) {};
+
+	__host__ __device__
+		float operator()(const unsigned int n) const
+	{
+		thrust::default_random_engine rng;
+		thrust::uniform_real_distribution<float> dist(a, b);
+		rng.discard(n);
+
+		return dist(rng);
+	}
+};
+
+template<typename T> struct pred_eq
+{
+	T value;
+	int channel;
+	__host__ __device__
+		pred_eq(T value_, int channel_ = 0) :value(value_), channel(channel_){}
+	__host__ __device__
+		bool operator()(const T val) const
+	{
+		return val == value;
+	}
+	template<int N>
+	__host__ __device__ bool operator()(const cv::Vec<T, N>& val)
+	{
+		if (channel < N)
+			return val.val[channel] == value;
+		return false;
+	}
+};
+
+
+int main(void)
+{
+	// Generate a 2 channel row matrix with 100 elements.  Set the first channel to be the element index, and the second to be a randomly
+	// generated value.  Sort by the randomly generated value while maintaining index association.
+	{
+		cv::cuda::GpuMat d_idx(1, 100, CV_32SC2);
+
+		auto keyBegin = GpuMatBeginItr<int>(d_idx, 1);
+		auto keyEnd = GpuMatEndItr<int>(d_idx, 1);
+
+		auto idxBegin = GpuMatBeginItr<int>(d_idx, 0);
+		auto idxEnd = GpuMatEndItr<int>(d_idx, 0);
+
+		thrust::sequence(idxBegin, idxEnd);
+		thrust::transform(idxBegin, idxEnd, keyBegin, prg(0, 10));
+		thrust::sort_by_key(keyBegin, keyEnd, idxBegin);
+
+		cv::Mat h_idx(d_idx);
+	}
+
+	// Randomly fill a row matrix with 100 elements between -1 and 1
+	{
+		cv::cuda::GpuMat d_value(1, 100, CV_32F);
+		auto valueBegin = GpuMatBeginItr<float>(d_value);
+		auto valueEnd = GpuMatEndItr<float>(d_value);
+		thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(d_value.cols), valueBegin, prg(-1, 1));
+
+		cv::Mat h_value(d_value);
+	}
+
+	// OpenCV has count non zero, but what if you want to count a specific value?
+	{
+		cv::cuda::GpuMat d_value(1, 100, CV_32S);
+		d_value.setTo(cv::Scalar(0));
+		d_value.colRange(10, 50).setTo(cv::Scalar(15));
+		auto count = thrust::count(GpuMatBeginItr<int>(d_value), GpuMatEndItr<int>(d_value), 15);
+		std::cout << count << std::endl;
+	}
+
+	
+
+
+	return 0;
+}