Implemented asynchronous calls for GpuMat::upload() and GpuMat::download(). Added a test for the asynchronous calls.

15 years ago · fff2160d1f
parent bb2fe87b32
commit fff2160d1f
4 changed files with 153 additions and 4 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -107,10 +107,12 @@ namespace cv
            //! Performs a blocking upload of data to GpuMat.
            void upload(const cv::Mat& m);
            //! Stream overload: the upload is enqueued on \a stream (forwards to CudaStream::enqueueUpload).
            //! NOTE(review): presumably returns before the copy has finished, so the caller
            //! should synchronize on the stream before using the data - confirm.
            void upload(const cv::Mat& m, CudaStream & stream);
            //! Downloads data from device to host memory. Blocking calls.
            operator Mat() const;
            void download(cv::Mat& m) const;
            //! Stream overload: the download is enqueued on \a stream (forwards to CudaStream::enqueueDownload).
            //! NOTE(review): presumably not blocking, unlike the two calls above; \a m should
            //! only be read after the stream has been synchronized - confirm.
            void download(cv::Mat& m, CudaStream & stream) const;
            //! returns a new GpuMatrix header for the specified row
            GpuMat row(int y) const;
--- a/modules/gpu/src/cudastream.cpp
+++ b/modules/gpu/src/cudastream.cpp
@ -41,6 +41,7 @@
 //M*/
 #include "precomp.hpp"
 #include "cuda_shared.hpp"
 using namespace cv;
 using namespace cv::gpu;
@ -158,17 +159,37 @@ void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(
 // Set-to-scalar on this stream, without a mask.
 // Forwards the matrix, its depth, the raw scalar components (val.val), its channel
 // count and this stream's handle (impl->stream) to impl::set_to_without_mask.
 // NOTE(review): despite its name and constness, `src` appears to be the matrix that
 // gets filled - confirm against impl::set_to_without_mask.
 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)
 {
-    CV_Assert(!"Not implemented");
+    cv::gpu::impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream);
 }
 // Masked set-to-scalar on this stream.
 // Forwards the matrix, its depth, the raw scalar components (val.val), the mask, the
 // channel count and this stream's handle (impl->stream) to impl::set_to_with_mask.
 // NOTE(review): presumably only elements selected by `mask` are written, and `src`
 // is the matrix being modified - confirm against impl::set_to_with_mask.
 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)
 {
-    CV_Assert(!"Not implemented");
+    cv::gpu::impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream);
 }
-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b)
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
 {
-    CV_Assert(!"Not implemented");
    // Enqueues a depth conversion of `src` into `dst` on this stream.
    //   rtype       - requested destination type; a negative value means "same type as src".
    //                 Only its depth is used: the channel count always comes from src.
    //   alpha, beta - scale and shift handed to impl::convert_to
    //                 (presumably dst = src * alpha + beta, as in Mat::convertTo - confirm).
    // The conversion is treated as a plain copy when alpha == 1 and beta == 0 (within epsilon).
+    bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
    if( rtype < 0 )
        rtype = src.type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());
    int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
    if( sdepth == ddepth && noScale )
    {
        // Same depth and no scaling: this is a pure copy. Enqueue it on this stream
        // rather than calling the blocking GpuMat::copyTo(), so the operation stays
        // ordered with the other work queued on the stream.
        enqueueCopy(src, dst);
        return;
    }
    GpuMat temp;
    const GpuMat* psrc = &src;
    // In-place conversion to a different depth: keep a second header to the source data
    // in `temp` so it stays referenced while dst.create() below reallocates dst.
    if( sdepth != ddepth && psrc == &dst )
        psrc = &(temp = src);
    dst.create( src.size(), rtype );
    cv::gpu::impl::convert_to(*psrc, sdepth, dst, ddepth, psrc->cols * psrc->channels(), psrc->rows, alpha, beta, impl->stream);
 }
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@ -84,6 +84,12 @@ void cv::gpu::GpuMat::upload(const Mat& m)
    cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
 }
 // Stream variant of upload(): hands the host-to-device copy of `m` into this matrix
 // over to `stream` via CudaStream::enqueueUpload.
 // NOTE(review): presumably returns before the copy has completed; the caller is
 // expected to synchronize on the stream - confirm.
 void cv::gpu::GpuMat::upload(const cv::Mat& m, CudaStream & stream)
 {
    // The host source must contain data (checked in debug builds only).
    CV_DbgAssert(!m.empty());
    stream.enqueueUpload(m, *this);
 }
 void cv::gpu::GpuMat::download(cv::Mat& m) const
 {
    CV_DbgAssert(!this->empty());
@ -91,6 +97,12 @@ void cv::gpu::GpuMat::download(cv::Mat& m) const
    cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
 }
 // Stream variant of download(): hands the device-to-host copy of this matrix into `m`
 // over to `stream` via CudaStream::enqueueDownload.
 // NOTE(review): presumably returns before the copy has completed; `m` should only be
 // read after the stream has been synchronized - confirm.
 void cv::gpu::GpuMat::download(cv::Mat& m, CudaStream & stream) const
 {
    // The precondition is on the SOURCE of the copy, i.e. this device matrix, exactly as
    // in the blocking download(); `m` is the destination and may legitimately be empty.
    CV_DbgAssert(!this->empty());
    stream.enqueueDownload(*this, m);
 }
 void cv::gpu::GpuMat::copyTo( GpuMat& m ) const
 {
    CV_DbgAssert(!this->empty());
--- a/tests/gpu/src/operator_async_call.cpp
+++ b/tests/gpu/src/operator_async_call.cpp
@ -0,0 +1,114 @@
 #include "gputest.hpp"
 #include "highgui.h"
 #include "cv.h"
 #include <string>
 #include <iostream>
 #include <fstream>
 #include <iterator>
 #include <limits>
 #include <numeric>
 #include <iomanip> // for  cout << setw()
 using namespace cv;
 using namespace std;
 using namespace gpu;
 // Test case exercising CudaStream copies of GpuMat data and comparing the results
 // against the host-side matrix.
 class CV_GpuMatASyncCall : public CvTest
 {
    public:
        CV_GpuMatASyncCall();
        ~CV_GpuMatASyncCall();
    protected:
        // Debug helper: shows `mat` in a window titled `name` (used only when
        // PRINT_MATRIX is defined).
        template <typename T>
        void print_mat(const T & mat, const std::string & name) const;
        // Test entry point; the int argument is ignored.
        void run(int);
        // Runs the stream copies and returns true when the results match.
        bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
    private:
        // Dimensions of the test matrix created in run().
        int rows;
        int cols;
 };
 // Sets up the test with the two strings passed to CvTest and a fixed 234x123 test matrix size.
 CV_GpuMatASyncCall::CV_GpuMatASyncCall()
    : CvTest( "GPU-MatOperatorASyncCall", "async" ),
      rows(234),
      cols(123)
 {
    // Uncomment to display the matrices from compare_matrix():
    //#define PRINT_MATRIX
 }
 // Nothing to release: the class only holds two ints.
 CV_GpuMatASyncCall::~CV_GpuMatASyncCall() {}
 // Debug helper: displays `mat` in a highgui window titled `name`.
 // NOTE(review): for GpuMat arguments this presumably relies on the implicit
 // GpuMat -> Mat conversion (a blocking download) - confirm.
 template<typename T>
 void CV_GpuMatASyncCall::print_mat(const T & mat, const std::string & name) const
 {
    cv::imshow(name, mat);
 }
 // Copies `gpumat` three times through a CudaStream, repeats the copies synchronously,
 // and checks every result against the host reference `cpumat`.
 //
 // The matrices supplied by the caller are now actually used: previously the function
 // ignored `gpumat` and compared all-zero matrices built only from cpumat's size and
 // type, so the caller's data was never verified.
 //
 // Returns true when the summed norm of the differences is below 1.0; otherwise prints
 // the sum and returns false.
 bool CV_GpuMatASyncCall::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
 {
    // Destination of the synchronous copies below; starts out as an upload of the
    // host reference data.
    GpuMat gmat0(cpumat);
    GpuMat gmat1;
    GpuMat gmat2;
    GpuMat gmat3;
    //int64 time = getTickCount();
    // Stream path: enqueue three device-to-device copies, then wait for all of them.
    CudaStream stream;
    stream.enqueueCopy(gpumat, gmat1);
    stream.enqueueCopy(gpumat, gmat2);
    stream.enqueueCopy(gpumat, gmat3);
    stream.waitForCompletion();
    //int64 time1 = getTickCount();
    // Blocking path: the same amount of copying, done with GpuMat::copyTo.
    gmat1.copyTo(gmat0);
    gmat2.copyTo(gmat0);
    gmat3.copyTo(gmat0);
    //int64 time2 = getTickCount();
    //std::cout << "\ntime async: " << std::fixed << std::setprecision(12) << double((time1 - time)  / (double)getTickFrequency());
    //std::cout << "\ntime  sync: " << std::fixed << std::setprecision(12) << double((time2 - time1) / (double)getTickFrequency());
    //std::cout << "\n";
 #ifdef PRINT_MATRIX
    print_mat(cpumat, "cpu mat");
    print_mat(gmat0, "gpu mat 0");
    print_mat(gmat1, "gpu mat 1");
    print_mat(gmat2, "gpu mat 2");
    print_mat(gmat3, "gpu mat 3");
    cv::waitKey(0);
 #endif
    // Every copy must equal the host reference; the GpuMat operands are converted to
    // Mat implicitly for norm().
    double ret = norm(cpumat, gmat0) + norm(cpumat, gmat1) + norm(cpumat, gmat2) + norm(cpumat, gmat3);
    if (ret < 1.0)
        return true;

    std::cout << "return : " << ret << "\n";
    return false;
 }
 // Test entry point: builds a rows x cols 8-bit matrix filled with 127, uploads it to
 // the GPU and reports the outcome of compare_matrix() to the test system.
 void CV_GpuMatASyncCall::run( int /* start_from */)
 {
    Mat cpumat(rows, cols, CV_8U, Scalar::all(127));
    GpuMat gpumat(cpumat);

    const bool passed = compare_matrix(cpumat, gpumat);
    ts->set_failed_test_info(passed ? CvTS::OK : CvTS::FAIL_GENERIC);
 }
 // Global instance of the test case.
 // NOTE(review): presumably the CvTest base constructor registers it with the test
 // system - confirm.
 CV_GpuMatASyncCall CV_GpuMatASyncCall_test;