parallel_do replaced with parallel_for_ in driver_gpu_multi sample.

10 years ago · 72063bf136
parent 091c7a3821
commit 72063bf136
1 changed files with 70 additions and 82 deletions
--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@ -7,56 +7,101 @@
 #endif

 #include <iostream>
-#include "cvconfig.h"
 #include "opencv2/core/core.hpp"
 #include "opencv2/gpu/gpu.hpp"

-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
-
+#if defined(__arm__)
 int main()
 {
-#if !defined(HAVE_CUDA)
-    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
-#endif
-
-#if !defined(HAVE_TBB)
-    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
-#endif
-
-#if defined(__arm__)
    std::cout << "Unsupported for ARM CUDA library." << std::endl;
-#endif
-
    return 0;
 }
-
 #else

 #include <cuda.h>
 #include <cuda_runtime.h>
-#include "opencv2/core/internal.hpp" // For TBB wrappers

 using namespace std;
 using namespace cv;
 using namespace cv::gpu;

-struct Worker { void operator()(int device_id) const; };
-void destroyContexts();
-
 #define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)
 inline void safeCall_(int code, const char* expr, const char* file, int line)
 {
    if (code != CUDA_SUCCESS)
    {
        std::cout << "CUDA driver API error: code " << code << ", expr " << expr
-            << ", file " << file << ", line " << line << endl;
-        destroyContexts();
+        << ", file " << file << ", line " << line << endl;
        exit(-1);
    }
 }

-// Each GPU is associated with its own context
-CUcontext contexts[2];
+struct Worker: public ParallelLoopBody
+{
+    Worker(int num_devices)
+    {
+        count = num_devices;
+        contexts = new contexts CUcontext[num_devices];
+        for (int device_id = 0; i < num_devices; device_id++)
+        {
+            CUdevice device;
+            safeCall(cuDeviceGet(&device, device_id));
+            safeCall(cuCtxCreate(&contexts[device_id], 0, device));
+        }
+    }
+
+    virtual void operator() (const Range& range) const
+    {
+        for (int device_id = range.start; device_id != range.end; ++device_id)
+        {
+            // Set the proper context
+            safeCall(cuCtxPushCurrent(contexts[device_id]));
+
+            Mat src(1000, 1000, CV_32F);
+            Mat dst;
+
+            RNG rng(0);
+            rng.fill(src, RNG::UNIFORM, 0, 1);
+
+            // CPU works
+            transpose(src, dst);
+
+            // GPU works
+            GpuMat d_src(src);
+            GpuMat d_dst;
+            transpose(d_src, d_dst);
+
+            // Check results
+            bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
+            std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
+            << (passed ? "passed" : "FAILED") << endl;
+
+            // Deallocate data here, otherwise deallocation will be performed
+            // after context is extracted from the stack
+            d_src.release();
+            d_dst.release();
+
+            CUcontext prev_context;
+            safeCall(cuCtxPopCurrent(&prev_context));
+        }
+    }
+
+    ~Worker()
+    {
+        if ((contexts != NULL) && count != 0)
+        {
+            for (int device_id = 0; i < num_devices; device_id++)
+            {
+                safeCall(cuCtxDestroy(contexts[device_id]));
+            }
+
+            delete[] contexts;
+        }
+    }
+
+    CUcontext* contexts;
+    int count;
+};

 int main()
 {
@ -84,67 +129,10 @@ int main()
    // Init CUDA Driver API
    safeCall(cuInit(0));

-    // Create context for GPU #0
-    CUdevice device;
-    safeCall(cuDeviceGet(&device, 0));
-    safeCall(cuCtxCreate(&contexts[0], 0, device));
-
-    CUcontext prev_context;
-    safeCall(cuCtxPopCurrent(&prev_context));
-
-    // Create context for GPU #1
-    safeCall(cuDeviceGet(&device, 1));
-    safeCall(cuCtxCreate(&contexts[1], 0, device));
-
-    safeCall(cuCtxPopCurrent(&prev_context));
+    // Execute calculation
+    parallel_for_(cv::Range(0, num_devices, Worker(num_devices));

-    // Execute calculation in two threads using two GPUs
-    int devices[] = {0, 1};
-    parallel_do(devices, devices + 2, Worker());
-
-    destroyContexts();
    return 0;
 }

-
-void Worker::operator()(int device_id) const
-{
-    // Set the proper context
-    safeCall(cuCtxPushCurrent(contexts[device_id]));
-
-    Mat src(1000, 1000, CV_32F);
-    Mat dst;
-
-    RNG rng(0);
-    rng.fill(src, RNG::UNIFORM, 0, 1);
-
-    // CPU works
-    transpose(src, dst);
-
-    // GPU works
-    GpuMat d_src(src);
-    GpuMat d_dst;
-    transpose(d_src, d_dst);
-
-    // Check results
-    bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
-    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
-        << (passed ? "passed" : "FAILED") << endl;
-
-    // Deallocate data here, otherwise deallocation will be performed
-    // after context is extracted from the stack
-    d_src.release();
-    d_dst.release();
-
-    CUcontext prev_context;
-    safeCall(cuCtxPopCurrent(&prev_context));
-}
-
-
-void destroyContexts()
-{
-    safeCall(cuCtxDestroy(contexts[0]));
-    safeCall(cuCtxDestroy(contexts[1]));
-}
-
 #endif