diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index 9b97a1b92a..4e45c4432f 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -334,6 +334,7 @@ if(HAVE_GAPI_ONEVPL) message(FATAL_ERROR "PkgConfig not found: building HAVE_GAPI_ONEVPL without libVA support is impossible on UNIX systems") endif() ocv_target_link_libraries(${the_module} PRIVATE ${PKG_LIBVA_LIBRARIES} ${PKG_THREAD_LIBRARIES}) + ocv_target_link_libraries(opencv_test_gapi PRIVATE ${PKG_LIBVA_LIBRARIES} ${PKG_THREAD_LIBRARIES}) endif() endif() @@ -373,6 +374,9 @@ if(TARGET example_gapi_onevpl_infer_single_roi) if(HAVE_D3D11 AND HAVE_OPENCL) ocv_target_include_directories(example_gapi_onevpl_infer_single_roi SYSTEM PRIVATE ${OPENCL_INCLUDE_DIRS}) endif() + if(PKG_LIBVA_FOUND) + ocv_target_link_libraries(example_gapi_onevpl_infer_single_roi PRIVATE ${PKG_LIBVA_LIBRARIES} ${PKG_THREAD_LIBRARIES}) + endif() endif() if(TARGET example_gapi_pipeline_modeling_tool) diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp index c53b1b31db..119188d96a 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp @@ -65,6 +65,10 @@ GAPI_EXPORTS Device create_dx11_device(Device::Ptr device_ptr, const std::string& device_name); GAPI_EXPORTS Context create_dx11_context(Context::Ptr ctx_ptr); +GAPI_EXPORTS Device create_vaapi_device(Device::Ptr device_ptr, + const std::string& device_name, + int file_description = -1); +GAPI_EXPORTS Context create_vaapi_context(Context::Ptr ctx_ptr); } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp index a89cd5071b..ed8c74ea8c 100644 --- a/modules/gapi/samples/onevpl_infer_single_roi.cpp +++ 
b/modules/gapi/samples/onevpl_infer_single_roi.cpp @@ -34,6 +34,17 @@ #endif // HAVE_DIRECTX #endif // HAVE_INF_ENGINE +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#include "va/va.h" +#include "va/va_drm.h" + +#include +#include +#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#endif // __linux__ + + const std::string about = "This is an OpenCV-based version of oneVPLSource decoder example"; const std::string keys = @@ -41,14 +52,21 @@ const std::string keys = "{ input | | Path to the input demultiplexed video file }" "{ output | | Path to the output RAW video file. Use .avi extension }" "{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }" - "{ faced | AUTO | Target device for face detection model (e.g. AUTO, GPU, VPU, ...) }" - "{ cfg_params | :;: | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" + "{ faced | GPU | Target device for face detection model (e.g. AUTO, GPU, VPU, ...) }" + "{ cfg_params | | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automatically if 0 }" "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}" "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}" - "{ roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. Identified automatically when not set }"; + "{ roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. 
Identified automatically when not set }" + "{ source_device | CPU | choose device for decoding }" + "{ preproc_device | CPU | choose device for preprocessing }"; + namespace { +bool is_gpu(const std::string &device_name) { + return device_name.find("GPU") != std::string::npos; +} + std::string get_weights_path(const std::string &model_path) { const auto EXT_LEN = 4u; const auto sz = model_path.size(); @@ -260,6 +278,75 @@ GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) { namespace cfg { typename cv::gapi::wip::onevpl::CfgParam create_from_string(const std::string &line); + +struct flow { + flow(bool preproc, bool rctx) : + vpl_preproc_enable(preproc), + ie_remote_ctx_enable(rctx) { + } + bool vpl_preproc_enable = false; + bool ie_remote_ctx_enable = false; +}; + +using support_matrix = + std::map >>>; +support_matrix resolved_conf{{ + {"GPU", {{ + {"", {{ "CPU", std::make_shared(false, false)}, + { "GPU", {/* unsupported: + * ie GPU preproc isn't available */}} + }}, + + {"CPU", {{ "CPU", {/* unsupported: preproc mix */}}, + { "GPU", {/* unsupported: preproc mix */}} + }}, + + {"GPU", {{ "CPU", std::make_shared(true, false)}, + { "GPU", std::make_shared(true, true)}}} + }} + }, + {"CPU", {{ + {"", {{ "CPU", std::make_shared(false, false)}, + { "GPU", std::make_shared(false, false)} + }}, + + {"CPU", {{ "CPU", std::make_shared(true, false)}, + { "GPU", std::make_shared(true, false)} + }}, + + {"GPU", {{ "CPU", {/* unsupported: preproc mix */}}, + { "GPU", {/* unsupported: preproc mix */}}}} + }} + } + }}; + +static void print_available_cfg(std::ostream &out, + const std::string &source_device, + const std::string &preproc_device, + const std::string &ie_device_id) { + const std::string source_device_cfg_name("--source_device="); + const std::string preproc_device_cfg_name("--preproc_device="); + const std::string ie_cfg_name("--faced="); + out << "unsupported acceleration param combinations:\n" + << source_device_cfg_name << source_device << " " + << preproc_device_cfg_name 
<< preproc_device << " " + << ie_cfg_name << ie_device_id << + "\n\nSupported matrix:\n\n" << std::endl; + for (const auto &s_d : cfg::resolved_conf) { + std::string prefix = source_device_cfg_name + s_d.first; + for (const auto &p_d : s_d.second) { + std::string mid_prefix = prefix + +"\t" + preproc_device_cfg_name + + (p_d.first.empty() ? "" : p_d.first); + for (const auto &i_d : p_d.second) { + if (i_d.second) { + std::cerr << mid_prefix << "\t" << ie_cfg_name <("frames_pool_size"); const auto source_vpp_queue_capacity = cmd.get("vpp_frames_pool_size"); const auto device_id = cmd.get("faced"); + const auto source_device = cmd.get("source_device"); + const auto preproc_device = cmd.get("preproc_device"); + + // validate support matrix + std::shared_ptr flow_settings = cfg::resolved_conf[source_device][preproc_device][device_id]; + if (!flow_settings) { + cfg::print_available_cfg(std::cerr, source_device, preproc_device, device_id); + return -1; + } // check output file extension if (!output.empty()) { @@ -303,6 +399,7 @@ int main(int argc, char *argv[]) { return -1; } + // apply VPL source optimization params if (source_decode_queue_capacity != 0) { source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_frames_pool_size(source_decode_queue_capacity)); } @@ -316,22 +413,57 @@ int main(int argc, char *argv[]) { device_id }; - // Create device_ptr & context_ptr using graphic API - // InferenceEngine requires such device & context to create its own - // remote shared context through InferenceEngine::ParamMap in - // GAPI InferenceEngine backend to provide interoperability with onevpl::GSource - // So GAPI InferenceEngine backend and onevpl::GSource MUST share the same - // device and context - cv::util::optional accel_device; - cv::util::optional accel_ctx; - -#ifdef HAVE_INF_ENGINE + // It is allowed (and highly recommended) to reuse predefined device_ptr & context_ptr objects + // received from user application. 
The current sample demonstrates how to deal with this situation. + // + // But if you do not need this fine-grained configuration of acceleration devices then + // just use the default constructors for onevpl::GSource, IE and the preprocessing module. + // But please pay attention that the default pipeline construction in this case will be + // very inefficient and will carry out multiple CPU-GPU memory copies + // + // If you want to reach max performance and use a copy-free approach for a specific + // device & context selection then follow the steps below. + // The situation is a little more complicated in comparison with the default configuration, thus + // let's focus on it: + // + // - all component-participants (Source, Preprocessing, Inference) + // must share the same device & context instances + // + // - you must wrap your available device & context instances into thin + // `cv::gapi::wip::onevpl::Device` & `cv::gapi::wip::onevpl::Context`. + // !!! Please pay attention that both objects are weak wrappers, so you must ensure + // that the device & context stay alive until the full pipeline is created !!! + // + // - you should pass such wrappers as constructor arguments for each component in the pipeline: + // a) use the extended constructor of `onevpl::GSource` to activate the predefined device & context + // b) use the `cfgPreprocessingParams` method of `cv::gapi::ie::Params` to enable `PreprocesingEngine` + // for the predefined device & context + // c) use `InferenceEngine::ParamMap` (passed via `cfgContextParams`) to activate the remote ctx in Inference Engine for the given + // device & context + // + // + //// P.S. the current sample also supports heterogeneous pipeline construction. + //// It is possible to make up a mixed-device approach. + //// Please feel free to explore different configurations! 
+ + cv::util::optional gpu_accel_device; + cv::util::optional gpu_accel_ctx; + cv::gapi::wip::onevpl::Device cpu_accel_device = cv::gapi::wip::onevpl::create_host_device(); + cv::gapi::wip::onevpl::Context cpu_accel_ctx = cv::gapi::wip::onevpl::create_host_context(); + // create GPU device if requested + if (is_gpu(device_id) + || is_gpu(source_device) + || is_gpu(preproc_device)) { #ifdef HAVE_DIRECTX #ifdef HAVE_D3D11 - auto dx11_dev = createCOMPtrGuard(); - auto dx11_ctx = createCOMPtrGuard(); + // create DX11 device & context owning handles. + // wip::Device & wip::Context provide non-owning semantic of resources and act + // as weak references API wrappers in order to carry type-erased resources type + // into appropriate modules: onevpl::GSource, PreprocEngine and InferenceEngine + // Until modules are not created owner handles must stay alive + auto dx11_dev = createCOMPtrGuard(); + auto dx11_ctx = createCOMPtrGuard(); - if (device_id.find("GPU") != std::string::npos) { auto adapter_factory = createCOMPtrGuard(); { IDXGIFactory* out_factory = nullptr; @@ -365,40 +497,102 @@ int main(int argc, char *argv[]) { } std::tie(dx11_dev, dx11_ctx) = create_device_with_ctx(intel_adapter.get()); - accel_device = cv::util::make_optional( + gpu_accel_device = cv::util::make_optional( cv::gapi::wip::onevpl::create_dx11_device( reinterpret_cast(dx11_dev.get()), - device_id)); - accel_ctx = cv::util::make_optional( + "GPU")); + gpu_accel_ctx = cv::util::make_optional( cv::gapi::wip::onevpl::create_dx11_context( reinterpret_cast(dx11_ctx.get()))); +#endif // HAVE_D3D11 +#endif // HAVE_DIRECTX +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) + static const char *predefined_vaapi_devices_list[] {"/dev/dri/renderD128", + "/dev/dri/renderD129", + "/dev/dri/card0", + "/dev/dri/card1", + nullptr}; + std::stringstream ss; + int device_fd = -1; + VADisplay va_handle = nullptr; + for (const char **device_path = predefined_vaapi_devices_list; + *device_path != 
nullptr; device_path++) { + device_fd = open(*device_path, O_RDWR); + if (device_fd < 0) { + std::string info("Cannot open GPU file: \""); + info = info + *device_path + "\", error: " + strerror(errno); + ss << info << std::endl; + continue; + } + va_handle = vaGetDisplayDRM(device_fd); + if (!va_handle) { + close(device_fd); + std::string info("VAAPI device vaGetDisplayDRM failed, error: "); + info += strerror(errno); + ss << info << std::endl; + continue; + } + int major_version = 0, minor_version = 0; + VAStatus status {}; + status = vaInitialize(va_handle, &major_version, &minor_version); + if (VA_STATUS_SUCCESS != status) { + close(device_fd); + va_handle = nullptr; + + std::string info("Cannot initialize VAAPI device, error: "); + info += vaErrorStr(status); + ss << info << std::endl; + continue; + } + std::cout << "VAAPI created for device: " << *device_path << ", version: " + << major_version << "." << minor_version << std::endl; + break; + } - // put accel type description for VPL source - source_cfgs.push_back(cfg::create_from_string( - "mfxImplDescription.AccelerationMode" - ":" - "MFX_ACCEL_MODE_VIA_D3D11")); + // check device creation + if (!va_handle) { + std::cerr << "Cannot create VAAPI device. 
Log:\n" << ss.str() << std::endl; + return -1; + } + gpu_accel_device = cv::util::make_optional( + cv::gapi::wip::onevpl::create_vaapi_device(reinterpret_cast(va_handle), + "GPU", device_fd)); + gpu_accel_ctx = cv::util::make_optional( + cv::gapi::wip::onevpl::create_vaapi_context(nullptr)); +#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#endif // #ifdef __linux__ } -#endif // HAVE_D3D11 -#endif // HAVE_DIRECTX - // set ctx_config for GPU device only - no need in case of CPU device type - if (accel_device.has_value() && - accel_device.value().get_name().find("GPU") != std::string::npos) { +#ifdef HAVE_INF_ENGINE + // activate remote ctx in Inference Engine for GPU device + // when other pipeline component use the GPU device too + if (flow_settings->ie_remote_ctx_enable) { InferenceEngine::ParamMap ctx_config({{"CONTEXT_TYPE", "VA_SHARED"}, - {"VA_DEVICE", accel_device.value().get_ptr()} }); + {"VA_DEVICE", gpu_accel_device.value().get_ptr()} }); face_net.cfgContextParams(ctx_config); + std::cout << "enforce InferenceEngine remote context on device: " << device_id << std::endl; // NB: consider NV12 surface because it's one of native GPU image format face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }}); + std::cout << "enforce InferenceEngine NV12 blob" << std::endl; } #endif // HAVE_INF_ENGINE - // turn on preproc - if (accel_device.has_value() && accel_ctx.has_value()) { - face_net.cfgPreprocessingParams(accel_device.value(), - accel_ctx.value()); - std::cout << "enforce VPP preprocessing on " << device_id << std::endl; + // turn on VPP PreprocesingEngine if available & requested + if (flow_settings->vpl_preproc_enable) { + if (is_gpu(preproc_device)) { + // activate VPP PreprocesingEngine on GPU + face_net.cfgPreprocessingParams(gpu_accel_device.value(), + gpu_accel_ctx.value()); + } else { + // activate VPP PreprocesingEngine on CPU + face_net.cfgPreprocessingParams(cpu_accel_device, + cpu_accel_ctx); + } + std::cout << "enforce VPP preprocessing on 
device: " << preproc_device << std::endl; + } else { + std::cout << "use InferenceEngine default preprocessing" << std::endl; } auto kernels = cv::gapi::kernels @@ -414,10 +608,17 @@ int main(int argc, char *argv[]) { // Create source cv::gapi::wip::IStreamSource::Ptr cap; try { - if (accel_device.has_value() && accel_ctx.has_value()) { + if (is_gpu(source_device)) { + std::cout << "enforce VPL Source deconding on device: " << source_device << std::endl; + // use special 'Device' constructor for `onevpl::GSource` + // put accel type description for VPL source + source_cfgs.push_back(cfg::create_from_string( + "mfxImplDescription.AccelerationMode" + ":" + "MFX_ACCEL_MODE_VIA_D3D11")); cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs, - accel_device.value(), - accel_ctx.value()); + gpu_accel_device.value(), + gpu_accel_ctx.value()); } else { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs); } diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 6647e484b1..eca07ce9df 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -300,6 +300,7 @@ struct IEUnit { cv::util::any_cast(¶ms.context_config); if (ctx_params != nullptr) { auto ie_core = cv::gimpl::ie::wrap::getCore(); + GAPI_LOG_DEBUG(nullptr, "create IE remote ctx for device id: " << params.device_id); rctx = ie_core.CreateContext(params.device_id, *ctx_params); } @@ -703,45 +704,6 @@ cv::MediaFrame preprocess_frame_impl(cv::MediaFrame &&in_frame, const std::strin return std::move(in_frame); } -inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i, - const std::string &layer_name, - const cv::util::optional &opt_roi, - cv::MediaFrame* out_keep_alive_frame, - bool* out_is_preprocessed) { - GAPI_Assert(ctx.inShape(i) == cv::GShape::GFRAME && - "Remote blob is supported for MediaFrame only"); - cv::MediaFrame frame = ctx.inFrame(i); - if (ctx.uu.preproc_engine_impl) { - 
GAPI_LOG_DEBUG(nullptr, "Try to use preprocessing for decoded remote frame in remote ctx"); - frame = preprocess_frame_impl(std::move(frame), layer_name, ctx, opt_roi, - out_keep_alive_frame, out_is_preprocessed); - } - - // Request params for result frame whatever it got preprocessed or not - cv::util::any any_blob_params = frame.blobParams(); - - using ParamType = std::pair; - using NV12ParamType = std::pair; - - NV12ParamType* blob_params = cv::util::any_cast(&any_blob_params); - if (blob_params == nullptr) { - GAPI_Assert(false && "Incorrect type of blobParams:" - "expected std::pair," - "with ParamType std::pair>"); - } - - //The parameters are TensorDesc and ParamMap for both y and uv blobs - auto y_blob = ctx.uu.rctx->CreateBlob(blob_params->first.first, blob_params->first.second); - auto uv_blob = ctx.uu.rctx->CreateBlob(blob_params->second.first, blob_params->second.second); - -#if INF_ENGINE_RELEASE >= 2021010000 - return IE::make_shared_blob(y_blob, uv_blob); -#else - return IE::make_shared_blob(y_blob, uv_blob); -#endif -} - inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i, cv::gapi::ie::TraitAs hint, @@ -749,11 +711,6 @@ inline IE::Blob::Ptr extractBlob(IECallContext& ctx, const cv::util::optional &opt_roi, cv::MediaFrame* out_keep_alive_frame = nullptr, bool* out_is_preprocessed = nullptr) { - if (ctx.uu.rctx != nullptr) { - return extractRemoteBlob(ctx, i, layer_name, opt_roi, - out_keep_alive_frame, out_is_preprocessed); - } - switch (ctx.inShape(i)) { case cv::GShape::GFRAME: { auto frame = ctx.inFrame(i); @@ -762,6 +719,37 @@ inline IE::Blob::Ptr extractBlob(IECallContext& ctx, frame = preprocess_frame_impl(std::move(frame), layer_name, ctx, opt_roi, out_keep_alive_frame, out_is_preprocessed); } + + // NB: check OV remote device context availability. 
+ // if it exist and MediaFrame shares the same device context + // then we create a remote blob without memory copy + if (ctx.uu.rctx != nullptr) { + // Request params for result frame whatever it got preprocessed or not + cv::util::any any_blob_params = frame.blobParams(); + using ParamType = std::pair; + using NV12ParamType = std::pair; + + NV12ParamType* blob_params = cv::util::any_cast(&any_blob_params); + if (blob_params == nullptr) { + GAPI_Assert(false && "Incorrect type of blobParams:" + "expected std::pair," + "with ParamType std::pair>"); + } + + //The parameters are TensorDesc and ParamMap for both y and uv blobs + auto y_blob = ctx.uu.rctx->CreateBlob(blob_params->first.first, blob_params->first.second); + auto uv_blob = ctx.uu.rctx->CreateBlob(blob_params->second.first, blob_params->second.second); + +#if INF_ENGINE_RELEASE >= 2021010000 + return IE::make_shared_blob(y_blob, uv_blob); +#else + return IE::make_shared_blob(y_blob, uv_blob); +#endif + } + + // NB: If no OV remote context created then use default MediaFrame accessor approach: + // it invokes memory copying operation If GPU MediaFrame come ctx.views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R))); return wrapIE(*(ctx.views.back()), frame.desc()); } @@ -1158,6 +1146,7 @@ static void PostOutputs(InferenceEngine::InferRequest &request, ctx->out.post(std::move(output), ctx->eptr); } + ctx->views.clear(); ctx->releaseKeepAliveFrame(&request); } diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp index 67ffdf9377..d81c66b901 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp @@ -178,8 +178,8 @@ VPLCPUAccelerationPolicy::create_surface_pool(size_t pool_size, size_t surface_s GAPI_LOG_INFO(nullptr, "Released workspace memory: " << ptr); ptr = nullptr; #else + 
GAPI_LOG_INFO(nullptr, "Workspace memory to release: " << ptr); free(ptr); - GAPI_LOG_INFO(nullptr, "Released workspace memory: " << ptr); ptr = nullptr; #endif diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.cpp index 8fa0be9914..82bada7b70 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.cpp @@ -27,30 +27,16 @@ namespace onevpl { VPLVAAPIAccelerationPolicy::VPLVAAPIAccelerationPolicy(device_selector_ptr_t selector) : VPLAccelerationPolicy(selector), cpu_dispatcher(new VPLCPUAccelerationPolicy(selector)), - va_handle(), - device_fd(-1) { + va_handle() { #if defined(HAVE_VA) || defined(HAVE_VA_INTEL) - // TODO Move it out in device selector - device_fd = open("/dev/dri/renderD128", O_RDWR); - if (device_fd < 0) { - GAPI_LOG_WARNING(nullptr, "VAAPI device descriptor \"/dev/dri/renderD128\" has not found"); - throw std::runtime_error("cannot open VAAPI device"); - } - va_handle = vaGetDisplayDRM(device_fd); - if (!va_handle) { - GAPI_LOG_WARNING(nullptr, "VAAPI device vaGetDisplayDRM failed, error: " << strerror(errno)); - close(device_fd); - throw std::runtime_error("vaGetDisplayDRM failed"); - } - int major_version = 0, minor_version = 0; - VAStatus status {}; - status = vaInitialize(va_handle, &major_version, &minor_version); - if (VA_STATUS_SUCCESS != status) { - GAPI_LOG_WARNING(nullptr, "Cannot initialize VAAPI device, error: " << vaErrorStr(status)); - close(device_fd); - throw std::runtime_error("vaInitialize failed"); - } - GAPI_LOG_INFO(nullptr, "created"); + // setup VAAPI device + IDeviceSelector::DeviceScoreTable devices = get_device_selector()->select_devices(); + GAPI_Assert(devices.size() == 1 && "Multiple(or zero) acceleration VAAPI devices are not unsupported"); + AccelType accel_type = devices.begin()->second.get_type(); + 
GAPI_Assert(accel_type == AccelType::VAAPI && + "Unexpected device AccelType while is waiting AccelType::VAAPI"); + + va_handle = reinterpret_cast(devices.begin()->second.get_ptr()); #else // defined(HAVE_VA) || defined(HAVE_VA_INTEL) GAPI_Assert(false && "VPLVAAPIAccelerationPolicy unavailable in current configuration"); #endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) @@ -58,7 +44,6 @@ VPLVAAPIAccelerationPolicy::VPLVAAPIAccelerationPolicy(device_selector_ptr_t sel VPLVAAPIAccelerationPolicy::~VPLVAAPIAccelerationPolicy() { vaTerminate(va_handle); - close(device_fd); GAPI_LOG_INFO(nullptr, "destroyed"); } diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.hpp index ee7453f982..37a59a0070 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_va_api.hpp @@ -50,7 +50,6 @@ private: std::unique_ptr cpu_dispatcher; #ifdef __linux__ VADisplay va_handle; - int device_fd; // TODO Move it out in device selector #endif // __linux__ }; } // namespace onevpl diff --git a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp index 574860e03d..77cfbb18b1 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp @@ -96,6 +96,8 @@ void LockAdapter::unlock_write(mfxMemId mid, mfxFrameData &data) { SharedLock* LockAdapter::set_adaptee(SharedLock* new_impl) { SharedLock* old_impl = impl; + GAPI_LOG_DEBUG(nullptr, "this: " << this << + ", old: " << old_impl << ", new: " << new_impl); GAPI_DbgAssert(old_impl == nullptr || new_impl == nullptr && "Must not be previous impl"); impl = new_impl; return old_impl; @@ -184,6 +186,8 @@ void DX11AllocationItem::on_first_in_impl(mfxFrameData *ptr) { 
D3D11_MAP mapType = D3D11_MAP_READ; UINT mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; + GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture_ptr() << + ", subresorce: " << get_subresource()); shared_device_context->CopySubresourceRegion(get_staging_texture_ptr(), 0, 0, 0, 0, get_texture_ptr(), @@ -245,8 +249,8 @@ mfxStatus DX11AllocationItem::release_access(mfxFrameData *ptr) { } mfxStatus DX11AllocationItem::shared_access_acquire_unsafe(mfxFrameData *ptr) { - GAPI_LOG_DEBUG(nullptr, "acquire READ lock: " << this); - GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture_ptr() << + GAPI_LOG_DEBUG(nullptr, "acquire READ lock: " << this << + ", texture: " << get_texture_ptr() << ", sub id: " << get_subresource()); // shared access requires elastic barrier // first-in visited thread uses resource mapping on host memory @@ -257,6 +261,7 @@ mfxStatus DX11AllocationItem::shared_access_acquire_unsafe(mfxFrameData *ptr) { if (!(ptr->Y && (ptr->UV || (ptr->U && ptr->V)))) { GAPI_LOG_WARNING(nullptr, "No any data obtained: " << this); + GAPI_DbgAssert(false && "shared access must provide data"); return MFX_ERR_LOCK_MEMORY; } GAPI_LOG_DEBUG(nullptr, "READ access granted: " << this); @@ -264,8 +269,8 @@ mfxStatus DX11AllocationItem::shared_access_acquire_unsafe(mfxFrameData *ptr) { } mfxStatus DX11AllocationItem::shared_access_release_unsafe(mfxFrameData *ptr) { - GAPI_LOG_DEBUG(nullptr, "releasing READ lock: " << this); - GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture_ptr() << + GAPI_LOG_DEBUG(nullptr, "releasing READ lock: " << this << + ", texture: " << get_texture_ptr() << ", sub id: " << get_subresource()); // releasing shared access requires elastic barrier // last-out thread must make memory unmapping then and only then no more @@ -278,8 +283,8 @@ mfxStatus DX11AllocationItem::shared_access_release_unsafe(mfxFrameData *ptr) { } mfxStatus DX11AllocationItem::exclusive_access_acquire_unsafe(mfxFrameData *ptr) { - GAPI_LOG_DEBUG(nullptr, "acquire WRITE lock: " << this); - 
GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture_ptr() << + GAPI_LOG_DEBUG(nullptr, "acquire WRITE lock: " << this << + ", texture: " << get_texture_ptr() << ", sub id: " << get_subresource()); D3D11_MAP mapType = D3D11_MAP_WRITE; UINT mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; @@ -321,8 +326,8 @@ mfxStatus DX11AllocationItem::exclusive_access_acquire_unsafe(mfxFrameData *ptr) } mfxStatus DX11AllocationItem::exclusive_access_release_unsafe(mfxFrameData *ptr) { - GAPI_LOG_DEBUG(nullptr, "releasing WRITE lock: " << this); - GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture_ptr() << + GAPI_LOG_DEBUG(nullptr, "releasing WRITE lock: " << this << + ", texture: " << get_texture_ptr() << ", sub id: " << get_subresource()); get_device_ctx_ptr()->Unmap(get_staging_texture_ptr(), 0); diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp index 751ed7abbd..24a5b9fb7f 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp @@ -64,7 +64,7 @@ MediaFrame::View VPLMediaFrameCPUAdapter::access(MediaFrame::Access) { } cv::util::any VPLMediaFrameCPUAdapter::blobParams() const { - GAPI_Assert("VPLMediaFrameCPUAdapter::blobParams() is not implemented"); + throw std::runtime_error("VPLMediaFrameCPUAdapter::blobParams() is not implemented"); return {}; } diff --git a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp index 90bf3e8849..28f01c5718 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp +++ b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp @@ -30,6 +30,16 @@ #endif // HAVE_D3D11 #endif // HAVE_DIRECTX +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#include "va/va.h" +#include "va/va_drm.h" + +#include +#include 
+#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#endif // __linux__ + #include #include "opencv2/core/directx.hpp" @@ -37,6 +47,23 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { +#ifdef __linux__ +struct Aux { + ~Aux() { + for (int fd : fds) { + close(fd); + } + } + + void remember_fd(int fd) { + fds.insert(fd); + } +private: + std::set fds; +}; +#else +struct Aux {}; +#endif static std::vector insertCfgparam(std::vector &¶m_array, AccelType type) { switch (type) { @@ -153,7 +180,78 @@ CfgParamDeviceSelector::CfgParamDeviceSelector(const CfgParams& cfg_params) : break; } case MFX_IMPL_VIA_VAAPI : { - GAPI_LOG_WARNING(nullptr, "TODO MFX_IMPL_VIA_VAAPI falls back to CPU case") +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) + static const char *predefined_vaapi_devices_list[] {"/dev/dri/renderD128", + "/dev/dri/renderD129", + "/dev/dri/card0", + "/dev/dri/card1", + nullptr}; + std::stringstream ss; + int device_fd = -1; + VADisplay va_handle;va_handle = nullptr; + for (const char **device_path = predefined_vaapi_devices_list; + *device_path != nullptr; device_path++) { + device_fd = open(*device_path, O_RDWR); + if (device_fd < 0) { + std::string info("Cannot open GPU file: \""); + info = info + *device_path + "\", error: " + strerror(errno); + GAPI_LOG_DEBUG(nullptr, info); + ss << info << std::endl; + continue; + } + va_handle = vaGetDisplayDRM(device_fd); + if (!va_handle) { + close(device_fd); + + std::string info("VAAPI device vaGetDisplayDRM failed, error: "); + info += strerror(errno); + GAPI_LOG_DEBUG(nullptr, info); + ss << info << std::endl; + continue; + } + int major_version = 0, minor_version = 0; + VAStatus status {}; + status = vaInitialize(va_handle, &major_version, &minor_version); + if (VA_STATUS_SUCCESS != status) { + close(device_fd); + va_handle = nullptr; + + std::string info("Cannot initialize VAAPI device, error: "); + info += vaErrorStr(status); + GAPI_LOG_DEBUG(nullptr, info); + ss << info << 
std::endl; + continue; + } + GAPI_LOG_INFO(nullptr, "VAAPI created for device: " << *device_path); + break; + } + + // check device creation + if (!va_handle) { + GAPI_LOG_WARNING(nullptr, "Cannot create VAAPI device. Log:\n" << ss.str()); + throw std::logic_error(std::string("Cannot create device for \"") + + CfgParam::acceleration_mode_name() + + ": MFX_IMPL_VIA_VAAPI\""); + } + + // Unfortunately VAAPI doesn't provide an API for extracting the initial FD value from VADisplay + // (although that value is stored in the VADisplay fields). Since we are the only creator + // of the VAAPI device here, we must clean up all the allocated resources ourselves, + // and the FD must definitely be closed too. So, let's use the complementary struct `Aux`, which + // represents some kind of 'platform specific data' and stores the opened FD for + // a later release + platform_specific_data.reset (new Aux); + platform_specific_data->remember_fd(device_fd); + + suggested_device = IDeviceSelector::create(va_handle, "GPU", AccelType::VAAPI); + suggested_context = IDeviceSelector::create(nullptr, AccelType::VAAPI); +#else // defined(HAVE_VA) || defined(HAVE_VA_INTEL) + GAPI_Assert(false && "VPLVAAPIAccelerationPolicy unavailable in current linux configuration"); +#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#else // #ifdef __linux__ + GAPI_Assert(false && "MFX_IMPL_VIA_VAAPI is supported on linux only"); +#endif // #ifdef __linux__ break; } case MFX_ACCEL_MODE_NA: { @@ -234,6 +332,19 @@ CfgParamDeviceSelector::CfgParamDeviceSelector(Device::Ptr device_ptr, #endif // #if defined(HAVE_DIRECTX) && defined(HAVE_D3D11) break; } + case MFX_IMPL_VIA_VAAPI : { +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) + suggested_device = IDeviceSelector::create(device_ptr, device_id, AccelType::VAAPI); + suggested_context = IDeviceSelector::create(nullptr, AccelType::VAAPI); +#else // defined(HAVE_VA) || defined(HAVE_VA_INTEL) + GAPI_Assert(false && 
"VPLVAAPIAccelerationPolicy unavailable in current linux configuration"); +#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#else // #ifdef __linux__ + GAPI_Assert(false && "MFX_IMPL_VIA_VAAPI is supported on linux only"); +#endif // #ifdef __linux__ + break; + } case MFX_ACCEL_MODE_NA: { GAPI_LOG_WARNING(nullptr, "Incompatible \"" << CfgParam::acceleration_mode_name() << ": MFX_ACCEL_MODE_NA\" with " @@ -284,7 +395,13 @@ CfgParamDeviceSelector::CfgParamDeviceSelector(const Device &device, #endif // defined(HAVE_DIRECTX) && defined(HAVE_D3D11) } case AccelType::VAAPI: - GAPI_LOG_WARNING(nullptr, "TODO MFX_IMPL_VIA_VAAPI falls back to CPU case") +#ifdef __linux__ +#if !defined(HAVE_VA) && !defined(HAVE_VA_INTEL) // reject only when neither VA flavour is available + GAPI_Assert(false && "VPLVAAPIAccelerationPolicy unavailable in current linux configuration"); +#endif // !defined(HAVE_VA) && !defined(HAVE_VA_INTEL) +#else // #ifdef __linux__ + GAPI_Assert(false && "MFX_IMPL_VIA_VAAPI is supported on linux only"); +#endif // #ifdef __linux__ break; case AccelType::HOST: break; @@ -332,6 +449,16 @@ CfgParamDeviceSelector::~CfgParamDeviceSelector() { #endif // defined(HAVE_DIRECTX) && defined(HAVE_D3D11) break; } + case AccelType::VAAPI: { +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) + VADisplay va_handle = reinterpret_cast(suggested_device.get_ptr()); + vaTerminate(va_handle); + platform_specific_data.reset(); +#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#endif // #ifdef __linux__ + break; // do not fall through into `default` + } default: break; } diff --git a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp index 5dae1c508d..c09218c41e 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp +++ b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp @@ -20,6 +20,7 @@ namespace gapi { namespace wip { namespace onevpl { +struct Aux; struct GAPI_EXPORTS CfgParamDeviceSelector final: public IDeviceSelector { 
CfgParamDeviceSelector(const CfgParams& params = {}); CfgParamDeviceSelector(Device::Ptr device_ptr, @@ -37,6 +38,7 @@ struct GAPI_EXPORTS CfgParamDeviceSelector final: public IDeviceSelector { private: Device suggested_device; Context suggested_context; + std::unique_ptr platform_specific_data; }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp b/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp index d58d1d3d3c..0b374177da 100644 --- a/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp +++ b/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp @@ -119,6 +119,19 @@ Context create_dx11_context(Context::Ptr ctx_ptr) { AccelType::DX11); } +Device create_vaapi_device(Device::Ptr device_ptr, + const std::string& device_name, + int file_description) { + return detail::DeviceContextCreator::create_entity(device_ptr, + device_name, + AccelType::VAAPI); +} + +Context create_vaapi_context(Context::Ptr ctx_ptr) { + return detail::DeviceContextCreator::create_entity(ctx_ptr, + AccelType::VAAPI); +} + } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/source_priv.cpp b/modules/gapi/src/streaming/onevpl/source_priv.cpp index 765bdd3b64..f460a2a6ed 100644 --- a/modules/gapi/src/streaming/onevpl/source_priv.cpp +++ b/modules/gapi/src/streaming/onevpl/source_priv.cpp @@ -321,11 +321,16 @@ std::unique_ptr GSource::Priv::initializeHWAccel(std::sha const std::vector& GSource::Priv::getDefaultCfgParams() { +#ifdef __WIN32__ static const std::vector def_params = get_params_from_string( "mfxImplDescription.Impl: MFX_IMPL_TYPE_HARDWARE\n" "mfxImplDescription.AccelerationMode: MFX_ACCEL_MODE_VIA_D3D11\n"); - +#else + static const std::vector def_params = + get_params_from_string( + "mfxImplDescription.Impl: MFX_IMPL_TYPE_HARDWARE\n"); +#endif return def_params; } diff --git 
a/modules/gapi/test/streaming/gapi_streaming_vpl_device_selector.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_device_selector.cpp index ee1be9f433..47c548368b 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpl_device_selector.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpl_device_selector.cpp @@ -29,6 +29,16 @@ #endif // HAVE_D3D11 #endif // HAVE_DIRECTX +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#include "va/va.h" +#include "va/va_drm.h" + +#include +#include +#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#endif // __linux__ + #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" #include "streaming/onevpl/cfg_param_device_selector.hpp" @@ -208,6 +218,131 @@ TEST(OneVPL_Source_Device_Selector_CfgParam, DX11DeviceFromCfgParamWithDX11Disab #endif // HAVE_D3D11 #endif // HAVE_DIRECTX +#ifdef __linux__ +#if defined(HAVE_VA) || defined(HAVE_VA_INTEL) +TEST(OneVPL_Source_Device_Selector_CfgParam, DefaultDeviceWithEmptyCfgParam_VAAPI_ENABLED) +{ + using namespace cv::gapi::wip::onevpl; + std::vector empty_params; + CfgParamDeviceSelector selector(empty_params); + IDeviceSelector::DeviceScoreTable devs = selector.select_devices(); + EXPECT_TRUE(devs.size() == 1); + test_host_dev_eq(*devs.begin(), IDeviceSelector::Score::MaxActivePriority); + + IDeviceSelector::DeviceContexts ctxs = selector.select_context(); + EXPECT_TRUE(ctxs.size() == 1); + test_host_ctx_eq(*ctxs.begin()); +} + +TEST(OneVPL_Source_Device_Selector_CfgParam, DefaultDeviceWithVAAPIAccelCfgParam_VAAPI_ENABLED) +{ + using namespace cv::gapi::wip::onevpl; + std::vector cfg_params_w_vaapi; + cfg_params_w_vaapi.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_VAAPI)); + std::unique_ptr selector_ptr; + EXPECT_NO_THROW(selector_ptr.reset(new CfgParamDeviceSelector(cfg_params_w_vaapi))); + IDeviceSelector::DeviceScoreTable devs = selector_ptr->select_devices(); + + EXPECT_TRUE(devs.size() == 1); + test_dev_eq(*devs.begin(), 
IDeviceSelector::Score::MaxActivePriority, + AccelType::VAAPI, + std::get<1>(*devs.begin()).get_ptr() /* compare just type */); + + IDeviceSelector::DeviceContexts ctxs = selector_ptr->select_context(); + EXPECT_TRUE(ctxs.size() == 1); + EXPECT_FALSE(ctxs.begin()->get_ptr()); +} + +TEST(OneVPL_Source_Device_Selector_CfgParam, NULLDeviceWithVAAPIAccelCfgParam_VAAPI_ENABLED) +{ + using namespace cv::gapi::wip::onevpl; + std::vector cfg_params_w_vaapi; + cfg_params_w_vaapi.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_VAAPI)); + Device::Ptr empty_device_ptr = nullptr; + Context::Ptr empty_ctx_ptr = nullptr; + EXPECT_THROW(CfgParamDeviceSelector sel(empty_device_ptr, "GPU", + empty_ctx_ptr, + cfg_params_w_vaapi), + std::logic_error); // empty_device_ptr must be invalid +} + + +TEST(OneVPL_Source_Device_Selector_CfgParam, ExternalDeviceWithVAAPIAccelCfgParam_VAAPI_ENABLED) +{ + using namespace cv::gapi::wip::onevpl; + VADisplay va_handle = nullptr; + struct FileDescriptorRAII { + FileDescriptorRAII() :fd (-1) {} + ~FileDescriptorRAII() { reset(-1); } + void reset(int d) { + if (fd != -1) { + close(fd); + } + fd = d; + } + operator int() { return fd; } + private: + FileDescriptorRAII(FileDescriptorRAII& src) = delete; + FileDescriptorRAII& operator=(FileDescriptorRAII& src) = delete; + FileDescriptorRAII(FileDescriptorRAII&& src) = delete; + FileDescriptorRAII& operator=(FileDescriptorRAII&& src) = delete; + int fd = -1; + }; + static const char *predefined_vaapi_devices_list[] {"/dev/dri/renderD128", + "/dev/dri/renderD129", + "/dev/dri/card0", + "/dev/dri/card1", + nullptr}; + + FileDescriptorRAII device_fd; + for (const char **device_path = predefined_vaapi_devices_list; + *device_path != nullptr; device_path++) { + device_fd.reset(open(*device_path, O_RDWR)); + if (device_fd < 0) { + continue; + } + va_handle = vaGetDisplayDRM(device_fd); + if (!va_handle) { + continue; + } + int major_version = 0, minor_version = 0; + VAStatus status {}; + status = 
vaInitialize(va_handle, &major_version, &minor_version); + if (VA_STATUS_SUCCESS != status) { + close(device_fd); + va_handle = nullptr; + continue; + } + break; + } + EXPECT_TRUE(device_fd != -1); + EXPECT_TRUE(va_handle); + auto device = cv::util::make_optional( + cv::gapi::wip::onevpl::create_vaapi_device(reinterpret_cast(va_handle), + "GPU", device_fd)); + auto device_context = cv::util::make_optional( + cv::gapi::wip::onevpl::create_vaapi_context(nullptr)); + + std::unique_ptr selector_ptr; + std::vector cfg_params_w_vaapi; + cfg_params_w_vaapi.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_VAAPI)); + EXPECT_NO_THROW(selector_ptr.reset(new CfgParamDeviceSelector(device.value(), + device_context.value(), + cfg_params_w_vaapi))); + IDeviceSelector::DeviceScoreTable devs = selector_ptr->select_devices(); + + EXPECT_TRUE(devs.size() == 1); + test_dev_eq(*devs.begin(), IDeviceSelector::Score::MaxActivePriority, + AccelType::VAAPI, device.value().get_ptr()); + + IDeviceSelector::DeviceContexts ctxs = selector_ptr->select_context(); + EXPECT_TRUE(ctxs.size() == 1); + EXPECT_EQ(reinterpret_cast(ctxs.begin()->get_ptr()), + device_context.value().get_ptr()); +} +#endif // defined(HAVE_VA) || defined(HAVE_VA_INTEL) +#endif // #ifdef __linux__ + TEST(OneVPL_Source_Device_Selector_CfgParam, UnknownPtrDeviceFromCfgParam) { using namespace cv::gapi::wip::onevpl;