mirror of https://github.com/opencv/opencv.git
Merge branch 'master' of https://github.com/Itseez/opencv into gdal_lan
Conflicts: modules/imgcodecs/include/opencv2/imgcodecs.hpp (pull/5512/head)
commit
a242556b1d
260 changed files with 10174 additions and 5036 deletions
@ -0,0 +1,3 @@ |
||||
## Contributing guidelines |
||||
|
||||
All guidelines for contributing to the OpenCV repository can be found at [`How to contribute guideline`](https://github.com/Itseez/opencv/wiki/How_to_contribute). |
@ -0,0 +1,19 @@ |
||||
# Detection of libva (VA-API) headers.
#
# Main variables:
#   HAVE_VA        - for conditional compilation of OpenCV with/without libva
#   VA_INCLUDE_DIR - cache entry with the directory that contains va/va.h
#   VA_LIBRARIES   - linker flags for libva / libva-drm (set only when found)

# The header lookup is attempted only on non-Android UNIX hosts.
if(UNIX AND NOT ANDROID)
  find_path(VA_INCLUDE_DIR
            NAMES va/va.h
            PATHS "/usr/include"
            PATH_SUFFIXES include
            DOC "Path to libva headers")
endif()

# Publish the outcome of the lookup.
if(NOT VA_INCLUDE_DIR)
  set(HAVE_VA FALSE)
  message(WARNING "libva installation is not found.")
else()
  set(HAVE_VA TRUE)
  set(VA_LIBRARIES "-lva" "-lva-drm")
endif()
@ -1,44 +0,0 @@ |
||||
# Main variables:
#   VAAPI_MSDK_INCLUDE_DIR and VAAPI_IOCL_INCLUDE_DIR to use VAAPI
#   HAVE_VAAPI for conditional compilation OpenCV with/without VAAPI
#
# VAAPI_MSDK_ROOT - root of Intel MSDK installation
# VAAPI_IOCL_ROOT - root of Intel OCL installation
#
# Each root is taken from the environment variable of the same name when that
# variable holds a non-empty value; otherwise the default /opt/intel location
# is used.

if(UNIX AND NOT ANDROID)
    # if($ENV{VAR}) would evaluate the expanded path as a boolean constant or
    # variable name, which is false for any real path. Compare against the
    # empty string instead so the environment override is honoured.
    if(NOT "$ENV{VAAPI_MSDK_ROOT}" STREQUAL "")
        set(VAAPI_MSDK_ROOT "$ENV{VAAPI_MSDK_ROOT}")
    else()
        set(VAAPI_MSDK_ROOT "/opt/intel/mediasdk")
    endif()

    if(NOT "$ENV{VAAPI_IOCL_ROOT}" STREQUAL "")
        set(VAAPI_IOCL_ROOT "$ENV{VAAPI_IOCL_ROOT}")
    else()
        set(VAAPI_IOCL_ROOT "/opt/intel/opencl")
    endif()

    find_path(
    VAAPI_MSDK_INCLUDE_DIR
    NAMES mfxdefs.h
    PATHS ${VAAPI_MSDK_ROOT}
    PATH_SUFFIXES include
    DOC "Path to Intel MSDK headers")

    find_path(
    VAAPI_IOCL_INCLUDE_DIR
    NAMES CL/va_ext.h
    PATHS ${VAAPI_IOCL_ROOT}
    PATH_SUFFIXES include
    DOC "Path to Intel OpenCL headers")
endif()

# Both header sets are required for the interop to be enabled.
if(VAAPI_MSDK_INCLUDE_DIR AND VAAPI_IOCL_INCLUDE_DIR)
    set(HAVE_VAAPI TRUE)
    set(VAAPI_EXTRA_LIBS "-lva" "-lva-drm")
else()
    set(HAVE_VAAPI FALSE)
    message(WARNING "Intel MSDK & OpenCL installation is not found.")
endif()

mark_as_advanced(FORCE VAAPI_MSDK_INCLUDE_DIR VAAPI_IOCL_INCLUDE_DIR)
@ -0,0 +1,44 @@ |
||||
# Main variables:
#   VA_INTEL_MSDK_INCLUDE_DIR and VA_INTEL_IOCL_INCLUDE_DIR to use VA_INTEL
#   HAVE_VA_INTEL for conditional compilation OpenCV with/without VA_INTEL
#
# VA_INTEL_MSDK_ROOT - root of Intel MSDK installation
# VA_INTEL_IOCL_ROOT - root of Intel OCL installation
#
# Each root is taken from the environment variable of the same name when that
# variable holds a non-empty value; otherwise the default /opt/intel location
# is used.

if(UNIX AND NOT ANDROID)
    # if($ENV{VAR}) would evaluate the expanded path as a boolean constant or
    # variable name, which is false for any real path. Compare against the
    # empty string instead so the environment override is honoured.
    if(NOT "$ENV{VA_INTEL_MSDK_ROOT}" STREQUAL "")
        set(VA_INTEL_MSDK_ROOT "$ENV{VA_INTEL_MSDK_ROOT}")
    else()
        set(VA_INTEL_MSDK_ROOT "/opt/intel/mediasdk")
    endif()

    if(NOT "$ENV{VA_INTEL_IOCL_ROOT}" STREQUAL "")
        set(VA_INTEL_IOCL_ROOT "$ENV{VA_INTEL_IOCL_ROOT}")
    else()
        set(VA_INTEL_IOCL_ROOT "/opt/intel/opencl")
    endif()

    find_path(
    VA_INTEL_MSDK_INCLUDE_DIR
    NAMES mfxdefs.h
    PATHS ${VA_INTEL_MSDK_ROOT}
    PATH_SUFFIXES include
    DOC "Path to Intel MSDK headers")

    find_path(
    VA_INTEL_IOCL_INCLUDE_DIR
    NAMES CL/va_ext.h
    PATHS ${VA_INTEL_IOCL_ROOT}
    PATH_SUFFIXES include
    DOC "Path to Intel OpenCL headers")
endif()

# Both header sets are required for the interop to be enabled.
if(VA_INTEL_MSDK_INCLUDE_DIR AND VA_INTEL_IOCL_INCLUDE_DIR)
    set(HAVE_VA_INTEL TRUE)
    set(VA_INTEL_LIBRARIES "-lva" "-lva-drm")
else()
    set(HAVE_VA_INTEL FALSE)
    message(WARNING "Intel MSDK & OpenCL installation is not found.")
endif()

mark_as_advanced(FORCE VA_INTEL_MSDK_INCLUDE_DIR VA_INTEL_IOCL_INCLUDE_DIR)
@ -0,0 +1,52 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
//
|
||||
// Library initialization file
|
||||
//
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
IPP_INITIALIZER_AUTO |
||||
|
||||
/* End of file. */ |
@ -0,0 +1,528 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2015, Itseez, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#ifdef HAVE_VA |
||||
# include <va/va.h> |
||||
#else // HAVE_VA
|
||||
# define NO_VA_SUPPORT_ERROR CV_ErrorNoReturn(cv::Error::StsBadFunc, "OpenCV was build without VA support (libva)") |
||||
#endif // HAVE_VA
|
||||
|
||||
using namespace cv; |
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// CL-VA Interoperability
|
||||
|
||||
#ifdef HAVE_OPENCL |
||||
# include "opencv2/core/opencl/runtime/opencl_core.hpp" |
||||
# include "opencv2/core.hpp" |
||||
# include "opencv2/core/ocl.hpp" |
||||
# include "opencl_kernels_core.hpp" |
||||
#endif // HAVE_OPENCL
|
||||
|
||||
#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL) |
||||
# include <CL/va_ext.h> |
||||
#endif // HAVE_VA_INTEL && HAVE_OPENCL
|
||||
|
||||
namespace cv { namespace va_intel { |
||||
|
||||
#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL) |
||||
|
||||
static clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn clGetDeviceIDsFromVA_APIMediaAdapterINTEL = NULL; |
||||
static clCreateFromVA_APIMediaSurfaceINTEL_fn clCreateFromVA_APIMediaSurfaceINTEL = NULL; |
||||
static clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn clEnqueueAcquireVA_APIMediaSurfacesINTEL = NULL; |
||||
static clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn clEnqueueReleaseVA_APIMediaSurfacesINTEL = NULL; |
||||
|
||||
static bool contextInitialized = false; |
||||
|
||||
#endif // HAVE_VA_INTEL && HAVE_OPENCL
|
||||
|
||||
namespace ocl { |
||||
|
||||
// Picks the OpenCL context that is used together with the VA display `display`.
//
// Built with HAVE_VA_INTEL and HAVE_OPENCL, and called with tryInterop == true:
// every OpenCL platform is probed for the four VA-API media sharing entry
// points. The first platform that provides them, reports a preferred device
// and yields a sharing context is attached to the default Context, and
// `contextInitialized` is raised so the convert functions take the CL path.
// In all other HAVE_VA configurations the default context is returned.
// Without HAVE_VA the call fails through NO_VA_SUPPORT_ERROR.
Context& initializeContextFromVA(VADisplay display, bool tryInterop)
{
    (void)display; (void)tryInterop;
#if !defined(HAVE_VA)
    NO_VA_SUPPORT_ERROR;
#else  // !HAVE_VA
# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
    contextInitialized = false;
    if (tryInterop)
    {
        cl_uint platformCount;
        cl_int status = clGetPlatformIDs(0, NULL, &platformCount);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
        if (platformCount == 0)
            CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");

        std::vector<cl_platform_id> platformIds(platformCount);
        status = clGetPlatformIDs(platformCount, &platformIds[0], NULL);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platform Id list");

        // CL-VA interop needs a platform/device exposing "cl_intel_va_api_media_sharing".
        // The platform extension string does not list it in practice (the device string does),
        // so, following the Intel procedure, devices are obtained through the extension call.
        // The entry points have to be resolved per platform ID, hence the scan below keeps
        // the first platform that gives non-NULL pointers, a device, and a CL context.

        int chosen = -1;
        cl_context sharedContext = 0;
        cl_device_id sharedDevice = 0;

        for (int p = 0; p < (int)platformCount; ++p)
        {
            const cl_platform_id platform = platformIds[p];

            // Resolve the extension entry points for this platform.
            clGetDeviceIDsFromVA_APIMediaAdapterINTEL = (clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)
                clGetExtensionFunctionAddressForPlatform(platform, "clGetDeviceIDsFromVA_APIMediaAdapterINTEL");
            clCreateFromVA_APIMediaSurfaceINTEL       = (clCreateFromVA_APIMediaSurfaceINTEL_fn)
                clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromVA_APIMediaSurfaceINTEL");
            clEnqueueAcquireVA_APIMediaSurfacesINTEL  = (clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)
                clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireVA_APIMediaSurfacesINTEL");
            clEnqueueReleaseVA_APIMediaSurfacesINTEL  = (clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)
                clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseVA_APIMediaSurfacesINTEL");

            const bool entryPointsResolved =
                ((void*)clGetDeviceIDsFromVA_APIMediaAdapterINTEL != NULL) &&
                ((void*)clCreateFromVA_APIMediaSurfaceINTEL != NULL) &&
                ((void*)clEnqueueAcquireVA_APIMediaSurfacesINTEL != NULL) &&
                ((void*)clEnqueueReleaseVA_APIMediaSurfacesINTEL != NULL);
            if (!entryPointsResolved)
                continue;

            // Ask how many devices are preferred for this VA display.
            cl_uint deviceCount = 0;
            status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platform, CL_VA_API_DISPLAY_INTEL, display,
                                                               CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &deviceCount);
            if ((status != CL_SUCCESS) || (deviceCount == 0))
                continue;

            deviceCount = 1; // initializeContextFromHandle() expects only 1 device
            status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platform, CL_VA_API_DISPLAY_INTEL, display,
                                                               CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, deviceCount, &sharedDevice, NULL);
            if (status != CL_SUCCESS)
                continue;

            // Create the CL-VA media sharing OpenCL context.
            cl_context_properties contextProps[] = {
                CL_CONTEXT_VA_API_DISPLAY_INTEL, (cl_context_properties) display,
                CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, // no explicit sync required
                0
            };

            sharedContext = clCreateContext(contextProps, deviceCount, &sharedDevice, NULL, NULL, &status);
            if (status == CL_SUCCESS)
            {
                chosen = p;
                break;
            }
            clReleaseDevice(sharedDevice);
        }

        if (chosen >= 0)
        {
            contextInitialized = true;
            Context& interopCtx = Context::getDefault(false);
            initializeContextFromHandle(interopCtx, platformIds[chosen], sharedContext, sharedDevice);
            return interopCtx;
        }
    }
# endif // HAVE_VA_INTEL && HAVE_OPENCL
    // No interop requested or none available: hand back the default context.
    return Context::getDefault(true);
#endif  // !HAVE_VA
}
||||
|
||||
#if defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL) |
||||
// Runs the "YUV2BGR_NV12_8u" kernel over a cols x rows grid, reading the Y and UV
// images and writing into clBuffer. Returns false when the kernel could not be
// created, otherwise the result of the kernel launch.
static bool ocl_convert_nv12_to_bgr(cl_mem clImageY, cl_mem clImageUV, cl_mem clBuffer, int step, int cols, int rows)
{
    ocl::Kernel kernel;
    kernel.create("YUV2BGR_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
    if (kernel.empty())
    {
        return false;
    }

    kernel.args(clImageY, clImageUV, clBuffer, step, cols, rows);

    // One work item per (column, row) position.
    size_t workSize[2];
    workSize[0] = (size_t)cols;
    workSize[1] = (size_t)rows;
    return kernel.run(2, workSize, 0, false);
}
||||
|
||||
// Runs the "BGR2YUV_NV12_8u" kernel over a cols x rows grid, reading clBuffer and
// writing the Y and UV images. Returns false when the kernel could not be
// created, otherwise the result of the kernel launch.
static bool ocl_convert_bgr_to_nv12(cl_mem clBuffer, int step, int cols, int rows, cl_mem clImageY, cl_mem clImageUV)
{
    ocl::Kernel kernel;
    kernel.create("BGR2YUV_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
    if (kernel.empty())
    {
        return false;
    }

    kernel.args(clBuffer, step, cols, rows, clImageY, clImageUV);

    // One work item per (column, row) position.
    size_t workSize[2];
    workSize[0] = (size_t)cols;
    workSize[1] = (size_t)rows;
    return kernel.run(2, workSize, 0, false);
}
||||
#endif // HAVE_VA_INTEL && HAVE_OPENCL
|
||||
|
||||
} // namespace cv::va_intel::ocl
|
||||
|
||||
#if defined(HAVE_VA) |
||||
const int NCHANNELS = 3; |
||||
|
||||
static void copy_convert_nv12_to_bgr(const VAImage& image, const unsigned char* buffer, Mat& bgr) |
||||
{ |
||||
const float d1 = 16.0f; |
||||
const float d2 = 128.0f; |
||||
|
||||
static const float coeffs[5] = |
||||
{ |
||||
1.163999557f, |
||||
2.017999649f, |
||||
-0.390999794f, |
||||
-0.812999725f, |
||||
1.5959997177f |
||||
}; |
||||
|
||||
const size_t srcOffsetY = image.offsets[0]; |
||||
const size_t srcOffsetUV = image.offsets[1]; |
||||
|
||||
const size_t srcStepY = image.pitches[0]; |
||||
const size_t srcStepUV = image.pitches[1]; |
||||
|
||||
const size_t dstStep = bgr.step; |
||||
|
||||
const unsigned char* srcY0 = buffer + srcOffsetY; |
||||
const unsigned char* srcUV = buffer + srcOffsetUV; |
||||
|
||||
unsigned char* dst0 = bgr.data; |
||||
|
||||
for (int y = 0; y < bgr.rows; y += 2) |
||||
{ |
||||
const unsigned char* srcY1 = srcY0 + srcStepY; |
||||
unsigned char *dst1 = dst0 + dstStep; |
||||
|
||||
for (int x = 0; x < bgr.cols; x += 2) |
||||
{ |
||||
float Y0 = float(srcY0[x+0]); |
||||
float Y1 = float(srcY0[x+1]); |
||||
float Y2 = float(srcY1[x+0]); |
||||
float Y3 = float(srcY1[x+1]); |
||||
|
||||
float U = float(srcUV[2*(x/2)+0]) - d2; |
||||
float V = float(srcUV[2*(x/2)+1]) - d2; |
||||
|
||||
Y0 = std::max(0.0f, Y0 - d1) * coeffs[0]; |
||||
Y1 = std::max(0.0f, Y1 - d1) * coeffs[0]; |
||||
Y2 = std::max(0.0f, Y2 - d1) * coeffs[0]; |
||||
Y3 = std::max(0.0f, Y3 - d1) * coeffs[0]; |
||||
|
||||
float ruv = coeffs[4]*V; |
||||
float guv = coeffs[3]*V + coeffs[2]*U; |
||||
float buv = coeffs[1]*U; |
||||
|
||||
dst0[(x+0)*NCHANNELS+0] = saturate_cast<unsigned char>(Y0 + buv); |
||||
dst0[(x+0)*NCHANNELS+1] = saturate_cast<unsigned char>(Y0 + guv); |
||||
dst0[(x+0)*NCHANNELS+2] = saturate_cast<unsigned char>(Y0 + ruv); |
||||
|
||||
dst0[(x+1)*NCHANNELS+0] = saturate_cast<unsigned char>(Y1 + buv); |
||||
dst0[(x+1)*NCHANNELS+1] = saturate_cast<unsigned char>(Y1 + guv); |
||||
dst0[(x+1)*NCHANNELS+2] = saturate_cast<unsigned char>(Y1 + ruv); |
||||
|
||||
dst1[(x+0)*NCHANNELS+0] = saturate_cast<unsigned char>(Y2 + buv); |
||||
dst1[(x+0)*NCHANNELS+1] = saturate_cast<unsigned char>(Y2 + guv); |
||||
dst1[(x+0)*NCHANNELS+2] = saturate_cast<unsigned char>(Y2 + ruv); |
||||
|
||||
dst1[(x+1)*NCHANNELS+0] = saturate_cast<unsigned char>(Y3 + buv); |
||||
dst1[(x+1)*NCHANNELS+1] = saturate_cast<unsigned char>(Y3 + guv); |
||||
dst1[(x+1)*NCHANNELS+2] = saturate_cast<unsigned char>(Y3 + ruv); |
||||
} |
||||
|
||||
srcY0 = srcY1 + srcStepY; |
||||
srcUV += srcStepUV; |
||||
dst0 = dst1 + dstStep; |
||||
} |
||||
} |
||||
|
||||
static void copy_convert_bgr_to_nv12(const VAImage& image, const Mat& bgr, unsigned char* buffer) |
||||
{ |
||||
const float d1 = 16.0f; |
||||
const float d2 = 128.0f; |
||||
|
||||
static const float coeffs[8] = |
||||
{ |
||||
0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, |
||||
-0.2909994125f, 0.438999176f, -0.3679990768f, -0.0709991455f |
||||
}; |
||||
|
||||
const size_t dstOffsetY = image.offsets[0]; |
||||
const size_t dstOffsetUV = image.offsets[1]; |
||||
|
||||
const size_t dstStepY = image.pitches[0]; |
||||
const size_t dstStepUV = image.pitches[1]; |
||||
|
||||
const size_t srcStep = bgr.step; |
||||
|
||||
const unsigned char* src0 = bgr.data; |
||||
|
||||
unsigned char* dstY0 = buffer + dstOffsetY; |
||||
unsigned char* dstUV = buffer + dstOffsetUV; |
||||
|
||||
for (int y = 0; y < bgr.rows; y += 2) |
||||
{ |
||||
const unsigned char *src1 = src0 + srcStep; |
||||
unsigned char* dstY1 = dstY0 + dstStepY; |
||||
|
||||
for (int x = 0; x < bgr.cols; x += 2) |
||||
{ |
||||
float B0 = float(src0[(x+0)*NCHANNELS+0]); |
||||
float G0 = float(src0[(x+0)*NCHANNELS+1]); |
||||
float R0 = float(src0[(x+0)*NCHANNELS+2]); |
||||
|
||||
float B1 = float(src0[(x+1)*NCHANNELS+0]); |
||||
float G1 = float(src0[(x+1)*NCHANNELS+1]); |
||||
float R1 = float(src0[(x+1)*NCHANNELS+2]); |
||||
|
||||
float B2 = float(src1[(x+0)*NCHANNELS+0]); |
||||
float G2 = float(src1[(x+0)*NCHANNELS+1]); |
||||
float R2 = float(src1[(x+0)*NCHANNELS+2]); |
||||
|
||||
float B3 = float(src1[(x+1)*NCHANNELS+0]); |
||||
float G3 = float(src1[(x+1)*NCHANNELS+1]); |
||||
float R3 = float(src1[(x+1)*NCHANNELS+2]); |
||||
|
||||
float Y0 = coeffs[0]*R0 + coeffs[1]*G0 + coeffs[2]*B0 + d1; |
||||
float Y1 = coeffs[0]*R1 + coeffs[1]*G1 + coeffs[2]*B1 + d1; |
||||
float Y2 = coeffs[0]*R2 + coeffs[1]*G2 + coeffs[2]*B2 + d1; |
||||
float Y3 = coeffs[0]*R3 + coeffs[1]*G3 + coeffs[2]*B3 + d1; |
||||
|
||||
float U = coeffs[3]*R0 + coeffs[4]*G0 + coeffs[5]*B0 + d2; |
||||
float V = coeffs[5]*R0 + coeffs[6]*G0 + coeffs[7]*B0 + d2; |
||||
|
||||
dstY0[x+0] = saturate_cast<unsigned char>(Y0); |
||||
dstY0[x+1] = saturate_cast<unsigned char>(Y1); |
||||
dstY1[x+0] = saturate_cast<unsigned char>(Y2); |
||||
dstY1[x+1] = saturate_cast<unsigned char>(Y3); |
||||
|
||||
dstUV[2*(x/2)+0] = saturate_cast<unsigned char>(U); |
||||
dstUV[2*(x/2)+1] = saturate_cast<unsigned char>(V); |
||||
} |
||||
|
||||
src0 = src1 + srcStep; |
||||
dstY0 = dstY1 + dstStepY; |
||||
dstUV += dstStepUV; |
||||
} |
||||
} |
||||
#endif // HAVE_VA
|
||||
|
||||
// Converts the BGR image `src` to NV12 and stores it in the VA surface `surface`.
//
// display - VA display the surface belongs to
// src     - source image; must be CV_8UC3 and exactly `size` large
// surface - destination VA surface (the mapped image must be NV12 on the CPU path)
// size    - expected size of the source image
//
// When the CL-VA interop context has been set up (contextInitialized), the
// conversion runs as an OpenCL kernel on the surface planes; otherwise the
// surface is mapped with libva and converted on the CPU.
// Any failing VA / OpenCL call is reported through CV_Error; without HAVE_VA the
// function fails through NO_VA_SUPPORT_ERROR.
void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size)
{
    (void)display; (void)src; (void)surface; (void)size;
#if !defined(HAVE_VA)
    NO_VA_SUPPORT_ERROR;
#else  // !HAVE_VA
    const int stype = CV_8UC3;

    int srcType = src.type();
    CV_Assert(srcType == stype);

    Size srcSize = src.size();
    CV_Assert(srcSize.width == size.width && srcSize.height == size.height);

# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
    if (contextInitialized)
    {
        UMat u = src.getUMat();

        // TODO Add support for roi
        CV_Assert(u.offset == 0);
        CV_Assert(u.isContinuous());

        cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ);

        using namespace cv::ocl;
        Context& ctx = Context::getDefault();
        cl_context context = (cl_context)ctx.ptr();

        cl_int status = 0;

        // Wrap both planes of the surface (plane 0 = Y, plane 1 = UV) as CL images.
        cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 0, &status);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
        cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 1, &status);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");

        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();

        cl_mem images[2] = { clImageY, clImageUV };
        status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
        if (!ocl::ocl_convert_bgr_to_nv12(clBuffer, (int)u.step[0], u.cols, u.rows, clImageY, clImageUV))
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_bgr_to_nv12 failed");
        // The release result must be stored, otherwise the check below would
        // test the stale status of the acquire call.
        status = clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");

        status = clFinish(q); // TODO Use events
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");

        status = clReleaseMemObject(clImageY); // TODO RAII
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");
        status = clReleaseMemObject(clImageUV);
        if (status != CL_SUCCESS)
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
    }
    else
# endif // HAVE_VA_INTEL && HAVE_OPENCL
    {
        Mat m = src.getMat();

        // TODO Add support for roi
        CV_Assert(m.data == m.datastart);
        CV_Assert(m.isContinuous());

        VAStatus status = 0;

        // Sync the surface before deriving and mapping its image.
        status = vaSyncSurface(display, surface);
        if (status != VA_STATUS_SUCCESS)
            CV_Error(cv::Error::StsError, "VA-API: vaSyncSurface failed");

        VAImage image;
        status = vaDeriveImage(display, surface, &image);
        if (status != VA_STATUS_SUCCESS)
            CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed");

        unsigned char* buffer = 0;
        status = vaMapBuffer(display, image.buf, (void **)&buffer);
        if (status != VA_STATUS_SUCCESS)
            CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");

        // Only NV12 is handled by the CPU converter.
        CV_Assert(image.format.fourcc == VA_FOURCC_NV12);

        copy_convert_bgr_to_nv12(image, m, buffer);

        status = vaUnmapBuffer(display, image.buf);
        if (status != VA_STATUS_SUCCESS)
            CV_Error(cv::Error::StsError, "VA-API: vaUnmapBuffer failed");

        status = vaDestroyImage(display, image.image_id);
        if (status != VA_STATUS_SUCCESS)
            CV_Error(cv::Error::StsError, "VA-API: vaDestroyImage failed");
    }
#endif  // !HAVE_VA
}
||||
|
||||
// Converts the NV12 VA surface `surface` to a BGR image stored in `dst`.
//
// display - VA display the surface belongs to
// surface - source VA surface (the mapped image must be NV12 on the CPU path)
// size    - size used to (re)create `dst`
// dst     - destination; created here as CV_8UC3 of the given size
//
// When the CL-VA interop context has been set up (contextInitialized), the
// conversion runs as an OpenCL kernel on the surface planes; otherwise the
// surface is mapped with libva and converted on the CPU.
// Any failing VA / OpenCL call is reported through CV_Error; without HAVE_VA the
// function fails through NO_VA_SUPPORT_ERROR.
void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, OutputArray dst)
{
    (void)display; (void)surface; (void)dst; (void)size;
#if !defined(HAVE_VA)
    NO_VA_SUPPORT_ERROR;
#else  // !HAVE_VA
    // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
    dst.create(size, CV_8UC3);

# if (defined(HAVE_VA_INTEL) && defined(HAVE_OPENCL))
    if (contextInitialized)
    {
        UMat target = dst.getUMat();

        // TODO Add support for roi
        CV_Assert(target.offset == 0);
        CV_Assert(target.isContinuous());

        cl_mem bgrBuffer = (cl_mem)target.handle(ACCESS_WRITE);
        cl_context clContext = (cl_context)cv::ocl::Context::getDefault().ptr();

        cl_int err = 0;

        // planes[0] wraps the Y plane, planes[1] the interleaved UV plane.
        cl_mem planes[2] = { 0, 0 };
        planes[0] = clCreateFromVA_APIMediaSurfaceINTEL(clContext, CL_MEM_READ_ONLY, &surface, 0, &err);
        if (err != CL_SUCCESS)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
        }
        planes[1] = clCreateFromVA_APIMediaSurfaceINTEL(clContext, CL_MEM_READ_ONLY, &surface, 1, &err);
        if (err != CL_SUCCESS)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");
        }

        cl_command_queue queue = (cl_command_queue)cv::ocl::Queue::getDefault().ptr();

        // Acquire the planes, run the conversion kernel, then release them.
        err = clEnqueueAcquireVA_APIMediaSurfacesINTEL(queue, 2, planes, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
        }

        const bool converted = ocl::ocl_convert_nv12_to_bgr(planes[0], planes[1], bgrBuffer,
                                                            (int)target.step[0], target.cols, target.rows);
        if (!converted)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_nv12_to_bgr failed");
        }

        err = clEnqueueReleaseVA_APIMediaSurfacesINTEL(queue, 2, planes, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");
        }

        err = clFinish(queue); // TODO Use events
        if (err != CL_SUCCESS)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");
        }

        err = clReleaseMemObject(planes[0]); // TODO RAII
        if (err != CL_SUCCESS)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");
        }
        err = clReleaseMemObject(planes[1]);
        if (err != CL_SUCCESS)
        {
            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
        }
    }
    else
# endif // HAVE_VA_INTEL && HAVE_OPENCL
    {
        Mat out = dst.getMat();

        // TODO Add support for roi
        CV_Assert(out.data == out.datastart);
        CV_Assert(out.isContinuous());

        // Sync the surface before deriving and mapping its image.
        VAStatus vaErr = vaSyncSurface(display, surface);
        if (vaErr != VA_STATUS_SUCCESS)
        {
            CV_Error(cv::Error::StsError, "VA-API: vaSyncSurface failed");
        }

        VAImage derived;
        vaErr = vaDeriveImage(display, surface, &derived);
        if (vaErr != VA_STATUS_SUCCESS)
        {
            CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed");
        }

        unsigned char* mapped = 0;
        vaErr = vaMapBuffer(display, derived.buf, (void **)&mapped);
        if (vaErr != VA_STATUS_SUCCESS)
        {
            CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");
        }

        // Only NV12 is handled by the CPU converter.
        CV_Assert(derived.format.fourcc == VA_FOURCC_NV12);

        copy_convert_nv12_to_bgr(derived, mapped, out);

        vaErr = vaUnmapBuffer(display, derived.buf);
        if (vaErr != VA_STATUS_SUCCESS)
        {
            CV_Error(cv::Error::StsError, "VA-API: vaUnmapBuffer failed");
        }

        vaErr = vaDestroyImage(display, derived.image_id);
        if (vaErr != VA_STATUS_SUCCESS)
        {
            CV_Error(cv::Error::StsError, "VA-API: vaDestroyImage failed");
        }
    }
#endif  // !HAVE_VA
}
||||
|
||||
}} // namespace cv::va_intel
|
@ -1,302 +0,0 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2015, Itseez, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#ifdef HAVE_VAAPI |
||||
#else // HAVE_VAAPI
|
||||
# define NO_VAAPI_SUPPORT_ERROR CV_ErrorNoReturn(cv::Error::StsBadFunc, "OpenCV was build without VA-API support") |
||||
#endif // HAVE_VAAPI
|
||||
|
||||
using namespace cv; |
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// CL-VA Interoperability
|
||||
|
||||
#ifdef HAVE_OPENCL |
||||
# include "opencv2/core/opencl/runtime/opencl_core.hpp" |
||||
# include "opencv2/core.hpp" |
||||
# include "opencv2/core/ocl.hpp" |
||||
# include "opencl_kernels_core.hpp" |
||||
#else // HAVE_OPENCL
|
||||
# define NO_OPENCL_SUPPORT_ERROR CV_ErrorNoReturn(cv::Error::StsBadFunc, "OpenCV was build without OpenCL support") |
||||
#endif // HAVE_OPENCL
|
||||
|
||||
#if defined(HAVE_VAAPI) && defined(HAVE_OPENCL) |
||||
# include <CL/va_ext.h> |
||||
#endif // HAVE_VAAPI && HAVE_OPENCL
|
||||
|
||||
namespace cv { namespace vaapi { |
||||
|
||||
#if defined(HAVE_VAAPI) && defined(HAVE_OPENCL) |
||||
|
||||
static clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn clGetDeviceIDsFromVA_APIMediaAdapterINTEL = NULL; |
||||
static clCreateFromVA_APIMediaSurfaceINTEL_fn clCreateFromVA_APIMediaSurfaceINTEL = NULL; |
||||
static clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn clEnqueueAcquireVA_APIMediaSurfacesINTEL = NULL; |
||||
static clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn clEnqueueReleaseVA_APIMediaSurfacesINTEL = NULL; |
||||
|
||||
static bool contextInitialized = false; |
||||
|
||||
#endif // HAVE_VAAPI && HAVE_OPENCL
|
||||
|
||||
namespace ocl { |
||||
|
||||
// Creates the CL-VA sharing context for the VA display `display` and attaches it
// to the default OpenCL Context.
//
// Every OpenCL platform is probed for the four VA-API media sharing entry
// points; the first platform that provides them, reports a preferred device and
// yields a sharing context is used. If no platform qualifies, OpenCLInitError is
// raised. Without HAVE_VAAPI or HAVE_OPENCL the call fails through the
// corresponding NO_*_SUPPORT_ERROR macro.
Context& initializeContextFromVA(VADisplay display)
{
    (void)display;
#if !defined(HAVE_VAAPI)
    NO_VAAPI_SUPPORT_ERROR;
#elif !defined(HAVE_OPENCL)
    NO_OPENCL_SUPPORT_ERROR;
#else
    contextInitialized = false;

    cl_uint platformCount;
    cl_int status = clGetPlatformIDs(0, NULL, &platformCount);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
    if (platformCount == 0)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");

    std::vector<cl_platform_id> platformIds(platformCount);
    status = clGetPlatformIDs(platformCount, &platformIds[0], NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platform Id list");

    // CL-VA interop needs a platform/device exposing "cl_intel_va_api_media_sharing".
    // The platform extension string does not list it in practice (the device string does),
    // so, following the Intel procedure, devices are obtained through the extension call.
    // The entry points have to be resolved per platform ID, hence the scan below keeps
    // the first platform that gives non-NULL pointers, a device, and a CL context.

    int chosen = -1;
    cl_context sharedContext = 0;
    cl_device_id sharedDevice = 0;

    for (int p = 0; p < (int)platformCount; ++p)
    {
        const cl_platform_id platform = platformIds[p];

        // Resolve the extension entry points for this platform.
        clGetDeviceIDsFromVA_APIMediaAdapterINTEL = (clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platform, "clGetDeviceIDsFromVA_APIMediaAdapterINTEL");
        clCreateFromVA_APIMediaSurfaceINTEL       = (clCreateFromVA_APIMediaSurfaceINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromVA_APIMediaSurfaceINTEL");
        clEnqueueAcquireVA_APIMediaSurfacesINTEL  = (clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireVA_APIMediaSurfacesINTEL");
        clEnqueueReleaseVA_APIMediaSurfacesINTEL  = (clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseVA_APIMediaSurfacesINTEL");

        const bool entryPointsResolved =
            ((void*)clGetDeviceIDsFromVA_APIMediaAdapterINTEL != NULL) &&
            ((void*)clCreateFromVA_APIMediaSurfaceINTEL != NULL) &&
            ((void*)clEnqueueAcquireVA_APIMediaSurfacesINTEL != NULL) &&
            ((void*)clEnqueueReleaseVA_APIMediaSurfacesINTEL != NULL);
        if (!entryPointsResolved)
            continue;

        // Ask how many devices are preferred for this VA display.
        cl_uint deviceCount = 0;
        status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platform, CL_VA_API_DISPLAY_INTEL, display,
                                                           CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &deviceCount);
        if ((status != CL_SUCCESS) || (deviceCount == 0))
            continue;

        deviceCount = 1; // initializeContextFromHandle() expects only 1 device
        status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platform, CL_VA_API_DISPLAY_INTEL, display,
                                                           CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, deviceCount, &sharedDevice, NULL);
        if (status != CL_SUCCESS)
            continue;

        // Create the CL-VA media sharing OpenCL context.
        cl_context_properties contextProps[] = {
            CL_CONTEXT_VA_API_DISPLAY_INTEL, (cl_context_properties) display,
            CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, // no explicit sync required
            0
        };

        sharedContext = clCreateContext(contextProps, deviceCount, &sharedDevice, NULL, NULL, &status);
        if (status == CL_SUCCESS)
        {
            chosen = p;
            break;
        }
        clReleaseDevice(sharedDevice);
    }

    if (chosen < 0)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for VA-API interop");

    Context& interopCtx = Context::getDefault(false);
    initializeContextFromHandle(interopCtx, platformIds[chosen], sharedContext, sharedDevice);
    contextInitialized = true;
    return interopCtx;
#endif
}
||||
|
||||
#if defined(HAVE_VAAPI) && defined(HAVE_OPENCL) |
||||
// Runs the "YUV2BGR_NV12_8u" OpenCL kernel over a cols x rows grid.
//
// clImageY, clImageUV - OpenCL images passed as the first two kernel arguments
//                       (the Y and UV planes of the source surface).
// clBuffer            - OpenCL buffer passed as the destination argument.
// step                - row stride value forwarded to the kernel.
// cols, rows          - image dimensions; they also define the 2D global work size.
//
// Returns false when the kernel could not be created or when launching it failed.
static bool ocl_convert_nv12_to_bgr(cl_mem clImageY, cl_mem clImageUV, cl_mem clBuffer, int step, int cols, int rows)
{
    ocl::Kernel k;
    k.create("YUV2BGR_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
    if (k.empty())
        return false;

    k.args(clImageY, clImageUV, clBuffer, step, cols, rows);

    // Explicit casts: brace-initializing size_t from a non-constant int is a narrowing conversion.
    size_t globalsize[] = { (size_t)cols, (size_t)rows };
    return k.run(2, globalsize, 0, false);
}
||||
|
||||
// Runs the "BGR2YUV_NV12_8u" OpenCL kernel over a cols x rows grid.
//
// clBuffer            - OpenCL buffer passed as the source argument.
// step                - row stride value forwarded to the kernel.
// cols, rows          - image dimensions; they also define the 2D global work size.
// clImageY, clImageUV - OpenCL images passed as the last two kernel arguments
//                       (the Y and UV planes of the destination surface).
//
// Returns false when the kernel could not be created or when launching it failed.
static bool ocl_convert_bgr_to_nv12(cl_mem clBuffer, int step, int cols, int rows, cl_mem clImageY, cl_mem clImageUV)
{
    ocl::Kernel k;
    k.create("BGR2YUV_NV12_8u", cv::ocl::core::cvtclr_dx_oclsrc, "");
    if (k.empty())
        return false;

    k.args(clBuffer, step, cols, rows, clImageY, clImageUV);

    // Explicit casts: brace-initializing size_t from a non-constant int is a narrowing conversion.
    size_t globalsize[] = { (size_t)cols, (size_t)rows };
    return k.run(2, globalsize, 0, false);
}
||||
#endif // HAVE_VAAPI && HAVE_OPENCL
|
||||
|
||||
} // namespace cv::vaapi::ocl
|
||||
|
||||
// Writes the contents of `src` into the VA surface `surface` via the CL-VA interop.
//
// src     - input array; must have type CV_8UC4 and exactly the dimensions given by `size`.
//           Its UMat must be continuous and start at offset 0 (no ROI support yet).
// surface - VA surface; planes 0 (Y) and 1 (UV) are mapped as write-only OpenCL images.
// size    - expected size of `src`.
//
// Without HAVE_VAAPI / HAVE_OPENCL the function only expands NO_VAAPI_SUPPORT_ERROR /
// NO_OPENCL_SUPPORT_ERROR. Otherwise CV_Error is invoked when the interop context has not
// been created or when any OpenCL call or the colour conversion fails.
void convertToVASurface(InputArray src, VASurfaceID surface, Size size)
{
    (void)src; (void)surface; (void)size;
#if !defined(HAVE_VAAPI)
    NO_VAAPI_SUPPORT_ERROR;
#elif !defined(HAVE_OPENCL)
    NO_OPENCL_SUPPORT_ERROR;
#else
    if (!contextInitialized)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Context for VA-API interop hasn't been created");

    const int stype = CV_8UC4;

    int srcType = src.type();
    CV_Assert(srcType == stype);

    Size srcSize = src.size();
    CV_Assert(srcSize.width == size.width && srcSize.height == size.height);

    UMat u = src.getUMat();

    // TODO Add support for roi
    CV_Assert(u.offset == 0);
    CV_Assert(u.isContinuous());

    cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ);

    using namespace cv::ocl;
    Context& ctx = Context::getDefault();
    cl_context context = (cl_context)ctx.ptr();

    cl_int status = 0;

    // Map both planes of the surface as OpenCL images.
    cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 0, &status);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
    cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 1, &status);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");

    cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();

    // Acquire the images, run the conversion kernel, then release them again.
    cl_mem images[2] = { clImageY, clImageUV };
    status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
    if (!ocl::ocl_convert_bgr_to_nv12(clBuffer, (int)u.step[0], u.cols, u.rows, clImageY, clImageUV))
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_bgr_to_nv12 failed");
    // The result must be stored: otherwise the check below re-tests the acquire status.
    status = clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");

    status = clFinish(q); // TODO Use events
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");

    status = clReleaseMemObject(clImageY); // TODO RAII
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");
    status = clReleaseMemObject(clImageUV);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
#endif
}
||||
|
||||
// Reads the VA surface `surface` into `dst` via the CL-VA interop.
//
// surface - VA surface; planes 0 (Y) and 1 (UV) are mapped as read-only OpenCL images.
// size    - size used to (re)create `dst`.
// dst     - output array, created here as CV_8UC4 with the given size.
//
// Without HAVE_VAAPI / HAVE_OPENCL the function only expands NO_VAAPI_SUPPORT_ERROR /
// NO_OPENCL_SUPPORT_ERROR. Otherwise CV_Error is invoked when the interop context has not
// been created or when any OpenCL call or the colour conversion fails.
void convertFromVASurface(VASurfaceID surface, Size size, OutputArray dst)
{
    (void)surface; (void)dst; (void)size;
#if !defined(HAVE_VAAPI)
    NO_VAAPI_SUPPORT_ERROR;
#elif !defined(HAVE_OPENCL)
    NO_OPENCL_SUPPORT_ERROR;
#else
    if (!contextInitialized)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Context for VA-API interop hasn't been created");

    const int dstType = CV_8UC4;

    // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
    dst.create(size, dstType);
    UMat dstMat = dst.getUMat();

    // TODO Add support for roi
    CV_Assert(dstMat.offset == 0);
    CV_Assert(dstMat.isContinuous());

    cl_mem dstBuffer = (cl_mem)dstMat.handle(ACCESS_WRITE);

    using namespace cv::ocl;
    cl_context clCtx = (cl_context)Context::getDefault().ptr();

    cl_int err = 0;

    // planes[0] is the Y plane, planes[1] the UV plane of the surface.
    cl_mem planes[2] = { NULL, NULL };

    planes[0] = clCreateFromVA_APIMediaSurfaceINTEL(clCtx, CL_MEM_READ_ONLY, &surface, 0, &err);
    if (err != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");

    planes[1] = clCreateFromVA_APIMediaSurfaceINTEL(clCtx, CL_MEM_READ_ONLY, &surface, 1, &err);
    if (err != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");

    cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();

    // Acquire the images, run the conversion kernel, then release them again.
    err = clEnqueueAcquireVA_APIMediaSurfacesINTEL(queue, 2, planes, 0, NULL, NULL);
    if (err != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");

    const bool converted = ocl::ocl_convert_nv12_to_bgr(planes[0], planes[1], dstBuffer,
                                                        (int)dstMat.step[0], dstMat.cols, dstMat.rows);
    if (!converted)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_nv12_to_bgr failed");

    err = clEnqueueReleaseVA_APIMediaSurfacesINTEL(queue, 2, planes, 0, NULL, NULL);
    if (err != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");

    err = clFinish(queue); // TODO Use events
    if (err != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clFinish failed");

    err = clReleaseMemObject(planes[0]); // TODO RAII
    if (err != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (Y plane)");

    err = clReleaseMemObject(planes[1]);
    if (err != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clReleaseMem failed (UV plane)");
#endif
}
||||
|
||||
}} // namespace cv::vaapi
|
@ -0,0 +1,221 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
|
||||
namespace { |
||||
|
||||
// Option table for the flag/boolean CommandLineParser tests:
// "help" and "unused" have an empty default, "info" defaults to false, "true" defaults to true.
static const char* const keys =
    "{ h help | | print help }"
    "{ i info | false | print info }"
    "{ t true | true | true value }"
    "{ n unused | | dummy }";
||||
|
||||
// Queries for an undeclared key ("q") or a missing positional index must throw,
// and check() must report failure after "h" has been read as bool.
TEST(CommandLineParser, testFailure)
{
    const char* args[] = {"<bin>", "-q"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_ANY_THROW(cmd.has("q"));
    EXPECT_ANY_THROW(cmd.get<bool>("q"));
    EXPECT_ANY_THROW(cmd.get<bool>(0));

    cmd.get<bool>("h");
    EXPECT_FALSE(cmd.check());
}
||||
// Flags given without a value are reported by has() under both their short and long names;
// options that were not passed are not.
TEST(CommandLineParser, testHas_noValues)
{
    const char* args[] = {"<bin>", "-h", "--info"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_TRUE(cmd.has("help"));
    EXPECT_TRUE(cmd.has("h"));
    EXPECT_TRUE(cmd.has("info"));
    EXPECT_TRUE(cmd.has("i"));
    EXPECT_FALSE(cmd.has("n"));
    EXPECT_FALSE(cmd.has("unused"));
}
||||
// has() reports options passed as "=TRUE" / "=true"; options that were not passed are absent.
TEST(CommandLineParser, testHas_TrueValues)
{
    const char* args[] = {"<bin>", "-h=TRUE", "--info=true"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_TRUE(cmd.has("help"));
    EXPECT_TRUE(cmd.has("h"));
    EXPECT_TRUE(cmd.has("info"));
    EXPECT_TRUE(cmd.has("i"));
    EXPECT_FALSE(cmd.has("n"));
    EXPECT_FALSE(cmd.has("unused"));
}
||||
// has() reports options passed as "=1"; options that were not passed are absent.
TEST(CommandLineParser, testHas_TrueValues1)
{
    const char* args[] = {"<bin>", "-h=1", "--info=1"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_TRUE(cmd.has("help"));
    EXPECT_TRUE(cmd.has("h"));
    EXPECT_TRUE(cmd.has("info"));
    EXPECT_TRUE(cmd.has("i"));
    EXPECT_FALSE(cmd.has("n"));
    EXPECT_FALSE(cmd.has("unused"));
}
||||
// has() still reports an option as present when it was passed with the value "0".
TEST(CommandLineParser, testHas_FalseValues0)
{
    const char* args[] = {"<bin>", "-h=0", "--info=0"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_TRUE(cmd.has("help"));
    EXPECT_TRUE(cmd.has("h"));
    EXPECT_TRUE(cmd.has("info"));
    EXPECT_TRUE(cmd.has("i"));
    EXPECT_FALSE(cmd.has("n"));
    EXPECT_FALSE(cmd.has("unused"));
}
||||
|
||||
// With no arguments, get<bool>() yields false for "help" and "info" and true for "true".
TEST(CommandLineParser, testBoolOption_noArgs)
{
    const char* args[] = {"<bin>"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_FALSE(cmd.get<bool>("help"));
    EXPECT_FALSE(cmd.get<bool>("h"));
    EXPECT_FALSE(cmd.get<bool>("info"));
    EXPECT_FALSE(cmd.get<bool>("i"));
    EXPECT_TRUE(cmd.get<bool>("true")); // default is true
    EXPECT_TRUE(cmd.get<bool>("t"));
}
||||
|
||||
// Flags passed without a value read back as true through get<bool>().
TEST(CommandLineParser, testBoolOption_noValues)
{
    const char* args[] = {"<bin>", "-h", "--info"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_TRUE(cmd.get<bool>("help"));
    EXPECT_TRUE(cmd.get<bool>("h"));
    EXPECT_TRUE(cmd.get<bool>("info"));
    EXPECT_TRUE(cmd.get<bool>("i"));
}
||||
|
||||
// "--info=true" reads back as true; the options that were not passed read back as false.
// The checks for "-h=TRUE" are disabled in this test.
TEST(CommandLineParser, testBoolOption_TrueValues)
{
    const char* args[] = {"<bin>", "-h=TRUE", "--info=true"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    //EXPECT_TRUE(cmd.get<bool>("help"));
    //EXPECT_TRUE(cmd.get<bool>("h"));
    EXPECT_TRUE(cmd.get<bool>("info"));
    EXPECT_TRUE(cmd.get<bool>("i"));
    EXPECT_FALSE(cmd.get<bool>("unused"));
    EXPECT_FALSE(cmd.get<bool>("n"));
}
||||
|
||||
// Options passed as "=FALSE" / "=false" read back as false through get<bool>().
TEST(CommandLineParser, testBoolOption_FalseValues)
{
    const char* args[] = {"<bin>", "--help=FALSE", "-i=false"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys);

    EXPECT_FALSE(cmd.get<bool>("help"));
    EXPECT_FALSE(cmd.get<bool>("h"));
    EXPECT_FALSE(cmd.get<bool>("info"));
    EXPECT_FALSE(cmd.get<bool>("i"));
}
||||
|
||||
|
||||
// Option table for the positional-argument tests:
// "@arg1" has the default "default1", "@arg2" has an empty default.
static const char* const keys2 =
    "{ h help | | print help }"
    "{ @arg1 | default1 | param1 }"
    "{ @arg2 | | param2 }"
    "{ n unused | | dummy }";
||||
|
||||
// Without arguments, "@arg1" is present with its default and "@arg2" is absent and empty,
// whether queried by name or by index.
TEST(CommandLineParser, testPositional_noArgs)
{
    const char* args[] = {"<bin>"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys2);

    EXPECT_TRUE(cmd.has("@arg1"));
    EXPECT_FALSE(cmd.has("@arg2"));

    EXPECT_EQ("default1", cmd.get<String>("@arg1"));
    EXPECT_EQ("default1", cmd.get<String>(0));

    EXPECT_EQ("", cmd.get<String>("@arg2"));
    EXPECT_EQ("", cmd.get<String>(1));
}
||||
|
||||
// Two positional arguments are returned in order, by name and by index.
TEST(CommandLineParser, testPositional_default)
{
    const char* args[] = {"<bin>", "test1", "test2"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys2);

    EXPECT_TRUE(cmd.has("@arg1"));
    EXPECT_TRUE(cmd.has("@arg2"));

    EXPECT_EQ("test1", cmd.get<String>("@arg1"));
    EXPECT_EQ("test2", cmd.get<String>("@arg2"));
    EXPECT_EQ("test1", cmd.get<String>(0));
    EXPECT_EQ("test2", cmd.get<String>(1));
}
||||
|
||||
// A flag placed before the positional arguments does not shift their indices.
TEST(CommandLineParser, testPositional_withFlagsBefore)
{
    const char* args[] = {"<bin>", "-h", "test1", "test2"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys2);

    EXPECT_TRUE(cmd.has("@arg1"));
    EXPECT_TRUE(cmd.has("@arg2"));

    EXPECT_EQ("test1", cmd.get<String>("@arg1"));
    EXPECT_EQ("test2", cmd.get<String>("@arg2"));
    EXPECT_EQ("test1", cmd.get<String>(0));
    EXPECT_EQ("test2", cmd.get<String>(1));
}
||||
|
||||
// A flag placed after the positional arguments leaves them unchanged.
TEST(CommandLineParser, testPositional_withFlagsAfter)
{
    const char* args[] = {"<bin>", "test1", "test2", "-h"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys2);

    EXPECT_TRUE(cmd.has("@arg1"));
    EXPECT_TRUE(cmd.has("@arg2"));

    EXPECT_EQ("test1", cmd.get<String>("@arg1"));
    EXPECT_EQ("test2", cmd.get<String>("@arg2"));
    EXPECT_EQ("test1", cmd.get<String>(0));
    EXPECT_EQ("test2", cmd.get<String>(1));
}
||||
|
||||
// An empty default may be read as an empty String without failing check(),
// while reading a positional whose default is "<none>" makes check() fail.
TEST(CommandLineParser, testEmptyStringValue)
{
    static const char* const keys3 =
        "{ @pos0 | | empty default value }"
        "{ @pos1 | <none> | forbid empty default value }";

    const char* args[] = {"<bin>"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys3);

    // EXPECT_TRUE(cmd.has("@pos0"));
    EXPECT_EQ("", cmd.get<String>("@pos0"));
    EXPECT_TRUE(cmd.check());

    EXPECT_FALSE(cmd.has("@pos1"));
    cmd.get<String>(1);
    EXPECT_FALSE(cmd.check());
}
||||
|
||||
// Values containing '=' are kept whole, both as a positional argument
// and as the value of a named option.
TEST(CommandLineParser, positional_regression_5074_equal_sign)
{
    static const char* const keys3 =
        "{ @eq0 | | }"
        "{ eq1 | | }";

    const char* args[] = {"<bin>", "1=0", "--eq1=1=0"};
    const int nargs = (int)(sizeof(args) / sizeof(args[0]));
    cv::CommandLineParser cmd(nargs, args, keys3);

    EXPECT_EQ("1=0", cmd.get<String>("@eq0"));
    EXPECT_EQ("1=0", cmd.get<String>(0));
    EXPECT_EQ("1=0", cmd.get<String>("eq1"));
    EXPECT_TRUE(cmd.check());
}
||||
|
||||
} // namespace
|
@ -0,0 +1,52 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
//
|
||||
// Library initialization file
|
||||
//
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
IPP_INITIALIZER_AUTO |
||||
|
||||
/* End of file. */ |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,864 @@ |
||||
#include "test_intrin_utils.hpp" |
||||
#include <climits> |
||||
|
||||
using namespace cv; |
||||
|
||||
template<typename R> struct TheTest |
||||
{ |
||||
typedef typename R::lane_type LaneType; |
||||
|
||||
// Exercises register construction, loads, stores, zero/all initialisation
// and the v_reinterpret_as_* conversions for register type R.
TheTest & test_loadstore()
{
    AlignedData<R> src;
    AlignedData<R> dst;

    // the .a members must be 16-byte aligned, the .u members must not be
    EXPECT_EQ((size_t)0, (size_t)&src.a.d % 16);
    EXPECT_NE((size_t)0, (size_t)&src.u.d % 16);
    EXPECT_EQ((size_t)0, (size_t)&dst.a.d % 16);
    EXPECT_NE((size_t)0, (size_t)&dst.u.d % 16);

    // several ways of initialising a register
    R fromData = src.a;
    R fromLoad = v_load(src.u.d);
    R fromAligned = v_load_aligned(src.a.d);
    R fromCopy(fromLoad);
    EXPECT_EQ(src.a[0], fromData.get0());
    EXPECT_EQ(src.u[0], fromLoad.get0());
    EXPECT_EQ(src.a[0], fromAligned.get0());
    EXPECT_EQ(src.u[0], fromCopy.get0());

    // plain and aligned stores
    dst.u.clear();
    dst.a.clear();
    v_store(dst.u.d, fromData);
    v_store_aligned(dst.a.d, fromLoad);
    EXPECT_EQ(src.a, dst.a);
    EXPECT_EQ(src.u, dst.u);

    // storing the two halves separately must rebuild the whole vector
    Data<R> ref, res(0);
    R whole = ref;
    v_store_high(res.mid(), whole);
    v_store_low(res.d, whole);
    EXPECT_EQ(ref, res);

    // loading from two halves
    res.clear();
    R joined = v_load_halves(ref.d, ref.mid());
    v_store(res.d, joined);
    EXPECT_EQ(ref, res);

    // zero, all
    Data<R> zeros = RegTrait<R>::zero();
    Data<R> eights = RegTrait<R>::all(8);
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ((LaneType)0, zeros[lane]);
        EXPECT_EQ((LaneType)8, eights[lane]);
    }

    // reinterpret_as: storing the reinterpreted register must reproduce the source
    v_uint8x16 vu8 = v_reinterpret_as_u8(fromData);
    dst.a.clear();
    v_store((uchar*)dst.a.d, vu8);
    EXPECT_EQ(src.a, dst.a);

    v_int8x16 vs8 = v_reinterpret_as_s8(fromData);
    dst.a.clear();
    v_store((schar*)dst.a.d, vs8);
    EXPECT_EQ(src.a, dst.a);

    v_uint16x8 vu16 = v_reinterpret_as_u16(fromData);
    dst.a.clear();
    v_store((ushort*)dst.a.d, vu16);
    EXPECT_EQ(src.a, dst.a);

    v_int16x8 vs16 = v_reinterpret_as_s16(fromData);
    dst.a.clear();
    v_store((short*)dst.a.d, vs16);
    EXPECT_EQ(src.a, dst.a);

    v_uint32x4 vu32 = v_reinterpret_as_u32(fromData);
    dst.a.clear();
    v_store((unsigned*)dst.a.d, vu32);
    EXPECT_EQ(src.a, dst.a);

    v_int32x4 vs32 = v_reinterpret_as_s32(fromData);
    dst.a.clear();
    v_store((int*)dst.a.d, vs32);
    EXPECT_EQ(src.a, dst.a);

    v_uint64x2 vu64 = v_reinterpret_as_u64(fromData);
    dst.a.clear();
    v_store((uint64*)dst.a.d, vu64);
    EXPECT_EQ(src.a, dst.a);

    v_int64x2 vs64 = v_reinterpret_as_s64(fromData);
    dst.a.clear();
    v_store((int64*)dst.a.d, vs64);
    EXPECT_EQ(src.a, dst.a);

    v_float32x4 vf32 = v_reinterpret_as_f32(fromData);
    dst.a.clear();
    v_store((float*)dst.a.d, vf32);
    EXPECT_EQ(src.a, dst.a);
#if CV_SIMD128_64F
    v_float64x2 vf64 = v_reinterpret_as_f64(fromData);
    dst.a.clear();
    v_store((double*)dst.a.d, vf64);
    EXPECT_EQ(src.a, dst.a);
#endif

    return *this;
}
||||
|
||||
// Round-trips 3 and 4 registers through v_store_interleave / v_load_deinterleave
// and checks that every register comes back with its original contents.
TheTest & test_interleave()
{
    Data<R> data1, data2, data3, data4;
    data2 += 20;
    data3 += 40;
    data4 += 60;

    R a = data1, b = data2, c = data3;
    R d = data1, e = data2, f = data3, g = data4;

    LaneType buf3[R::nlanes * 3];
    LaneType buf4[R::nlanes * 4];

    v_store_interleave(buf3, a, b, c);
    v_store_interleave(buf4, d, e, f, g);

    // wipe the registers so the values checked below can only come from the buffers
    Data<R> z(0);
    a = b = c = d = e = f = g = z;

    v_load_deinterleave(buf3, a, b, c);
    v_load_deinterleave(buf4, d, e, f, g);

    // Each comparison covers a whole vector, so one pass is enough
    // (no per-lane loop is needed).
    EXPECT_EQ(data1, Data<R>(a));
    EXPECT_EQ(data2, Data<R>(b));
    EXPECT_EQ(data3, Data<R>(c));

    EXPECT_EQ(data1, Data<R>(d));
    EXPECT_EQ(data2, Data<R>(e));
    EXPECT_EQ(data3, Data<R>(f));
    EXPECT_EQ(data4, Data<R>(g));

    return *this;
}
||||
|
||||
// v_expand and v_load_expand
|
||||
// v_load_expand must widen the first half of the input; v_expand must widen
// the low half into its first output and the high half into its second.
TheTest & test_expand()
{
    typedef typename RegTrait<R>::w_reg Rx2;
    Data<R> input;
    R reg = input;

    Data<Rx2> loaded = v_load_expand(input.d);

    Rx2 lo, hi;
    v_expand(reg, lo, hi);
    Data<Rx2> loData = lo, hiData = hi;

    const int wide = Rx2::nlanes;
    for (int lane = 0; lane != wide; ++lane)
    {
        EXPECT_EQ(input[lane], loaded[lane]);
        EXPECT_EQ(input[lane], loData[lane]);
        EXPECT_EQ(input[lane + wide], hiData[lane]);
    }

    return *this;
}
||||
|
||||
// v_load_expand_q must widen the leading lanes of the input into the q_reg type.
TheTest & test_expand_q()
{
    typedef typename RegTrait<R>::q_reg Rx4;
    Data<R> input;
    Data<Rx4> widened = v_load_expand_q(input.d);

    const int quad = Rx4::nlanes;
    for (int lane = 0; lane != quad; ++lane)
    {
        EXPECT_EQ(input[lane], widened[lane]);
    }

    return *this;
}
||||
|
||||
// operator+ and operator- must match the lane-wise saturate_cast of the scalar results.
TheTest & test_addsub()
{
    Data<R> lhs, rhs;
    rhs.reverse();
    R x = lhs, y = rhs;

    Data<R> sum = x + y;
    Data<R> diff = x - y;
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ(saturate_cast<LaneType>(lhs[lane] + rhs[lane]), sum[lane]);
        EXPECT_EQ(saturate_cast<LaneType>(lhs[lane] - rhs[lane]), diff[lane]);
    }

    return *this;
}
||||
|
||||
// v_add_wrap / v_sub_wrap must match the scalar result truncated to LaneType.
TheTest & test_addsub_wrap()
{
    Data<R> lhs, rhs;
    rhs.reverse();
    R x = lhs, y = rhs;

    Data<R> sum = v_add_wrap(x, y);
    Data<R> diff = v_sub_wrap(x, y);
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ((LaneType)(lhs[lane] + rhs[lane]), sum[lane]);
        EXPECT_EQ((LaneType)(lhs[lane] - rhs[lane]), diff[lane]);
    }
    return *this;
}
||||
|
||||
// operator* must match the lane-wise scalar product.
TheTest & test_mul()
{
    Data<R> lhs, rhs;
    rhs.reverse();
    R x = lhs, y = rhs;

    Data<R> product = x * y;
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ(lhs[lane] * rhs[lane], product[lane]);
    }

    return *this;
}
||||
|
||||
// operator/ must match the lane-wise scalar quotient.
TheTest & test_div()
{
    Data<R> lhs, rhs;
    rhs.reverse();
    R x = lhs, y = rhs;

    Data<R> quotient = x / y;
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ(lhs[lane] / rhs[lane], quotient[lane]);
    }

    return *this;
}
||||
|
||||
// v_mul_expand must put the widened products of the low half into its first
// output and those of the high half into its second output.
TheTest & test_mul_expand()
{
    typedef typename RegTrait<R>::w_reg Rx2;
    Data<R> lhs, rhs(2);
    R x = lhs, y = rhs;

    Rx2 lo, hi;
    v_mul_expand(x, y, lo, hi);
    Data<Rx2> loData = lo, hiData = hi;

    const int half = R::nlanes / 2;
    for (int lane = 0; lane != half; ++lane)
    {
        EXPECT_EQ((typename Rx2::lane_type)lhs[lane] * rhs[lane], loData[lane]);
        EXPECT_EQ((typename Rx2::lane_type)lhs[lane + half] * rhs[lane + half], hiData[lane]);
    }

    return *this;
}
||||
|
||||
template <int s> |
||||
TheTest & test_shift() |
||||
{ |
||||
Data<R> dataA; |
||||
R a = dataA; |
||||
|
||||
Data<R> resB = a << s, resC = v_shl<s>(a), resD = a >> s, resE = v_shr<s>(a); |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
{ |
||||
EXPECT_EQ(dataA[i] << s, resB[i]); |
||||
EXPECT_EQ(dataA[i] << s, resC[i]); |
||||
EXPECT_EQ(dataA[i] >> s, resD[i]); |
||||
EXPECT_EQ(dataA[i] >> s, resE[i]); |
||||
} |
||||
return *this; |
||||
} |
||||
|
||||
// Each comparison operator must yield a non-zero lane exactly where the
// scalar comparison of the corresponding lanes is true.
TheTest & test_cmp()
{
    Data<R> lhs, rhs;
    rhs.reverse();
    rhs += 1;
    R x = lhs, y = rhs;

    Data<R> eq = (x == y);
    Data<R> ne = (x != y);
    Data<R> gt = (x > y);
    Data<R> ge = (x >= y);
    Data<R> lt = (x < y);
    Data<R> le = (x <= y);

    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ(lhs[lane] == rhs[lane], eq[lane] != 0);
        EXPECT_EQ(lhs[lane] != rhs[lane], ne[lane] != 0);
        EXPECT_EQ(lhs[lane] > rhs[lane], gt[lane] != 0);
        EXPECT_EQ(lhs[lane] >= rhs[lane], ge[lane] != 0);
        EXPECT_EQ(lhs[lane] < rhs[lane], lt[lane] != 0);
        EXPECT_EQ(lhs[lane] <= rhs[lane], le[lane] != 0);
    }
    return *this;
}
||||
|
||||
// v_dotprod must return, per widened lane, the sum of the products of one
// adjacent pair of input lanes.
TheTest & test_dot_prod()
{
    typedef typename RegTrait<R>::w_reg Rx2;
    Data<R> lhs, rhs(2);
    R x = lhs, y = rhs;

    Data<Rx2> dot = v_dotprod(x, y);

    const int pairs = R::nlanes / 2;
    for (int p = 0; p != pairs; ++p)
    {
        EXPECT_EQ(lhs[p*2] * rhs[p*2] + lhs[p*2 + 1] * rhs[p*2 + 1], dot[p]);
    }
    return *this;
}
||||
|
||||
// The bitwise operators &, |, ^ and ~ must match their lane-wise scalar results.
TheTest & test_logic()
{
    Data<R> lhs, rhs(2);
    R x = lhs, y = rhs;

    Data<R> andRes = x & y;
    Data<R> orRes = x | y;
    Data<R> xorRes = x ^ y;
    Data<R> notRes = ~x;
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ(lhs[lane] & rhs[lane], andRes[lane]);
        EXPECT_EQ(lhs[lane] | rhs[lane], orRes[lane]);
        EXPECT_EQ(lhs[lane] ^ rhs[lane], xorRes[lane]);
        EXPECT_EQ((LaneType)~lhs[lane], notRes[lane]);
    }

    return *this;
}
||||
|
||||
// v_sqrt and v_invsqrt are checked on the default data, v_abs on its negation;
// results are compared as floats against std::sqrt / std::abs.
TheTest & test_sqrt_abs()
{
    Data<R> dataA, dataD;
    dataD *= -1.0;
    R a = dataA, d = dataD;

    Data<R> resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d);
    for (int i = 0; i < R::nlanes; ++i)
    {
        EXPECT_FLOAT_EQ((float)std::sqrt(dataA[i]), (float)resB[i]);
        EXPECT_FLOAT_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]);
        // Compare against the actual input of v_abs, and use std::abs so that a
        // floating-point overload is selected rather than the integer ::abs.
        EXPECT_FLOAT_EQ((float)std::abs(dataD[i]), (float)resE[i]);
    }

    return *this;
}
||||
|
||||
// v_min / v_max must match std::min / std::max applied lane by lane.
TheTest & test_min_max()
{
    Data<R> lhs, rhs;
    rhs.reverse();
    R x = lhs, y = rhs;

    Data<R> lowest = v_min(x, y);
    Data<R> highest = v_max(x, y);
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ(std::min(lhs[lane], rhs[lane]), lowest[lane]);
        EXPECT_EQ(std::max(lhs[lane], rhs[lane]), highest[lane]);
    }

    return *this;
}
||||
|
||||
// v_absdiff on integer lanes: the result is produced in the unsigned register
// type and must equal the absolute difference of the two inputs.
TheTest & test_absdiff()
{
    typedef typename RegTrait<R>::u_reg Ru;
    typedef typename Ru::lane_type u_type;
    Data<R> dataA(std::numeric_limits<LaneType>::max()),
            dataB(std::numeric_limits<LaneType>::min());
    dataA[0] = (LaneType)-1;
    dataB[0] = 1;
    dataA[1] = 2;
    dataB[1] = (LaneType)-2;
    R a = dataA, b = dataB;
    Data<Ru> resC = v_absdiff(a, b);
    // Sign-bit mask for signed lanes, 0 for unsigned ones. The shift is performed on
    // a u_type value: shifting the int literal 1 into (or past) its sign bit is not
    // well defined for 32- and 64-bit lanes.
    const u_type mask = std::numeric_limits<LaneType>::is_signed ? (u_type)((u_type)1 << (sizeof(u_type)*8 - 1)) : 0;
    for (int i = 0; i < Ru::nlanes; ++i)
    {
        // Flipping the sign bit maps signed values onto unsigned ones in the same order,
        // so the difference can be computed without signed overflow.
        u_type uA = dataA[i] ^ mask;
        u_type uB = dataB[i] ^ mask;
        EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]);
    }
    return *this;
}
||||
|
||||
// v_absdiff on floating-point lanes must equal the larger input minus the smaller one.
TheTest & test_float_absdiff()
{
    Data<R> lhs(std::numeric_limits<LaneType>::max());
    Data<R> rhs(std::numeric_limits<LaneType>::min());
    lhs[0] = -1;
    rhs[0] = 1;
    lhs[1] = 2;
    rhs[1] = -2;

    R x = lhs, y = rhs;
    Data<R> dist = v_absdiff(x, y);
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        EXPECT_EQ(lhs[lane] > rhs[lane] ? lhs[lane] - rhs[lane] : rhs[lane] - lhs[lane], dist[lane]);
    }
    return *this;
}
||||
|
||||
// v_reduce_min / v_reduce_max / v_reduce_sum on default-initialised data.
// The min and max expectations imply that the lanes hold 1..nlanes
// (NOTE(review): confirm against Data<R>'s default constructor).
TheTest & test_reduce()
{
    Data<R> dataA;
    R a = dataA;
    EXPECT_EQ((LaneType)1, v_reduce_min(a));
    EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a));
    // Sum of 1..nlanes. The previous expectation, (1 + nlanes)*2, matches this value
    // only for 4-lane registers.
    EXPECT_EQ((LaneType)(R::nlanes * (R::nlanes + 1) / 2), v_reduce_sum(a));
    return *this;
}
||||
|
||||
// Checks v_signmask, v_check_all, v_check_any and v_select.
TheTest & test_mask()
{
    typedef V_TypeTraits<LaneType> Traits;
    typedef typename Traits::int_type int_type;

    Data<R> oneNeg, allPos, allNeg, ones(1), twos(2);
    oneNeg[1] *= (LaneType)-1;   // only lane 1 is negated
    allNeg *= (LaneType)-1;      // every lane is negated
    R a = oneNeg, b = allPos, c = allNeg, d = ones, e = twos;

    // only the bit of lane 1 is expected to be set
    EXPECT_EQ(2, v_signmask(a));

    EXPECT_EQ(false, v_check_all(a));
    EXPECT_EQ(false, v_check_all(b));
    EXPECT_EQ(true, v_check_all(c));

    EXPECT_EQ(true, v_check_any(a));
    EXPECT_EQ(false, v_check_any(b));
    EXPECT_EQ(true, v_check_any(c));

    // v_select(mask, x, y) must behave as the bitwise blend (x & mask) | (y & ~mask)
    R blended = v_select(b, d, e);
    Data<R> blendedData = blended;
    for (int lane = 0; lane != R::nlanes; ++lane)
    {
        int_type bits = Traits::reinterpret_int(allPos[lane]);
        EXPECT_EQ((Traits::reinterpret_int(ones[lane]) & bits)
                | (Traits::reinterpret_int(twos[lane]) & ~bits),
                  Traits::reinterpret_int(blendedData[lane]));
    }

    return *this;
}
||||
|
||||
template <int s> |
||||
TheTest & test_pack() |
||||
{ |
||||
typedef typename RegTrait<R>::w_reg Rx2; |
||||
typedef typename Rx2::lane_type w_type; |
||||
Data<Rx2> dataA, dataB; |
||||
dataA += std::numeric_limits<LaneType>::is_signed ? -10 : 10; |
||||
dataB *= 10; |
||||
Rx2 a = dataA, b = dataB; |
||||
|
||||
Data<R> resC = v_pack(a, b); |
||||
Data<R> resD = v_rshr_pack<s>(a, b); |
||||
|
||||
Data<R> resE(0); |
||||
v_pack_store(resE.d, b); |
||||
|
||||
Data<R> resF(0); |
||||
v_rshr_pack_store<s>(resF.d, b); |
||||
|
||||
const int n = Rx2::nlanes; |
||||
const w_type add = (w_type)1 << (s - 1); |
||||
for (int i = 0; i < n; ++i) |
||||
{ |
||||
EXPECT_EQ(saturate_cast<LaneType>(dataA[i]), resC[i]); |
||||
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resC[i + n]); |
||||
EXPECT_EQ(saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]); |
||||
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]); |
||||
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resE[i]); |
||||
EXPECT_EQ((LaneType)0, resE[i + n]); |
||||
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]); |
||||
EXPECT_EQ((LaneType)0, resF[i + n]); |
||||
} |
||||
return *this; |
||||
} |
||||
|
||||
template <int s> |
||||
TheTest & test_pack_u() |
||||
{ |
||||
typedef typename RegTrait<R>::w_reg Rx2; |
||||
typedef typename RegTrait<Rx2>::int_reg Ri2; |
||||
typedef typename Ri2::lane_type w_type; |
||||
|
||||
Data<Ri2> dataA, dataB; |
||||
dataA += -10; |
||||
dataB *= 10; |
||||
Ri2 a = dataA, b = dataB; |
||||
|
||||
Data<R> resC = v_pack_u(a, b); |
||||
Data<R> resD = v_rshr_pack_u<s>(a, b); |
||||
|
||||
Data<R> resE(0); |
||||
v_pack_u_store(resE.d, b); |
||||
|
||||
Data<R> resF(0); |
||||
v_rshr_pack_u_store<s>(resF.d, b); |
||||
|
||||
const int n = Ri2::nlanes; |
||||
const w_type add = (w_type)1 << (s - 1); |
||||
for (int i = 0; i < n; ++i) |
||||
{ |
||||
EXPECT_EQ(saturate_cast<LaneType>(dataA[i]), resC[i]); |
||||
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resC[i + n]); |
||||
EXPECT_EQ(saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]); |
||||
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]); |
||||
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resE[i]); |
||||
EXPECT_EQ((LaneType)0, resE[i + n]); |
||||
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]); |
||||
EXPECT_EQ((LaneType)0, resF[i + n]); |
||||
} |
||||
return *this; |
||||
} |
||||
|
||||
// Checks v_zip, v_recombine, v_combine_low and v_combine_high on two
// registers (the default ramp and the ramp scaled by 10).
// Returns *this so that checks can be chained.
TheTest & test_unpack()
{
    Data<R> srcA, srcB;
    srcB *= 10;
    R a = srcA;
    R b = srcB;

    R zipLo, zipHi;
    v_zip(a, b, zipLo, zipHi);

    R recLo, recHi;
    v_recombine(a, b, recLo, recHi);

    R lo = v_combine_low(a, b);
    R hi = v_combine_high(a, b);

    Data<R> resZipLo = zipLo, resZipHi = zipHi;
    Data<R> resRecLo = recLo, resRecHi = recHi;
    Data<R> resLo = lo, resHi = hi;

    const int half = R::nlanes/2;
    for (int i = 0; i < half; ++i)
    {
        // v_zip: lanes of a and b interleaved, low halves then high halves
        EXPECT_EQ(srcA[i], resZipLo[i*2]);
        EXPECT_EQ(srcB[i], resZipLo[i*2+1]);
        EXPECT_EQ(srcA[i+half], resZipHi[i*2]);
        EXPECT_EQ(srcB[i+half], resZipHi[i*2+1]);

        // v_recombine: (low a, low b) and (high a, high b)
        EXPECT_EQ(srcA[i], resRecLo[i]);
        EXPECT_EQ(srcB[i], resRecLo[i+half]);
        EXPECT_EQ(srcA[i+half], resRecHi[i]);
        EXPECT_EQ(srcB[i+half], resRecHi[i+half]);

        // v_combine_low / v_combine_high: same layout as v_recombine
        EXPECT_EQ(srcA[i], resLo[i]);
        EXPECT_EQ(srcB[i], resLo[i+half]);
        EXPECT_EQ(srcA[i+half], resHi[i]);
        EXPECT_EQ(srcB[i+half], resHi[i+half]);
    }

    return *this;
}
||||
|
||||
template<int s> |
||||
TheTest & test_extract() |
||||
{ |
||||
Data<R> dataA, dataB; |
||||
dataB *= 10; |
||||
R a = dataA, b = dataB; |
||||
|
||||
Data<R> resC = v_extract<s>(a, b); |
||||
|
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
{ |
||||
if (i + s >= R::nlanes) |
||||
EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]); |
||||
else |
||||
EXPECT_EQ(dataA[i + s], resC[i]); |
||||
} |
||||
|
||||
return *this; |
||||
} |
||||
|
||||
// Checks the float -> int conversions (v_round, v_trunc, v_floor, v_ceil)
// and v_magnitude, v_sqr_magnitude, v_muladd against scalar references.
// Returns *this so that checks can be chained.
TheTest & test_float_math()
{
    typedef typename RegTrait<R>::int_reg Ri;
    typedef typename Ri::lane_type int_lane;

    Data<R> x, y, z;
    x *= 1.1;
    y += 10;
    R a1 = x;
    R a2 = y;
    R a3 = z;

    Data<Ri> rounded = v_round(a1);
    Data<Ri> truncated = v_trunc(a1);
    Data<Ri> floored = v_floor(a1);
    Data<Ri> ceiled = v_ceil(a1);

    Data<R> magnitude = v_magnitude(a1, a2);
    Data<R> sqrMagnitude = v_sqr_magnitude(a1, a2);
    Data<R> mulAdded = v_muladd(a1, a2, a3);

    for (int i = 0; i < R::nlanes; ++i)
    {
        EXPECT_EQ(cvRound(x[i]), rounded[i]);
        EXPECT_EQ((int_lane)x[i], truncated[i]);
        EXPECT_EQ(cvFloor(x[i]), floored[i]);
        EXPECT_EQ(cvCeil(x[i]), ceiled[i]);

        EXPECT_DOUBLE_EQ(std::sqrt(x[i]*x[i] + y[i]*y[i]), magnitude[i]);
        EXPECT_DOUBLE_EQ(x[i]*x[i] + y[i]*y[i], sqrMagnitude[i]);
        EXPECT_DOUBLE_EQ(x[i]*y[i] + z[i], mulAdded[i]);
    }

    return *this;
}
||||
|
||||
// Checks v_cvt_f32: each converted lane must equal the source lane cast to
// the float32 lane type. Only the lanes present in both registers are compared.
// Returns *this so that checks can be chained.
TheTest & test_float_cvt32()
{
    typedef v_float32x4 Rt;

    Data<R> src;
    src *= 1.1;
    R a = src;

    Rt converted = v_cvt_f32(a);
    Data<Rt> convertedData = converted;

    const int common = std::min<int>(Rt::nlanes, R::nlanes);
    for (int lane = 0; lane < common; ++lane)
    {
        EXPECT_EQ((typename Rt::lane_type)src[lane], convertedData[lane]);
    }
    return *this;
}
||||
|
||||
// Checks v_cvt_f64: each converted lane must equal the source lane cast to
// the float64 lane type. Only the lanes present in both registers are compared.
// Compiles to a no-op when CV_SIMD128_64F is not set.
// Returns *this so that checks can be chained.
TheTest & test_float_cvt64()
{
#if CV_SIMD128_64F
    typedef v_float64x2 Rt;

    Data<R> src;
    src *= 1.1;
    R a = src;

    Rt converted = v_cvt_f64(a);
    Data<Rt> convertedData = converted;

    const int common = std::min<int>(Rt::nlanes, R::nlanes);
    for (int lane = 0; lane < common; ++lane)
    {
        EXPECT_EQ((typename Rt::lane_type)src[lane], convertedData[lane]);
    }
#endif
    return *this;
}
||||
|
||||
// Checks v_matmul(v, m0, m1, m2, m3) against the scalar reference
// v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3 evaluated per lane.
// Returns *this so that checks can be chained.
TheTest & test_matmul()
{
    Data<R> vec, m0, m1, m2, m3;
    m1.reverse();
    m2 += 2;
    m3 *= 0.3;
    R v = vec;
    R a = m0;
    R b = m1;
    R c = m2;
    R d = m3;

    Data<R> product = v_matmul(v, a, b, c, d);
    for (int lane = 0; lane < R::nlanes; ++lane)
    {
        // Summation order is kept as-is: it affects floating-point rounding.
        LaneType expected = vec[0] * m0[lane]
                          + vec[1] * m1[lane]
                          + vec[2] * m2[lane]
                          + vec[3] * m3[lane];
        EXPECT_DOUBLE_EQ(expected, product[lane]);
    }
    return *this;
}
||||
|
||||
// Checks v_transpose4x4: lane i of input register k must end up in lane k of
// output register i.
// Returns *this so that checks can be chained.
TheTest & test_transpose()
{
    Data<R> in0, in1, in2, in3;
    in1 *= 5;
    in2 *= 10;
    in3 *= 15;
    R a = in0;
    R b = in1;
    R c = in2;
    R d = in3;

    R out0, out1, out2, out3;
    v_transpose4x4(a, b, c, d,
                   out0, out1, out2, out3);

    Data<R> transposed[4] = {out0, out1, out2, out3};
    for (int row = 0; row < R::nlanes; ++row)
    {
        EXPECT_EQ(in0[row], transposed[row][0]);
        EXPECT_EQ(in1[row], transposed[row][1]);
        EXPECT_EQ(in2[row], transposed[row][2]);
        EXPECT_EQ(in3[row], transposed[row][3]);
    }
    return *this;
}
||||
|
||||
}; |
||||
|
||||
|
||||
//============= 8-bit integer =====================================================================
|
||||
|
||||
// Runs the TheTest checks that apply to v_uint8x16.
TEST(hal_intrin, uint8x16) {
    TheTest<v_uint8x16>()
        .test_loadstore()
        .test_interleave()
        .test_expand()
        .test_expand_q()
        .test_addsub()
        .test_addsub_wrap()
        .test_cmp()
        .test_logic()
        .test_min_max()
        .test_absdiff()
        .test_mask()
        // pack checks with shifts of 1, 2, 3 and 8 bits
        .test_pack<1>()
        .test_pack<2>()
        .test_pack<3>()
        .test_pack<8>()
        .test_pack_u<1>()
        .test_pack_u<2>()
        .test_pack_u<3>()
        .test_pack_u<8>()
        .test_unpack()
        // extract at offsets 0, 1, 8 and 15
        .test_extract<0>()
        .test_extract<1>()
        .test_extract<8>()
        .test_extract<15>()
        ;
}
||||
|
||||
// Runs the TheTest checks that apply to v_int8x16.
TEST(hal_intrin, int8x16) {
    TheTest<v_int8x16>()
        .test_loadstore()
        .test_interleave()
        .test_expand()
        .test_expand_q()
        .test_addsub()
        .test_addsub_wrap()
        .test_cmp()
        .test_logic()
        .test_min_max()
        .test_absdiff()
        .test_mask()
        // pack checks with shifts of 1, 2, 3 and 8 bits
        .test_pack<1>()
        .test_pack<2>()
        .test_pack<3>()
        .test_pack<8>()
        .test_unpack()
        // extract at offsets 0, 1, 8 and 15
        .test_extract<0>()
        .test_extract<1>()
        .test_extract<8>()
        .test_extract<15>()
        ;
}
||||
|
||||
//============= 16-bit integer =====================================================================
|
||||
|
||||
// Runs the TheTest checks that apply to v_uint16x8.
TEST(hal_intrin, uint16x8) {
    TheTest<v_uint16x8>()
        .test_loadstore()
        .test_interleave()
        .test_expand()
        .test_addsub()
        .test_addsub_wrap()
        .test_mul()
        .test_mul_expand()
        .test_cmp()
        .test_shift<1>()
        .test_shift<8>()
        .test_logic()
        .test_min_max()
        .test_absdiff()
        .test_mask()
        // pack checks with shifts of 1, 2, 7 and 16 bits
        .test_pack<1>()
        .test_pack<2>()
        .test_pack<7>()
        .test_pack<16>()
        .test_pack_u<1>()
        .test_pack_u<2>()
        .test_pack_u<7>()
        .test_pack_u<16>()
        .test_unpack()
        // extract at offsets 0, 1, 4 and 7
        .test_extract<0>()
        .test_extract<1>()
        .test_extract<4>()
        .test_extract<7>()
        ;
}
||||
|
||||
// Runs the TheTest checks that apply to v_int16x8.
TEST(hal_intrin, int16x8) {
    TheTest<v_int16x8>()
        .test_loadstore()
        .test_interleave()
        .test_expand()
        .test_addsub()
        .test_addsub_wrap()
        .test_mul()
        .test_mul_expand()
        .test_cmp()
        .test_shift<1>()
        .test_shift<8>()
        .test_dot_prod()
        .test_logic()
        .test_min_max()
        .test_absdiff()
        .test_mask()
        // pack checks with shifts of 1, 2, 7 and 16 bits
        .test_pack<1>()
        .test_pack<2>()
        .test_pack<7>()
        .test_pack<16>()
        .test_unpack()
        // extract at offsets 0, 1, 4 and 7
        .test_extract<0>()
        .test_extract<1>()
        .test_extract<4>()
        .test_extract<7>()
        ;
}
||||
|
||||
//============= 32-bit integer =====================================================================
|
||||
|
||||
// Runs the TheTest checks that apply to v_uint32x4.
TEST(hal_intrin, uint32x4) {
    TheTest<v_uint32x4>()
        .test_loadstore()
        .test_interleave()
        .test_expand()
        .test_addsub()
        .test_mul()
        .test_mul_expand()
        .test_cmp()
        .test_shift<1>()
        .test_shift<8>()
        .test_logic()
        .test_min_max()
        .test_absdiff()
        .test_reduce()
        .test_mask()
        // pack checks with shifts of 1, 2, 15 and 32 bits
        .test_pack<1>()
        .test_pack<2>()
        .test_pack<15>()
        .test_pack<32>()
        .test_unpack()
        // extract at every lane offset
        .test_extract<0>()
        .test_extract<1>()
        .test_extract<2>()
        .test_extract<3>()
        .test_transpose()
        ;
}
||||
|
||||
// Runs the TheTest checks that apply to v_int32x4.
TEST(hal_intrin, int32x4) {
    TheTest<v_int32x4>()
        .test_loadstore()
        .test_interleave()
        .test_expand()
        .test_addsub()
        .test_mul()
        .test_cmp()
        .test_shift<1>()
        .test_shift<8>()
        .test_logic()
        .test_min_max()
        .test_absdiff()
        .test_reduce()
        .test_mask()
        // pack checks with shifts of 1, 2, 15 and 32 bits
        .test_pack<1>()
        .test_pack<2>()
        .test_pack<15>()
        .test_pack<32>()
        .test_unpack()
        // extract at every lane offset
        .test_extract<0>()
        .test_extract<1>()
        .test_extract<2>()
        .test_extract<3>()
        .test_float_cvt32()
        .test_float_cvt64()
        .test_transpose()
        ;
}
||||
|
||||
//============= 64-bit integer =====================================================================
|
||||
|
||||
// Runs the TheTest checks that apply to v_uint64x2.
TEST(hal_intrin, uint64x2) {
    TheTest<v_uint64x2>()
        .test_loadstore()
        .test_addsub()
        .test_shift<1>()
        .test_shift<8>()
        .test_logic()
        .test_extract<0>()
        .test_extract<1>()
        ;
}
||||
|
||||
// Runs the TheTest checks that apply to v_int64x2.
TEST(hal_intrin, int64x2) {
    TheTest<v_int64x2>()
        .test_loadstore()
        .test_addsub()
        .test_shift<1>()
        .test_shift<8>()
        .test_logic()
        .test_extract<0>()
        .test_extract<1>()
        ;
}
||||
|
||||
//============= Floating point =====================================================================
|
||||
|
||||
// Runs the TheTest checks that apply to v_float32x4.
TEST(hal_intrin, float32x4) {
    TheTest<v_float32x4>()
        .test_loadstore()
        .test_interleave()
        .test_addsub()
        .test_mul()
        .test_div()
        .test_cmp()
        .test_sqrt_abs()
        .test_min_max()
        .test_float_absdiff()
        .test_reduce()
        .test_mask()
        .test_unpack()
        .test_float_math()
        .test_float_cvt64()
        .test_matmul()
        .test_transpose()
        ;
}
||||
|
||||
#if CV_SIMD128_64F
// Runs the TheTest checks that apply to v_float64x2; only built when
// CV_SIMD128_64F is set.
TEST(hal_intrin, float64x2) {
    TheTest<v_float64x2>()
        .test_loadstore()
        .test_addsub()
        .test_mul()
        .test_div()
        .test_cmp()
        .test_sqrt_abs()
        .test_min_max()
        .test_float_absdiff()
        .test_mask()
        .test_unpack()
        .test_float_math()
        .test_float_cvt32()
        ;
}
#endif
@ -0,0 +1,234 @@ |
||||
#ifndef _TEST_UTILS_HPP_ |
||||
#define _TEST_UTILS_HPP_ |
||||
|
||||
#include "opencv2/hal/intrin.hpp" |
||||
#include "opencv2/ts.hpp" |
||||
#include <ostream> |
||||
#include <algorithm> |
||||
|
||||
template <typename R> struct Data;

// initializer<N>::init builds a register R from a Data<R> by passing its
// first N elements to R's N-argument constructor. One specialization exists
// per supported lane count (16, 8, 4, 2).
template <int N> struct initializer;

// 16 lanes
template <> struct initializer<16>
{
    template <typename R> static R init(const Data<R> & d)
    {
        return R(d[0],  d[1],  d[2],  d[3],
                 d[4],  d[5],  d[6],  d[7],
                 d[8],  d[9],  d[10], d[11],
                 d[12], d[13], d[14], d[15]);
    }
};

// 8 lanes
template <> struct initializer<8>
{
    template <typename R> static R init(const Data<R> & d)
    {
        return R(d[0], d[1], d[2], d[3],
                 d[4], d[5], d[6], d[7]);
    }
};

// 4 lanes
template <> struct initializer<4>
{
    template <typename R> static R init(const Data<R> & d)
    {
        return R(d[0], d[1],
                 d[2], d[3]);
    }
};

// 2 lanes
template <> struct initializer<2>
{
    template <typename R> static R init(const Data<R> & d)
    {
        return R(d[0],
                 d[1]);
    }
};
||||
|
||||
//==================================================================================================
|
||||
|
||||
template <typename R> struct Data |
||||
{ |
||||
typedef typename R::lane_type LaneType; |
||||
Data() |
||||
{ |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
d[i] = (LaneType)(i + 1); |
||||
} |
||||
Data(LaneType val) |
||||
{ |
||||
fill(val); |
||||
} |
||||
Data(const R & r) |
||||
{ |
||||
*this = r; |
||||
} |
||||
operator R () |
||||
{ |
||||
return initializer<R::nlanes>().init(*this); |
||||
} |
||||
Data<R> & operator=(const R & r) |
||||
{ |
||||
v_store(d, r); |
||||
return *this; |
||||
} |
||||
template <typename T> Data<R> & operator*=(T m) |
||||
{ |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
d[i] *= (LaneType)m; |
||||
return *this; |
||||
} |
||||
template <typename T> Data<R> & operator+=(T m) |
||||
{ |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
d[i] += (LaneType)m; |
||||
return *this; |
||||
} |
||||
void fill(LaneType val) |
||||
{ |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
d[i] = val; |
||||
} |
||||
void reverse() |
||||
{ |
||||
for (int i = 0; i < R::nlanes / 2; ++i) |
||||
std::swap(d[i], d[R::nlanes - i - 1]); |
||||
} |
||||
const LaneType & operator[](int i) const |
||||
{ |
||||
CV_Assert(i >= 0 && i < R::nlanes); |
||||
return d[i]; |
||||
} |
||||
LaneType & operator[](int i) |
||||
{ |
||||
CV_Assert(i >= 0 && i < R::nlanes); |
||||
return d[i]; |
||||
} |
||||
const LaneType * mid() const |
||||
{ |
||||
return d + R::nlanes / 2; |
||||
} |
||||
LaneType * mid() |
||||
{ |
||||
return d + R::nlanes / 2; |
||||
} |
||||
bool operator==(const Data<R> & other) const |
||||
{ |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
if (d[i] != other.d[i]) |
||||
return false; |
||||
return true; |
||||
} |
||||
void clear() |
||||
{ |
||||
fill(0); |
||||
} |
||||
bool isZero() const |
||||
{ |
||||
return isValue(0); |
||||
} |
||||
bool isValue(uchar val) const |
||||
{ |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
if (d[i] != val) |
||||
return false; |
||||
return true; |
||||
} |
||||
|
||||
LaneType d[R::nlanes]; |
||||
}; |
||||
|
||||
template<typename R> struct AlignedData |
||||
{ |
||||
Data<R> CV_DECL_ALIGNED(16) a; // aligned
|
||||
char dummy; |
||||
Data<R> u; // unaligned
|
||||
}; |
||||
|
||||
template <typename R> std::ostream & operator<<(std::ostream & out, const Data<R> & d) |
||||
{ |
||||
out << "{ "; |
||||
for (int i = 0; i < R::nlanes; ++i) |
||||
{ |
||||
// out << std::hex << +V_TypeTraits<typename R::lane_type>::reinterpret_int(d.d[i]);
|
||||
out << +d.d[i]; |
||||
if (i + 1 < R::nlanes) |
||||
out << ", "; |
||||
} |
||||
out << " }"; |
||||
return out; |
||||
} |
||||
|
||||
//==================================================================================================
|
||||
|
||||
template <typename R> struct RegTrait; |
||||
|
||||
template <> struct RegTrait<cv::v_uint8x16> { |
||||
typedef cv::v_uint16x8 w_reg; |
||||
typedef cv::v_uint32x4 q_reg; |
||||
typedef cv::v_uint8x16 u_reg; |
||||
static cv::v_uint8x16 zero() { return cv::v_setzero_u8(); } |
||||
static cv::v_uint8x16 all(uchar val) { return cv::v_setall_u8(val); } |
||||
}; |
||||
template <> struct RegTrait<cv::v_int8x16> { |
||||
typedef cv::v_int16x8 w_reg; |
||||
typedef cv::v_int32x4 q_reg; |
||||
typedef cv::v_uint8x16 u_reg; |
||||
static cv::v_int8x16 zero() { return cv::v_setzero_s8(); } |
||||
static cv::v_int8x16 all(schar val) { return cv::v_setall_s8(val); } |
||||
}; |
||||
|
||||
template <> struct RegTrait<cv::v_uint16x8> { |
||||
typedef cv::v_uint32x4 w_reg; |
||||
typedef cv::v_int16x8 int_reg; |
||||
typedef cv::v_uint16x8 u_reg; |
||||
static cv::v_uint16x8 zero() { return cv::v_setzero_u16(); } |
||||
static cv::v_uint16x8 all(ushort val) { return cv::v_setall_u16(val); } |
||||
}; |
||||
|
||||
template <> struct RegTrait<cv::v_int16x8> { |
||||
typedef cv::v_int32x4 w_reg; |
||||
typedef cv::v_uint16x8 u_reg; |
||||
static cv::v_int16x8 zero() { return cv::v_setzero_s16(); } |
||||
static cv::v_int16x8 all(short val) { return cv::v_setall_s16(val); } |
||||
}; |
||||
|
||||
template <> struct RegTrait<cv::v_uint32x4> { |
||||
typedef cv::v_uint64x2 w_reg; |
||||
typedef cv::v_int32x4 int_reg; |
||||
typedef cv::v_uint32x4 u_reg; |
||||
static cv::v_uint32x4 zero() { return cv::v_setzero_u32(); } |
||||
static cv::v_uint32x4 all(unsigned val) { return cv::v_setall_u32(val); } |
||||
}; |
||||
|
||||
template <> struct RegTrait<cv::v_int32x4> { |
||||
typedef cv::v_int64x2 w_reg; |
||||
typedef cv::v_uint32x4 u_reg; |
||||
static cv::v_int32x4 zero() { return cv::v_setzero_s32(); } |
||||
static cv::v_int32x4 all(int val) { return cv::v_setall_s32(val); } |
||||
}; |
||||
|
||||
template <> struct RegTrait<cv::v_uint64x2> { |
||||
static cv::v_uint64x2 zero() { return cv::v_setzero_u64(); } |
||||
static cv::v_uint64x2 all(uint64 val) { return cv::v_setall_u64(val); } |
||||
}; |
||||
|
||||
template <> struct RegTrait<cv::v_int64x2> { |
||||
static cv::v_int64x2 zero() { return cv::v_setzero_s64(); } |
||||
static cv::v_int64x2 all(int64 val) { return cv::v_setall_s64(val); } |
||||
}; |
||||
|
||||
template <> struct RegTrait<cv::v_float32x4> { |
||||
typedef cv::v_int32x4 int_reg; |
||||
typedef cv::v_float32x4 u_reg; |
||||
static cv::v_float32x4 zero() { return cv::v_setzero_f32(); } |
||||
static cv::v_float32x4 all(float val) { return cv::v_setall_f32(val); } |
||||
}; |
||||
|
||||
#if CV_SIMD128_64F |
||||
template <> struct RegTrait<cv::v_float64x2> { |
||||
typedef cv::v_int32x4 int_reg; |
||||
typedef cv::v_float64x2 u_reg; |
||||
static cv::v_float64x2 zero() { return cv::v_setzero_f64(); } |
||||
static cv::v_float64x2 all(double val) { return cv::v_setall_f64(val); } |
||||
}; |
||||
|
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,3 @@ |
||||
#include "opencv2/ts.hpp" |
||||
|
||||
CV_TEST_MAIN("cv") |
@ -0,0 +1,11 @@ |
||||
#ifndef __OPENCV_HAL_TEST_PRECOMP_HPP__ |
||||
#define __OPENCV_HAL_TEST_PRECOMP_HPP__ |
||||
|
||||
#include <iostream> |
||||
#include <limits> |
||||
#include "opencv2/ts.hpp" |
||||
#include "opencv2/hal.hpp" |
||||
#include "opencv2/hal/defs.h" |
||||
#include "opencv2/hal/intrin.hpp" |
||||
|
||||
#endif |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue