@ -0,0 +1,57 @@ |
||||
Image Segmentation with Distance Transform and Watershed Algorithm {#tutorial_distance_transform} |
||||
============= |
||||
|
||||
Goal |
||||
---- |
||||
|
||||
In this tutorial you will learn how to: |
||||
|
||||
- Use the OpenCV function @ref cv::filter2D in order to perform some Laplacian filtering for image sharpening
||||
- Use the OpenCV function @ref cv::distanceTransform in order to obtain the derived representation of a binary image, where the value of each pixel is replaced by its distance to the nearest background pixel |
||||
- Use the OpenCV function @ref cv::watershed in order to isolate objects in the image from the background |
||||
|
||||
Theory |
||||
------ |
||||
|
||||
Code |
||||
---- |
||||
|
||||
This tutorial's code is shown in the lines below. You can also download it from
||||
[here](https://github.com/Itseez/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp). |
||||
@includelineno samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp |
||||
|
||||
Explanation / Result |
||||
-------------------- |
||||
|
||||
-# Load the source image and check that it was loaded without any problems, then show it:
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp load_image |
||||
 |
||||
|
||||
-# Then, if we have an image with a white background, it is good to transform it to black. This will help us to discriminate the foreground objects more easily when we apply the Distance Transform:
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp black_bg |
||||
 |
||||
|
||||
-# Afterwards we will sharpen our image in order to accentuate the edges of the foreground objects. We will apply a Laplacian filter with a quite strong kernel (an approximation of the second derivative). A condensed sketch of these preprocessing steps is given after this list:
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp sharp |
||||
 |
||||
 |
||||
|
||||
-# Now we transform our new sharpened source image into a grayscale and a binary one, respectively:
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp bin |
||||
 |
||||
|
||||
-# We are ready now to apply the Distance Transform on the binary image. Moreover, we normalize the output image in order to be able to visualize and threshold the result:
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp dist |
||||
 |
||||
|
||||
-# We threshold the *dist* image and then perform a morphological operation (dilation) in order to extract the peaks from the above image:
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp peaks |
||||
 |
||||
|
||||
-# Then, from each blob, we create a seed/marker for the watershed algorithm with the help of the @ref cv::findContours function (see the marker and watershed sketches after this list):
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp seeds |
||||
 |
||||
|
||||
-# Finally, we can apply the watershed algorithm and visualize the result:
||||
@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp watershed |
||||
 |
@ -0,0 +1,81 @@ |
||||
/* See LICENSE file in the root OpenCV directory */ |
||||
|
||||
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__ |
||||
#define __OPENCV_CORE_OPENCL_SVM_HPP__ |
||||
|
||||
//
|
||||
// Internal usage only (binary compatibility is not guaranteed)
|
||||
//
|
||||
#ifndef __OPENCV_BUILD |
||||
#error Internal header file |
||||
#endif |
||||
|
||||
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM) |
||||
#include "runtime/opencl_core.hpp" |
||||
#include "runtime/opencl_svm_20.hpp" |
||||
#include "runtime/opencl_svm_hsa_extension.hpp" |
||||
|
||||
namespace cv { namespace ocl { namespace svm { |
||||
|
||||
struct SVMCapabilities |
||||
{ |
||||
enum Value |
||||
{ |
||||
SVM_COARSE_GRAIN_BUFFER = (1 << 0), |
||||
SVM_FINE_GRAIN_BUFFER = (1 << 1), |
||||
SVM_FINE_GRAIN_SYSTEM = (1 << 2), |
||||
SVM_ATOMICS = (1 << 3), |
||||
}; |
||||
int value_; |
||||
|
||||
SVMCapabilities(int capabilities = 0) : value_(capabilities) { } |
||||
operator int() const { return value_; } |
||||
|
||||
inline bool isNoSVMSupport() const { return value_ == 0; } |
||||
inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; } |
||||
inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; } |
||||
inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; } |
||||
inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; } |
||||
}; |
||||
|
||||
CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context); |
||||
|
||||
struct SVMFunctions |
||||
{ |
||||
clSVMAllocAMD_fn fn_clSVMAlloc; |
||||
clSVMFreeAMD_fn fn_clSVMFree; |
||||
clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer; |
||||
//clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
|
||||
//clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
|
||||
clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy; |
||||
clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill; |
||||
clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap; |
||||
clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap; |
||||
|
||||
inline SVMFunctions() |
||||
: fn_clSVMAlloc(NULL), fn_clSVMFree(NULL), |
||||
fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/ |
||||
/*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL), |
||||
fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL) |
||||
{ |
||||
// nothing
|
||||
} |
||||
|
||||
inline bool isValid() const |
||||
{ |
||||
return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer && |
||||
/*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy && |
||||
fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap; |
||||
} |
||||
}; |
||||
|
||||
// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
|
||||
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context); |
||||
|
||||
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags); |
||||
|
||||
}}} //namespace cv::ocl::svm
|
||||
#endif |
||||
|
||||
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
|
||||
/* End of file. */ |
@ -0,0 +1,52 @@ |
||||
/* See LICENSE file in the root OpenCV directory */ |
||||
|
||||
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__ |
||||
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__ |
||||
|
||||
#if defined(HAVE_OPENCL_SVM) |
||||
#include "opencl_core.hpp" |
||||
|
||||
#include "opencl_svm_definitions.hpp" |
||||
|
||||
#ifndef HAVE_OPENCL_STATIC |
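// When OpenCL is not linked statically, redirect the SVM API names to the
// *_pfn function pointers declared below, which are resolved at run time.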
||||
|
||||
#undef clSVMAlloc |
||||
#define clSVMAlloc clSVMAlloc_pfn |
||||
#undef clSVMFree |
||||
#define clSVMFree clSVMFree_pfn |
||||
#undef clSetKernelArgSVMPointer |
||||
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn |
||||
#undef clSetKernelExecInfo |
||||
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
|
||||
#undef clEnqueueSVMFree |
||||
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
|
||||
#undef clEnqueueSVMMemcpy |
||||
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn |
||||
#undef clEnqueueSVMMemFill |
||||
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn |
||||
#undef clEnqueueSVMMap |
||||
#define clEnqueueSVMMap clEnqueueSVMMap_pfn |
||||
#undef clEnqueueSVMUnmap |
||||
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn |
||||
|
||||
extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment); |
||||
extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer); |
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value); |
||||
//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
|
||||
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
|
||||
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
|
||||
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
|
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, |
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); |
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, |
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); |
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, |
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); |
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, |
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); |
||||
|
||||
#endif // HAVE_OPENCL_STATIC
|
||||
|
||||
#endif // HAVE_OPENCL_SVM
|
||||
|
||||
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
|
@ -0,0 +1,42 @@ |
||||
/* See LICENSE file in the root OpenCV directory */ |
||||
|
||||
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__ |
||||
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__ |
||||
|
||||
#if defined(HAVE_OPENCL_SVM) |
||||
#if defined(CL_VERSION_2_0) |
||||
|
||||
// OpenCL 2.0 contains SVM definitions
|
||||
|
||||
#else |
||||
|
||||
typedef cl_bitfield cl_device_svm_capabilities; |
||||
typedef cl_bitfield cl_svm_mem_flags; |
||||
typedef cl_uint cl_kernel_exec_info; |
||||
|
||||
//
|
||||
// TODO Add real values after OpenCL 2.0 release
|
||||
//
|
||||
|
||||
#ifndef CL_DEVICE_SVM_CAPABILITIES |
||||
#define CL_DEVICE_SVM_CAPABILITIES 0x1053 |
||||
|
||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) |
||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) |
||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) |
||||
#define CL_DEVICE_SVM_ATOMICS (1 << 3) |
||||
#endif |
||||
|
||||
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER |
||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) |
||||
#endif |
||||
|
||||
#ifndef CL_MEM_SVM_ATOMICS |
||||
#define CL_MEM_SVM_ATOMICS (1 << 11) |
||||
#endif |
||||
|
||||
|
||||
#endif // CL_VERSION_2_0
|
||||
#endif // HAVE_OPENCL_SVM
|
||||
|
||||
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
|
@ -0,0 +1,166 @@ |
||||
/* See LICENSE file in the root OpenCV directory */ |
||||
|
||||
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__ |
||||
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__ |
||||
|
||||
#if defined(HAVE_OPENCL_SVM) |
||||
#include "opencl_core.hpp" |
||||
|
||||
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD |
||||
//
|
||||
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
|
||||
// Below is the original copyright.
|
||||
//
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2013 The Khronos Group Inc. |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining a |
||||
* copy of this software and/or associated documentation files (the |
||||
* "Materials"), to deal in the Materials without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||
* permit persons to whom the Materials are furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be included |
||||
* in all copies or substantial portions of the Materials. |
||||
* |
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||
******************************************************************************/ |
||||
|
||||
/*******************************************
|
||||
* Shared Virtual Memory (SVM) extension |
||||
*******************************************/ |
||||
typedef cl_bitfield cl_device_svm_capabilities_amd; |
||||
typedef cl_bitfield cl_svm_mem_flags_amd; |
||||
typedef cl_uint cl_kernel_exec_info_amd; |
||||
|
||||
/* cl_device_info */ |
||||
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053 |
||||
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054 |
||||
|
||||
/* cl_device_svm_capabilities_amd */ |
||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0) |
||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1) |
||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2) |
||||
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3) |
||||
|
||||
/* cl_svm_mem_flags_amd */ |
||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10) |
||||
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11) |
||||
|
||||
/* cl_mem_info */ |
||||
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109 |
||||
|
||||
/* cl_kernel_exec_info_amd */ |
||||
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6 |
||||
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7 |
||||
|
||||
/* cl_command_type */ |
||||
#define CL_COMMAND_SVM_FREE_AMD 0x1209 |
||||
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A |
||||
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B |
||||
#define CL_COMMAND_SVM_MAP_AMD 0x120C |
||||
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D |
||||
|
||||
typedef CL_API_ENTRY void* |
||||
(CL_API_CALL * clSVMAllocAMD_fn)( |
||||
cl_context /* context */, |
||||
cl_svm_mem_flags_amd /* flags */, |
||||
size_t /* size */, |
||||
unsigned int /* alignment */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY void |
||||
(CL_API_CALL * clSVMFreeAMD_fn)( |
||||
cl_context /* context */, |
||||
void* /* svm_pointer */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int |
||||
(CL_API_CALL * clEnqueueSVMFreeAMD_fn)( |
||||
cl_command_queue /* command_queue */, |
||||
cl_uint /* num_svm_pointers */, |
||||
void** /* svm_pointers */, |
||||
void (CL_CALLBACK *)( /*pfn_free_func*/ |
||||
cl_command_queue /* queue */, |
||||
cl_uint /* num_svm_pointers */, |
||||
void** /* svm_pointers */, |
||||
void* /* user_data */), |
||||
void* /* user_data */, |
||||
cl_uint /* num_events_in_wait_list */, |
||||
const cl_event* /* event_wait_list */, |
||||
cl_event* /* event */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int |
||||
(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)( |
||||
cl_command_queue /* command_queue */, |
||||
cl_bool /* blocking_copy */, |
||||
void* /* dst_ptr */, |
||||
const void* /* src_ptr */, |
||||
size_t /* size */, |
||||
cl_uint /* num_events_in_wait_list */, |
||||
const cl_event* /* event_wait_list */, |
||||
cl_event* /* event */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int |
||||
(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)( |
||||
cl_command_queue /* command_queue */, |
||||
void* /* svm_ptr */, |
||||
const void* /* pattern */, |
||||
size_t /* pattern_size */, |
||||
size_t /* size */, |
||||
cl_uint /* num_events_in_wait_list */, |
||||
const cl_event* /* event_wait_list */, |
||||
cl_event* /* event */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int |
||||
(CL_API_CALL * clEnqueueSVMMapAMD_fn)( |
||||
cl_command_queue /* command_queue */, |
||||
cl_bool /* blocking_map */, |
||||
cl_map_flags /* map_flags */, |
||||
void* /* svm_ptr */, |
||||
size_t /* size */, |
||||
cl_uint /* num_events_in_wait_list */, |
||||
const cl_event* /* event_wait_list */, |
||||
cl_event* /* event */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int |
||||
(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)( |
||||
cl_command_queue /* command_queue */, |
||||
void* /* svm_ptr */, |
||||
cl_uint /* num_events_in_wait_list */, |
||||
const cl_event* /* event_wait_list */, |
||||
cl_event* /* event */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int |
||||
(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)( |
||||
cl_kernel /* kernel */, |
||||
cl_uint /* arg_index */, |
||||
const void * /* arg_value */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int |
||||
(CL_API_CALL * clSetKernelExecInfoAMD_fn)( |
||||
cl_kernel /* kernel */, |
||||
cl_kernel_exec_info_amd /* param_name */, |
||||
size_t /* param_value_size */, |
||||
const void * /* param_value */ |
||||
) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
#endif |
||||
|
||||
#endif // HAVE_OPENCL_SVM
|
||||
|
||||
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
|
@ -0,0 +1,645 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_CORE_SSE_UTILS_HPP__ |
||||
#define __OPENCV_CORE_SSE_UTILS_HPP__ |
||||
|
||||
#ifndef __cplusplus |
||||
# error sse_utils.hpp header must be compiled as C++ |
||||
#endif |
||||
|
||||
#if CV_SSE2 |
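// The helpers below convert between interleaved (packed) and planar channel
// layouts held in SSE registers: _mm_deinterleave_* split 2-, 3- or 4-channel
// pixel data into per-channel vectors, and _mm_interleave_* perform the
// inverse, using repeated unpack/pack rounds.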
||||
|
||||
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) |
||||
{ |
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g0); |
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g0); |
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_g1); |
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_g1); |
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk2); |
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk2); |
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk3); |
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk3); |
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk2); |
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk2); |
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk3); |
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk3); |
||||
|
||||
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk2); |
||||
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk2); |
||||
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk3); |
||||
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk3); |
||||
|
||||
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk2); |
||||
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk2); |
||||
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk3); |
||||
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk3); |
||||
} |
||||
|
||||
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, |
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1) |
||||
{ |
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g1); |
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g1); |
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b0); |
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b0); |
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_b1); |
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_b1); |
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk3); |
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk3); |
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk4); |
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk4); |
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk5); |
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk5); |
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk3); |
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk3); |
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk4); |
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk4); |
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk5); |
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk5); |
||||
|
||||
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk3); |
||||
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk3); |
||||
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk4); |
||||
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk4); |
||||
__m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk5); |
||||
__m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk5); |
||||
|
||||
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk3); |
||||
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk3); |
||||
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk4); |
||||
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk4); |
||||
v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk5); |
||||
v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk5); |
||||
} |
||||
|
||||
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, |
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) |
||||
{ |
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0); |
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0); |
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1); |
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b1); |
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_a0); |
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_a0); |
||||
__m128i layer1_chunk6 = _mm_unpacklo_epi8(v_g1, v_a1); |
||||
__m128i layer1_chunk7 = _mm_unpackhi_epi8(v_g1, v_a1); |
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk4); |
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk4); |
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk5); |
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk5); |
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk6); |
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk6); |
||||
__m128i layer2_chunk6 = _mm_unpacklo_epi8(layer1_chunk3, layer1_chunk7); |
||||
__m128i layer2_chunk7 = _mm_unpackhi_epi8(layer1_chunk3, layer1_chunk7); |
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk4); |
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk4); |
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk5); |
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk5); |
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk6); |
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk6); |
||||
__m128i layer3_chunk6 = _mm_unpacklo_epi8(layer2_chunk3, layer2_chunk7); |
||||
__m128i layer3_chunk7 = _mm_unpackhi_epi8(layer2_chunk3, layer2_chunk7); |
||||
|
||||
__m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk4); |
||||
__m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk4); |
||||
__m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk5); |
||||
__m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk5); |
||||
__m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk6); |
||||
__m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk6); |
||||
__m128i layer4_chunk6 = _mm_unpacklo_epi8(layer3_chunk3, layer3_chunk7); |
||||
__m128i layer4_chunk7 = _mm_unpackhi_epi8(layer3_chunk3, layer3_chunk7); |
||||
|
||||
v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk4); |
||||
v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk4); |
||||
v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk5); |
||||
v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk5); |
||||
v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk6); |
||||
v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk6); |
||||
v_a0 = _mm_unpacklo_epi8(layer4_chunk3, layer4_chunk7); |
||||
v_a1 = _mm_unpackhi_epi8(layer4_chunk3, layer4_chunk7); |
||||
} |
||||
|
||||
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) |
||||
{ |
||||
__m128i v_mask = _mm_set1_epi16(0x00ff); |
||||
|
||||
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); |
||||
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); |
||||
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); |
||||
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); |
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); |
||||
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); |
||||
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); |
||||
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); |
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); |
||||
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); |
||||
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); |
||||
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); |
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); |
||||
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); |
||||
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); |
||||
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); |
||||
|
||||
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); |
||||
v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); |
||||
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); |
||||
v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); |
||||
} |
||||
|
||||
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, |
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1) |
||||
{ |
||||
__m128i v_mask = _mm_set1_epi16(0x00ff); |
||||
|
||||
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); |
||||
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); |
||||
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); |
||||
__m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); |
||||
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); |
||||
__m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8)); |
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); |
||||
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); |
||||
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); |
||||
__m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); |
||||
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask)); |
||||
__m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8)); |
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); |
||||
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); |
||||
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); |
||||
__m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); |
||||
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); |
||||
__m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8)); |
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); |
||||
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); |
||||
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); |
||||
__m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); |
||||
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); |
||||
__m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8)); |
||||
|
||||
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); |
||||
v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); |
||||
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); |
||||
v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); |
||||
v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); |
||||
v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8)); |
||||
} |
||||
|
||||
inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, |
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) |
||||
{ |
||||
__m128i v_mask = _mm_set1_epi16(0x00ff); |
||||
|
||||
__m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); |
||||
__m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); |
||||
__m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); |
||||
__m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); |
||||
__m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); |
||||
__m128i layer4_chunk6 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8)); |
||||
__m128i layer4_chunk3 = _mm_packus_epi16(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask)); |
||||
__m128i layer4_chunk7 = _mm_packus_epi16(_mm_srli_epi16(v_a0, 8), _mm_srli_epi16(v_a1, 8)); |
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); |
||||
__m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); |
||||
__m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); |
||||
__m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); |
||||
__m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask)); |
||||
__m128i layer3_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8)); |
||||
__m128i layer3_chunk3 = _mm_packus_epi16(_mm_and_si128(layer4_chunk6, v_mask), _mm_and_si128(layer4_chunk7, v_mask)); |
||||
__m128i layer3_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk6, 8), _mm_srli_epi16(layer4_chunk7, 8)); |
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); |
||||
__m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); |
||||
__m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); |
||||
__m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); |
||||
__m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); |
||||
__m128i layer2_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8)); |
||||
__m128i layer2_chunk3 = _mm_packus_epi16(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask)); |
||||
__m128i layer2_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk6, 8), _mm_srli_epi16(layer3_chunk7, 8)); |
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); |
||||
__m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); |
||||
__m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); |
||||
__m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); |
||||
__m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); |
||||
__m128i layer1_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8)); |
||||
__m128i layer1_chunk3 = _mm_packus_epi16(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask)); |
||||
__m128i layer1_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk6, 8), _mm_srli_epi16(layer2_chunk7, 8)); |
||||
|
||||
v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); |
||||
v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); |
||||
v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); |
||||
v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); |
||||
v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); |
||||
v_a0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8)); |
||||
v_g1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask)); |
||||
v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8)); |
||||
} |
||||
|
||||
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) |
||||
{ |
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0); |
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0); |
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1); |
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_g1); |
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk2); |
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk2); |
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk3); |
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk3); |
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk2); |
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk2); |
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk3); |
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk3); |
||||
|
||||
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk2); |
||||
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk2); |
||||
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk3); |
||||
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk3); |
||||
} |
||||
|
||||
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, |
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1) |
||||
{ |
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1); |
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1); |
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0); |
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b0); |
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_b1); |
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_b1); |
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk3); |
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk3); |
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk4); |
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk4); |
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk5); |
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk5); |
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk3); |
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk3); |
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk4); |
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk4); |
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk5); |
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk5); |
||||
|
||||
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk3); |
||||
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk3); |
||||
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk4); |
||||
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk4); |
||||
v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk5); |
||||
v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk5); |
||||
} |
||||
|
||||
inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, |
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) |
||||
{ |
||||
__m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0); |
||||
__m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0); |
||||
__m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b1); |
||||
__m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b1); |
||||
__m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_a0); |
||||
__m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0); |
||||
__m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1); |
||||
__m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1); |
||||
|
||||
__m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4); |
||||
__m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4); |
||||
__m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5); |
||||
__m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk5); |
||||
__m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk6); |
||||
__m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk6); |
||||
__m128i layer2_chunk6 = _mm_unpacklo_epi16(layer1_chunk3, layer1_chunk7); |
||||
__m128i layer2_chunk7 = _mm_unpackhi_epi16(layer1_chunk3, layer1_chunk7); |
||||
|
||||
__m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk4); |
||||
__m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk4); |
||||
__m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk5); |
||||
__m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk5); |
||||
__m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk6); |
||||
__m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk6); |
||||
__m128i layer3_chunk6 = _mm_unpacklo_epi16(layer2_chunk3, layer2_chunk7); |
||||
__m128i layer3_chunk7 = _mm_unpackhi_epi16(layer2_chunk3, layer2_chunk7); |
||||
|
||||
v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk4); |
||||
v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk4); |
||||
v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk5); |
||||
v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk5); |
||||
v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk6); |
||||
v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk6); |
||||
v_a0 = _mm_unpacklo_epi16(layer3_chunk3, layer3_chunk7); |
||||
v_a1 = _mm_unpackhi_epi16(layer3_chunk3, layer3_chunk7); |
||||
} |
||||
|
||||
#if CV_SSE4_1 |
||||
|
||||
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) |
||||
{ |
||||
__m128i v_mask = _mm_set1_epi32(0x0000ffff); |
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); |
||||
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); |
||||
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); |
||||
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); |
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); |
||||
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); |
||||
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); |
||||
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); |
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); |
||||
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); |
||||
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); |
||||
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); |
||||
|
||||
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); |
||||
v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); |
||||
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); |
||||
v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); |
||||
} |
||||
|
||||
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, |
||||
__m128i & v_g1, __m128i & v_b0, __m128i & v_b1) |
||||
{ |
||||
__m128i v_mask = _mm_set1_epi32(0x0000ffff); |
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); |
||||
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); |
||||
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); |
||||
__m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); |
||||
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); |
||||
__m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16)); |
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); |
||||
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); |
||||
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); |
||||
__m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); |
||||
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); |
||||
__m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16)); |
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); |
||||
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); |
||||
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); |
||||
__m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); |
||||
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); |
||||
__m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16)); |
||||
|
||||
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); |
||||
v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); |
||||
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); |
||||
v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); |
||||
v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); |
||||
v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16)); |
||||
} |
||||
|
||||
inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, |
||||
__m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) |
||||
{ |
||||
__m128i v_mask = _mm_set1_epi32(0x0000ffff); |
||||
|
||||
__m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); |
||||
__m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); |
||||
__m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); |
||||
__m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); |
||||
__m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); |
||||
__m128i layer3_chunk6 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16)); |
||||
__m128i layer3_chunk3 = _mm_packus_epi32(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask)); |
||||
__m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16)); |
||||
|
||||
__m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); |
||||
__m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); |
||||
__m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); |
||||
__m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); |
||||
__m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); |
||||
__m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16)); |
||||
__m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask)); |
||||
__m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16)); |
||||
|
||||
__m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); |
||||
__m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); |
||||
__m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); |
||||
__m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); |
||||
__m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); |
||||
__m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16)); |
||||
__m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask)); |
||||
__m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16)); |
||||
|
||||
v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); |
||||
v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); |
||||
v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); |
||||
v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); |
||||
v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); |
||||
v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16)); |
||||
v_g1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask)); |
||||
v_a1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk6, 16), _mm_srli_epi32(layer1_chunk7, 16)); |
||||
} |
||||
|
||||
#endif // CV_SSE4_1
|
||||
|
||||
inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1) |
||||
{ |
||||
__m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g0); |
||||
    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g0);
    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_g1);
    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_g1);

    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk2);
    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk2);
    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk3);
    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk3);

    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk2);
    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk2);
    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk3);
    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk3);
}

inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
                                __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
{
    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1);
    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1);
    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0);
    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0);
    __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1);
    __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1);

    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3);
    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3);
    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4);
    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk4);
    __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk5);
    __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk5);

    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk3);
    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk3);
    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk4);
    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk4);
    v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk5);
    v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk5);
}

inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
                                __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
{
    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0);
    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0);
    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1);
    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b1);
    __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_a0);
    __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_a0);
    __m128 layer1_chunk6 = _mm_unpacklo_ps(v_g1, v_a1);
    __m128 layer1_chunk7 = _mm_unpackhi_ps(v_g1, v_a1);

    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk4);
    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk4);
    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk5);
    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk5);
    __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk6);
    __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk6);
    __m128 layer2_chunk6 = _mm_unpacklo_ps(layer1_chunk3, layer1_chunk7);
    __m128 layer2_chunk7 = _mm_unpackhi_ps(layer1_chunk3, layer1_chunk7);

    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk4);
    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk4);
    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk5);
    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk5);
    v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk6);
    v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk6);
    v_a0 = _mm_unpacklo_ps(layer2_chunk3, layer2_chunk7);
    v_a1 = _mm_unpackhi_ps(layer2_chunk3, layer2_chunk7);
}

inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
{
    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);

    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
    __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
    __m128 layer2_chunk3 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);

    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);

    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
    v_g0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
    v_g1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
}

inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
                              __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
{
    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);

    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
    __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
    __m128 layer2_chunk4 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
    __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
    __m128 layer2_chunk5 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);

    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
    __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
    __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);

    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
    v_g1 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
    v_b0 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
    v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
    v_b1 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
}

inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
                              __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
{
    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);

    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
    __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
    __m128 layer2_chunk5 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
    __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
    __m128 layer2_chunk6 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
    __m128 layer2_chunk3 = _mm_shuffle_ps(v_a0, v_a1, mask_lo);
    __m128 layer2_chunk7 = _mm_shuffle_ps(v_a0, v_a1, mask_hi);

    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
    __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
    __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
    __m128 layer1_chunk6 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_lo);
    __m128 layer1_chunk7 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_hi);

    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
    v_b0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
    v_b1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
    v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
    v_a0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
    v_g1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_lo);
    v_a1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_hi);
}
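
// Illustrative usage sketch (hypothetical helper, not part of the original header):
// the two-channel _mm_deinterleave_ps overload above takes four registers holding
// 16 interleaved floats (x0 y0 x1 y1 ...) and leaves the x values in v_r0/v_r1 and
// the y values in v_g0/v_g1; _mm_interleave_ps performs the inverse shuffle.
// The buffer layout and names below are assumptions made for the example.
inline void sse_example_split_xy(const float * xy /* 16 interleaved floats */,
                                 float * x /* 8 floats out */, float * y /* 8 floats out */)
{
    __m128 v_r0 = _mm_loadu_ps(xy);      // x0 y0 x1 y1
    __m128 v_r1 = _mm_loadu_ps(xy + 4);  // x2 y2 x3 y3
    __m128 v_g0 = _mm_loadu_ps(xy + 8);  // x4 y4 x5 y5
    __m128 v_g1 = _mm_loadu_ps(xy + 12); // x6 y6 x7 y7

    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1);

    _mm_storeu_ps(x, v_r0);     // x0 x1 x2 x3
    _mm_storeu_ps(x + 4, v_r1); // x4 x5 x6 x7
    _mm_storeu_ps(y, v_g0);     // y0 y1 y2 y3
    _mm_storeu_ps(y + 4, v_g1); // y4 y5 y6 y7
}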

#endif // CV_SSE2

#endif //__OPENCV_CORE_SSE_UTILS_HPP__
@ -1,9 +0,0 @@
if(IOS OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS))
  ocv_module_disable(cuda)
endif()

set(the_description "CUDA-accelerated Computer Vision")

ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 /wd4515 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)

ocv_define_module(cuda opencv_calib3d opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
@ -1,135 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CUDA_HPP__
#define __OPENCV_CUDA_HPP__

#ifndef __cplusplus
#  error cuda.hpp header must be compiled as C++
#endif

#include "opencv2/core/cuda.hpp"

/**
  @addtogroup cuda
  @{
    @defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
  @}
 */

namespace cv { namespace cuda {

//////////////////////////// Labeling ////////////////////////////

//! @addtogroup cuda
//! @{

//! performs labeling via graph cuts of a 2D regular 4-connected graph.
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
                         GpuMat& buf, Stream& stream = Stream::Null());

//! performs labeling via graph cuts of a 2D regular 8-connected graph.
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
                         GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
                         GpuMat& labels,
                         GpuMat& buf, Stream& stream = Stream::Null());

//! computes the mask for generalized flood fill components labeling.
CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());

//! performs connected components labeling.
CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
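
// Illustrative usage sketch (hypothetical helper, not part of the original header):
// labels the connected components of a binary CV_8UC1 image on the GPU by building
// the connectivity mask first and then running the labeling step. The lo/hi
// thresholds shown here are an assumption for a 0/255 input mask.
inline void exampleLabelComponents(const GpuMat& binaryImage, GpuMat& components)
{
    GpuMat mask;
    connectivityMask(binaryImage, mask, cv::Scalar::all(0), cv::Scalar::all(2));
    labelComponents(mask, components);
}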

//! @}

//////////////////////////// Calib3d ////////////////////////////

//! @addtogroup cuda_calib3d
//! @{

CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
                                GpuMat& dst, Stream& stream = Stream::Null());

CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
                              const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
                              Stream& stream = Stream::Null());

/** @brief Finds the object pose from 3D-2D point correspondences.

@param object Single-row matrix of object points.
@param image Single-row matrix of image points.
@param camera_mat 3x3 matrix of intrinsic camera parameters.
@param dist_coef Distortion coefficients. See undistortPoints for details.
@param rvec Output 3D rotation vector.
@param tvec Output 3D translation vector.
@param use_extrinsic_guess Flag to indicate that the function must use rvec and tvec as an
initial transformation guess. It is not supported for now.
@param num_iters Maximum number of RANSAC iterations.
@param max_dist Euclidean distance threshold to detect whether a point is an inlier or not.
@param min_inlier_count Flag to indicate that the function must stop if a greater or equal number
of inliers is achieved. It is not supported for now.
@param inliers Output vector of inlier indices.
*/
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
                               const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
                               int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
                               std::vector<int>* inliers=NULL);
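
// Illustrative usage sketch (hypothetical helper, not part of the original header):
// estimates a pose with the RANSAC-based solver declared above. The single-row
// CV_32FC3 object / CV_32FC2 image point layouts and the calibration inputs are
// assumptions based on the parameter documentation; the iteration count and
// inlier threshold are arbitrary example values.
inline void exampleSolvePnPRansac(const Mat& objectPoints1xN, const Mat& imagePoints1xN,
                                  const Mat& cameraMat3x3, const Mat& distCoefs)
{
    Mat rvec, tvec;
    std::vector<int> inliers;
    solvePnPRansac(objectPoints1xN, imagePoints1xN, cameraMat3x3, distCoefs, rvec, tvec,
                   false /*use_extrinsic_guess*/, 200 /*num_iters*/,
                   10.0f /*max_dist*/, 100 /*min_inlier_count*/, &inliers);
    CV_Assert(rvec.total() == 3 && tvec.total() == 3);
}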

//! @}

//////////////////////////// VStab ////////////////////////////

//! @addtogroup cuda
//! @{

//! removes points (CV_32FC2, single row matrix) with zero mask value
CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
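
// Illustrative usage sketch (hypothetical helper, not part of the original header):
// drops tracked point pairs whose status byte is zero, e.g. after a sparse
// optical-flow step; both 1xN point matrices shrink to the surviving columns.
inline void exampleCompactPoints(GpuMat& d_prevPts, GpuMat& d_nextPts, const GpuMat& d_status)
{
    compactPoints(d_prevPts, d_nextPts, d_status);
    CV_Assert(d_prevPts.cols == d_nextPts.cols);
}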

CV_EXPORTS void calcWobbleSuppressionMaps(
        int left, int idx, int right, Size size, const Mat &ml, const Mat &mr,
        GpuMat &mapx, GpuMat &mapy);

//! @}

}} // namespace cv { namespace cuda {

#endif /* __OPENCV_CUDA_HPP__ */
@ -1,96 +0,0 @@
#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::cuda; |
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER) |
||||
|
||||
void cv::cuda::compactPoints(GpuMat&, GpuMat&, const GpuMat&) { throw_no_cuda(); } |
||||
void cv::cuda::calcWobbleSuppressionMaps( |
||||
int, int, int, Size, const Mat&, const Mat&, GpuMat&, GpuMat&) { throw_no_cuda(); } |
||||
|
||||
#else |
||||
|
||||
namespace cv { namespace cuda { namespace device { namespace globmotion { |
||||
|
||||
int compactPoints(int N, float *points0, float *points1, const uchar *mask); |
||||
|
||||
void calcWobbleSuppressionMaps( |
||||
int left, int idx, int right, int width, int height, |
||||
const float *ml, const float *mr, PtrStepSzf mapx, PtrStepSzf mapy); |
||||
|
||||
}}}} |
||||
|
||||
void cv::cuda::compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask) |
||||
{ |
||||
CV_Assert(points0.rows == 1 && points1.rows == 1 && mask.rows == 1); |
||||
CV_Assert(points0.type() == CV_32FC2 && points1.type() == CV_32FC2 && mask.type() == CV_8U); |
||||
CV_Assert(points0.cols == mask.cols && points1.cols == mask.cols); |
||||
|
||||
int npoints = points0.cols; |
||||
int remaining = cv::cuda::device::globmotion::compactPoints( |
||||
npoints, (float*)points0.data, (float*)points1.data, mask.data); |
||||
|
||||
points0 = points0.colRange(0, remaining); |
||||
points1 = points1.colRange(0, remaining); |
||||
} |
||||
|
||||
|
||||
void cv::cuda::calcWobbleSuppressionMaps( |
||||
int left, int idx, int right, Size size, const Mat &ml, const Mat &mr, |
||||
GpuMat &mapx, GpuMat &mapy) |
||||
{ |
||||
CV_Assert(ml.size() == Size(3, 3) && ml.type() == CV_32F && ml.isContinuous()); |
||||
CV_Assert(mr.size() == Size(3, 3) && mr.type() == CV_32F && mr.isContinuous()); |
||||
|
||||
mapx.create(size, CV_32F); |
||||
mapy.create(size, CV_32F); |
||||
|
||||
cv::cuda::device::globmotion::calcWobbleSuppressionMaps( |
||||
left, idx, right, size.width, size.height, |
||||
ml.ptr<float>(), mr.ptr<float>(), mapx, mapy); |
||||
} |
||||
|
||||
#endif |
@ -1,60 +0,0 @@
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__

#include "opencv2/cuda.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/calib3d.hpp"

#include "opencv2/core/private.cuda.hpp"
#include "opencv2/core/utility.hpp"

#include "opencv2/opencv_modules.hpp"

#ifdef HAVE_OPENCV_CUDALEGACY
#  include "opencv2/cudalegacy/private.hpp"
#endif

#endif /* __OPENCV_PRECOMP_H__ */
@ -1,90 +0,0 @@
#include "test_precomp.hpp" |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
|
||||
struct CompactPoints : testing::TestWithParam<cuda::DeviceInfo> |
||||
{ |
||||
virtual void SetUp() { cuda::setDevice(GetParam().deviceID()); } |
||||
}; |
||||
|
||||
CUDA_TEST_P(CompactPoints, CanCompactizeSmallInput) |
||||
{ |
||||
Mat src0(1, 3, CV_32FC2); |
||||
src0.at<Point2f>(0,0) = Point2f(0,0); |
||||
src0.at<Point2f>(0,1) = Point2f(0,1); |
||||
src0.at<Point2f>(0,2) = Point2f(0,2); |
||||
|
||||
Mat src1(1, 3, CV_32FC2); |
||||
src1.at<Point2f>(0,0) = Point2f(1,0); |
||||
src1.at<Point2f>(0,1) = Point2f(1,1); |
||||
src1.at<Point2f>(0,2) = Point2f(1,2); |
||||
|
||||
Mat mask(1, 3, CV_8U); |
||||
mask.at<uchar>(0,0) = 1; |
||||
mask.at<uchar>(0,1) = 0; |
||||
mask.at<uchar>(0,2) = 1; |
||||
|
||||
cuda::GpuMat dsrc0(src0), dsrc1(src1), dmask(mask); |
||||
cuda::compactPoints(dsrc0, dsrc1, dmask); |
||||
|
||||
dsrc0.download(src0); |
||||
dsrc1.download(src1); |
||||
|
||||
ASSERT_EQ(2, src0.cols); |
||||
ASSERT_EQ(2, src1.cols); |
||||
|
||||
ASSERT_TRUE(src0.at<Point2f>(0,0) == Point2f(0,0)); |
||||
ASSERT_TRUE(src0.at<Point2f>(0,1) == Point2f(0,2)); |
||||
|
||||
ASSERT_TRUE(src1.at<Point2f>(0,0) == Point2f(1,0)); |
||||
ASSERT_TRUE(src1.at<Point2f>(0,1) == Point2f(1,2)); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_GlobalMotion, CompactPoints, ALL_DEVICES); |
||||
|
||||
#endif // HAVE_CUDA
|
@ -1,45 +0,0 @@
#include "test_precomp.hpp" |
||||
|
||||
CV_CUDA_TEST_MAIN("gpu") |
@ -1,66 +0,0 @@
#ifdef __GNUC__
#  pragma GCC diagnostic ignored "-Wmissing-declarations"
#  if defined __clang__ || defined __APPLE__
#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
#    pragma GCC diagnostic ignored "-Wextra"
#  endif
#endif

#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__

#include <fstream>

#include "opencv2/ts.hpp"
#include "opencv2/ts/cuda_test.hpp"

#include "opencv2/cuda.hpp"
#include "opencv2/core.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/calib3d.hpp"

#include "cvconfig.h"

#endif
@ -0,0 +1,249 @@
#include "perf_precomp.hpp" |
||||
|
||||
#ifdef HAVE_OPENCV_CUDAIMGPROC |
||||
# include "opencv2/cudaimgproc.hpp" |
||||
#endif |
||||
|
||||
using namespace std; |
||||
using namespace testing; |
||||
using namespace perf; |
||||
|
||||
#if defined(HAVE_XINE) || \ |
||||
defined(HAVE_GSTREAMER) || \
|
||||
defined(HAVE_QUICKTIME) || \
|
||||
defined(HAVE_QTKIT) || \
|
||||
defined(HAVE_AVFOUNDATION) || \
|
||||
defined(HAVE_FFMPEG) || \
|
||||
defined(WIN32) /* assume that we have ffmpeg */ |
||||
|
||||
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 |
||||
#else |
||||
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0 |
||||
#endif |
||||
|
||||
//////////////////////////////////////////////////////
// FGDStatModel

#if BUILD_WITH_VIDEO_INPUT_SUPPORT

DEF_PARAM_TEST_1(Video, string);

PERF_TEST_P(Video, FGDStatModel,
            Values(string("gpu/video/768x576.avi")))
{
    const int numIters = 10;

    declare.time(60);

    const string inputFile = perf::TestBase::getDataPath(GetParam());

    cv::VideoCapture cap(inputFile);
    ASSERT_TRUE(cap.isOpened());

    cv::Mat frame;
    cap >> frame;
    ASSERT_FALSE(frame.empty());

    if (PERF_RUN_CUDA())
    {
        cv::cuda::GpuMat d_frame(frame), foreground;

        cv::Ptr<cv::cuda::BackgroundSubtractorFGD> d_fgd = cv::cuda::createBackgroundSubtractorFGD();
        d_fgd->apply(d_frame, foreground);

        int i = 0;

        // collect performance data
        for (; i < numIters; ++i)
        {
            cap >> frame;
            ASSERT_FALSE(frame.empty());

            d_frame.upload(frame);

            startTimer();
            if (!next())
                break;

            d_fgd->apply(d_frame, foreground);

            stopTimer();
        }

        // process last frame in sequence to get data for sanity test
        for (; i < numIters; ++i)
        {
            cap >> frame;
            ASSERT_FALSE(frame.empty());

            d_frame.upload(frame);

            d_fgd->apply(d_frame, foreground);
        }
    }
    else
    {
        FAIL_NO_CPU();
    }

    SANITY_CHECK_NOTHING();
}

#endif

//////////////////////////////////////////////////////
// GMG

#if BUILD_WITH_VIDEO_INPUT_SUPPORT

DEF_PARAM_TEST(Video_Cn_MaxFeatures, string, MatCn, int);

PERF_TEST_P(Video_Cn_MaxFeatures, GMG,
            Combine(Values(string("gpu/video/768x576.avi")),
                    CUDA_CHANNELS_1_3_4,
                    Values(20, 40, 60)))
{
    const int numIters = 150;

    const std::string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
    const int cn = GET_PARAM(1);
    const int maxFeatures = GET_PARAM(2);

    cv::VideoCapture cap(inputFile);
    ASSERT_TRUE(cap.isOpened());

    cv::Mat frame;
    cap >> frame;
    ASSERT_FALSE(frame.empty());

    if (cn != 3)
    {
        cv::Mat temp;
        if (cn == 1)
            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
        else
            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
        cv::swap(temp, frame);
    }

    if (PERF_RUN_CUDA())
    {
        cv::cuda::GpuMat d_frame(frame);
        cv::cuda::GpuMat foreground;

        cv::Ptr<cv::cuda::BackgroundSubtractorGMG> d_gmg = cv::cuda::createBackgroundSubtractorGMG();
        d_gmg->setMaxFeatures(maxFeatures);

        d_gmg->apply(d_frame, foreground);

        int i = 0;

        // collect performance data
        for (; i < numIters; ++i)
        {
            cap >> frame;
            if (frame.empty())
            {
                cap.release();
                cap.open(inputFile);
                cap >> frame;
            }

            if (cn != 3)
            {
                cv::Mat temp;
                if (cn == 1)
                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
                else
                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
                cv::swap(temp, frame);
            }

            d_frame.upload(frame);

            startTimer();
            if (!next())
                break;

            d_gmg->apply(d_frame, foreground);

            stopTimer();
        }

        // process last frame in sequence to get data for sanity test
        for (; i < numIters; ++i)
        {
            cap >> frame;
            if (frame.empty())
            {
                cap.release();
                cap.open(inputFile);
                cap >> frame;
            }

            if (cn != 3)
            {
                cv::Mat temp;
                if (cn == 1)
                    cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
                else
                    cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
                cv::swap(temp, frame);
            }

            d_frame.upload(frame);

            d_gmg->apply(d_frame, foreground);
        }
    }
    else
    {
        FAIL_NO_CPU();
    }

    SANITY_CHECK_NOTHING();
}

#endif