Fall back to cv::gpu::convertTo when a buffer is not 16-byte aligned.

modules/core/src/gpumat.cpp
  * Adds isAligned(ptr, size), which reports whether the address held by
    ptr is an exact multiple of size.
  * In the second hunk, before the funcs[][][] table lookup, both src.data
    and dst.data are tested with isAligned(..., 16). If either test fails,
    cv::gpu::convertTo(src, dst) is called and the function returns without
    consulting the table.

modules/gpu/src/cuda/element_operations.cu
  * Removes the TYPE_VEC macro from the "Compare" section. This copy of the
    hunk is cut off right after the start of the next removed line.

NOTE(review): line breaks and leading indentation were missing from this copy
and have been reconstructed from the +/- markers and the hunk line counts.
The template lists (<typename T>, <size_t>, <type, cn>) were also missing and
are restored by inference. Verify both against the target tree before applying.

diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp
index bf128fbcd1..781d143349 100644
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -1037,6 +1037,11 @@ namespace
         }
     };
 
+    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
+    {
+        return reinterpret_cast<size_t>(ptr) % size == 0;
+    }
+
     //////////////////////////////////////////////////////////////////////////
     // CudaFuncTable
 
@@ -1165,6 +1170,13 @@ namespace
                 CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
             }
 
+            bool aligned = isAligned(src.data, 16) && isAligned(dst.data, 16);
+            if (!aligned)
+            {
+                cv::gpu::convertTo(src, dst);
+                return;
+            }
+
             const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
             CV_DbgAssert(func != 0);
 
diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu
index 6488b64b62..f447bf4141 100644
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -1158,11 +1158,8 @@ namespace cv { namespace gpu { namespace device
     //////////////////////////////////////////////////////////////////////////////////////
     // Compare
 
-#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
-
-    template