Patch summary (free text ahead of the first "diff --git" header):

  * modules/core/src/gpumat.cpp
      - Adds the file-local helper isAligned(ptr, size), which reports whether
        the numeric value of a pointer is a multiple of `size`.
      - In the routine that dispatches through
        funcs[src.depth()][dst.depth()][src.channels() - 1], adds an early
        path: when src.data or dst.data is not 16-byte aligned, the conversion
        is handed to cv::gpu::convertTo(src, dst) and the routine returns
        before the table lookup.
  * modules/gpu/test/test_core.cpp
      - Divide_Array/WithScale: tolerance for CV_32F-or-deeper inputs/outputs
        goes from 1e-4 to 1e-2.
  * modules/gpu/test/test_filters.cpp
      - Laplacian/Accuracy: tolerance stays 0.0 for depths below CV_32F and
        becomes 1e-3 otherwise.

NOTE(review): this patch was recovered from a copy whose line breaks,
indentation and template angle-bracket arguments had been stripped. The
template arguments of isAligned were restored; leading whitespace on context
lines is reconstructed by convention and should be confirmed against the
target tree (or applied with whitespace tolerance).

diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp
index 0baa8645f6..e5d9367390 100644
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -1037,6 +1037,11 @@ namespace
         }
     };
 
+    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
+    {
+        return reinterpret_cast<size_t>(ptr) % size == 0;
+    }
+
     //////////////////////////////////////////////////////////////////////////
     // CudaFuncTable
 
@@ -1165,6 +1170,13 @@ namespace
                     CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
             }
 
+            bool aligned = isAligned(src.data, 16) && isAligned(dst.data, 16);
+            if (!aligned)
+            {
+                cv::gpu::convertTo(src, dst);
+                return;
+            }
+
             const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
             CV_DbgAssert(func != 0);
 
diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp
index 06b9fa309c..01c339f2c2 100644
--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
@@ -850,7 +850,7 @@ TEST_P(Divide_Array, WithScale)
         cv::Mat dst_gold;
         cv::divide(mat1, mat2, dst_gold, scale, depth.second);
 
-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 1.0);
     }
 }
 
diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp
index 99c463b84a..2e441ee97c 100644
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpu/test/test_filters.cpp
@@ -334,7 +334,7 @@ TEST_P(Laplacian, Accuracy)
     cv::Mat dst_gold;
     cv::Laplacian(src, dst_gold, -1, ksize.width);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3);
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_Filter, Laplacian, testing::Combine(