diff --git a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
index fa0852d7dc..a8952f1e61 100644
--- a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
+++ b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp
@@ -160,16 +160,12 @@ namespace cv { namespace gpu { namespace device
         template <int green_bits, int bidx> struct RGB2RGB5x5Converter;
         template<int bidx> struct RGB2RGB5x5Converter<6, bidx>
         {
-            static __device__ __forceinline__ ushort cvt(const uchar3& src)
+            template <typename T>
+            static __device__ __forceinline__ ushort cvt(const T& src)
             {
-                return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8));
-            }
-
-            static __device__ __forceinline__ ushort cvt(uint src)
-            {
-                uint b = 0xffu & (src >> (bidx * 8));
-                uint g = 0xffu & (src >> 8);
-                uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
+                uint b = bidx == 0 ? src.x : src.z;
+                uint g = src.y;
+                uint r = bidx == 0 ? src.z : src.x;
                 return (ushort)((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8));
             }
         };
@@ -178,22 +174,25 @@ namespace cv { namespace gpu { namespace device
         {
             static __device__ __forceinline__ ushort cvt(const uchar3& src)
             {
-                return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7));
+                uint b = bidx == 0 ? src.x : src.z;
+                uint g = src.y;
+                uint r = bidx == 0 ? src.z : src.x;
+                return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7));
             }
 
-            static __device__ __forceinline__ ushort cvt(uint src)
+            static __device__ __forceinline__ ushort cvt(const uchar4& src)
             {
-                uint b = 0xffu & (src >> (bidx * 8));
-                uint g = 0xffu & (src >> 8);
-                uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
-                uint a = 0xffu & (src >> 24);
+                uint b = bidx == 0 ? src.x : src.z;
+                uint g = src.y;
+                uint r = bidx == 0 ? src.z : src.x;
+                uint a = src.w;
                 return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7) | (a * 0x8000));
             }
         };
 
         template<int scn, int bidx, int green_bits> struct RGB2RGB5x5;
 
-        template<int bidx, int green_bits> struct RGB2RGB5x5<3, bidx,green_bits> : unary_function<uchar3, ushort>
+        template<int bidx, int green_bits> struct RGB2RGB5x5<3, bidx, green_bits> : unary_function<uchar3, ushort>
         {
             __device__ __forceinline__ ushort operator()(const uchar3& src) const
             {
@@ -204,9 +203,9 @@ namespace cv { namespace gpu { namespace device
             __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
         };
 
-        template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx,green_bits> : unary_function<uint, ushort>
+        template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx, green_bits> : unary_function<uchar4, ushort>
         {
-            __device__ __forceinline__ ushort operator()(uint src) const
+            __device__ __forceinline__ ushort operator()(const uchar4& src) const
             {
                 return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
             }
diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp
index 6c7a648221..16e0844106 100644
--- a/modules/gpu/perf/perf_video.cpp
+++ b/modules/gpu/perf/perf_video.cpp
@@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
 
         TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
 
-        GPU_SANITY_CHECK(u, 1e-1);
-        GPU_SANITY_CHECK(v, 1e-1);
+        GPU_SANITY_CHECK(u, 0.12);
+        GPU_SANITY_CHECK(v, 0.12);
     }
     else
     {
diff --git a/modules/gpu/test/test_bgfg.cpp b/modules/gpu/test/test_bgfg.cpp
index e08bfb399b..e279bc1417 100644
--- a/modules/gpu/test/test_bgfg.cpp
+++ b/modules/gpu/test/test_bgfg.cpp
@@ -98,10 +98,13 @@ GPU_TEST_P(FGDStatModel, Update)
     cap >> frame;
     ASSERT_FALSE(frame.empty());
 
-    IplImage ipl_frame = frame;
+    cv::Mat frameSmall;
+    cv::resize(frame, frameSmall, cv::Size(), 0.5, 0.5);
+
+    IplImage ipl_frame = frameSmall;
     cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
 
-    cv::gpu::GpuMat d_frame(frame);
+    cv::gpu::GpuMat d_frame(frameSmall);
     cv::gpu::FGDStatModel d_model(out_cn);
     d_model.create(d_frame);
 
@@ -109,18 +112,17 @@ GPU_TEST_P(FGDStatModel, Update)
     cv::Mat h_foreground;
     cv::Mat h_background3;
 
-    cv::Mat backgroundDiff;
-    cv::Mat foregroundDiff;
-
     for (int i = 0; i < 5; ++i)
     {
         cap >> frame;
         ASSERT_FALSE(frame.empty());
 
-        ipl_frame = frame;
+        cv::resize(frame, frameSmall, cv::Size(), 0.5, 0.5);
+
+        ipl_frame = frameSmall;
         int gold_count = cvUpdateBGStatModel(&ipl_frame, model);
 
-        d_frame.upload(frame);
+        d_frame.upload(frameSmall);
 
         int count = d_model.update(d_frame);
 
diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp
index 460e50c2b4..25f0f0e2bb 100644
--- a/modules/gpu/test/test_resize.cpp
+++ b/modules/gpu/test/test_resize.cpp
@@ -217,7 +217,8 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy)
     cv::Mat dst_gold;
     cv::resize(src, dst_gold, cv::Size(), coeff, coeff, interpolation);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : 1.0);
+    // The CPU test for cv::resize uses 16 as the error threshold for CV_8U; here we use 4 as the error threshold for CV_8U.
+    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-2 : src.depth() == CV_8U ? 4.0 : 1.0);
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine(