imgproc: fix bit-exact GaussianBlur() / sepFilter2D() (#15855)

* imgproc: fix bit-exact GaussianBlur() / sepFilter2D() - avoid kernels with bad approximation - GaussiabBlur - apply error-diffusion approximation for kernel (8-bit fraction) * java(test): update features2d ref data * test: update test_facedetect
5 years ago · f4d55d512f
parent 955b20230c
commit f4d55d512f
13 changed files with 341 additions and 109 deletions
--- a/modules/features2d/misc/java/test/BruteForceDescriptorMatcherTest.java
+++ b/modules/features2d/misc/java/test/BruteForceDescriptorMatcherTest.java
@ -85,11 +85,11 @@ public class BruteForceDescriptorMatcherTest extends OpenCVTestCase {
        matSize = 100;

        truth = new DMatch[] {
-                new DMatch(0, 0, 0, 0.6211397f),
+                new DMatch(0, 0, 0, 0.6159003f),
                new DMatch(1, 1, 0, 0.9177120f),
                new DMatch(2, 1, 0, 0.3112163f),
                new DMatch(3, 1, 0, 0.2925074f),
-                new DMatch(4, 1, 0, 0.9309178f)
+                new DMatch(4, 1, 0, 0.26520672f)
                };
    }

--- a/modules/features2d/misc/java/test/BruteForceL1DescriptorMatcherTest.java
+++ b/modules/features2d/misc/java/test/BruteForceL1DescriptorMatcherTest.java
@ -85,11 +85,11 @@ public class BruteForceL1DescriptorMatcherTest extends OpenCVTestCase {
        matSize = 100;

        truth = new DMatch[] {
-                new DMatch(0, 0, 0, 3.0975165f),
-                new DMatch(1, 1, 0, 3.5680308f),
-                new DMatch(2, 1, 0, 1.3722466f),
-                new DMatch(3, 1, 0, 1.3041023f),
-                new DMatch(4, 1, 0, 3.5970376f)
+                new DMatch(0, 0, 0, 3.0710702f),
+                new DMatch(1, 1, 0, 3.562016f),
+                new DMatch(2, 1, 0, 1.3682679f),
+                new DMatch(3, 1, 0, 1.3012862f),
+                new DMatch(4, 1, 0, 1.1852086f)
                };
    }

--- a/modules/features2d/misc/java/test/BruteForceSL2DescriptorMatcherTest.java
+++ b/modules/features2d/misc/java/test/BruteForceSL2DescriptorMatcherTest.java
@ -90,11 +90,11 @@ public class BruteForceSL2DescriptorMatcherTest extends OpenCVTestCase {
        matSize = 100;

        truth = new DMatch[] {
-                new DMatch(0, 0, 0, 0.3858146f),
+                new DMatch(0, 0, 0, 0.37933317f),
                new DMatch(1, 1, 0, 0.8421953f),
                new DMatch(2, 1, 0, 0.0968556f),
                new DMatch(3, 1, 0, 0.0855606f),
-                new DMatch(4, 1, 0, 0.8666080f)
+                new DMatch(4, 1, 0, 0.07033461f)
                };
    }

--- a/modules/features2d/misc/java/test/FlannBasedDescriptorMatcherTest.java
+++ b/modules/features2d/misc/java/test/FlannBasedDescriptorMatcherTest.java
@ -160,11 +160,11 @@ public class FlannBasedDescriptorMatcherTest extends OpenCVTestCase {
        matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
        matSize = 100;
        truth = new DMatch[] {
-                new DMatch(0, 0, 0, 0.6211397f),
+                new DMatch(0, 0, 0, 0.6159003f),
                new DMatch(1, 1, 0, 0.9177120f),
                new DMatch(2, 1, 0, 0.3112163f),
                new DMatch(3, 1, 0, 0.2925075f),
-                new DMatch(4, 1, 0, 0.9309179f)
+                new DMatch(4, 1, 0, 0.26520672f)
                };
    }

--- a/modules/features2d/misc/java/test/ORBDescriptorExtractorTest.java
+++ b/modules/features2d/misc/java/test/ORBDescriptorExtractorTest.java
@ -53,7 +53,7 @@ public class ORBDescriptorExtractorTest extends OpenCVTestCase {
        Mat truth = new Mat(1, 32, CvType.CV_8UC1) {
            {
                put(0, 0,
-                        6, 74, 6, 129, 2, 130, 56, 0, 36, 132, 66, 165, 172, 6, 3, 72, 102, 61, 163, 214, 0, 144, 65, 232, 4, 32, 138, 129, 4, 21, 37, 88);
+                        6, 74, 6, 129, 2, 130, 56, 0, 44, 132, 66, 165, 172, 6, 3, 72, 102, 61, 171, 214, 0, 144, 65, 232, 4, 32, 138, 131, 4, 21, 37, 217);
            }
        };
        assertDescriptorsClose(truth, descriptors, 1);
@ -92,7 +92,7 @@ public class ORBDescriptorExtractorTest extends OpenCVTestCase {
        Mat truth = new Mat(1, 32, CvType.CV_8UC1) {
            {
                put(0, 0,
-                        6, 10, 22, 5, 2, 130, 56, 0, 44, 164, 66, 165, 140, 6, 1, 72, 38, 61, 163, 210, 0, 208, 1, 104, 4, 32, 10, 131, 0, 37, 37, 67);
+                        6, 10, 22, 5, 2, 130, 56, 0, 44, 164, 66, 165, 140, 6, 1, 72, 38, 61, 163, 210, 0, 208, 1, 104, 4, 32, 74, 131, 0, 37, 37, 67);
            }
        };
        assertDescriptorsClose(truth, descriptors, 1);
--- a/modules/imgproc/src/filter.dispatch.cpp
+++ b/modules/imgproc/src/filter.dispatch.cpp
@ -41,6 +41,12 @@
 //M*/

 #include "precomp.hpp"
+
+#include <opencv2/core/utils/logger.defines.hpp>
+#undef CV_LOG_STRIP_LEVEL
+#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
+#include <opencv2/core/utils/logger.hpp>
+
 #include "opencv2/core/opencl/ocl_defs.hpp"
 #include "opencl_kernels_imgproc.hpp"
 #include "hal_replacement.hpp"
@ -273,6 +279,22 @@ Ptr<BaseColumnFilter> getLinearColumnFilter(
        CV_CPU_DISPATCH_MODES_ALL);
 }

+static bool createBitExactKernel_32S(const Mat& kernel, Mat& kernel_dst, int bits)
+{
+    kernel.convertTo(kernel_dst, CV_32S, (1 << bits));
+    Mat_<double> kernel_64f;
+    kernel.convertTo(kernel_64f, CV_64F, (1 << bits));
+    int ksize = (int)kernel.total();
+    const double eps = 10 * FLT_EPSILON * (1 << bits);
+    for (int i = 0; i < ksize; i++)
+    {
+        int bitExactValue = kernel_dst.at<int>(i);
+        double approxValue = kernel_64f.at<double>(i);
+        if (fabs(approxValue - bitExactValue) > eps)
+            return false;
+    }
+    return true;
+}

 Ptr<FilterEngine> createSeparableLinearFilter(
        int _srcType, int _dstType,
@ -299,6 +321,7 @@ Ptr<FilterEngine> createSeparableLinearFilter(
        _columnKernel.rows == 1 ? Point(_anchor.y, 0) : Point(0, _anchor.y));
    Mat rowKernel, columnKernel;

+    bool isBitExactMode = false;
    int bdepth = std::max(CV_32F,std::max(sdepth, ddepth));
    int bits = 0;

@ -311,14 +334,27 @@ Ptr<FilterEngine> createSeparableLinearFilter(
          (rtype & ctype & KERNEL_INTEGER) &&
          ddepth == CV_16S)) )
    {
-        bdepth = CV_32S;
-        bits = ddepth == CV_8U ? 8 : 0;
-        _rowKernel.convertTo( rowKernel, CV_32S, 1 << bits );
-        _columnKernel.convertTo( columnKernel, CV_32S, 1 << bits );
-        bits *= 2;
-        _delta *= (1 << bits);
+        int bits_ = ddepth == CV_8U ? 8 : 0;
+        bool isValidBitExactRowKernel = createBitExactKernel_32S(_rowKernel, rowKernel, bits_);
+        bool isValidBitExactColumnKernel = createBitExactKernel_32S(_columnKernel, columnKernel, bits_);
+        if (!isValidBitExactRowKernel)
+        {
+            CV_LOG_DEBUG(NULL, "createSeparableLinearFilter: bit-exact row-kernel can't be applied: ksize=" << _rowKernel.total());
+        }
+        else if (!isValidBitExactColumnKernel)
+        {
+            CV_LOG_DEBUG(NULL, "createSeparableLinearFilter: bit-exact column-kernel can't be applied: ksize=" << _columnKernel.total());
+        }
+        else
+        {
+            bdepth = CV_32S;
+            bits = bits_;
+            bits *= 2;
+            _delta *= (1 << bits);
+            isBitExactMode = true;
+        }
    }
-    else
+    if (!isBitExactMode)
    {
        if( _rowKernel.type() != bdepth )
            _rowKernel.convertTo( rowKernel, bdepth );
--- a/modules/imgproc/src/fixedpoint.inl.hpp
+++ b/modules/imgproc/src/fixedpoint.inl.hpp
@ -355,6 +355,9 @@ public:
    CV_ALWAYS_INLINE bool isZero() { return val == 0; }
    static CV_ALWAYS_INLINE ufixedpoint16 zero() { return ufixedpoint16(); }
    static CV_ALWAYS_INLINE ufixedpoint16 one() { return ufixedpoint16((uint16_t)(1 << fixedShift)); }
+
+    static CV_ALWAYS_INLINE ufixedpoint16 fromRaw(uint16_t v) { return ufixedpoint16(v); }
+    CV_ALWAYS_INLINE ufixedpoint16 raw() { return val; }
 };

 }
--- a/modules/imgproc/src/smooth.dispatch.cpp
+++ b/modules/imgproc/src/smooth.dispatch.cpp
@ -43,6 +43,10 @@

 #include "precomp.hpp"

+#include <opencv2/core/utils/logger.hpp>
+
+#include <opencv2/core/utils/configuration.private.hpp>
+
 #include <vector>

 #include "opencv2/core/hal/intrin.hpp"
@ -67,109 +71,212 @@ namespace cv {
                                     Gaussian Blur
 \****************************************************************************************/

-Mat getGaussianKernel(int n, double sigma, int ktype)
+/**
+ * Bit-exact in terms of softfloat computations
+ *
+ * returns sum of kernel values. Should be equal to 1.0
+ */
+static
+softdouble getGaussianKernelBitExact(std::vector<softdouble>& result, int n, double sigma)
 {
    CV_Assert(n > 0);
-    const int SMALL_GAUSSIAN_SIZE = 7;
-    static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE] =
-    {
-        {1.f},
-        {0.25f, 0.5f, 0.25f},
-        {0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f},
-        {0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f}
-    };
-
-    const float* fixed_kernel = n % 2 == 1 && n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 ?
-        small_gaussian_tab[n>>1] : 0;
+    //TODO: incorrect SURF implementation requests kernel with n = 20 (PATCH_SZ): https://github.com/opencv/opencv/issues/15856
+    //CV_Assert((n & 1) == 1);  // odd

-    CV_Assert( ktype == CV_32F || ktype == CV_64F );
-    Mat kernel(n, 1, ktype);
-    float* cf = kernel.ptr<float>();
-    double* cd = kernel.ptr<double>();
-
-    double sigmaX = sigma > 0 ? sigma : ((n-1)*0.5 - 1)*0.3 + 0.8;
-    double scale2X = -0.5/(sigmaX*sigmaX);
-    double sum = 0;
-
-    int i;
-    for( i = 0; i < n; i++ )
+    if (sigma <= 0)
    {
-        double x = i - (n-1)*0.5;
-        double t = fixed_kernel ? (double)fixed_kernel[i] : std::exp(scale2X*x*x);
-        if( ktype == CV_32F )
+        if (n == 1)
        {
-            cf[i] = (float)t;
-            sum += cf[i];
+            result = std::vector<softdouble>(1, softdouble::one());
+            return softdouble::one();
        }
-        else
+        else if (n == 3)
        {
-            cd[i] = t;
-            sum += cd[i];
+            softdouble v3[] = {
+                softdouble::fromRaw(0x3fd0000000000000),  // 0.25
+                softdouble::fromRaw(0x3fe0000000000000),  // 0.5
+                softdouble::fromRaw(0x3fd0000000000000)   // 0.25
+            };
+            result.assign(v3, v3 + 3);
+            return softdouble::one();
        }
-    }
-
-    CV_DbgAssert(fabs(sum) > 0);
-    sum = 1./sum;
-    for( i = 0; i < n; i++ )
-    {
-        if( ktype == CV_32F )
-            cf[i] = (float)(cf[i]*sum);
-        else
-            cd[i] *= sum;
-    }
-
-    return kernel;
-}
-
-template <typename T>
-static std::vector<T> getFixedpointGaussianKernel( int n, double sigma )
-{
-    if (sigma <= 0)
-    {
-        if(n == 1)
-            return std::vector<T>(1, softdouble(1.0));
-        else if(n == 3)
+        else if (n == 5)
        {
-            T v3[] = { softdouble(0.25), softdouble(0.5), softdouble(0.25) };
-            return std::vector<T>(v3, v3 + 3);
+            softdouble v5[] = {
+                softdouble::fromRaw(0x3fb0000000000000),  // 0.0625
+                softdouble::fromRaw(0x3fd0000000000000),  // 0.25
+                softdouble::fromRaw(0x3fd8000000000000),  // 0.375
+                softdouble::fromRaw(0x3fd0000000000000),  // 0.25
+                softdouble::fromRaw(0x3fb0000000000000)   // 0.0625
+            };
+            result.assign(v5, v5 + 5);
+            return softdouble::one();
        }
-        else if(n == 5)
+        else if (n == 7)
        {
-            T v5[] = { softdouble(0.0625), softdouble(0.25), softdouble(0.375), softdouble(0.25), softdouble(0.0625) };
-            return std::vector<T>(v5, v5 + 5);
+            softdouble v7[] = {
+                softdouble::fromRaw(0x3fa0000000000000),  // 0.03125
+                softdouble::fromRaw(0x3fbc000000000000),  // 0.109375
+                softdouble::fromRaw(0x3fcc000000000000),  // 0.21875
+                softdouble::fromRaw(0x3fd2000000000000),  // 0.28125
+                softdouble::fromRaw(0x3fcc000000000000),  // 0.21875
+                softdouble::fromRaw(0x3fbc000000000000),  // 0.109375
+                softdouble::fromRaw(0x3fa0000000000000)   // 0.03125
+            };
+            result.assign(v7, v7 + 7);
+            return softdouble::one();
        }
-        else if(n == 7)
+        else if (n == 9)
        {
-            T v7[] = { softdouble(0.03125), softdouble(0.109375), softdouble(0.21875), softdouble(0.28125), softdouble(0.21875), softdouble(0.109375), softdouble(0.03125) };
-            return std::vector<T>(v7, v7 + 7);
+            softdouble v9[] = {
+                softdouble::fromRaw(0x3f90000000000000),  // 4  / 256
+                softdouble::fromRaw(0x3faa000000000000),  // 13 / 256
+                softdouble::fromRaw(0x3fbe000000000000),  // 30 / 256
+                softdouble::fromRaw(0x3fc9800000000000),  // 51 / 256
+                softdouble::fromRaw(0x3fce000000000000),  // 60 / 256
+                softdouble::fromRaw(0x3fc9800000000000),  // 51 / 256
+                softdouble::fromRaw(0x3fbe000000000000),  // 30 / 256
+                softdouble::fromRaw(0x3faa000000000000),  // 13 / 256
+                softdouble::fromRaw(0x3f90000000000000)   // 4  / 256
+            };
+            result.assign(v9, v9 + 9);
+            return softdouble::one();
        }
    }

+    softdouble sd_0_15 = softdouble::fromRaw(0x3fc3333333333333);  // 0.15
+    softdouble sd_0_35 = softdouble::fromRaw(0x3fd6666666666666);  // 0.35
+    softdouble sd_minus_0_125 = softdouble::fromRaw(0xbfc0000000000000);  // -0.5*0.25

-    softdouble sigmaX = sigma > 0 ? softdouble(sigma) : mulAdd(softdouble(n),softdouble(0.15),softdouble(0.35));// softdouble(((n-1)*0.5 - 1)*0.3 + 0.8)
-    softdouble scale2X = softdouble(-0.5*0.25)/(sigmaX*sigmaX);
-    std::vector<softdouble> values(n);
-    softdouble sum(0.);
-    for(int i = 0, x = 1 - n; i < n; i++, x+=2 )
+    softdouble sigmaX = sigma > 0 ? softdouble(sigma) : mulAdd(softdouble(n), sd_0_15, sd_0_35);// softdouble(((n-1)*0.5 - 1)*0.3 + 0.8)
+    softdouble scale2X = sd_minus_0_125/(sigmaX*sigmaX);
+
+    int n2_ = (n - 1) / 2;
+    cv::AutoBuffer<softdouble> values(n2_ + 1);
+    softdouble sum = softdouble::zero();
+    for (int i = 0, x = 1 - n; i < n2_; i++, x+=2)
    {
        // x = i - (n - 1)*0.5
        // t = std::exp(scale2X*x*x)
-        values[i] = exp(softdouble(x*x)*scale2X);
-        sum += values[i];
+        softdouble t = exp(softdouble(x*x)*scale2X);
+        values[i] = t;
+        sum += t;
+    }
+    sum *= softdouble(2);
+    //values[n2_] = softdouble::one(); // x=0 in exp(softdouble(x*x)*scale2X);
+    sum += softdouble::one();
+    if ((n & 1) == 0)
+    {
+        //values[n2_ + 1] = softdouble::one();
+        sum += softdouble::one();
    }
-    sum = softdouble::one()/sum;

-    std::vector<T> kernel(n);
-    for(int i = 0; i < n; i++ )
+    // normalize: sum(k[i]) = 1
+    softdouble mul1 = softdouble::one()/sum;
+
+    result.resize(n);
+
+    softdouble sum2 = softdouble::zero();
+    for (int i = 0; i < n2_; i++ )
+    {
+        softdouble t = values[i] * mul1;
+        result[i] = t;
+        result[n - 1 - i] = t;
+        sum2 += t;
+    }
+    sum2 *= softdouble(2);
+    result[n2_] = /*values[n2_]*/ softdouble::one() * mul1;
+    sum2 += result[n2_];
+    if ((n & 1) == 0)
    {
-        kernel[i] = values[i] * sum;
+        result[n2_ + 1] = result[n2_];
+        sum2 += result[n2_];
+    }
+
+    return sum2;
+}
+
+Mat getGaussianKernel(int n, double sigma, int ktype)
+{
+    CV_CheckDepth(ktype, ktype == CV_32F || ktype == CV_64F, "");
+    Mat kernel(n, 1, ktype);
+
+    std::vector<softdouble> kernel_bitexact;
+    getGaussianKernelBitExact(kernel_bitexact, n, sigma);
+
+    if (ktype == CV_32F)
+    {
+        for (int i = 0; i < n; i++)
+            kernel.at<float>(i) = (float)kernel_bitexact[i];
+    }
+    else
+    {
+        CV_DbgAssert(ktype == CV_64F);
+        for (int i = 0; i < n; i++)
+            kernel.at<double>(i) = kernel_bitexact[i];
    }

    return kernel;
-};
+}
+
+static
+softdouble getGaussianKernelFixedPoint_ED(CV_OUT std::vector<int64_t>& result, const std::vector<softdouble> kernel_bitexact, int fractionBits)
+{
+    const int n = (int)kernel_bitexact.size();
+    CV_Assert((n & 1) == 1);  // odd
+
+    CV_CheckGT(fractionBits, 0, "");
+    CV_CheckLE(fractionBits, 32, "");
+
+    int64_t fractionMultiplier = CV_BIG_INT(1) << fractionBits;
+    softdouble fractionMultiplier_sd(fractionMultiplier);
+
+    result.resize(n);
+
+    int n2_ = n / 2;  // n is odd
+    softdouble err = softdouble::zero();
+    int64_t sum = 0;
+    for (int i = 0; i < n2_; i++)
+    {
+        //softdouble err0 = err;
+        softdouble adj_v = kernel_bitexact[i] * fractionMultiplier_sd + err;
+        int64_t v0 = cvRound(adj_v);  // cvFloor() provides bad results
+        err = adj_v - softdouble(v0);
+        //printf("%3d: adj_v=%8.3f(%8.3f+%8.3f)  v0=%d   ed_err=%8.3f\n", i, (double)adj_v, (double)(kernel_bitexact[i] * fractionMultiplier_sd), (double)err0, (int)v0, (double)err);
+
+        result[i] = v0;
+        result[n - 1 - i] = v0;
+        sum += v0;
+    }
+    sum *= 2;
+    softdouble adj_v_center = kernel_bitexact[n2_] * fractionMultiplier_sd + err;
+    int64_t v_center = fractionMultiplier - sum;
+    result[n2_] = v_center;
+    //printf("center = %g ===> %g  ===> %g\n", (double)(kernel_bitexact[n2_] * fractionMultiplier), (double)adj_v_center, (double)v_center);
+    return (adj_v_center - softdouble(v_center));
+}

 static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); }
-template <typename T> static void getGaussianKernel(int n, double sigma, int, std::vector<T>& res) { res = getFixedpointGaussianKernel<T>(n, sigma); }
+template <typename T> static void getGaussianKernel(int n, double sigma, int, std::vector<T>& res);
+//{ res = getFixedpointGaussianKernel<T>(n, sigma); }
+
+template<> void getGaussianKernel<ufixedpoint16>(int n, double sigma, int, std::vector<ufixedpoint16>& res)
+{
+    std::vector<softdouble> res_sd;
+    softdouble s0 = getGaussianKernelBitExact(res_sd, n, sigma);
+    CV_UNUSED(s0);
+
+    std::vector<int64_t> fixed_256;
+    softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, 8);
+    CV_UNUSED(approx_err);
+
+    res.resize(n);
+    for (int i = 0; i < n; i++)
+    {
+        res[i] = ufixedpoint16::fromRaw((uint16_t)fixed_256[i]);
+        //printf("%03d: %d\n", i, res[i].raw());
+    }
+}

 template <typename T>
 static void createGaussianKernels( T & kx, T & ky, int type, Size &ksize,
@ -477,6 +584,19 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
 }
 #endif

+template<typename T>
+static bool validateGaussianBlurKernel(std::vector<T>& kernel)
+{
+    softdouble validation_sum = softdouble::zero();
+    for (size_t i = 0; i < kernel.size(); i++)
+    {
+        validation_sum += softdouble((double)kernel[i]);
+    }
+
+    bool isValid = validation_sum == softdouble::one();
+    return isValid;
+}
+
 void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
                  double sigma1, double sigma2,
                  int borderType)
@ -539,11 +659,24 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
    {
        std::vector<ufixedpoint16> fkx, fky;
        createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2);
-        if (src.data == dst.data)
-            src = src.clone();
-        CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType),
-            CV_CPU_DISPATCH_MODES_ALL);
-        return;
+
+        static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool("OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS", false);
+        if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fkx))
+        {
+            CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2));
+        }
+        else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fky))
+        {
+            CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2));
+        }
+        else
+        {
+            if (src.data == dst.data)
+                src = src.clone();
+            CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType),
+                CV_CPU_DISPATCH_MODES_ALL);
+            return;
+        }
    }

    sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType);
--- a/modules/imgproc/test/test_filter.cpp
+++ b/modules/imgproc/test/test_filter.cpp
@ -59,6 +59,13 @@ protected:
    bool fp_kernel;
    bool inplace;
    int border;
+
+    void dump_test_case(int test_case_idx, std::ostream* out) CV_OVERRIDE
+    {
+        ArrayTest::dump_test_case(test_case_idx, out);
+        *out << "border=" << border << std::endl;
+    }
+
 };


@ -685,6 +692,12 @@ protected:
    void get_test_array_types_and_sizes( int test_case_idx, vector<vector<Size> >& sizes, vector<vector<int> >& types );
    double get_success_error_level( int test_case_idx, int i, int j );
    const char* smooth_type;
+
+    void dump_test_case(int test_case_idx, std::ostream* out) CV_OVERRIDE
+    {
+        CV_FilterBaseTest::dump_test_case(test_case_idx, out);
+        *out << "smooth_type=" << smooth_type << std::endl;
+    }
 };


@ -795,6 +808,12 @@ protected:
    double get_success_error_level( int /*test_case_idx*/, int /*i*/, int /*j*/ );
    double sigma;
    int param1, param2;
+
+    void dump_test_case(int test_case_idx, std::ostream* out) CV_OVERRIDE
+    {
+        CV_SmoothBaseTest::dump_test_case(test_case_idx, out);
+        *out << "kernel=(" << param1 << ", " << param2 << ") sigma=" << sigma << std::endl;
+    }
 };


@ -838,7 +857,7 @@ void CV_GaussianBlurTest::run_func()

 // !!! Copied from cvSmooth, if the code is changed in cvSmooth,
 // make sure to update this one too.
-#define SMALL_GAUSSIAN_SIZE 7
+#define SMALL_GAUSSIAN_SIZE 9
 static void
 calcGaussianKernel( int n, double sigma, vector<float>& kernel )
 {
@ -847,14 +866,15 @@ calcGaussianKernel( int n, double sigma, vector<float>& kernel )
        {1.f},
        {0.25f, 0.5f, 0.25f},
        {0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f},
-        {0.03125, 0.109375, 0.21875, 0.28125, 0.21875, 0.109375, 0.03125}
+        {0.03125, 0.109375, 0.21875, 0.28125, 0.21875, 0.109375, 0.03125},
+        {4.0 / 256, 13.0 / 256, 30.0 / 256, 51.0 / 256, 60.0 / 256, 51.0 / 256, 30.0 / 256, 13.0 / 256, 4.0 / 256}
    };

    kernel.resize(n);
    if( n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 )
    {
-        assert( n%2 == 1 );
-        memcpy( &kernel[0], small_gaussian_tab[n>>1], n*sizeof(kernel[0]));
+        CV_Assert(n%2 == 1);
+        memcpy(&kernel[0], small_gaussian_tab[n / 2], n*sizeof(kernel[0]));
    }
    else
    {
--- a/modules/imgproc/test/test_smooth_bitexact.cpp
+++ b/modules/imgproc/test/test_smooth_bitexact.cpp
@ -14,11 +14,14 @@ namespace opencv_test { namespace {
        { fixedOne >> 2, fixedOne >> 1, fixedOne >> 2 }, // size 3, sigma 0
        { fixedOne >> 4, fixedOne >> 2, 6 * (fixedOne >> 4), fixedOne >> 2, fixedOne >> 4 }, // size 5, sigma 0
        { fixedOne >> 5, 7 * (fixedOne >> 6), 7 * (fixedOne >> 5), 9 * (fixedOne >> 5), 7 * (fixedOne >> 5), 7 * (fixedOne >> 6), fixedOne >> 5 }, // size 7, sigma 0
-        { 4, 13, 30, 51, 61, 51, 30, 13, 4 }, // size 9, sigma 0
-        { 81, 95, 81 }, // size 3, sigma 1.75
-        { 65, 125, 65 }, // size 3, sigma 0.875
+        { 4, 13, 30, 51, 60, 51, 30, 13, 4 }, // size 9, sigma 0
+#if 1
+#define CV_TEST_INACCURATE_GAUSSIAN_BLUR
+        { 81, 94, 81 }, // size 3, sigma 1.75
+        { 65, 126, 65 }, // size 3, sigma 0.875
        { 0, 7, 242, 7, 0 }, // size 5, sigma 0.375
        { 4, 56, 136, 56, 4 } // size 5, sigma 0.75
+#endif
    };

    template <typename T, int fixedShift>
@ -68,11 +71,13 @@ TEST(GaussianBlur_Bitexact, Linear8U)
        { CV_8UC1, Size( 256, 128), Size(5, 5), 0, 0, vector<int64_t>(v[2], v[2]+5), vector<int64_t>(v[2], v[2]+5) },
        { CV_8UC1, Size( 256, 128), Size(7, 7), 0, 0, vector<int64_t>(v[3], v[3]+7), vector<int64_t>(v[3], v[3]+7) },
        { CV_8UC1, Size( 256, 128), Size(9, 9), 0, 0, vector<int64_t>(v[4], v[4]+9), vector<int64_t>(v[4], v[4]+9) },
+#ifdef CV_TEST_INACCURATE_GAUSSIAN_BLUR
        { CV_8UC1, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
        { CV_8UC2, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
        { CV_8UC3, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
        { CV_8UC4, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
        { CV_8UC1, Size( 256, 128), Size(5, 5), 0.375, 0.75, vector<int64_t>(v[7], v[7]+5), vector<int64_t>(v[8], v[8]+5) }
+#endif
    };

    int bordermodes[] = {
@ -162,8 +167,28 @@ TEST(GaussianBlur_Bitexact, regression_15015)
 {
    Mat src(100,100,CV_8UC3,Scalar(255,255,255));
    Mat dst;
-    GaussianBlur(src, dst, Size(5, 5), 9);
+    GaussianBlur(src, dst, Size(5, 5), 0);
    ASSERT_EQ(0.0, cvtest::norm(dst, src, NORM_INF));
 }

+
+static void checkGaussianBlur_8Uvs32F(const Mat& src8u, const Mat& src32f, int N, double sigma)
+{
+    Mat dst8u; GaussianBlur(src8u, dst8u, Size(N, N), sigma);     // through bit-exact path
+    Mat dst8u_32f; dst8u.convertTo(dst8u_32f, CV_32F);
+
+    Mat dst32f; GaussianBlur(src32f, dst32f, Size(N, N), sigma);  // without bit-exact computations
+
+    double normINF_32f = cv::norm(dst8u_32f, dst32f, NORM_INF);
+    EXPECT_LE(normINF_32f, 1.0);
+}
+
+TEST(GaussianBlur_Bitexact, regression_9863)
+{
+    Mat src8u = imread(cvtest::findDataFile("shared/lena.png"));
+     Mat src32f; src8u.convertTo(src32f, CV_32F);
+
+    checkGaussianBlur_8Uvs32F(src8u, src32f, 151, 30);
+}
+
 }} // namespace
--- a/modules/objdetect/misc/python/test/test_facedetect.py
+++ b/modules/objdetect/misc/python/test/test_facedetect.py
@ -50,7 +50,7 @@ class facedetect_test(NewOpenCVTests):

            img = self.get_sample(  sample)
            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
-            gray = cv.GaussianBlur(gray, (5, 5), 5.1)
+            gray = cv.GaussianBlur(gray, (5, 5), 0)

            rects = detect(gray, cascade)
            faces.append(rects)
--- a/modules/ts/include/opencv2/ts.hpp
+++ b/modules/ts/include/opencv2/ts.hpp
@ -428,6 +428,9 @@ protected:
    // updates progress bar
    virtual int update_progress( int progress, int test_case_idx, int count, double dt );

+    // dump test case input parameters
+    virtual void dump_test_case(int test_case_idx, std::ostream* out);
+
    // finds test parameter
    const CvFileNode* find_param( CvFileStorage* fs, const char* param_name );

--- a/modules/ts/src/ts.cpp
+++ b/modules/ts/src/ts.cpp
@ -350,7 +350,13 @@ void BaseTest::run( int start_from )
            return;

        if( validate_test_results( test_case_idx ) < 0 || ts->get_err_code() < 0 )
+        {
+            std::stringstream ss;
+            dump_test_case(test_case_idx, &ss);
+            std::string s = ss.str();
+            ts->printf( TS::LOG, "%s", s.c_str());
            return;
+        }
    }
 }

@ -401,6 +407,12 @@ int BaseTest::update_progress( int progress, int test_case_idx, int count, doubl
 }


+void BaseTest::dump_test_case(int test_case_idx, std::ostream* out)
+{
+    *out << "test_case_idx = " << test_case_idx << std::endl;
+}
+
+
 BadArgTest::BadArgTest()
 {
    test_case_idx   = -1;