Merge pull request #8045 from onetimking:master

8 years ago · ff5eaadfdf
parent 205ccddff8 203b398caa
commit ff5eaadfdf
2 changed files with 15 additions and 10 deletions
--- a/modules/cudaoptflow/src/cuda/pyrlk.cu
+++ b/modules/cudaoptflow/src/cuda/pyrlk.cu
@ -51,6 +51,8 @@
 #include "opencv2/core/cuda/filters.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"

+#include <iostream>
+
 using namespace cv::cuda;
 using namespace cv::cuda::device;

@ -923,15 +925,15 @@ namespace pyrlk
                float x = xBase - c_halfWin_x + j + 0.5f;
                float y = yBase - c_halfWin_y + i + 0.5f;

-                I_patch[i * patchWidth + j] = tex2D(tex_Ib, x, y);
+                I_patch[i * patchWidth + j] = tex2D(tex_If, x, y);

                // Sharr Deriv

-                dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x+1, y-1) + 10 * tex2D(tex_Ib, x+1, y) + 3 * tex2D(tex_Ib, x+1, y+1) -
-                                                (3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x-1, y) + 3 * tex2D(tex_Ib, x-1, y+1));
+                dIdx_patch[i * patchWidth + j] = 3 * tex2D(tex_If, x+1, y-1) + 10 * tex2D(tex_If, x+1, y) + 3 * tex2D(tex_If, x+1, y+1) -
+                                                (3 * tex2D(tex_If, x-1, y-1) + 10 * tex2D(tex_If, x-1, y) + 3 * tex2D(tex_If, x-1, y+1));

-                dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_Ib, x-1, y+1) + 10 * tex2D(tex_Ib, x, y+1) + 3 * tex2D(tex_Ib, x+1, y+1) -
-                                                (3 * tex2D(tex_Ib, x-1, y-1) + 10 * tex2D(tex_Ib, x, y-1) + 3 * tex2D(tex_Ib, x+1, y-1));
+                dIdy_patch[i * patchWidth + j] = 3 * tex2D(tex_If, x-1, y+1) + 10 * tex2D(tex_If, x, y+1) + 3 * tex2D(tex_If, x+1, y+1) -
+                                                (3 * tex2D(tex_If, x-1, y-1) + 10 * tex2D(tex_If, x, y-1) + 3 * tex2D(tex_If, x+1, y-1));
            }
        }

@ -943,6 +945,7 @@ namespace pyrlk
        if (x >= cols || y >= rows)
            return;

+
        int A11i = 0;
        int A12i = 0;
        int A22i = 0;
@ -970,7 +973,6 @@ namespace pyrlk
        {
            if (calcErr)
                err(y, x) = numeric_limits<float>::max();
-
            return;
        }

@ -1014,6 +1016,7 @@ namespace pyrlk
                }
            }

+
            float2 delta;
            delta.x = A12 * b2 - A22 * b1;
            delta.y = A12 * b1 - A11 * b2;
@ -1083,11 +1086,11 @@ namespace pyrlk
            funcs[patch.y - 1][patch.x - 1](I, J, I.rows, I.cols, prevPts, nextPts, status, err, ptcount,
                level, block, stream);
        }
-        static void dense(PtrStepSzb I, PtrStepSz<T> J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream)
+        static void dense(PtrStepSz<T> I, PtrStepSz<T> J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, PtrStepSzf err, int2 winSize, cudaStream_t stream)
        {
            dim3 block(16, 16);
            dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
-            Tex_I<1, uchar>::bindTexture_(I);
+            Tex_I<1, T>::bindTexture_(I);
            Tex_J<1, T>::bindTexture_(J);

            int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
--- a/modules/cudaoptflow/src/pyrlk.cpp
+++ b/modules/cudaoptflow/src/pyrlk.cpp
@ -61,7 +61,7 @@ namespace pyrlk
        static void sparse(PtrStepSz<typename device::TypeVec<T, cn>::vec_type> I, PtrStepSz<typename device::TypeVec<T, cn>::vec_type> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
            int level, dim3 block, dim3 patch, cudaStream_t stream);

-        static void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
+        static void dense(PtrStepSzf I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
            PtrStepSzf err, int2 winSize, cudaStream_t stream);
    };

@ -236,7 +236,9 @@ namespace
        prevPyr_.resize(maxLevel_ + 1);
        nextPyr_.resize(maxLevel_ + 1);

-        prevPyr_[0] = prevImg;
+        //prevPyr_[0] = prevImg;
+
+        prevImg.convertTo(prevPyr_[0], CV_32F, stream);
        nextImg.convertTo(nextPyr_[0], CV_32F, stream);

        for (int level = 1; level <= maxLevel_; ++level)