Fixed cvtColorGPU on Linux: replaced the `ushort` typedef with `unsigned short` in color.cu

14 years ago · fea66d9384
parent 2a0909acfd
commit fea66d9384
1 changed file with 371 additions and 372 deletions
--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
@@ -65,7 +65,7 @@ namespace imgproc
        static __device__ unsigned char half() { return (unsigned char)(max()/2 + 1); }
    };

-    template<> struct ColorChannel<ushort>
+    template<> struct ColorChannel<unsigned short>
    {
        typedef float worktype_f;
        typedef ushort3 vec3_t;
@@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace impl
        RGB2RGB_caller(src, srccn, dst, dstcn, bidx, stream);
    }

-    void RGB2RGB_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int dstcn, int bidx, cudaStream_t stream)
+    void RGB2RGB_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int dstcn, int bidx, cudaStream_t stream)
    {
        RGB2RGB_caller(src, srccn, dst, dstcn, bidx, stream);
    }
@@ -254,7 +254,7 @@ namespace cv { namespace gpu { namespace impl
 //            if( greenBits == 6 )
 //                for( int i = 0; i < n; i++, dst += dcn )
 //                {
-//                    unsigned t = ((const ushort*)src)[i];
+//                    unsigned t = ((const unsigned short*)src)[i];
 //                    dst[bidx] = (uchar)(t << 3);
 //                    dst[1] = (uchar)((t >> 3) & ~3);
 //                    dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
@@ -264,7 +264,7 @@ namespace cv { namespace gpu { namespace impl
 //            else
 //                for( int i = 0; i < n; i++, dst += dcn )
 //                {
-//                    unsigned t = ((const ushort*)src)[i];
+//                    unsigned t = ((const unsigned short*)src)[i];
 //                    dst[bidx] = (uchar)(t << 3);
 //                    dst[1] = (uchar)((t >> 2) & ~7);
 //                    dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
@@ -290,17 +290,17 @@ namespace cv { namespace gpu { namespace impl
 //            if( greenBits == 6 )
 //                for( int i = 0; i < n; i++, src += scn )
 //                {
-//                    ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
+//                    ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
 //                }
 //            else if( scn == 3 )
 //                for( int i = 0; i < n; i++, src += 3 )
 //                {
-//                    ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
+//                    ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
 //                }
 //            else
 //                for( int i = 0; i < n; i++, src += 4 )
 //                {
-//                    ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|
+//                    ((unsigned short*)dst)[i] = (unsigned short)((src[bidx] >> 3)|((src[1]&~7) << 2)|
 //                        ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0));
 //                }
 //        }
@@ -364,13 +364,13 @@ namespace imgproc
    //            for( int i = 0; i < n; i++ )
    //            {
    //                int t = src[i];
-    //                ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
+    //                ((unsigned short*)dst)[i] = (unsigned short)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
    //            }
    //        else
    //            for( int i = 0; i < n; i++ )
    //            {
    //                int t = src[i] >> 3;
-    //                ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
+    //                ((unsigned short*)dst)[i] = (unsigned short)(t|(t << 5)|(t << 10));
    //            }
    //    }
    //    int greenBits;
@@ -410,7 +410,7 @@ namespace cv { namespace gpu { namespace impl
        Gray2RGB_caller(src, dst, dstcn, stream);
    }

-    void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream)
+    void Gray2RGB_gpu(const DevMem2D_<unsigned short>& src, const DevMem2D_<unsigned short>& dst, int dstcn, cudaStream_t stream)
    {
        Gray2RGB_caller(src, dst, dstcn, stream);
    }
@@ -449,7 +449,7 @@ namespace imgproc
    //        if( greenBits == 6 )
    //            for( int i = 0; i < n; i++ )
    //            {
-    //                int t = ((ushort*)src)[i];
+    //                int t = ((unsigned short*)src)[i];
    //                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
    //                                           ((t >> 3) & 0xfc)*G2Y +
    //                                           ((t >> 8) & 0xf8)*R2Y, yuv_shift);
@@ -457,7 +457,7 @@ namespace imgproc
    //        else
    //            for( int i = 0; i < n; i++ )
    //            {
-    //                int t = ((ushort*)src)[i];
+    //                int t = ((unsigned short*)src)[i];
    //                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
    //                                           ((t >> 2) & 0xf8)*G2Y +
    //                                           ((t >> 7) & 0xf8)*R2Y, yuv_shift);
@@ -501,7 +501,7 @@ namespace imgproc
        }
    }

-    __global__ void RGB2Gray_3(const ushort* src_, size_t src_step, ushort* dst_, size_t dst_step, int rows, int cols, int bidx)
+    __global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)
    {
        const int cr = 4899;
        const int cg = 9617;
@@ -513,16 +513,16 @@ namespace imgproc

        if (y < rows && x < cols)
        {
-            const ushort* src = src_ + y * src_step + x * 3;
+            const unsigned short* src = src_ + y * src_step + x * 3;

-            ushort t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
+            unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];

            ushort2 dst;
-            dst.x = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);

            src += 3;
            t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
-            dst.y = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);

            *(ushort2*)(dst_ + y * dst_step + x) = dst;
        }
@@ -581,7 +581,7 @@ namespace imgproc
        }
    }

-    __global__ void RGB2Gray_4(const ushort* src_, size_t src_step, ushort* dst_, size_t dst_step, int rows, int cols, int bidx)
+    __global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)
    {
        const int cr = 4899;
        const int cg = 9617;
@@ -595,14 +595,14 @@ namespace imgproc
        {
            ushort4 src = *(ushort4*)(src_ + y * src_step + (x << 2));

-            ushort t0 = ((ushort*)(&src))[bidx], t1 = src.y, t2 = ((ushort*)(&src))[bidx ^ 2];
+            unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];

            ushort2 dst;
-            dst.x = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);

            src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4);
-            t0 = ((ushort*)(&src))[bidx], t1 = src.y, t2 = ((ushort*)(&src))[bidx ^ 2];
-            dst.y = (ushort)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];
+            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);

            *(ushort2*)(dst_ + y * dst_step + x) = dst;
        }
@@ -654,7 +654,7 @@ namespace cv { namespace gpu { namespace impl
            cudaSafeCall( cudaThreadSynchronize() );
    }

-    void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream)
+    void RGB2Gray_gpu(const DevMem2D_<unsigned short>& src, int srccn, const DevMem2D_<unsigned short>& dst, int bidx, cudaStream_t stream)
    {
        dim3 threads(32, 8, 1);
        dim3 grid(1, 1, 1);
@@ -665,10 +665,10 @@ namespace cv { namespace gpu { namespace impl
        switch (srccn)
        {
        case 3:
-            imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(ushort), dst.ptr, dst.step / sizeof(ushort), src.rows, src.cols, bidx);
+            imgproc::RGB2Gray_3<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);
            break;
        case 4:
-            imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(ushort), dst.ptr, dst.step / sizeof(ushort), src.rows, src.cols, bidx);
+            imgproc::RGB2Gray_4<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(unsigned short), dst.ptr, dst.step / sizeof(unsigned short), src.rows, src.cols, bidx);
            break;
        default:
            cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
@@ -1472,13 +1472,13 @@ namespace cv { namespace gpu { namespace impl
 //static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
 //static const float GammaTabScale = (float)GAMMA_TAB_SIZE;
 //
-//static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];    
+//static unsigned short sRGBGammaTab_b[256], linearGammaTab_b[256];
 //#undef lab_shift
 //#define lab_shift xyz_shift
 //#define gamma_shift 3
 //#define lab_shift2 (lab_shift + gamma_shift)
 //#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
-//static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
+//static unsigned short LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
 //
 //static void initLabTabs()
 //{
@@ -1507,14 +1507,14 @@ namespace cv { namespace gpu { namespace impl
 //        for(i = 0; i < 256; i++)
 //        {
 //            float x = i*(1.f/255.f);
-//            sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4)));
-//            linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
+//            sRGBGammaTab_b[i] = saturate_cast<unsigned short>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4)));
+//            linearGammaTab_b[i] = (unsigned short)(i*(1 << gamma_shift));
 //        }
 //
 //        for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
 //        {
 //            float x = i*(1.f/(255.f*(1 << gamma_shift)));
-//            LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
+//            LabCbrtTab_b[i] = saturate_cast<unsigned short>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
 //        }
 //        initialized = true;
 //    }
@@ -1554,7 +1554,7 @@ namespace cv { namespace gpu { namespace impl
 //    {
 //        const int Lscale = (116*255+50)/100;
 //        const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
-//        const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
+//        const unsigned short* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
 //        int i, scn = srccn;
 //        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
 //            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
@@ -2103,8 +2103,8 @@ namespace cv { namespace gpu { namespace impl
 //    const int brows = 3, bcn = 7;
 //    int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;
 //    int i, bufstep = N7*bcn;
-//    cv::AutoBuffer<ushort> _buf(bufstep*brows);
-//    ushort* buf = (ushort*)_buf;
+//    cv::AutoBuffer<unsigned short> _buf(bufstep*brows);
+//    unsigned short* buf = (unsigned short*)_buf;
 //
 //    bayer += bstep*2;
 //
@@ -2120,7 +2120,7 @@ namespace cv { namespace gpu { namespace impl
 //
 //        for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ )
 //        {
-//            ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;
+//            unsigned short* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;
 //            srow = bayer + (y+dy)*bstep + 1;
 //
 //            for( i = 0; i < bcn; i++ )
@@ -2179,25 +2179,25 @@ namespace cv { namespace gpu { namespace impl
 //
 //            for( ; i < N-1; i++, srow++, brow++ )
 //            {
-//                brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +
+//                brow[0] = (unsigned short)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +
 //                                   std::abs(srow[-bstep] - srow[+bstep])*2 +
 //                                   std::abs(srow[1-bstep] - srow[1+bstep]));
-//                brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) +
+//                brow[N] = (unsigned short)(std::abs(srow[-1-bstep] - srow[1-bstep]) +
 //                                   std::abs(srow[-1] - srow[1])*2 +
 //                                   std::abs(srow[-1+bstep] - srow[1+bstep]));
-//                brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);
-//                brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);
-//                brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +
+//                brow[N2] = (unsigned short)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);
+//                brow[N3] = (unsigned short)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);
+//                brow[N4] = (unsigned short)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +
 //                                    std::abs(srow[+bstep] - srow[1]));
-//                brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +
+//                brow[N5] = (unsigned short)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +
 //                                    std::abs(srow[+bstep] - srow[-1]));
-//                brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);
+//                brow[N6] = (unsigned short)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);
 //            }
 //        }
 //
-//        const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2;
-//        const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2;
-//        const ushort* brow2 = buf + (y % brows)*bufstep + 2;
+//        const unsigned short* brow0 = buf + ((y - 2) % brows)*bufstep + 2;
+//        const unsigned short* brow1 = buf + ((y - 1) % brows)*bufstep + 2;
+//        const unsigned short* brow2 = buf + (y % brows)*bufstep + 2;
 //        static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f };
 //        srow = bayer + y*bstep + 2;
 //        bool greenCell = greenCell0;
@@ -2380,8 +2380,7 @@ namespace cv { namespace gpu { namespace impl
 //            z = _mm_setzero_si128();
 //            __m128 _0_5 = _mm_set1_ps(0.5f);
 //
-//            #define _mm_merge_epi16(a, b) \
-//                _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask))
+//            #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask))
 //            #define _mm_cvtloepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16))
 //            #define _mm_cvthiepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16))
 //