Merge branch '2.4'

12 years ago · 956aa68fb5
parent b44b920997 ca5e6f9522
commit 956aa68fb5
6 changed files with 224 additions and 144 deletions
--- a/modules/core/doc/utility_and_system_functions_and_macros.rst
+++ b/modules/core/doc/utility_and_system_functions_and_macros.rst
@ -255,7 +255,7 @@ The function allocates the buffer of the specified size and returns it. When the


 fastFree
------------
+--------
 Deallocates a memory buffer.

 .. ocv:function:: void fastFree(void* ptr)
@ -280,6 +280,14 @@ The function acts like ``sprintf``  but forms and returns an STL string. It can
 :ocv:class:`Exception` constructor.


+getBuildInformation
+-------------------
+Returns the full CMake output generated at configuration time.
+
+.. ocv:function:: const std::string& getBuildInformation()
+
+The returned value is the raw CMake output, including the version control system revision, compiler version, compiler flags, enabled modules, third-party libraries, etc. The output format depends on the target architecture.
+

 checkHardwareSupport
 --------------------
@ -304,7 +312,7 @@ Returns true if the specified feature is supported by the host hardware.
 The function returns true if the host hardware supports the specified feature. When user calls ``setUseOptimized(false)``, the subsequent calls to ``checkHardwareSupport()`` will return false until ``setUseOptimized(true)`` is called. This way user can dynamically switch on and off the optimized code in OpenCV.

 getNumThreads
-----------------
+-------------
 Returns the number of threads used by OpenCV.

 .. ocv:function:: int getNumThreads()
@ -318,7 +326,7 @@ The function returns the number of threads that is used by OpenCV.


 getThreadNum
----------------
+------------
 Returns the index of the currently executed thread.

 .. ocv:function:: int getThreadNum()
@ -332,7 +340,7 @@ The function returns a 0-based index of the currently executed thread. The funct


 getTickCount
----------------
+------------
 Returns the number of ticks.

 .. ocv:function:: int64 getTickCount()
@ -346,7 +354,7 @@ It can be used to initialize


 getTickFrequency
--------------------
+----------------
 Returns the number of ticks per second.

 .. ocv:function:: double getTickFrequency()
@ -363,7 +371,7 @@ That is, the following code computes the execution time in seconds: ::


 getCPUTickCount
----------------
+---------------
 Returns the number of CPU ticks.

 .. ocv:function:: int64 getCPUTickCount()
@ -417,7 +425,7 @@ The function sets the number of threads used by OpenCV in parallel OpenMP region


 setUseOptimized
-----------------
+---------------
 Enables or disables the optimized code.

 .. ocv:function:: int cvUseOptimized( int on_off )
@ -433,7 +441,7 @@ The function can be used to dynamically turn on and off optimized code (code tha
 By default, the optimized code is enabled unless you disable it in CMake. The current status can be retrieved using ``useOptimized``.

 useOptimized
-----------------
+------------
 Returns the status of optimized code usage.

 .. ocv:function:: bool useOptimized()
--- a/modules/core/test/test_misc.cpp
+++ b/modules/core/test/test_misc.cpp
@ -39,4 +39,12 @@ TEST(Core_OutputArraySreate, _1997)
    Size submatSize = Size(256, 256);

    ASSERT_NO_THROW(local::create( mat(Rect(Point(), submatSize)), submatSize, mat.type() ));
-}
+}
+
+TEST(Core_SaturateCast, NegativeNotClipped)
+{
+    double d = -1.0;
+    unsigned int val = cv::saturate_cast<unsigned int>(d);
+
+    ASSERT_EQ(0xffffffff, val);
+}
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@ -55,6 +55,128 @@
 using namespace cv::gpu;
 using namespace cv::gpu::device;

+namespace detail
+{
+    __device__ __forceinline__ int cvAtomicAdd(int* address, int val)
+    {
+        return ::atomicAdd(address, val);
+    }
+    __device__ __forceinline__ unsigned int cvAtomicAdd(unsigned int* address, unsigned int val)
+    {
+        return ::atomicAdd(address, val);
+    }
+    __device__ __forceinline__ float cvAtomicAdd(float* address, float val)
+    {
+    #if __CUDA_ARCH__ >= 200
+        return ::atomicAdd(address, val);
+    #else
+        int* address_as_i = (int*) address;
+        int old = *address_as_i, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_i, assumed,
+                __float_as_int(val + __int_as_float(assumed)));
+        } while (assumed != old);
+        return __int_as_float(old);
+    #endif
+    }
+    __device__ __forceinline__ double cvAtomicAdd(double* address, double val)
+    {
+    #if __CUDA_ARCH__ >= 130
+        unsigned long long int* address_as_ull = (unsigned long long int*) address;
+        unsigned long long int old = *address_as_ull, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_ull, assumed,
+                __double_as_longlong(val + __longlong_as_double(assumed)));
+        } while (assumed != old);
+        return __longlong_as_double(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0;
+    #endif
+    }
+
+    __device__ __forceinline__ int cvAtomicMin(int* address, int val)
+    {
+        return ::atomicMin(address, val);
+    }
+    __device__ __forceinline__ float cvAtomicMin(float* address, float val)
+    {
+    #if __CUDA_ARCH__ >= 120
+        int* address_as_i = (int*) address;
+        int old = *address_as_i, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_i, assumed,
+                __float_as_int(::fminf(val, __int_as_float(assumed))));
+        } while (assumed != old);
+        return __int_as_float(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0f;
+    #endif
+    }
+    __device__ __forceinline__ double cvAtomicMin(double* address, double val)
+    {
+    #if __CUDA_ARCH__ >= 130
+        unsigned long long int* address_as_ull = (unsigned long long int*) address;
+        unsigned long long int old = *address_as_ull, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_ull, assumed,
+                __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
+        } while (assumed != old);
+        return __longlong_as_double(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0;
+    #endif
+    }
+
+    __device__ __forceinline__ int cvAtomicMax(int* address, int val)
+    {
+        return ::atomicMax(address, val);
+    }
+    __device__ __forceinline__ float cvAtomicMax(float* address, float val)
+    {
+    #if __CUDA_ARCH__ >= 120
+        int* address_as_i = (int*) address;
+        int old = *address_as_i, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_i, assumed,
+                __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+        } while (assumed != old);
+        return __int_as_float(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0f;
+    #endif
+    }
+    __device__ __forceinline__ double cvAtomicMax(double* address, double val)
+    {
+    #if __CUDA_ARCH__ >= 130
+        unsigned long long int* address_as_ull = (unsigned long long int*) address;
+        unsigned long long int old = *address_as_ull, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_ull, assumed,
+                __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
+        } while (assumed != old);
+        return __longlong_as_double(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0;
+    #endif
+    }
+}
+
 namespace detail
 {
    template <int cn> struct Unroll;
@ -152,7 +274,7 @@ namespace sum
    {
        static __device__ void run(R* ptr, R val)
        {
-            ::atomicAdd(ptr, val);
+            detail::cvAtomicAdd(ptr, val);
        }
    };
    template <typename R> struct AtomicAdd<R, 2>
@ -161,8 +283,8 @@ namespace sum

        static __device__ void run(R* ptr, val_type val)
        {
-            ::atomicAdd(ptr, val.x);
-            ::atomicAdd(ptr + 1, val.y);
+            detail::cvAtomicAdd(ptr, val.x);
+            detail::cvAtomicAdd(ptr + 1, val.y);
        }
    };
    template <typename R> struct AtomicAdd<R, 3>
@ -171,9 +293,9 @@ namespace sum

        static __device__ void run(R* ptr, val_type val)
        {
-            ::atomicAdd(ptr, val.x);
-            ::atomicAdd(ptr + 1, val.y);
-            ::atomicAdd(ptr + 2, val.z);
+            detail::cvAtomicAdd(ptr, val.x);
+            detail::cvAtomicAdd(ptr + 1, val.y);
+            detail::cvAtomicAdd(ptr + 2, val.z);
        }
    };
    template <typename R> struct AtomicAdd<R, 4>
@ -182,10 +304,10 @@ namespace sum

        static __device__ void run(R* ptr, val_type val)
        {
-            ::atomicAdd(ptr, val.x);
-            ::atomicAdd(ptr + 1, val.y);
-            ::atomicAdd(ptr + 2, val.z);
-            ::atomicAdd(ptr + 3, val.w);
+            detail::cvAtomicAdd(ptr, val.x);
+            detail::cvAtomicAdd(ptr + 1, val.y);
+            detail::cvAtomicAdd(ptr + 2, val.z);
+            detail::cvAtomicAdd(ptr + 3, val.w);
        }
    };

@ -229,41 +351,6 @@ namespace sum
        #endif
        }
    };
-    template <int BLOCK_SIZE, int cn>
-    struct GlobalReduce<BLOCK_SIZE, double, cn>
-    {
-        typedef typename TypeVec<double, cn>::vec_type result_type;
-
-        static __device__ void run(result_type& sum, result_type* result, int tid, int bid, double* smem)
-        {
-            __shared__ bool is_last;
-
-            if (tid == 0)
-            {
-                result[bid] = sum;
-
-                __threadfence();
-
-                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
-                is_last = (ticket == gridDim.x * gridDim.y - 1);
-            }
-
-            __syncthreads();
-
-            if (is_last)
-            {
-                sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
-
-                device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<double>()));
-
-                if (tid == 0)
-                {
-                    result[0] = sum;
-                    blocks_finished = 0;
-                }
-            }
-        }
-    };

    template <int BLOCK_SIZE, typename src_type, typename result_type, class Op>
    __global__ void kernel(const PtrStepSz<src_type> src, result_type* result, const Op op, const int twidth, const int theight)
@ -518,53 +605,12 @@ namespace minMax
    struct GlobalReduce
    {
        static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval)
-        {
-            __shared__ bool is_last;
-
-            if (tid == 0)
-            {
-                minval[bid] = mymin;
-                maxval[bid] = mymax;
-
-                __threadfence();
-
-                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
-                is_last = (ticket == gridDim.x * gridDim.y - 1);
-            }
-
-            __syncthreads();
-
-            if (is_last)
-            {
-                int idx = ::min(tid, gridDim.x * gridDim.y - 1);
-
-                mymin = minval[idx];
-                mymax = maxval[idx];
-
-                const minimum<R> minOp;
-                const maximum<R> maxOp;
-                device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
-
-                if (tid == 0)
-                {
-                    minval[0] = mymin;
-                    maxval[0] = mymax;
-
-                    blocks_finished = 0;
-                }
-            }
-        }
-    };
-    template <int BLOCK_SIZE>
-    struct GlobalReduce<BLOCK_SIZE, int>
-    {
-        static __device__ void run(int& mymin, int& mymax, int* minval, int* maxval, int tid, int bid, int* sminval, int* smaxval)
        {
        #if __CUDA_ARCH__ >= 200
            if (tid == 0)
            {
-                ::atomicMin(minval, mymin);
-                ::atomicMax(maxval, mymax);
+                detail::cvAtomicMin(minval, mymin);
+                detail::cvAtomicMax(maxval, mymax);
            }
        #else
            __shared__ bool is_last;
@ -589,8 +635,8 @@ namespace minMax
                mymin = minval[idx];
                mymax = maxval[idx];

-                const minimum<int> minOp;
-                const maximum<int> maxOp;
+                const minimum<R> minOp;
+                const maximum<R> maxOp;
                device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));

                if (tid == 0)
@ -672,12 +718,19 @@ namespace minMax
        *minval_buf = numeric_limits<int>::max();
        *maxval_buf = numeric_limits<int>::min();
    }
-
-    template <typename R>
-    void setDefault(R*, R*)
+    __global__ void setDefaultKernel(float* minval_buf, float* maxval_buf)
+    {
+        *minval_buf = numeric_limits<float>::max();
+        *maxval_buf = -numeric_limits<float>::max();
+    }
+    __global__ void setDefaultKernel(double* minval_buf, double* maxval_buf)
    {
+        *minval_buf = numeric_limits<double>::max();
+        *maxval_buf = -numeric_limits<double>::max();
    }
-    void setDefault(int* minval_buf, int* maxval_buf)
+
+    template <typename R>
+    void setDefault(R* minval_buf, R* maxval_buf)
    {
        setDefaultKernel<<<1, 1>>>(minval_buf, maxval_buf);
    }
@ -728,21 +781,19 @@ namespace minMax

 namespace minMaxLoc
 {
-    __device__ unsigned int blocks_finished = 0;
-
    // To avoid shared bank conflicts we convert each value into value of
    // appropriate type (32 bits minimum)
    template <typename T> struct MinMaxTypeTraits;
-    template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<schar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<ushort> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<unsigned char> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<signed char> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<unsigned short> { typedef int best_type; };
    template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
    template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
    template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
    template <> struct MinMaxTypeTraits<double> { typedef double best_type; };

    template <int BLOCK_SIZE, typename T, class Mask>
-    __global__ void kernel(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
+    __global__ void kernel_pass_1(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
    {
        typedef typename MinMaxTypeTraits<T>::best_type work_type;

@ -750,7 +801,6 @@ namespace minMaxLoc
        __shared__ work_type smaxval[BLOCK_SIZE];
        __shared__ unsigned int sminloc[BLOCK_SIZE];
        __shared__ unsigned int smaxloc[BLOCK_SIZE];
-        __shared__ bool is_last;

        const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
        const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
@ -799,38 +849,36 @@ namespace minMaxLoc
            maxval[bid] = (T) mymax;
            minloc[bid] = myminloc;
            maxloc[bid] = mymaxloc;
-
-            __threadfence();
-
-            unsigned int ticket = ::atomicInc(&blocks_finished, gridDim.x * gridDim.y);
-            is_last = (ticket == gridDim.x * gridDim.y - 1);
        }
+    }
+    template <int BLOCK_SIZE, typename T>
+    __global__ void kernel_pass_2(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int count)
+    {
+        typedef typename MinMaxTypeTraits<T>::best_type work_type;

-        __syncthreads();
-
-        if (is_last)
-        {
-            unsigned int idx = ::min(tid, gridDim.x * gridDim.y - 1);
+        __shared__ work_type sminval[BLOCK_SIZE];
+        __shared__ work_type smaxval[BLOCK_SIZE];
+        __shared__ unsigned int sminloc[BLOCK_SIZE];
+        __shared__ unsigned int smaxloc[BLOCK_SIZE];

-            mymin = minval[idx];
-            mymax = maxval[idx];
-            myminloc = minloc[idx];
-            mymaxloc = maxloc[idx];
+        unsigned int idx = ::min(threadIdx.x, count - 1);

-            reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
-                                     smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
-                                     tid,
-                                     thrust::make_tuple(less<work_type>(), greater<work_type>()));
+        work_type mymin = minval[idx];
+        work_type mymax = maxval[idx];
+        unsigned int myminloc = minloc[idx];
+        unsigned int mymaxloc = maxloc[idx];

-            if (tid == 0)
-            {
-                minval[0] = (T) mymin;
-                maxval[0] = (T) mymax;
-                minloc[0] = myminloc;
-                maxloc[0] = mymaxloc;
+        reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
+                                 smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
+                                 threadIdx.x,
+                                 thrust::make_tuple(less<work_type>(), greater<work_type>()));

-                blocks_finished = 0;
-            }
+        if (threadIdx.x == 0)
+        {
+            minval[0] = (T) mymin;
+            maxval[0] = (T) mymax;
+            minloc[0] = myminloc;
+            maxloc[0] = mymaxloc;
        }
    }

@ -877,10 +925,13 @@ namespace minMaxLoc
        unsigned int* maxloc_buf = locbuf.ptr(1);

        if (mask.data)
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
+            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
        else
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
+            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
+
+        cudaSafeCall( cudaGetLastError() );

+        kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
        cudaSafeCall( cudaGetLastError() );

        cudaSafeCall( cudaDeviceSynchronize() );
@ -898,9 +949,9 @@ namespace minMaxLoc
        maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
    }

-    template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<unsigned char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<signed char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<unsigned short>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
    template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
    template void run<int   >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
    template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
--- a/modules/highgui/src/cap_ffmpeg_impl.hpp
+++ b/modules/highgui/src/cap_ffmpeg_impl.hpp
@ -49,6 +49,10 @@
 #pragma warning( disable: 4244 4510 4512 4610 )
 #endif

+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@ -2054,7 +2058,7 @@ bool InputMediaStream_FFMPEG::read(unsigned char** data, int* size, int* endOfFi

        if (ret < 0)
        {
-            if (ret == AVERROR_EOF)
+            if (ret == (int)AVERROR_EOF)
                *endOfFile = true;
            return false;
        }
--- a/modules/java/generator/src/java/android+AsyncServiceHelper.java
+++ b/modules/java/generator/src/java/android+AsyncServiceHelper.java
@ -3,6 +3,7 @@ package org.opencv.android;
 import java.io.File;
 import java.util.StringTokenizer;

+import org.opencv.core.Core;
 import org.opencv.engine.OpenCVEngineInterface;

 import android.content.ComponentName;
@ -85,7 +86,6 @@ class AsyncServiceHelper
                    {
                        mServiceInstallationProgress = true;
                        Log.d(TAG, "Package installation started");
-
                    }
                    else
                    {
@ -299,6 +299,10 @@ class AsyncServiceHelper
                        if (initOpenCVLibs(path, libs))
                        {
                            Log.d(TAG, "First attempt to load libs is OK");
+                            String eol = System.getProperty("line.separator");
+                            for (String str : Core.getBuildInformation().split(eol))
+                                Log.i(TAG, str);
+
                            status = LoaderCallbackInterface.SUCCESS;
                        }
                        else
--- a/modules/java/generator/src/java/android+StaticHelper.java
+++ b/modules/java/generator/src/java/android+StaticHelper.java
@ -1,7 +1,8 @@
 package org.opencv.android;

-import java.util.StringTokenizer;
+import org.opencv.core.Core;

+import java.util.StringTokenizer;
 import android.util.Log;

 class StaticHelper {
@ -28,6 +29,10 @@ class StaticHelper {
        if (initOpenCVLibs(libs))
        {
            Log.d(TAG, "First attempt to load libs is OK");
+            String eol = System.getProperty("line.separator");
+            for (String str : Core.getBuildInformation().split(eol))
+                Log.i(TAG, str);
+
            result = true;
        }
        else