Merge branch '2.4'

pull/481/merge
Andrey Kamaev 12 years ago
commit 956aa68fb5
Changed files:
  1. modules/core/doc/utility_and_system_functions_and_macros.rst (24 changes)
  2. modules/core/test/test_misc.cpp (10 changes)
  3. modules/gpu/src/cuda/matrix_reductions.cu (315 changes)
  4. modules/highgui/src/cap_ffmpeg_impl.hpp (6 changes)
  5. modules/java/generator/src/java/android+AsyncServiceHelper.java (6 changes)
  6. modules/java/generator/src/java/android+StaticHelper.java (7 changes)

@@ -255,7 +255,7 @@ The function allocates the buffer of the specified size and returns it. When the
 fastFree
------------
+--------
 Deallocates a memory buffer.
 .. ocv:function:: void fastFree(void* ptr)
@@ -280,6 +280,14 @@ The function acts like ``sprintf`` but forms and returns an STL string. It can
 :ocv:class:`Exception` constructor.
+getBuildInformation
+-------------------
+Returns full configuration time cmake output.
+
+.. ocv:function:: const std::string& getBuildInformation()
+
+Returned value is raw cmake output including version control system revision, compiler version, compiler flags, enabled modules and third party libraries, etc. Output format depends on target architecture.
+
 checkHardwareSupport
 --------------------
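The ``getBuildInformation`` entry added above is easy to exercise; a minimal sketch (not part of the patch):

    #include <iostream>
    #include <opencv2/core/core.hpp>

    int main()
    {
        // Dump the CMake configuration report captured when OpenCV was built:
        // VCS revision, compiler and flags, enabled modules, third party libs.
        std::cout << cv::getBuildInformation() << std::endl;
        return 0;
    }

The Java loader changes at the bottom of this diff log the same text to logcat, one line at a time.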
@@ -304,7 +312,7 @@ Returns true if the specified feature is supported by the host hardware.
 The function returns true if the host hardware supports the specified feature. When user calls ``setUseOptimized(false)``, the subsequent calls to ``checkHardwareSupport()`` will return false until ``setUseOptimized(true)`` is called. This way user can dynamically switch on and off the optimized code in OpenCV.
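A quick illustration of the toggle described in this paragraph (a sketch, assuming an SSE2-capable x86 host; ``CV_CPU_SSE2`` is one of the feature constants defined in core):

    #include <opencv2/core/core.hpp>

    int main()
    {
        cv::setUseOptimized(false);
        bool sse2 = cv::checkHardwareSupport(CV_CPU_SSE2); // false while optimizations are off
        cv::setUseOptimized(true);
        sse2 = cv::checkHardwareSupport(CV_CPU_SSE2);      // true again on SSE2 hardware
        return sse2 ? 0 : 1;
    }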
 getNumThreads
-----------------
+-------------
 Returns the number of threads used by OpenCV.
 .. ocv:function:: int getNumThreads()
@@ -318,7 +326,7 @@ The function returns the number of threads that is used by OpenCV.
 getThreadNum
----------------
+------------
 Returns the index of the currently executed thread.
 .. ocv:function:: int getThreadNum()
@@ -332,7 +340,7 @@ The function returns a 0-based index of the currently executed thread. The funct
 getTickCount
----------------
+------------
 Returns the number of ticks.
 .. ocv:function:: int64 getTickCount()
@@ -346,7 +354,7 @@ It can be used to initialize
 getTickFrequency
--------------------
+----------------
 Returns the number of ticks per second.
 .. ocv:function:: double getTickFrequency()
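``getTickCount`` and ``getTickFrequency`` are typically combined to time a code section, as the next hunk's context line notes ("the following code computes the execution time in seconds"); a minimal sketch, with ``someWork`` as a hypothetical placeholder for the measured code:

    #include <iostream>
    #include <opencv2/core/core.hpp>

    void someWork() { /* code being timed */ }

    int main()
    {
        double t = (double)cv::getTickCount();      // ticks before the work
        someWork();
        t = ((double)cv::getTickCount() - t) / cv::getTickFrequency();
        std::cout << "elapsed: " << t << " s" << std::endl;
        return 0;
    }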
@@ -363,7 +371,7 @@ That is, the following code computes the execution time in seconds: ::
 getCPUTickCount
----------------
+---------------
 Returns the number of CPU ticks.
 .. ocv:function:: int64 getCPUTickCount()
@@ -417,7 +425,7 @@ The function sets the number of threads used by OpenCV in parallel OpenMP region
 setUseOptimized
-----------------
+---------------
 Enables or disables the optimized code.
 .. ocv:function:: int cvUseOptimized( int on_off )
@@ -433,7 +441,7 @@ The function can be used to dynamically turn on and off optimized code (code tha
 By default, the optimized code is enabled unless you disable it in CMake. The current status can be retrieved using ``useOptimized``.
 useOptimized
-----------------
+------------
 Returns the status of optimized code usage.
 .. ocv:function:: bool useOptimized()

@@ -39,4 +39,12 @@ TEST(Core_OutputArraySreate, _1997)
         Size submatSize = Size(256, 256);
         ASSERT_NO_THROW(local::create( mat(Rect(Point(), submatSize)), submatSize, mat.type() ));
     }
 }
+
+TEST(Core_SaturateCast, NegativeNotClipped)
+{
+    double d = -1.0;
+    unsigned int val = cv::saturate_cast<unsigned int>(d);
+
+    ASSERT_EQ(0xffffffff, val);
+}
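The new test pins down the behavior its name states: ``saturate_cast<unsigned int>`` of a negative double wraps modulo 2^32 in this branch instead of clipping to 0 (the smaller integer types do clip). The same arithmetic in plain C++ (illustrative, not OpenCV code):

    #include <cassert>

    int main()
    {
        double d = -1.0;
        // Go through a signed integer first: a direct double -> unsigned cast of
        // a negative value is undefined, while -1 wraps to 0xffffffff as unsigned.
        unsigned int val = (unsigned int)(long long)d;
        assert(val == 0xffffffffu);
        return 0;
    }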

@@ -55,6 +55,128 @@
 using namespace cv::gpu;
 using namespace cv::gpu::device;
+
+namespace detail
+{
+    __device__ __forceinline__ int cvAtomicAdd(int* address, int val)
+    {
+        return ::atomicAdd(address, val);
+    }
+    __device__ __forceinline__ unsigned int cvAtomicAdd(unsigned int* address, unsigned int val)
+    {
+        return ::atomicAdd(address, val);
+    }
+    __device__ __forceinline__ float cvAtomicAdd(float* address, float val)
+    {
+    #if __CUDA_ARCH__ >= 200
+        return ::atomicAdd(address, val);
+    #else
+        int* address_as_i = (int*) address;
+        int old = *address_as_i, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_i, assumed,
+                __float_as_int(val + __int_as_float(assumed)));
+        } while (assumed != old);
+        return __int_as_float(old);
+    #endif
+    }
+    __device__ __forceinline__ double cvAtomicAdd(double* address, double val)
+    {
+    #if __CUDA_ARCH__ >= 130
+        unsigned long long int* address_as_ull = (unsigned long long int*) address;
+        unsigned long long int old = *address_as_ull, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_ull, assumed,
+                __double_as_longlong(val + __longlong_as_double(assumed)));
+        } while (assumed != old);
+        return __longlong_as_double(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0;
+    #endif
+    }
+
+    __device__ __forceinline__ int cvAtomicMin(int* address, int val)
+    {
+        return ::atomicMin(address, val);
+    }
+    __device__ __forceinline__ float cvAtomicMin(float* address, float val)
+    {
+    #if __CUDA_ARCH__ >= 120
+        int* address_as_i = (int*) address;
+        int old = *address_as_i, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_i, assumed,
+                __float_as_int(::fminf(val, __int_as_float(assumed))));
+        } while (assumed != old);
+        return __int_as_float(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0f;
+    #endif
+    }
+    __device__ __forceinline__ double cvAtomicMin(double* address, double val)
+    {
+    #if __CUDA_ARCH__ >= 130
+        unsigned long long int* address_as_ull = (unsigned long long int*) address;
+        unsigned long long int old = *address_as_ull, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_ull, assumed,
+                __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
+        } while (assumed != old);
+        return __longlong_as_double(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0;
+    #endif
+    }
+
+    __device__ __forceinline__ int cvAtomicMax(int* address, int val)
+    {
+        return ::atomicMax(address, val);
+    }
+    __device__ __forceinline__ float cvAtomicMax(float* address, float val)
+    {
+    #if __CUDA_ARCH__ >= 120
+        int* address_as_i = (int*) address;
+        int old = *address_as_i, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_i, assumed,
+                __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+        } while (assumed != old);
+        return __int_as_float(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0f;
+    #endif
+    }
+    __device__ __forceinline__ double cvAtomicMax(double* address, double val)
+    {
+    #if __CUDA_ARCH__ >= 130
+        unsigned long long int* address_as_ull = (unsigned long long int*) address;
+        unsigned long long int old = *address_as_ull, assumed;
+        do {
+            assumed = old;
+            old = ::atomicCAS(address_as_ull, assumed,
+                __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
+        } while (assumed != old);
+        return __longlong_as_double(old);
+    #else
+        (void) address;
+        (void) val;
+        return 0.0;
+    #endif
+    }
+}
 
 namespace detail
 {
     template <int cn> struct Unroll;
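The CAS loops in the new ``detail`` namespace are the standard emulation for atomics the target hardware lacks: read the word's bit pattern, compute the updated value in ordinary registers, and publish it with a compare-and-swap, retrying if another thread got there first. The ``__CUDA_ARCH__`` guards compile the loop only where the required CAS width exists (the pre-sm_13 double paths are stubs that return 0.0). A host-side C++ analogue of the float case, with ``std::atomic`` standing in for ``atomicCAS`` (an illustrative sketch, not part of the patch):

    #include <atomic>
    #include <cstring>

    // Emulate fetch-add on float via compare-exchange on the int bit pattern --
    // the same technique cvAtomicAdd(float*, float) uses on pre-Fermi devices.
    float atomicAddFloat(std::atomic<int>& bits, float val)
    {
        int oldBits = bits.load();
        int newBits;
        do
        {
            float oldVal;
            std::memcpy(&oldVal, &oldBits, sizeof(float));  // reinterpret bits as float
            float newVal = oldVal + val;
            std::memcpy(&newBits, &newVal, sizeof(float));
        } while (!bits.compare_exchange_weak(oldBits, newBits)); // retry if bits changed
        float prev;
        std::memcpy(&prev, &oldBits, sizeof(float));
        return prev;  // previous value, matching atomicAdd's return convention
    }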
@@ -152,7 +274,7 @@ namespace sum
     {
         static __device__ void run(R* ptr, R val)
         {
-            ::atomicAdd(ptr, val);
+            detail::cvAtomicAdd(ptr, val);
         }
     };
     template <typename R> struct AtomicAdd<R, 2>
@@ -161,8 +283,8 @@ namespace sum
         static __device__ void run(R* ptr, val_type val)
         {
-            ::atomicAdd(ptr, val.x);
-            ::atomicAdd(ptr + 1, val.y);
+            detail::cvAtomicAdd(ptr, val.x);
+            detail::cvAtomicAdd(ptr + 1, val.y);
         }
     };
     template <typename R> struct AtomicAdd<R, 3>
@@ -171,9 +293,9 @@ namespace sum
         static __device__ void run(R* ptr, val_type val)
         {
-            ::atomicAdd(ptr, val.x);
-            ::atomicAdd(ptr + 1, val.y);
-            ::atomicAdd(ptr + 2, val.z);
+            detail::cvAtomicAdd(ptr, val.x);
+            detail::cvAtomicAdd(ptr + 1, val.y);
+            detail::cvAtomicAdd(ptr + 2, val.z);
         }
     };
     template <typename R> struct AtomicAdd<R, 4>
@@ -182,10 +304,10 @@ namespace sum
         static __device__ void run(R* ptr, val_type val)
         {
-            ::atomicAdd(ptr, val.x);
-            ::atomicAdd(ptr + 1, val.y);
-            ::atomicAdd(ptr + 2, val.z);
-            ::atomicAdd(ptr + 3, val.w);
+            detail::cvAtomicAdd(ptr, val.x);
+            detail::cvAtomicAdd(ptr + 1, val.y);
+            detail::cvAtomicAdd(ptr + 2, val.z);
+            detail::cvAtomicAdd(ptr + 3, val.w);
         }
     };
@@ -229,41 +351,6 @@ namespace sum
     #endif
         }
     };
-    template <int BLOCK_SIZE, int cn>
-    struct GlobalReduce<BLOCK_SIZE, double, cn>
-    {
-        typedef typename TypeVec<double, cn>::vec_type result_type;
-        static __device__ void run(result_type& sum, result_type* result, int tid, int bid, double* smem)
-        {
-            __shared__ bool is_last;
-
-            if (tid == 0)
-            {
-                result[bid] = sum;
-
-                __threadfence();
-
-                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
-                is_last = (ticket == gridDim.x * gridDim.y - 1);
-            }
-
-            __syncthreads();
-
-            if (is_last)
-            {
-                sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
-                device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<double>()));
-
-                if (tid == 0)
-                {
-                    result[0] = sum;
-                    blocks_finished = 0;
-                }
-            }
-        }
-    };
     template <int BLOCK_SIZE, typename src_type, typename result_type, class Op>
     __global__ void kernel(const PtrStepSz<src_type> src, result_type* result, const Op op, const int twidth, const int theight)
@@ -518,53 +605,12 @@ namespace minMax
     struct GlobalReduce
     {
         static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval)
         {
-            __shared__ bool is_last;
-
-            if (tid == 0)
-            {
-                minval[bid] = mymin;
-                maxval[bid] = mymax;
-
-                __threadfence();
-
-                unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
-                is_last = (ticket == gridDim.x * gridDim.y - 1);
-            }
-
-            __syncthreads();
-
-            if (is_last)
-            {
-                int idx = ::min(tid, gridDim.x * gridDim.y - 1);
-
-                mymin = minval[idx];
-                mymax = maxval[idx];
-
-                const minimum<R> minOp;
-                const maximum<R> maxOp;
-                device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
-
-                if (tid == 0)
-                {
-                    minval[0] = mymin;
-                    maxval[0] = mymax;
-
-                    blocks_finished = 0;
-                }
-            }
-        }
-    };
-
-    template <int BLOCK_SIZE>
-    struct GlobalReduce<BLOCK_SIZE, int>
-    {
-        static __device__ void run(int& mymin, int& mymax, int* minval, int* maxval, int tid, int bid, int* sminval, int* smaxval)
-        {
     #if __CUDA_ARCH__ >= 200
             if (tid == 0)
             {
-                ::atomicMin(minval, mymin);
-                ::atomicMax(maxval, mymax);
+                detail::cvAtomicMin(minval, mymin);
+                detail::cvAtomicMax(maxval, mymax);
             }
     #else
             __shared__ bool is_last;
@@ -589,8 +635,8 @@ namespace minMax
                 mymin = minval[idx];
                 mymax = maxval[idx];
-                const minimum<int> minOp;
-                const maximum<int> maxOp;
+                const minimum<R> minOp;
+                const maximum<R> maxOp;
                 device::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
                 if (tid == 0)
@@ -672,12 +718,19 @@ namespace minMax
         *minval_buf = numeric_limits<int>::max();
         *maxval_buf = numeric_limits<int>::min();
     }
-    template <typename R>
-    void setDefault(R*, R*)
+    __global__ void setDefaultKernel(float* minval_buf, float* maxval_buf)
+    {
+        *minval_buf = numeric_limits<float>::max();
+        *maxval_buf = -numeric_limits<float>::max();
+    }
+    __global__ void setDefaultKernel(double* minval_buf, double* maxval_buf)
+    {
+        *minval_buf = numeric_limits<double>::max();
+        *maxval_buf = -numeric_limits<double>::max();
+    }
-    void setDefault(int* minval_buf, int* maxval_buf)
+    template <typename R>
+    void setDefault(R* minval_buf, R* maxval_buf)
     {
         setDefaultKernel<<<1, 1>>>(minval_buf, maxval_buf);
     }
@@ -728,21 +781,19 @@
 namespace minMaxLoc
 {
-    __device__ unsigned int blocks_finished = 0;
-
     // To avoid shared bank conflicts we convert each value into value of
     // appropriate type (32 bits minimum)
     template <typename T> struct MinMaxTypeTraits;
-    template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<schar> { typedef int best_type; };
-    template <> struct MinMaxTypeTraits<ushort> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<unsigned char> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<signed char> { typedef int best_type; };
+    template <> struct MinMaxTypeTraits<unsigned short> { typedef int best_type; };
     template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
     template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
     template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
     template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
 
     template <int BLOCK_SIZE, typename T, class Mask>
-    __global__ void kernel(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
+    __global__ void kernel_pass_1(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
     {
         typedef typename MinMaxTypeTraits<T>::best_type work_type;
@@ -750,7 +801,6 @@ namespace minMaxLoc
         __shared__ work_type smaxval[BLOCK_SIZE];
         __shared__ unsigned int sminloc[BLOCK_SIZE];
         __shared__ unsigned int smaxloc[BLOCK_SIZE];
-        __shared__ bool is_last;
         const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
         const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
@@ -799,38 +849,36 @@ namespace minMaxLoc
             maxval[bid] = (T) mymax;
             minloc[bid] = myminloc;
             maxloc[bid] = mymaxloc;
-
-            __threadfence();
-
-            unsigned int ticket = ::atomicInc(&blocks_finished, gridDim.x * gridDim.y);
-            is_last = (ticket == gridDim.x * gridDim.y - 1);
         }
+    }
+
+    template <int BLOCK_SIZE, typename T>
+    __global__ void kernel_pass_2(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int count)
+    {
+        typedef typename MinMaxTypeTraits<T>::best_type work_type;
-
-        __syncthreads();
-
-        if (is_last)
-        {
-            unsigned int idx = ::min(tid, gridDim.x * gridDim.y - 1);
+        __shared__ work_type sminval[BLOCK_SIZE];
+        __shared__ work_type smaxval[BLOCK_SIZE];
+        __shared__ unsigned int sminloc[BLOCK_SIZE];
+        __shared__ unsigned int smaxloc[BLOCK_SIZE];
-
-            mymin = minval[idx];
-            mymax = maxval[idx];
-            myminloc = minloc[idx];
-            mymaxloc = maxloc[idx];
+        unsigned int idx = ::min(threadIdx.x, count - 1);
-
-            reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
-                                     smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
-                                     tid,
-                                     thrust::make_tuple(less<work_type>(), greater<work_type>()));
+        work_type mymin = minval[idx];
+        work_type mymax = maxval[idx];
+        unsigned int myminloc = minloc[idx];
+        unsigned int mymaxloc = maxloc[idx];
-
-            if (tid == 0)
-            {
-                minval[0] = (T) mymin;
-                maxval[0] = (T) mymax;
-                minloc[0] = myminloc;
-                maxloc[0] = mymaxloc;
+        reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
+                                 smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
+                                 threadIdx.x,
+                                 thrust::make_tuple(less<work_type>(), greater<work_type>()));
-
-                blocks_finished = 0;
-            }
+        if (threadIdx.x == 0)
+        {
+            minval[0] = (T) mymin;
+            maxval[0] = (T) mymax;
+            minloc[0] = myminloc;
+            maxloc[0] = mymaxloc;
+        }
     }
@@ -877,10 +925,13 @@ namespace minMaxLoc
         unsigned int* maxloc_buf = locbuf.ptr(1);
 
         if (mask.data)
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
+            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
         else
-            kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
+            kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
         cudaSafeCall( cudaGetLastError() );
 
+        kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
+        cudaSafeCall( cudaGetLastError() );
+
         cudaSafeCall( cudaDeviceSynchronize() );
@@ -898,9 +949,9 @@ namespace minMaxLoc
         maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
     }
 
-    template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
-    template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<unsigned char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<signed char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
+    template void run<unsigned short>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
     template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
     template void run<int >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
     template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
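The ``minMaxLoc`` rework above replaces the single-kernel finish (the ``blocks_finished`` ticket, ``__threadfence()`` and ``is_last`` handshake) with an explicit two-pass scheme: ``kernel_pass_1`` writes one partial result per block, then a second one-block launch folds the partials. A self-contained CUDA sketch of that structure, simplified to a plain max over ints (all names hypothetical; the grid is sized so the partial count fits one block, as the tiled OpenCV kernels guarantee):

    #include <climits>
    #include <cstdio>
    #include <cuda_runtime.h>

    // Pass 1: each block reduces its slice of src to a single partial maximum.
    template <int BLOCK_SIZE>
    __global__ void pass1(const int* src, int n, int* partials)
    {
        __shared__ int smem[BLOCK_SIZE];
        int tid = threadIdx.x;
        int i = blockIdx.x * BLOCK_SIZE + tid;
        smem[tid] = (i < n) ? src[i] : INT_MIN;
        __syncthreads();
        for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1)
        {
            if (tid < s)
                smem[tid] = ::max(smem[tid], smem[tid + s]);
            __syncthreads();
        }
        if (tid == 0)
            partials[blockIdx.x] = smem[0];
    }

    // Pass 2: one block folds the per-block partials; extra threads clamp their
    // index and merely reload a valid partial, like kernel_pass_2 does.
    template <int BLOCK_SIZE>
    __global__ void pass2(int* partials, int count)
    {
        __shared__ int smem[BLOCK_SIZE];
        int tid = threadIdx.x;
        smem[tid] = partials[::min(tid, count - 1)];
        __syncthreads();
        for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1)
        {
            if (tid < s)
                smem[tid] = ::max(smem[tid], smem[tid + s]);
            __syncthreads();
        }
        if (tid == 0)
            partials[0] = smem[0];
    }

    int main()
    {
        const int N = 1 << 16, BLOCK = 256, GRID = N / BLOCK;  // GRID == BLOCK here
        int *src, *partials;
        cudaMalloc(&src, N * sizeof(int));
        cudaMalloc(&partials, GRID * sizeof(int));
        cudaMemset(src, 0, N * sizeof(int));                   // all zeros -> max is 0
        pass1<BLOCK><<<GRID, BLOCK>>>(src, N, partials);
        pass2<BLOCK><<<1, BLOCK>>>(partials, GRID);
        int result = -1;
        cudaMemcpy(&result, partials, sizeof(int), cudaMemcpyDeviceToHost);
        printf("max = %d\n", result);
        cudaFree(src);
        cudaFree(partials);
        return 0;
    }

Compared with the removed code, this trades the fence-and-ticket handshake for one extra (tiny) kernel launch, and drops the need to reset ``blocks_finished`` between calls.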

@@ -49,6 +49,10 @@
 #pragma warning( disable: 4244 4510 4512 4610 )
 #endif
 
+#ifdef __GNUC__
+# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -2054,7 +2058,7 @@ bool InputMediaStream_FFMPEG::read(unsigned char** data, int* size, int* endOfFi
     if (ret < 0)
    {
-        if (ret == AVERROR_EOF)
+        if (ret == (int)AVERROR_EOF)
            *endOfFile = true;
        return false;
    }

@@ -3,6 +3,7 @@ package org.opencv.android;
 import java.io.File;
 import java.util.StringTokenizer;
 
+import org.opencv.core.Core;
 import org.opencv.engine.OpenCVEngineInterface;
 
 import android.content.ComponentName;
@@ -85,7 +86,6 @@ class AsyncServiceHelper
             {
                 mServiceInstallationProgress = true;
                 Log.d(TAG, "Package installation started");
-
             }
             else
             {
@@ -299,6 +299,10 @@ class AsyncServiceHelper
                 if (initOpenCVLibs(path, libs))
                 {
                     Log.d(TAG, "First attempt to load libs is OK");
+                    String eol = System.getProperty("line.separator");
+                    for (String str : Core.getBuildInformation().split(eol))
+                        Log.i(TAG, str);
+
                     status = LoaderCallbackInterface.SUCCESS;
                 }
                 else

@@ -1,7 +1,8 @@
 package org.opencv.android;
 
-import java.util.StringTokenizer;
+import org.opencv.core.Core;
+import java.util.StringTokenizer;
 
 import android.util.Log;
 
 class StaticHelper {
@@ -28,6 +29,10 @@ class StaticHelper {
         if (initOpenCVLibs(libs))
         {
             Log.d(TAG, "First attempt to load libs is OK");
+            String eol = System.getProperty("line.separator");
+            for (String str : Core.getBuildInformation().split(eol))
+                Log.i(TAG, str);
+
             result = true;
         }
         else
