Merge pull request #9238 from alalek:valgrind_fixes

pull/9248/head
Alexander Alekhin 8 years ago
commit 34f9c039c5
  1. 5
      cmake/OpenCVCompilerOptions.cmake
  2. 6
      cmake/OpenCVUtils.cmake
  3. 4
      modules/calib3d/src/stereobm.cpp
  4. 2
      modules/core/src/stat.cpp
  5. 17
      modules/dnn/src/init.cpp
  6. 25
      modules/flann/include/opencv2/flann/lsh_table.h
  7. 197
      modules/imgproc/src/accum.simd.hpp
  8. 2
      modules/ts/misc/run.py
  9. 10
      modules/ts/misc/run_long.py
  10. 11
      modules/ts/misc/run_suite.py
  11. 198
      platforms/scripts/valgrind.supp
  12. 113
      platforms/scripts/valgrind_3rdparty.supp

@ -203,7 +203,10 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif()
set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} -DNDEBUG")
set(OPENCV_EXTRA_FLAGS_DEBUG "${OPENCV_EXTRA_FLAGS_DEBUG} -O0 -DDEBUG -D_DEBUG")
if(NOT " ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "-O")
set(OPENCV_EXTRA_FLAGS_DEBUG "${OPENCV_EXTRA_FLAGS_DEBUG} -O0")
endif()
set(OPENCV_EXTRA_FLAGS_DEBUG "${OPENCV_EXTRA_FLAGS_DEBUG} -DDEBUG -D_DEBUG")
endif()
if(MSVC)

@ -533,6 +533,12 @@ macro(ocv_finalize_status)
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${OPENCV_BUILD_INFO_FILE}" "${OPENCV_MODULE_opencv_core_BINARY_DIR}/version_string.inc" OUTPUT_QUIET)
endif()
endif()
if(UNIX)
install(FILES "${OpenCV_SOURCE_DIR}/platforms/scripts/valgrind.supp"
"${OpenCV_SOURCE_DIR}/platforms/scripts/valgrind_3rdparty.supp"
DESTINATION "${OPENCV_OTHER_INSTALL_PATH}" COMPONENT "dev")
endif()
endmacro()

@ -197,7 +197,7 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
{
int x, y;
const int OFS = 256*4, TABSZ = OFS*2 + 256;
uchar tab[TABSZ];
uchar tab[TABSZ] = { 0 };
Size size = src.size();
for( x = 0; x < TABSZ; x++ )
@ -227,7 +227,7 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
v_int16x8 ftz2 = v_setall_s16((short)(ftzero*2));
v_int16x8 z = v_setzero_s16();
for(; x <= size.width-8; x += 8 )
for(; x <= (size.width - 1) - 8; x += 8 )
{
v_int16x8 s00 = v_reinterpret_as_s16(v_load_expand(srow0 + x + 1));
v_int16x8 s01 = v_reinterpret_as_s16(v_load_expand(srow0 + x - 1));

@ -1547,7 +1547,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
bool haveMask = _mask.kind() != _InputArray::NONE;
int nz = haveMask ? -1 : (int)_src.total();
Scalar mean, stddev;
Scalar mean(0), stddev(0);
const int cn = _src.channels();
if (cn > 4)
return false;

@ -42,6 +42,8 @@
#include "precomp.hpp"
#include <opencv2/dnn/layer.details.hpp>
#include <google/protobuf/stubs/common.h>
namespace cv {
namespace dnn {
CV__DNN_EXPERIMENTAL_NS_BEGIN
@ -56,11 +58,26 @@ Mutex& getInitializationMutex()
// force initialization (single-threaded environment)
Mutex* __initialization_mutex_initializer = &getInitializationMutex();
namespace {
using namespace google::protobuf;
class ProtobufShutdown {
public:
bool initialized;
ProtobufShutdown() : initialized(true) {}
~ProtobufShutdown()
{
initialized = false;
google::protobuf::ShutdownProtobufLibrary();
}
};
} // namespace
void initializeLayerFactory()
{
CV_TRACE_FUNCTION();
static ProtobufShutdown protobufShutdown; (void)protobufShutdown;
CV_DNN_REGISTER_LAYER_CLASS(Slice, SliceLayer);
CV_DNN_REGISTER_LAYER_CLASS(Split, SplitLayer);
CV_DNN_REGISTER_LAYER_CLASS(Concat, ConcatLayer);

@ -147,6 +147,7 @@ public:
LshTable()
{
key_size_ = 0;
feature_size_ = 0;
speed_level_ = kArray;
}
@ -157,7 +158,7 @@ public:
*/
LshTable(unsigned int feature_size, unsigned int key_size)
{
(void)feature_size;
feature_size_ = feature_size;
(void)key_size;
std::cerr << "LSH is not implemented for that type" << std::endl;
assert(0);
@ -332,6 +333,8 @@ private:
*/
unsigned int key_size_;
unsigned int feature_size_;
// Members only used for the unsigned char specialization
/** The mask to apply to a feature to get the hash key
* Only used in the unsigned char case
@ -345,9 +348,10 @@ private:
template<>
inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
{
feature_size_ = feature_size;
initialize(subsignature_size);
// Allocate the mask
mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0);
// A bit brutal but fast to code
std::vector<size_t> indices(feature_size * CHAR_BIT);
@ -392,6 +396,7 @@ inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) cons
{
// no need to check whether T is divisible by sizeof(size_t) as in the Hamming
// distance computation, because we have a mask
// FIXIT: This is a bad assumption — we may read tail bytes past the end of the allocated features buffer
const size_t* feature_block_ptr = reinterpret_cast<const size_t*> ((const void*)feature);
// Figure out the subsignature of the feature
@ -400,10 +405,20 @@ inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) cons
size_t subsignature = 0;
size_t bit_index = 1;
for (std::vector<size_t>::const_iterator pmask_block = mask_.begin(); pmask_block != mask_.end(); ++pmask_block) {
for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) {
// get the mask and signature blocks
size_t feature_block = *feature_block_ptr;
size_t mask_block = *pmask_block;
size_t feature_block;
if (i <= feature_size_ - sizeof(size_t))
{
feature_block = *feature_block_ptr;
}
else
{
size_t tmp = 0;
memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order
feature_block = tmp;
}
size_t mask_block = mask_[i / sizeof(size_t)];
while (mask_block) {
// Get the lowest set bit in the mask block
size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);

@ -425,9 +425,7 @@ void acc_simd_(const ushort* src, float* dst, const uchar* mask, int len, int cn
v_uint16x8 v_0 = v_setall_u16(0);
for ( ; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint8x16 _v_mask = v_load(mask + x);
v_uint16x8 v_mask, dummy;
v_expand(_v_mask, v_mask, dummy);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_uint16x8 v_src = v_load(src + x);
v_src = v_src & v_mask;
@ -443,9 +441,7 @@ void acc_simd_(const ushort* src, float* dst, const uchar* mask, int len, int cn
v_uint16x8 v_0 = v_setall_u16(0);
for ( ; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint8x16 _v_mask = v_load(mask + x);
v_uint16x8 v_mask, dummy;
v_expand(_v_mask, v_mask, dummy);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_uint16x8 v_src0, v_src1, v_src2;
v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2);
@ -491,8 +487,7 @@ void acc_simd_(const float* src, float* dst, const uchar* mask, int len, int cn)
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 v_masku16, dummy0;
v_expand(v_load(mask + x), v_masku16, dummy0);
v_uint16x8 v_masku16 = v_load_expand(mask + x);
v_uint32x4 v_masku320, v_masku321;
v_expand(v_masku16, v_masku320, v_masku321);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_masku320 == v_reinterpret_as_u32(v_0)));
@ -506,8 +501,7 @@ void acc_simd_(const float* src, float* dst, const uchar* mask, int len, int cn)
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 v_masku16, dummy0;
v_expand(v_load(mask + x), v_masku16, dummy0);
v_uint16x8 v_masku16 = v_load_expand(mask + x);
v_uint32x4 v_masku320, v_masku321;
v_expand(v_masku16, v_masku320, v_masku321);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_masku320 == v_reinterpret_as_u32(v_0)));
@ -770,8 +764,7 @@ void acc_simd_(const ushort* src, double* dst, const uchar* mask, int len, int c
{
for ( ; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 v_mask, dummy;
v_expand(v_load(mask + x), v_mask, dummy);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_uint16x8 v_src = v_load(src + x);
v_src = v_src & v_mask;
@ -803,8 +796,7 @@ void acc_simd_(const ushort* src, double* dst, const uchar* mask, int len, int c
{
for ( ; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 v_mask, dummy;
v_expand(v_load(mask + x), v_mask, dummy);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_uint16x8 v_src0, v_src1, v_src2;
v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2);
@ -871,10 +863,7 @@ void acc_simd_(const float* src, double* dst, const uchar* mask, int len, int cn
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 v_masku16, dummy0;
v_expand(v_load(mask + x), v_masku16, dummy0);
v_uint32x4 v_masku32, dummy1;
v_expand(v_masku16, v_masku32, dummy1);
v_uint32x4 v_masku32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(v_masku32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
@ -892,10 +881,7 @@ void acc_simd_(const float* src, double* dst, const uchar* mask, int len, int cn
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 v_masku16, dummy0;
v_expand(v_load(mask + x), v_masku16, dummy0);
v_uint32x4 v_masku32, dummy1;
v_expand(v_masku16, v_masku32, dummy1);
v_uint32x4 v_masku32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(v_masku32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
@ -947,10 +933,7 @@ void acc_simd_(const double* src, double* dst, const uchar* mask, int len, int c
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 v_masku16, dummy0;
v_expand(v_load(mask + x), v_masku16, dummy0);
v_uint32x4 v_masku32, dummy1;
v_expand(v_masku16, v_masku32, dummy1);
v_uint32x4 v_masku32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(v_masku32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
@ -967,10 +950,7 @@ void acc_simd_(const double* src, double* dst, const uchar* mask, int len, int c
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 v_masku16, dummy0;
v_expand(v_load(mask + x), v_masku16, dummy0);
v_uint32x4 v_masku32, dummy1;
v_expand(v_masku16, v_masku32, dummy1);
v_uint32x4 v_masku32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(v_masku32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
@ -1157,9 +1137,9 @@ void accSqr_simd_(const ushort* src, float* dst, const uchar* mask, int len, int
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint16x8 v_mask16 = v_load_expand(mask + x);
v_uint32x4 v_mask0, v_mask1;
v_expand(stub, v_mask0, v_mask1);
v_expand(v_mask16, v_mask0, v_mask1);
v_mask0 = ~(v_mask0 == v_0);
v_mask1 = ~(v_mask1 == v_0);
v_uint16x8 v_src = v_load(src + x);
@ -1182,9 +1162,9 @@ void accSqr_simd_(const ushort* src, float* dst, const uchar* mask, int len, int
{
for ( ; x <= len - cVectorWidth ; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint16x8 v_mask16 = v_load_expand(mask + x);
v_uint32x4 v_mask0, v_mask1;
v_expand(stub, v_mask0, v_mask1);
v_expand(v_mask16, v_mask0, v_mask1);
v_mask0 = ~(v_mask0 == v_0);
v_mask1 = ~(v_mask1 == v_0);
@ -1254,11 +1234,11 @@ void accSqr_simd_(const float* src, float* dst, const uchar* mask, int len, int
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 v_stub = v_load_expand(mask + x);
v_uint32x4 v_stub0, v_stub1;
v_expand(v_stub, v_stub0, v_stub1);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_stub0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(v_stub1 == v_0));
v_uint16x8 v_mask16 = v_load_expand(mask + x);
v_uint32x4 v_mask_0, v_mask_1;
v_expand(v_mask16, v_mask_0, v_mask_1);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_mask_0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(v_mask_1 == v_0));
v_float32x4 v_src0 = v_load(src + x);
v_float32x4 v_src1 = v_load(src + x + 4);
v_src0 = v_src0 & v_mask0;
@ -1274,11 +1254,11 @@ void accSqr_simd_(const float* src, float* dst, const uchar* mask, int len, int
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 v_stub = v_load_expand(mask + x);
v_uint32x4 v_stub0, v_stub1;
v_expand(v_stub, v_stub0, v_stub1);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_stub0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(v_stub1 == v_0));
v_uint16x8 v_mask16 = v_load_expand(mask + x);
v_uint32x4 v_mask_0, v_mask_1;
v_expand(v_mask16, v_mask_0, v_mask_1);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_mask_0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(v_mask_1 == v_0));
v_float32x4 v_src00, v_src10, v_src20, v_src01, v_src11, v_src21;
v_load_deinterleave(src + x * cn, v_src00, v_src10, v_src20);
@ -1319,9 +1299,7 @@ void accSqr_simd_(const uchar* src, double* dst, const uchar* mask, int len, int
int size = len * cn;
for (; x <= size - cVectorWidth; x += cVectorWidth)
{
v_uint8x16 v_src = v_load(src + x);
v_uint16x8 v_int, dummy;
v_expand(v_src, v_int, dummy);
v_uint16x8 v_int = v_load_expand(src + x);
v_uint32x4 v_int0, v_int1;
v_expand(v_int, v_int0, v_int1);
@ -1353,17 +1331,15 @@ void accSqr_simd_(const uchar* src, double* dst, const uchar* mask, int len, int
}
else
{
v_uint8x16 v_0 = v_setzero_u8();
v_uint16x8 v_0 = v_setzero_u16();
if (cn == 1)
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint8x16 v_mask = v_load(mask + x);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_uint8x16 v_src = v_load(src + x);
v_src = v_src & v_mask;
v_uint16x8 v_int, dummy;
v_expand(v_src, v_int, dummy);
v_uint16x8 v_src = v_load_expand(src + x);
v_uint16x8 v_int = v_src & v_mask;
v_uint32x4 v_int0, v_int1;
v_expand(v_int, v_int0, v_int1);
@ -1395,19 +1371,19 @@ void accSqr_simd_(const uchar* src, double* dst, const uchar* mask, int len, int
}
else if (cn == 3)
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
for (; x <= len - /*cVectorWidth*/16; x += cVectorWidth)
{
v_uint8x16 v_mask = v_load(mask + x);
v_mask = ~(v_mask == v_0);
v_uint8x16 v_src0, v_src1, v_src2;
v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2);
v_src0 = v_src0 & v_mask;
v_src1 = v_src1 & v_mask;
v_src2 = v_src2 & v_mask;
v_uint16x8 v_int0, v_int1, v_int2, dummy;
v_expand(v_src0, v_int0, dummy);
v_expand(v_src1, v_int1, dummy);
v_expand(v_src2, v_int2, dummy);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_int0 = v_int0 & v_mask;
v_int1 = v_int1 & v_mask;
v_int2 = v_int2 & v_mask;
v_uint32x4 v_int00, v_int01, v_int10, v_int11, v_int20, v_int21;
v_expand(v_int0, v_int00, v_int01);
@ -1627,9 +1603,7 @@ void accSqr_simd_(const float* src, double* dst, const uchar* mask, int len, int
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 v_mask, dummy;
v_expand(stub, v_mask, dummy);
v_uint32x4 v_mask = v_load_expand_q(mask + x);;
v_mask = ~(v_mask == v_0);
v_float32x4 v_src = v_load(src + x);
v_src = v_src & v_reinterpret_as_f32(v_mask);
@ -1646,9 +1620,7 @@ void accSqr_simd_(const float* src, double* dst, const uchar* mask, int len, int
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 v_mask, dummy;
v_expand(stub, v_mask, dummy);
v_uint32x4 v_mask = v_load_expand_q(mask + x);
v_mask = ~(v_mask == v_0);
v_float32x4 v_src0, v_src1, v_src2;
@ -1709,11 +1681,9 @@ void accSqr_simd_(const double* src, double* dst, const uchar* mask, int len, in
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 stub0, stub1;
v_expand(stub, stub0, stub1);
v_uint32x4 v_mask32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(stub0, v_masku640, v_masku641);
v_expand(v_mask32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
v_float64x2 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0));
v_float64x2 v_src0 = v_load(src + x);
@ -1731,11 +1701,9 @@ void accSqr_simd_(const double* src, double* dst, const uchar* mask, int len, in
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 stub0, stub1;
v_expand(stub, stub0, stub1);
v_uint32x4 v_mask32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(stub0, v_masku640, v_masku641);
v_expand(v_mask32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
v_float64x2 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0));
@ -2059,11 +2027,10 @@ void accProd_simd_(const float* src1, const float* src2, float* dst, const uchar
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 stub0, stub1;
v_expand(stub, stub0, stub1);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(stub0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(stub1 == v_0));
v_uint32x4 v_mask32_0 = v_load_expand_q(mask + x);
v_uint32x4 v_mask32_1 = v_load_expand_q(mask + x + 4);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_mask32_0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(v_mask32_1 == v_0));
v_store(dst + x, v_load(dst + x) + ((v_load(src1 + x) * v_load(src2 + x)) & v_mask0));
v_store(dst + x + 4, v_load(dst + x + 4) + ((v_load(src1 + x + 4) * v_load(src2 + x + 4)) & v_mask1));
@ -2073,11 +2040,10 @@ void accProd_simd_(const float* src1, const float* src2, float* dst, const uchar
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 stub0, stub1;
v_expand(stub, stub0, stub1);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(stub0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(stub1 == v_0));
v_uint32x4 v_mask32_0 = v_load_expand_q(mask + x);
v_uint32x4 v_mask32_1 = v_load_expand_q(mask + x + 4);
v_float32x4 v_mask0 = v_reinterpret_as_f32(~(v_mask32_0 == v_0));
v_float32x4 v_mask1 = v_reinterpret_as_f32(~(v_mask32_1 == v_0));
v_float32x4 v_1src00, v_1src01, v_1src10, v_1src11, v_1src20, v_1src21;
v_float32x4 v_2src00, v_2src01, v_2src10, v_2src11, v_2src20, v_2src21;
@ -2109,12 +2075,8 @@ void accProd_simd_(const uchar* src1, const uchar* src2, double* dst, const ucha
int size = len * cn;
for (; x <= size - cVectorWidth; x += cVectorWidth)
{
v_uint8x16 v_1src = v_load(src1 + x);
v_uint8x16 v_2src = v_load(src2 + x);
v_uint16x8 v_1int, v_2int, dummy;
v_expand(v_1src, v_1int, dummy);
v_expand(v_2src, v_2int, dummy);
v_uint16x8 v_1int = v_load_expand(src1 + x);
v_uint16x8 v_2int = v_load_expand(src2 + x);
v_uint32x4 v_1int_0, v_1int_1, v_2int_0, v_2int_1;
v_expand(v_1int, v_1int_0, v_1int_1);
@ -2148,19 +2110,15 @@ void accProd_simd_(const uchar* src1, const uchar* src2, double* dst, const ucha
}
else
{
v_uint8x16 v_0 = v_setzero_u8();
v_uint16x8 v_0 = v_setzero_u16();
if (cn == 1)
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint8x16 v_mask = v_load(mask + x);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_uint8x16 v_1src = v_load(src1 + x) & v_mask;
v_uint8x16 v_2src = v_load(src2 + x) & v_mask;
v_uint16x8 v_1int, v_2int, dummy;
v_expand(v_1src, v_1int, dummy);
v_expand(v_2src, v_2int, dummy);
v_uint16x8 v_1int = v_load_expand(src1 + x) & v_mask;
v_uint16x8 v_2int = v_load_expand(src2 + x) & v_mask;
v_uint32x4 v_1int_0, v_1int_1, v_2int_0, v_2int_1;
v_expand(v_1int, v_1int_0, v_1int_1);
@ -2194,19 +2152,11 @@ void accProd_simd_(const uchar* src1, const uchar* src2, double* dst, const ucha
}
else if (cn == 3)
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
for (; x <= len - /*cVectorWidth*/16; x += cVectorWidth)
{
v_uint8x16 v_mask = v_load(mask + x);
v_mask = ~(v_mask == v_0);
v_uint8x16 v_1src0, v_1src1, v_1src2, v_2src0, v_2src1, v_2src2;
v_load_deinterleave(src1 + x * cn, v_1src0, v_1src1, v_1src2);
v_load_deinterleave(src2 + x * cn, v_2src0, v_2src1, v_2src2);
v_1src0 = v_1src0 & v_mask;
v_1src1 = v_1src1 & v_mask;
v_1src2 = v_1src2 & v_mask;
v_2src0 = v_2src0 & v_mask;
v_2src1 = v_2src1 & v_mask;
v_2src2 = v_2src2 & v_mask;
v_uint16x8 v_1int0, v_1int1, v_1int2, v_2int0, v_2int1, v_2int2, dummy;
v_expand(v_1src0, v_1int0, dummy);
@ -2216,6 +2166,15 @@ void accProd_simd_(const uchar* src1, const uchar* src2, double* dst, const ucha
v_expand(v_2src1, v_2int1, dummy);
v_expand(v_2src2, v_2int2, dummy);
v_uint16x8 v_mask = v_load_expand(mask + x);
v_mask = ~(v_mask == v_0);
v_1int0 = v_1int0 & v_mask;
v_1int1 = v_1int1 & v_mask;
v_1int2 = v_1int2 & v_mask;
v_2int0 = v_2int0 & v_mask;
v_2int1 = v_2int1 & v_mask;
v_2int2 = v_2int2 & v_mask;
v_uint32x4 v_1int00, v_1int01, v_1int10, v_1int11, v_1int20, v_1int21;
v_uint32x4 v_2int00, v_2int01, v_2int10, v_2int11, v_2int20, v_2int21;
v_expand(v_1int0, v_1int00, v_1int01);
@ -2440,9 +2399,7 @@ void accProd_simd_(const float* src1, const float* src2, double* dst, const ucha
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 v_mask, dummy;
v_expand(stub, v_mask, dummy);
v_uint32x4 v_mask = v_load_expand_q(mask + x);
v_mask = ~(v_mask == v_0);
v_float32x4 v_1src = v_load(src1 + x);
v_float32x4 v_2src = v_load(src2 + x);
@ -2462,9 +2419,7 @@ void accProd_simd_(const float* src1, const float* src2, double* dst, const ucha
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 v_mask, dummy;
v_expand(stub, v_mask, dummy);
v_uint32x4 v_mask = v_load_expand_q(mask + x);
v_mask = ~(v_mask == v_0);
v_float32x4 v_1src0, v_1src1, v_1src2, v_2src0, v_2src1, v_2src2;
v_load_deinterleave(src1 + x * cn, v_1src0, v_1src1, v_1src2);
@ -2522,11 +2477,9 @@ void accProd_simd_(const double* src1, const double* src2, double* dst, const uc
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 stub0, stub1;
v_expand(stub, stub0, stub1);
v_uint32x4 v_mask32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(stub0, v_masku640, v_masku641);
v_expand(v_mask32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
v_float64x2 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0));
@ -2543,11 +2496,9 @@ void accProd_simd_(const double* src1, const double* src2, double* dst, const uc
{
for (; x <= len - cVectorWidth; x += cVectorWidth)
{
v_uint16x8 stub = v_load_expand(mask + x);
v_uint32x4 stub0, stub1;
v_expand(stub, stub0, stub1);
v_uint32x4 v_mask32 = v_load_expand_q(mask + x);
v_uint64x2 v_masku640, v_masku641;
v_expand(stub0, v_masku640, v_masku641);
v_expand(v_mask32, v_masku640, v_masku641);
v_float64x2 v_mask0 = v_reinterpret_as_f64(~(v_masku640 == v_0));
v_float64x2 v_mask1 = v_reinterpret_as_f64(~(v_masku641 == v_0));
@ -2704,12 +2655,10 @@ void accW_simd_(const uchar* src, double* dst, const uchar* mask, int len, int c
int size = len * cn;
for (; x <= size - cVectorWidth; x += cVectorWidth)
{
v_uint8x16 v_src = v_load(src + x);
v_uint16x8 v_int, dummy;
v_expand(v_src, v_int, dummy);
v_uint16x8 v_src16 = v_load_expand(src + x);
v_uint32x4 v_int_0, v_int_1;
v_expand(v_int, v_int_0, v_int_1);
v_expand(v_src16, v_int_0, v_int_1);
v_int32x4 v_int0 = v_reinterpret_as_s32(v_int_0);
v_int32x4 v_int1 = v_reinterpret_as_s32(v_int_1);

@ -37,7 +37,7 @@ if __name__ == "__main__":
# Valgrind
parser.add_argument("--valgrind", action="store_true", default=False, help="Run C++ tests in valgrind")
parser.add_argument("--valgrind_supp", metavar="FILE", help="Path to valgrind suppression file (example: --valgrind_supp opencv/platforms/scripts/valgrind.supp)")
parser.add_argument("--valgrind_supp", metavar="FILE", action='append', help="Path to valgrind suppression file (example: --valgrind_supp opencv/platforms/scripts/valgrind.supp)")
parser.add_argument("--valgrind_opt", metavar="OPT", action="append", default=[], help="Add command line option to valgrind (example: --valgrind_opt=--leak-check=full)")
# Android

@ -7,7 +7,10 @@ from pprint import PrettyPrinter as PP
LONG_TESTS_DEBUG_VALGRIND = [
('calib3d', 'Calib3d_InitUndistortRectifyMap.accuracy', 2017.22),
('dnn', 'Reproducibility*', 1000), # large DNN models
('features2d', 'Features2d_Feature2d.no_crash', 1235.68),
('imgcodecs', 'Imgcodecs_Png.write_big', 1000), # memory limit
('imgcodecs', 'Imgcodecs_Tiff.decode_tile16384x16384', 1000), # memory limit
('ml', 'ML_RTrees.regression', 1423.47),
('optflow', 'DenseOpticalFlow_DeepFlow.ReferenceAccuracy', 1360.95),
('optflow', 'DenseOpticalFlow_DeepFlow_perf.perf/0', 1881.59),
@ -23,6 +26,7 @@ LONG_TESTS_DEBUG_VALGRIND = [
('shape', 'Shape_SCD.regression', 3311.46),
('tracking', 'AUKF.br_mean_squared_error', 10764.6),
('tracking', 'UKF.br_mean_squared_error', 5228.27),
('videoio', 'Videoio_Video.ffmpeg_writebig', 1000),
('xfeatures2d', 'Features2d_RotationInvariance_Descriptor_BoostDesc_LBGM.regression', 1124.51),
('xfeatures2d', 'Features2d_RotationInvariance_Descriptor_VGG120.regression', 2198.1),
('xfeatures2d', 'Features2d_RotationInvariance_Descriptor_VGG48.regression', 1958.52),
@ -43,10 +47,8 @@ LONG_TESTS_DEBUG_VALGRIND = [
]
def longTestFilter(data):
res = ['*', '-']
for _, v, _ in data:
res.append(v)
def longTestFilter(data, module = None):
res = ['*', '-'] + [v for _, v, m in data if module is None or m == module]
return '--gtest_filter={}'.format(':'.join(res))

@ -103,10 +103,15 @@ class TestSuite(object):
def wrapInValgrind(self, cmd = []):
if self.options.valgrind:
res = ['valgrind']
if self.options.valgrind_supp:
res.append("--suppressions=%s" % self.options.valgrind_supp)
supp = self.options.valgrind_supp or []
for f in supp:
if os.path.isfile(f):
res.append("--suppressions=%s" % f)
else:
print("WARNING: Valgrind suppression file is missing, SKIP: %s" % f)
res.extend(self.options.valgrind_opt)
return res + cmd + [longTestFilter(LONG_TESTS_DEBUG_VALGRIND)]
has_gtest_filter = next((True for x in cmd if x.startswith('--gtest_filter=')), False)
return res + cmd + ([longTestFilter(LONG_TESTS_DEBUG_VALGRIND)] if not has_gtest_filter else [])
return cmd
def tryCommand(self, cmd):

@ -1,13 +1,203 @@
{
IPP static init
OpenCV-IPP static init
Memcheck:Cond
fun:ippicvGetCpuFeatures
fun:ippicvStaticInit
}
{
TBB - allocate_via_handler_v3 issue
OpenCV-getInitializationMutex
Memcheck:Leak
fun:malloc
fun:_ZN3tbb8internal23allocate_via_handler_v3Em
...
fun:_ZN2cv22getInitializationMutexEv
}
{
OpenCV-getStdAllocator
Memcheck:Leak
...
fun:_ZN2cv3Mat15getStdAllocatorEv
}
{
OpenCV-getOpenCLAllocator
Memcheck:Leak
...
fun:_ZN2cv3ocl18getOpenCLAllocatorEv
}
{
OpenCV-getCoreTlsData
Memcheck:Leak
fun:_Znwm
fun:_ZN2cv14getCoreTlsDataEv
}
{
OpenCV-TLS-getTlsStorage
Memcheck:Leak
...
fun:_ZN2cvL13getTlsStorageEv
}
{
OpenCV-TLS-getData()
Memcheck:Leak
...
fun:*setData*
fun:_ZNK2cv16TLSDataContainer7getDataEv
}
{
OpenCV-parallel_for
Memcheck:Leak
...
fun:_ZN2cv13ThreadManager8initPoolEv*
}
{
OpenCV-parallel_for
Memcheck:Leak
fun:_Znwm
fun:*instance*
fun:_ZN2cv21parallel_for_pthreadsERKNS_5RangeERKNS_16ParallelLoopBodyEd
fun:_ZN2cv13parallel_for_ERKNS_5RangeERKNS_16ParallelLoopBodyEd
}
{
OpenCV-parallel_for-ThreadManager::TLS
Memcheck:Leak
fun:_Znwm
fun:_ZNK2cv7TLSDataINS_13ThreadManager13work_thread_tEE18createDataInstanceEv
}
{
OpenCV-parallel_for-setNumThreads()
Memcheck:Leak
fun:_Znwm
fun:_ZN2cv13ThreadManager8instanceEv
fun:_ZN2cv33parallel_pthreads_set_threads_numEi
fun:_ZN2cv13setNumThreadsEi
}
{
OpenCV-parallel_for-getNumThreads()
Memcheck:Leak
...
fun:_ZN2cv13getNumThreadsEv
}
{
OpenCV-getIPPSingelton
Memcheck:Leak
...
fun:_ZN2cv3ippL15getIPPSingeltonEv
}
{
OpenCV-getGlobalMatOpInitializer
Memcheck:Leak
fun:_Znwm
fun:_ZN2cvL25getGlobalMatOpInitializerEv
}
{
OpenCV-CoreTLSData
Memcheck:Leak
...
fun:_ZNK2cv7TLSDataINS_11CoreTLSDataEE3getEv
}
{
OpenCV-ThreadID
Memcheck:Leak
fun:_Znwm
fun:_ZNK2cv7TLSDataINS_12_GLOBAL__N_18ThreadIDEE18createDataInstanceEv
}
{
OpenCV-ThreadID-TLS
Memcheck:Leak
fun:_Znwm
fun:getThreadIDTLS
}
{
OpenCV-CoreTLS
Memcheck:Leak
fun:_Znwm
fun:_ZNK2cv7TLSDataINS_11CoreTLSDataEE18createDataInstanceEv
}
{
OpenCV-haveOpenCL
Memcheck:Leak
...
fun:_ZN2cv3ocl10haveOpenCLEv
}
{
OpenCV-DNN-getLayerFactoryMutex
Memcheck:Leak
...
fun:_ZN2cv3dnn*L20getLayerFactoryMutexEv
}
{
OpenCV-ocl::Context
Memcheck:Leak
...
fun:_ZN2cv3ocl7Context10getDefaultEb
}
{
OpenCV-ocl::Device
Memcheck:Leak
...
fun:_ZN2cv3ocl6Device10getDefaultEv
}
{
OpenCV-ocl::Queue
Memcheck:Leak
...
fun:_ZN2cv3ocl5Queue6createERKNS0_7ContextERKNS0_6DeviceE
}
{
OpenCV-ocl::Program
Memcheck:Leak
...
fun:_ZN2cv3ocl6Kernel6createEPKcRKNS0_7ProgramE
}
{
OpenCV-ocl::ProgramEntry
Memcheck:Leak
...
fun:_ZNK2cv3ocl8internal12ProgramEntrycvRNS0_13ProgramSourceEEv
}
{
OpenCV-ocl::Context::getProg
Memcheck:Leak
...
fun:_ZN2cv3ocl7Context7getProgERKNS0_13ProgramSourceERKNS_6StringERS5_
}
{
OpenCV-ITT
Memcheck:Leak
...
fun:__itt_*create*
}
{
OpenCV-FFmpeg-swsscale
Memcheck:Addr16
...
fun:sws_scale
fun:_ZN20CvVideoWriter_FFMPEG10writeFrameEPKhiiiii
fun:cvWriteFrame_FFMPEG
}

@ -0,0 +1,113 @@
{
IPP static init
Memcheck:Cond
fun:ippicvGetCpuFeatures
fun:ippicvStaticInit
}
{
TBB - allocate_via_handler_v3 issue
Memcheck:Leak
fun:malloc
fun:_ZN3tbb8internal23allocate_via_handler_v3Em
}
{
GTest
Memcheck:Cond
fun:_ZN7testing8internal11CmpHelperLEIddEENS_15AssertionResultEPKcS4_RKT_RKT0_
}
{
OpenCL
Memcheck:Cond
...
obj:**/libOpenCL.so*
}
{
OpenCL-Intel
Memcheck:Cond
...
obj:**/libigdrcl.so
}
{
OpenCL-Intel
Memcheck:Leak
...
obj:*/libigdrcl.so*
}
{
OpenCL
Memcheck:Param
ioctl(generic)
...
fun:clGetPlatformIDs
}
{
OpenCL-Init
Memcheck:Leak
...
fun:clGetPlatformIDs
}
{
glib
Memcheck:Leak
fun:*alloc
obj:*/libglib*
}
{
gcrypt
Memcheck:Leak
...
obj:*/libgcrypt*
}
{
p11-kit
Memcheck:Leak
fun:*alloc
obj:*/libp11-kit*
}
{
gobject
Memcheck:Leak
fun:*alloc
...
obj:*/libgobject*
}
{
tasn
Memcheck:Leak
fun:*alloc
obj:*/libtasn*.so*
}
{
dl_init
Memcheck:Leak
...
fun:_dl_init
}
{
dl_open
Memcheck:Leak
...
fun:_dl_open
}
{
GDAL
Memcheck:Leak
fun:*alloc
...
obj:/usr/lib/libgdal.so.1.17.1
}
Loading…
Cancel
Save