Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/12055/head
Author: Alexander Alekhin
Commit: 9787ab598b
  1. apps/createsamples/utility.cpp (4)
  2. cmake/OpenCVCompilerOptimizations.cmake (2)
  3. modules/calib3d/src/circlesgrid.cpp (22)
  4. modules/calib3d/src/dls.cpp (2)
  5. modules/calib3d/test/test_chesscorners.cpp (8)
  6. modules/core/include/opencv2/core/hal/intrin_avx.hpp (816)
  7. modules/core/include/opencv2/core/hal/intrin_neon.hpp (77)
  8. modules/core/include/opencv2/core/hal/intrin_sse.hpp (327)
  9. modules/core/include/opencv2/core/hal/intrin_vsx.hpp (2)
  10. modules/core/include/opencv2/core/types.hpp (7)
  11. modules/core/include/opencv2/core/types_c.h (1)
  12. modules/core/src/array.cpp (4)
  13. modules/core/src/merge.cpp (347)
  14. modules/core/src/persistence_json.cpp (9)
  15. modules/core/src/persistence_types.cpp (4)
  16. modules/core/src/split.cpp (361)
  17. modules/core/test/test_mat.cpp (4)
  18. modules/core/test/test_rand.cpp (10)
  19. modules/dnn/CMakeLists.txt (6)
  20. modules/dnn/include/opencv2/dnn/dnn.hpp (36)
  21. modules/dnn/src/dnn.cpp (262)
  22. modules/dnn/src/layers/convolution_layer.cpp (21)
  23. modules/dnn/src/layers/eltwise_layer.cpp (2)
  24. modules/dnn/src/layers/normalize_bbox_layer.cpp (1)
  25. modules/dnn/src/layers/recurrent_layers.cpp (6)
  26. modules/dnn/src/layers/resize_layer.cpp (2)
  27. modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp (4)
  28. modules/dnn/src/op_inf_engine.cpp (100)
  29. modules/dnn/src/op_inf_engine.hpp (40)
  30. modules/dnn/src/tensorflow/tf_importer.cpp (45)
  31. modules/dnn/test/test_halide_layers.cpp (10)
  32. modules/dnn/test/test_ie_models.cpp (238)
  33. modules/dnn/test/test_layers.cpp (76)
  34. modules/dnn/test/test_misc.cpp (40)
  35. modules/dnn/test/test_tf_importer.cpp (2)
  36. modules/features2d/src/brisk.cpp (4)
  37. modules/features2d/src/kaze/KAZEFeatures.cpp (2)
  38. modules/imgcodecs/src/exif.cpp (1)
  39. modules/imgcodecs/src/grfmt_bmp.cpp (7)
  40. modules/imgcodecs/src/grfmt_pam.cpp (2)
  41. modules/imgcodecs/src/grfmt_tiff.cpp (9)
  42. modules/imgproc/src/contours.cpp (6)
  43. modules/imgproc/src/drawing.cpp (4)
  44. modules/imgproc/src/filter.avx2.cpp (7)
  45. modules/imgproc/src/filter.cpp (2)
  46. modules/imgproc/src/floodfill.cpp (7)
  47. modules/imgproc/src/histogram.cpp (4)
  48. modules/imgproc/src/hough.cpp (1)
  49. modules/imgproc/src/min_enclosing_triangle.cpp (1)
  50. modules/imgproc/src/undistort.cpp (1)
  51. modules/ml/src/ann_mlp.cpp (2)
  52. modules/objdetect/test/test_qrcode.cpp (84)
  53. modules/python/src2/cv2.cpp (3)
  54. modules/python/test/test_videoio.py (25)
  55. modules/ts/include/opencv2/ts.hpp (6)
  56. modules/ts/include/opencv2/ts/ts_ext.hpp (18)
  57. modules/ts/include/opencv2/ts/ts_perf.hpp (12)
  58. modules/ts/src/ts.cpp (32)
  59. modules/videoio/include/opencv2/videoio.hpp (1)
  60. modules/videoio/include/opencv2/videoio/registry.hpp (44)
  61. modules/videoio/misc/python/pyopencv_videoio.hpp (50)
  62. modules/videoio/src/cap_ffmpeg_impl.hpp (3)
  63. modules/videoio/src/cap_mjpeg_encoder.cpp (28)
  64. modules/videoio/src/cap_v4l.cpp (15)
  65. modules/videoio/src/container_avi.cpp (28)
  66. modules/videoio/src/videoio_registry.cpp (56)
  67. modules/videoio/test/test_precomp.hpp (27)
  68. modules/videoio/test/test_video_io.cpp (64)
  69. samples/cpp/detect_mser.cpp (293)
  70. samples/dnn/tf_text_graph_ssd.py (22)

@@ -1044,12 +1044,10 @@ void cvCreateTrainingSamples( const char* filename,
output = fopen( filename, "wb" );
if( output != NULL )
{
int hasbg;
int i;
int inverse;
hasbg = 0;
hasbg = (bgfilename != NULL && icvInitBackgroundReaders( bgfilename,
const int hasbg = (bgfilename != NULL && icvInitBackgroundReaders( bgfilename,
Size( winwidth,winheight ) ) );
Mat sample( winheight, winwidth, CV_8UC1 );

@@ -740,7 +740,7 @@ macro(ocv_compiler_optimization_fill_cpu_config)
")
set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
set(__file "${OpenCV_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
if(EXISTS "${__file}")
file(READ "${__file}" __content)
endif()

@@ -220,7 +220,7 @@ void CirclesGridClusterFinder::findOutsideCorners(const std::vector<cv::Point2f>
CV_Assert(!corners.empty());
outsideCorners.clear();
//find two pairs of the most nearest corners
int i, j, n = (int)corners.size();
const size_t n = corners.size();
#ifdef DEBUG_CIRCLES
Mat cornersImage(1024, 1248, CV_8UC1, Scalar(0));
@@ -228,22 +228,22 @@ void CirclesGridClusterFinder::findOutsideCorners(const std::vector<cv::Point2f>
imshow("corners", cornersImage);
#endif
std::vector<Point2f> tangentVectors(corners.size());
for(size_t k=0; k<corners.size(); k++)
std::vector<Point2f> tangentVectors(n);
for(size_t k=0; k < n; k++)
{
Point2f diff = corners[(k + 1) % corners.size()] - corners[k];
Point2f diff = corners[(k + 1) % n] - corners[k];
tangentVectors[k] = diff * (1.0f / norm(diff));
}
//compute angles between all sides
Mat cosAngles(n, n, CV_32FC1, 0.0f);
for(i = 0; i < n; i++)
Mat cosAngles((int)n, (int)n, CV_32FC1, 0.0f);
for(size_t i = 0; i < n; i++)
{
for(j = i + 1; j < n; j++)
for(size_t j = i + 1; j < n; j++)
{
float val = fabs(tangentVectors[i].dot(tangentVectors[j]));
cosAngles.at<float>(i, j) = val;
cosAngles.at<float>(j, i) = val;
cosAngles.at<float>((int)i, (int)j) = val;
cosAngles.at<float>((int)j, (int)i) = val;
}
}
@@ -272,10 +272,10 @@ void CirclesGridClusterFinder::findOutsideCorners(const std::vector<cv::Point2f>
const int bigDiff = 4;
if(maxIdx - minIdx == bigDiff)
{
minIdx += n;
minIdx += (int)n;
std::swap(maxIdx, minIdx);
}
if(maxIdx - minIdx != n - bigDiff)
if(maxIdx - minIdx != (int)n - bigDiff)
{
return;
}
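Editorial sketch (not part of the diff): cv::Mat's constructor and Mat::at<>() take int rows/cols, so once n and the loop indices become size_t the explicit (int) casts above are required; a minimal illustration, with fillSymmetric being a hypothetical helper:

#include <cstddef>
#include <opencv2/core.hpp>

void fillSymmetric(cv::Mat& cosAngles, size_t i, size_t j, float val)
{
    // Mat::at<>() has no size_t overload, hence the explicit int casts
    cosAngles.at<float>((int)i, (int)j) = val;
    cosAngles.at<float>((int)j, (int)i) = val;
}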

@@ -206,7 +206,7 @@ void dls::run_kernel(const cv::Mat& pp)
void dls::build_coeff_matrix(const cv::Mat& pp, cv::Mat& Mtilde, cv::Mat& D)
{
CV_Assert(!pp.empty());
CV_Assert(!pp.empty() && N > 0);
cv::Mat eye = cv::Mat::eye(3, 3, CV_64F);
// build coeff matrix

@@ -334,19 +334,19 @@ bool validateData(const ChessBoardGenerator& cbg, const Size& imgSz,
tmp = cv::norm(cur - mat(i + 1, j + 1)); // TODO cvtest
if (tmp < minNeibDist)
tmp = minNeibDist;
minNeibDist = tmp;
tmp = cv::norm(cur - mat(i - 1, j + 1)); // TODO cvtest
if (tmp < minNeibDist)
tmp = minNeibDist;
minNeibDist = tmp;
tmp = cv::norm(cur - mat(i + 1, j - 1)); // TODO cvtest
if (tmp < minNeibDist)
tmp = minNeibDist;
minNeibDist = tmp;
tmp = cv::norm(cur - mat(i - 1, j - 1)); // TODO cvtest
if (tmp < minNeibDist)
tmp = minNeibDist;
minNeibDist = tmp;
}
const double threshold = 0.25;
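Editorial sketch (not part of the diff): each corrected branch keeps the smaller of the running minimum and the new neighbour distance (the removed lines assigned in the opposite direction, so minNeibDist never shrank); keepMin is a hypothetical helper expressing the same logic with std::min:

#include <algorithm>

static inline void keepMin(double& runningMin, double dist)
{
    runningMin = std::min(runningMin, dist);  // keep the smaller value
}
// usage in the test would be: keepMin(minNeibDist, cv::norm(cur - mat(i + 1, j + 1)));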

@@ -526,13 +526,13 @@ inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1)
template<typename _Tpvec>
inline _Tpvec v256_combine_diagonal(const _Tpvec& a, const _Tpvec& b)
{ return _Tpvec(_mm256_blend_epi32(a.val, b.val, 0b11110000)); }
{ return _Tpvec(_mm256_blend_epi32(a.val, b.val, 0xf0)); }
inline v_float32x8 v256_combine_diagonal(const v_float32x8& a, const v_float32x8& b)
{ return v256_blend<0b11110000>(a, b); }
{ return v256_blend<0xf0>(a, b); }
inline v_float64x4 v256_combine_diagonal(const v_float64x4& a, const v_float64x4& b)
{ return v256_blend<0b1100>(a, b); }
{ return v256_blend<0xc>(a, b); }
template<typename _Tpvec>
inline _Tpvec v256_alignr_128(const _Tpvec& a, const _Tpvec& b)
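Editorial note (not part of the diff): the blend immediates are numerically unchanged, only respelled in hexadecimal; 0b... binary literals are a C++14 feature, so the hex form presumably keeps this header building on older compilers. A quick equivalence check:

static_assert(0xf0 == (1 << 7 | 1 << 6 | 1 << 5 | 1 << 4), "0xf0 == 0b11110000");
static_assert(0xc  == (1 << 3 | 1 << 2),                   "0xc  == 0b1100");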
@@ -1609,392 +1609,592 @@ OPENCV_HAL_IMPL_AVX_EXTRACT(v_float32x8)
OPENCV_HAL_IMPL_AVX_EXTRACT(v_float64x4)
/** Reinterpret **/
// it's up there with load and store operations
/* de&interleave */
#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_2CH(_Tpvec, _Tp, suffix) \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b) \
{ return v256_load_deinterleave_##suffix(ptr, a, b); } \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \
{ return v256_store_interleave_2ch(ptr, a, b); }
#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_3CH(_Tpvec, _Tp, suffix) \
inline void v_load_deinterleave \
(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c) \
{ return v256_load_deinterleave_##suffix(ptr, a, b, c); } \
inline void v_store_interleave \
(_Tp* ptr, const _Tpvec& a,const _Tpvec& b, const _Tpvec& c) \
{ return v256_store_interleave_##suffix(ptr, a, b, c); }
#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_4CH(_Tpvec, _Tp, suffix) \
inline void v_load_deinterleave \
(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d) \
{ return v256_load_deinterleave_##suffix(ptr, a, b, c, d); } \
inline void v_store_interleave \
(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d) \
{ return v256_store_interleave_##suffix(ptr, a, b, c, d); }
#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_3n4CH(_Tpvec, _Tp, suffix) \
OPENCV_HAL_IMPL_AVX_INTERLEAVE_3CH(_Tpvec, _Tp, suffix) \
OPENCV_HAL_IMPL_AVX_INTERLEAVE_4CH(_Tpvec, _Tp, suffix)
///////////////////// load deinterleave /////////////////////////////
#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(_Tpvec, _Tp, suffix) \
OPENCV_HAL_IMPL_AVX_INTERLEAVE_2CH(_Tpvec, _Tp, suffix) \
OPENCV_HAL_IMPL_AVX_INTERLEAVE_3n4CH(_Tpvec, _Tp, suffix)
/* **** */
//
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_2ch(_Tp* ptr, const _Tpvec& a, const _Tpvec& b)
inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b )
{
_Tpvec ab0, ab1;
v_zip(a, b, ab0, ab1);
v_store(ptr, ab0);
v_store(ptr + _Tpvec::nlanes, ab1);
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
static const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
__m256i p0 = _mm256_shuffle_epi8(ab0, sh);
__m256i p1 = _mm256_shuffle_epi8(ab1, sh);
__m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
__m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
__m256i a0 = _mm256_unpacklo_epi64(pl, ph);
__m256i b0 = _mm256_unpackhi_epi64(pl, ph);
a = v_uint8x32(a0);
b = v_uint8x32(b0);
}
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l4(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b )
{
_Tpvec ab0 = v256_load(ptr);
_Tpvec ab1 = v256_load(ptr + _Tpvec::nlanes);
_Tpvec ab00, ab11;
v_recombine(ab0, ab1, ab00, ab11);
v256_zip(ab00, ab11, a, b);
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
static const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
__m256i p0 = _mm256_shuffle_epi8(ab0, sh);
__m256i p1 = _mm256_shuffle_epi8(ab1, sh);
__m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
__m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
__m256i a0 = _mm256_unpacklo_epi64(pl, ph);
__m256i b0 = _mm256_unpackhi_epi64(pl, ph);
a = v_uint16x16(a0);
b = v_uint16x16(b0);
}
///
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l4(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c)
inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b )
{
_Tpvec abc0 = v256_load(ptr);
_Tpvec abc1 = v256_load(ptr + _Tpvec::nlanes);
_Tpvec abc2 = v256_load(ptr + _Tpvec::nlanes * 2);
_Tpvec ab0 = v256_combine_diagonal(abc0, abc1);
_Tpvec bc1 = v256_combine_diagonal(abc1, abc2);
_Tpvec ac1 = v256_reverse_64(v256_combine_diagonal(abc2, abc0));
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
a = v256_unpacklo(ab0, ac1);
c = v256_unpackhi(ac1, bc1);
b = v256_alignr_64(bc1, ab0);
const int sh = 0+2*4+1*16+3*64;
__m256i p0 = _mm256_shuffle_epi32(ab0, sh);
__m256i p1 = _mm256_shuffle_epi32(ab1, sh);
__m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
__m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
__m256i a0 = _mm256_unpacklo_epi64(pl, ph);
__m256i b0 = _mm256_unpackhi_epi64(pl, ph);
a = v_uint32x8(a0);
b = v_uint32x8(b0);
}
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l4(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)
inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b )
{
_Tpvec ab0 = v256_unpacklo(a, b);
_Tpvec bc1 = v256_unpackhi(b, c);
_Tpvec ca10 = v256_swap_halves(v256_blend<0b1010>(c, a));
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
v_store(ptr, v256_combine_diagonal(ab0, ca10));
v_store(ptr + _Tpvec::nlanes, v256_combine_diagonal(bc1, ab0));
v_store(ptr + _Tpvec::nlanes * 2, v256_combine_diagonal(ca10, bc1));
__m256i pl = _mm256_permute2x128_si256(ab0, ab1, 0 + 2*16);
__m256i ph = _mm256_permute2x128_si256(ab0, ab1, 1 + 3*16);
__m256i a0 = _mm256_unpacklo_epi64(pl, ph);
__m256i b0 = _mm256_unpackhi_epi64(pl, ph);
a = v_uint64x4(a0);
b = v_uint64x4(b0);
}
////
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l4(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g, v_uint8x32& r )
{
_Tpvec abcd0 = v256_load(ptr);
_Tpvec abcd1 = v256_load(ptr + _Tpvec::nlanes);
_Tpvec abcd2 = v256_load(ptr + _Tpvec::nlanes * 2);
_Tpvec abcd3 = v256_load(ptr + _Tpvec::nlanes * 3);
__m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
_Tpvec cd0ab0 = v256_alignr_128(abcd0, abcd2);
_Tpvec cd1ab1 = v256_alignr_128(abcd1, abcd3);
__m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
__m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
_Tpvec ab0 = v256_combine_diagonal(abcd0, cd0ab0);
_Tpvec ab1 = v256_combine_diagonal(abcd1, cd1ab1);
_Tpvec cd0 = v256_combine_diagonal(cd0ab0, abcd2);
_Tpvec cd1 = v256_combine_diagonal(cd1ab1, abcd3);
static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
static const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
-1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1);
v256_zip(ab0, ab1, a, b);
v256_zip(cd0, cd1, c, d);
}
__m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
__m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
__m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l4(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
{
_Tpvec ab0, ab1, cd0, cd1;
v256_zip(a, b, ab0, ab1);
v256_zip(c, d, cd0, cd1);
_Tpvec ab0cd0 = v256_alignr_128(ab0, cd0);
_Tpvec ab1cd1 = v256_alignr_128(ab1, cd1);
static const __m256i
sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13),
sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14),
sh_r = _mm256_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15,
2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
b0 = _mm256_shuffle_epi8(b0, sh_b);
g0 = _mm256_shuffle_epi8(g0, sh_g);
r0 = _mm256_shuffle_epi8(r0, sh_r);
v_store(ptr, v256_combine_diagonal(ab0, ab0cd0));
v_store(ptr + _Tpvec::nlanes, v256_combine_diagonal(ab1, ab1cd1));
v_store(ptr + _Tpvec::nlanes * 2, v256_combine_diagonal(ab0cd0, cd0));
v_store(ptr + _Tpvec::nlanes * 3, v256_combine_diagonal(ab1cd1, cd1));
b = v_uint8x32(b0);
g = v_uint8x32(g0);
r = v_uint8x32(r0);
}
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint64x4, uint64, l4)
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int64x4, int64, l4)
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_float64x4, double, l4)
/* **** **** */
//
inline void v256_load_deinterleave_l8(const float* ptr, v_float32x8& a, v_float32x8& b)
{
v_float32x8 ab0 = v256_load(ptr);
v_float32x8 ab1 = v256_load(ptr + 8);
inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16& g, v_uint16x16& r )
{
__m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
__m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
__m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
__m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
__m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
__m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
static const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
static const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13,
2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
static const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
b0 = _mm256_shuffle_epi8(b0, sh_b);
g0 = _mm256_shuffle_epi8(g0, sh_g);
r0 = _mm256_shuffle_epi8(r0, sh_r);
b = v_uint16x16(b0);
g = v_uint16x16(g0);
r = v_uint16x16(r0);
}
inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& b, v_uint32x8& g, v_uint32x8& r )
{
__m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
__m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
__m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
__m256i b0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_low, s02_high, 0x24), bgr1, 0x92);
__m256i g0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_high, s02_low, 0x92), bgr1, 0x24);
__m256i r0 = _mm256_blend_epi32(_mm256_blend_epi32(bgr1, s02_low, 0x24), s02_high, 0x92);
b0 = _mm256_shuffle_epi32(b0, 0x6c);
g0 = _mm256_shuffle_epi32(g0, 0xb1);
r0 = _mm256_shuffle_epi32(r0, 0xc6);
b = v_uint32x8(b0);
g = v_uint32x8(g0);
r = v_uint32x8(r0);
}
inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& b, v_uint64x4& g, v_uint64x4& r )
{
__m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
__m256i s01 = _mm256_blend_epi32(bgr0, bgr1, 0xf0);
__m256i s12 = _mm256_blend_epi32(bgr1, bgr2, 0xf0);
__m256i s20r = _mm256_permute4x64_epi64(_mm256_blend_epi32(bgr2, bgr0, 0xf0), 0x1b);
__m256i b0 = _mm256_unpacklo_epi64(s01, s20r);
__m256i g0 = _mm256_alignr_epi8(s12, s01, 8);
__m256i r0 = _mm256_unpackhi_epi64(s20r, s12);
b = v_uint64x4(b0);
g = v_uint64x4(g0);
r = v_uint64x4(r0);
}
inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g, v_uint8x32& r, v_uint8x32& a )
{
__m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
__m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96));
static const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
__m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
__m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
__m256i p2 = _mm256_shuffle_epi8(bgr2, sh);
__m256i p3 = _mm256_shuffle_epi8(bgr3, sh);
__m256i p01l = _mm256_unpacklo_epi32(p0, p1);
__m256i p01h = _mm256_unpackhi_epi32(p0, p1);
__m256i p23l = _mm256_unpacklo_epi32(p2, p3);
__m256i p23h = _mm256_unpackhi_epi32(p2, p3);
__m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
__m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
__m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
__m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
v_float32x8 ab0ab2, ab1ab3;
v_recombine(ab0, ab1, ab0ab2, ab1ab3);
__m256i b0 = _mm256_unpacklo_epi32(pll, plh);
__m256i g0 = _mm256_unpackhi_epi32(pll, plh);
__m256i r0 = _mm256_unpacklo_epi32(phl, phh);
__m256i a0 = _mm256_unpackhi_epi32(phl, phh);
a.val = _mm256_shuffle_ps(ab0ab2.val, ab1ab3.val, _MM_SHUFFLE(2, 0, 2, 0));
b.val = _mm256_shuffle_ps(ab0ab2.val, ab1ab3.val, _MM_SHUFFLE(3, 1, 3, 1));
b = v_uint8x32(b0);
g = v_uint8x32(g0);
r = v_uint8x32(r0);
a = v_uint8x32(a0);
}
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l8(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16& g, v_uint16x16& r, v_uint16x16& a )
{
v_float32x8 fa, fb;
v256_load_deinterleave_l8((float*)ptr, fa, fb);
a.val = v_reinterpret_as_u32(fa).val;
b.val = v_reinterpret_as_u32(fb).val;
}
///
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l8(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)
{
_Tpvec ab0, ab1, bc0, bc1;
v256_zip(a, b, ab0, ab1);
v256_zip(b, c, bc0, bc1);
__m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
__m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48));
static const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
__m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
__m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
__m256i p2 = _mm256_shuffle_epi8(bgr2, sh);
__m256i p3 = _mm256_shuffle_epi8(bgr3, sh);
_Tpvec cazg = v256_blend<0b10101010>(c, a);
_Tpvec abc0abc1(_mm256_unpacklo_epi64(ab0.val, cazg.val));
_Tpvec abc1abc2(_mm256_unpackhi_epi64(cazg.val, bc1.val));
_Tpvec abc2abc0 = v256_reverse_64(v256_blend<0b11001100>(ab1, bc0));
__m256i p01l = _mm256_unpacklo_epi32(p0, p1);
__m256i p01h = _mm256_unpackhi_epi32(p0, p1);
__m256i p23l = _mm256_unpacklo_epi32(p2, p3);
__m256i p23h = _mm256_unpackhi_epi32(p2, p3);
_Tpvec abc0 = v256_combine_diagonal(abc0abc1, abc2abc0);
_Tpvec abc1 = v256_combine_diagonal(abc1abc2, abc0abc1);
_Tpvec abc2 = v256_combine_diagonal(abc2abc0, abc1abc2);
__m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
__m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
__m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
__m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
v_store(ptr, abc0);
v_store(ptr + _Tpvec::nlanes, abc1);
v_store(ptr + _Tpvec::nlanes * 2, abc2);
__m256i b0 = _mm256_unpacklo_epi32(pll, plh);
__m256i g0 = _mm256_unpackhi_epi32(pll, plh);
__m256i r0 = _mm256_unpacklo_epi32(phl, phh);
__m256i a0 = _mm256_unpackhi_epi32(phl, phh);
b = v_uint16x16(b0);
g = v_uint16x16(g0);
r = v_uint16x16(r0);
a = v_uint16x16(a0);
}
inline void v256_store_interleave_l8(float* ptr, const v_float32x8& a, const v_float32x8& b, const v_float32x8& c)
inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& b, v_uint32x8& g, v_uint32x8& r, v_uint32x8& a )
{
v_float32x8 ab0, ab1, bc0, bc1;
v256_zip(a, b, ab0, ab1);
v256_zip(b, c, bc0, bc1);
__m256i p0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i p1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
__m256i p2 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
__m256i p3 = _mm256_loadu_si256((const __m256i*)(ptr + 24));
v_float32x8 cazg = v256_blend<0b10101010>(c, a);
v_float32x8 abc0abc1(_mm256_shuffle_ps(ab0.val, cazg.val, _MM_SHUFFLE(1, 0, 1, 0)));
v_float32x8 abc1abc2(_mm256_shuffle_ps(cazg.val, bc1.val, _MM_SHUFFLE(3, 2, 3, 2)));
__m256i p01l = _mm256_unpacklo_epi32(p0, p1);
__m256i p01h = _mm256_unpackhi_epi32(p0, p1);
__m256i p23l = _mm256_unpacklo_epi32(p2, p3);
__m256i p23h = _mm256_unpackhi_epi32(p2, p3);
v_float32x8 abc0abc2(_mm256_shuffle_ps(bc0.val, ab1.val, _MM_SHUFFLE(1, 0, 3, 2)));
v_float32x8 abc2abc0 = v256_swap_halves(abc0abc2);
__m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
__m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
__m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
__m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
v_float32x8 abc0 = v256_combine_diagonal(abc0abc1, abc2abc0);
v_float32x8 abc1 = v256_combine_diagonal(abc1abc2, abc0abc1);
v_float32x8 abc2 = v256_combine_diagonal(abc2abc0, abc1abc2);
__m256i b0 = _mm256_unpacklo_epi32(pll, plh);
__m256i g0 = _mm256_unpackhi_epi32(pll, plh);
__m256i r0 = _mm256_unpacklo_epi32(phl, phh);
__m256i a0 = _mm256_unpackhi_epi32(phl, phh);
v_store(ptr, abc0);
v_store(ptr + 8, abc1);
v_store(ptr + 16, abc2);
b = v_uint32x8(b0);
g = v_uint32x8(g0);
r = v_uint32x8(r0);
a = v_uint32x8(a0);
}
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l8(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c)
inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& b, v_uint64x4& g, v_uint64x4& r, v_uint64x4& a )
{
_Tpvec abc02 = v256_load(ptr);
_Tpvec abc1 = v256_load(ptr + _Tpvec::nlanes);
_Tpvec abc20 = v256_load(ptr + _Tpvec::nlanes * 2);
__m256i bgra0 = _mm256_loadu_si256((const __m256i*)ptr);
__m256i bgra1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
__m256i bgra2 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
__m256i bgra3 = _mm256_loadu_si256((const __m256i*)(ptr + 12));
_Tpvec abc2 = v256_alignr_128(abc02, abc20);
_Tpvec abc0 = v256_combine_diagonal(abc02, abc20);
__m256i l02 = _mm256_permute2x128_si256(bgra0, bgra2, 0 + 2*16);
__m256i h02 = _mm256_permute2x128_si256(bgra0, bgra2, 1 + 3*16);
__m256i l13 = _mm256_permute2x128_si256(bgra1, bgra3, 0 + 2*16);
__m256i h13 = _mm256_permute2x128_si256(bgra1, bgra3, 1 + 3*16);
a = v256_blend<0b10010010>(abc0, abc1);
a = v256_blend<0b01000100>(a, abc2);
__m256i b0 = _mm256_unpacklo_epi64(l02, l13);
__m256i g0 = _mm256_unpackhi_epi64(l02, l13);
__m256i r0 = _mm256_unpacklo_epi64(h02, h13);
__m256i a0 = _mm256_unpackhi_epi64(h02, h13);
b = v256_blend<0b00100100>(abc0, abc1);
b = v256_blend<0b10011001>(b, abc2);
b = v_uint64x4(b0);
g = v_uint64x4(g0);
r = v_uint64x4(r0);
a = v_uint64x4(a0);
}
c = v256_blend<0b01001001>(abc0, abc1);
c = v256_blend<0b00100010>(c, abc2);
///////////////////////////// store interleave /////////////////////////////////////
a = v256_shuffle<_MM_SHUFFLE(1, 2, 3, 0)>(a);
b = v256_shuffle<_MM_SHUFFLE(2, 3, 0, 1)>(b);
c = v256_shuffle<_MM_SHUFFLE(3, 0, 1, 2)>(c);
}
/////
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l8(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y )
{
_Tpvec ab0, ab1, cd0, cd1;
v256_load_deinterleave_l4(ptr, ab0, cd0, ab1, cd1);
v256_zip(ab0, ab1, a, b);
v256_zip(cd0, cd1, c, d);
__m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val);
__m256i xy_h = _mm256_unpackhi_epi8(x.val, y.val);
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
_mm256_storeu_si256((__m256i*)ptr, xy0);
_mm256_storeu_si256((__m256i*)(ptr + 32), xy1);
}
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l8(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y )
{
_Tpvec ac0, ac1, bd0, bd1;
v256_zip(a, c, ac0, ac1);
v256_zip(b, d, bd0, bd1);
__m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val);
__m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val);
_Tpvec abcd0, abcd1, abcd2, abcd3;
v256_zip(ac0, bd0, abcd0, abcd1);
v256_zip(ac1, bd1, abcd2, abcd3);
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
_Tpvec abcd01, abcd23, abcd45, abcd67;
v_recombine(abcd0, abcd1, abcd01, abcd45);
v_recombine(abcd2, abcd3, abcd23, abcd67);
v_store(ptr, abcd01);
v_store(ptr + _Tpvec::nlanes, abcd23);
v_store(ptr + _Tpvec::nlanes * 2, abcd45);
v_store(ptr + _Tpvec::nlanes * 3, abcd67);
_mm256_storeu_si256((__m256i*)ptr, xy0);
_mm256_storeu_si256((__m256i*)(ptr + 16), xy1);
}
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint32x8, unsigned, l8)
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int32x8, int, l8)
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_float32x8, float, l8)
/* ******** ******** */
//
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l16(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y )
{
const __m256i sep = _mm256_setr_epi8(
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15
);
__m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val);
__m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val);
_Tpvec ab0, ab1;
v_recombine(v256_load(ptr), v256_load(ptr + _Tpvec::nlanes), ab0, ab1);
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
__m256i a0b0 = _mm256_shuffle_epi8(ab0.val, sep);
__m256i a1b1 = _mm256_shuffle_epi8(ab1.val, sep);
a.val = _mm256_unpacklo_epi64(a0b0, a1b1);
b.val = _mm256_unpackhi_epi64(a0b0, a1b1);
_mm256_storeu_si256((__m256i*)ptr, xy0);
_mm256_storeu_si256((__m256i*)(ptr + 8), xy1);
}
///
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l16(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)
inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y )
{
v_uint32x8 ab0 = v_reinterpret_as_u32(v256_unpacklo(a, b));
v_uint32x8 ab1 = v_reinterpret_as_u32(v256_unpackhi(a, b));
v_uint32x8 bc0 = v_reinterpret_as_u32(v256_unpacklo(b, c));
v_uint32x8 bc1 = v_reinterpret_as_u32(v256_unpackhi(b, c));
__m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val);
__m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val);
v_uint32x8 cazg = v_reinterpret_as_u32(v256_blend<0b10101010>(c, a));
cazg = v256_shuffle<_MM_SHUFFLE(2, 1, 0, 3)>(cazg);
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
v_uint32x8 ac1ab1 = v256_blend<0b10101010>(ab1, bc1);
ac1ab1 = v256_shuffle<_MM_SHUFFLE(2, 1, 0, 3)>(ac1ab1);
_mm256_storeu_si256((__m256i*)ptr, xy0);
_mm256_storeu_si256((__m256i*)(ptr + 4), xy1);
}
v_uint32x8 abc001 = v256_blend<0b10101010>(ab0, cazg);
v_uint32x8 cabc0 = v256_blend<0b10101010>(cazg, bc0);
inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r )
{
static const __m256i sh_b = _mm256_setr_epi8(
0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5,
0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
static const __m256i sh_g = _mm256_setr_epi8(
5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10,
5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
static const __m256i sh_r = _mm256_setr_epi8(
10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15,
10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
v_uint32x8 cabc1 = v256_unpacklo(cabc0, ac1ab1);
v_uint32x8 bcab0 = v256_unpackhi(cabc1, abc001);
__m256i b0 = _mm256_shuffle_epi8(b.val, sh_b);
__m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
__m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
v_uint64x4 abc01 = v256_unpacklo(v_reinterpret_as_u64(abc001), v_reinterpret_as_u64(bcab0));
v_uint64x4 abc21 = v256_unpackhi(v_reinterpret_as_u64(cabc0), v_reinterpret_as_u64(bcab0));
abc21 = v256_swap_halves(abc21);
v_uint64x4 abc12 = v_reinterpret_as_u64(v256_alignr_64(cabc1, ac1ab1));
static const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
static const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
v_uint64x4 abc0 = v256_combine_diagonal(abc01, abc21);
v_uint64x4 abc1 = v256_combine_diagonal(abc12, abc01);
v_uint64x4 abc2 = v256_combine_diagonal(abc21, abc12);
__m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
__m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1);
__m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1);
v_store(ptr, _Tpvec(abc0.val));
v_store(ptr + _Tpvec::nlanes, _Tpvec(abc1.val));
v_store(ptr + _Tpvec::nlanes * 2, _Tpvec(abc2.val));
}
// todo:
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l16(const _Tp*, _Tpvec&, _Tpvec&, _Tpvec&)
{}
////
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l16(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
{
_Tpvec ab0, ab1, cd0, cd1;
v256_load_deinterleave_l8(ptr, ab0, cd0, ab1, cd1);
v256_zip(ab0, ab1, a, b);
v256_zip(cd0, cd1, c, d);
__m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
__m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16);
__m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16);
_mm256_storeu_si256((__m256i*)ptr, bgr0);
_mm256_storeu_si256((__m256i*)(ptr + 32), bgr1);
_mm256_storeu_si256((__m256i*)(ptr + 64), bgr2);
}
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l16(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
{ v256_store_interleave_l8(ptr, a, b, c, d); }
inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g, const v_uint16x16& r )
{
static const __m256i sh_b = _mm256_setr_epi8(
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
static const __m256i sh_g = _mm256_setr_epi8(
10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5,
10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
static const __m256i sh_r = _mm256_setr_epi8(
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint16x16, ushort, l16)
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int16x16, short, l16)
__m256i b0 = _mm256_shuffle_epi8(b.val, sh_b);
__m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
__m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
/* **************** **************** */
//
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l32(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
{
const __m256i sep = _mm256_setr_epi8(
0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
);
static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
_Tpvec ab0, ab1;
v_recombine(v256_load(ptr), v256_load(ptr + _Tpvec::nlanes), ab0, ab1);
__m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
__m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1);
__m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1);
__m256i a0b0 = _mm256_shuffle_epi8(ab0.val, sep);
__m256i a1b1 = _mm256_shuffle_epi8(ab1.val, sep);
__m256i bgr0 = _mm256_permute2x128_si256(p0, p2, 0 + 2*16);
//__m256i bgr1 = p1;
__m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16);
a.val = _mm256_unpacklo_epi64(a0b0, a1b1);
b.val = _mm256_unpackhi_epi64(a0b0, a1b1);
_mm256_storeu_si256((__m256i*)ptr, bgr0);
_mm256_storeu_si256((__m256i*)(ptr + 16), p1);
_mm256_storeu_si256((__m256i*)(ptr + 32), bgr2);
}
/// todo
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l32(_Tp*, const _Tpvec&, const _Tpvec&, const _Tpvec&)
{}
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l32(const _Tp*, _Tpvec&, _Tpvec&, _Tpvec&)
{}
////
template<typename _Tp, typename _Tpvec>
inline void v256_load_deinterleave_l32(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g, const v_uint32x8& r )
{
const __m256i sep = _mm256_setr_epi8(
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
);
__m256i b0 = _mm256_shuffle_epi32(b.val, 0x6c);
__m256i g0 = _mm256_shuffle_epi32(g.val, 0xb1);
__m256i r0 = _mm256_shuffle_epi32(r.val, 0xc6);
_Tpvec abcd0, abcd1, abcd2, abcd3;
v_recombine(v256_load(ptr), v256_load(ptr + _Tpvec::nlanes * 2), abcd0, abcd1);
v_recombine(v256_load(ptr + _Tpvec::nlanes), v256_load(ptr + _Tpvec::nlanes * 3), abcd2, abcd3);
__m256i p0 = _mm256_blend_epi32(_mm256_blend_epi32(b0, g0, 0x92), r0, 0x24);
__m256i p1 = _mm256_blend_epi32(_mm256_blend_epi32(g0, r0, 0x92), b0, 0x24);
__m256i p2 = _mm256_blend_epi32(_mm256_blend_epi32(r0, b0, 0x92), g0, 0x24);
__m256i ab0cd0 = _mm256_shuffle_epi8(abcd0.val, sep);
__m256i ab1cd1 = _mm256_shuffle_epi8(abcd1.val, sep);
__m256i ab2cd2 = _mm256_shuffle_epi8(abcd2.val, sep);
__m256i ab3cd3 = _mm256_shuffle_epi8(abcd3.val, sep);
__m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
//__m256i bgr1 = p2;
__m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
__m256i ab0 = _mm256_unpacklo_epi32(ab0cd0, ab1cd1);
__m256i ab1 = _mm256_unpacklo_epi32(ab2cd2, ab3cd3);
__m256i cd0 = _mm256_unpackhi_epi32(ab0cd0, ab1cd1);
__m256i cd1 = _mm256_unpackhi_epi32(ab2cd2, ab3cd3);
a.val = _mm256_unpacklo_epi64(ab0, ab1);
b.val = _mm256_unpackhi_epi64(ab0, ab1);
c.val = _mm256_unpacklo_epi64(cd0, cd1);
d.val = _mm256_unpackhi_epi64(cd0, cd1);
}
_mm256_storeu_si256((__m256i*)ptr, bgr0);
_mm256_storeu_si256((__m256i*)(ptr + 8), p2);
_mm256_storeu_si256((__m256i*)(ptr + 16), bgr2);
}
template<typename _Tp, typename _Tpvec>
inline void v256_store_interleave_l32(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
{ v256_store_interleave_l8(ptr, a, b, c, d); }
inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g, const v_uint64x4& r )
{
__m256i s01 = _mm256_unpacklo_epi64(b.val, g.val);
__m256i s12 = _mm256_unpackhi_epi64(g.val, r.val);
__m256i s20 = _mm256_blend_epi32(r.val, b.val, 0xcc);
__m256i bgr0 = _mm256_permute2x128_si256(s01, s20, 0 + 2*16);
__m256i bgr1 = _mm256_blend_epi32(s01, s12, 0x0f);
__m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16);
_mm256_storeu_si256((__m256i*)ptr, bgr0);
_mm256_storeu_si256((__m256i*)(ptr + 4), bgr1);
_mm256_storeu_si256((__m256i*)(ptr + 8), bgr2);
}
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint8x32, uchar, l32)
OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int8x32, schar, l32)
inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r, const v_uint8x32& a )
{
__m256i bg0 = _mm256_unpacklo_epi8(b.val, g.val);
__m256i bg1 = _mm256_unpackhi_epi8(b.val, g.val);
__m256i ra0 = _mm256_unpacklo_epi8(r.val, a.val);
__m256i ra1 = _mm256_unpackhi_epi8(r.val, a.val);
__m256i bgra0_ = _mm256_unpacklo_epi16(bg0, ra0);
__m256i bgra1_ = _mm256_unpackhi_epi16(bg0, ra0);
__m256i bgra2_ = _mm256_unpacklo_epi16(bg1, ra1);
__m256i bgra3_ = _mm256_unpackhi_epi16(bg1, ra1);
__m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16);
__m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16);
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
_mm256_storeu_si256((__m256i*)ptr, bgra0);
_mm256_storeu_si256((__m256i*)(ptr + 32), bgra1);
_mm256_storeu_si256((__m256i*)(ptr + 64), bgra2);
_mm256_storeu_si256((__m256i*)(ptr + 96), bgra3);
}
inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g,
const v_uint16x16& r, const v_uint16x16& a )
{
__m256i bg0 = _mm256_unpacklo_epi16(b.val, g.val);
__m256i bg1 = _mm256_unpackhi_epi16(b.val, g.val);
__m256i ra0 = _mm256_unpacklo_epi16(r.val, a.val);
__m256i ra1 = _mm256_unpackhi_epi16(r.val, a.val);
__m256i bgra0_ = _mm256_unpacklo_epi32(bg0, ra0);
__m256i bgra1_ = _mm256_unpackhi_epi32(bg0, ra0);
__m256i bgra2_ = _mm256_unpacklo_epi32(bg1, ra1);
__m256i bgra3_ = _mm256_unpackhi_epi32(bg1, ra1);
__m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16);
__m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16);
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
_mm256_storeu_si256((__m256i*)ptr, bgra0);
_mm256_storeu_si256((__m256i*)(ptr + 16), bgra1);
_mm256_storeu_si256((__m256i*)(ptr + 32), bgra2);
_mm256_storeu_si256((__m256i*)(ptr + 48), bgra3);
}
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g,
const v_uint32x8& r, const v_uint32x8& a )
{
__m256i bg0 = _mm256_unpacklo_epi32(b.val, g.val);
__m256i bg1 = _mm256_unpackhi_epi32(b.val, g.val);
__m256i ra0 = _mm256_unpacklo_epi32(r.val, a.val);
__m256i ra1 = _mm256_unpackhi_epi32(r.val, a.val);
__m256i bgra0_ = _mm256_unpacklo_epi64(bg0, ra0);
__m256i bgra1_ = _mm256_unpackhi_epi64(bg0, ra0);
__m256i bgra2_ = _mm256_unpacklo_epi64(bg1, ra1);
__m256i bgra3_ = _mm256_unpackhi_epi64(bg1, ra1);
__m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16);
__m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16);
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
_mm256_storeu_si256((__m256i*)ptr, bgra0);
_mm256_storeu_si256((__m256i*)(ptr + 8), bgra1);
_mm256_storeu_si256((__m256i*)(ptr + 16), bgra2);
_mm256_storeu_si256((__m256i*)(ptr + 24), bgra3);
}
inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g,
const v_uint64x4& r, const v_uint64x4& a )
{
__m256i bg0 = _mm256_unpacklo_epi64(b.val, g.val);
__m256i bg1 = _mm256_unpackhi_epi64(b.val, g.val);
__m256i ra0 = _mm256_unpacklo_epi64(r.val, a.val);
__m256i ra1 = _mm256_unpackhi_epi64(r.val, a.val);
__m256i bgra0 = _mm256_permute2x128_si256(bg0, ra0, 0 + 2*16);
__m256i bgra1 = _mm256_permute2x128_si256(bg1, ra1, 0 + 2*16);
__m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16);
__m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16);
_mm256_storeu_si256((__m256i*)ptr, bgra0);
_mm256_storeu_si256((__m256i*)(ptr + 4), bgra1);
_mm256_storeu_si256((__m256i*)(ptr + 8), bgra2);
_mm256_storeu_si256((__m256i*)(ptr + 12), bgra3);
}
#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
{ \
_Tpvec1 a1, b1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
} \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
{ \
_Tpvec1 a1, b1, c1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
c0 = v_reinterpret_as_##suffix0(c1); \
} \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
{ \
_Tpvec1 a1, b1, c1, d1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
c0 = v_reinterpret_as_##suffix0(c1); \
d0 = v_reinterpret_as_##suffix0(d1); \
} \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \
{ \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
v_store_interleave((_Tp1*)ptr, a1, b1); \
} \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \
{ \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
v_store_interleave((_Tp1*)ptr, a1, b1, c1); \
} \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
const _Tpvec0& c0, const _Tpvec0& d0 ) \
{ \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
_Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \
}
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8)
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int16x16, short, s16, v_uint16x16, ushort, u16)
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int32x8, int, s32, v_uint32x8, unsigned, u32)
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, unsigned, u32)
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)
inline void v256_cleanup() { _mm256_zeroupper(); }
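Editorial usage sketch (not part of the diff), assuming the AVX2 universal intrinsics are compiled in: the rewritten overloads keep the same public API, for example converting packed BGR to packed RGB 32 pixels per iteration (bgr2rgb_avx2 is a hypothetical helper; n is assumed to be a multiple of 32):

#include <opencv2/core/hal/intrin.hpp>

void bgr2rgb_avx2(const unsigned char* bgr, unsigned char* rgb, int n)
{
    using namespace cv;
    for (int i = 0; i < n; i += v_uint8x32::nlanes)
    {
        v_uint8x32 b, g, r;
        v_load_deinterleave(bgr + 3 * i, b, g, r);  // split interleaved BGR
        v_store_interleave(rgb + 3 * i, r, g, b);   // write back as RGB
    }
}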

@@ -1318,6 +1318,80 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
vst4q_##suffix(ptr, v); \
}
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix) \
inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b ) \
{ \
tp##x1_t a0 = vld1_##suffix(ptr); \
tp##x1_t b0 = vld1_##suffix(ptr + 1); \
tp##x1_t a1 = vld1_##suffix(ptr + 2); \
tp##x1_t b1 = vld1_##suffix(ptr + 3); \
a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
} \
\
inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, \
v_##tp##x2& b, v_##tp##x2& c ) \
{ \
tp##x1_t a0 = vld1_##suffix(ptr); \
tp##x1_t b0 = vld1_##suffix(ptr + 1); \
tp##x1_t c0 = vld1_##suffix(ptr + 2); \
tp##x1_t a1 = vld1_##suffix(ptr + 3); \
tp##x1_t b1 = vld1_##suffix(ptr + 4); \
tp##x1_t c1 = vld1_##suffix(ptr + 5); \
a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
} \
\
inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \
v_##tp##x2& c, v_##tp##x2& d ) \
{ \
tp##x1_t a0 = vld1_##suffix(ptr); \
tp##x1_t b0 = vld1_##suffix(ptr + 1); \
tp##x1_t c0 = vld1_##suffix(ptr + 2); \
tp##x1_t d0 = vld1_##suffix(ptr + 3); \
tp##x1_t a1 = vld1_##suffix(ptr + 4); \
tp##x1_t b1 = vld1_##suffix(ptr + 5); \
tp##x1_t c1 = vld1_##suffix(ptr + 6); \
tp##x1_t d1 = vld1_##suffix(ptr + 7); \
a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
d = v_##tp##x2(vcombine_##suffix(d0, d1)); \
} \
\
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b ) \
{ \
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
vst1_##suffix(ptr + 2, vget_high_##suffix(a.val)); \
vst1_##suffix(ptr + 3, vget_high_##suffix(b.val)); \
} \
\
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
const v_##tp##x2& b, const v_##tp##x2& c ) \
{ \
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
vst1_##suffix(ptr + 3, vget_high_##suffix(a.val)); \
vst1_##suffix(ptr + 4, vget_high_##suffix(b.val)); \
vst1_##suffix(ptr + 5, vget_high_##suffix(c.val)); \
} \
\
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
const v_##tp##x2& c, const v_##tp##x2& d ) \
{ \
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
vst1_##suffix(ptr + 3, vget_low_##suffix(d.val)); \
vst1_##suffix(ptr + 4, vget_high_##suffix(a.val)); \
vst1_##suffix(ptr + 5, vget_high_##suffix(b.val)); \
vst1_##suffix(ptr + 6, vget_high_##suffix(c.val)); \
vst1_##suffix(ptr + 7, vget_high_##suffix(d.val)); \
}
OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
@@ -1329,6 +1403,9 @@ OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
#endif
OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(int64, s64)
OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64)
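Editorial usage sketch (not part of the diff): the new INT64 variants give v_uint64x2 / v_int64x2 the same (de)interleave API as the narrower types, built from single-lane vld1_/vst1_ accesses as in the macro above; demo_u64_interleave is a hypothetical example:

#include <opencv2/core/hal/intrin.hpp>

void demo_u64_interleave()
{
    cv::uint64 buf[4] = { 1, 10, 2, 20 };  // memory layout: a0 b0 a1 b1
    cv::v_uint64x2 a, b;
    cv::v_load_deinterleave(buf, a, b);    // a = {1, 2}, b = {10, 20}
    cv::v_store_interleave(buf, a, b);     // round-trips back to a0 b0 a1 b1
}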
inline v_float32x4 v_cvt_f32(const v_int32x4& a)
{
return v_float32x4(vcvtq_f32_s32(a.val));

@@ -58,17 +58,6 @@ namespace cv
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
struct v_uint8x16;
struct v_int8x16;
struct v_uint16x8;
struct v_int16x8;
struct v_uint32x4;
struct v_int32x4;
struct v_float32x4;
struct v_uint64x2;
struct v_int64x2;
struct v_float64x2;
struct v_uint8x16
{
typedef uchar lane_type;
@@ -1660,7 +1649,7 @@ OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_N
OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
// adopted from sse_utils.hpp
// load deinterleave
inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
{
__m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
@@ -1681,7 +1670,25 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
{
#if CV_SSSE3
#if CV_SSE4_1
static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
__m128i s0 = _mm_loadu_si128((const __m128i*)ptr);
__m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
__m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32));
__m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1);
__m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1);
__m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1);
static const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
static const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14);
static const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
a0 = _mm_shuffle_epi8(a0, sh_b);
b0 = _mm_shuffle_epi8(b0, sh_g);
c0 = _mm_shuffle_epi8(c0, sh_r);
a.val = a0;
b.val = b0;
c.val = c0;
#elif CV_SSSE3
static const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14);
static const __m128i m1 = _mm_alignr_epi8(m0, m0, 11);
static const __m128i m2 = _mm_alignr_epi8(m0, m0, 6);
@@ -1753,8 +1760,41 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
d.val = _mm_unpackhi_epi8(v2, v3);
}
inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
{
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 a2 b2 a3 b3
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
__m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
__m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
__m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
__m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
}
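Editorial reference (not part of the diff): the scalar behaviour that the unpack cascade above reproduces for two interleaved 16-bit channels; deinterleave2_ref is a hypothetical reference implementation:

static void deinterleave2_ref(const unsigned short* ptr,
                              unsigned short a[8], unsigned short b[8])
{
    for (int i = 0; i < 8; i++)
    {
        a[i] = ptr[2 * i];      // a0 a1 ... a7
        b[i] = ptr[2 * i + 1];  // b0 b1 ... b7
    }
}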
inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
{
#if CV_SSE4_1
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr));
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8));
__m128i v2 = _mm_loadu_si128((__m128i*)(ptr + 16));
__m128i a0 = _mm_blend_epi16(_mm_blend_epi16(v0, v1, 0x92), v2, 0x24);
__m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24);
__m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24);
static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
static const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
a0 = _mm_shuffle_epi8(a0, sh_a);
b0 = _mm_shuffle_epi8(b0, sh_b);
c0 = _mm_shuffle_epi8(c0, sh_c);
a.val = a0;
b.val = b0;
c.val = c0;
#else
__m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
__m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8));
__m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16));
@@ -1770,6 +1810,7 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21));
b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22);
c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22));
#endif
}
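Editorial sketch (not part of the diff): what the SSE4.1 blends compute for the first channel, written as scalar lane selection (_mm_blend_epi16 takes the second operand's 16-bit lane wherever the corresponding immediate bit is set); gather_first_channel is a hypothetical reference:

#include <cstdint>

static void gather_first_channel(const uint16_t v0[8], const uint16_t v1[8],
                                 const uint16_t v2[8], uint16_t out[8])
{
    for (int i = 0; i < 8; i++)
    {
        uint16_t t = ((0x92 >> i) & 1) ? v1[i] : v0[i];  // lanes 1, 4, 7 from v1
        out[i] = ((0x24 >> i) & 1) ? v2[i] : t;          // lanes 2, 5 from v2
    }
    // out now holds a0 a3 a6 a1 a4 a7 a2 a5; the sh_a shuffle above restores
    // the natural a0..a7 order.
}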
inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
@@ -1795,6 +1836,18 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
d.val = _mm_unpackhi_epi16(u2, u3);
}
inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
{
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 4)); // a2 b2 a3 b3
__m128i v2 = _mm_unpacklo_epi32(v0, v1); // a0 a2 b0 b2
__m128i v3 = _mm_unpackhi_epi32(v0, v1); // a1 a3 b1 b3
a.val = _mm_unpacklo_epi32(v2, v3); // a0 a1 a2 a3
b.val = _mm_unpackhi_epi32(v2, v3); // b0 b1 b2 b3
}
inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
{
__m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
@@ -1812,12 +1865,23 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
{
v_uint32x4 u0(_mm_loadu_si128((const __m128i*)ptr)); // a0 b0 c0 d0
v_uint32x4 u1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1
v_uint32x4 u2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2
v_uint32x4 u3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
v_uint32x4 s0(_mm_loadu_si128((const __m128i*)ptr)); // a0 b0 c0 d0
v_uint32x4 s1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1
v_uint32x4 s2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2
v_uint32x4 s3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
v_transpose4x4(s0, s1, s2, s3, a, b, c, d);
}
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
{
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
__m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1
__m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3
a.val = _mm_shuffle_ps(u0, u1, mask_lo); // a0 a1 a2 a3
b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 b2 b3
}
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
@@ -1853,77 +1917,43 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
d.val = _mm_unpackhi_ps(t02hi, t13hi);
}
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b)
{
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2));
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4));
a = v_uint64x2(_mm_unpacklo_epi64(t0, _mm_unpackhi_epi64(t1, t1)));
b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2));
c = v_uint64x2(_mm_unpacklo_epi64(t1, _mm_unpackhi_epi64(t2, t2)));
}
inline void v_load_deinterleave(const int64 *ptr, v_int64x2& a, v_int64x2& b, v_int64x2& c)
{
v_uint64x2 t0, t1, t2;
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
a = v_reinterpret_as_s64(t0);
b = v_reinterpret_as_s64(t1);
c = v_reinterpret_as_s64(t2);
}
inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2& b, v_float64x2& c)
{
v_uint64x2 t0, t1, t2;
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
a = v_reinterpret_as_f64(t0);
b = v_reinterpret_as_f64(t1);
c = v_reinterpret_as_f64(t2);
a = v_uint64x2(_mm_unpacklo_epi64(t0, t1));
b = v_uint64x2(_mm_unpackhi_epi64(t0, t1));
}
// 2-channel
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
{
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0, b0
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0, a1
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // b1, c1
__m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1
__m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3
t1 = _mm_shuffle_epi32(t1, 0x4e); // a1, c0
a.val = _mm_shuffle_ps(u0, u1, mask_lo); // a0 a1 a2 a3
b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 b2 b3
a = v_uint64x2(_mm_unpacklo_epi64(t0, t1));
b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2));
c = v_uint64x2(_mm_unpackhi_epi64(t1, t2));
}
inline void v_load_deinterleave(const short* ptr, v_int16x8& a, v_int16x8& b)
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a,
v_uint64x2& b, v_uint64x2& c, v_uint64x2& d)
{
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 a2 b2 a3 b3
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
__m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
__m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
__m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
__m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0 d0
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // a1 b1
__m128i t3 = _mm_loadu_si128((const __m128i*)(ptr + 6)); // c1 d1
a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
a = v_uint64x2(_mm_unpacklo_epi64(t0, t2));
b = v_uint64x2(_mm_unpackhi_epi64(t0, t2));
c = v_uint64x2(_mm_unpacklo_epi64(t1, t3));
d = v_uint64x2(_mm_unpackhi_epi64(t1, t3));
}
inline void v_load_deinterleave(const ushort*ptr, v_uint16x8& a, v_uint16x8& b)
{
v_int16x8 sa, sb;
v_load_deinterleave((const short*)ptr, sa, sb);
a = v_reinterpret_as_u16(sa);
b = v_reinterpret_as_u16(sb);
}
inline void v_store_interleave(short* ptr, const v_int16x8& a, const v_int16x8& b)
{
__m128i t0, t1;
t0 = _mm_unpacklo_epi16(a.val, b.val);
t1 = _mm_unpackhi_epi16(a.val, b.val);
_mm_storeu_si128((__m128i*)(ptr), t0);
_mm_storeu_si128((__m128i*)(ptr + 8), t1);
}
// store interleave
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b)
{
@ -1937,7 +1967,24 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
const v_uint8x16& c )
{
#if CV_SSSE3
#if CV_SSE4_1
static const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
static const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
static const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
__m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0);
__m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0);
__m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0);
_mm_storeu_si128((__m128i*)(ptr), v0);
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
_mm_storeu_si128((__m128i*)(ptr + 32), v2);
#elif CV_SSSE3
static const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5);
static const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10);
static const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15);
@ -2025,10 +2072,35 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
_mm_storeu_si128((__m128i*)(ptr + 48), v3);
}
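All of the interleaved stores above produce the same element order regardless of the instruction-set branch: in the SSE4.1 path each plane is pre-shuffled so its bytes already sit at their final offsets, then the two 0/-1 byte masks blend the three shuffled registers in the repeating channel pattern. A scalar sketch of the 3-channel store semantics for reference (illustrative only):

static void store_interleave_3ch_ref(unsigned char* dst, const unsigned char a[16],
                                     const unsigned char b[16], const unsigned char c[16])
{
    for (int i = 0; i < 16; ++i)
    {
        dst[3*i + 0] = a[i];   // pixel i, channel 0
        dst[3*i + 1] = b[i];   // pixel i, channel 1
        dst[3*i + 2] = c[i];   // pixel i, channel 2
    }
}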
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b )
{
__m128i t0, t1;
t0 = _mm_unpacklo_epi16(a.val, b.val);
t1 = _mm_unpackhi_epi16(a.val, b.val);
_mm_storeu_si128((__m128i*)(ptr), t0);
_mm_storeu_si128((__m128i*)(ptr + 8), t1);
}
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
const v_uint16x8& b,
const v_uint16x8& c )
{
#if CV_SSE4_1
static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
static const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
__m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24);
__m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24);
__m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24);
_mm_storeu_si128((__m128i*)ptr, v0);
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
#else
__m128i z = _mm_setzero_si128();
__m128i ab0 = _mm_unpacklo_epi16(a.val, b.val);
__m128i ab1 = _mm_unpackhi_epi16(a.val, b.val);
@ -2060,6 +2132,7 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
_mm_storeu_si128((__m128i*)(ptr), v0);
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
#endif
}
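In the SSE4.1 branch above, the _mm_blend_epi16 immediates select every third 16-bit lane so the three shuffled planes fall into the repeating a, b, c lane order. A small compile-time check of the masks (sketch, assuming static_assert is available):

// Bit p of the immediate selects lane p from the second blend operand.
static_assert(0x92 == ((1 << 1) | (1 << 4) | (1 << 7)), "0x92 picks lanes 1, 4, 7");
static_assert(0x24 == ((1 << 2) | (1 << 5)),            "0x24 picks lanes 2, 5");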
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
@ -2085,6 +2158,15 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16
_mm_storeu_si128((__m128i*)(ptr + 24), v3);
}
inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b )
{
__m128i t0 = _mm_unpacklo_epi32(a.val, b.val);
__m128i t1 = _mm_unpackhi_epi32(a.val, b.val);
_mm_storeu_si128((__m128i*)ptr, t0);
_mm_storeu_si128((__m128i*)(ptr + 4), t1);
}
inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
const v_uint32x4& c )
{
@ -2158,6 +2240,15 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
_mm_storeu_ps(ptr + 12, v3);
}
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b)
{
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
__m128i t1 = _mm_unpackhi_epi64(a.val, b.val);
_mm_storeu_si128((__m128i*)ptr, t0);
_mm_storeu_si128((__m128i*)(ptr + 2), t1);
}
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
{
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
@ -2169,58 +2260,72 @@ inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x
_mm_storeu_si128((__m128i*)(ptr + 4), t2);
}
inline void v_store_interleave(int64 *ptr, const v_int64x2& a, const v_int64x2& b, const v_int64x2& c)
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, const v_uint64x2& d)
{
v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
}
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
__m128i t1 = _mm_unpacklo_epi64(c.val, d.val);
__m128i t2 = _mm_unpackhi_epi64(a.val, b.val);
__m128i t3 = _mm_unpackhi_epi64(c.val, d.val);
inline void v_store_interleave(double *ptr, const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
{
v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
_mm_storeu_si128((__m128i*)ptr, t0);
_mm_storeu_si128((__m128i*)(ptr + 2), t1);
_mm_storeu_si128((__m128i*)(ptr + 4), t2);
_mm_storeu_si128((__m128i*)(ptr + 6), t3);
}
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
_Tpvec& b0, _Tpvec& c0 ) \
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
{ \
_Tpvec1 a1, b1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
} \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
{ \
_Tpvec1 a1, b1, c1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
c0 = v_reinterpret_as_##suffix0(c1); \
} \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
{ \
_Tpuvec a1, b1, c1; \
v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1); \
a0 = v_reinterpret_as_##suffix(a1); \
b0 = v_reinterpret_as_##suffix(b1); \
c0 = v_reinterpret_as_##suffix(c1); \
_Tpvec1 a1, b1, c1, d1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
c0 = v_reinterpret_as_##suffix0(c1); \
d0 = v_reinterpret_as_##suffix0(d1); \
} \
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
_Tpvec& b0, _Tpvec& c0, _Tpvec& d0 ) \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \
{ \
_Tpuvec a1, b1, c1, d1; \
v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1, d1); \
a0 = v_reinterpret_as_##suffix(a1); \
b0 = v_reinterpret_as_##suffix(b1); \
c0 = v_reinterpret_as_##suffix(c1); \
d0 = v_reinterpret_as_##suffix(d1); \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
v_store_interleave((_Tp1*)ptr, a1, b1); \
} \
inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, \
const _Tpvec& b0, const _Tpvec& c0 ) \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \
{ \
_Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
_Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
_Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
v_store_interleave((_Tpu*)ptr, a1, b1, c1); \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
v_store_interleave((_Tp1*)ptr, a1, b1, c1); \
} \
inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, const _Tpvec& b0, \
const _Tpvec& c0, const _Tpvec& d0 ) \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
const _Tpvec0& c0, const _Tpvec0& d0 ) \
{ \
_Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
_Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
_Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
_Tpuvec d1 = v_reinterpret_as_##usuffix(d0); \
v_store_interleave((_Tpu*)ptr, a1, b1, c1, d1); \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
_Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \
}
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32)
//OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float32x4, float, f32, v_uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64)
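For reference, this is what the macro generates for the 2-channel load of the v_int8x16 instantiation above (hand-expanded sketch): the signed and floating-point overloads simply forward to the unsigned implementation and reinterpret the result.

inline void v_load_deinterleave( const schar* ptr, v_int8x16& a0, v_int8x16& b0 )
{
    v_uint8x16 a1, b1;
    v_load_deinterleave((const uchar*)ptr, a1, b1);   // unsigned implementation does the work
    a0 = v_reinterpret_as_s8(a1);                     // reinterpret back to the signed type
    b0 = v_reinterpret_as_s8(b1);
}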
inline v_float32x4 v_cvt_f32(const v_int32x4& a)
{

@ -298,6 +298,8 @@ OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2)
/* Expand */
#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \

@ -871,6 +871,13 @@ public:
*/
TermCriteria(int type, int maxCount, double epsilon);
inline bool isValid() const
{
const bool isCount = (type & COUNT) && maxCount > 0;
const bool isEps = (type & EPS) && !cvIsNaN(epsilon);
return isCount || isEps;
}
int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS
int maxCount; //!< the maximum number of iterations/elements
double epsilon; //!< the desired accuracy
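A short usage sketch of the new isValid() check (illustrative values; assumes <opencv2/core.hpp> and <limits> are included):

cv::TermCriteria ok(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 1e-6);
CV_Assert(ok.isValid());    // COUNT with maxCount > 0 is already enough

cv::TermCriteria bad(cv::TermCriteria::EPS, 0, std::numeric_limits<double>::quiet_NaN());
CV_Assert(!bad.isValid());  // EPS with NaN epsilon and no positive COUNT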

@ -629,7 +629,6 @@ CV_INLINE int cvIplDepth( int type )
#define CV_TYPE_NAME_MATND "opencv-nd-matrix"
#define CV_MAX_DIM 32
#define CV_MAX_DIM_HEAP 1024
/**
@deprecated consider using cv::Mat instead

@ -1725,8 +1725,8 @@ cvPtr1D( const CvArr* arr, int idx, int* _type )
else
{
int i, n = m->dims;
CV_DbgAssert( n <= CV_MAX_DIM_HEAP );
int _idx[CV_MAX_DIM_HEAP];
CV_DbgAssert( n <= CV_MAX_DIM );
int _idx[CV_MAX_DIM];
for( i = n - 1; i >= 0; i-- )
{

@ -8,223 +8,49 @@
namespace cv { namespace hal {
#if CV_NEON
template<typename T> struct VMerge2;
template<typename T> struct VMerge3;
template<typename T> struct VMerge4;
#define MERGE2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
store_func(dst, r); \
} \
}
#if CV_SIMD
template<typename T, typename VecT> static void
vecmerge_( const T** src, T* dst, int len, int cn )
{
int i;
const T* src0 = src[0];
const T* src1 = src[1];
#define MERGE3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
const data_type* src2, data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
r.val[2] = load_func(src2); \
store_func(dst, r); \
} \
const int VECSZ = VecT::nlanes;
if( cn == 2 )
{
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
v_store_interleave(dst + i*cn, a, b);
}
}
#define MERGE4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
const data_type* src2, const data_type* src3, \
data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
r.val[2] = load_func(src2); \
r.val[3] = load_func(src3); \
store_func(dst, r); \
} \
else if( cn == 3 )
{
const T* src2 = src[2];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a = vx_load(src0 + i), b = vx_load(src1 + i), c = vx_load(src2 + i);
v_store_interleave(dst + i*cn, a, b, c);
}
}
MERGE2_KERNEL_TEMPLATE(VMerge2, uchar , uint8x16x2_t, vld1q_u8 , vst2q_u8 );
MERGE2_KERNEL_TEMPLATE(VMerge2, ushort, uint16x8x2_t, vld1q_u16, vst2q_u16);
MERGE2_KERNEL_TEMPLATE(VMerge2, int , int32x4x2_t, vld1q_s32, vst2q_s32);
MERGE2_KERNEL_TEMPLATE(VMerge2, int64 , int64x1x2_t, vld1_s64 , vst2_s64 );
MERGE3_KERNEL_TEMPLATE(VMerge3, uchar , uint8x16x3_t, vld1q_u8 , vst3q_u8 );
MERGE3_KERNEL_TEMPLATE(VMerge3, ushort, uint16x8x3_t, vld1q_u16, vst3q_u16);
MERGE3_KERNEL_TEMPLATE(VMerge3, int , int32x4x3_t, vld1q_s32, vst3q_s32);
MERGE3_KERNEL_TEMPLATE(VMerge3, int64 , int64x1x3_t, vld1_s64 , vst3_s64 );
MERGE4_KERNEL_TEMPLATE(VMerge4, uchar , uint8x16x4_t, vld1q_u8 , vst4q_u8 );
MERGE4_KERNEL_TEMPLATE(VMerge4, ushort, uint16x8x4_t, vld1q_u16, vst4q_u16);
MERGE4_KERNEL_TEMPLATE(VMerge4, int , int32x4x4_t, vld1q_s32, vst4q_s32);
MERGE4_KERNEL_TEMPLATE(VMerge4, int64 , int64x1x4_t, vld1_s64 , vst4_s64 );
#elif CV_SSE2
template <typename T>
struct VMerge2
{
VMerge2() : support(false) { }
void operator()(const T *, const T *, T *) const { }
bool support;
};
template <typename T>
struct VMerge3
{
VMerge3() : support(false) { }
void operator()(const T *, const T *, const T *, T *) const { }
bool support;
};
template <typename T>
struct VMerge4
{
VMerge4() : support(false) { }
void operator()(const T *, const T *, const T *, const T *, T *) const { }
bool support;
};
#define MERGE2_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge2<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VMerge2() \
{ \
support = checkHardwareSupport(se); \
} \
\
void operator()(const data_type * src0, const data_type * src1, \
data_type * dst) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
\
_mm_interleave(v_src0, v_src1, v_src2, v_src3); \
\
_mm_storeu_##flavor((cast_type *)(dst), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
} \
\
bool support; \
}
#define MERGE3_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge3<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VMerge3() \
{ \
support = checkHardwareSupport(se); \
} \
\
void operator()(const data_type * src0, const data_type * src1, const data_type * src2,\
data_type * dst) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
reg_type v_src4 = _mm_loadu_##flavor((const cast_type *)(src2)); \
reg_type v_src5 = _mm_loadu_##flavor((const cast_type *)(src2 + ELEMS_IN_VEC)); \
\
_mm_interleave(v_src0, v_src1, v_src2, \
v_src3, v_src4, v_src5); \
\
_mm_storeu_##flavor((cast_type *)(dst), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 4), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 5), v_src5); \
} \
\
bool support; \
}
#define MERGE4_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge4<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VMerge4() \
{ \
support = checkHardwareSupport(se); \
} \
\
void operator()(const data_type * src0, const data_type * src1, \
const data_type * src2, const data_type * src3, \
data_type * dst) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
reg_type v_src4 = _mm_loadu_##flavor((const cast_type *)(src2)); \
reg_type v_src5 = _mm_loadu_##flavor((const cast_type *)(src2 + ELEMS_IN_VEC)); \
reg_type v_src6 = _mm_loadu_##flavor((const cast_type *)(src3)); \
reg_type v_src7 = _mm_loadu_##flavor((const cast_type *)(src3 + ELEMS_IN_VEC)); \
\
_mm_interleave(v_src0, v_src1, v_src2, v_src3, \
v_src4, v_src5, v_src6, v_src7); \
\
_mm_storeu_##flavor((cast_type *)(dst), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 4), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 5), v_src5); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 6), v_src6); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 7), v_src7); \
} \
\
bool support; \
else
{
CV_Assert( cn == 4 );
const T* src2 = src[2];
const T* src3 = src[3];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
VecT c = vx_load(src2 + i), d = vx_load(src3 + i);
v_store_interleave(dst + i*cn, a, b, c, d);
}
}
vx_cleanup();
}
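The i = std::min(len - VECSZ, i) clamp makes the final iteration re-process a few elements instead of falling back to a scalar tail; that is safe here because the kernel writes the same interleaved values on the overlap, and the dispatchers below only take this path when len >= nlanes. A standalone sketch of the resulting block layout (hypothetical sizes):

#include <algorithm>
#include <cstdio>

int main()
{
    const int len = 10, VECSZ = 4;           // example sizes, not taken from the library
    for (int i = 0; i < len; i += VECSZ)
    {
        i = std::min(len - VECSZ, i);        // same clamp as vecmerge_ / vecsplit_
        std::printf("block [%d, %d)\n", i, i + VECSZ);
    }
    return 0;                                // prints [0,4) [4,8) [6,10): last block overlaps
}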
MERGE2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128, CV_CPU_SSE2);
MERGE3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128, CV_CPU_SSE2);
MERGE4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128, CV_CPU_SSE2);
#if CV_SSE4_1
MERGE2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
MERGE3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
MERGE4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
#endif
MERGE2_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps, CV_CPU_SSE2);
MERGE3_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps, CV_CPU_SSE2);
MERGE4_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps, CV_CPU_SSE2);
#endif
template<typename T> static void
@ -242,28 +68,6 @@ merge_( const T** src, T* dst, int len, int cn )
{
const T *src0 = src[0], *src1 = src[1];
i = j = 0;
#if CV_NEON
if(cn == 2)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 2 * inc_i;
VMerge2<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, dst + j);
}
#elif CV_SSE2
if(cn == 2)
{
int inc_i = 32/sizeof(T);
int inc_j = 2 * inc_i;
VMerge2<T> vmerge;
if (vmerge.support)
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
@ -274,28 +78,6 @@ merge_( const T** src, T* dst, int len, int cn )
{
const T *src0 = src[0], *src1 = src[1], *src2 = src[2];
i = j = 0;
#if CV_NEON
if(cn == 3)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 3 * inc_i;
VMerge3<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, dst + j);
}
#elif CV_SSE2
if(cn == 3)
{
int inc_i = 32/sizeof(T);
int inc_j = 3 * inc_i;
VMerge3<T> vmerge;
if (vmerge.support)
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
@ -307,28 +89,6 @@ merge_( const T** src, T* dst, int len, int cn )
{
const T *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
i = j = 0;
#if CV_NEON
if(cn == 4)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 4 * inc_i;
VMerge4<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
}
#elif CV_SSE2
if(cn == 4)
{
int inc_i = 32/sizeof(T);
int inc_j = 4 * inc_i;
VMerge4<T> vmerge;
if (vmerge.support)
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i]; dst[j+1] = src1[i];
@ -347,29 +107,48 @@ merge_( const T** src, T* dst, int len, int cn )
}
}
void merge8u(const uchar** src, uchar* dst, int len, int cn )
{
CALL_HAL(merge8u, cv_hal_merge8u, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<uchar, v_uint8>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}
void merge16u(const ushort** src, ushort* dst, int len, int cn )
{
CALL_HAL(merge16u, cv_hal_merge16u, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<ushort, v_uint16>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}
void merge32s(const int** src, int* dst, int len, int cn )
{
CALL_HAL(merge32s, cv_hal_merge32s, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_int32::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<int, v_int32>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}
void merge64s(const int64** src, int64* dst, int len, int cn )
{
CALL_HAL(merge64s, cv_hal_merge64s, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<int64, v_int64>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}
}} // cv::hal::
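These dispatchers are what a plain cv::split / cv::merge call ultimately reaches; a minimal round-trip sketch:

#include <opencv2/core.hpp>
#include <vector>

int main()
{
    cv::Mat bgr(480, 640, CV_8UC3, cv::Scalar(10, 20, 30));
    std::vector<cv::Mat> planes;
    cv::split(bgr, planes);                     // ends up in split8u() for CV_8U data
    cv::Mat merged;
    cv::merge(planes, merged);                  // ends up in merge8u()
    CV_Assert(cv::norm(bgr, merged, cv::NORM_INF) == 0);
    return 0;
}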

@ -123,7 +123,6 @@ static char* icvJSONParseKey( CvFileStorage* fs, char* ptr, CvFileNode* map, CvF
CV_PARSE_ERROR( "Key must start with \'\"\'" );
char * beg = ptr + 1;
char * end = beg;
do {
++ptr;
@ -133,7 +132,7 @@ static char* icvJSONParseKey( CvFileStorage* fs, char* ptr, CvFileNode* map, CvF
if( *ptr != '"' )
CV_PARSE_ERROR( "Key must end with \'\"\'" );
end = ptr;
const char * end = ptr;
ptr++;
ptr = icvJSONSkipSpaces( fs, ptr );
if ( ptr == 0 || fs->dummy_eof )
@ -576,12 +575,12 @@ void icvJSONParse( CvFileStorage* fs )
if ( *ptr == '{' )
{
CvFileNode* root_node = (CvFileNode*)cvSeqPush( fs->roots, 0 );
ptr = icvJSONParseMap( fs, ptr, root_node );
icvJSONParseMap( fs, ptr, root_node );
}
else if ( *ptr == '[' )
{
CvFileNode* root_node = (CvFileNode*)cvSeqPush( fs->roots, 0 );
ptr = icvJSONParseSeq( fs, ptr, root_node );
icvJSONParseSeq( fs, ptr, root_node );
}
else
{
@ -668,7 +667,7 @@ void icvJSONWrite( CvFileStorage* fs, const char* key, const char* data )
*ptr++ = '\n';
*ptr++ = '\0';
::icvPuts( fs, fs->buffer_start );
ptr = fs->buffer = fs->buffer_start;
fs->buffer = fs->buffer_start;
}
ptr = icvFSFlush(fs);
}

@ -302,7 +302,7 @@ static void* icvReadSparseMat( CvFileStorage* fs, CvFileNode* node )
CvFileNode* sizes_node;
CvSeqReader reader;
CvSeq* elements;
int sizes[CV_MAX_DIM_HEAP], dims, elem_type, cn;
int sizes[CV_MAX_DIM], dims, elem_type, cn;
int i;
sizes_node = cvGetFileNodeByName( fs, node, "sizes" );
@ -327,7 +327,7 @@ static void* icvReadSparseMat( CvFileStorage* fs, CvFileNode* node )
mat = cvCreateSparseMat( dims, sizes, elem_type );
cn = CV_MAT_CN(elem_type);
int idx[CV_MAX_DIM_HEAP];
int idx[CV_MAX_DIM];
elements = data->data.seq;
cvStartReadRawData( fs, data, &reader );

@ -8,222 +8,57 @@
namespace cv { namespace hal {
#if CV_NEON
template<typename T> struct VSplit2;
template<typename T> struct VSplit3;
template<typename T> struct VSplit4;
#define SPLIT2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type> \
{ \
void operator()(const data_type* src, data_type* dst0, \
data_type* dst1) const \
{ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
} \
}
#if CV_SIMD
template<typename T, typename VecT> static void
vecsplit_( const T* src, T** dst, int len, int cn )
{
int i;
T* dst0 = dst[0];
T* dst1 = dst[1];
#define SPLIT3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type> \
{ \
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
data_type* dst2) const \
{ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
store_func(dst2, r.val[2]); \
} \
const int VECSZ = VecT::nlanes;
if( cn == 2 )
{
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a, b;
v_load_deinterleave(src + i*cn, a, b);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
}
}
#define SPLIT4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type> \
{ \
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
data_type* dst2, data_type* dst3) const \
{ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
store_func(dst2, r.val[2]); \
store_func(dst3, r.val[3]); \
} \
else if( cn == 3 )
{
T* dst2 = dst[2];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a, b, c;
v_load_deinterleave(src + i*cn, a, b, c);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
v_store(dst2 + i, c);
}
}
SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar , uint8x16x2_t, vld2q_u8 , vst1q_u8 );
SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort, uint16x8x2_t, vld2q_u16, vst1q_u16);
SPLIT2_KERNEL_TEMPLATE(VSplit2, int , int32x4x2_t, vld2q_s32, vst1q_s32);
SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 , int64x1x2_t, vld2_s64 , vst1_s64 );
SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar , uint8x16x3_t, vld3q_u8 , vst1q_u8 );
SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort, uint16x8x3_t, vld3q_u16, vst1q_u16);
SPLIT3_KERNEL_TEMPLATE(VSplit3, int , int32x4x3_t, vld3q_s32, vst1q_s32);
SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 , int64x1x3_t, vld3_s64 , vst1_s64 );
SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar , uint8x16x4_t, vld4q_u8 , vst1q_u8 );
SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort, uint16x8x4_t, vld4q_u16, vst1q_u16);
SPLIT4_KERNEL_TEMPLATE(VSplit4, int , int32x4x4_t, vld4q_s32, vst1q_s32);
SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 , int64x1x4_t, vld4_s64 , vst1_s64 );
#elif CV_SSE2
template <typename T>
struct VSplit2
{
VSplit2() : support(false) { }
void operator()(const T *, T *, T *) const { }
bool support;
};
template <typename T>
struct VSplit3
{
VSplit3() : support(false) { }
void operator()(const T *, T *, T *, T *) const { }
bool support;
};
template <typename T>
struct VSplit4
{
VSplit4() : support(false) { }
void operator()(const T *, T *, T *, T *, T *) const { }
bool support;
};
#define SPLIT2_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit2<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VSplit2() \
{ \
support = checkHardwareSupport(CV_CPU_SSE2); \
} \
\
void operator()(const data_type * src, \
data_type * dst0, data_type * dst1) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \
reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
\
_mm_deinterleave(v_src0, v_src1, v_src2, v_src3); \
\
_mm_storeu_##flavor((cast_type *)(dst0), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst1), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \
} \
\
bool support; \
}
#define SPLIT3_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit3<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VSplit3() \
{ \
support = checkHardwareSupport(CV_CPU_SSE2); \
} \
\
void operator()(const data_type * src, \
data_type * dst0, data_type * dst1, data_type * dst2) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \
reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
reg_type v_src4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \
reg_type v_src5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \
\
_mm_deinterleave(v_src0, v_src1, v_src2, \
v_src3, v_src4, v_src5); \
\
_mm_storeu_##flavor((cast_type *)(dst0), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst1), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst2), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), v_src5); \
} \
\
bool support; \
}
#define SPLIT4_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit4<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VSplit4() \
{ \
support = checkHardwareSupport(CV_CPU_SSE2); \
} \
\
void operator()(const data_type * src, data_type * dst0, data_type * dst1, \
data_type * dst2, data_type * dst3) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \
reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
reg_type v_src4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \
reg_type v_src5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \
reg_type v_src6 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 6)); \
reg_type v_src7 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 7)); \
\
_mm_deinterleave(v_src0, v_src1, v_src2, v_src3, \
v_src4, v_src5, v_src6, v_src7); \
\
_mm_storeu_##flavor((cast_type *)(dst0), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst1), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst2), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), v_src5); \
_mm_storeu_##flavor((cast_type *)(dst3), v_src6); \
_mm_storeu_##flavor((cast_type *)(dst3 + ELEMS_IN_VEC), v_src7); \
} \
\
bool support; \
else
{
CV_Assert( cn == 4 );
T* dst2 = dst[2];
T* dst3 = dst[3];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a, b, c, d;
v_load_deinterleave(src + i*cn, a, b, c, d);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
v_store(dst2 + i, c);
v_store(dst3 + i, d);
}
}
vx_cleanup();
}
SPLIT2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
SPLIT2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
SPLIT2_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);
SPLIT3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
SPLIT3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
SPLIT3_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);
SPLIT4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
SPLIT4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
SPLIT4_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);
#endif
template<typename T> static void
@ -250,30 +85,6 @@ split_( const T* src, T** dst, int len, int cn )
T *dst0 = dst[0], *dst1 = dst[1];
i = j = 0;
#if CV_NEON
if(cn == 2)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 2 * inc_i;
VSplit2<T> vsplit;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i);
}
#elif CV_SSE2
if (cn == 2)
{
int inc_i = 32/sizeof(T);
int inc_j = 2 * inc_i;
VSplit2<T> vsplit;
if (vsplit.support)
{
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i);
}
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j];
@ -285,31 +96,6 @@ split_( const T* src, T** dst, int len, int cn )
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
i = j = 0;
#if CV_NEON
if(cn == 3)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 3 * inc_i;
VSplit3<T> vsplit;
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
}
#elif CV_SSE2
if (cn == 3)
{
int inc_i = 32/sizeof(T);
int inc_j = 3 * inc_i;
VSplit3<T> vsplit;
if (vsplit.support)
{
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
}
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j];
@ -322,30 +108,6 @@ split_( const T* src, T** dst, int len, int cn )
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3];
i = j = 0;
#if CV_NEON
if(cn == 4)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 4 * inc_i;
VSplit4<T> vsplit;
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
}
#elif CV_SSE2
if (cn == 4)
{
int inc_i = 32/sizeof(T);
int inc_j = 4 * inc_i;
VSplit4<T> vsplit;
if (vsplit.support)
{
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
}
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j]; dst1[i] = src[j+1];
@ -367,25 +129,46 @@ split_( const T* src, T** dst, int len, int cn )
void split8u(const uchar* src, uchar** dst, int len, int cn )
{
CALL_HAL(split8u, cv_hal_split8u, src,dst, len, cn)
split_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<uchar, v_uint8>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}
void split16u(const ushort* src, ushort** dst, int len, int cn )
{
CALL_HAL(split16u, cv_hal_split16u, src,dst, len, cn)
split_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<ushort, v_uint16>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}
void split32s(const int* src, int** dst, int len, int cn )
{
CALL_HAL(split32s, cv_hal_split32s, src,dst, len, cn)
split_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint32::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<int, v_int32>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}
void split64s(const int64* src, int64** dst, int len, int cn )
{
CALL_HAL(split64s, cv_hal_split64s, src,dst, len, cn)
split_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<int64, v_int64>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}
}} // cv::hal::

@ -1014,8 +1014,8 @@ protected:
Size mSize(rng.uniform(minMSize, maxMSize), rng.uniform(minMSize, maxMSize));
size_t mvSize = rng.uniform(1, maxMvSize);
int res = cvtest::TS::OK, curRes = res;
curRes = run_case(CV_8U, mvSize, mSize, rng);
int res = cvtest::TS::OK;
int curRes = run_case(CV_8U, mvSize, mSize, rng);
res = curRes != cvtest::TS::OK ? curRes : res;
curRes = run_case(CV_8S, mvSize, mSize, rng);

@ -173,7 +173,7 @@ void Core_RandTest::run( int )
dsz = slice+1 < maxSlice ? (int)(cvtest::randInt(rng) % (SZ - sz) + 1) : SZ - sz;
Mat aslice = arr[k].colRange(sz, sz + dsz);
tested_rng.fill(aslice, dist_type, A, B);
printf("%d - %d\n", sz, sz + dsz);
//printf("%d - %d\n", sz, sz + dsz);
}
}
@ -375,9 +375,11 @@ TEST(Core_Rand, Regression_Stack_Corruption)
int bufsz = 128; //enough for 14 doubles
AutoBuffer<uchar> buffer(bufsz);
size_t offset = 0;
cv::Mat_<cv::Point2d> x(2, 3, (cv::Point2d*)(buffer.data()+offset)); offset += x.total()*x.elemSize();
double& param1 = *(double*)(buffer.data()+offset); offset += sizeof(double);
double& param2 = *(double*)(buffer.data()+offset); offset += sizeof(double);
cv::Mat_<cv::Point2d> x(2, 3, (cv::Point2d*)(buffer.data()+offset));
offset += x.total()*x.elemSize();
double& param1 = *(double*)(buffer.data()+offset);
offset += sizeof(double);
double& param2 = *(double*)(buffer.data()+offset);
param1 = -9; param2 = 2;
cv::theRNG().fill(x, cv::RNG::NORMAL, param1, param2);

@ -120,3 +120,9 @@ if(BUILD_PERF_TESTS)
endif()
endif()
endif()
# Test Intel's Inference Engine models
if(HAVE_INF_ENGINE AND TARGET opencv_test_dnn)
ocv_target_include_directories(opencv_test_dnn PRIVATE ${INF_ENGINE_INCLUDE_DIRS})
ocv_target_link_libraries(opencv_test_dnn LINK_PRIVATE ${INF_ENGINE_LIBRARIES})
endif()

@ -46,9 +46,9 @@
#include <opencv2/core.hpp>
#if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v5 {
#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v6 {
#define CV__DNN_EXPERIMENTAL_NS_END }
namespace cv { namespace dnn { namespace experimental_dnn_v5 { } using namespace experimental_dnn_v5; }}
namespace cv { namespace dnn { namespace experimental_dnn_v6 { } using namespace experimental_dnn_v6; }}
#else
#define CV__DNN_EXPERIMENTAL_NS_BEGIN
#define CV__DNN_EXPERIMENTAL_NS_END
@ -487,14 +487,19 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
CV_WRAP void setPreferableTarget(int targetId);
/** @brief Sets the new value for the layer output blob
* @param name descriptor of the updating layer output blob.
* @param blob new blob.
/** @brief Sets the new input value for the network
* @param blob A new blob. Should have CV_32F or CV_8U depth.
* @param name A name of input layer.
* @param scalefactor An optional normalization scale.
* @param mean Optional per-channel mean subtraction values.
* @see connect(String, String) to know format of the descriptor.
* @note If updating blob is not empty then @p blob must have the same shape,
* because network reshaping is not implemented yet.
*
* If scale or mean values are specified, a final input blob is computed
* as:
* \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f]
*/
CV_WRAP void setInput(InputArray blob, const String& name = "");
CV_WRAP void setInput(InputArray blob, const String& name = "",
double scalefactor = 1.0, const Scalar& mean = Scalar());
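A usage sketch of the extended setInput; here `net` is an already loaded network, `frame` an assumed BGR cv::Mat, and "data" a placeholder input name:

cv::Mat blob = cv::dnn::blobFromImage(frame, 1.0, cv::Size(224, 224));
// The network applies input = scalefactor * (blob - mean) per the formula above,
// so the scale and per-channel mean no longer have to be baked into the blob.
net.setInput(blob, "data", 1.0 / 255.0, cv::Scalar(104, 117, 123));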
/** @brief Sets the new value for the learned param of the layer.
* @param layer name or id of the layer.
@ -805,13 +810,15 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
* @param swapRB flag which indicates that swap first and last channels
* in 3-channel image is necessary.
* @param crop flag which indicates whether image will be cropped after resize or not
* @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
* @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
* dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
* If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
* @returns 4-dimensional Mat with NCHW dimensions order.
*/
CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
int ddepth=CV_32F);
/** @brief Creates 4-dimensional blob from image.
* @details This is an overloaded member function, provided for convenience.
@ -819,7 +826,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
const Size& size = Size(), const Scalar& mean = Scalar(),
bool swapRB=true, bool crop=true);
bool swapRB=true, bool crop=true, int ddepth=CV_32F);
/** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
@ -833,13 +840,15 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
* @param swapRB flag which indicates that swap first and last channels
* in 3-channel image is necessary.
* @param crop flag which indicates whether image will be cropped after resize or not
* @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
* @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
* dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
* If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
* @returns 4-dimansional Mat with NCHW dimensions order.
* @returns 4-dimensional Mat with NCHW dimensions order.
*/
CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
int ddepth=CV_32F);
/** @brief Creates 4-dimensional blob from series of images.
* @details This is an overloaded member function, provided for convenience.
@ -847,7 +856,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
double scalefactor=1.0, Size size = Size(),
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
int ddepth=CV_32F);
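With the new ddepth parameter an 8-bit blob can be built without an intermediate float copy, deferring scaling and mean subtraction to Net::setInput (sketch; `frame` is an assumed 8-bit BGR image, and CV_8U requires scalefactor 1.0 and an empty mean here):

cv::Mat blob8u = cv::dnn::blobFromImage(frame, /*scalefactor=*/1.0, cv::Size(300, 300),
                                        /*mean=*/cv::Scalar(), /*swapRB=*/true,
                                        /*crop=*/false, /*ddepth=*/CV_8U);
CV_Assert(blob8u.depth() == CV_8U && blob8u.dims == 4);   // NCHW blob kept in uint8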
/** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
* (std::vector<cv::Mat>).

@ -97,35 +97,42 @@ namespace
}
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
const Scalar& mean, bool swapRB, bool crop)
const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
Mat blob;
blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop);
blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
return blob;
}
void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
const Size& size, const Scalar& mean, bool swapRB, bool crop)
const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
std::vector<Mat> images(1, image.getMat());
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop);
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}
Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
const Scalar& mean, bool swapRB, bool crop)
const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
Mat blob;
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop);
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
return blob;
}
void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
Size size, const Scalar& mean_, bool swapRB, bool crop)
Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
if (ddepth == CV_8U)
{
CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
CV_Assert(mean_ == Scalar(), "Mean subtraction is not supported for CV_8U blob depth");
}
std::vector<Mat> images;
images_.getMatVector(images);
CV_Assert(!images.empty());
@ -149,7 +156,7 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
else
resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
}
if(images[i].depth() == CV_8U)
if(images[i].depth() == CV_8U && ddepth == CV_32F)
images[i].convertTo(images[i], CV_32F);
Scalar mean = mean_;
if (swapRB)
@ -167,20 +174,20 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
if (nch == 3 || nch == 4)
{
int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
blob_.create(4, sz, CV_32F);
blob_.create(4, sz, ddepth);
Mat blob = blob_.getMat();
Mat ch[4];
for( i = 0; i < nimages; i++ )
{
image = images[i];
CV_Assert(image.depth() == CV_32F);
CV_Assert(image.depth() == blob_.depth());
nch = image.channels();
CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
CV_Assert(image.size() == image0.size());
for( int j = 0; j < nch; j++ )
ch[j] = Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, j));
ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
if(swapRB)
std::swap(ch[0], ch[2]);
split(image, ch);
@ -190,18 +197,18 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
{
CV_Assert(nch == 1);
int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
blob_.create(4, sz, CV_32F);
blob_.create(4, sz, ddepth);
Mat blob = blob_.getMat();
for( i = 0; i < nimages; i++ )
{
Mat image = images[i];
CV_Assert(image.depth() == CV_32F);
CV_Assert(image.depth() == blob_.depth());
nch = image.channels();
CV_Assert(image.dims == 2 && (nch == 1));
CV_Assert(image.size() == image0.size());
image.copyTo(Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, 0)));
image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
}
}
}
@ -408,7 +415,16 @@ struct LayerData
//fake layer containing network input blobs
struct DataLayer : public Layer
{
void finalize(const std::vector<Mat*>&, std::vector<Mat>&) CV_OVERRIDE {}
DataLayer() : Layer()
{
skip = false;
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1;
}
void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE
{
@ -423,11 +439,36 @@ struct DataLayer : public Layer
void forward(std::vector<Mat*>&, std::vector<Mat>& outputs, std::vector<Mat> &) CV_OVERRIDE
{
// Supported modes:
// | Input type | Output type |
// | fp32 | fp32 |
// | uint8 | fp32 |
for (int i = 0; i < inputsData.size(); ++i)
{
if (inputsData[i].type() == CV_32F && outputs[i].type() == CV_16S)
double scale = scaleFactors[i];
Scalar& mean = means[i];
CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4,
outputs[i].type() == CV_32F);
bool singleMean = true;
for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
{
convertFp16(inputsData[i], outputs[i]);
singleMean = mean[j] == mean[j - 1];
}
if (singleMean)
{
inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
}
else
{
for (int n = 0; n < inputsData[i].size[0]; ++n)
for (int c = 0; c < inputsData[i].size[1]; ++c)
{
Mat inp = getPlane(inputsData[i], n, c);
Mat out = getPlane(outputs[i], n, c);
inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
}
}
}
}
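Both branches of the loop above compute the same per-element value; the singleMean case only folds the subtraction into a single convertTo call. A scalar sketch of that arithmetic (the real layer also accepts 8-bit input planes):

// convertTo(out, CV_32F, scale, -m * scale) evaluates out = in * scale - m * scale,
// i.e. out = scale * (in - m).
static void normalize_plane_ref(const float* in, float* out, int n, double scale, double m)
{
    for (int i = 0; i < n; ++i)
        out[i] = (float)(scale * (in[i] - m));
}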
@ -435,13 +476,66 @@ struct DataLayer : public Layer
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
if (outputs_.depth() == CV_16S)
// Supported modes:
// | Input type | Output type |
// | fp32 | fp32 |
// | fp32 | fp16 |
// | uint8 | fp32 |
std::vector<UMat> outputs;
outputs_.getUMatVector(outputs);
for (int i = 0; i < inputsData.size(); ++i)
{
std::vector<UMat> outputs;
outputs_.getUMatVector(outputs);
for (int i = 0; i < inputsData.size(); ++i)
double scale = scaleFactors[i];
Scalar& mean = means[i];
CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
bool singleMean = true;
for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
{
convertFp16(inputsData[i], outputs[i]);
singleMean = mean[j] == mean[j - 1];
}
if (outputs_.depth() == CV_16S)
{
if (singleMean)
convertFp16(scale * (inputsData[i] - mean[0]), outputs[i]);
else
{
for (int n = 0; n < inputsData[i].size[0]; ++n)
for (int c = 0; c < inputsData[i].size[1]; ++c)
{
Mat inp = getPlane(inputsData[i], n, c);
std::vector<cv::Range> plane(4, Range::all());
plane[0] = Range(n, n + 1);
plane[1] = Range(c, c + 1);
UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
convertFp16(scale * (inp - mean[c]), out);
}
}
}
else
{
CV_Assert(outputs_.depth() == CV_32F);
if (singleMean)
inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
else
{
for (int n = 0; n < inputsData[i].size[0]; ++n)
for (int c = 0; c < inputsData[i].size[1]; ++c)
{
Mat inp = getPlane(inputsData[i], n, c);
std::vector<cv::Range> plane(4, Range::all());
plane[0] = Range(n, n + 1);
plane[1] = Range(c, c + 1);
UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);
inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
}
}
}
}
return true;
@ -469,8 +563,61 @@ struct DataLayer : public Layer
return false;
}
void finalize(const std::vector<Mat*>&, std::vector<Mat>& outputs) CV_OVERRIDE
{
CV_Assert(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
inputsData.size() == outputs.size());
skip = true;
for (int i = 0; skip && i < inputsData.size(); ++i)
{
if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
skip = false;
}
}
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
CV_Assert(inputsData.size() == 1, inputsData[0].dims == 4);
const size_t numChannels = inputsData[0].size[1];
CV_Assert(numChannels <= 4);
// Scale
auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
{numChannels});
weights->allocate();
weights->set(std::vector<float>(numChannels, scaleFactors[0]));
ieLayer->_weights = weights;
// Mean subtraction
auto biases = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
{numChannels});
biases->allocate();
std::vector<float> biasesVec(numChannels);
for (int i = 0; i < numChannels; ++i)
{
biasesVec[i] = -means[0][i] * scaleFactors[0];
}
biases->set(biasesVec);
ieLayer->_biases = biases;
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
std::vector<String> outNames;
// Preprocessing parameters for each network's input.
std::vector<double> scaleFactors;
std::vector<Scalar> means;
std::vector<Mat> inputsData;
bool skip;
};
struct BlobManager
@ -739,7 +886,7 @@ struct Net::Impl
netInputLayer = Ptr<DataLayer>(new DataLayer());
LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
inpl.id = 0;
inpl.name = "_input";
netInputLayer->name = inpl.name = "_input";
inpl.type = "__NetInputLayer__";
inpl.layerInstance = netInputLayer;
layerNameToId.insert(std::make_pair(inpl.name, inpl.id));
@ -930,6 +1077,11 @@ struct Net::Impl
clear();
allocateLayers(blobsToKeep_);
MapIdToLayerData::iterator it = layers.find(0);
CV_Assert(it != layers.end());
it->second.skip = netInputLayer->skip;
initBackend();
if (!netWasAllocated )
@ -1179,6 +1331,29 @@ struct Net::Impl
MapIdToLayerData::iterator it;
Ptr<InfEngineBackendNet> net;
for (it = layers.begin(); it != layers.end(); ++it)
{
LayerData &ld = it->second;
if (ld.id == 0)
{
CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
(netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
}
}
else
{
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = ld.name;
}
}
}
if (skipInfEngineInit)
{
Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
@ -1190,11 +1365,21 @@ struct Net::Impl
for (it = layers.begin(); it != layers.end(); ++it)
{
LayerData &ld = it->second;
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
if (ld.id == 0)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = ld.id == 0 ? netInputLayer->outNames[i] : ld.name;
for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
dataPtr->name = netInputLayer->outNames[i];
}
}
else
{
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = ld.name;
}
}
ieNode->net->addBlobs(ld.inputBlobsWrappers);
ieNode->net->addBlobs(ld.outputBlobsWrappers);
@ -1210,11 +1395,11 @@ struct Net::Impl
// some of layers is not implemented.
// Set of all input and output blobs wrappers for current network.
std::map<int, Ptr<BackendWrapper> > netBlobsWrappers;
std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
for (it = layers.begin(); it != layers.end(); ++it)
{
LayerData &ld = it->second;
if (ld.id == 0)
if (ld.id == 0 && ld.skip)
continue;
bool fused = ld.skip;
@ -1251,20 +1436,17 @@ struct Net::Impl
// So we need to rewrap all the external blobs.
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
{
int lid = ld.inputBlobsId[i].lid;
LayerData &inpLd = layers[lid];
auto it = netBlobsWrappers.find(lid);
LayerPin inPin = ld.inputBlobsId[i];
auto it = netBlobsWrappers.find(inPin);
if (it == netBlobsWrappers.end())
{
ld.inputBlobsWrappers[i] = wrap(*ld.inputBlobs[i]);
auto dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
dataPtr->name = inpLd.name;
netBlobsWrappers[lid] = ld.inputBlobsWrappers[i];
ld.inputBlobsWrappers[i] = InfEngineBackendWrapper::create(ld.inputBlobsWrappers[i]);
netBlobsWrappers[inPin] = ld.inputBlobsWrappers[i];
}
else
ld.inputBlobsWrappers[i] = it->second;
}
netBlobsWrappers[ld.id] = ld.outputBlobsWrappers[0];
netBlobsWrappers[LayerPin(ld.id, 0)] = ld.outputBlobsWrappers[0];
Ptr<BackendNode> node;
if (!net.empty())
@ -2343,7 +2525,7 @@ void Net::setInputsNames(const std::vector<String> &inputBlobNames)
impl->netInputLayer->setNames(inputBlobNames);
}
void Net::setInput(InputArray blob, const String& name)
void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
@ -2360,6 +2542,8 @@ void Net::setInput(InputArray blob, const String& name)
ld.outputBlobs.resize(numInputs);
ld.outputBlobsWrappers.resize(numInputs);
impl->netInputLayer->inputsData.resize(numInputs);
impl->netInputLayer->scaleFactors.resize(numInputs);
impl->netInputLayer->means.resize(numInputs);
MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
Mat blob_ = blob.getMat();
@ -2378,6 +2562,8 @@ void Net::setInput(InputArray blob, const String& name)
{
ld.outputBlobsWrappers[pin.oid]->setHostDirty();
}
impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
impl->netInputLayer->means[pin.oid] = mean;
impl->netWasAllocated = impl->netWasAllocated && oldShape;
}
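The extended setInput() overload above attaches a per-input scale factor and mean that the network applies itself. A minimal usage sketch follows; the model files, the input name "data", and the mean values are placeholders, not part of this change:

// Hypothetical usage of the extended setInput() overload added above.
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    cv::dnn::Net net = cv::dnn::readNet("model.xml", "model.bin");
    cv::Mat img = cv::imread("input.jpg");
    // Keep the blob in 8-bit form; normalization is now delegated to the
    // network via the scalefactor/mean arguments of setInput().
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224),
                                          cv::Scalar(), true, false, CV_8U);
    net.setInput(blob, "data", 1.0 / 255, cv::Scalar(104, 117, 123));
    cv::Mat out = net.forward();
    return 0;
}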

@ -560,7 +560,7 @@ public:
int ngroups = ngroups_, batchSize = input_->size[0]*ngroups;
int outW = output_->size[3], outH = output_->size[2], outCn = output_->size[1]/ngroups;
int width = input_->size[3], height = input_->size[2], inpCn = input_->size[1]/ngroups;
int nstripes = nstripes_;
const int nstripes = nstripes_;
int kernel_w = kernel_.width, kernel_h = kernel_.height;
int pad_w = pad_.width, pad_h = pad_.height;
int stride_w = stride_.width, stride_h = stride_.height;
@ -587,7 +587,6 @@ public:
int samplesPerStripe = std::max((batchSize + nstripes - 1)/nstripes, 1);
r.start *= samplesPerStripe;
r.end *= samplesPerStripe;
nstripes *= samplesPerStripe;
stripeSize = outPlaneSize;
}
@ -866,6 +865,16 @@ public:
for (int i = 0; i < inputs.size(); ++i)
CV_Assert(inputs[i].u != outputs[0].u);
if (umat_blobs.empty())
{
size_t n = blobs.size();
umat_blobs.resize(n);
for (size_t i = 0; i < n; i++)
{
blobs[i].copyTo(umat_blobs[i]);
}
}
if (convolutionOp.empty())
{
OCL4DNNConvConfig config;
@ -1637,14 +1646,6 @@ public:
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(const LayerParams &params)
{
Ptr<ConvolutionLayerImpl> l(new ConvolutionLayerImpl(params));
#ifdef HAVE_OPENCL
size_t n = params.blobs.size();
l->umat_blobs.resize(n);
for (int i = 0; i < n; i++)
l->umat_blobs[i] = params.blobs[i].getUMat(ACCESS_READ);
#endif
return l;
}

@ -187,7 +187,7 @@ public:
int c, j, k, n = nsrcs;
const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0;
float* dstptr0 = dst->ptr<float>();
int blockSize0 = 1 << 12, blockSize = blockSize0;
int blockSize0 = 1 << 12, blockSize;
for( size_t ofs = stripeStart; ofs < stripeEnd; ofs += blockSize )
{

@ -190,6 +190,7 @@ public:
size_t num = total(shape(inp0.size), 0, startAxis);
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
CV_Assert(num * numPlanes != 0);
size_t planeSize = inp0.total() / (num * numPlanes);
for (size_t n = 0; n < num; ++n)
{

@ -189,18 +189,16 @@ public:
else
outTailShape_.assign(1, _numOut);
int _numTimeStamps, _numSamples;
int _numSamples;
if (useTimestampDim)
{
CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
_numTimeStamps = inp0[0];
_numSamples = inp0[1];
outResShape.push_back(_numTimeStamps);
outResShape.push_back(inp0[0]);
}
else
{
CV_Assert(inp0.size() >= 2 && total(inp0, 1) == _numInp);
_numTimeStamps = 1;
_numSamples = inp0[0];
}

@ -14,7 +14,7 @@ namespace cv { namespace dnn {
class ResizeLayerImpl : public ResizeLayer
{
public:
ResizeLayerImpl(const LayerParams& params) : scaleWidth(0), scaleHeight(0)
ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(0), zoomFactorHeight(0), scaleWidth(0), scaleHeight(0)
{
setParamsFrom(params);
outWidth = params.get<float>("width", 0);

@ -563,10 +563,10 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
}
if (use_half_ && bias_half.empty() && !bias.empty())
convertFp16((UMat&)bias, bias_half);
convertFp16(bias, bias_half);
if (use_half_ && weights_half.empty())
convertFp16((UMat&)weight, weights_half);
convertFp16(weight, weights_half);
prepareKernel(bottom, top, weight, (use_half_) ? bias_half : bias, numImages);
if (bestKernelConfig.empty())

@ -68,19 +68,32 @@ static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std:
{
std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
std::reverse(reversedShape.begin(), reversedShape.end());
return InferenceEngine::DataPtr(
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32, estimateLayout(m))
);
if (m.type() == CV_32F)
return InferenceEngine::DataPtr(
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32, estimateLayout(m))
);
else if (m.type() == CV_8U)
return InferenceEngine::DataPtr(
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::U8, estimateLayout(m))
);
else
CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type()));
}
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape,
InferenceEngine::Layout layout)
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape,
InferenceEngine::Layout layout)
{
return InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
layout, shape, (float*)m.data);
if (m.type() == CV_32F)
return InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
layout, shape, (float*)m.data);
else if (m.type() == CV_8U)
return InferenceEngine::make_shared_blob<uint8_t>(InferenceEngine::Precision::U8,
layout, shape, (uint8_t*)m.data);
else
CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type()));
}
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout)
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout)
{
std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
std::reverse(reversedShape.begin(), reversedShape.end());
@ -102,6 +115,24 @@ InfEngineBackendWrapper::InfEngineBackendWrapper(int targetId, const cv::Mat& m)
blob = wrapToInfEngineBlob(m, estimateLayout(m));
}
InfEngineBackendWrapper::InfEngineBackendWrapper(Ptr<BackendWrapper> wrapper)
: BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE, wrapper->targetId)
{
Ptr<InfEngineBackendWrapper> ieWrapper = wrapper.dynamicCast<InfEngineBackendWrapper>();
CV_Assert(!ieWrapper.empty());
InferenceEngine::DataPtr srcData = ieWrapper->dataPtr;
dataPtr = InferenceEngine::DataPtr(
new InferenceEngine::Data(srcData->name, srcData->dims, srcData->precision,
srcData->layout)
);
blob = ieWrapper->blob;
}
Ptr<BackendWrapper> InfEngineBackendWrapper::create(Ptr<BackendWrapper> wrapper)
{
return Ptr<BackendWrapper>(new InfEngineBackendWrapper(wrapper));
}
InfEngineBackendWrapper::~InfEngineBackendWrapper()
{
@ -149,10 +180,15 @@ InferenceEngine::Precision InfEngineBackendNet::getPrecision() noexcept
return precision;
}
InferenceEngine::Precision InfEngineBackendNet::getPrecision() const noexcept
{
return precision;
}
// Assume that the network's outputs are unconnected blobs.
void InfEngineBackendNet::getOutputsInfo(InferenceEngine::OutputsDataMap &outputs_) noexcept
{
outputs_ = outputs;
const_cast<const InfEngineBackendNet*>(this)->getOutputsInfo(outputs_);
}
void InfEngineBackendNet::getOutputsInfo(InferenceEngine::OutputsDataMap &outputs_) const noexcept
{
@ -162,7 +198,7 @@ void InfEngineBackendNet::getOutputsInfo(InferenceEngine::OutputsDataMap &output
// Returns input references that aren't connected to internal outputs.
void InfEngineBackendNet::getInputsInfo(InferenceEngine::InputsDataMap &inputs_) noexcept
{
inputs_ = inputs;
const_cast<const InfEngineBackendNet*>(this)->getInputsInfo(inputs_);
}
// Returns input references that aren't connected to internal outputs.
@ -173,7 +209,11 @@ void InfEngineBackendNet::getInputsInfo(InferenceEngine::InputsDataMap &inputs_)
InferenceEngine::InputInfo::Ptr InfEngineBackendNet::getInput(const std::string &inputName) noexcept
{
getInputsInfo(inputs);
return const_cast<const InfEngineBackendNet*>(this)->getInput(inputName);
}
InferenceEngine::InputInfo::Ptr InfEngineBackendNet::getInput(const std::string &inputName) const noexcept
{
const auto& it = inputs.find(inputName);
CV_Assert(it != inputs.end());
return it->second;
@ -187,7 +227,17 @@ void InfEngineBackendNet::getName(char*, size_t) const noexcept
{
}
const std::string& InfEngineBackendNet::getName() const noexcept
{
return name;
}
size_t InfEngineBackendNet::layerCount() noexcept
{
return const_cast<const InfEngineBackendNet*>(this)->layerCount();
}
size_t InfEngineBackendNet::layerCount() const noexcept
{
return layers.size();
}
@ -227,6 +277,13 @@ InfEngineBackendNet::addOutput(const std::string &layerName, size_t outputIndex,
InferenceEngine::StatusCode
InfEngineBackendNet::getLayerByName(const char *layerName, InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) noexcept
{
return const_cast<const InfEngineBackendNet*>(this)->getLayerByName(layerName, out, resp);
}
InferenceEngine::StatusCode InfEngineBackendNet::getLayerByName(const char *layerName,
InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) const noexcept
{
for (auto& l : layers)
{
@ -254,7 +311,12 @@ InferenceEngine::TargetDevice InfEngineBackendNet::getTargetDevice() noexcept
return targetDevice;
}
InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t size) noexcept
InferenceEngine::TargetDevice InfEngineBackendNet::getTargetDevice() const noexcept
{
return targetDevice;
}
InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t) noexcept
{
CV_Error(Error::StsNotImplemented, "");
return InferenceEngine::StatusCode::OK;
@ -329,6 +391,7 @@ void InfEngineBackendNet::init(int targetId)
{
CV_Assert(allBlobs.find(it.first) != allBlobs.end());
inpBlobs[it.first] = allBlobs[it.first];
it.second->setPrecision(inpBlobs[it.first]->precision());
}
// Set up output blobs.
@ -342,7 +405,9 @@ void InfEngineBackendNet::init(int targetId)
switch (targetId)
{
case DNN_TARGET_CPU: setTargetDevice(InferenceEngine::TargetDevice::eCPU); break;
case DNN_TARGET_OPENCL_FP16: setPrecision(InferenceEngine::Precision::FP16); // Fallback to the next.
case DNN_TARGET_OPENCL_FP16:
setPrecision(InferenceEngine::Precision::FP16);
/* Falls through. */
case DNN_TARGET_OPENCL: setTargetDevice(InferenceEngine::TargetDevice::eGPU); break;
case DNN_TARGET_MYRIAD:
{
@ -363,9 +428,8 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
try
{
static std::map<std::string, InferenceEngine::InferenceEnginePluginPtr> sharedPlugins;
std::string deviceName = InferenceEngine::getDeviceName(targetDevice);
auto pluginIt = sharedPlugins.find(deviceName);
static std::map<InferenceEngine::TargetDevice, InferenceEngine::InferenceEnginePluginPtr> sharedPlugins;
auto pluginIt = sharedPlugins.find(targetDevice);
if (pluginIt != sharedPlugins.end())
{
enginePtr = pluginIt->second;
@ -373,7 +437,7 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
else
{
enginePtr = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(targetDevice);
sharedPlugins[deviceName] = enginePtr;
sharedPlugins[targetDevice] = enginePtr;
if (targetDevice == InferenceEngine::TargetDevice::eCPU)
{
@ -427,7 +491,7 @@ void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs
auto wrappers = infEngineWrappers(ptrs);
for (const auto& wrapper : wrappers)
{
allBlobs[wrapper->dataPtr->name] = wrapper->blob;
allBlobs.insert({wrapper->dataPtr->name, wrapper->blob});
}
}

@ -8,6 +8,8 @@
#ifndef __OPENCV_DNN_OP_INF_ENGINE_HPP__
#define __OPENCV_DNN_OP_INF_ENGINE_HPP__
#include "opencv2/core/cvdef.h"
#ifdef HAVE_INF_ENGINE
#if defined(__GNUC__) && __GNUC__ >= 5
//#pragma GCC diagnostic push
@ -34,7 +36,9 @@ public:
void setPrecision(InferenceEngine::Precision p) noexcept;
virtual InferenceEngine::Precision getPrecision() noexcept CV_OVERRIDE;
virtual InferenceEngine::Precision getPrecision() noexcept;
virtual InferenceEngine::Precision getPrecision() const noexcept;
virtual void getOutputsInfo(InferenceEngine::OutputsDataMap &out) noexcept /*CV_OVERRIDE*/;
@ -44,13 +48,19 @@ public:
virtual void getInputsInfo(InferenceEngine::InputsDataMap &inputs) const noexcept /*CV_OVERRIDE*/;
virtual InferenceEngine::InputInfo::Ptr getInput(const std::string &inputName) noexcept CV_OVERRIDE;
virtual InferenceEngine::InputInfo::Ptr getInput(const std::string &inputName) noexcept;
virtual InferenceEngine::InputInfo::Ptr getInput(const std::string &inputName) const noexcept;
virtual void getName(char *pName, size_t len) noexcept;
virtual void getName(char *pName, size_t len) const noexcept;
virtual size_t layerCount() noexcept CV_OVERRIDE;
virtual const std::string& getName() const noexcept;
virtual size_t layerCount() noexcept;
virtual size_t layerCount() const noexcept;
virtual InferenceEngine::DataPtr& getData(const char *dname) noexcept CV_OVERRIDE;
@ -58,15 +68,21 @@ public:
virtual InferenceEngine::StatusCode addOutput(const std::string &layerName,
size_t outputIndex = 0,
InferenceEngine::ResponseDesc *resp = nullptr) noexcept CV_OVERRIDE;
InferenceEngine::ResponseDesc *resp = nullptr) noexcept;
virtual InferenceEngine::StatusCode getLayerByName(const char *layerName,
InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) noexcept;
virtual InferenceEngine::StatusCode getLayerByName(const char *layerName,
InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) noexcept CV_OVERRIDE;
InferenceEngine::ResponseDesc *resp) const noexcept;
virtual void setTargetDevice(InferenceEngine::TargetDevice device) noexcept CV_OVERRIDE;
virtual InferenceEngine::TargetDevice getTargetDevice() noexcept CV_OVERRIDE;
virtual InferenceEngine::TargetDevice getTargetDevice() noexcept;
virtual InferenceEngine::TargetDevice getTargetDevice() const noexcept;
virtual InferenceEngine::StatusCode setBatchSize(const size_t size) noexcept CV_OVERRIDE;
@ -94,6 +110,8 @@ private:
InferenceEngine::ExecutableNetwork netExec;
InferenceEngine::InferRequest infRequest;
std::string name;
void initPlugin(InferenceEngine::ICNNNetwork& net);
};
@ -115,19 +133,23 @@ class InfEngineBackendWrapper : public BackendWrapper
public:
InfEngineBackendWrapper(int targetId, const Mat& m);
InfEngineBackendWrapper(Ptr<BackendWrapper> wrapper);
~InfEngineBackendWrapper();
static Ptr<BackendWrapper> create(Ptr<BackendWrapper> wrapper);
virtual void copyToHost() CV_OVERRIDE;
virtual void setHostDirty() CV_OVERRIDE;
InferenceEngine::DataPtr dataPtr;
InferenceEngine::TBlob<float>::Ptr blob;
InferenceEngine::Blob::Ptr blob;
};
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape, InferenceEngine::Layout layout);
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape, InferenceEngine::Layout layout);
InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr);

@ -771,6 +771,13 @@ void TFImporter::populateNet(Net dstNet)
type = layer.op();
}
// For object detection networks, the TensorFlow Object Detection API
// predicts bounding box deltas in yxYX (ymin, xmin, ymax, xmax) order.
// We can handle this either while the DetectionOutput layer parses
// predictions or by shuffling the last convolution's weights
// (a standalone sketch of the swap follows this importer diff).
bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
getLayerAttr(layer, "loc_pred_transposed").b();
layerParams.set("bias_term", false);
layerParams.blobs.resize(1);
@ -784,18 +791,32 @@ void TFImporter::populateNet(Net dstNet)
blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
ExcludeLayer(net, weights_layer_index, 0, false);
layers_to_ignore.insert(next_layers[0].first);
// Shuffle bias from yxYX to xyXY.
if (locPredTransposed)
{
const int numWeights = layerParams.blobs[1].total();
float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
CV_Assert(numWeights % 4 == 0);
for (int i = 0; i < numWeights; i += 2)
{
std::swap(biasData[i], biasData[i + 1]);
}
}
}
const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id);
kernelFromTensor(kernelTensor, layerParams.blobs[0]);
releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
int* kshape = layerParams.blobs[0].size.p;
const int outCh = kshape[0];
const int inCh = kshape[1];
const int height = kshape[2];
const int width = kshape[3];
if (type == "DepthwiseConv2dNative")
{
CV_Assert(!locPredTransposed);
const int chMultiplier = kshape[0];
const int inCh = kshape[1];
const int height = kshape[2];
const int width = kshape[3];
Mat copy = layerParams.blobs[0].clone();
float* src = (float*)copy.data;
@ -814,9 +835,21 @@ void TFImporter::populateNet(Net dstNet)
size_t* kstep = layerParams.blobs[0].step.p;
kstep[0] = kstep[1]; // fix steps too
}
layerParams.set("kernel_h", kshape[2]);
layerParams.set("kernel_w", kshape[3]);
layerParams.set("num_output", kshape[0]);
layerParams.set("kernel_h", height);
layerParams.set("kernel_w", width);
layerParams.set("num_output", outCh);
// Shuffle output channels from yxYX to xyXY.
if (locPredTransposed)
{
const int slice = height * width * inCh;
for (int i = 0; i < outCh; i += 2)
{
cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i));
cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i + 1));
std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
}
}
setStrides(layerParams, layer);
setPadding(layerParams, layer);
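The loc_pred_transposed handling above swaps adjacent (y, x) pairs both in the bias vector and across the output channels of the last convolution. A standalone sketch of the same pairwise swap, detached from the importer (the bias values are arbitrary; a real vector has 4*N entries per anchor):

// Illustration of the yxYX -> xyXY shuffle performed by the importer above.
#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<float> bias = {10.f, 20.f, 30.f, 40.f};  // y0 x0 Y0 X0
    for (size_t i = 0; i + 1 < bias.size(); i += 2)
        std::swap(bias[i], bias[i + 1]);                  // -> x0 y0 X0 Y0
    for (float v : bias)
        std::printf("%.0f ", v);                          // prints: 20 10 40 30
    std::printf("\n");
    return 0;
}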

@ -107,12 +107,10 @@ TEST_P(Convolution, Accuracy)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
throw SkipTestException("");
// TODO: unstable test cases
if (backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
inChannels == 6 && outChannels == 9 && group == 1 && inSize == Size(5, 6) &&
kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1) && dilation == Size(1, 1) &&
hasBias)
throw SkipTestException("");
if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV &&
(targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1))
throw SkipTestException("Skip unstable test");
int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
Mat weights(4, &sz[0], CV_32F);

@ -0,0 +1,238 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "test_precomp.hpp"
#ifdef HAVE_INF_ENGINE
#include <opencv2/core/utils/filesystem.hpp>
#include <inference_engine.hpp>
#include <ie_icnn_network.hpp>
#include <ie_extension.h>
namespace opencv_test { namespace {
static void initDLDTDataPath()
{
#ifndef WINRT
static bool initialized = false;
if (!initialized)
{
const char* dldtTestDataPath = getenv("INTEL_CVSDK_DIR");
if (dldtTestDataPath)
cvtest::addDataSearchPath(cv::utils::fs::join(dldtTestDataPath, "deployment_tools"));
initialized = true;
}
#endif
}
using namespace cv;
using namespace cv::dnn;
using namespace InferenceEngine;
static inline void genData(const std::vector<size_t>& dims, Mat& m, Blob::Ptr& dataPtr)
{
std::vector<int> reversedDims(dims.begin(), dims.end());
std::reverse(reversedDims.begin(), reversedDims.end());
m.create(reversedDims, CV_32F);
randu(m, -1, 1);
dataPtr = make_shared_blob<float>(Precision::FP32, dims, (float*)m.data);
}
void runIE(Target target, const std::string& xmlPath, const std::string& binPath,
std::map<std::string, cv::Mat>& inputsMap, std::map<std::string, cv::Mat>& outputsMap)
{
CNNNetReader reader;
reader.ReadNetwork(xmlPath);
reader.ReadWeights(binPath);
CNNNetwork net = reader.getNetwork();
InferenceEnginePluginPtr enginePtr;
InferencePlugin plugin;
ExecutableNetwork netExec;
InferRequest infRequest;
TargetDevice targetDevice;
switch (target)
{
case DNN_TARGET_CPU:
targetDevice = TargetDevice::eCPU;
break;
case DNN_TARGET_OPENCL:
case DNN_TARGET_OPENCL_FP16:
targetDevice = TargetDevice::eGPU;
break;
case DNN_TARGET_MYRIAD:
targetDevice = TargetDevice::eMYRIAD;
break;
default:
CV_Error(Error::StsNotImplemented, "Unknown target");
};
try
{
enginePtr = PluginDispatcher({""}).getSuitablePlugin(targetDevice);
if (targetDevice == TargetDevice::eCPU)
{
std::string suffixes[] = {"_avx2", "_sse4", ""};
bool haveFeature[] = {
checkHardwareSupport(CPU_AVX2),
checkHardwareSupport(CPU_SSE4_2),
true
};
for (int i = 0; i < 3; ++i)
{
if (!haveFeature[i])
continue;
#ifdef _WIN32
std::string libName = "cpu_extension" + suffixes[i] + ".dll";
#else
std::string libName = "libcpu_extension" + suffixes[i] + ".so";
#endif // _WIN32
try
{
IExtensionPtr extension = make_so_pointer<IExtension>(libName);
enginePtr->AddExtension(extension, 0);
break;
}
catch(...) {}
}
// Some networks can work without a library of extra layers.
}
plugin = InferencePlugin(enginePtr);
netExec = plugin.LoadNetwork(net, {});
infRequest = netExec.CreateInferRequest();
}
catch (const std::exception& ex)
{
CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what()));
}
// Fill input blobs.
inputsMap.clear();
BlobMap inputBlobs;
for (auto& it : net.getInputsInfo())
{
genData(it.second->getDims(), inputsMap[it.first], inputBlobs[it.first]);
}
infRequest.SetInput(inputBlobs);
// Fill output blobs.
outputsMap.clear();
BlobMap outputBlobs;
for (auto& it : net.getOutputsInfo())
{
genData(it.second->dims, outputsMap[it.first], outputBlobs[it.first]);
}
infRequest.SetOutput(outputBlobs);
infRequest.Infer();
}
std::vector<String> getOutputsNames(const Net& net)
{
std::vector<String> names;
if (names.empty())
{
std::vector<int> outLayers = net.getUnconnectedOutLayers();
std::vector<String> layersNames = net.getLayerNames();
names.resize(outLayers.size());
for (size_t i = 0; i < outLayers.size(); ++i)
names[i] = layersNames[outLayers[i] - 1];
}
return names;
}
void runCV(Target target, const std::string& xmlPath, const std::string& binPath,
const std::map<std::string, cv::Mat>& inputsMap,
std::map<std::string, cv::Mat>& outputsMap)
{
Net net = readNet(xmlPath, binPath);
for (auto& it : inputsMap)
net.setInput(it.second, it.first);
net.setPreferableTarget(target);
std::vector<String> outNames = getOutputsNames(net);
std::vector<Mat> outs;
net.forward(outs, outNames);
outputsMap.clear();
EXPECT_EQ(outs.size(), outNames.size());
for (int i = 0; i < outs.size(); ++i)
{
EXPECT_TRUE(outputsMap.insert({outNames[i], outs[i]}).second);
}
}
typedef TestWithParam<tuple<Target, String> > DNNTestOpenVINO;
TEST_P(DNNTestOpenVINO, models)
{
Target target = (dnn::Target)(int)get<0>(GetParam());
std::string modelName = get<1>(GetParam());
if (modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("");
std::string precision = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "FP16" : "FP32";
std::string prefix = utils::fs::join("intel_models",
utils::fs::join(modelName,
utils::fs::join(precision, modelName)));
std::string xmlPath = findDataFile(prefix + ".xml");
std::string binPath = findDataFile(prefix + ".bin");
std::map<std::string, cv::Mat> inputsMap;
std::map<std::string, cv::Mat> ieOutputsMap, cvOutputsMap;
runIE(target, xmlPath, binPath, inputsMap, ieOutputsMap);
runCV(target, xmlPath, binPath, inputsMap, cvOutputsMap);
EXPECT_EQ(ieOutputsMap.size(), cvOutputsMap.size());
for (auto& srcIt : ieOutputsMap)
{
auto dstIt = cvOutputsMap.find(srcIt.first);
CV_Assert(dstIt != cvOutputsMap.end());
double normInf = cvtest::norm(srcIt.second, dstIt->second, cv::NORM_INF);
EXPECT_EQ(normInf, 0);
}
}
static testing::internal::ParamGenerator<String> intelModels()
{
initDLDTDataPath();
std::vector<String> modelsNames;
std::string path;
try
{
path = findDataDirectory("intel_models", false);
}
catch (...)
{
std::cerr << "ERROR: Can't find OpenVINO models. Check INTEL_CVSDK_DIR environment variable (run setup.sh)" << std::endl;
return ValuesIn(modelsNames); // empty list
}
cv::utils::fs::glob_relative(path, "", modelsNames, false, true);
modelsNames.erase(
std::remove_if(modelsNames.begin(), modelsNames.end(),
[&](const String& dir){ return !utils::fs::isDirectory(utils::fs::join(path, dir)); }),
modelsNames.end()
);
CV_Assert(!modelsNames.empty());
return ValuesIn(modelsNames);
}
INSTANTIATE_TEST_CASE_P(/**/, DNNTestOpenVINO, Combine(
Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16), intelModels()
));
}}
#endif // HAVE_INF_ENGINE

@ -291,7 +291,7 @@ TEST_P(Test_Caffe_layers, Fused_Concat)
TEST_P(Test_Caffe_layers, Eltwise)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
testLayerUsingCaffeModels("layer_eltwise");
}
@ -939,6 +939,25 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
}
TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
{
Mat inp = blobFromNPY(_tf("blob.npy"));
Mat inputs[] = {Mat(inp.dims, inp.size, CV_8U), Mat()};
randu(inputs[0], 0, 255);
inputs[0].convertTo(inputs[1], CV_32F);
Mat outs[2];
for (int i = 0; i < 2; ++i)
{
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
net.setInput(inputs[i]);
outs[i] = net.forward();
ASSERT_EQ(outs[i].type(), CV_32F);
}
normAssert(outs[0], outs[1]);
}
// 1. Create a .prototxt file with the following network:
// layer {
// type: "Input" name: "data" top: "data"
@ -961,22 +980,65 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
// net.save('/path/to/caffemodel')
//
// 3. Convert using ModelOptimizer.
TEST(Test_DLDT, two_inputs)
typedef testing::TestWithParam<tuple<int, int> > Test_DLDT_two_inputs;
TEST_P(Test_DLDT_two_inputs, as_IR)
{
int firstInpType = get<0>(GetParam());
int secondInpType = get<1>(GetParam());
// TODO: It looks like a bug in Inference Engine.
if (secondInpType == CV_8U)
throw SkipTestException("");
Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
int inpSize[] = {1, 2, 3};
Mat firstInp(3, &inpSize[0], CV_32F);
Mat secondInp(3, &inpSize[0], CV_32F);
randu(firstInp, -1, 1);
randu(secondInp, -1, 1);
Mat firstInp(3, &inpSize[0], firstInpType);
Mat secondInp(3, &inpSize[0], secondInpType);
randu(firstInp, 0, 255);
randu(secondInp, 0, 255);
net.setInput(firstInp, "data");
net.setInput(secondInp, "second_input");
Mat out = net.forward();
normAssert(out, firstInp + secondInp);
Mat ref;
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
normAssert(out, ref);
}
TEST_P(Test_DLDT_two_inputs, as_backend)
{
static const float kScale = 0.5f;
static const float kScaleInv = 1.0f / kScale;
Net net;
LayerParams lp;
lp.type = "Eltwise";
lp.name = "testLayer";
lp.set("operation", "sum");
int eltwiseId = net.addLayerToPrev(lp.name, lp.type, lp); // connect to a first input
net.connect(0, 1, eltwiseId, 1); // connect to a second input
int inpSize[] = {1, 2, 3};
Mat firstInp(3, &inpSize[0], get<0>(GetParam()));
Mat secondInp(3, &inpSize[0], get<1>(GetParam()));
randu(firstInp, 0, 255);
randu(secondInp, 0, 255);
net.setInputsNames({"data", "second_input"});
net.setInput(firstInp, "data", kScale);
net.setInput(secondInp, "second_input", kScaleInv);
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
Mat out = net.forward();
Mat ref;
addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
normAssert(out, ref);
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs, Combine(
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F)
));
class UnsupportedLayer : public Layer
{
public:

@ -138,4 +138,44 @@ TEST(LayerFactory, custom_layers)
LayerFactory::unregisterLayer("CustomType");
}
typedef testing::TestWithParam<tuple<float, Vec3f, int, tuple<Backend, Target> > > setInput;
TEST_P(setInput, normalization)
{
const float kScale = get<0>(GetParam());
const Scalar kMean = get<1>(GetParam());
const int dtype = get<2>(GetParam());
const int backend = get<0>(get<3>(GetParam()));
const int target = get<1>(get<3>(GetParam()));
const bool kSwapRB = true;
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD && !checkMyriadTarget())
throw SkipTestException("Myriad is not available/disabled in OpenCV");
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16 && dtype != CV_32F)
throw SkipTestException("");
Mat inp(5, 5, CV_8UC3);
randu(inp, 0, 255);
Mat ref = blobFromImage(inp, kScale, Size(), kMean, kSwapRB, /*crop*/false);
LayerParams lp;
Net net;
net.addLayerToPrev("testLayer", "Identity", lp);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat blob = blobFromImage(inp, 1.0, Size(), Scalar(), kSwapRB, /*crop*/false, dtype);
ASSERT_EQ(blob.type(), dtype);
net.setInput(blob, "", kScale, kMean);
Mat out = net.forward();
ASSERT_EQ(out.type(), CV_32F);
normAssert(ref, out, "", 4e-4, 1e-3);
}
INSTANTIATE_TEST_CASE_P(/**/, setInput, Combine(
Values(1.0f, 1.0 / 127.5),
Values(Vec3f(), Vec3f(50, 50, 50), Vec3f(10, 50, 140)),
Values(CV_32F, CV_8U),
dnnBackendsAndTargets()
));
}} // namespace

@ -309,7 +309,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : default_l1;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.025 : default_lInf;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf;
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
}

@ -1236,7 +1236,6 @@ BriskScaleSpace::isMax2D(const int layer, const int x_layer, const int y_layer)
{
// in this case, we have to analyze the situation more carefully:
// the values are gaussian blurred and then we really decide
data = scores.ptr() + y_layer * scorescols + x_layer;
int smoothedcenter = 4 * center + 2 * (s_10 + s10 + s0_1 + s01) + s_1_1 + s1_1 + s_11 + s11;
for (unsigned int i = 0; i < deltasize; i += 2)
{
@ -1312,8 +1311,7 @@ BriskScaleSpace::refine3D(const int layer, const int x_layer, const int y_layer,
int s_2_2 = l.getAgastScore_5_8(x_layer + 1, y_layer + 1, 1);
max_below = std::max(s_2_2, max_below);
max_below_float = subpixel2D(s_0_0, s_0_1, s_0_2, s_1_0, s_1_1, s_1_2, s_2_0, s_2_1, s_2_2, delta_x_below,
delta_y_below);
subpixel2D(s_0_0, s_0_1, s_0_2, s_1_0, s_1_1, s_1_2, s_2_0, s_2_1, s_2_2, delta_x_below, delta_y_below);
max_below_float = (float)max_below;
}
else

@ -373,8 +373,6 @@ void KAZEFeatures::Determinant_Hessian(std::vector<KeyPoint>& kpts)
is_out = true;
}
is_out = false;
if (is_out == false) {
if (is_repeated == false) {
kpts.push_back(kpts_par_[i][j]);

@ -175,7 +175,6 @@ std::map<int, ExifEntry_t > ExifReader::getExif()
CV_THROW (ExifParsingError());
}
m_stream.read( reinterpret_cast<char*>(&m_data[0]), exifSize - offsetToTiffHeader );
count = m_stream.gcount();
exifFound = true;
break;

@ -265,7 +265,7 @@ bool BmpDecoder::readData( Mat& img )
for(;;)
{
int code = m_strm.getWord();
int len = code & 255;
const int len = code & 255;
code >>= 8;
if( len != 0 ) // encoded mode
{
@ -304,16 +304,13 @@ bool BmpDecoder::readData( Mat& img )
else
{
int x_shift3 = (int)(line_end - data);
int y_shift = m_height - y;
if( code == 2 )
{
x_shift3 = m_strm.getByte()*nch;
y_shift = m_strm.getByte();
m_strm.getByte();
}
len = x_shift3 + ((y_shift * width3) & ((code == 0) - 1));
if( color )
data = FillUniColor( data, line_end, step, width3,
y, m_height, x_shift3,

@ -689,7 +689,7 @@ bool PAMEncoder::write( const Mat& img, const std::vector<int>& params )
tmp += sprintf( buffer + tmp, "MAXVAL %d\n", (1 << img.elemSize1()*8) - 1);
if (fmt)
tmp += sprintf( buffer + tmp, "TUPLTYPE %s\n", fmt->name );
tmp += sprintf( buffer + tmp, "ENDHDR\n" );
sprintf( buffer + tmp, "ENDHDR\n" );
strm.putBytes( buffer, (int)strlen(buffer) );
/* write data */

@ -255,22 +255,21 @@ bool TiffDecoder::readHeader()
{
case 8:
m_type = CV_MAKETYPE(CV_8U, photometric > 1 ? wanted_channels : 1);
result = true;
break;
case 16:
m_type = CV_MAKETYPE(CV_16U, photometric > 1 ? wanted_channels : 1);
result = true;
break;
case 32:
m_type = CV_MAKETYPE(CV_32F, photometric > 1 ? 3 : 1);
result = true;
break;
case 64:
m_type = CV_MAKETYPE(CV_64F, photometric > 1 ? 3 : 1);
result = true;
break;
default:
result = false;
}
result = true;
}
}

@ -855,7 +855,6 @@ icvTraceContour_32s( int *ptr, int step, int *stop_ptr, int is_hole )
for( ;; )
{
CV_Assert(i3 != NULL);
s_end = s;
s = std::min(s, MAX_SIZE - 1);
while( s < MAX_SIZE - 1 )
@ -1479,7 +1478,7 @@ icvFindContoursInInterval( const CvArr* src,
cv::Ptr<CvMemStorage> storage01;
CvSeq* first = 0;
int i, j, k, n;
int j, k, n;
uchar* src_data = 0;
int img_step = 0;
@ -1547,7 +1546,6 @@ icvFindContoursInInterval( const CvArr* src,
// First line. None of the runs is bound
tmp.pt.y = 0;
i = 0;
CV_WRITE_SEQ_ELEM( tmp, writer );
upper_line = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
@ -1580,7 +1578,7 @@ icvFindContoursInInterval( const CvArr* src,
last_elem = tmp_prev;
tmp_prev->next = 0;
for( i = 1; i < img_size.height; i++ )
for( int i = 1; i < img_size.height; i++ )
{
//------// Find runs in next line
src_data += img_step;

@ -338,7 +338,6 @@ LineAA( Mat& img, Point2l pt1, Point2l pt2, const void* color )
if( ax > ay )
{
dx = ax;
dy = (dy ^ j) - j;
pt1.x ^= pt2.x & j;
pt2.x ^= pt1.x & j;
@ -362,7 +361,6 @@ LineAA( Mat& img, Point2l pt1, Point2l pt2, const void* color )
}
else
{
dy = ay;
dx = (dx ^ i) - i;
pt1.x ^= pt2.x & i;
pt2.x ^= pt1.x & i;
@ -677,7 +675,6 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)
if( ax > ay )
{
dx = ax;
dy = (dy ^ j) - j;
pt1.x ^= pt2.x & j;
pt2.x ^= pt1.x & j;
@ -692,7 +689,6 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)
}
else
{
dy = ay;
dx = (dx ^ i) - i;
pt1.x ^= pt2.x & i;
pt2.x ^= pt1.x & i;

@ -128,8 +128,6 @@ int SymmColumnVec_32f_Symm_AVX(const float** src, const float* ky, float* dst, f
for( k = 1; k <= ksize2; k++ )
{
f = _mm_set1_ps(ky[k]);
S = src[k] + i;
S2 = src[-k] + i;
x0 = _mm_add_ps(_mm_load_ps(src[k]+i), _mm_load_ps(src[-k] + i));
s0 = _mm_add_ps(s0, _mm_mul_ps(x0, f));
}
@ -144,7 +142,7 @@ int SymmColumnVec_32f_Symm_AVX(const float** src, const float* ky, float* dst, f
int SymmColumnVec_32f_Unsymm_AVX(const float** src, const float* ky, float* dst, float delta, int width, int ksize2)
{
int i = 0, k;
const float *S, *S2;
const float *S2;
const __m128 d4 = _mm_set1_ps(delta);
const __m256 d8 = _mm256_set1_ps(delta);
@ -152,11 +150,10 @@ int SymmColumnVec_32f_Unsymm_AVX(const float** src, const float* ky, float* dst,
{
__m256 f, s0 = d8, s1 = d8;
__m256 x0;
S = src[0] + i;
for (k = 1; k <= ksize2; k++)
{
S = src[k] + i;
const float *S = src[k] + i;
S2 = src[-k] + i;
f = _mm256_set1_ps(ky[k]);
x0 = _mm256_sub_ps(_mm256_loadu_ps(S), _mm256_loadu_ps(S2));

@ -4284,7 +4284,7 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
size_t src_step = _src.step(), src_offset = _src.offset();
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (esz == 0
if (esz == 0 || src_step == 0
|| (src_offset % src_step) % esz != 0
|| (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
|| !(borderType == BORDER_CONSTANT

@ -467,7 +467,7 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask,
if( rect )
*rect = Rect();
int i, connectivity = flags & 255;
int i;
union {
uchar b[4];
int i[4];
@ -491,9 +491,8 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask,
CV_Error( CV_StsBadArg, "Number of channels in input image must be 1 or 3" );
}
if( connectivity == 0 )
connectivity = 4;
else if( connectivity != 4 && connectivity != 8 )
const int connectivity = flags & 255;
if( connectivity != 0 && connectivity != 4 && connectivity != 8 )
CV_Error( CV_StsBadFlag, "Connectivity must be 4, 0(=4) or 8" );
bool is_simple = mask.empty() && (flags & FLOODFILL_MASK_ONLY) == 0;

@ -1930,7 +1930,7 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
Mat planes[2];
NAryMatIterator it(arrays, planes);
double result = 0;
int j, len = (int)it.size;
int j;
CV_Assert( H1.type() == H2.type() && H1.depth() == CV_32F );
@ -1946,7 +1946,7 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
{
const float* h1 = it.planes[0].ptr<float>();
const float* h2 = it.planes[1].ptr<float>();
len = it.planes[0].rows*it.planes[0].cols*H1.channels();
const int len = it.planes[0].rows*it.planes[0].cols*H1.channels();
j = 0;
if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT))

@ -413,7 +413,6 @@ HoughLinesSDiv( InputArray image, OutputArray lines, int type,
// Find peaks in maccum...
for( index = 0; index < sfn; index++ )
{
i = 0;
int pos = (int)(lst.size() - 1);
if( pos < 0 || lst[pos].value < mcaccum[index] )
{

@ -401,7 +401,6 @@ static void findMinimumAreaEnclosingTriangle(const std::vector<cv::Point2f> &pol
a = 1;
b = 2;
c = 0;
// Main algorithm steps

@ -370,6 +370,7 @@ static void cvUndistortPointsInternal( const CvMat* _src, CvMat* _dst, const CvM
const CvMat* _distCoeffs,
const CvMat* matR, const CvMat* matP, cv::TermCriteria criteria)
{
CV_Assert(criteria.isValid());
double A[3][3], RR[3][3], k[14]={0,0,0,0,0,0,0,0,0,0,0,0,0,0};
CvMat matA=cvMat(3, 3, CV_64F, A), _Dk;
CvMat _RR=cvMat(3, 3, CV_64F, RR);

@ -1187,7 +1187,7 @@ public:
prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
}
CV_Assert(total > 0);
int dcount0 = max_buf_size/(2*total);
dcount0 = std::max( dcount0, 1 );
dcount0 = std::min( dcount0, count );

@ -5,10 +5,9 @@
#include "test_precomp.hpp"
namespace opencv_test
{
namespace opencv_test { namespace {
String qrcode_images_name[] = {
std::string qrcode_images_name[] = {
"20110817_030.jpg",
"20110817_048.jpg",
"img_20120226_161648.jpg",
@ -25,24 +24,25 @@ String qrcode_images_name[] = {
TEST(Objdetect_QRCode, generate_test_data)
{
String root = cvtest::TS::ptr()->get_data_path() + "qrcode/";
String dataset_config = cvtest::TS::ptr()->get_data_path() + "qrcode/dataset_config.json";
const std::string root = "qrcode/";
const std::string dataset_config = findDataFile(root + "dataset_config.json");
FileStorage file_config(dataset_config, FileStorage::WRITE);
file_config << "test_images" << "[";
size_t images_count = sizeof(qrcode_images_name) / sizeof(String);
size_t images_count = sizeof(qrcode_images_name) / sizeof(qrcode_images_name[0]);
for (size_t i = 0; i < images_count; i++)
{
file_config << "{:" << "image_name" << qrcode_images_name[i];
String image_path = root + qrcode_images_name[i];
std::vector<Point> transform;
std::string image_path = findDataFile(root + qrcode_images_name[i]);
std::vector<Point> corners;
Mat src = imread(image_path, IMREAD_GRAYSCALE);
EXPECT_TRUE(detectQRCode(src, transform));
ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path;
EXPECT_TRUE(detectQRCode(src, corners));
file_config << "x" << "[:";
for (size_t j = 0; j < transform.size(); j++) { file_config << transform[j].x; }
for (size_t j = 0; j < corners.size(); j++) { file_config << corners[j].x; }
file_config << "]";
file_config << "y" << "[:";
for (size_t j = 0; j < transform.size(); j++) { file_config << transform[j].y; }
for (size_t j = 0; j < corners.size(); j++) { file_config << corners[j].y; }
file_config << "]" << "}";
}
file_config << "]";
@ -51,65 +51,65 @@ TEST(Objdetect_QRCode, generate_test_data)
#else
typedef testing::TestWithParam< String > Objdetect_QRCode;
typedef testing::TestWithParam< std::string > Objdetect_QRCode;
TEST_P(Objdetect_QRCode, regression)
{
String root = cvtest::TS::ptr()->get_data_path() + "qrcode/";
String dataset_config = cvtest::TS::ptr()->get_data_path() + "qrcode/dataset_config.json";
FileStorage file_config(dataset_config, FileStorage::READ);
const std::string name_current_image = GetParam();
const std::string root = "qrcode/";
const int pixels_error = 3;
std::vector<Point> corners;
String image_path = root + String(GetParam());
std::string image_path = findDataFile(root + name_current_image);
Mat src = imread(image_path, IMREAD_GRAYSCALE);
ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path;
std::vector<Point> corners;
EXPECT_TRUE(detectQRCode(src, corners));
if (file_config.isOpened())
const std::string dataset_config = findDataFile(root + "dataset_config.json", false);
FileStorage file_config(dataset_config, FileStorage::READ);
ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
{
FileNode images_list = file_config["test_images"];
int index = 0, images_count = static_cast<int>(images_list.size());
ASSERT_GT(images_count, 0);
size_t images_count = static_cast<size_t>(images_list.size());
ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;
bool runTestsFlag = false;
String name_current_image = String(GetParam());
for (; index < images_count; index++)
for (size_t index = 0; index < images_count; index++)
{
String name_test_image = images_list[index]["image_name"];
FileNode config = images_list[(int)index];
std::string name_test_image = config["image_name"];
if (name_test_image == name_current_image)
{
for (int i = 0; i < 4; i++)
{
int x = images_list[index]["x"][i];
int y = images_list[index]["y"][i];
int x = config["x"][i];
int y = config["y"][i];
EXPECT_NEAR(x, corners[i].x, pixels_error);
EXPECT_NEAR(y, corners[i].y, pixels_error);
}
runTestsFlag = true;
return; // done
}
}
if (!runTestsFlag)
{
std::cout << "Not found results for " << name_current_image;
std::cout << " image in dataset_config.json file." << std::endl;
}
file_config.release();
}
else
{
std::cout << " Not found dataset_config.json file." << std::endl;
std::cerr
<< "No results found for the '" << name_current_image
<< "' image in config file: " << dataset_config << std::endl
<< "Re-run the tests with the UPDATE_QRCODE_TEST_DATA macro enabled to update the test data."
<< std::endl;
}
}
INSTANTIATE_TEST_CASE_P(objdetect, Objdetect_QRCode, testing::ValuesIn(qrcode_images_name));
INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode, testing::ValuesIn(qrcode_images_name));
TEST(Objdetect_QRCode, not_found_qrcode)
TEST(Objdetect_QRCode_basic, not_found_qrcode)
{
std::vector<Point> corners;
Mat zero_image = Mat::zeros(256, 256, CV_8UC1);
EXPECT_FALSE(detectQRCode(zero_image, corners));
}
#endif
} // namespace
#endif // UPDATE_QRCODE_TEST_DATA
}} // namespace

@ -1563,8 +1563,6 @@ PyObject* pyopencv_from(const Moments& m)
"nu30", m.nu30, "nu21", m.nu21, "nu12", m.nu12, "nu03", m.nu03);
}
#include "pyopencv_custom_headers.h"
static int OnError(int status, const char *func_name, const char *err_msg, const char *file_name, int line, void *userdata)
{
PyGILState_STATE gstate;
@ -1802,6 +1800,7 @@ static int convert_to_char(PyObject *o, char *dst, const char *name = "no_name")
# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif
#include "pyopencv_custom_headers.h"
#include "pyopencv_generated_types.h"
#include "pyopencv_generated_funcs.h"

@ -0,0 +1,25 @@
#!/usr/bin/env python
from __future__ import print_function
import numpy as np
import cv2 as cv
from tests_common import NewOpenCVTests
class Bindings(NewOpenCVTests):
def check_name(self, name):
#print(name)
self.assertFalse(name == None)
self.assertFalse(name == "")
def test_registry(self):
self.check_name(cv.videoio_registry.getBackendName(cv.CAP_ANY));
self.check_name(cv.videoio_registry.getBackendName(cv.CAP_FFMPEG))
self.check_name(cv.videoio_registry.getBackendName(cv.CAP_OPENCV_MJPEG))
backends = cv.videoio_registry.getBackends()
for backend in backends:
self.check_name(cv.videoio_registry.getBackendName(backend))
if __name__ == '__main__':
NewOpenCVTests.bootstrap()

@ -103,6 +103,7 @@ using std::pair;
using std::make_pair;
using testing::TestWithParam;
using testing::Values;
using testing::ValuesIn;
using testing::Combine;
using cv::Mat;
@ -654,6 +655,11 @@ void addDataSearchSubDirectory(const std::string& subdir);
*/
std::string findDataFile(const std::string& relative_path, bool required = true);
/*! @brief Try to find requested data directory
@sa findDataFile
*/
std::string findDataDirectory(const std::string& relative_path, bool required = true);
#ifndef __CV_TEST_EXEC_ARGS
#if defined(_MSC_VER) && (_MSC_VER <= 1400)

@ -44,13 +44,13 @@ extern int testThreads;
#undef TEST
#define TEST_(test_case_name, test_name, BODY_IMPL) \
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public ::testing::Test {\
#define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_IMPL) \
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
public:\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
private:\
virtual void TestBody() CV_OVERRIDE;\
virtual void Body();\
virtual void bodyMethodName();\
static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
GTEST_DISALLOW_COPY_AND_ASSIGN_(\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
@ -62,14 +62,14 @@ extern int testThreads;
#test_case_name, #test_name, NULL, NULL, \
::testing::internal::CodeLocation(__FILE__, __LINE__), \
(::testing::internal::GetTestTypeId()), \
::testing::Test::SetUpTestCase, \
::testing::Test::TearDownTestCase, \
parent_class::SetUpTestCase, \
parent_class::TearDownTestCase, \
new ::testing::internal::TestFactoryImpl<\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body()
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName()
#define TEST(test_case_name, test_name) TEST_(test_case_name, test_name, CV__TEST_BODY_IMPL)
#define TEST(test_case_name, test_name) TEST_(test_case_name, test_name, ::testing::Test, Body, CV__TEST_BODY_IMPL)
#define CV__TEST_BIGDATA_BODY_IMPL(name) \
{ \
@ -92,9 +92,9 @@ extern int testThreads;
// Special type of tests which require / use or validate processing of huge amount of data (>= 2Gb)
#if defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__)
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, test_name, CV__TEST_BIGDATA_BODY_IMPL)
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, test_name, ::testing::Test, Body, CV__TEST_BIGDATA_BODY_IMPL)
#else
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, DISABLED_ ## test_name, CV__TEST_BIGDATA_BODY_IMPL)
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, DISABLED_ ## test_name, ::testing::Test, Body, CV__TEST_BIGDATA_BODY_IMPL)
#endif
#undef TEST_F

@ -546,17 +546,7 @@ void PrintTo(const Size& sz, ::std::ostream* os);
// EXPECT_TRUE(foo.StatusIsOK());
// }
#define PERF_TEST(test_case_name, test_name)\
namespace PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name) {\
class TestBase {/*compile error for this class means that you are trying to use perf::TestBase as a fixture*/};\
class test_case_name : public ::perf::TestBase {\
public:\
test_case_name() {}\
protected:\
virtual void PerfTestBody();\
};\
TEST_F(test_case_name, test_name){ CV__PERF_TEST_BODY_IMPL(#test_case_name "_" #test_name); }\
}\
void PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name)::test_case_name::PerfTestBody()
TEST_(test_case_name, test_name, ::perf::TestBase, PerfTestBody, CV__PERF_TEST_BODY_IMPL)
// Defines a performance test that uses a test fixture.
//

@ -772,16 +772,24 @@ void addDataSearchSubDirectory(const std::string& subdir)
TS::ptr()->data_search_subdir.push_back(subdir);
}
std::string findDataFile(const std::string& relative_path, bool required)
static std::string findData(const std::string& relative_path, bool required, bool findDirectory)
{
#define TEST_TRY_FILE_WITH_PREFIX(prefix) \
{ \
std::string path = path_join(prefix, relative_path); \
/*printf("Trying %s\n", path.c_str());*/ \
FILE* f = fopen(path.c_str(), "rb"); \
if(f) { \
fclose(f); \
return path; \
if (findDirectory) \
{ \
if (isDirectory(path)) \
return path; \
} \
else \
{ \
FILE* f = fopen(path.c_str(), "rb"); \
if(f) { \
fclose(f); \
return path; \
} \
} \
}
@ -842,11 +850,21 @@ std::string findDataFile(const std::string& relative_path, bool required)
}
#endif
#endif
const char* type = findDirectory ? "directory" : "data file";
if (required)
CV_Error(cv::Error::StsError, cv::format("OpenCV tests: Can't find required data file: %s", relative_path.c_str()));
throw SkipTestException(cv::format("OpenCV tests: Can't find data file: %s", relative_path.c_str()));
CV_Error(cv::Error::StsError, cv::format("OpenCV tests: Can't find required %s: %s", type, relative_path.c_str()));
throw SkipTestException(cv::format("OpenCV tests: Can't find %s: %s", type, relative_path.c_str()));
}
std::string findDataFile(const std::string& relative_path, bool required)
{
return findData(relative_path, required, false);
}
std::string findDataDirectory(const std::string& relative_path, bool required)
{
return findData(relative_path, required, true);
}
} //namespace cvtest

@ -59,6 +59,7 @@
@defgroup videoio_c C API for video I/O
@defgroup videoio_ios iOS glue for video I/O
@defgroup videoio_winrt WinRT glue for video I/O
@defgroup videoio_registry Query I/O API backends registry
@}
*/

@ -0,0 +1,44 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_VIDEOIO_REGISTRY_HPP
#define OPENCV_VIDEOIO_REGISTRY_HPP
#include <opencv2/videoio.hpp>
namespace cv { namespace videoio_registry {
/** @addtogroup videoio_registry
This section describes the API for querying and configuring the available Video I/O backends.
Runtime configuration options:
- enable debug mode: `OPENCV_VIDEOIO_DEBUG=1`
- change backend priority: `OPENCV_VIDEOIO_PRIORITY_<backend>=9999`
- disable backend: `OPENCV_VIDEOIO_PRIORITY_<backend>=0`
- specify list of backends with high priority (>100000): `OPENCV_VIDEOIO_PRIORITY_LIST=FFMPEG,GSTREAMER`
@{
*/
/** @brief Returns backend API name or "unknown"
@param api backend ID (#VideoCaptureAPIs)
*/
CV_EXPORTS_W cv::String getBackendName(VideoCaptureAPIs api);
/** @brief Returns list of all builtin backends */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getBackends();
/** @brief Returns list of available backends which work via `cv::VideoCapture(int index)` */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getCameraBackends();
/** @brief Returns list of available backends which work via `cv::VideoCapture(filename)` */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getStreamBackends();
/** @brief Returns list of available backends which work via `cv::VideoWriter()` */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getWriterBackends();
//! @}
}} // namespace
#endif // OPENCV_VIDEOIO_REGISTRY_HPP
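A short sketch of how the registry API declared above can be queried at runtime; the printed output is illustrative only:

// Minimal sketch querying the videoio registry API declared above.
#include <iostream>
#include <opencv2/videoio.hpp>
#include <opencv2/videoio/registry.hpp>

int main()
{
    // Enumerate all enabled backends and print their names.
    for (cv::VideoCaptureAPIs api : cv::videoio_registry::getBackends())
        std::cout << cv::videoio_registry::getBackendName(api) << std::endl;
    // Backends usable for VideoCapture(filename), e.g. FFMPEG or GSTREAMER.
    std::cout << "stream backends: "
              << cv::videoio_registry::getStreamBackends().size() << std::endl;
    return 0;
}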

@ -0,0 +1,50 @@
#ifdef HAVE_OPENCV_VIDEOIO
typedef std::vector<VideoCaptureAPIs> vector_VideoCaptureAPIs;
template<>
bool pyopencv_to(PyObject *o, cv::VideoCaptureAPIs &v, const char *name)
{
(void)name;
v = CAP_ANY;
if (!o || o == Py_None)
return false;
else if (PyLong_Check(o))
{
v = VideoCaptureAPIs((int64)PyLong_AsLongLong(o));
return true;
}
else if (PyInt_Check(o))
{
v = VideoCaptureAPIs((int64)PyInt_AS_LONG(o));
return true;
}
else
return false;
}
template<>
PyObject* pyopencv_from(const cv::VideoCaptureAPIs &v)
{
return pyopencv_from((int)(v));
}
template<> struct pyopencvVecConverter<cv::VideoCaptureAPIs>
{
static bool to(PyObject* obj, std::vector<cv::VideoCaptureAPIs>& value, const ArgInfo info)
{
return pyopencv_to_generic_vec(obj, value, info);
}
static PyObject* from(const std::vector<cv::VideoCaptureAPIs>& value)
{
return pyopencv_from_generic_vec(value);
}
};
template<>
bool pyopencv_to(PyObject *o, std::vector<cv::VideoCaptureAPIs>& apis, const char *name)
{
return pyopencvVecConverter<cv::VideoCaptureAPIs>::to(o, apis, ArgInfo(name, false));
}
#endif // HAVE_OPENCV_VIDEOIO

@ -2351,9 +2351,6 @@ AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CV_CODEC
c->codec_type = AVMEDIA_TYPE_VIDEO;
// put sample parameters
unsigned long long lbit_rate = static_cast<unsigned long long>(bitrate);
lbit_rate += (bitrate / 4);
lbit_rate = std::min(lbit_rate, static_cast<unsigned long long>(std::numeric_limits<int>::max()));
c->bit_rate = bitrate;
// took advice from

@ -158,8 +158,9 @@ public:
data.resize(size);
}
void put(unsigned bits, int len)
inline void put_bits(unsigned bits, int len)
{
CV_Assert(len >=0 && len < 32);
if((m_pos == (data.size() - 1) && len > bits_free) || m_pos == data.size())
{
resize(int(2*data.size()));
@ -182,6 +183,12 @@ public:
}
}
inline void put_val(int val, const unsigned * table)
{
unsigned code = table[(val) + 2];
put_bits(code >> 8, (int)(code & 255));
}
void finish()
{
if(bits_free == 32)
@ -1188,13 +1195,6 @@ public:
void operator()( const cv::Range& range ) const CV_OVERRIDE
{
const int CAT_TAB_SIZE = 4096;
unsigned code = 0;
#define JPUT_BITS(val, bits) output_buffer.put(val, bits)
#define JPUT_HUFF(val, table) \
code = table[(val) + 2]; \
JPUT_BITS(code >> 8, (int)(code & 255))
int x, y;
int i, j;
@ -1300,8 +1300,8 @@ public:
int cat = cat_table[val + CAT_TAB_SIZE];
//CV_Assert( cat <= 11 );
JPUT_HUFF( cat, huff_dc_tab[is_chroma] );
JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
output_buffer.put_val(cat, huff_dc_tab[is_chroma] );
output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
}
for( j = 1; j < 64; j++ )
@ -1316,15 +1316,15 @@ public:
{
while( run >= 16 )
{
JPUT_HUFF( 0xF0, htable ); // encode 16 zeros
output_buffer.put_val( 0xF0, htable ); // encode 16 zeros
run -= 16;
}
{
int cat = cat_table[val + CAT_TAB_SIZE];
//CV_Assert( cat <= 10 );
JPUT_HUFF( cat + run*16, htable );
JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
output_buffer.put_val( cat + run*16, htable );
output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
}
run = 0;
@ -1333,7 +1333,7 @@ public:
if( run )
{
JPUT_HUFF( 0x00, htable ); // encode EOB
output_buffer.put_val( 0x00, htable ); // encode EOB
}
}
}
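The put_val() helper above assumes each Huffman table entry packs the emitted bits in the high part of the word and the bit length in the low byte, which is exactly what it unpacks before calling put_bits(). A tiny self-contained sketch of that packing (the entry value here is invented, not taken from the real JPEG tables):

// Illustration of the (code << 8) | length packing that put_val() unpacks.
#include <cassert>
#include <cstdio>

int main()
{
    const unsigned entry = (0x6u << 8) | 4;    // Huffman code 0b0110, 4 bits long
    const unsigned code  = entry >> 8;         // what put_bits() receives as 'bits'
    const int      len   = (int)(entry & 255); // ... and as 'len'
    assert(code == 0x6 && len == 4);
    std::printf("code=0x%X len=%d\n", code, len);
    return 0;
}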

@ -277,6 +277,7 @@ struct CvCaptureCAM_V4L CV_FINAL : public CvCapture
__u32 palette;
int width, height;
int width_set, height_set;
int bufferSize;
__u32 fps;
bool convert_rgb;
@ -797,6 +798,7 @@ bool CvCaptureCAM_V4L::open(const char* _deviceName)
FirstCapture = 1;
width = DEFAULT_V4L_WIDTH;
height = DEFAULT_V4L_HEIGHT;
width_set = height_set = 0;
bufferSize = DEFAULT_V4L_BUFFERS;
fps = DEFAULT_V4L_FPS;
convert_rgb = true;
@ -1769,7 +1771,6 @@ static bool icvSetControl (CvCaptureCAM_V4L* capture,
static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
int property_id, double value ){
static int width = 0, height = 0;
bool retval = false;
bool possible;
@ -1778,6 +1779,9 @@ static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
switch (property_id) {
case CV_CAP_PROP_FRAME_WIDTH:
{
int& width = capture->width_set;
int& height = capture->height_set;
width = cvRound(value);
retval = width != 0;
if(width !=0 && height != 0) {
@ -1786,8 +1790,12 @@ static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
retval = v4l2_reset(capture);
width = height = 0;
}
break;
}
break;
case CV_CAP_PROP_FRAME_HEIGHT:
{
int& width = capture->width_set;
int& height = capture->height_set;
height = cvRound(value);
retval = height != 0;
if(width !=0 && height != 0) {
@ -1796,7 +1804,8 @@ static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
retval = v4l2_reset(capture);
width = height = 0;
}
break;
}
break;
case CV_CAP_PROP_FPS:
capture->fps = value;
retval = v4l2_reset(capture);

@ -12,7 +12,7 @@ namespace cv
// Utility function for safe integer conversions
template <typename D, typename S>
inline D safe_int_cast(S val)
inline D safe_int_cast(S val, const char * msg = 0)
{
typedef std::numeric_limits<S> st;
typedef std::numeric_limits<D> dt;
@ -21,7 +21,10 @@ inline D safe_int_cast(S val)
const bool in_range_l = (double)val >= (double)dt::min();
if (!in_range_r || !in_range_l)
{
CV_Error_(cv::Error::StsOutOfRange, ("Can not convert integer values (%s -> %s), value 0x%llx is out of range", typeid(S).name(), typeid(D).name(), val));
if (!msg)
CV_Error_(Error::StsOutOfRange, ("Can not convert integer values (%s -> %s), value 0x%llx is out of range", typeid(S).name(), typeid(D).name(), val));
else
CV_Error(Error::StsOutOfRange, msg);
}
return static_cast<D>(val);
}
@ -128,7 +131,7 @@ public:
VideoInputStream();
VideoInputStream(const String& filename);
~VideoInputStream();
VideoInputStream& read(char*, uint64_t);
VideoInputStream& read(char*, uint32_t);
VideoInputStream& seekg(uint64_t);
uint64_t tellg();
bool isOpened() const;
@ -229,11 +232,11 @@ void VideoInputStream::close()
}
}
VideoInputStream& VideoInputStream::read(char* buf, uint64_t count)
VideoInputStream& VideoInputStream::read(char* buf, uint32_t count)
{
if(isOpened())
{
input.read(buf, safe_int_cast<std::streamsize>(count));
input.read(buf, safe_int_cast<std::streamsize>(count, "Failed to read AVI file: requested chunk size is too large"));
m_is_valid = (input.gcount() == (std::streamsize)count);
}
@ -243,7 +246,7 @@ VideoInputStream& VideoInputStream::read(char* buf, uint64_t count)
VideoInputStream& VideoInputStream::seekg(uint64_t pos)
{
input.clear();
input.seekg(safe_int_cast<std::streamoff>(pos));
input.seekg(safe_int_cast<std::streamoff>(pos, "Failed to seek in AVI file: position is out of range"));
m_is_valid = !input.eof();
return *this;
}
@ -322,9 +325,6 @@ bool AVIReadContainer::parseStrl(char stream_id, Codecs codec_)
if(m_file_stream && strh.m_four_cc == STRH_CC)
{
uint64_t next_strl_list = m_file_stream->tellg();
next_strl_list += strh.m_size;
AviStreamHeader strm_hdr;
*m_file_stream >> strm_hdr;
@ -668,7 +668,7 @@ void BitStream::writeBlock()
}
size_t BitStream::getPos() const {
return safe_int_cast<size_t>(m_current - m_start) + m_pos;
return safe_int_cast<size_t>(m_current - m_start, "Failed to determine AVI buffer position: value is out of range") + m_pos;
}
void BitStream::putByte(int val)
@ -737,7 +737,7 @@ void BitStream::patchInt(uint32_t val, size_t pos)
{
if( pos >= m_pos )
{
ptrdiff_t delta = safe_int_cast<ptrdiff_t>(pos - m_pos);
ptrdiff_t delta = safe_int_cast<ptrdiff_t>(pos - m_pos, "Failed to seek in AVI buffer: value is out of range");
CV_Assert( delta < m_current - m_start );
m_start[delta] = (uchar)val;
m_start[delta+1] = (uchar)(val >> 8);
@ -747,7 +747,7 @@ void BitStream::patchInt(uint32_t val, size_t pos)
else
{
std::streamoff fpos = output.tellp();
output.seekp(safe_int_cast<std::streamoff>(pos));
output.seekp(safe_int_cast<std::streamoff>(pos, "Failed to seek in AVI file: value is out of range"));
uchar buf[] = { (uchar)val, (uchar)(val >> 8), (uchar)(val >> 16), (uchar)(val >> 24) };
output.write((char *)buf, 4);
output.seekp(fpos);
@ -960,7 +960,7 @@ void AVIWriteContainer::endWriteChunk()
size_t pospos = AVIChunkSizeIndex.back();
AVIChunkSizeIndex.pop_back();
CV_Assert(currpos >= pospos);
uint32_t chunksz = safe_int_cast<uint32_t>(currpos - pospos);
uint32_t chunksz = safe_int_cast<uint32_t>(currpos - pospos, "Failed to write AVI file: chunk size is out of bounds");
strm->patchInt(chunksz, pospos);
}
}
@ -996,7 +996,7 @@ void AVIWriteContainer::writeIndex(int stream_number, StreamType strm_type)
void AVIWriteContainer::finishWriteAVI()
{
uint32_t nframes = safe_int_cast<uint32_t>(frameOffset.size());
uint32_t nframes = safe_int_cast<uint32_t>(frameOffset.size(), "Failed to write AVI file: number of frames is too large");
// Record frames numbers to AVI Header
while (!frameNumIndexes.empty())
{

@ -6,6 +6,8 @@
#include "videoio_registry.hpp"
#include "opencv2/videoio/registry.hpp"
#include "cap_intelperc.hpp"
#include "cap_librealsense.hpp"
#include "cap_dshow.hpp"
@ -250,6 +252,8 @@ public:
return g_instance;
}
inline std::vector<VideoBackendInfo> getEnabledBackends() const { return enabledBackends; }
inline std::vector<VideoBackendInfo> getAvailableBackends_CaptureByIndex() const
{
std::vector<VideoBackendInfo> result;
@ -305,6 +309,58 @@ std::vector<VideoBackendInfo> getAvailableBackends_Writer()
return result;
}
cv::String getBackendName(VideoCaptureAPIs api)
{
if (api == CAP_ANY)
return "CAP_ANY"; // special case, not a part of backends list
const int N = sizeof(builtin_backends)/sizeof(builtin_backends[0]);
for (size_t i = 0; i < N; i++)
{
const VideoBackendInfo& backend = builtin_backends[i];
if (backend.id == api)
return backend.name;
}
return cv::format("UnknownVideoAPI(%d)", (int)api);
}
std::vector<VideoCaptureAPIs> getBackends()
{
std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getEnabledBackends();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}
std::vector<VideoCaptureAPIs> getCameraBackends()
{
const std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getAvailableBackends_CaptureByIndex();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}
std::vector<VideoCaptureAPIs> getStreamBackends()
{
const std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getAvailableBackends_CaptureByFilename();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}
std::vector<VideoCaptureAPIs> getWriterBackends()
{
const std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getAvailableBackends_Writer();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}
} // namespace registry
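These registry functions give applications and the tests below a uniform way to enumerate the backends that were compiled in and enabled. A minimal usage sketch (the printed list depends entirely on the build configuration):

#include <opencv2/videoio/registry.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Enumerate every enabled backend and print its human-readable name.
    const std::vector<cv::VideoCaptureAPIs> apis = cv::videoio_registry::getBackends();
    for (size_t i = 0; i < apis.size(); i++)
        std::cout << cv::videoio_registry::getBackendName(apis[i]) << std::endl;
    return 0;
}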
#define TRY_OPEN(backend_func) \

@ -6,10 +6,26 @@
#include "opencv2/ts.hpp"
#include "opencv2/videoio.hpp"
#include "opencv2/videoio/registry.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/private.hpp"
namespace cv {
inline std::ostream &operator<<(std::ostream &out, const VideoCaptureAPIs& api)
{
out << cv::videoio_registry::getBackendName(api); return out;
}
static inline void PrintTo(const cv::VideoCaptureAPIs& api, std::ostream* os)
{
*os << cv::videoio_registry::getBackendName(api);
}
} // namespace
inline std::string fourccToString(int fourcc)
{
return cv::format("%c%c%c%c", fourcc & 255, (fourcc >> 8) & 255, (fourcc >> 16) & 255, (fourcc >> 24) & 255);
@ -55,4 +71,15 @@ public:
}
};
static inline bool isBackendAvailable(cv::VideoCaptureAPIs api, const std::vector<cv::VideoCaptureAPIs>& api_list)
{
for (size_t i = 0; i < api_list.size(); i++)
{
if (api_list[i] == api)
return true;
}
return false;
}
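In the parameterized tests below this helper is combined with SkipTestException so that a backend missing from the build produces a skipped test rather than a failure. A hedged sketch of that pattern (the helper name requireStreamBackend is illustrative, not part of the test suite):

// Illustrative helper only: throw the test suite's SkipTestException when the
// requested stream backend is not available in this build.
static inline void requireStreamBackend(cv::VideoCaptureAPIs api)
{
    if (!isBackendAvailable(api, cv::videoio_registry::getStreamBackends()))
        throw cvtest::SkipTestException(cv::String("Backend is not available/disabled: ") +
                                        cv::videoio_registry::getBackendName(api));
}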
#endif

@ -46,62 +46,12 @@
namespace opencv_test
{
struct VideoCaptureAPI
{
VideoCaptureAPIs api;
inline const char * toString() const
{
switch (api)
{
case CAP_ANY: return "CAP_ANY";
#ifdef __linux__
case CAP_V4L2: return "CAP_V4L/CAP_V4L2";
#else
case CAP_VFW: return "CAP_VFW";
#endif
case CAP_FIREWIRE: return "CAP_FIREWIRE";
case CAP_QT: return "CAP_QT";
case CAP_UNICAP: return "CAP_UNICAP";
case CAP_DSHOW: return "CAP_DSHOW";
case CAP_PVAPI: return "CAP_PVAPI";
case CAP_OPENNI: return "CAP_OPENNI";
case CAP_OPENNI_ASUS: return "CAP_OPENNI_ASUS";
case CAP_ANDROID: return "CAP_ANDROID";
case CAP_XIAPI: return "CAP_XIAPI";
case CAP_AVFOUNDATION: return "CAP_AVFOUNDATION";
case CAP_GIGANETIX: return "CAP_GIGANETIX";
case CAP_MSMF: return "CAP_MSMF";
case CAP_WINRT: return "CAP_WINRT";
case CAP_INTELPERC: return "CAP_INTELPERC";
case CAP_OPENNI2: return "CAP_OPENNI2";
case CAP_OPENNI2_ASUS: return "CAP_OPENNI2_ASUS";
case CAP_GPHOTO2: return "CAP_GPHOTO2";
case CAP_GSTREAMER: return "CAP_GSTREAMER";
case CAP_FFMPEG: return "CAP_FFMPEG";
case CAP_IMAGES: return "CAP_IMAGES";
case CAP_ARAVIS: return "CAP_ARAVIS";
case CAP_OPENCV_MJPEG: return "CAP_OPENCV_MJPEG";
case CAP_INTEL_MFX: return "CAP_INTEL_MFX";
case CAP_XINE: return "CAP_XINE";
}
return "unknown";
}
VideoCaptureAPI(int api_ = CAP_ANY) : api((VideoCaptureAPIs)api_) {}
operator int() { return api; }
};
inline std::ostream &operator<<(std::ostream &out, const VideoCaptureAPI & api)
{
out << api.toString(); return out;
}
class Videoio_Test_Base
{
protected:
string ext;
string video_file;
VideoCaptureAPI apiPref;
VideoCaptureAPIs apiPref;
protected:
Videoio_Test_Base() {}
virtual ~Videoio_Test_Base() {}
@ -131,6 +81,8 @@ protected:
public:
void doTest()
{
if (!isBackendAvailable(apiPref, cv::videoio_registry::getStreamBackends()))
throw SkipTestException(cv::String("Backend is not available/disabled: ") + cv::videoio_registry::getBackendName(apiPref));
VideoCapture cap;
ASSERT_NO_THROW(cap.open(video_file, apiPref));
if (!cap.isOpened())
@ -200,7 +152,7 @@ public:
};
//==================================================================================================
typedef tuple<string, VideoCaptureAPI> Backend_Type_Params;
typedef tuple<string, VideoCaptureAPIs> Backend_Type_Params;
class Videoio_Bunny : public Videoio_Test_Base, public testing::TestWithParam<Backend_Type_Params>
{
@ -214,6 +166,8 @@ public:
}
void doFrameCountTest()
{
if (!isBackendAvailable(apiPref, cv::videoio_registry::getStreamBackends()))
throw SkipTestException(cv::String("Backend is not available/disabled: ") + cv::videoio_registry::getBackendName(apiPref));
VideoCapture cap;
EXPECT_NO_THROW(cap.open(video_file, apiPref));
if (!cap.isOpened())
@ -274,7 +228,7 @@ struct Ext_Fourcc_PSNR
string ext;
string fourcc;
float PSNR;
VideoCaptureAPI api;
VideoCaptureAPIs api;
};
typedef tuple<Size, Ext_Fourcc_PSNR> Size_Ext_Fourcc_PSNR;
@ -348,7 +302,7 @@ public:
//==================================================================================================
static VideoCaptureAPI backend_params[] = {
static const VideoCaptureAPIs backend_params[] = {
#ifdef HAVE_QUICKTIME
CAP_QT,
#endif
@ -383,7 +337,7 @@ static VideoCaptureAPI backend_params[] = {
// CAP_INTEL_MFX
};
static string bunny_params[] = {
static const string bunny_params[] = {
#ifdef HAVE_VIDEO_INPUT
string("wmv"),
string("mov"),

@ -7,6 +7,9 @@
#include <vector>
#include <map>
#include <iostream>
#include <iomanip>
#include <limits>
#include <stdint.h>
#ifdef HAVE_OPENGL
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN 1
@ -36,17 +39,17 @@ static void help()
cout << "\n This program demonstrates how to use MSER to detect extremal regions \n"
"Usage: \n"
" ./detect_mser <image1(without parameter a syntehtic image is used as default)>\n"
"Press esc key when image window is active to change descriptor parameter\n"
"Press esc key when image window is active to change descriptor parameter\n"
"Press 2, 8, 4, 6, +,- or 5 keys in openGL windows to change view or use mouse\n";
}
struct MSERParams
{
MSERParams(int _delta = 5, int _min_area = 60, int _max_area = 14400,
double _max_variation = 0.25, double _min_diversity = .2,
int _max_evolution = 200, double _area_threshold = 1.01,
double _min_margin = 0.003, int _edge_blur_size = 5)
{
double _max_variation = 0.25, double _min_diversity = .2,
int _max_evolution = 200, double _area_threshold = 1.01,
double _min_margin = 0.003, int _edge_blur_size = 5)
{
delta = _delta;
minArea = _min_area;
maxArea = _max_area;
@ -57,7 +60,7 @@ struct MSERParams
minMargin = _min_margin;
edgeBlurSize = _edge_blur_size;
pass2Only = false;
}
}
int delta;
int minArea;
@ -72,30 +75,20 @@ struct MSERParams
int edgeBlurSize;
};
static String Legende(MSERParams &pAct)
static String Legende(const MSERParams &pAct)
{
String s="";
String inf = static_cast<const ostringstream&>(ostringstream() << pAct.minArea).str();
String sup = static_cast<const ostringstream&>(ostringstream() << pAct.maxArea).str();
s = " Area[" + inf + "," + sup + "]";
inf = static_cast<const ostringstream&>(ostringstream() << pAct.delta).str();
s += " del. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << pAct.maxVariation).str();
s += " var. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.minDiversity).str();
s += " div. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.pass2Only).str();
s += " pas. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.maxEvolution).str();
s += "RGb-> evo. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.areaThreshold).str();
s += " are. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.minMargin).str();
s += " mar. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.edgeBlurSize).str();
s += " siz. [" + inf + "]";
return s;
ostringstream ss;
ss << "Area[" << pAct.minArea << "," << pAct.maxArea << "] ";
ss << "del. [" << pAct.delta << "] ";
ss << "var. [" << pAct.maxVariation << "] ";
ss << "div. [" << (int)pAct.minDiversity << "] ";
ss << "pas. [" << (int)pAct.pass2Only << "] ";
ss << "RGb->evo. [" << pAct.maxEvolution << "] ";
ss << "are. [" << (int)pAct.areaThreshold << "] ";
ss << "mar. [" << (int)pAct.minMargin << "] ";
ss << "siz. [" << pAct.edgeBlurSize << "]";
return ss.str();
}
@ -109,18 +102,28 @@ bool keyPressed=false;
Vec4f rotAxis(1,0,1,0);
Vec3f zoom(1,0,0);
float obsX = (float)0, obsY = (float)0, obsZ = (float)-10, tx = (float)0, ty = (float)0;
float thetaObs = (float)-1.570, phiObs = (float)1.570, rObs = (float)10;
int prevX=-1,prevY=-1,prevTheta=-1000,prevPhi=-1000;
float obsX = 0.f;
float obsY = 0.f;
float obsZ = -10.f;
float tx = 0.f;
float ty = 0.f;
float thetaObs = -1.570f;
float phiObs = 1.570f;
float rObs = 10.f;
int prevX = -1;
int prevY = -1;
int prevTheta = -1000;
int prevPhi = -1000;
#ifdef HAVE_OPENGL
struct DrawData
{
{
ogl::Arrays arr;
ogl::Texture2D tex;
ogl::Buffer indices;
};
};
static void draw(void* userdata)
@ -167,19 +170,19 @@ static void onMouse(int event, int x, int y, int flags, void*)
{
if (x - prevTheta<0)
{
thetaObs +=(float)0.02;
thetaObs += 0.02f;
}
else if (x - prevTheta>0)
{
thetaObs -= (float)0.02;
thetaObs -= 0.02f;
}
if (y - prevPhi<0)
{
phiObs -= (float)0.02;
phiObs -= 0.02f;
}
else if (y - prevPhi>0)
{
phiObs += (float)0.02;
phiObs += 0.02f;
}
prevTheta = x;
prevPhi = y;
@ -187,9 +190,9 @@ static void onMouse(int event, int x, int y, int flags, void*)
if (event==EVENT_MOUSEWHEEL)
{
if (getMouseWheelDelta(flags)>0)
rObs += (float)0.1;
rObs += 0.1f;
else
rObs -= (float)0.1;
rObs -= 0.1f;
}
float pi = static_cast<float>(CV_PI);
if (thetaObs>pi)
@ -202,11 +205,11 @@ static void onMouse(int event, int x, int y, int flags, void*)
}
if (phiObs>pi / 2)
{
phiObs = pi / 2 - (float)0.0001;
phiObs = pi / 2 - 0.0001f;
}
if (phiObs<-pi / 2)
{
phiObs = -pi / 2 + (float)0.00001;
phiObs = -pi / 2 + 0.00001f;
}
if (rObs<0)
{
@ -224,36 +227,37 @@ static void DrawOpenGLMSER(Mat img, Mat result)
cvtColor(img, imgGray, COLOR_BGR2GRAY);
else
imgGray = img;
namedWindow("OpenGL", WINDOW_OPENGL);
setMouseCallback("OpenGL", onMouse, NULL);
Mat_<Vec3f> vertex(1, img.cols*img.rows);
Mat_<Vec2f> texCoords(1, img.cols*img.rows);
for (int i = 0, nbPix = 0; i<img.rows; i++)
{
{
for (int j = 0; j<img.cols; j++, nbPix++)
{
{
float x = (j) / (float)img.cols;
float y = (i) / (float)img.rows;
vertex.at< Vec3f >(0, nbPix) = Vec3f(float(2 * (x - 0.5)), float(2 * (0.5 - y)), float(imgGray.at<uchar>(i, j) / 512.0));
texCoords.at< Vec2f>(0, nbPix) = Vec2f(x, y);
}
}
}
Mat_<int> indices(1, (img.rows - 1)*(6 * img.cols));
for (int i = 1, nbPix = 0; i<img.rows; i++)
{
{
for (int j = 1; j<img.cols; j++)
{
{
int c = i*img.cols + j;
indices.at<int>(0, nbPix++) = c ;
indices.at<int>(0, nbPix++) = c;
indices.at<int>(0, nbPix++) = c - 1;
indices.at<int>(0, nbPix++) = c- img.cols - 1;
indices.at<int>(0, nbPix++) = c- img.cols - 1;
indices.at<int>(0, nbPix++) = c - img.cols - 1;
indices.at<int>(0, nbPix++) = c - img.cols - 1;
indices.at<int>(0, nbPix++) = c - img.cols;
indices.at<int>(0, nbPix++) = c ;
}
indices.at<int>(0, nbPix++) = c;
}
}
DrawData *data = new DrawData;
@ -279,7 +283,7 @@ static void DrawOpenGLMSER(Mat img, Mat result)
setOpenGlDrawCallback("OpenGL", draw, data);
for (;;)
{
{
updateWindow("OpenGL");
char key = (char)waitKey(40);
if (key == 27)
@ -292,27 +296,28 @@ static void DrawOpenGLMSER(Mat img, Mat result)
case '5':
obsX = 0, obsY = 0, obsZ = -10;
thetaObs = -pi/2, phiObs = pi/2, rObs = 10;
tx=0;ty=0;
tx=0; ty=0;
break;
case '4':
thetaObs += (float)0.1;
thetaObs += 0.1f;
break;
case '6':
thetaObs -= (float)0.1;
thetaObs -= 0.1f;
break;
case '2':
phiObs -= (float).1;
phiObs -= 0.1f;
break;
case '8':
phiObs += (float).1;
phiObs += 0.1f;
break;
case '+':
rObs -= (float).1;
rObs -= 0.1f;
break;
case '-':
rObs += (float).1;
rObs += 0.1f;
break;
}
if (thetaObs>pi)
{
thetaObs = -2 * pi + thetaObs;
@ -320,9 +325,9 @@ static void DrawOpenGLMSER(Mat img, Mat result)
if (thetaObs<-pi)
thetaObs = 2 * pi + thetaObs;
if (phiObs>pi / 2)
phiObs = pi / 2 - (float)0.0001;
phiObs = pi / 2 - 0.0001f;
if (phiObs<-pi / 2)
phiObs = -pi / 2 + (float)0.00001;
phiObs = -pi / 2 + 0.00001f;
if (rObs<0)
rObs = 0;
obsX = rObs*cos(thetaObs)*cos(phiObs);
@ -334,67 +339,59 @@ static void DrawOpenGLMSER(Mat img, Mat result)
}
#endif
// Add nested rectangles of different widths and colors to an image
static void addNestedRectangles(Mat &img, Point p0, int* width, int *color, int n) {
for (int i = 0; i<n; i++)
{
rectangle(img, Rect(p0, Size(width[i], width[i])), Scalar(color[i]), 1);
p0 += Point((width[i] - width[i + 1]) / 2, (width[i] - width[i + 1]) / 2);
floodFill(img, p0, Scalar(color[i]));
}
}
// Add nested circles of different widths and colors to an image
static void addNestedCircles(Mat &img, Point p0, int *width, int *color, int n) {
for (int i = 0; i<n; i++)
{
circle(img, p0, width[i] / 2, Scalar(color[i]), 1);
floodFill(img, p0, Scalar(color[i]));
}
}
static Mat MakeSyntheticImage()
{
const int fond = 0;
Mat img(800, 800, CV_8UC1);
map<int, char> val;
int fond = 0;
img = Scalar(fond);
val[fond] = 1;
int width1[] = { 390, 380, 300, 290, 280, 270, 260, 250, 210, 190, 150, 100, 80, 70 };
int color1[] = { 80, 180, 160, 140, 120, 100, 90, 110, 170, 150, 140, 100, 220 };
Point p0(10, 10);
int *width, *color;
width = width1;
color = color1;
for (int i = 0; i<13; i++)
{
rectangle(img, Rect(p0, Size(width[i], width[i])), Scalar(color[i]), 1);
p0 += Point((width[i] - width[i + 1]) / 2, (width[i] - width[i + 1]) / 2);
floodFill(img, p0, Scalar(color[i]));
int width[] = { 390, 380, 300, 290, 280, 270, 260, 250, 210, 190, 150, 100, 80, 70 };
}
int color1[] = { 80, 180, 160, 140, 120, 100, 90, 110, 170, 150, 140, 100, 220 };
int color2[] = { 81, 181, 161, 141, 121, 101, 91, 111, 171, 151, 141, 101, 221 };
color = color2;
p0 = Point(200, 600);
for (int i = 0; i<13; i++)
{
circle(img, p0, width[i] / 2, Scalar(color[i]), 1);
floodFill(img, p0, Scalar(color[i]));
int color3[] = { 175, 75, 95, 115, 135, 155, 165, 145, 85, 105, 115, 155, 35 };
int color4[] = { 173, 73, 93, 113, 133, 153, 163, 143, 83, 103, 113, 153, 33 };
}
int color3[] = { 175,75,95,115,135,155,165,145,85,105,115,156 };
color = color3;
p0 = Point(410, 10);
for (int i = 0; i<13; i++)
{
rectangle(img, Rect(p0, Size(width[i], width[i])), Scalar(color[i]), 1);
p0 += Point((width[i] - width[i + 1]) / 2, (width[i] - width[i + 1]) / 2);
floodFill(img, p0, Scalar(color[i]));
addNestedRectangles(img, Point(10, 10), width, color1, 13);
addNestedCircles(img, Point(200, 600), width, color2, 13);
}
int color4[] = { 173,73,93,113,133,153,163,143,83,103,114,154 };
color = color4;
addNestedRectangles(img, Point(410, 10), width, color3, 13);
addNestedCircles(img, Point(600, 600), width, color4, 13);
p0 = Point(600, 600);
for (int i = 0; i<13; i++)
{
circle(img, p0, width[i] / 2, Scalar(color[i]), 1);
floodFill(img, p0, Scalar(color[i]));
}
int histSize = 256;
float range[] = { 0, 256 };
const float* histRange[] = { range };
Mat hist;
// we compute the histogram
calcHist(&img, 1, 0, Mat(), hist, 1, &histSize, histRange, true, false);
cout << "****************Maximal region************************\n";
for (int i = 0; i < hist.rows ; i++)
for (int i = 0; i < hist.rows; i++)
{
if (hist.at<float>(i, 0)!=0)
{
cout << "h" << i << "=\t" << hist.at<float>(i, 0) << "\n";
cout << "h" << setw(3) << left << i << "\t=\t" << hist.at<float>(i, 0) << "\n";
}
}
@ -403,68 +400,60 @@ static Mat MakeSyntheticImage()
int main(int argc, char *argv[])
{
vector<String> fileName;
Mat imgOrig,img;
Size blurSize(5,5);
Mat imgOrig, img;
Size blurSize(5, 5);
cv::CommandLineParser parser(argc, argv, "{ help h | | }{ @input | | }");
if (parser.has("help"))
{
help();
return 0;
}
string input = parser.get<string>("@input");
if (!input.empty())
{
fileName.push_back(input);
imgOrig = imread(fileName[0], IMREAD_GRAYSCALE);
imgOrig = imread(input, IMREAD_GRAYSCALE);
blur(imgOrig, img, blurSize);
}
else
{
fileName.push_back("SyntheticImage.bmp");
imgOrig = MakeSyntheticImage();
img=imgOrig;
img = imgOrig;
}
MSERParams pDefaultMSER;
// Descriptor array MSER
vector<String> typeDesc;
// Param array for MSER
vector<MSERParams> pMSER;
vector<MSERParams>::iterator itMSER;
// Color palette
vector<Vec3b> palette;
for (int i = 0; i<65536; i++)
vector<Vec3b> palette;
for (int i = 0; i<=numeric_limits<uint16_t>::max(); i++)
palette.push_back(Vec3b((uchar)rand(), (uchar)rand(), (uchar)rand()));
help();
MSERParams params;
params.delta = 10;
params.minArea = 100;
params.maxArea = 5000;
params.maxVariation = 2;
params.minDiversity = 0;
params.pass2Only = true;
typeDesc.push_back("MSER");
pMSER.push_back(pDefaultMSER);
pMSER.back().delta = 10;
pMSER.back().minArea = 100;
pMSER.back().maxArea = 5000;
pMSER.back().maxVariation = 2;
pMSER.back().minDiversity = 0;
pMSER.back().pass2Only = true;
pMSER.push_back(params);
params.pass2Only = false;
typeDesc.push_back("MSER");
pMSER.push_back(pDefaultMSER);
pMSER.back().delta = 10;
pMSER.back().minArea = 100;
pMSER.back().maxArea = 5000;
pMSER.back().maxVariation = 2;
pMSER.back().minDiversity = 0;
pMSER.back().pass2Only = false;
pMSER.push_back(params);
params.delta = 100;
typeDesc.push_back("MSER");
pMSER.push_back(pDefaultMSER);
pMSER.back().delta = 100;
pMSER.back().minArea = 100;
pMSER.back().maxArea = 5000;
pMSER.back().maxVariation = 2;
pMSER.back().minDiversity = 0;
pMSER.back().pass2Only = false;
itMSER = pMSER.begin();
vector<double> desMethCmp;
pMSER.push_back(params);
vector<MSERParams>::iterator itMSER = pMSER.begin();
Ptr<Feature2D> b;
String label;
// Descriptor loop
@ -473,14 +462,14 @@ int main(int argc, char *argv[])
for (itDesc = typeDesc.begin(); itDesc != typeDesc.end(); ++itDesc)
{
vector<KeyPoint> keyImg1;
if (*itDesc == "MSER"){
if (*itDesc == "MSER")
{
if (img.type() == CV_8UC3)
{
b = MSER::create(itMSER->delta, itMSER->minArea, itMSER->maxArea, itMSER->maxVariation, itMSER->minDiversity, itMSER->maxEvolution,
itMSER->areaThreshold, itMSER->minMargin, itMSER->edgeBlurSize);
label = Legende(*itMSER);
++itMSER;
}
else
{
@ -490,6 +479,7 @@ int main(int argc, char *argv[])
++itMSER;
}
}
if (img.type()==CV_8UC3)
{
img.copyTo(result);
@ -505,36 +495,37 @@ int main(int argc, char *argv[])
try
{
// We can detect regions using detectRegions method
vector<KeyPoint> keyImg;
vector<Rect> zone;
vector<vector <Point> > region;
Mat desc;
vector<KeyPoint> keyImg;
vector<Rect> zone;
vector<vector <Point> > region;
Mat desc;
if (b.dynamicCast<MSER>() != NULL)
{
Ptr<MSER> sbd = b.dynamicCast<MSER>();
sbd->detectRegions(img, region, zone);
int i = 0;
//result = Scalar(0, 0, 0);
int nbPixelInMSER=0;
for (vector<vector <Point> >::iterator itr = region.begin(); itr != region.end(); ++itr, ++i)
for (vector<vector <Point> >::iterator itr = region.begin(); itr != region.end(); ++itr)
{
for (vector <Point>::iterator itp = region[i].begin(); itp != region[i].end(); ++itp)
for (vector <Point>::iterator itp = itr->begin(); itp != itr->end(); ++itp)
{
// all pixels belonging to region become blue
result.at<Vec3b>(itp->y, itp->x) = Vec3b(128, 0, 0);
nbPixelInMSER++;
}
}
cout << "Number of MSER region " << region.size()<<" Number of pixels in all MSER region : "<<nbPixelInMSER<<"\n";
cout << "Number of MSER region: " << region.size() << "; Number of pixels in all MSER region: " << nbPixelInMSER << "\n";
}
namedWindow(*itDesc + label, WINDOW_AUTOSIZE);
imshow(*itDesc + label, result);
const string winName = *itDesc + label;
namedWindow(winName, WINDOW_AUTOSIZE);
imshow(winName, result);
imshow("Original", img);
}
catch (Exception& e)
{
cout << "Feature : " << *itDesc << "\n";
cout << "Feature: " << *itDesc << "\n";
cout << e.msg << endl;
}
#ifdef HAVE_OPENGL

@ -208,12 +208,18 @@ for label in ['ClassPredictor', 'BoxEncodingPredictor']:
graph_def.node.extend([flatten])
addConcatNode('%s/concat' % label, concatInputs, 'concat/axis_flatten')
idx = 0
for node in graph_def.node:
if node.name == ('BoxPredictor_%d/BoxEncodingPredictor/Conv2D' % idx):
text_format.Merge('b: true', node.attr["loc_pred_transposed"])
idx += 1
assert(idx == args.num_layers)
# Add layers that generate anchors (bounding boxes proposals).
scales = [args.min_scale + (args.max_scale - args.min_scale) * i / (args.num_layers - 1)
for i in range(args.num_layers)] + [1.0]
priorBoxes = []
addConstNode('reshape_prior_boxes_to_4d', [1, 2, -1, 1])
for i in range(args.num_layers):
priorBox = NodeDef()
priorBox.name = 'PriorBox_%d' % i
@ -240,18 +246,9 @@ for i in range(args.num_layers):
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])
graph_def.node.extend([priorBox])
priorBoxes.append(priorBox.name)
# Reshape from 1x2xN to 1x2xNx1
reshape = NodeDef()
reshape.name = priorBox.name + '/4d'
reshape.op = 'Reshape'
reshape.input.append(priorBox.name)
reshape.input.append('reshape_prior_boxes_to_4d')
graph_def.node.extend([reshape])
priorBoxes.append(reshape.name)
addConcatNode('PriorBox/concat', priorBoxes, 'PriorBox/concat/axis')
addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten')
# Sigmoid for classes predictions and DetectionOutput layer
sigmoid = NodeDef()
@ -276,7 +273,6 @@ text_format.Merge('i: 100', detectionOut.attr['top_k'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 100', detectionOut.attr['keep_top_k'])
text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])
text_format.Merge('b: true', detectionOut.attr['loc_pred_transposed'])
graph_def.node.extend([detectionOut])
