Merge pull request #22947 from chacha21:hasNonZero

Added cv::hasNonZero() #22947 

`cv::hasNonZero()` is semantically equivalent to (`cv::countNonZero()>0`) but stops scanning the image as soon as a non-zero value is found, for a performance gain.

- [X] I agree to contribute to the project under Apache 2 License.
- [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [X] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake

This pull request might be refused, but I submit it to know if further work is needed or if I just stop working on it.
The idea is only a performance gain vs `countNonZero()>0` at the cost of more code.

Reasons why it might be refused :

- this is just more code
- the execution time is "unfair"/"unpredictable" since it depends on the position of the first non-zero value
- the user must be aware that the default search goes from the first row/col to the last row/col, and has no way to customize that, even if their use case tells them where a non-zero value is likely to be found
- the PR in its current state uses a mere `countNonZero()>0` for the ocl implementation; there is not much sense in trying to break out of the ocl kernel call early when a non-zero value is encountered. So the ocl implementation does not bring any improvement.
- there is no IPP function that can help (`countNonZero()` is based on `ippCountInRange`)
- the PR in its current state might be slower than a call to `countNonZero()>0` in some cases (see "challenges" below)

Reasons why it might be accepted :

- the performance gain is huge on average, if we consider that "on average" means "non zero in the middle of the image"
- the "missing" IPP implementation is replaced by an "OpenCV universal intrinsics" implementation
- the PR in its current state is almost always faster than a call to `countNonZero()>0`, is only slightly slower in the worst cases, and not even for all matrices

The worst case is either an all-zero matrix or a non-zero value at the very last position. In such a case, the `hasNonZero()` implementation has to scan the whole matrix, just as `countNonZero()` does. We would expect the performance to be the same in this case, but `ippCountInRange` is hard to beat!
There is also the case of very small 8-bit matrices (<=32x32...), where it can be hard to keep the SIMD units fed.

For all cases except the worst, my custom `hasNonZero()` performs better than `ippCountInRange()`
For the worst case, my custom `hasNonZero()` performs better than `ippCountInRange()` *except for large matrices of type CV_32S or CV_64F* (but surprisingly, not CV_32F).
The difference is small, but it exists (and I don't understand why).
For very small CV_8U matrices `ippCountInRange()` seems unbeatable.

Here is the code that I use to check timings


  //test cv::hasNonZero() vs (cv::countNonZero()>0) for different matrices sizes, types, strides...
    const std::vector<cv::Size> sizes = {{32, 32}, {64, 64}, {128, 128}, {320, 240}, {512, 512}, {640, 480}, {1024, 768}, {2048, 2048}, {1031, 1000}};
    const std::vector<int> types = {CV_8U, CV_16U, CV_32S, CV_32F, CV_64F};
    const size_t iterations = 1000;
    for(const cv::Size& size : sizes)
      for(const int type : types)
        for(int c = 0 ; c<2 ; ++c)
          const bool continuous = !c;
          for(int i = 0 ; i<4 ; ++i)
            cv::Mat m = continuous ? cv::Mat::zeros(size, type) : cv::Mat(cv::Mat::zeros(cv::Size(2*size.width, size.height), type), cv::Rect(cv::Point(0, 0), size));
            const bool nz = (i <= 2);
            const unsigned int nzOffsetRange = 10;
            const unsigned int nzOffset = cv::randu<unsigned int>()%nzOffsetRange;
            const cv::Point pos = 
              (i == 0) ? cv::Point(nzOffset, 0) :
              (i == 1) ? cv::Point(size.width/2-nzOffsetRange/2+nzOffset, size.height/2) :
              (i == 2) ? cv::Point(size.width-1-nzOffset, size.height-1) :
              cv::Point(0, 0);
            std::cout << "============================================================" << std::endl;
            std::cout << "size:" << size << "  type:" << type << "  continuous = " << (continuous ? "true" : "false") << "  iterations:" << iterations << "  nz=" << (nz ? "true" : "false");
            std::cout << "  pos=" << ((i == 0) ? "begin" : (i == 1) ? "middle" : (i == 2) ? "end" : "none");
            std::cout << std::endl;
            cv::Mat mask = cv::Mat::zeros(size, CV_8UC1);
            mask.at<unsigned char>(pos) = 0xFF;
            m.setTo(cv::Scalar::all(nz ? 1 : 0), mask);
            std::vector<bool> results;
            std::vector<double> timings;

              bool res = false;
              auto ref = cv::getTickCount();
              for(size_t k = 0 ; k<iterations ; ++k)
                res = cv::hasNonZero(m);
              auto now = cv::getTickCount();
              const bool error = (res != nz);
              if (error)
              bool res = false;
              auto ref = cv::getTickCount();
              for(size_t k = 0 ; k<iterations ; ++k)
                res = (cv::countNonZero(m)>0);
              auto now = cv::getTickCount();
              const bool error = (res != nz);
              if (error)

            const size_t bestTimingIndex = (std::min_element(timings.begin(), timings.end())-timings.begin());
            if ((bestTimingIndex != 0) || (std::find_if_not(results.begin(), results.end(), [&](bool r) {return (r == nz);}) != results.end()))
              std::cout << "cv::hasNonZero\t\t=>" << results[0] << ((results[0] != nz) ? "  ERROR" : "") << "   perf:" << timings[0] << "ms => " << (iterations/timings[0]*1000) << " im/s" << ((bestTimingIndex == 0) ? " * " : "") << std::endl;
              std::cout << "cv::countNonZero\t=>" << results[1] << ((results[1] != nz) ? "  ERROR" : "") << "   perf:" << timings[1] << "ms => " << (iterations/timings[1]*1000) << " im/s" << ((bestTimingIndex == 1) ? " * " : "") << std::endl;


Here is a report of this benchmark (it only reports timings when `cv::countNonZero()` is faster)
My CPU is an Intel Core i7-4790 @ 3.60GHz


size:[32 x 32]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[32 x 32]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
cv::hasNonZero          =>1   perf:0.353764ms => 2.82674e+06 im/s
cv::countNonZero        =>1   perf:0.282044ms => 3.54555e+06 im/s *
size:[32 x 32]  type:0  continuous = false  iterations:1000  nz=true  pos=end
cv::hasNonZero          =>1   perf:0.610478ms => 1.63806e+06 im/s
cv::countNonZero        =>1   perf:0.283182ms => 3.5313e+06 im/s *
size:[32 x 32]  type:0  continuous = false  iterations:1000  nz=false  pos=none
cv::hasNonZero          =>0   perf:0.630115ms => 1.58701e+06 im/s
cv::countNonZero        =>0   perf:0.282044ms => 3.54555e+06 im/s *
size:[32 x 32]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[32 x 32]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[32 x 32]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[32 x 32]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[32 x 32]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[32 x 32]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:5  continuous = false  iterations:1000  nz=true  pos=end
cv::hasNonZero          =>1   perf:0.607347ms => 1.64651e+06 im/s
cv::countNonZero        =>1   perf:0.467037ms => 2.14116e+06 im/s *
size:[32 x 32]  type:5  continuous = false  iterations:1000  nz=false  pos=none
cv::hasNonZero          =>0   perf:0.618162ms => 1.6177e+06 im/s
cv::countNonZero        =>0   perf:0.468175ms => 2.13595e+06 im/s *
size:[32 x 32]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[32 x 32]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[32 x 32]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[32 x 32]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[32 x 32]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[64 x 64]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[64 x 64]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[64 x 64]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[64 x 64]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[128 x 128]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[128 x 128]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[128 x 128]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[128 x 128]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[320 x 240]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[320 x 240]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[320 x 240]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[320 x 240]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[512 x 512]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[512 x 512]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[512 x 512]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[512 x 512]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[640 x 480]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[640 x 480]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[640 x 480]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[640 x 480]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[1024 x 768]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[1024 x 768]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[1024 x 768]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[1024 x 768]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:4  continuous = true  iterations:1000  nz=true  pos=end
cv::hasNonZero          =>1   perf:895.381ms => 1116.84 im/s
cv::countNonZero        =>1   perf:882.569ms => 1133.06 im/s *
size:[2048 x 2048]  type:4  continuous = true  iterations:1000  nz=false  pos=none
cv::hasNonZero          =>0   perf:899.53ms => 1111.69 im/s
cv::countNonZero        =>0   perf:870.894ms => 1148.24 im/s *
size:[2048 x 2048]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[2048 x 2048]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:6  continuous = true  iterations:1000  nz=true  pos=end
cv::hasNonZero          =>1   perf:2018.92ms => 495.313 im/s
cv::countNonZero        =>1   perf:1966.37ms => 508.552 im/s *
size:[2048 x 2048]  type:6  continuous = true  iterations:1000  nz=false  pos=none
cv::hasNonZero          =>0   perf:2005.87ms => 498.537 im/s
cv::countNonZero        =>0   perf:1992.78ms => 501.812 im/s *
size:[2048 x 2048]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[2048 x 2048]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[2048 x 2048]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[2048 x 2048]  type:6  continuous = false  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:0  continuous = true  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:0  continuous = true  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:0  continuous = true  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:0  continuous = true  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:0  continuous = false  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:0  continuous = false  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:0  continuous = false  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:0  continuous = false  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:2  continuous = true  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:2  continuous = true  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:2  continuous = true  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:2  continuous = true  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:2  continuous = false  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:2  continuous = false  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:2  continuous = false  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:2  continuous = false  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:4  continuous = true  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:4  continuous = true  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:4  continuous = true  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:4  continuous = true  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:4  continuous = false  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:4  continuous = false  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:4  continuous = false  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:4  continuous = false  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:5  continuous = true  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:5  continuous = true  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:5  continuous = true  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:5  continuous = true  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:5  continuous = false  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:5  continuous = false  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:5  continuous = false  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:5  continuous = false  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:6  continuous = true  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:6  continuous = true  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:6  continuous = true  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:6  continuous = true  iterations:1000  nz=false  pos=none
size:[1031 x 1000]  type:6  continuous = false  iterations:1000  nz=true  pos=begin
size:[1031 x 1000]  type:6  continuous = false  iterations:1000  nz=true  pos=middle
size:[1031 x 1000]  type:6  continuous = false  iterations:1000  nz=true  pos=end
size:[1031 x 1000]  type:6  continuous = false  iterations:1000  nz=false  pos=none

Pierre Chatelier 2 years ago committed by GitHub
parent eec8a20c33
commit 60b806f9b8
No known key found for this signature in database
  1. 1
  2. 8
  3. 24
  4. 16
  5. 107
  6. 327
  7. 201

@ -6,6 +6,7 @@ ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
ocv_add_dispatched_file(convert SSE2 AVX2 VSX3) ocv_add_dispatched_file(convert SSE2 AVX2 VSX3)
ocv_add_dispatched_file(convert_scale SSE2 AVX2) ocv_add_dispatched_file(convert_scale SSE2 AVX2)
ocv_add_dispatched_file(count_non_zero SSE2 AVX2) ocv_add_dispatched_file(count_non_zero SSE2 AVX2)
ocv_add_dispatched_file(has_non_zero SSE2 AVX2)
ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD) ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD)
ocv_add_dispatched_file(mean SSE2 AVX2) ocv_add_dispatched_file(mean SSE2 AVX2)
ocv_add_dispatched_file(merge SSE2 AVX2) ocv_add_dispatched_file(merge SSE2 AVX2)

@ -572,6 +572,14 @@ independently for each channel.
*/ */
CV_EXPORTS_AS(sumElems) Scalar sum(InputArray src); CV_EXPORTS_AS(sumElems) Scalar sum(InputArray src);
/** @brief Checks for the presence of at least one non-zero array element.
The function returns whether there are non-zero elements in src.
@param src single-channel array.
@sa mean, meanStdDev, norm, minMaxLoc, calcCovarMatrix, countNonZero
*/
CV_EXPORTS_W bool hasNonZero( InputArray src );
/** @brief Counts non-zero array elements. /** @brief Counts non-zero array elements.
The function returns the number of non-zero elements in src : The function returns the number of non-zero elements in src :

@ -460,6 +460,30 @@ OCL_PERF_TEST_P(CountNonZeroFixture, CountNonZero,
} }
///////////// hasNonZero ////////////////////////
// OpenCL performance test for cv::hasNonZero, parameterized by (matrix size, matrix type).
typedef Size_MatType HasNonZeroFixture;
OCL_PERF_TEST_P(HasNonZeroFixture, HasNonZero,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
// Helper defined elsewhere; presumably rejects sizes the device cannot allocate — confirm.
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type);
/*bool result = false;*/
// Input is filled by randu(src, 0, 10) before the measured section.
randu(src, 0, 10);;
// The result assignment is commented out, so the return value is discarded here.
OCL_TEST_CYCLE() /*result =*/ cv::hasNonZero(src);
///////////// Phase //////////////////////// ///////////// Phase ////////////////////////
typedef Size_MatType PhaseFixture; typedef Size_MatType PhaseFixture;

@ -101,4 +101,20 @@ PERF_TEST_P(Size_MatType, countNonZero, testing::Combine( testing::Values( TYPIC
} }
// CPU performance test for hasNonZero over TYPICAL_MAT_SIZES and the single-channel types listed below.
PERF_TEST_P(Size_MatType, hasNonZero, testing::Combine( testing::Values( TYPICAL_MAT_SIZES ), testing::Values( CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1 ) ))
Size sz = get<0>(GetParam());
int matType = get<1>(GetParam());
Mat src(sz, matType);
/*bool hnz = false;*/, WARMUP_RNG);
// Inputs with width <= 640 pass 8 to TEST_CYCLE_MULTIRUN; wider inputs pass 1.
int runs = (sz.width <= 640) ? 8 : 1;
// The result assignment is commented out, so the return value is discarded here.
TEST_CYCLE_MULTIRUN(runs) /*hnz =*/ hasNonZero(src);
} // namespace } // namespace

@ -0,0 +1,107 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "stat.hpp"
#include "has_non_zero.simd.hpp"
#include "has_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace cv {
// File-local wrapper: obtains the hasNonZero kernel for the given depth through the
// CV_CPU_DISPATCH macro (the dispatch modes come from has_non_zero.simd_declarations.hpp).
static HasNonZeroFunc getHasNonZeroTab(int depth)
CV_CPU_DISPATCH(getHasNonZeroTab, (depth),
// OpenCL path for hasNonZero.
// Reuses the "reduce" kernel built with OP_COUNT_NON_ZERO and reports, through `res`,
// whether the summed per-group counts are greater than zero.
// Returns true when `res` was written, false when this path cannot be used
// (CV_64F input without double support, or the kernel could not be created).
static bool ocl_hasNonZero( InputArray _src, bool & res )
int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
// Double-precision input requires device support for doubles.
if (depth == CV_64F && !doubleSupport)
return false;
// One partial result per compute unit is stored in `db` (see its construction below).
int dbsize = ocl::Device::getDefault().maxComputeUnits();
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// wgs2_aligned: doubled until it is >= wgs, then halved once; passed as WGS2_ALIGNED.
int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs)
wgs2_aligned <<= 1;
wgs2_aligned >>= 1;
// Build options select the source vector type, the scalar type, and the count-non-zero operation.
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
" -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
ocl::typeToStr(depth), (int)wgs, kercn,
wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
_src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
if (k.empty())
return false;
// db is a 1 x dbsize CV_32SC1 buffer that the kernel writes into.
UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int),
dbsize, ocl::KernelArg::PtrWriteOnly(db));
size_t globalsize = dbsize * wgs;
if (, &globalsize, &wgs, true))
// Comma expression: assigns `res` from the summed counts, then returns true.
return res = (saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0])>0), true;
return false;
// Returns whether the single-channel array `_src` contains at least one non-zero element.
// Asserts that the input has exactly one channel.
// An OpenCL attempt is made for UMat inputs with at most 2 dimensions; otherwise the
// depth-specific CPU kernel is applied and scanning stops once `res` becomes true.
bool hasNonZero(InputArray _src)
int type = _src.type(), cn = CV_MAT_CN(type);
CV_Assert( cn == 1 );
bool res = false;
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_hasNonZero(_src, res),
Mat src = _src.getMat();
// Pick the kernel matching the element depth; a null entry is treated as an error.
HasNonZeroFunc func = getHasNonZeroTab(src.depth());
CV_Assert( func != 0 );
if (src.dims == 2)//fast path to avoid creating planes of single rows
if (src.isContinuous())
res |= func(src.ptr<uchar>(0),;
// Row-by-row scan; the `!res` condition ends the loop at the first row that has a non-zero.
for(int row = 0, rowsCount = src.rows ; !res && (row<rowsCount) ; ++row)
res |= func(src.ptr<uchar>(row), src.cols);
else//if (src.dims != 2)
// General case: iterate over the planes produced by NAryMatIterator.
const Mat* arrays[] = {&src, nullptr};
Mat planes[1];
NAryMatIterator itNAry(arrays, planes, 1);
for(size_t p = 0 ; !res && (p<itNAry.nplanes) ; ++p, ++itNAry)
const Mat& plane = itNAry.planes[0];
if (plane.isContinuous())
res |= func(plane.ptr<uchar>(0),;
for(int row = 0, rowsCount = plane.rows ; !res && (row<rowsCount) ; ++row)
res |= func(plane.ptr<uchar>(row), plane.cols);
return res;
} // namespace

@ -0,0 +1,327 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
namespace cv {
// Signature shared by all depth-specific kernels: (pointer to data as uchar, element count) -> bool.
typedef bool (*HasNonZeroFunc)(const uchar*, size_t);
// Returns the kernel for the given depth (defined further down in this file).
HasNonZeroFunc getHasNonZeroTab(int depth);
// Scalar scan: returns true if any of the `len` elements of `src` is non-zero.
// The main loop ORs four elements per iteration; a second loop handles the remainder.
// Both loops stop as soon as `res` is true.
template<typename T>
inline bool hasNonZero_(const T* src, size_t len )
bool res = false;
if (len > 0)
size_t i=0;
for(; !res && (i+4 <= len); i += 4 )
res |= ((src[i] | src[i+1] | src[i+2] | src[i+3]) != 0);
for( ; !res && (i < len); i++ )
res |= (src[i] != 0);
return res;
// float overload of the scalar scan.
// When float and unsigned int have the same size, four values at a time are read as
// unsigned integers, OR-ed together, and shifted left by one to discard the top bit
// before the zero test (presumably so that -0.0f is treated as zero — confirm).
// Remaining elements are compared with `!= 0` as floats.
// NOTE(review): the reinterpret_cast reads float storage through an unsigned int pointer —
// confirm this is acceptable under the project's aliasing assumptions.
inline bool hasNonZero_(const float* src, size_t len )
bool res = false;
if (len > 0)
size_t i=0;
if (sizeof(float) == sizeof(unsigned int))
typedef unsigned int float_as_uint_t;
const float_as_uint_t* src_as_ui = reinterpret_cast<const float_as_uint_t*>(src);
for(; !res && (i+4 <= len); i += 4 )
const float_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]);
res |= ((gathered<<1) != 0);//remove what would be the sign bit
for( ; !res && (i < len); i++ )
res |= (src[i] != 0);
return res;
// double overload of the scalar scan.
// When double and uint64_t have the same size, four values at a time are read as
// 64-bit unsigned integers, OR-ed together, and shifted left by one to discard the top bit
// before the zero test (presumably so that -0.0 is treated as zero — confirm).
// Remaining elements are compared with `!= 0` as doubles.
// NOTE(review): the reinterpret_cast reads double storage through a uint64_t pointer —
// confirm this is acceptable under the project's aliasing assumptions.
inline bool hasNonZero_(const double* src, size_t len )
bool res = false;
if (len > 0)
size_t i=0;
if (sizeof(double) == sizeof(uint64_t))
typedef uint64_t double_as_uint_t;
const double_as_uint_t* src_as_ui = reinterpret_cast<const double_as_uint_t*>(src);
for(; !res && (i+4 <= len); i += 4 )
const double_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]);
res |= ((gathered<<1) != 0);//remove what would be the sign bit
for( ; !res && (i < len); i++ )
res |= (src[i] != 0);
return res;
// 8-bit kernel: returns true if any of the `len` bytes at `src` is non-zero.
// Vector loop: two v_uint8 registers are loaded per iteration, OR-ed, and tested against zero.
// Whatever the vector loop did not consume is handed to the scalar hasNonZero_.
static bool hasNonZero8u( const uchar* src, size_t len )
bool res = false;
const uchar* srcEnd = src+len;
typedef v_uint8 v_type;
const v_type v_zero = vx_setzero_u8();
constexpr const int unrollCount = 2;
// step = number of elements consumed by one unrolled iteration.
int step = v_type::nlanes * unrollCount;
// Rounds len down to a multiple of step (assuming step is a power of two).
// NOTE(review): len is size_t but len0/countSIMD are int — confirm very large inputs cannot truncate here.
int len0 = len & -step;
const uchar* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
res = v_check_any(((v0 | v1) != v_zero));
// Scalar tail over [src, srcEnd); skipped when the vector loop already found a non-zero.
return res || hasNonZero_(src, srcEnd-src);
// 16-bit kernel: returns true if any of the `len` elements at `src` is non-zero.
// Vector loop: four v_uint16 registers are loaded per iteration, OR-reduced pairwise,
// and tested against zero. The remainder is handled by the scalar hasNonZero_.
static bool hasNonZero16u( const ushort* src, size_t len )
bool res = false;
const ushort* srcEnd = src+len;
typedef v_uint16 v_type;
const v_type v_zero = vx_setzero_u16();
constexpr const int unrollCount = 4;
// step = number of elements consumed by one unrolled iteration.
int step = v_type::nlanes * unrollCount;
// Rounds len down to a multiple of step (assuming step is a power of two).
int len0 = len & -step;
const ushort* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
v_type v2 = vx_load(src);
src += v_type::nlanes;
v_type v3 = vx_load(src);
src += v_type::nlanes;
// Pairwise OR reduction of the four registers.
v0 |= v1;
v2 |= v3;
res = v_check_any(((v0 | v2) != v_zero));
// Scalar tail over [src, srcEnd); skipped when the vector loop already found a non-zero.
return res || hasNonZero_(src, srcEnd-src);
// 32-bit integer kernel: returns true if any of the `len` elements at `src` is non-zero.
// Vector loop: eight v_int32 registers are loaded per iteration, OR-reduced as a tree,
// and tested against zero. The remainder is handled by the scalar hasNonZero_.
static bool hasNonZero32s( const int* src, size_t len )
bool res = false;
const int* srcEnd = src+len;
typedef v_int32 v_type;
const v_type v_zero = vx_setzero_s32();
constexpr const int unrollCount = 8;
// step = number of elements consumed by one unrolled iteration.
int step = v_type::nlanes * unrollCount;
// Rounds len down to a multiple of step (assuming step is a power of two).
int len0 = len & -step;
const int* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
v_type v2 = vx_load(src);
src += v_type::nlanes;
v_type v3 = vx_load(src);
src += v_type::nlanes;
v_type v4 = vx_load(src);
src += v_type::nlanes;
v_type v5 = vx_load(src);
src += v_type::nlanes;
v_type v6 = vx_load(src);
src += v_type::nlanes;
v_type v7 = vx_load(src);
src += v_type::nlanes;
// Tree-shaped OR reduction: 8 -> 4 -> 2 registers, the last OR happens in the test below.
v0 |= v1;
v2 |= v3;
v4 |= v5;
v6 |= v7;
v0 |= v2;
v4 |= v6;
res = v_check_any(((v0 | v4) != v_zero));
// Scalar tail over [src, srcEnd); skipped when the vector loop already found a non-zero.
return res || hasNonZero_(src, srcEnd-src);
// 32-bit float kernel: returns true if any of the `len` elements at `src` is non-zero.
// Vector loop: eight v_float32 registers are loaded per iteration and OR-reduced as a tree.
// The test is written as "not all lanes equal zero" rather than "any lane differs from zero";
// the retained comment below explains that this is so NaN lanes are reported as non-zero.
// The remainder is handled by the scalar hasNonZero_ float overload.
static bool hasNonZero32f( const float* src, size_t len )
bool res = false;
const float* srcEnd = src+len;
typedef v_float32 v_type;
const v_type v_zero = vx_setzero_f32();
constexpr const int unrollCount = 8;
// step = number of elements consumed by one unrolled iteration.
int step = v_type::nlanes * unrollCount;
// Rounds len down to a multiple of step (assuming step is a power of two).
int len0 = len & -step;
const float* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
v_type v2 = vx_load(src);
src += v_type::nlanes;
v_type v3 = vx_load(src);
src += v_type::nlanes;
v_type v4 = vx_load(src);
src += v_type::nlanes;
v_type v5 = vx_load(src);
src += v_type::nlanes;
v_type v6 = vx_load(src);
src += v_type::nlanes;
v_type v7 = vx_load(src);
src += v_type::nlanes;
// Tree-shaped OR reduction: 8 -> 4 -> 2 registers, the last OR happens in the test below.
v0 |= v1;
v2 |= v3;
v4 |= v5;
v6 |= v7;
v0 |= v2;
v4 |= v6;
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v4) == v_zero));
// Scalar tail over [src, srcEnd); skipped when the vector loop already found a non-zero.
return res || hasNonZero_(src, srcEnd-src);
// 64-bit float kernel: returns true if any of the `len` elements at `src` is non-zero.
// The vector loop is guarded by CV_SIMD_64F: sixteen v_float64 registers are loaded per
// iteration and OR-reduced as a tree. As in the 32-bit float kernel, the test is written as
// "not all lanes equal zero"; the retained comment below gives the NaN-related reason.
// The remainder is handled by the scalar hasNonZero_ double overload.
static bool hasNonZero64f( const double* src, size_t len )
bool res = false;
const double* srcEnd = src+len;
#if CV_SIMD_64F
typedef v_float64 v_type;
const v_type v_zero = vx_setzero_f64();
constexpr const int unrollCount = 16;
// step = number of elements consumed by one unrolled iteration.
int step = v_type::nlanes * unrollCount;
// Rounds len down to a multiple of step (assuming step is a power of two).
int len0 = len & -step;
const double* srcSimdEnd = src+len0;
int countSIMD = static_cast<int>((srcSimdEnd-src)/step);
while(!res && countSIMD--)
v_type v0 = vx_load(src);
src += v_type::nlanes;
v_type v1 = vx_load(src);
src += v_type::nlanes;
v_type v2 = vx_load(src);
src += v_type::nlanes;
v_type v3 = vx_load(src);
src += v_type::nlanes;
v_type v4 = vx_load(src);
src += v_type::nlanes;
v_type v5 = vx_load(src);
src += v_type::nlanes;
v_type v6 = vx_load(src);
src += v_type::nlanes;
v_type v7 = vx_load(src);
src += v_type::nlanes;
v_type v8 = vx_load(src);
src += v_type::nlanes;
v_type v9 = vx_load(src);
src += v_type::nlanes;
v_type v10 = vx_load(src);
src += v_type::nlanes;
v_type v11 = vx_load(src);
src += v_type::nlanes;
v_type v12 = vx_load(src);
src += v_type::nlanes;
v_type v13 = vx_load(src);
src += v_type::nlanes;
v_type v14 = vx_load(src);
src += v_type::nlanes;
v_type v15 = vx_load(src);
src += v_type::nlanes;
// Tree-shaped OR reduction: 16 -> 8 -> 4 -> 2 registers, the last OR happens in the test below.
v0 |= v1;
v2 |= v3;
v4 |= v5;
v6 |= v7;
v8 |= v9;
v10 |= v11;
v12 |= v13;
v14 |= v15;
v0 |= v2;
v4 |= v6;
v8 |= v10;
v12 |= v14;
v0 |= v4;
v8 |= v12;
//res = v_check_any(((v0 | v8) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
res = !v_check_all(((v0 | v8) == v_zero));
// Scalar tail over [src, srcEnd); skipped when the vector loop already found a non-zero.
return res || hasNonZero_(src, srcEnd-src);
// Returns the kernel stored at index `depth` of a static 8-entry table.
// Entries 0-1 both use hasNonZero8u and entries 2-3 both use hasNonZero16u
// (presumably the unsigned/signed variants of each width — confirm against the depth constants);
// the last entry is 0, i.e. no kernel.
// NOTE(review): `depth` is used as an index without a range check — confirm callers only pass valid depths.
HasNonZeroFunc getHasNonZeroTab(int depth)
static HasNonZeroFunc hasNonZeroTab[] =
(HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u),
(HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u),
(HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32s), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32f),
(HasNonZeroFunc)GET_OPTIMIZED(hasNonZero64f), 0
return hasNonZeroTab[depth];
} // namespace

@ -0,0 +1,201 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Test parameterized by (matrix type, size); the input is an all-zero matrix built with Mat::zeros.
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroAllZeros;
TEST_P(HasNonZeroAllZeros, hasNonZeroAllZeros)
const int type = std::get<0>(GetParam());
const Size size = std::get<1>(GetParam());
Mat m = Mat::zeros(size, type);
// Parameter values: seven single-channel types and four sizes, including 1x1 and Size(1, 113).
testing::Values(CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1),
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113))
// Test parameterized by (matrix type, size), instantiated for the two floating-point types only.
// Judging by the name it targets negative-zero inputs; the fill step is not visible here — confirm.
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroNegZeros;
TEST_P(HasNonZeroNegZeros, hasNonZeroNegZeros)
const int type = std::get<0>(GetParam());
const Size size = std::get<1>(GetParam());
Mat m = Mat(size, type);
// Parameter values: CV_32FC1 / CV_64FC1 and four sizes, including 1x1 and Size(1, 113).
testing::Values(CV_32FC1, CV_64FC1),
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113))
// Test parameterized by (matrix type, size) for floating-point types.
// The matrix is filled in turn with epsilon(), min() and denorm_min() of the matching
// precision (double for CV_64F, float otherwise).
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroLimitValues;
TEST_P(HasNonZeroLimitValues, hasNonZeroLimitValues)
const int type = std::get<0>(GetParam());
const Size size = std::get<1>(GetParam());
Mat m = Mat(size, type);
// Fill with std::numeric_limits<...>::epsilon().
m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits<double>::epsilon()) : Scalar::all(std::numeric_limits<float>::epsilon()));
// Fill with std::numeric_limits<...>::min().
m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits<double>::min()) : Scalar::all(std::numeric_limits<float>::min()));
// Fill with std::numeric_limits<...>::denorm_min().
m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits<double>::denorm_min()) : Scalar::all(std::numeric_limits<float>::denorm_min()));
INSTANTIATE_TEST_CASE_P(Core, HasNonZeroLimitValues,
testing::Values(CV_32FC1, CV_64FC1),
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113))
// Test parameterized by (matrix type, size).
// Each iteration builds an all-zero matrix and selects a 1x1 ROI at a random position.
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroRandom;
TEST_P(HasNonZeroRandom, hasNonZeroRandom)
const int type = std::get<0>(GetParam());
const Size size = std::get<1>(GetParam());
RNG& rng = theRNG();
// Iteration count: 100, or the number of pixels when the matrix is smaller than that.
const size_t N = std::min(100, size.area());
for(size_t i = 0 ; i<N ; ++i)
const int nz_pos_x = rng.uniform(0, size.width);
const int nz_pos_y = rng.uniform(0, size.height);
Mat m = Mat::zeros(size, type);
// Single-element view at the random position.
Mat nzROI = Mat(m, Rect(nz_pos_x, nz_pos_y, 1, 1));
testing::Values(CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1),
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113))
// Test parameterized by (matrix type, number of dimensions, continuous flag).
// Each of the 10 iterations builds a Mat header over a byte buffer with random per-dimension
// sizes and steps, selects a single-element range, and checks that hasNonZero(m) agrees
// with (countNonZero(m) > 0).
typedef testing::TestWithParam<tuple<int, int, bool> > HasNonZeroNd;
TEST_P(HasNonZeroNd, hasNonZeroNd)
const int type = get<0>(GetParam());
const int ndims = get<1>(GetParam());
const bool continuous = get<2>(GetParam());
RNG& rng = theRNG();
const size_t N = 10;
for(size_t i = 0 ; i<N ; ++i)
std::vector<size_t> steps(ndims);
std::vector<int> sizes(ndims);
size_t totalBytes = 1;
for(int dim = 0 ; dim<ndims ; ++dim)
const bool isFirstDim = (dim == 0);
const bool isLastDim = (dim+1 == ndims);
const int length = rng.uniform(1, 64);
// Step in bytes: one element for the last dimension, `length` elements otherwise.
// NOTE(review): the step depends only on this dimension's own length, not on the extents
// of the inner dimensions — confirm this yields valid strides when ndims > 2.
steps[dim] = (isLastDim ? 1 : static_cast<size_t>(length))*CV_ELEM_SIZE(type);
// Non-continuous case: dimensions other than the first get a random size drawn from uniform(1, length).
// NOTE(review): length can be 1, giving rng.uniform(1, 1) — confirm the RNG handles that range.
sizes[dim] = (isFirstDim || continuous) ? length : rng.uniform(1, length);
totalBytes *= steps[dim]*static_cast<size_t>(sizes[dim]);
std::vector<unsigned char> buffer(totalBytes);
void* data =;
Mat m = Mat(ndims,, type, data,;
// Pick one random index per dimension, giving a single-element range.
std::vector<Range> nzRange(ndims);
for(int dim = 0 ; dim<ndims ; ++dim)
const int pos = rng.uniform(0, sizes[dim]);
nzRange[dim] = Range(pos, pos+1);
Mat nzROI = Mat(m,;
// countNonZero serves as the reference result.
const int nzCount = countNonZero(m);
EXPECT_EQ((nzCount>0), hasNonZero(m));
testing::Values(2, 3),
testing::Values(true, false)
}} // namespace