mirror of https://github.com/opencv/opencv.git
Merge pull request #22947 from chacha21:hasNonZero
Added cv::hasNonZero() #22947 `cv::hasNonZero()` is semantically equivalent to (`cv::countNonZero()>0`) but stops scanning the image as soon as a non-zero value is found, for a performance gain - [X] I agree to contribute to the project under Apache 2 License. - [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake This pull request might be refused, but I submit it to know if further work is needed or if I should just stop working on it. The idea is only a performance gain vs `countNonZero()>0` at the cost of more code. Reasons why it might be refused: - this is just more code - the execution time is "unfair"/"unpredictable" since it depends on the position of the first non-zero value - the user must be aware that the default search goes from first row/col to last row/col and that there is no way to customize that, even if their use case lets them know where a non-zero value could be found - the PR in its current state is using, for the OpenCL (ocl) implementation, a mere `countNonZero()>0`; there is not much sense in trying to break out of the OpenCL kernel call early when a non-zero value is encountered. So the OpenCL implementation does not bring any improvement. 
- there is no IPP function that can help (`countNonZero()` is based on `ippCountInRange`) - the PR in its current state might be slower than a call to `countNonZero()>0` in some cases (see "challenges" below) Reasons why it might be accepted: - the performance gain is huge on average, if we consider that "on average" means "non-zero value in the middle of the image" - the "missing" IPP implementation is replaced by an "OpenCV universal intrinsics" implementation - the PR in its current state is almost always faster than a call to `countNonZero()>0`, is only slightly slower in the worst cases, and not even for all matrices **Challenges** The worst case is either an all-zero matrix, or a non-zero value at the very last position. In such a case, the `hasNonZero()` implementation will scan the whole matrix like `countNonZero()` would do. But we expect the performance to be the same in this case. And `ippCountInRange` is hard to beat! There is also the case of very small (<=32x32...) 8-bit matrices, where the SIMD can be hard to feed. For all cases but the worst, my custom `hasNonZero()` performs better than `ippCountInRange()`. For the worst case, my custom `hasNonZero()` performs better than `ippCountInRange()` *except for large matrices of type CV_32S or CV_64F* (but surprisingly, not CV_32F). The difference is small, but it exists (and I don't understand why). For very small CV_8U matrices `ippCountInRange()` seems unbeatable. Here is the code that I use to check timings ``` //test cv::hasNonZero() vs (cv::countNonZero()>0) for different matrices sizes, types, strides... 
{ cv::setRNGSeed(1234); const std::vector<cv::Size> sizes = {{32, 32}, {64, 64}, {128, 128}, {320, 240}, {512, 512}, {640, 480}, {1024, 768}, {2048, 2048}, {1031, 1000}}; const std::vector<int> types = {CV_8U, CV_16U, CV_32S, CV_32F, CV_64F}; const size_t iterations = 1000; for(const cv::Size& size : sizes) { for(const int type : types) { for(int c = 0 ; c<2 ; ++c) { const bool continuous = !c; for(int i = 0 ; i<4 ; ++i) { cv::Mat m = continuous ? cv::Mat::zeros(size, type) : cv::Mat(cv::Mat::zeros(cv::Size(2*size.width, size.height), type), cv::Rect(cv::Point(0, 0), size)); const bool nz = (i <= 2); const unsigned int nzOffsetRange = 10; const unsigned int nzOffset = cv::randu<unsigned int>()%nzOffsetRange; const cv::Point pos = (i == 0) ? cv::Point(nzOffset, 0) : (i == 1) ? cv::Point(size.width/2-nzOffsetRange/2+nzOffset, size.height/2) : (i == 2) ? cv::Point(size.width-1-nzOffset, size.height-1) : cv::Point(0, 0); std::cout << "============================================================" << std::endl; std::cout << "size:" << size << " type:" << type << " continuous = " << (continuous ? "true" : "false") << " iterations:" << iterations << " nz=" << (nz ? "true" : "false"); std::cout << " pos=" << ((i == 0) ? "begin" : (i == 1) ? "middle" : (i == 2) ? "end" : "none"); std::cout << std::endl; cv::Mat mask = cv::Mat::zeros(size, CV_8UC1); mask.at<unsigned char>(pos) = 0xFF; m.setTo(cv::Scalar::all(0)); m.setTo(cv::Scalar::all(nz ? 
1 : 0), mask); std::vector<bool> results; std::vector<double> timings; { bool res = false; auto ref = cv::getTickCount(); for(size_t k = 0 ; k<iterations ; ++k) res = cv::hasNonZero(m); auto now = cv::getTickCount(); const bool error = (res != nz); if (error) printf("!!ERROR!!\r\n"); results.push_back(res); timings.push_back(1000.*(now-ref)/cv::getTickFrequency()); } { bool res = false; auto ref = cv::getTickCount(); for(size_t k = 0 ; k<iterations ; ++k) res = (cv::countNonZero(m)>0); auto now = cv::getTickCount(); const bool error = (res != nz); if (error) printf("!!ERROR!!\r\n"); results.push_back(res); timings.push_back(1000.*(now-ref)/cv::getTickFrequency()); } const size_t bestTimingIndex = (std::min_element(timings.begin(), timings.end())-timings.begin()); if ((bestTimingIndex != 0) || (std::find_if_not(results.begin(), results.end(), [&](bool r) {return (r == nz);}) != results.end())) { std::cout << "cv::hasNonZero\t\t=>" << results[0] << ((results[0] != nz) ? " ERROR" : "") << " perf:" << timings[0] << "ms => " << (iterations/timings[0]*1000) << " im/s" << ((bestTimingIndex == 0) ? " * " : "") << std::endl; std::cout << "cv::countNonZero\t=>" << results[1] << ((results[1] != nz) ? " ERROR" : "") << " perf:" << timings[1] << "ms => " << (iterations/timings[1]*1000) << " im/s" << ((bestTimingIndex == 1) ? 
" * " : "") << std::endl; } } } } } } ``` Here is a report of this benchmark (it only reports timings when `cv::countNonZero()` is faster) My CPU is an Intel Core I7 4790 @ 3.60Ghz ``` ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=true pos=middle cv::hasNonZero =>1 perf:0.353764ms => 2.82674e+06 im/s cv::countNonZero =>1 perf:0.282044ms => 3.54555e+06 im/s * ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:0.610478ms => 1.63806e+06 im/s cv::countNonZero =>1 perf:0.283182ms => 3.5313e+06 im/s * ============================================================ size:[32 x 32] type:0 continuous = false iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:0.630115ms => 1.58701e+06 im/s cv::countNonZero =>0 perf:0.282044ms => 3.54555e+06 im/s * ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 
nz=true pos=end ============================================================ size:[32 x 32] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:5 
continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:0.607347ms => 1.64651e+06 im/s cv::countNonZero =>1 perf:0.467037ms => 2.14116e+06 im/s * ============================================================ size:[32 x 32] type:5 continuous = false iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:0.618162ms => 1.6177e+06 im/s cv::countNonZero =>0 perf:0.468175ms => 2.13595e+06 im/s * ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[32 x 32] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[32 x 32] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[32 x 32] type:6 continuous = 
false iterations:1000 nz=true pos=end ============================================================ size:[32 x 32] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ 
size:[64 x 64] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:5 continuous = true iterations:1000 nz=false pos=none 
============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[64 x 64] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:0 continuous = 
true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=true pos=begin 
============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] 
type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[128 x 128] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=true pos=middle 
============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] 
type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=true pos=end 
============================================================ size:[320 x 240] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[320 x 240] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 
512] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:4 continuous = false iterations:1000 nz=false pos=none 
============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[512 x 512] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[512 x 512] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[512 x 512] 
type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[512 x 512] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=true pos=begin 
============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] 
type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[640 x 480] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=true pos=middle 
============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ 
size:[1024 x 768] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:4 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 
nz=true pos=end ============================================================ size:[1024 x 768] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1024 x 768] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=true pos=begin 
============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:895.381ms => 1116.84 im/s 
cv::countNonZero =>1 perf:882.569ms => 1133.06 im/s * ============================================================ size:[2048 x 2048] type:4 continuous = true iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:899.53ms => 1111.69 im/s cv::countNonZero =>0 perf:870.894ms => 1148.24 im/s * ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:5 continuous = false iterations:1000 nz=false pos=none 
============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=true pos=end cv::hasNonZero =>1 perf:2018.92ms => 495.313 im/s cv::countNonZero =>1 perf:1966.37ms => 508.552 im/s * ============================================================ size:[2048 x 2048] type:6 continuous = true iterations:1000 nz=false pos=none cv::hasNonZero =>0 perf:2005.87ms => 498.537 im/s cv::countNonZero =>0 perf:1992.78ms => 501.812 im/s * ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[2048 x 2048] type:6 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:0 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 
nz=true pos=begin ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:0 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:2 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:2 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=true pos=end 
============================================================ size:[1031 x 1000] type:4 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:4 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:5 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:5 continuous = false iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=true pos=begin 
============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:6 continuous = true iterations:1000 nz=false pos=none ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=true pos=begin ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=true pos=middle ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=true pos=end ============================================================ size:[1031 x 1000] type:6 continuous = false iterations:1000 nz=false pos=none done ```pull/23774/head
parent
eec8a20c33
commit
60b806f9b8
7 changed files with 684 additions and 0 deletions
@ -0,0 +1,107 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
|
||||
#include "precomp.hpp" |
||||
#include "opencl_kernels_core.hpp" |
||||
#include "stat.hpp" |
||||
|
||||
#include "has_non_zero.simd.hpp" |
||||
#include "has_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content |
||||
|
||||
namespace cv { |
||||
|
||||
// Picks the hasNonZero kernel for the given matrix depth.
// The call is forwarded through CV_CPU_DISPATCH to the getHasNonZeroTab()
// variants of the modes listed in CV_CPU_DISPATCH_MODES_ALL; the macro is
// expected to supply the return statement of this function.
static HasNonZeroFunc getHasNonZeroTab(int depth)
{
    CV_INSTRUMENT_REGION();

    CV_CPU_DISPATCH(getHasNonZeroTab, (depth), CV_CPU_DISPATCH_MODES_ALL);
}
||||
|
||||
#ifdef HAVE_OPENCL |
||||
// OpenCL implementation of hasNonZero().
//
// Runs the "reduce" kernel built with OP_COUNT_NON_ZERO, sums the values the
// kernel wrote to a 1 x dbsize CV_32SC1 buffer and stores "sum > 0" into `res`.
//
// Returns true when the kernel ran and `res` has been written. Returns false
// (leaving `res` untouched) when the input is CV_64F and the default device
// has no double support, when the kernel object is empty, or when the kernel
// run fails.
static bool ocl_hasNonZero( InputArray _src, bool & res )
{
    const int type = _src.type();
    const int depth = CV_MAT_DEPTH(type);
    const int kercn = ocl::predictOptimalVectorWidth(_src);
    const bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if (!doubleSupport && depth == CV_64F)
        return false;

    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    // Smallest power of two that is >= wgs, then halved.
    int wgs2_aligned = 1;
    for (; wgs2_aligned < (int)wgs; wgs2_aligned <<= 1)
    {
    }
    wgs2_aligned >>= 1;

    const char* const doubleOpt = doubleSupport ? " -D DOUBLE_SUPPORT" : "";
    const char* const contOpt = _src.isContinuous() ? " -D HAVE_SRC_CONT" : "";
    const String buildOptions =
        format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
               " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
               ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
               ocl::typeToStr(depth), (int)wgs, kercn,
               wgs2_aligned, doubleOpt, contOpt);

    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, buildOptions);
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    UMat db(1, dbsize, CV_32SC1);
    k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
           dbsize, ocl::KernelArg::PtrWriteOnly(db));

    size_t globalsize = dbsize * wgs;
    if (!k.run(1, &globalsize, &wgs, true))
        return false;

    // Any positive total means at least one non-zero element was counted.
    res = (saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]) > 0);
    return true;
}
||||
#endif |
||||
|
||||
// Returns true as soon as a non-zero element is found in a single-channel
// array, false if every element is zero.
//
// The input must have exactly one channel (asserted). UMat inputs with at most
// two dimensions are first offered to the OpenCL path; otherwise the array is
// scanned on the CPU with the kernel selected for its depth. Scanning stops at
// the first row or plane that reports a non-zero value.
bool hasNonZero(InputArray _src)
{
    CV_INSTRUMENT_REGION();

    const int type = _src.type();
    const int cn = CV_MAT_CN(type);
    CV_Assert( cn == 1 );

    bool res = false;

#ifdef HAVE_OPENCL
    CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_hasNonZero(_src, res),
                res)
#endif

    Mat src = _src.getMat();

    HasNonZeroFunc func = getHasNonZeroTab(src.depth());
    CV_Assert( func != 0 );

    // Scans one matrix: a single call when the data is continuous, otherwise
    // one call per row, stopping once a non-zero has been reported.
    const auto scanMat = [func](const Mat& m, bool found) -> bool
    {
        if (m.isContinuous())
        {
            found |= func(m.ptr<uchar>(0), m.total());
            return found;
        }
        const int rowsCount = m.rows;
        for (int row = 0; !found && (row < rowsCount); ++row)
            found |= func(m.ptr<uchar>(row), m.cols);
        return found;
    };

    if (src.dims != 2)
    {
        // Generic case: walk the planes produced by NAryMatIterator.
        const Mat* arrays[] = {&src, nullptr};
        Mat planes[1];
        NAryMatIterator itNAry(arrays, planes, 1);
        for (size_t p = 0; !res && (p < itNAry.nplanes); ++p, ++itNAry)
            res = scanMat(itNAry.planes[0], res);
    }
    else
    {
        // Fast path for 2D input: avoids creating planes of single rows.
        res = scanMat(src, res);
    }

    return res;
}
||||
|
||||
} // namespace
|
@ -0,0 +1,327 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
namespace cv { |
||||
|
||||
// Pointer to a per-depth hasNonZero kernel. It is called with a byte pointer
// to the first element and a count of elements; the boolean it returns is
// OR-ed into the result of cv::hasNonZero().
using HasNonZeroFunc = bool (*)(const uchar*, size_t);
||||
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN |
||||
|
||||
// Returns the hasNonZero kernel to use for the given matrix depth.
// NOTE(review): the caller asserts the result is non-null, so a null return
// for an unsupported depth looks possible - confirm against the definition.
HasNonZeroFunc getHasNonZeroTab(int depth);
||||
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY |
||||
|
||||
// Generic scalar scan: returns true if any of the `len` elements starting at
// `src` compares unequal to zero, false otherwise (including len == 0).
// When CV_ENABLE_UNROLLED is set, elements are first tested four at a time by
// OR-ing them together, so T must support operator| in that configuration.
template<typename T>
inline bool hasNonZero_(const T* src, size_t len )
{
    size_t pos = 0;
#if CV_ENABLE_UNROLLED
    while (pos + 4 <= len)
    {
        if ((src[pos] | src[pos+1] | src[pos+2] | src[pos+3]) != 0)
            return true;
        pos += 4;
    }
#endif
    // Remaining elements (or all of them when unrolling is disabled).
    while (pos < len)
    {
        if (src[pos] != 0)
            return true;
        ++pos;
    }
    return false;
}
||||
|
||||
template<> |
||||
inline bool hasNonZero_(const float* src, size_t len ) |
||||
{ |
||||
bool res = false; |
||||
if (len > 0) |
||||
{ |
||||
size_t i=0; |
||||
if (sizeof(float) == sizeof(unsigned int)) |
||||
{ |
||||
#if CV_ENABLE_UNROLLED |
||||
typedef unsigned int float_as_uint_t; |
||||
const float_as_uint_t* src_as_ui = reinterpret_cast<const float_as_uint_t*>(src); |
||||
for(; !res && (i+4 <= len); i += 4 ) |
||||
{ |
||||
const float_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]); |
||||
res |= ((gathered<<1) != 0);//remove what would be the sign bit
|
||||
} |
||||
#endif |
||||
} |
||||
for( ; !res && (i < len); i++ ) |
||||
res |= (src[i] != 0); |
||||
} |
||||
return res; |
||||
} |
||||
|
||||
template<> |
||||
inline bool hasNonZero_(const double* src, size_t len ) |
||||
{ |
||||
bool res = false; |
||||
if (len > 0) |
||||
{ |
||||
size_t i=0; |
||||
if (sizeof(double) == sizeof(uint64_t)) |
||||
{ |
||||
#if CV_ENABLE_UNROLLED |
||||
typedef uint64_t double_as_uint_t; |
||||
const double_as_uint_t* src_as_ui = reinterpret_cast<const double_as_uint_t*>(src); |
||||
for(; !res && (i+4 <= len); i += 4 ) |
||||
{ |
||||
const double_as_uint_t gathered = (src_as_ui[i] | src_as_ui[i+1] | src_as_ui[i+2] | src_as_ui[i+3]); |
||||
res |= ((gathered<<1) != 0);//remove what would be the sign bit
|
||||
} |
||||
#endif |
||||
} |
||||
for( ; !res && (i < len); i++ ) |
||||
res |= (src[i] != 0); |
||||
} |
||||
return res; |
||||
} |
||||
|
||||
static bool hasNonZero8u( const uchar* src, size_t len ) |
||||
{ |
||||
bool res = false; |
||||
const uchar* srcEnd = src+len; |
||||
#if CV_SIMD |
||||
typedef v_uint8 v_type; |
||||
const v_type v_zero = vx_setzero_u8(); |
||||
constexpr const int unrollCount = 2; |
||||
int step = v_type::nlanes * unrollCount; |
||||
int len0 = len & -step; |
||||
const uchar* srcSimdEnd = src+len0; |
||||
|
||||
int countSIMD = static_cast<int>((srcSimdEnd-src)/step); |
||||
while(!res && countSIMD--) |
||||
{ |
||||
v_type v0 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v1 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
res = v_check_any(((v0 | v1) != v_zero)); |
||||
} |
||||
|
||||
v_cleanup(); |
||||
#endif |
||||
return res || hasNonZero_(src, srcEnd-src); |
||||
} |
||||
|
||||
static bool hasNonZero16u( const ushort* src, size_t len ) |
||||
{ |
||||
bool res = false; |
||||
const ushort* srcEnd = src+len; |
||||
#if CV_SIMD |
||||
typedef v_uint16 v_type; |
||||
const v_type v_zero = vx_setzero_u16(); |
||||
constexpr const int unrollCount = 4; |
||||
int step = v_type::nlanes * unrollCount; |
||||
int len0 = len & -step; |
||||
const ushort* srcSimdEnd = src+len0; |
||||
|
||||
int countSIMD = static_cast<int>((srcSimdEnd-src)/step); |
||||
while(!res && countSIMD--) |
||||
{ |
||||
v_type v0 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v1 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v2 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v3 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v0 |= v1; |
||||
v2 |= v3; |
||||
res = v_check_any(((v0 | v2) != v_zero)); |
||||
} |
||||
|
||||
v_cleanup(); |
||||
#endif |
||||
return res || hasNonZero_(src, srcEnd-src); |
||||
} |
||||
|
||||
static bool hasNonZero32s( const int* src, size_t len ) |
||||
{ |
||||
bool res = false; |
||||
const int* srcEnd = src+len; |
||||
#if CV_SIMD |
||||
typedef v_int32 v_type; |
||||
const v_type v_zero = vx_setzero_s32(); |
||||
constexpr const int unrollCount = 8; |
||||
int step = v_type::nlanes * unrollCount; |
||||
int len0 = len & -step; |
||||
const int* srcSimdEnd = src+len0; |
||||
|
||||
int countSIMD = static_cast<int>((srcSimdEnd-src)/step); |
||||
while(!res && countSIMD--) |
||||
{ |
||||
v_type v0 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v1 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v2 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v3 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v4 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v5 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v6 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v7 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v0 |= v1; |
||||
v2 |= v3; |
||||
v4 |= v5; |
||||
v6 |= v7; |
||||
|
||||
v0 |= v2; |
||||
v4 |= v6; |
||||
res = v_check_any(((v0 | v4) != v_zero)); |
||||
} |
||||
|
||||
v_cleanup(); |
||||
#endif |
||||
return res || hasNonZero_(src, srcEnd-src); |
||||
} |
||||
|
||||
static bool hasNonZero32f( const float* src, size_t len ) |
||||
{ |
||||
bool res = false; |
||||
const float* srcEnd = src+len; |
||||
#if CV_SIMD |
||||
typedef v_float32 v_type; |
||||
const v_type v_zero = vx_setzero_f32(); |
||||
constexpr const int unrollCount = 8; |
||||
int step = v_type::nlanes * unrollCount; |
||||
int len0 = len & -step; |
||||
const float* srcSimdEnd = src+len0; |
||||
|
||||
int countSIMD = static_cast<int>((srcSimdEnd-src)/step); |
||||
while(!res && countSIMD--) |
||||
{ |
||||
v_type v0 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v1 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v2 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v3 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v4 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v5 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v6 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v7 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v0 |= v1; |
||||
v2 |= v3; |
||||
v4 |= v5; |
||||
v6 |= v7; |
||||
|
||||
v0 |= v2; |
||||
v4 |= v6; |
||||
//res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
|
||||
res = !v_check_all(((v0 | v4) == v_zero)); |
||||
} |
||||
|
||||
v_cleanup(); |
||||
#endif |
||||
return res || hasNonZero_(src, srcEnd-src); |
||||
} |
||||
|
||||
static bool hasNonZero64f( const double* src, size_t len ) |
||||
{ |
||||
bool res = false; |
||||
const double* srcEnd = src+len; |
||||
#if CV_SIMD_64F |
||||
typedef v_float64 v_type; |
||||
const v_type v_zero = vx_setzero_f64(); |
||||
constexpr const int unrollCount = 16; |
||||
int step = v_type::nlanes * unrollCount; |
||||
int len0 = len & -step; |
||||
const double* srcSimdEnd = src+len0; |
||||
|
||||
int countSIMD = static_cast<int>((srcSimdEnd-src)/step); |
||||
while(!res && countSIMD--) |
||||
{ |
||||
v_type v0 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v1 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v2 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v3 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v4 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v5 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v6 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v7 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v8 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v9 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v10 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v11 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v12 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v13 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v14 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v_type v15 = vx_load(src); |
||||
src += v_type::nlanes; |
||||
v0 |= v1; |
||||
v2 |= v3; |
||||
v4 |= v5; |
||||
v6 |= v7; |
||||
v8 |= v9; |
||||
v10 |= v11; |
||||
v12 |= v13; |
||||
v14 |= v15; |
||||
|
||||
v0 |= v2; |
||||
v4 |= v6; |
||||
v8 |= v10; |
||||
v12 |= v14; |
||||
|
||||
v0 |= v4; |
||||
v8 |= v12; |
||||
//res = v_check_any(((v0 | v8) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ
|
||||
res = !v_check_all(((v0 | v8) == v_zero)); |
||||
} |
||||
|
||||
v_cleanup(); |
||||
#endif |
||||
return res || hasNonZero_(src, srcEnd-src); |
||||
} |
||||
|
||||
// Looks up the hasNonZero kernel registered for `depth`.
//
// The table is indexed directly by `depth` (presumably the CV_8U..CV_64F depth
// codes - confirm against the caller). Slots 0 and 1 share the 8-bit kernel,
// slots 2 and 3 share the 16-bit kernel, and the last slot has no kernel.
//
// Returns the kernel, or a null pointer when the slot is empty or `depth`
// falls outside the table.
HasNonZeroFunc getHasNonZeroTab(int depth)
{
    static HasNonZeroFunc hasNonZeroTab[] =
    {
        (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero8u),
        (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero16u),
        (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32s), (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero32f),
        (HasNonZeroFunc)GET_OPTIMIZED(hasNonZero64f), 0
    };

    // Guard the lookup: an index outside the table would read past the array.
    const int tabSize = static_cast<int>(sizeof(hasNonZeroTab)/sizeof(hasNonZeroTab[0]));
    if( depth < 0 || depth >= tabSize )
        return 0;

    return hasNonZeroTab[depth];
}
||||
|
||||
#endif |
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END |
||||
} // namespace
|
@ -0,0 +1,201 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
namespace opencv_test { namespace { |
||||
|
||||
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroAllZeros; |
||||
|
||||
TEST_P(HasNonZeroAllZeros, hasNonZeroAllZeros) |
||||
{ |
||||
const int type = std::get<0>(GetParam()); |
||||
const Size size = std::get<1>(GetParam()); |
||||
|
||||
Mat m = Mat::zeros(size, type); |
||||
EXPECT_FALSE(hasNonZero(m)); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Core, HasNonZeroAllZeros, |
||||
testing::Combine( |
||||
testing::Values(CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1), |
||||
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) |
||||
) |
||||
); |
||||
|
||||
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroNegZeros; |
||||
|
||||
TEST_P(HasNonZeroNegZeros, hasNonZeroNegZeros) |
||||
{ |
||||
const int type = std::get<0>(GetParam()); |
||||
const Size size = std::get<1>(GetParam()); |
||||
|
||||
Mat m = Mat(size, type); |
||||
m.setTo(Scalar::all(-0.)); |
||||
EXPECT_FALSE(hasNonZero(m)); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Core, HasNonZeroNegZeros, |
||||
testing::Combine( |
||||
testing::Values(CV_32FC1, CV_64FC1), |
||||
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) |
||||
) |
||||
); |
||||
|
||||
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroLimitValues; |
||||
|
||||
TEST_P(HasNonZeroLimitValues, hasNonZeroLimitValues) |
||||
{ |
||||
const int type = std::get<0>(GetParam()); |
||||
const Size size = std::get<1>(GetParam()); |
||||
|
||||
Mat m = Mat(size, type); |
||||
|
||||
m.setTo(Scalar::all(std::numeric_limits<double>::infinity())); |
||||
EXPECT_TRUE(hasNonZero(m)); |
||||
|
||||
m.setTo(Scalar::all(-std::numeric_limits<double>::infinity())); |
||||
EXPECT_TRUE(hasNonZero(m)); |
||||
|
||||
m.setTo(Scalar::all(std::numeric_limits<double>::quiet_NaN())); |
||||
EXPECT_TRUE(hasNonZero(m)); |
||||
|
||||
m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits<double>::epsilon()) : Scalar::all(std::numeric_limits<float>::epsilon())); |
||||
EXPECT_TRUE(hasNonZero(m)); |
||||
|
||||
m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits<double>::min()) : Scalar::all(std::numeric_limits<float>::min())); |
||||
EXPECT_TRUE(hasNonZero(m)); |
||||
|
||||
m.setTo((CV_MAT_DEPTH(type) == CV_64F) ? Scalar::all(std::numeric_limits<double>::denorm_min()) : Scalar::all(std::numeric_limits<float>::denorm_min())); |
||||
EXPECT_TRUE(hasNonZero(m)); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Core, HasNonZeroLimitValues, |
||||
testing::Combine( |
||||
testing::Values(CV_32FC1, CV_64FC1), |
||||
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) |
||||
) |
||||
); |
||||
|
||||
typedef testing::TestWithParam<std::tuple<int, Size> > HasNonZeroRandom; |
||||
|
||||
TEST_P(HasNonZeroRandom, hasNonZeroRandom) |
||||
{ |
||||
const int type = std::get<0>(GetParam()); |
||||
const Size size = std::get<1>(GetParam()); |
||||
|
||||
RNG& rng = theRNG(); |
||||
|
||||
const size_t N = std::min(100, size.area()); |
||||
for(size_t i = 0 ; i<N ; ++i) |
||||
{ |
||||
const int nz_pos_x = rng.uniform(0, size.width); |
||||
const int nz_pos_y = rng.uniform(0, size.height); |
||||
Mat m = Mat::zeros(size, type); |
||||
Mat nzROI = Mat(m, Rect(nz_pos_x, nz_pos_y, 1, 1)); |
||||
nzROI.setTo(Scalar::all(1)); |
||||
EXPECT_TRUE(hasNonZero(m)); |
||||
} |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Core, HasNonZeroRandom, |
||||
testing::Combine( |
||||
testing::Values(CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1), |
||||
testing::Values(Size(1, 1), Size(320, 240), Size(127, 113), Size(1, 113)) |
||||
) |
||||
); |
||||
|
||||
typedef testing::TestWithParam<tuple<int, int, bool> > HasNonZeroNd; |
||||
|
||||
TEST_P(HasNonZeroNd, hasNonZeroNd) |
||||
{ |
||||
const int type = get<0>(GetParam()); |
||||
const int ndims = get<1>(GetParam()); |
||||
const bool continuous = get<2>(GetParam()); |
||||
|
||||
RNG& rng = theRNG(); |
||||
|
||||
const size_t N = 10; |
||||
for(size_t i = 0 ; i<N ; ++i) |
||||
{ |
||||
std::vector<size_t> steps(ndims); |
||||
std::vector<int> sizes(ndims); |
||||
size_t totalBytes = 1; |
||||
for(int dim = 0 ; dim<ndims ; ++dim) |
||||
{ |
||||
const bool isFirstDim = (dim == 0); |
||||
const bool isLastDim = (dim+1 == ndims); |
||||
const int length = rng.uniform(1, 64); |
||||
steps[dim] = (isLastDim ? 1 : static_cast<size_t>(length))*CV_ELEM_SIZE(type); |
||||
sizes[dim] = (isFirstDim || continuous) ? length : rng.uniform(1, length); |
||||
totalBytes *= steps[dim]*static_cast<size_t>(sizes[dim]); |
||||
} |
||||
|
||||
std::vector<unsigned char> buffer(totalBytes); |
||||
void* data = buffer.data(); |
||||
|
||||
Mat m = Mat(ndims, sizes.data(), type, data, steps.data()); |
||||
|
||||
std::vector<Range> nzRange(ndims); |
||||
for(int dim = 0 ; dim<ndims ; ++dim) |
||||
{ |
||||
const int pos = rng.uniform(0, sizes[dim]); |
||||
nzRange[dim] = Range(pos, pos+1); |
||||
} |
||||
|
||||
Mat nzROI = Mat(m, nzRange.data()); |
||||
nzROI.setTo(Scalar::all(1)); |
||||
|
||||
const int nzCount = countNonZero(m); |
||||
EXPECT_EQ((nzCount>0), hasNonZero(m)); |
||||
} |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Core, HasNonZeroNd, |
||||
testing::Combine( |
||||
testing::Values(CV_8UC1), |
||||
testing::Values(2, 3), |
||||
testing::Values(true, false) |
||||
) |
||||
); |
||||
|
||||
}} // namespace
|
Loading…
Reference in new issue