From fc21b15d6e7141b33e4e448e4d48b5c0106b04c6 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 31 Oct 2018 23:41:49 +0000 Subject: [PATCH 01/14] samples(gpu): cleanup samples for legacy API --- samples/gpu/CMakeLists.txt | 1 - samples/gpu/bgfg_segm.cpp | 37 +- samples/gpu/cascadeclassifier_nvidia_api.cpp | 388 ----------- samples/gpu/opticalflow_nvidia_api.cpp | 651 ------------------- 4 files changed, 5 insertions(+), 1072 deletions(-) delete mode 100644 samples/gpu/cascadeclassifier_nvidia_api.cpp delete mode 100644 samples/gpu/opticalflow_nvidia_api.cpp diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt index 6aa6b87afa..96fe897af3 100644 --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@ -21,7 +21,6 @@ set(OPENCV_CUDA_SAMPLES_REQUIRED_DEPS opencv_cudaoptflow opencv_cudabgsegm opencv_cudastereo - opencv_cudalegacy opencv_cudaobjdetect) ocv_check_dependencies(${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) diff --git a/samples/gpu/bgfg_segm.cpp b/samples/gpu/bgfg_segm.cpp index b7d9d7e9ae..a3c56f8b88 100644 --- a/samples/gpu/bgfg_segm.cpp +++ b/samples/gpu/bgfg_segm.cpp @@ -4,7 +4,6 @@ #include "opencv2/core.hpp" #include "opencv2/core/utility.hpp" #include "opencv2/cudabgsegm.hpp" -#include "opencv2/cudalegacy.hpp" #include "opencv2/video.hpp" #include "opencv2/highgui.hpp" @@ -16,8 +15,6 @@ enum Method { MOG, MOG2, - GMG, - FGD_STAT }; int main(int argc, const char** argv) @@ -25,7 +22,7 @@ int main(int argc, const char** argv) cv::CommandLineParser cmd(argc, argv, "{ c camera | | use camera }" "{ f file | ../data/vtest.avi | input video file }" - "{ m method | mog | method (mog, mog2, gmg, fgd) }" + "{ m method | mog | method (mog, mog2) }" "{ h help | | print help message }"); if (cmd.has("help") || !cmd.check()) @@ -40,9 +37,7 @@ int main(int argc, const char** argv) string method = cmd.get("method"); if (method != "mog" - && method != "mog2" - && method != "gmg" - && method != "fgd") + && method != "mog2") { cerr << 
"Incorrect method" << endl; return -1; @@ -50,8 +45,8 @@ int main(int argc, const char** argv) Method m = method == "mog" ? MOG : method == "mog2" ? MOG2 : - method == "fgd" ? FGD_STAT : - GMG; + (Method)-1; + CV_Assert(m != (Method)-1); VideoCapture cap; @@ -73,8 +68,6 @@ int main(int argc, const char** argv) Ptr mog = cuda::createBackgroundSubtractorMOG(); Ptr mog2 = cuda::createBackgroundSubtractorMOG2(); - Ptr gmg = cuda::createBackgroundSubtractorGMG(40); - Ptr fgd = cuda::createBackgroundSubtractorFGD(); GpuMat d_fgmask; GpuMat d_fgimg; @@ -93,23 +86,12 @@ int main(int argc, const char** argv) case MOG2: mog2->apply(d_frame, d_fgmask); break; - - case GMG: - gmg->apply(d_frame, d_fgmask); - break; - - case FGD_STAT: - fgd->apply(d_frame, d_fgmask); - break; } namedWindow("image", WINDOW_NORMAL); namedWindow("foreground mask", WINDOW_NORMAL); namedWindow("foreground image", WINDOW_NORMAL); - if (m != GMG) - { - namedWindow("mean background image", WINDOW_NORMAL); - } + namedWindow("mean background image", WINDOW_NORMAL); for(;;) { @@ -132,15 +114,6 @@ int main(int argc, const char** argv) mog2->apply(d_frame, d_fgmask); mog2->getBackgroundImage(d_bgimg); break; - - case GMG: - gmg->apply(d_frame, d_fgmask); - break; - - case FGD_STAT: - fgd->apply(d_frame, d_fgmask); - fgd->getBackgroundImage(d_bgimg); - break; } double fps = cv::getTickFrequency() / (cv::getTickCount() - start); diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp deleted file mode 100644 index c932411eda..0000000000 --- a/samples/gpu/cascadeclassifier_nvidia_api.cpp +++ /dev/null @@ -1,388 +0,0 @@ -#if defined _MSC_VER && _MSC_VER >= 1400 -#pragma warning( disable : 4201 4408 4127 4100) -#endif - -#include -#include -#include -#include "opencv2/core/cuda.hpp" -#include "opencv2/cudalegacy.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/objdetect.hpp" -#include "opencv2/objdetect/objdetect_c.h" - 
-using namespace std; -using namespace cv; - - -#if !defined(HAVE_CUDA) || defined(__arm__) - -int main( int, const char** ) -{ -#if !defined(HAVE_CUDA) - std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true)." << std::endl; -#endif - -#if defined(__arm__) - std::cout << "Unsupported for ARM CUDA library." << std::endl; -#endif - - return 0; -} - -#else - - -const Size2i preferredVideoFrameSize(640, 480); -const cv::String wndTitle = "NVIDIA Computer Vision :: Haar Classifiers Cascade"; - - -static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss) -{ - int fontFace = FONT_HERSHEY_DUPLEX; - double fontScale = 0.8; - int fontThickness = 2; - Size fontSize = cv::getTextSize("T[]", fontFace, fontScale, fontThickness, 0); - - Point org; - org.x = 1; - org.y = 3 * fontSize.height * (lineOffsY + 1) / 2; - putText(img, ss, org, fontFace, fontScale, Scalar(0,0,0), 5*fontThickness/2, 16); - putText(img, ss, org, fontFace, fontScale, fontColor, fontThickness, 16); -} - - -static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps) -{ - Scalar fontColorRed(0,0,255); - Scalar fontColorNV(0,185,118); - - ostringstream ss; - ss << "FPS = " << setprecision(1) << fixed << fps; - matPrint(canvas, 0, fontColorRed, ss.str()); - ss.str(""); - ss << "[" << canvas.cols << "x" << canvas.rows << "], " << - (bGpu ? "GPU, " : "CPU, ") << - (bLargestFace ? "OneFace, " : "MultiFace, ") << - (bFilter ? 
"Filter:ON" : "Filter:OFF"); - matPrint(canvas, 1, fontColorRed, ss.str()); - - if (bHelp) - { - matPrint(canvas, 2, fontColorNV, "Space - switch GPU / CPU"); - matPrint(canvas, 3, fontColorNV, "M - switch OneFace / MultiFace"); - matPrint(canvas, 4, fontColorNV, "F - toggle rectangles Filter"); - matPrint(canvas, 5, fontColorNV, "H - toggle hotkeys help"); - } - else - { - matPrint(canvas, 2, fontColorNV, "H - toggle hotkeys help"); - } -} - - -static NCVStatus process(Mat *srcdst, - Ncv32u width, Ncv32u height, - NcvBool bFilterRects, NcvBool bLargestFace, - HaarClassifierCascadeDescriptor &haar, - NCVVector &d_haarStages, NCVVector &d_haarNodes, - NCVVector &d_haarFeatures, NCVVector &h_haarStages, - INCVMemAllocator &gpuAllocator, - INCVMemAllocator &cpuAllocator, - cudaDeviceProp &devProp) -{ - ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR); - - NCVStatus ncvStat; - - NCV_SET_SKIP_COND(gpuAllocator.isCounting()); - - NCVMatrixAlloc d_src(gpuAllocator, width, height); - ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); - NCVMatrixAlloc h_src(cpuAllocator, width, height); - ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); - NCVVectorAlloc d_rects(gpuAllocator, 100); - ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); - - NCV_SKIP_COND_BEGIN - - for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++) - { - memcpy(h_src.ptr() + i * h_src.stride(), srcdst->ptr(i), srcdst->cols); - } - - ncvStat = h_src.copySolid(d_src, 0); - ncvAssertReturnNcvStat(ncvStat); - ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); - - NCV_SKIP_COND_END - - NcvSize32u roi; - roi.width = d_src.width(); - roi.height = d_src.height(); - - Ncv32u numDetections; - ncvStat = ncvDetectObjectsMultiScale_device( - d_src, roi, d_rects, numDetections, haar, h_haarStages, - d_haarStages, d_haarNodes, d_haarFeatures, - haar.ClassifierSize, - (bFilterRects || bLargestFace) ? 4 : 0, - 1.2f, 1, - (bLargestFace ? 
NCVPipeObjDet_FindLargestObject : 0) - | NCVPipeObjDet_VisualizeInPlace, - gpuAllocator, cpuAllocator, devProp, 0); - ncvAssertReturnNcvStat(ncvStat); - ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); - - NCV_SKIP_COND_BEGIN - - ncvStat = d_src.copySolid(h_src, 0); - ncvAssertReturnNcvStat(ncvStat); - ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); - - for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++) - { - memcpy(srcdst->ptr(i), h_src.ptr() + i * h_src.stride(), srcdst->cols); - } - - NCV_SKIP_COND_END - - return NCV_SUCCESS; -} - - -int main(int argc, const char** argv) -{ - cout << "OpenCV / NVIDIA Computer Vision" << endl; - cout << "Face Detection in video and live feed" << endl; - cout << "Syntax: exename " << endl; - cout << "=========================================" << endl; - - ncvAssertPrintReturn(cv::cuda::getCudaEnabledDeviceCount() != 0, "No GPU found or the library is compiled without CUDA support", -1); - ncvAssertPrintReturn(argc == 3, "Invalid number of arguments", -1); - - cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice()); - - string cascadeName = argv[1]; - string inputName = argv[2]; - - NCVStatus ncvStat; - NcvBool bQuit = false; - VideoCapture capture; - Size2i frameSize; - - //open content source - Mat image = imread(inputName); - Mat frame; - if (!image.empty()) - { - frameSize.width = image.cols; - frameSize.height = image.rows; - } - else - { - if (!capture.open(inputName)) - { - int camid = -1; - - istringstream ss(inputName); - int x = 0; - ss >> x; - - ncvAssertPrintReturn(capture.open(camid) != 0, "Can't open source", -1); - } - - capture >> frame; - ncvAssertPrintReturn(!frame.empty(), "Empty video source", -1); - - frameSize.width = frame.cols; - frameSize.height = frame.rows; - } - - NcvBool bUseGPU = true; - NcvBool bLargestObject = false; - NcvBool bFilterRects = true; - NcvBool bHelpScreen = false; - - CascadeClassifier classifierOpenCV; - ncvAssertPrintReturn(classifierOpenCV.load(cascadeName) 
!= 0, "Error (in OpenCV) opening classifier", -1); - - int devId; - ncvAssertCUDAReturn(cudaGetDevice(&devId), -1); - cudaDeviceProp devProp; - ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1); - cout << "Using GPU: " << devId << "(" << devProp.name << - "), arch=" << devProp.major << "." << devProp.minor << endl; - - //============================================================================== - // - // Load the classifier from file (assuming its size is about 1 mb) - // using a simple allocator - // - //============================================================================== - - NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice, static_cast(devProp.textureAlignment)); - ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1); - NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned, static_cast(devProp.textureAlignment)); - ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1); - - Ncv32u haarNumStages, haarNumNodes, haarNumFeatures; - ncvStat = ncvHaarGetClassifierSize(cascadeName, haarNumStages, haarNumNodes, haarNumFeatures); - ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1); - - NCVVectorAlloc h_haarStages(cpuCascadeAllocator, haarNumStages); - ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1); - NCVVectorAlloc h_haarNodes(cpuCascadeAllocator, haarNumNodes); - ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1); - NCVVectorAlloc h_haarFeatures(cpuCascadeAllocator, haarNumFeatures); - - ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1); - - HaarClassifierCascadeDescriptor haar; - ncvStat = ncvHaarLoadFromFile_host(cascadeName, haar, h_haarStages, h_haarNodes, h_haarFeatures); - ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", 
-1); - - NCVVectorAlloc d_haarStages(gpuCascadeAllocator, haarNumStages); - ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1); - NCVVectorAlloc d_haarNodes(gpuCascadeAllocator, haarNumNodes); - ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1); - NCVVectorAlloc d_haarFeatures(gpuCascadeAllocator, haarNumFeatures); - ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1); - - ncvStat = h_haarStages.copySolid(d_haarStages, 0); - ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1); - ncvStat = h_haarNodes.copySolid(d_haarNodes, 0); - ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1); - ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0); - ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1); - - //============================================================================== - // - // Calculate memory requirements and create real allocators - // - //============================================================================== - - NCVMemStackAllocator gpuCounter(static_cast(devProp.textureAlignment)); - ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1); - NCVMemStackAllocator cpuCounter(static_cast(devProp.textureAlignment)); - ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1); - - ncvStat = process(NULL, frameSize.width, frameSize.height, - false, false, haar, - d_haarStages, d_haarNodes, - d_haarFeatures, h_haarStages, - gpuCounter, cpuCounter, devProp); - ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1); - - NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast(devProp.textureAlignment)); - ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1); - NCVMemStackAllocator 
cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast(devProp.textureAlignment)); - ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1); - - printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height); - - //============================================================================== - // - // Main processing loop - // - //============================================================================== - - namedWindow(wndTitle, 1); - Mat frameDisp; - - do - { - Mat gray; - cvtColor((image.empty() ? frame : image), gray, cv::COLOR_BGR2GRAY); - - // - // process - // - - NcvSize32u minSize = haar.ClassifierSize; - if (bLargestObject) - { - Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width; - Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height; - Ncv32u ratioSmallest = min(ratioX, ratioY); - ratioSmallest = max((Ncv32u)(ratioSmallest / 2.5f), (Ncv32u)1); - minSize.width *= ratioSmallest; - minSize.height *= ratioSmallest; - } - - Ncv32f avgTime; - NcvTimer timer = ncvStartTimer(); - - if (bUseGPU) - { - ncvStat = process(&gray, frameSize.width, frameSize.height, - bFilterRects, bLargestObject, haar, - d_haarStages, d_haarNodes, - d_haarFeatures, h_haarStages, - gpuAllocator, cpuAllocator, devProp); - ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1); - } - else - { - vector rectsOpenCV; - - classifierOpenCV.detectMultiScale( - gray, - rectsOpenCV, - 1.2f, - bFilterRects ? 4 : 0, - (bLargestObject ? 
CV_HAAR_FIND_BIGGEST_OBJECT : 0) - | CV_HAAR_SCALE_IMAGE, - Size(minSize.width, minSize.height)); - - for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt) - rectangle(gray, rectsOpenCV[rt], Scalar(255)); - } - - avgTime = (Ncv32f)ncvEndQueryTimerMs(timer); - - cvtColor(gray, frameDisp, cv::COLOR_GRAY2BGR); - displayState(frameDisp, bHelpScreen, bUseGPU, bLargestObject, bFilterRects, 1000.0f / avgTime); - imshow(wndTitle, frameDisp); - - //handle input - switch (cv::waitKey(3)) - { - case ' ': - bUseGPU = !bUseGPU; - break; - case 'm': - case 'M': - bLargestObject = !bLargestObject; - break; - case 'f': - case 'F': - bFilterRects = !bFilterRects; - break; - case 'h': - case 'H': - bHelpScreen = !bHelpScreen; - break; - case 27: - bQuit = true; - break; - } - - // For camera and video file, capture the next image - if (capture.isOpened()) - { - capture >> frame; - if (frame.empty()) - { - break; - } - } - } while (!bQuit); - - cv::destroyWindow(wndTitle); - - return 0; -} - -#endif //!defined(HAVE_CUDA) diff --git a/samples/gpu/opticalflow_nvidia_api.cpp b/samples/gpu/opticalflow_nvidia_api.cpp deleted file mode 100644 index 0d924ec85e..0000000000 --- a/samples/gpu/opticalflow_nvidia_api.cpp +++ /dev/null @@ -1,651 +0,0 @@ -#if defined _MSC_VER && _MSC_VER >= 1400 -#pragma warning( disable : 4201 4408 4127 4100) -#endif - -#include -#include -#include -#include -#include -#include - -#include -#include -#include "opencv2/core/cuda.hpp" -#include "opencv2/cudalegacy.hpp" -#include "opencv2/highgui.hpp" - -#include "opencv2/core/core_c.h" // FIXIT legacy API -#include "opencv2/highgui/highgui_c.h" // FIXIT legacy API - -#if !defined(HAVE_CUDA) -int main( int, const char** ) -{ - std::cout << "Please compile the library with CUDA support" << std::endl; - return -1; -} -#else - -//using std::shared_ptr; -using cv::Ptr; - -#define PARAM_LEFT "--left" -#define PARAM_RIGHT "--right" -#define PARAM_SCALE "--scale" -#define PARAM_ALPHA "--alpha" -#define PARAM_GAMMA "--gamma" 
-#define PARAM_INNER "--inner" -#define PARAM_OUTER "--outer" -#define PARAM_SOLVER "--solver" -#define PARAM_TIME_STEP "--time-step" -#define PARAM_HELP "--help" - -Ptr g_pGPUMemAllocator; -Ptr g_pHostMemAllocator; - -class RgbToMonochrome -{ -public: - float operator ()(unsigned char b, unsigned char g, unsigned char r) - { - float _r = static_cast(r)/255.0f; - float _g = static_cast(g)/255.0f; - float _b = static_cast(b)/255.0f; - return (_r + _g + _b)/3.0f; - } -}; - -class RgbToR -{ -public: - float operator ()(unsigned char /*b*/, unsigned char /*g*/, unsigned char r) - { - return static_cast(r)/255.0f; - } -}; - - -class RgbToG -{ -public: - float operator ()(unsigned char /*b*/, unsigned char g, unsigned char /*r*/) - { - return static_cast(g)/255.0f; - } -}; - -class RgbToB -{ -public: - float operator ()(unsigned char b, unsigned char /*g*/, unsigned char /*r*/) - { - return static_cast(b)/255.0f; - } -}; - -template -NCVStatus CopyData(IplImage *image, Ptr >& dst) -{ - dst = Ptr > (new NCVMatrixAlloc (*g_pHostMemAllocator, image->width, image->height)); - ncvAssertReturn (dst->isMemAllocated (), NCV_ALLOCATOR_BAD_ALLOC); - - unsigned char *row = reinterpret_cast (image->imageData); - T convert; - for (int i = 0; i < image->height; ++i) - { - for (int j = 0; j < image->width; ++j) - { - if (image->nChannels < 3) - { - dst->ptr ()[j + i*dst->stride ()] = static_cast (*(row + j*image->nChannels))/255.0f; - } - else - { - unsigned char *color = row + j * image->nChannels; - dst->ptr ()[j +i*dst->stride ()] = convert (color[0], color[1], color[2]); - } - } - row += image->widthStep; - } - return NCV_SUCCESS; -} - -template -NCVStatus CopyData(const IplImage *image, const NCVMatrixAlloc &dst) -{ - unsigned char *row = reinterpret_cast (image->imageData); - T convert; - for (int i = 0; i < image->height; ++i) - { - for (int j = 0; j < image->width; ++j) - { - if (image->nChannels < 3) - { - dst.ptr ()[j + i*dst.stride ()] = static_cast(*(row + 
j*image->nChannels))/255.0f; - } - else - { - unsigned char *color = row + j * image->nChannels; - dst.ptr ()[j +i*dst.stride()] = convert (color[0], color[1], color[2]); - } - } - row += image->widthStep; - } - return NCV_SUCCESS; -} - -static NCVStatus LoadImages (const char *frame0Name, - const char *frame1Name, - int &width, - int &height, - Ptr > &src, - Ptr > &dst, - IplImage *&firstFrame, - IplImage *&lastFrame) -{ - IplImage *image; - image = cvLoadImage (frame0Name); - if (image == 0) - { - std::cout << "Could not open '" << frame0Name << "'\n"; - return NCV_FILE_ERROR; - } - - firstFrame = image; - // copy data to src - ncvAssertReturnNcvStat (CopyData (image, src)); - - IplImage *image2; - image2 = cvLoadImage (frame1Name); - if (image2 == 0) - { - std::cout << "Could not open '" << frame1Name << "'\n"; - return NCV_FILE_ERROR; - } - lastFrame = image2; - - ncvAssertReturnNcvStat (CopyData (image2, dst)); - - width = image->width; - height = image->height; - - return NCV_SUCCESS; -} - -template -inline T Clamp (T x, T a, T b) -{ - return ((x) > (a) ? ((x) < (b) ? 
(x) : (b)) : (a)); -} - -template -inline T MapValue (T x, T a, T b, T c, T d) -{ - x = Clamp (x, a, b); - return c + (d - c) * (x - a) / (b - a); -} - -static NCVStatus ShowFlow (NCVMatrixAlloc &u, NCVMatrixAlloc &v, const char *name) -{ - IplImage *flowField; - - NCVMatrixAlloc host_u(*g_pHostMemAllocator, u.width(), u.height()); - ncvAssertReturn(host_u.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); - - NCVMatrixAlloc host_v (*g_pHostMemAllocator, u.width (), u.height ()); - ncvAssertReturn (host_v.isMemAllocated (), NCV_ALLOCATOR_BAD_ALLOC); - - ncvAssertReturnNcvStat (u.copySolid (host_u, 0)); - ncvAssertReturnNcvStat (v.copySolid (host_v, 0)); - - float *ptr_u = host_u.ptr (); - float *ptr_v = host_v.ptr (); - - float maxDisplacement = 1.0f; - - for (Ncv32u i = 0; i < u.height (); ++i) - { - for (Ncv32u j = 0; j < u.width (); ++j) - { - float d = std::max ( fabsf(*ptr_u), fabsf(*ptr_v) ); - if (d > maxDisplacement) maxDisplacement = d; - ++ptr_u; - ++ptr_v; - } - ptr_u += u.stride () - u.width (); - ptr_v += v.stride () - v.width (); - } - - CvSize image_size = cvSize (u.width (), u.height ()); - flowField = cvCreateImage (image_size, IPL_DEPTH_8U, 4); - if (flowField == 0) return NCV_NULL_PTR; - - unsigned char *row = reinterpret_cast (flowField->imageData); - - ptr_u = host_u.ptr(); - ptr_v = host_v.ptr(); - for (int i = 0; i < flowField->height; ++i) - { - for (int j = 0; j < flowField->width; ++j) - { - (row + j * flowField->nChannels)[0] = 0; - (row + j * flowField->nChannels)[1] = static_cast (MapValue (-(*ptr_v), -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); - (row + j * flowField->nChannels)[2] = static_cast (MapValue (*ptr_u , -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); - (row + j * flowField->nChannels)[3] = 255; - ++ptr_u; - ++ptr_v; - } - row += flowField->widthStep; - ptr_u += u.stride () - u.width (); - ptr_v += v.stride () - v.width (); - } - - cvShowImage (name, flowField); - - return NCV_SUCCESS; -} - -static IplImage *CreateImage 
(NCVMatrixAlloc &h_r, NCVMatrixAlloc &h_g, NCVMatrixAlloc &h_b) -{ - CvSize imageSize = cvSize (h_r.width (), h_r.height ()); - IplImage *image = cvCreateImage (imageSize, IPL_DEPTH_8U, 4); - if (image == 0) return 0; - - unsigned char *row = reinterpret_cast (image->imageData); - - for (int i = 0; i < image->height; ++i) - { - for (int j = 0; j < image->width; ++j) - { - int offset = j * image->nChannels; - int pos = i * h_r.stride () + j; - row[offset + 0] = static_cast (h_b.ptr ()[pos] * 255.0f); - row[offset + 1] = static_cast (h_g.ptr ()[pos] * 255.0f); - row[offset + 2] = static_cast (h_r.ptr ()[pos] * 255.0f); - row[offset + 3] = 255; - } - row += image->widthStep; - } - return image; -} - -static void PrintHelp () -{ - std::cout << "Usage help:\n"; - std::cout << std::setiosflags(std::ios::left); - std::cout << "\t" << std::setw(15) << PARAM_ALPHA << " - set alpha\n"; - std::cout << "\t" << std::setw(15) << PARAM_GAMMA << " - set gamma\n"; - std::cout << "\t" << std::setw(15) << PARAM_INNER << " - set number of inner iterations\n"; - std::cout << "\t" << std::setw(15) << PARAM_LEFT << " - specify left image\n"; - std::cout << "\t" << std::setw(15) << PARAM_RIGHT << " - specify right image\n"; - std::cout << "\t" << std::setw(15) << PARAM_OUTER << " - set number of outer iterations\n"; - std::cout << "\t" << std::setw(15) << PARAM_SCALE << " - set pyramid scale factor\n"; - std::cout << "\t" << std::setw(15) << PARAM_SOLVER << " - set number of basic solver iterations\n"; - std::cout << "\t" << std::setw(15) << PARAM_TIME_STEP << " - set frame interpolation time step\n"; - std::cout << "\t" << std::setw(15) << PARAM_HELP << " - display this help message\n"; -} - -static int ProcessCommandLine(int argc, char **argv, - Ncv32f &timeStep, - char *&frame0Name, - char *&frame1Name, - NCVBroxOpticalFlowDescriptor &desc) -{ - timeStep = 0.25f; - for (int iarg = 1; iarg < argc; ++iarg) - { - if (strcmp(argv[iarg], PARAM_LEFT) == 0) - { - if (iarg + 1 < argc) - { - 
frame0Name = argv[++iarg]; - } - else - return -1; - } - if (strcmp(argv[iarg], PARAM_RIGHT) == 0) - { - if (iarg + 1 < argc) - { - frame1Name = argv[++iarg]; - } - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_SCALE) == 0) - { - if (iarg + 1 < argc) - desc.scale_factor = static_cast(atof(argv[++iarg])); - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_ALPHA) == 0) - { - if (iarg + 1 < argc) - desc.alpha = static_cast(atof(argv[++iarg])); - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_GAMMA) == 0) - { - if (iarg + 1 < argc) - desc.gamma = static_cast(atof(argv[++iarg])); - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_INNER) == 0) - { - if (iarg + 1 < argc) - desc.number_of_inner_iterations = static_cast(atoi(argv[++iarg])); - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_OUTER) == 0) - { - if (iarg + 1 < argc) - desc.number_of_outer_iterations = static_cast(atoi(argv[++iarg])); - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_SOLVER) == 0) - { - if (iarg + 1 < argc) - desc.number_of_solver_iterations = static_cast(atoi(argv[++iarg])); - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_TIME_STEP) == 0) - { - if (iarg + 1 < argc) - timeStep = static_cast(atof(argv[++iarg])); - else - return -1; - } - else if(strcmp(argv[iarg], PARAM_HELP) == 0) - { - PrintHelp (); - return 0; - } - } - return 0; -} - - -int main(int argc, char **argv) -{ - char *frame0Name = 0, *frame1Name = 0; - Ncv32f timeStep = 0.01f; - - NCVBroxOpticalFlowDescriptor desc; - - desc.alpha = 0.197f; - desc.gamma = 50.0f; - desc.number_of_inner_iterations = 10; - desc.number_of_outer_iterations = 77; - desc.number_of_solver_iterations = 10; - desc.scale_factor = 0.8f; - - int result = ProcessCommandLine (argc, argv, timeStep, frame0Name, frame1Name, desc); - if (argc == 1 || result) - { - PrintHelp(); - return result; - } - - cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice()); - - std::cout << "OpenCV / NVIDIA Computer 
Vision\n"; - std::cout << "Optical Flow Demo: Frame Interpolation\n"; - std::cout << "=========================================\n"; - std::cout << "Press:\n ESC to quit\n 'a' to move to the previous frame\n 's' to move to the next frame\n"; - - int devId; - ncvAssertCUDAReturn(cudaGetDevice(&devId), -1); - cudaDeviceProp devProp; - ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1); - std::cout << "Using GPU: " << devId << "(" << devProp.name << - "), arch=" << devProp.major << "." << devProp.minor << std::endl; - - g_pGPUMemAllocator = Ptr (new NCVMemNativeAllocator (NCVMemoryTypeDevice, static_cast(devProp.textureAlignment))); - ncvAssertPrintReturn (g_pGPUMemAllocator->isInitialized (), "Device memory allocator isn't initialized", -1); - - g_pHostMemAllocator = Ptr (new NCVMemNativeAllocator (NCVMemoryTypeHostPageable, static_cast(devProp.textureAlignment))); - ncvAssertPrintReturn (g_pHostMemAllocator->isInitialized (), "Host memory allocator isn't initialized", -1); - - int width, height; - - Ptr > src_host; - Ptr > dst_host; - - IplImage *firstFrame, *lastFrame; - if (frame0Name != 0 && frame1Name != 0) - { - ncvAssertReturnNcvStat (LoadImages (frame0Name, frame1Name, width, height, src_host, dst_host, firstFrame, lastFrame)); - } - else - { - ncvAssertReturnNcvStat (LoadImages ("frame10.bmp", "frame11.bmp", width, height, src_host, dst_host, firstFrame, lastFrame)); - } - - Ptr > src (new NCVMatrixAlloc (*g_pGPUMemAllocator, src_host->width (), src_host->height ())); - ncvAssertReturn(src->isMemAllocated(), -1); - - Ptr > dst (new NCVMatrixAlloc (*g_pGPUMemAllocator, src_host->width (), src_host->height ())); - ncvAssertReturn (dst->isMemAllocated (), -1); - - ncvAssertReturnNcvStat (src_host->copySolid ( *src, 0 )); - ncvAssertReturnNcvStat (dst_host->copySolid ( *dst, 0 )); - -#if defined SAFE_MAT_DECL -#undef SAFE_MAT_DECL -#endif -#define SAFE_MAT_DECL(name, allocator, sx, sy) \ - NCVMatrixAlloc name(*allocator, sx, sy);\ - 
ncvAssertReturn(name.isMemAllocated(), -1); - - SAFE_MAT_DECL (u, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (v, g_pGPUMemAllocator, width, height); - - SAFE_MAT_DECL (uBck, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (vBck, g_pGPUMemAllocator, width, height); - - SAFE_MAT_DECL (h_r, g_pHostMemAllocator, width, height); - SAFE_MAT_DECL (h_g, g_pHostMemAllocator, width, height); - SAFE_MAT_DECL (h_b, g_pHostMemAllocator, width, height); - - std::cout << "Estimating optical flow\nForward...\n"; - - if (NCV_SUCCESS != NCVBroxOpticalFlow (desc, *g_pGPUMemAllocator, *src, *dst, u, v, 0)) - { - std::cout << "Failed\n"; - return -1; - } - - std::cout << "Backward...\n"; - if (NCV_SUCCESS != NCVBroxOpticalFlow (desc, *g_pGPUMemAllocator, *dst, *src, uBck, vBck, 0)) - { - std::cout << "Failed\n"; - return -1; - } - - // matrix for temporary data - SAFE_MAT_DECL (d_temp, g_pGPUMemAllocator, width, height); - - // first frame color components (GPU memory) - SAFE_MAT_DECL (d_r, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (d_g, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (d_b, g_pGPUMemAllocator, width, height); - - // second frame color components (GPU memory) - SAFE_MAT_DECL (d_rt, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (d_gt, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (d_bt, g_pGPUMemAllocator, width, height); - - // intermediate frame color components (GPU memory) - SAFE_MAT_DECL (d_rNew, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (d_gNew, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (d_bNew, g_pGPUMemAllocator, width, height); - - // interpolated forward flow - SAFE_MAT_DECL (ui, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (vi, g_pGPUMemAllocator, width, height); - - // interpolated backward flow - SAFE_MAT_DECL (ubi, g_pGPUMemAllocator, width, height); - SAFE_MAT_DECL (vbi, g_pGPUMemAllocator, width, height); - - // occlusion masks - SAFE_MAT_DECL (occ0, g_pGPUMemAllocator, width, height); - 
SAFE_MAT_DECL (occ1, g_pGPUMemAllocator, width, height); - - // prepare color components on host and copy them to device memory - ncvAssertReturnNcvStat (CopyData (firstFrame, h_r)); - ncvAssertReturnNcvStat (CopyData (firstFrame, h_g)); - ncvAssertReturnNcvStat (CopyData (firstFrame, h_b)); - - ncvAssertReturnNcvStat (h_r.copySolid ( d_r, 0 )); - ncvAssertReturnNcvStat (h_g.copySolid ( d_g, 0 )); - ncvAssertReturnNcvStat (h_b.copySolid ( d_b, 0 )); - - ncvAssertReturnNcvStat (CopyData (lastFrame, h_r)); - ncvAssertReturnNcvStat (CopyData (lastFrame, h_g)); - ncvAssertReturnNcvStat (CopyData (lastFrame, h_b)); - - ncvAssertReturnNcvStat (h_r.copySolid ( d_rt, 0 )); - ncvAssertReturnNcvStat (h_g.copySolid ( d_gt, 0 )); - ncvAssertReturnNcvStat (h_b.copySolid ( d_bt, 0 )); - - std::cout << "Interpolating...\n"; - std::cout.precision (4); - - std::vector frames; - frames.push_back (firstFrame); - - // compute interpolated frames - for (Ncv32f timePos = timeStep; timePos < 1.0f; timePos += timeStep) - { - ncvAssertCUDAReturn (cudaMemset (ui.ptr (), 0, ui.pitch () * ui.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (vi.ptr (), 0, vi.pitch () * vi.height ()), NCV_CUDA_ERROR); - - ncvAssertCUDAReturn (cudaMemset (ubi.ptr (), 0, ubi.pitch () * ubi.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (vbi.ptr (), 0, vbi.pitch () * vbi.height ()), NCV_CUDA_ERROR); - - ncvAssertCUDAReturn (cudaMemset (occ0.ptr (), 0, occ0.pitch () * occ0.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (occ1.ptr (), 0, occ1.pitch () * occ1.height ()), NCV_CUDA_ERROR); - - NppStInterpolationState state; - // interpolation state should be filled once except pSrcFrame0, pSrcFrame1, and pNewFrame - // we will only need to reset buffers content to 0 since interpolator doesn't do this itself - state.size = NcvSize32u (width, height); - state.nStep = d_r.pitch (); - state.pSrcFrame0 = d_r.ptr (); - state.pSrcFrame1 = d_rt.ptr (); - state.pFU = u.ptr (); - 
state.pFV = v.ptr (); - state.pBU = uBck.ptr (); - state.pBV = vBck.ptr (); - state.pos = timePos; - state.pNewFrame = d_rNew.ptr (); - state.ppBuffers[0] = occ0.ptr (); - state.ppBuffers[1] = occ1.ptr (); - state.ppBuffers[2] = ui.ptr (); - state.ppBuffers[3] = vi.ptr (); - state.ppBuffers[4] = ubi.ptr (); - state.ppBuffers[5] = vbi.ptr (); - - // interpolate red channel - nppiStInterpolateFrames (&state); - - // reset buffers - ncvAssertCUDAReturn (cudaMemset (ui.ptr (), 0, ui.pitch () * ui.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (vi.ptr (), 0, vi.pitch () * vi.height ()), NCV_CUDA_ERROR); - - ncvAssertCUDAReturn (cudaMemset (ubi.ptr (), 0, ubi.pitch () * ubi.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (vbi.ptr (), 0, vbi.pitch () * vbi.height ()), NCV_CUDA_ERROR); - - ncvAssertCUDAReturn (cudaMemset (occ0.ptr (), 0, occ0.pitch () * occ0.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (occ1.ptr (), 0, occ1.pitch () * occ1.height ()), NCV_CUDA_ERROR); - - // interpolate green channel - state.pSrcFrame0 = d_g.ptr (); - state.pSrcFrame1 = d_gt.ptr (); - state.pNewFrame = d_gNew.ptr (); - - nppiStInterpolateFrames (&state); - - // reset buffers - ncvAssertCUDAReturn (cudaMemset (ui.ptr (), 0, ui.pitch () * ui.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (vi.ptr (), 0, vi.pitch () * vi.height ()), NCV_CUDA_ERROR); - - ncvAssertCUDAReturn (cudaMemset (ubi.ptr (), 0, ubi.pitch () * ubi.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (vbi.ptr (), 0, vbi.pitch () * vbi.height ()), NCV_CUDA_ERROR); - - ncvAssertCUDAReturn (cudaMemset (occ0.ptr (), 0, occ0.pitch () * occ0.height ()), NCV_CUDA_ERROR); - ncvAssertCUDAReturn (cudaMemset (occ1.ptr (), 0, occ1.pitch () * occ1.height ()), NCV_CUDA_ERROR); - - // interpolate blue channel - state.pSrcFrame0 = d_b.ptr (); - state.pSrcFrame1 = d_bt.ptr (); - state.pNewFrame = d_bNew.ptr (); - - nppiStInterpolateFrames (&state); - - // copy to 
host memory - ncvAssertReturnNcvStat (d_rNew.copySolid (h_r, 0)); - ncvAssertReturnNcvStat (d_gNew.copySolid (h_g, 0)); - ncvAssertReturnNcvStat (d_bNew.copySolid (h_b, 0)); - - // convert to IplImage - IplImage *newFrame = CreateImage (h_r, h_g, h_b); - if (newFrame == 0) - { - std::cout << "Could not create new frame in host memory\n"; - break; - } - frames.push_back (newFrame); - std::cout << timePos * 100.0f << "%\r"; - } - std::cout << std::setw (5) << "100%\n"; - - frames.push_back (lastFrame); - - Ncv32u currentFrame; - currentFrame = 0; - - ShowFlow (u, v, "Forward flow"); - ShowFlow (uBck, vBck, "Backward flow"); - - cvShowImage ("Interpolated frame", frames[currentFrame]); - - bool qPressed = false; - while ( !qPressed ) - { - int key = toupper (cvWaitKey (10)); - switch (key) - { - case 27: - qPressed = true; - break; - case 'A': - if (currentFrame > 0) --currentFrame; - cvShowImage ("Interpolated frame", frames[currentFrame]); - break; - case 'S': - if (currentFrame < frames.size()-1) ++currentFrame; - cvShowImage ("Interpolated frame", frames[currentFrame]); - break; - } - } - - cvDestroyAllWindows (); - - std::vector::iterator iter; - for (iter = frames.begin (); iter != frames.end (); ++iter) - { - cvReleaseImage (&(*iter)); - } - - return 0; -} - -#endif From 0cd14e3fb4ec28990e6587b907daef88d54ed824 Mon Sep 17 00:00:00 2001 From: berak Date: Thu, 1 Nov 2018 15:37:23 +0100 Subject: [PATCH 02/14] java: add support for MatOfRotatedRect --- modules/core/misc/java/gen_dict.json | 9 +++++++++ modules/java/generator/src/cpp/converters.cpp | 14 ++++++++++++++ modules/java/generator/src/cpp/converters.h | 2 ++ 3 files changed, 25 insertions(+) diff --git a/modules/core/misc/java/gen_dict.json b/modules/core/misc/java/gen_dict.json index 44505b6b8d..36b2c58e58 100644 --- a/modules/core/misc/java/gen_dict.json +++ b/modules/core/misc/java/gen_dict.json @@ -765,6 +765,15 @@ "v_type": "Mat", "j_import": "org.opencv.core.MatOfRect2d" }, + "vector_RotatedRect": { 
+ "j_type": "MatOfRotatedRect", + "jn_type": "long", + "jni_type": "jlong", + "jni_var": "std::vector< RotatedRect > %(n)s", + "suffix": "J", + "v_type": "Mat", + "j_import": "org.opencv.core.MatOfRotatedRect" + }, "vector_String": { "j_type": "List", "jn_type": "List", diff --git a/modules/java/generator/src/cpp/converters.cpp b/modules/java/generator/src/cpp/converters.cpp index 11511200e1..9c4caaccca 100644 --- a/modules/java/generator/src/cpp/converters.cpp +++ b/modules/java/generator/src/cpp/converters.cpp @@ -107,6 +107,20 @@ void vector_Rect2d_to_Mat(std::vector& v_rect, Mat& mat) mat = Mat(v_rect, true); } +//vector_RotatedRect + +void Mat_to_vector_RotatedRect(Mat& mat, std::vector& v_rect) +{ + v_rect.clear(); + CHECK_MAT(mat.type()==CV_32FC(5) && mat.cols==1); + v_rect = (std::vector) mat; +} + +void vector_RotatedRect_to_Mat(std::vector& v_rect, Mat& mat) +{ + mat = Mat(v_rect, true); +} + //vector_Point void Mat_to_vector_Point(Mat& mat, std::vector& v_point) { diff --git a/modules/java/generator/src/cpp/converters.h b/modules/java/generator/src/cpp/converters.h index 27c2ded3a8..019d7d698f 100644 --- a/modules/java/generator/src/cpp/converters.h +++ b/modules/java/generator/src/cpp/converters.h @@ -26,6 +26,8 @@ void vector_Rect_to_Mat(std::vector& v_rect, cv::Mat& mat); void Mat_to_vector_Rect2d(cv::Mat& mat, std::vector& v_rect); void vector_Rect2d_to_Mat(std::vector& v_rect, cv::Mat& mat); +void Mat_to_vector_RotatedRect(cv::Mat& mat, std::vector& v_rect); +void vector_RotatedRect_to_Mat(std::vector& v_rect, cv::Mat& mat); void Mat_to_vector_Point(cv::Mat& mat, std::vector& v_point); void Mat_to_vector_Point2f(cv::Mat& mat, std::vector& v_point); From fbe41703e4bf6cf57acd2e5411d8460767acef8e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 1 Nov 2018 01:37:40 +0000 Subject: [PATCH 03/14] cmake: introduce setup_vars scripts - CMake install COMPONENT: "scripts" --- CMakeLists.txt | 5 +++ cmake/OpenCVGenSetupVars.cmake | 51 
+++++++++++++++++++++++++ cmake/OpenCVUtils.cmake | 4 +- cmake/templates/setup_vars_linux.sh.in | 20 ++++++++++ cmake/templates/setup_vars_macosx.sh.in | 20 ++++++++++ cmake/templates/setup_vars_win32.cmd.in | 18 +++++++++ 6 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 cmake/OpenCVGenSetupVars.cmake create mode 100644 cmake/templates/setup_vars_linux.sh.in create mode 100644 cmake/templates/setup_vars_macosx.sh.in create mode 100644 cmake/templates/setup_vars_win32.cmd.in diff --git a/CMakeLists.txt b/CMakeLists.txt index a536b4d5c0..cfcc63f665 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -345,6 +345,7 @@ OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_chec OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON ) OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF ) OCV_OPTION(CV_TRACE "Enable OpenCV code trace" ON) +OCV_OPTION(OPENCV_GENERATE_SETUPVARS "Generate setup_vars* scripts" ON IF (NOT ANDROID AND NOT APPLE_FRAMEWORK) ) OCV_OPTION(ENABLE_PYLINT "Add target with Pylint checks" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) ) OCV_OPTION(ENABLE_FLAKE8 "Add target with Python flake8 checker" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) ) @@ -925,6 +926,10 @@ if(COMMAND ocv_pylint_finalize) ocv_pylint_finalize() endif() +if(OPENCV_GENERATE_SETUPVARS) + include(cmake/OpenCVGenSetupVars.cmake) +endif() + # ---------------------------------------------------------------------------- # Summary: # ---------------------------------------------------------------------------- diff --git a/cmake/OpenCVGenSetupVars.cmake b/cmake/OpenCVGenSetupVars.cmake new file mode 100644 index 0000000000..e924317871 --- /dev/null +++ b/cmake/OpenCVGenSetupVars.cmake @@ -0,0 +1,51 @@ +if(WIN32) + ocv_update(OPENCV_SETUPVARS_INSTALL_PATH ".") + 
ocv_update(OPENCV_SCRIPT_EXTENSION ".cmd") + ocv_update(OPENCV_SETUPVARS_TEMPLATE "setup_vars_win32.cmd.in") +else() + ocv_update(OPENCV_SETUPVARS_INSTALL_PATH "bin") + ocv_update(OPENCV_SCRIPT_EXTENSION ".sh") + if(APPLE) + ocv_update(OPENCV_SETUPVARS_TEMPLATE "setup_vars_macosx.sh.in") + else() + ocv_update(OPENCV_SETUPVARS_TEMPLATE "setup_vars_linux.sh.in") + endif() +endif() + +if(INSTALL_TO_MANGLED_PATHS) + ocv_update(OPENCV_SETUPVARS_FILENAME "setup_vars_opencv-${OPENCV_VERSION}${OPENCV_SCRIPT_EXTENSION}") +else() + ocv_update(OPENCV_SETUPVARS_FILENAME setup_vars_opencv3${OPENCV_SCRIPT_EXTENSION}) +endif() + +##### build directory +if(WIN32) + set(__build_type "${CMAKE_BUILD_TYPE}") + if(NOT __build_type) + set(__build_type "Release") # default + endif() + file(RELATIVE_PATH OPENCV_LIB_RUNTIME_DIR_RELATIVE_CMAKECONFIG "${OpenCV_BINARY_DIR}/" "${EXECUTABLE_OUTPUT_PATH}/${__build_type}/") +else() + file(RELATIVE_PATH OPENCV_LIB_RUNTIME_DIR_RELATIVE_CMAKECONFIG "${OpenCV_BINARY_DIR}/" "${LIBRARY_OUTPUT_PATH}/") +endif() +set(OPENCV_PYTHON_DIR_RELATIVE_CMAKECONFIG "python_loader") # https://github.com/opencv/opencv/pull/12977 +configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/${OPENCV_SETUPVARS_TEMPLATE}" "${CMAKE_BINARY_DIR}/tmp/setup_vars${OPENCV_SCRIPT_EXTENSION}" @ONLY) +file(COPY "${CMAKE_BINARY_DIR}/tmp/setup_vars${OPENCV_SCRIPT_EXTENSION}" DESTINATION "${CMAKE_BINARY_DIR}" + FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + +##### install directory +if(WIN32) + file(RELATIVE_PATH OPENCV_LIB_RUNTIME_DIR_RELATIVE_CMAKECONFIG + "${CMAKE_INSTALL_PREFIX}/${OPENCV_SETUPVARS_INSTALL_PATH}/" "${CMAKE_INSTALL_PREFIX}/${OPENCV_BIN_INSTALL_PATH}/") +else() + file(RELATIVE_PATH OPENCV_LIB_RUNTIME_DIR_RELATIVE_CMAKECONFIG + "${CMAKE_INSTALL_PREFIX}/${OPENCV_SETUPVARS_INSTALL_PATH}/" "${CMAKE_INSTALL_PREFIX}/${OPENCV_LIB_INSTALL_PATH}/") +endif() +file(RELATIVE_PATH OPENCV_PYTHON_DIR_RELATIVE_CMAKECONFIG + 
"${CMAKE_INSTALL_PREFIX}/${OPENCV_SETUPVARS_INSTALL_PATH}/" "${CMAKE_INSTALL_PREFIX}/") +ocv_path_join(OPENCV_PYTHON_DIR_RELATIVE_CMAKECONFIG "${OPENCV_PYTHON_DIR_RELATIVE_CMAKECONFIG}" "python_loader") # https://github.com/opencv/opencv/pull/12977 +configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/${OPENCV_SETUPVARS_TEMPLATE}" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/install/${OPENCV_SETUPVARS_FILENAME}" @ONLY) +install(FILES "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/install/${OPENCV_SETUPVARS_FILENAME}" + DESTINATION "${OPENCV_SETUPVARS_INSTALL_PATH}" + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + COMPONENT scripts) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 3ae6162ba6..a07f035b3a 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -149,7 +149,9 @@ macro(ocv_path_join result_var P1 P2_) else() set(${result_var} "${P1}/${P2}") endif() - string(REGEX REPLACE "([/\\]?)[\\.][/\\]" "\\1" ${result_var} "${${result_var}}") + string(REPLACE "\\\\" "\\" ${result_var} "${${result_var}}") + string(REPLACE "//" "/" ${result_var} "${${result_var}}") + string(REGEX REPLACE "(^|[/\\])[\\.][/\\]" "\\1" ${result_var} "${${result_var}}") if("${${result_var}}" STREQUAL "") set(${result_var} ".") endif() diff --git a/cmake/templates/setup_vars_linux.sh.in b/cmake/templates/setup_vars_linux.sh.in new file mode 100644 index 0000000000..f7d30541c6 --- /dev/null +++ b/cmake/templates/setup_vars_linux.sh.in @@ -0,0 +1,20 @@ +#!/bin/bash + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +[[ ! "${OPENCV_QUIET}" ]] && ( echo "Setting vars for OpenCV @OPENCV_VERSION@" ) +export LD_LIBRARY_PATH="$SCRIPT_DIR/@OPENCV_LIB_RUNTIME_DIR_RELATIVE_CMAKECONFIG@:$LD_LIBRARY_PATH" + +if [[ ! "$OPENCV_SKIP_PYTHON" ]]; then + PYTHONPATH_OPENCV="$SCRIPT_DIR/@OPENCV_PYTHON_DIR_RELATIVE_CMAKECONFIG@" + [[ ! 
"${OPENCV_QUIET}" ]] && ( echo "Append PYTHONPATH: ${PYTHONPATH_OPENCV}" ) + export PYTHONPATH="${PYTHONPATH_OPENCV}:$PYTHONPATH" +fi + +# Don't exec in "sourced" mode +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + if [[ $# -ne 0 ]]; then + [[ ! "${OPENCV_QUIET}" && "${OPENCV_VERBOSE}" ]] && ( echo "Executing: $*" ) + exec "$@" + fi +fi diff --git a/cmake/templates/setup_vars_macosx.sh.in b/cmake/templates/setup_vars_macosx.sh.in new file mode 100644 index 0000000000..3e85f3add2 --- /dev/null +++ b/cmake/templates/setup_vars_macosx.sh.in @@ -0,0 +1,20 @@ +#!/bin/bash + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +[[ ! "${OPENCV_QUIET}" ]] && ( echo "Setting vars for OpenCV @OPENCV_VERSION@" ) +export DYLD_LIBRARY_PATH="$SCRIPT_DIR/@OPENCV_LIB_RUNTIME_DIR_RELATIVE_CMAKECONFIG@:$DYLD_LIBRARY_PATH" + +if [[ ! "$OPENCV_SKIP_PYTHON" ]]; then + PYTHONPATH_OPENCV="$SCRIPT_DIR/@OPENCV_PYTHON_DIR_RELATIVE_CMAKECONFIG@" + [[ ! "${OPENCV_QUIET}" ]] && ( echo "Append PYTHONPATH: ${PYTHONPATH_OPENCV}" ) + export PYTHONPATH="${PYTHONPATH_OPENCV}:$PYTHONPATH" +fi + +# Don't exec in "sourced" mode +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + if [[ $# -ne 0 ]]; then + [[ ! "${OPENCV_QUIET}" && "${OPENCV_VERBOSE}" ]] && ( echo "Executing: $*" ) + exec "$@" + fi +fi diff --git a/cmake/templates/setup_vars_win32.cmd.in b/cmake/templates/setup_vars_win32.cmd.in new file mode 100644 index 0000000000..f6722535ac --- /dev/null +++ b/cmake/templates/setup_vars_win32.cmd.in @@ -0,0 +1,18 @@ +@ECHO OFF +SETLOCAL EnableDelayedExpansion + +SET "SCRIPT_DIR=%~dp0" + +IF NOT DEFINED OPENCV_QUIET ( ECHO Setting vars for OpenCV @OPENCV_VERSION@ ) +SET "PATH=!SCRIPT_DIR!\@OPENCV_LIB_RUNTIME_DIR_RELATIVE_CMAKECONFIG@;%PATH%" + +IF NOT DEFINED OPENCV_SKIP_PYTHON ( + SET "PYTHONPATH_OPENCV=!SCRIPT_DIR!\@OPENCV_PYTHON_DIR_RELATIVE_CMAKECONFIG@" + IF NOT DEFINED OPENCV_QUIET ( ECHO Append PYTHONPATH: !PYTHONPATH_OPENCV! 
) + SET "PYTHONPATH=!PYTHONPATH_OPENCV!;%PYTHONPATH%" +) + +IF NOT [%1] == [] ( + %* + EXIT /B !errorlevel! +) From 0fda551dbcb922acc1b898a5e2c4bcbc4c422bf8 Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Fri, 2 Nov 2018 12:26:23 +0300 Subject: [PATCH 04/14] Updated medianBlur implementations to use wide universal intrinsics --- modules/imgproc/src/smooth.cpp | 417 ++++++++++++++++++++------------- 1 file changed, 258 insertions(+), 159 deletions(-) diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 3eb79594fe..cb815e241a 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -4178,45 +4178,6 @@ typedef struct HT fine[16][16]; } Histogram; - -#if CV_SIMD128 - -static inline void histogram_add_simd( const HT x[16], HT y[16] ) -{ - v_store(y, v_load(x) + v_load(y)); - v_store(y + 8, v_load(x + 8) + v_load(y + 8)); -} - -static inline void histogram_sub_simd( const HT x[16], HT y[16] ) -{ - v_store(y, v_load(y) - v_load(x)); - v_store(y + 8, v_load(y + 8) - v_load(x + 8)); -} - -#endif - - -static inline void histogram_add( const HT x[16], HT y[16] ) -{ - int i; - for( i = 0; i < 16; ++i ) - y[i] = (HT)(y[i] + x[i]); -} - -static inline void histogram_sub( const HT x[16], HT y[16] ) -{ - int i; - for( i = 0; i < 16; ++i ) - y[i] = (HT)(y[i] - x[i]); -} - -static inline void histogram_muladd( int a, const HT x[16], - HT y[16] ) -{ - for( int i = 0; i < 16; ++i ) - y[i] = (HT)(y[i] + a * x[i]); -} - static void medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) { @@ -4244,9 +4205,6 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) std::vector _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + 16); HT* h_coarse = alignPtr(&_h_coarse[0], 16); HT* h_fine = alignPtr(&_h_fine[0], 16); -#if CV_SIMD128 - volatile bool useSIMD = hasSIMD128(); -#endif for( int x = 0; x < _dst.cols; x += STRIPE_SIZE ) { @@ -4288,136 +4246,170 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) } // First column 
initialization - for( k = 0; k < 16; ++k ) - histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] ); - -#if CV_SIMD128 - if( useSIMD ) + for (k = 0; k < 16; ++k) { - for( j = 0; j < 2*r; ++j ) - histogram_add_simd( &h_coarse[16*(n*c+j)], H[c].coarse ); - - for( j = r; j < n-r; j++ ) - { - int t = 2*r*r + 2*r, b, sum = 0; - HT* segment; - - histogram_add_simd( &h_coarse[16*(n*c + std::min(j+r,n-1))], H[c].coarse ); - - // Find median at coarse level - for ( k = 0; k < 16 ; ++k ) - { - sum += H[c].coarse[k]; - if ( sum > t ) - { - sum -= H[c].coarse[k]; - break; - } - } - CV_Assert( k < 16 ); +#if CV_SIMD256 + v_store(H[c].fine[k], v_mul_wrap(v256_load(h_fine + 16 * n*(16 * c + k)), v256_setall_u16(2 * r + 1)) + v256_load(H[c].fine[k])); +#elif CV_SIMD128 + v_store(H[c].fine[k], v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k)), v_setall_u16(2 * r + 1)) + v_load(H[c].fine[k])); + v_store(H[c].fine[k] + 8, v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k) + 8), v_setall_u16(2 * r + 1)) + v_load(H[c].fine[k] + 8)); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += (2 * r + 1) * h_fine[16 * n*(16 * c + k) + ind]; +#endif + } - /* Update corresponding histogram segment */ - if ( luc[c][k] <= j-r ) - { - memset( &H[c].fine[k], 0, 16 * sizeof(HT) ); - for ( luc[c][k] = cv::HT(j-r); luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) - histogram_add_simd( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] ); - - if ( luc[c][k] < j+r+1 ) - { - histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] ); - luc[c][k] = (HT)(j+r+1); - } - } - else - { - for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) - { - histogram_sub_simd( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] ); - histogram_add_simd( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] ); - } - } +#if CV_SIMD256 + v_uint16x16 v_coarse = v256_load(H[c].coarse); +#elif CV_SIMD128 + v_uint16x8 v_coarsel = v_load(H[c].coarse); + v_uint16x8 v_coarseh = v_load(H[c].coarse + 8); 
+#endif + HT* px = h_coarse + 16 * n*c; + for( j = 0; j < 2*r; ++j, px += 16 ) + { +#if CV_SIMD256 + v_coarse += v256_load(px); +#elif CV_SIMD128 + v_coarsel += v_load(px); + v_coarseh += v_load(px + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].coarse[ind] += px[ind]; +#endif + } - histogram_sub_simd( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse ); + for( j = r; j < n-r; j++ ) + { + int t = 2*r*r + 2*r, b, sum = 0; + HT* segment; + + px = h_coarse + 16 * (n*c + std::min(j + r, n - 1)); +#if CV_SIMD256 + v_coarse += v256_load(px); + v_store(H[c].coarse, v_coarse); +#elif CV_SIMD128 + v_coarsel += v_load(px); + v_coarseh += v_load(px + 8); + v_store(H[c].coarse, v_coarsel); + v_store(H[c].coarse + 8, v_coarseh); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].coarse[ind] += px[ind]; +#endif - /* Find median in segment */ - segment = H[c].fine[k]; - for ( b = 0; b < 16 ; b++ ) + // Find median at coarse level + for ( k = 0; k < 16 ; ++k ) + { + sum += H[c].coarse[k]; + if ( sum > t ) { - sum += segment[b]; - if ( sum > t ) - { - dst[dstep*i+cn*j+c] = (uchar)(16*k + b); - break; - } + sum -= H[c].coarse[k]; + break; } - CV_Assert( b < 16 ); } - } - else + CV_Assert( k < 16 ); + + /* Update corresponding histogram segment */ +#if CV_SIMD256 + v_uint16x16 v_fine; +#elif CV_SIMD128 + v_uint16x8 v_finel; + v_uint16x8 v_fineh; #endif - { - for( j = 0; j < 2*r; ++j ) - histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse ); - - for( j = r; j < n-r; j++ ) + if ( luc[c][k] <= j-r ) { - int t = 2*r*r + 2*r, b, sum = 0; - HT* segment; - - histogram_add( &h_coarse[16*(n*c + std::min(j+r,n-1))], H[c].coarse ); - - // Find median at coarse level - for ( k = 0; k < 16 ; ++k ) +#if CV_SIMD256 + v_fine = v256_setzero_u16(); +#elif CV_SIMD128 + v_finel = v_setzero_u16(); + v_fineh = v_setzero_u16(); +#else + memset(&H[c].fine[k], 0, 16 * sizeof(HT)); +#endif + px = h_fine + 16 * (n*(16 * c + k) + j - r); + for (luc[c][k] = cv::HT(j - r); luc[c][k] < MIN(j + r + 1, n); 
++luc[c][k], px += 16) { - sum += H[c].coarse[k]; - if ( sum > t ) - { - sum -= H[c].coarse[k]; - break; - } +#if CV_SIMD256 + v_fine += v256_load(px); +#elif CV_SIMD128 + v_finel += v_load(px); + v_fineh += v_load(px + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += px[ind]; +#endif } - CV_Assert( k < 16 ); - /* Update corresponding histogram segment */ - if ( luc[c][k] <= j-r ) + if ( luc[c][k] < j+r+1 ) { - memset( &H[c].fine[k], 0, 16 * sizeof(HT) ); - for ( luc[c][k] = cv::HT(j-r); luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) - histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] ); - - if ( luc[c][k] < j+r+1 ) - { - histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] ); - luc[c][k] = (HT)(j+r+1); - } + px = h_fine + 16 * (n*(16 * c + k) + (n - 1)); +#if CV_SIMD256 + v_fine += v_mul_wrap(v256_load(px), v256_setall_u16(j + r + 1 - n)); +#elif CV_SIMD128 + v_finel += v_mul_wrap(v_load(px), v_setall_u16(j + r + 1 - n)); + v_fineh += v_mul_wrap(v_load(px + 8), v_setall_u16(j + r + 1 - n)); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += (j + r + 1 - n) * px[ind]; +#endif + luc[c][k] = (HT)(j+r+1); } - else + } + else + { +#if CV_SIMD256 + v_fine = v256_load(H[c].fine[k]); +#elif CV_SIMD128 + v_finel = v_load(H[c].fine[k]); + v_fineh = v_load(H[c].fine[k] + 8); +#endif + px = h_fine + 16*n*(16 * c + k); + for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) { - for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) - { - histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] ); - histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] ); - } +#if CV_SIMD256 + v_fine += v256_load(px + 16 * MIN(luc[c][k], n - 1)) - v256_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0)); +#elif CV_SIMD128 + v_finel += v_load(px + 16 * MIN(luc[c][k], n - 1) ) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0)); + v_fineh += v_load(px + 16 * MIN(luc[c][k], n - 1) + 8) - v_load(px + 16 * MAX(luc[c][k] - 2 * 
r - 1, 0) + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += px[16 * MIN(luc[c][k], n - 1) + ind] - px[16 * MAX(luc[c][k] - 2 * r - 1, 0) + ind]; +#endif } + } - histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse ); + px = h_coarse + 16 * (n*c + MAX(j - r, 0)); +#if CV_SIMD256 + v_store(H[c].fine[k], v_fine); + v_coarse -= v256_load(px); +#elif CV_SIMD128 + v_store(H[c].fine[k], v_finel); + v_store(H[c].fine[k] + 8, v_fineh); + v_coarsel -= v_load(px); + v_coarseh -= v_load(px + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].coarse[ind] -= px[ind]; +#endif - /* Find median in segment */ - segment = H[c].fine[k]; - for ( b = 0; b < 16 ; b++ ) + /* Find median in segment */ + segment = H[c].fine[k]; + for ( b = 0; b < 16 ; b++ ) + { + sum += segment[b]; + if ( sum > t ) { - sum += segment[b]; - if ( sum > t ) - { - dst[dstep*i+cn*j+c] = (uchar)(16*k + b); - break; - } + dst[dstep*i+cn*j+c] = (uchar)(16*k + b); + break; } - CV_Assert( b < 16 ); } + CV_Assert( b < 16 ); } } +#if CV_SIMD + vx_cleanup(); +#endif } } @@ -4629,13 +4621,13 @@ struct MinMax32f } }; -#if CV_SIMD128 +#if CV_SIMD struct MinMaxVec8u { typedef uchar value_type; typedef v_uint8x16 arg_type; - enum { SIZE = 16 }; + enum { SIZE = v_uint8x16::nlanes }; arg_type load(const uchar* ptr) { return v_load(ptr); } void store(uchar* ptr, const arg_type &val) { v_store(ptr, val); } void operator()(arg_type& a, arg_type& b) const @@ -4644,6 +4636,18 @@ struct MinMaxVec8u a = v_min(a, b); b = v_max(b, t); } +#if CV_SIMD_WIDTH > 16 + typedef v_uint8 warg_type; + enum { WSIZE = v_uint8::nlanes }; + warg_type wload(const uchar* ptr) { return vx_load(ptr); } + void store(uchar* ptr, const warg_type &val) { v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif }; @@ -4651,7 +4655,7 @@ struct MinMaxVec16u { typedef ushort value_type; typedef v_uint16x8 arg_type; - enum { SIZE = 8 }; + enum 
{ SIZE = v_uint16x8::nlanes }; arg_type load(const ushort* ptr) { return v_load(ptr); } void store(ushort* ptr, const arg_type &val) { v_store(ptr, val); } void operator()(arg_type& a, arg_type& b) const @@ -4660,6 +4664,18 @@ struct MinMaxVec16u a = v_min(a, b); b = v_max(b, t); } +#if CV_SIMD_WIDTH > 16 + typedef v_uint16 warg_type; + enum { WSIZE = v_uint16::nlanes }; + warg_type wload(const ushort* ptr) { return vx_load(ptr); } + void store(ushort* ptr, const warg_type &val) { v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif }; @@ -4667,7 +4683,7 @@ struct MinMaxVec16s { typedef short value_type; typedef v_int16x8 arg_type; - enum { SIZE = 8 }; + enum { SIZE = v_int16x8::nlanes }; arg_type load(const short* ptr) { return v_load(ptr); } void store(short* ptr, const arg_type &val) { v_store(ptr, val); } void operator()(arg_type& a, arg_type& b) const @@ -4676,6 +4692,18 @@ struct MinMaxVec16s a = v_min(a, b); b = v_max(b, t); } +#if CV_SIMD_WIDTH > 16 + typedef v_int16 warg_type; + enum { WSIZE = v_int16::nlanes }; + warg_type wload(const short* ptr) { return vx_load(ptr); } + void store(short* ptr, const warg_type &val) { v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif }; @@ -4683,7 +4711,7 @@ struct MinMaxVec32f { typedef float value_type; typedef v_float32x4 arg_type; - enum { SIZE = 4 }; + enum { SIZE = v_float32x4::nlanes }; arg_type load(const float* ptr) { return v_load(ptr); } void store(float* ptr, const arg_type &val) { v_store(ptr, val); } void operator()(arg_type& a, arg_type& b) const @@ -4692,6 +4720,18 @@ struct MinMaxVec32f a = v_min(a, b); b = v_max(b, t); } +#if CV_SIMD_WIDTH > 16 + typedef v_float32 warg_type; + enum { WSIZE = v_float32::nlanes }; + warg_type wload(const float* ptr) { return vx_load(ptr); } + void store(float* ptr, const warg_type &val) 
{ v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif }; #else @@ -4710,6 +4750,7 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) typedef typename Op::value_type T; typedef typename Op::arg_type WT; typedef typename VecOp::arg_type VT; + typedef typename VecOp::warg_type WVT; const T* src = _src.ptr(); T* dst = _dst.ptr(); @@ -4719,7 +4760,6 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) int i, j, k, cn = _src.channels(); Op op; VecOp vop; - volatile bool useSIMD = hasSIMD128(); if( m == 3 ) { @@ -4749,7 +4789,7 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) const T* row0 = src + std::max(i - 1, 0)*sstep; const T* row1 = src + i*sstep; const T* row2 = src + std::min(i + 1, size.height-1)*sstep; - int limit = useSIMD ? cn : size.width; + int limit = cn; for(j = 0;; ) { @@ -4772,6 +4812,21 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) if( limit == size.width ) break; +#if CV_SIMD_WIDTH > 16 + for( ; j <= size.width - VecOp::WSIZE - cn; j += VecOp::WSIZE ) + { + WVT p0 = vop.wload(row0+j-cn), p1 = vop.wload(row0+j), p2 = vop.wload(row0+j+cn); + WVT p3 = vop.wload(row1+j-cn), p4 = vop.wload(row1+j), p5 = vop.wload(row1+j+cn); + WVT p6 = vop.wload(row2+j-cn), p7 = vop.wload(row2+j), p8 = vop.wload(row2+j+cn); + + vop(p1, p2); vop(p4, p5); vop(p7, p8); vop(p0, p1); + vop(p3, p4); vop(p6, p7); vop(p1, p2); vop(p4, p5); + vop(p7, p8); vop(p0, p3); vop(p5, p8); vop(p4, p7); + vop(p3, p6); vop(p1, p4); vop(p2, p5); vop(p4, p7); + vop(p4, p2); vop(p6, p4); vop(p4, p2); + vop.store(dst+j, p4); + } +#endif for( ; j <= size.width - VecOp::SIZE - cn; j += VecOp::SIZE ) { VT p0 = vop.load(row0+j-cn), p1 = vop.load(row0+j), p2 = vop.load(row0+j+cn); @@ -4789,6 +4844,9 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) limit = size.width; } } +#if CV_SIMD + vx_cleanup(); +#endif } else if( m == 5 ) { @@ -4824,7 +4882,7 @@ 
medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) row[2] = src + i*sstep; row[3] = src + std::min(i + 1, size.height-1)*sstep; row[4] = src + std::min(i + 2, size.height-1)*sstep; - int limit = useSIMD ? cn*2 : size.width; + int limit = cn*2; for(j = 0;; ) { @@ -4872,6 +4930,44 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) if( limit == size.width ) break; +#if CV_SIMD_WIDTH > 16 + for( ; j <= size.width - VecOp::WSIZE - cn*2; j += VecOp::WSIZE ) + { + WVT p[25]; + for( k = 0; k < 5; k++ ) + { + const T* rowk = row[k]; + p[k*5] = vop.wload(rowk+j-cn*2); p[k*5+1] = vop.wload(rowk+j-cn); + p[k*5+2] = vop.wload(rowk+j); p[k*5+3] = vop.wload(rowk+j+cn); + p[k*5+4] = vop.wload(rowk+j+cn*2); + } + + vop(p[1], p[2]); vop(p[0], p[1]); vop(p[1], p[2]); vop(p[4], p[5]); vop(p[3], p[4]); + vop(p[4], p[5]); vop(p[0], p[3]); vop(p[2], p[5]); vop(p[2], p[3]); vop(p[1], p[4]); + vop(p[1], p[2]); vop(p[3], p[4]); vop(p[7], p[8]); vop(p[6], p[7]); vop(p[7], p[8]); + vop(p[10], p[11]); vop(p[9], p[10]); vop(p[10], p[11]); vop(p[6], p[9]); vop(p[8], p[11]); + vop(p[8], p[9]); vop(p[7], p[10]); vop(p[7], p[8]); vop(p[9], p[10]); vop(p[0], p[6]); + vop(p[4], p[10]); vop(p[4], p[6]); vop(p[2], p[8]); vop(p[2], p[4]); vop(p[6], p[8]); + vop(p[1], p[7]); vop(p[5], p[11]); vop(p[5], p[7]); vop(p[3], p[9]); vop(p[3], p[5]); + vop(p[7], p[9]); vop(p[1], p[2]); vop(p[3], p[4]); vop(p[5], p[6]); vop(p[7], p[8]); + vop(p[9], p[10]); vop(p[13], p[14]); vop(p[12], p[13]); vop(p[13], p[14]); vop(p[16], p[17]); + vop(p[15], p[16]); vop(p[16], p[17]); vop(p[12], p[15]); vop(p[14], p[17]); vop(p[14], p[15]); + vop(p[13], p[16]); vop(p[13], p[14]); vop(p[15], p[16]); vop(p[19], p[20]); vop(p[18], p[19]); + vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[21], p[23]); vop(p[22], p[24]); + vop(p[22], p[23]); vop(p[18], p[21]); vop(p[20], p[23]); vop(p[20], p[21]); vop(p[19], p[22]); + vop(p[22], p[24]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[12], 
p[18]); + vop(p[16], p[22]); vop(p[16], p[18]); vop(p[14], p[20]); vop(p[20], p[24]); vop(p[14], p[16]); + vop(p[18], p[20]); vop(p[22], p[24]); vop(p[13], p[19]); vop(p[17], p[23]); vop(p[17], p[19]); + vop(p[15], p[21]); vop(p[15], p[17]); vop(p[19], p[21]); vop(p[13], p[14]); vop(p[15], p[16]); + vop(p[17], p[18]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[0], p[12]); + vop(p[8], p[20]); vop(p[8], p[12]); vop(p[4], p[16]); vop(p[16], p[24]); vop(p[12], p[16]); + vop(p[2], p[14]); vop(p[10], p[22]); vop(p[10], p[14]); vop(p[6], p[18]); vop(p[6], p[10]); + vop(p[10], p[12]); vop(p[1], p[13]); vop(p[9], p[21]); vop(p[9], p[13]); vop(p[5], p[17]); + vop(p[13], p[17]); vop(p[3], p[15]); vop(p[11], p[23]); vop(p[11], p[15]); vop(p[7], p[19]); + vop(p[7], p[11]); vop(p[11], p[13]); vop(p[11], p[12]); + vop.store(dst+j, p[12]); + } +#endif for( ; j <= size.width - VecOp::SIZE - cn*2; j += VecOp::SIZE ) { VT p[25]; @@ -4912,6 +5008,9 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) limit = size.width; } } +#if CV_SIMD + vx_cleanup(); +#endif } } @@ -5173,7 +5272,7 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize ) #endif bool useSortNet = ksize == 3 || (ksize == 5 -#if !(CV_SIMD128) +#if !(CV_SIMD) && ( src0.depth() > CV_8U || src0.channels() == 2 || src0.channels() > 4 ) #endif ); @@ -5208,7 +5307,7 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize ) double img_size_mp = (double)(src0.total())/(1 << 20); if( ksize <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)* - (CV_SIMD128 && hasSIMD128() ? 1 : 3)) + (CV_SIMD ? 
1 : 3)) medianBlur_8u_Om( src, dst, ksize ); else medianBlur_8u_O1( src, dst, ksize ); From 30bf4a5e34251498ba9da548b00e7c833ebf7e3a Mon Sep 17 00:00:00 2001 From: fegorsch Date: Mon, 15 Oct 2018 18:13:03 +0200 Subject: [PATCH 05/14] CircleGridClusterFinder: map circle pattern width and height correctly During the cluster-based detection of circle grids, the detected circle pattern has to be mapped to 3D-points. When doing this the width (i.e. more circles) and height (i.e. fewer circles) of the pattern need to be identified in image coordinates. Until now this was done by assuming that the shorter side in image coordinates (length in pixels) corresponds to the height in 3D. This assumption does not hold if we look at the pattern from a perspective where the projection of the width is shorter than the projection of the height. This in turn led to misdetections even though the circle pattern was clearly visible. Instead count how many circles have been detected along two edges of the projected quadrangle and use the one with more circles as width and the one with fewer as height. 
--- modules/calib3d/src/circlesgrid.cpp | 37 +++++++++++++++++++++++++---- modules/calib3d/src/circlesgrid.hpp | 2 +- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/modules/calib3d/src/circlesgrid.cpp b/modules/calib3d/src/circlesgrid.cpp index 81693941de..c27604873c 100644 --- a/modules/calib3d/src/circlesgrid.cpp +++ b/modules/calib3d/src/circlesgrid.cpp @@ -178,7 +178,7 @@ void CirclesGridClusterFinder::findGrid(const std::vector &points, if(outsideCorners.size() != outsideCornersCount) return; } - getSortedCorners(hull2f, corners, outsideCorners, sortedCorners); + getSortedCorners(hull2f, patternPoints, corners, outsideCorners, sortedCorners); if(sortedCorners.size() != cornersCount) return; @@ -295,7 +295,18 @@ void CirclesGridClusterFinder::findOutsideCorners(const std::vector #endif } -void CirclesGridClusterFinder::getSortedCorners(const std::vector &hull2f, const std::vector &corners, const std::vector &outsideCorners, std::vector &sortedCorners) +namespace { +double pointLineDistance(const cv::Point2f &p, const cv::Vec4f &line) +{ + Vec3f pa( line[0], line[1], 1 ); + Vec3f pb( line[2], line[3], 1 ); + Vec3f l = pa.cross(pb); + return std::abs((p.x * l[0] + p.y * l[1] + l[2])) * 1.0 / + std::sqrt(double(l[0] * l[0] + l[1] * l[1])); +} +} + +void CirclesGridClusterFinder::getSortedCorners(const std::vector &hull2f, const std::vector &patternPoints, const std::vector &corners, const std::vector &outsideCorners, std::vector &sortedCorners) { Point2f firstCorner; if(isAsymmetricGrid) @@ -341,10 +352,26 @@ void CirclesGridClusterFinder::getSortedCorners(const std::vector & if(!isAsymmetricGrid) { - double dist1 = norm(sortedCorners[0] - sortedCorners[1]); - double dist2 = norm(sortedCorners[1] - sortedCorners[2]); + double dist01 = norm(sortedCorners[0] - sortedCorners[1]); + double dist12 = norm(sortedCorners[1] - sortedCorners[2]); + // Use half the average distance between circles on the shorter side as threshold for determining whether a 
point lies on an edge. + double thresh = min(dist01, dist12) / min(patternSize.width, patternSize.height) / 2; + + size_t circleCount01 = 0; + size_t circleCount12 = 0; + Vec4f line01( sortedCorners[0].x, sortedCorners[0].y, sortedCorners[1].x, sortedCorners[1].y ); + Vec4f line12( sortedCorners[1].x, sortedCorners[1].y, sortedCorners[2].x, sortedCorners[2].y ); + // Count the circles along both edges. + for (size_t i = 0; i < patternPoints.size(); i++) + { + if (pointLineDistance(patternPoints[i], line01) < thresh) + circleCount01++; + if (pointLineDistance(patternPoints[i], line12) < thresh) + circleCount12++; + } - if((dist1 > dist2 && patternSize.height > patternSize.width) || (dist1 < dist2 && patternSize.height < patternSize.width)) + // Ensure that the edge from sortedCorners[0] to sortedCorners[1] is the one with more circles (i.e. it is interpreted as the pattern's width). + if ((circleCount01 > circleCount12 && patternSize.height > patternSize.width) || (circleCount01 < circleCount12 && patternSize.height < patternSize.width)) { for(size_t i=0; i &hull2f, std::vector &corners); void findOutsideCorners(const std::vector &corners, std::vector &outsideCorners); - void getSortedCorners(const std::vector &hull2f, const std::vector &corners, const std::vector &outsideCorners, std::vector &sortedCorners); + void getSortedCorners(const std::vector &hull2f, const std::vector &patternPoints, const std::vector &corners, const std::vector &outsideCorners, std::vector &sortedCorners); void rectifyPatternPoints(const std::vector &patternPoints, const std::vector &sortedCorners, std::vector &rectifiedPatternPoints); void parsePatternPoints(const std::vector &patternPoints, const std::vector &rectifiedPatternPoints, std::vector ¢ers); From 877de883b00fae81ee03dfcd8d7c97b758d62d1d Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Fri, 2 Nov 2018 13:42:36 +0300 Subject: [PATCH 06/14] medianBlur() implementation moved to separate file --- modules/imgproc/src/median_blur.cpp 
| 1235 +++++++++++++++++++++++++++ modules/imgproc/src/smooth.cpp | 1177 +------------------------ 2 files changed, 1236 insertions(+), 1176 deletions(-) create mode 100644 modules/imgproc/src/median_blur.cpp diff --git a/modules/imgproc/src/median_blur.cpp b/modules/imgproc/src/median_blur.cpp new file mode 100644 index 0000000000..07d5ae2e6d --- /dev/null +++ b/modules/imgproc/src/median_blur.cpp @@ -0,0 +1,1235 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, 2018, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2014-2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#include + +#include "opencv2/core/hal/intrin.hpp" +#include "opencl_kernels_imgproc.hpp" + +#include "opencv2/core/openvx/ovx_defs.hpp" + +/* + * This file includes the code, contributed by Simon Perreault + * (the function icvMedianBlur_8u_O1) + * + * Constant-time median filtering -- http://nomis80.org/ctmf.html + * Copyright (C) 2006 Simon Perreault + * + * Contact: + * Laboratoire de vision et systemes numeriques + * Pavillon Adrien-Pouliot + * Universite Laval + * Sainte-Foy, Quebec, Canada + * G1K 7P4 + * + * perreaul@gel.ulaval.ca + */ + +/****************************************************************************************\ + Median Filter +\****************************************************************************************/ + +namespace cv +{ + +namespace +{ + +typedef ushort HT; + +/** + * This structure represents a two-tier histogram. The first tier (known as the + * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level) + * is 8 bit wide. Pixels inserted in the fine level also get inserted into the + * coarse bucket designated by the 4 MSBs of the fine bucket value. 
+ * + * The structure is aligned on 16 bits, which is a prerequisite for SIMD + * instructions. Each bucket is 16 bit wide, which means that extra care must be + * taken to prevent overflow. + */ +typedef struct +{ + HT coarse[16]; + HT fine[16][16]; +} Histogram; + +static void +medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) +{ +/** + * HOP is short for Histogram OPeration. This macro makes an operation \a op on + * histogram \a h for pixel value \a x. It takes care of handling both levels. + */ +#define HOP(h,x,op) \ + h.coarse[x>>4] op, \ + *((HT*)h.fine + x) op + +#define COP(c,j,x,op) \ + h_coarse[ 16*(n*c+j) + (x>>4) ] op, \ + h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op + + int cn = _dst.channels(), m = _dst.rows, r = (ksize-1)/2; + CV_Assert(cn > 0 && cn <= 4); + size_t sstep = _src.step, dstep = _dst.step; + Histogram CV_DECL_ALIGNED(16) H[4]; + HT CV_DECL_ALIGNED(16) luc[4][16]; + + int STRIPE_SIZE = std::min( _dst.cols, 512/cn ); + + std::vector _h_coarse(1 * 16 * (STRIPE_SIZE + 2*r) * cn + 16); + std::vector _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + 16); + HT* h_coarse = alignPtr(&_h_coarse[0], 16); + HT* h_fine = alignPtr(&_h_fine[0], 16); + + for( int x = 0; x < _dst.cols; x += STRIPE_SIZE ) + { + int i, j, k, c, n = std::min(_dst.cols - x, STRIPE_SIZE) + r*2; + const uchar* src = _src.ptr() + x*cn; + uchar* dst = _dst.ptr() + (x - r)*cn; + + memset( h_coarse, 0, 16*n*cn*sizeof(h_coarse[0]) ); + memset( h_fine, 0, 16*16*n*cn*sizeof(h_fine[0]) ); + + // First row initialization + for( c = 0; c < cn; c++ ) + { + for( j = 0; j < n; j++ ) + COP( c, j, src[cn*j+c], += (cv::HT)(r+2) ); + + for( i = 1; i < r; i++ ) + { + const uchar* p = src + sstep*std::min(i, m-1); + for ( j = 0; j < n; j++ ) + COP( c, j, p[cn*j+c], ++ ); + } + } + + for( i = 0; i < m; i++ ) + { + const uchar* p0 = src + sstep * std::max( 0, i-r-1 ); + const uchar* p1 = src + sstep * std::min( m-1, i+r ); + + memset( H, 0, cn*sizeof(H[0]) ); + memset( luc, 0, 
cn*sizeof(luc[0]) ); + for( c = 0; c < cn; c++ ) + { + // Update column histograms for the entire row. + for( j = 0; j < n; j++ ) + { + COP( c, j, p0[j*cn + c], -- ); + COP( c, j, p1[j*cn + c], ++ ); + } + + // First column initialization + for (k = 0; k < 16; ++k) + { +#if CV_SIMD256 + v_store(H[c].fine[k], v_mul_wrap(v256_load(h_fine + 16 * n*(16 * c + k)), v256_setall_u16(2 * r + 1)) + v256_load(H[c].fine[k])); +#elif CV_SIMD128 + v_store(H[c].fine[k], v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k)), v_setall_u16((ushort)(2 * r + 1))) + v_load(H[c].fine[k])); + v_store(H[c].fine[k] + 8, v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k) + 8), v_setall_u16((ushort)(2 * r + 1))) + v_load(H[c].fine[k] + 8)); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += (2 * r + 1) * h_fine[16 * n*(16 * c + k) + ind]; +#endif + } + +#if CV_SIMD256 + v_uint16x16 v_coarse = v256_load(H[c].coarse); +#elif CV_SIMD128 + v_uint16x8 v_coarsel = v_load(H[c].coarse); + v_uint16x8 v_coarseh = v_load(H[c].coarse + 8); +#endif + HT* px = h_coarse + 16 * n*c; + for( j = 0; j < 2*r; ++j, px += 16 ) + { +#if CV_SIMD256 + v_coarse += v256_load(px); +#elif CV_SIMD128 + v_coarsel += v_load(px); + v_coarseh += v_load(px + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].coarse[ind] += px[ind]; +#endif + } + + for( j = r; j < n-r; j++ ) + { + int t = 2*r*r + 2*r, b, sum = 0; + HT* segment; + + px = h_coarse + 16 * (n*c + std::min(j + r, n - 1)); +#if CV_SIMD256 + v_coarse += v256_load(px); + v_store(H[c].coarse, v_coarse); +#elif CV_SIMD128 + v_coarsel += v_load(px); + v_coarseh += v_load(px + 8); + v_store(H[c].coarse, v_coarsel); + v_store(H[c].coarse + 8, v_coarseh); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].coarse[ind] += px[ind]; +#endif + + // Find median at coarse level + for ( k = 0; k < 16 ; ++k ) + { + sum += H[c].coarse[k]; + if ( sum > t ) + { + sum -= H[c].coarse[k]; + break; + } + } + CV_Assert( k < 16 ); + + /* Update corresponding histogram segment */ 
+#if CV_SIMD256 + v_uint16x16 v_fine; +#elif CV_SIMD128 + v_uint16x8 v_finel; + v_uint16x8 v_fineh; +#endif + if ( luc[c][k] <= j-r ) + { +#if CV_SIMD256 + v_fine = v256_setzero_u16(); +#elif CV_SIMD128 + v_finel = v_setzero_u16(); + v_fineh = v_setzero_u16(); +#else + memset(&H[c].fine[k], 0, 16 * sizeof(HT)); +#endif + px = h_fine + 16 * (n*(16 * c + k) + j - r); + for (luc[c][k] = cv::HT(j - r); luc[c][k] < MIN(j + r + 1, n); ++luc[c][k], px += 16) + { +#if CV_SIMD256 + v_fine += v256_load(px); +#elif CV_SIMD128 + v_finel += v_load(px); + v_fineh += v_load(px + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += px[ind]; +#endif + } + + if ( luc[c][k] < j+r+1 ) + { + px = h_fine + 16 * (n*(16 * c + k) + (n - 1)); +#if CV_SIMD256 + v_fine += v_mul_wrap(v256_load(px), v256_setall_u16(j + r + 1 - n)); +#elif CV_SIMD128 + v_finel += v_mul_wrap(v_load(px), v_setall_u16((ushort)(j + r + 1 - n))); + v_fineh += v_mul_wrap(v_load(px + 8), v_setall_u16((ushort)(j + r + 1 - n))); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += (j + r + 1 - n) * px[ind]; +#endif + luc[c][k] = (HT)(j+r+1); + } + } + else + { +#if CV_SIMD256 + v_fine = v256_load(H[c].fine[k]); +#elif CV_SIMD128 + v_finel = v_load(H[c].fine[k]); + v_fineh = v_load(H[c].fine[k] + 8); +#endif + px = h_fine + 16*n*(16 * c + k); + for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) + { +#if CV_SIMD256 + v_fine += v256_load(px + 16 * MIN(luc[c][k], n - 1)) - v256_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0)); +#elif CV_SIMD128 + v_finel += v_load(px + 16 * MIN(luc[c][k], n - 1) ) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0)); + v_fineh += v_load(px + 16 * MIN(luc[c][k], n - 1) + 8) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0) + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].fine[k][ind] += px[16 * MIN(luc[c][k], n - 1) + ind] - px[16 * MAX(luc[c][k] - 2 * r - 1, 0) + ind]; +#endif + } + } + + px = h_coarse + 16 * (n*c + MAX(j - r, 0)); +#if CV_SIMD256 + 
v_store(H[c].fine[k], v_fine); + v_coarse -= v256_load(px); +#elif CV_SIMD128 + v_store(H[c].fine[k], v_finel); + v_store(H[c].fine[k] + 8, v_fineh); + v_coarsel -= v_load(px); + v_coarseh -= v_load(px + 8); +#else + for (int ind = 0; ind < 16; ++ind) + H[c].coarse[ind] -= px[ind]; +#endif + + /* Find median in segment */ + segment = H[c].fine[k]; + for ( b = 0; b < 16 ; b++ ) + { + sum += segment[b]; + if ( sum > t ) + { + dst[dstep*i+cn*j+c] = (uchar)(16*k + b); + break; + } + } + CV_Assert( b < 16 ); + } + } +#if CV_SIMD + vx_cleanup(); +#endif + } + } + +#undef HOP +#undef COP +} + +static void +medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m ) +{ + #define N 16 + int zone0[4][N]; + int zone1[4][N*N]; + int x, y; + int n2 = m*m/2; + Size size = _dst.size(); + const uchar* src = _src.ptr(); + uchar* dst = _dst.ptr(); + int src_step = (int)_src.step, dst_step = (int)_dst.step; + int cn = _src.channels(); + const uchar* src_max = src + size.height*src_step; + CV_Assert(cn > 0 && cn <= 4); + + #define UPDATE_ACC01( pix, cn, op ) \ + { \ + int p = (pix); \ + zone1[cn][p] op; \ + zone0[cn][p >> 4] op; \ + } + + //CV_Assert( size.height >= nx && size.width >= nx ); + for( x = 0; x < size.width; x++, src += cn, dst += cn ) + { + uchar* dst_cur = dst; + const uchar* src_top = src; + const uchar* src_bottom = src; + int k, c; + int src_step1 = src_step, dst_step1 = dst_step; + + if( x % 2 != 0 ) + { + src_bottom = src_top += src_step*(size.height-1); + dst_cur += dst_step*(size.height-1); + src_step1 = -src_step1; + dst_step1 = -dst_step1; + } + + // init accumulator + memset( zone0, 0, sizeof(zone0[0])*cn ); + memset( zone1, 0, sizeof(zone1[0])*cn ); + + for( y = 0; y <= m/2; y++ ) + { + for( c = 0; c < cn; c++ ) + { + if( y > 0 ) + { + for( k = 0; k < m*cn; k += cn ) + UPDATE_ACC01( src_bottom[k+c], c, ++ ); + } + else + { + for( k = 0; k < m*cn; k += cn ) + UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 ); + } + } + + if( (src_step1 > 0 && y < size.height-1) || + 
(src_step1 < 0 && size.height-y-1 > 0) ) + src_bottom += src_step1; + } + + for( y = 0; y < size.height; y++, dst_cur += dst_step1 ) + { + // find median + for( c = 0; c < cn; c++ ) + { + int s = 0; + for( k = 0; ; k++ ) + { + int t = s + zone0[c][k]; + if( t > n2 ) break; + s = t; + } + + for( k *= N; ;k++ ) + { + s += zone1[c][k]; + if( s > n2 ) break; + } + + dst_cur[c] = (uchar)k; + } + + if( y+1 == size.height ) + break; + + if( cn == 1 ) + { + for( k = 0; k < m; k++ ) + { + int p = src_top[k]; + int q = src_bottom[k]; + zone1[0][p]--; + zone0[0][p>>4]--; + zone1[0][q]++; + zone0[0][q>>4]++; + } + } + else if( cn == 3 ) + { + for( k = 0; k < m*3; k += 3 ) + { + UPDATE_ACC01( src_top[k], 0, -- ); + UPDATE_ACC01( src_top[k+1], 1, -- ); + UPDATE_ACC01( src_top[k+2], 2, -- ); + + UPDATE_ACC01( src_bottom[k], 0, ++ ); + UPDATE_ACC01( src_bottom[k+1], 1, ++ ); + UPDATE_ACC01( src_bottom[k+2], 2, ++ ); + } + } + else + { + assert( cn == 4 ); + for( k = 0; k < m*4; k += 4 ) + { + UPDATE_ACC01( src_top[k], 0, -- ); + UPDATE_ACC01( src_top[k+1], 1, -- ); + UPDATE_ACC01( src_top[k+2], 2, -- ); + UPDATE_ACC01( src_top[k+3], 3, -- ); + + UPDATE_ACC01( src_bottom[k], 0, ++ ); + UPDATE_ACC01( src_bottom[k+1], 1, ++ ); + UPDATE_ACC01( src_bottom[k+2], 2, ++ ); + UPDATE_ACC01( src_bottom[k+3], 3, ++ ); + } + } + + if( (src_step1 > 0 && src_bottom + src_step1 < src_max) || + (src_step1 < 0 && src_bottom + src_step1 >= src) ) + src_bottom += src_step1; + + if( y >= m/2 ) + src_top += src_step1; + } + } +#undef N +#undef UPDATE_ACC +} + + +struct MinMax8u +{ + typedef uchar value_type; + typedef int arg_type; + enum { SIZE = 1 }; + arg_type load(const uchar* ptr) { return *ptr; } + void store(uchar* ptr, arg_type val) { *ptr = (uchar)val; } + void operator()(arg_type& a, arg_type& b) const + { + int t = CV_FAST_CAST_8U(a - b); + b += t; a -= t; + } +}; + +struct MinMax16u +{ + typedef ushort value_type; + typedef int arg_type; + enum { SIZE = 1 }; + arg_type load(const ushort* 
ptr) { return *ptr; } + void store(ushort* ptr, arg_type val) { *ptr = (ushort)val; } + void operator()(arg_type& a, arg_type& b) const + { + arg_type t = a; + a = std::min(a, b); + b = std::max(b, t); + } +}; + +struct MinMax16s +{ + typedef short value_type; + typedef int arg_type; + enum { SIZE = 1 }; + arg_type load(const short* ptr) { return *ptr; } + void store(short* ptr, arg_type val) { *ptr = (short)val; } + void operator()(arg_type& a, arg_type& b) const + { + arg_type t = a; + a = std::min(a, b); + b = std::max(b, t); + } +}; + +struct MinMax32f +{ + typedef float value_type; + typedef float arg_type; + enum { SIZE = 1 }; + arg_type load(const float* ptr) { return *ptr; } + void store(float* ptr, arg_type val) { *ptr = val; } + void operator()(arg_type& a, arg_type& b) const + { + arg_type t = a; + a = std::min(a, b); + b = std::max(b, t); + } +}; + +#if CV_SIMD + +struct MinMaxVec8u +{ + typedef uchar value_type; + typedef v_uint8x16 arg_type; + enum { SIZE = v_uint8x16::nlanes }; + arg_type load(const uchar* ptr) { return v_load(ptr); } + void store(uchar* ptr, const arg_type &val) { v_store(ptr, val); } + void operator()(arg_type& a, arg_type& b) const + { + arg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#if CV_SIMD_WIDTH > 16 + typedef v_uint8 warg_type; + enum { WSIZE = v_uint8::nlanes }; + warg_type wload(const uchar* ptr) { return vx_load(ptr); } + void store(uchar* ptr, const warg_type &val) { v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif +}; + + +struct MinMaxVec16u +{ + typedef ushort value_type; + typedef v_uint16x8 arg_type; + enum { SIZE = v_uint16x8::nlanes }; + arg_type load(const ushort* ptr) { return v_load(ptr); } + void store(ushort* ptr, const arg_type &val) { v_store(ptr, val); } + void operator()(arg_type& a, arg_type& b) const + { + arg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#if CV_SIMD_WIDTH > 16 + 
typedef v_uint16 warg_type; + enum { WSIZE = v_uint16::nlanes }; + warg_type wload(const ushort* ptr) { return vx_load(ptr); } + void store(ushort* ptr, const warg_type &val) { v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif +}; + + +struct MinMaxVec16s +{ + typedef short value_type; + typedef v_int16x8 arg_type; + enum { SIZE = v_int16x8::nlanes }; + arg_type load(const short* ptr) { return v_load(ptr); } + void store(short* ptr, const arg_type &val) { v_store(ptr, val); } + void operator()(arg_type& a, arg_type& b) const + { + arg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#if CV_SIMD_WIDTH > 16 + typedef v_int16 warg_type; + enum { WSIZE = v_int16::nlanes }; + warg_type wload(const short* ptr) { return vx_load(ptr); } + void store(short* ptr, const warg_type &val) { v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif +}; + + +struct MinMaxVec32f +{ + typedef float value_type; + typedef v_float32x4 arg_type; + enum { SIZE = v_float32x4::nlanes }; + arg_type load(const float* ptr) { return v_load(ptr); } + void store(float* ptr, const arg_type &val) { v_store(ptr, val); } + void operator()(arg_type& a, arg_type& b) const + { + arg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#if CV_SIMD_WIDTH > 16 + typedef v_float32 warg_type; + enum { WSIZE = v_float32::nlanes }; + warg_type wload(const float* ptr) { return vx_load(ptr); } + void store(float* ptr, const warg_type &val) { v_store(ptr, val); } + void operator()(warg_type& a, warg_type& b) const + { + warg_type t = a; + a = v_min(a, b); + b = v_max(b, t); + } +#endif +}; + +#else + +typedef MinMax8u MinMaxVec8u; +typedef MinMax16u MinMaxVec16u; +typedef MinMax16s MinMaxVec16s; +typedef MinMax32f MinMaxVec32f; + +#endif + +template +static void +medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) +{ + 
typedef typename Op::value_type T; + typedef typename Op::arg_type WT; + typedef typename VecOp::arg_type VT; +#if CV_SIMD_WIDTH > 16 + typedef typename VecOp::warg_type WVT; +#endif + + const T* src = _src.ptr(); + T* dst = _dst.ptr(); + int sstep = (int)(_src.step/sizeof(T)); + int dstep = (int)(_dst.step/sizeof(T)); + Size size = _dst.size(); + int i, j, k, cn = _src.channels(); + Op op; + VecOp vop; + + if( m == 3 ) + { + if( size.width == 1 || size.height == 1 ) + { + int len = size.width + size.height - 1; + int sdelta = size.height == 1 ? cn : sstep; + int sdelta0 = size.height == 1 ? 0 : sstep - cn; + int ddelta = size.height == 1 ? cn : dstep; + + for( i = 0; i < len; i++, src += sdelta0, dst += ddelta ) + for( j = 0; j < cn; j++, src++ ) + { + WT p0 = src[i > 0 ? -sdelta : 0]; + WT p1 = src[0]; + WT p2 = src[i < len - 1 ? sdelta : 0]; + + op(p0, p1); op(p1, p2); op(p0, p1); + dst[j] = (T)p1; + } + return; + } + + size.width *= cn; + for( i = 0; i < size.height; i++, dst += dstep ) + { + const T* row0 = src + std::max(i - 1, 0)*sstep; + const T* row1 = src + i*sstep; + const T* row2 = src + std::min(i + 1, size.height-1)*sstep; + int limit = cn; + + for(j = 0;; ) + { + for( ; j < limit; j++ ) + { + int j0 = j >= cn ? j - cn : j; + int j2 = j < size.width - cn ? 
j + cn : j; + WT p0 = row0[j0], p1 = row0[j], p2 = row0[j2]; + WT p3 = row1[j0], p4 = row1[j], p5 = row1[j2]; + WT p6 = row2[j0], p7 = row2[j], p8 = row2[j2]; + + op(p1, p2); op(p4, p5); op(p7, p8); op(p0, p1); + op(p3, p4); op(p6, p7); op(p1, p2); op(p4, p5); + op(p7, p8); op(p0, p3); op(p5, p8); op(p4, p7); + op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7); + op(p4, p2); op(p6, p4); op(p4, p2); + dst[j] = (T)p4; + } + + if( limit == size.width ) + break; + +#if CV_SIMD_WIDTH > 16 + for( ; j <= size.width - VecOp::WSIZE - cn; j += VecOp::WSIZE ) + { + WVT p0 = vop.wload(row0+j-cn), p1 = vop.wload(row0+j), p2 = vop.wload(row0+j+cn); + WVT p3 = vop.wload(row1+j-cn), p4 = vop.wload(row1+j), p5 = vop.wload(row1+j+cn); + WVT p6 = vop.wload(row2+j-cn), p7 = vop.wload(row2+j), p8 = vop.wload(row2+j+cn); + + vop(p1, p2); vop(p4, p5); vop(p7, p8); vop(p0, p1); + vop(p3, p4); vop(p6, p7); vop(p1, p2); vop(p4, p5); + vop(p7, p8); vop(p0, p3); vop(p5, p8); vop(p4, p7); + vop(p3, p6); vop(p1, p4); vop(p2, p5); vop(p4, p7); + vop(p4, p2); vop(p6, p4); vop(p4, p2); + vop.store(dst+j, p4); + } +#endif + for( ; j <= size.width - VecOp::SIZE - cn; j += VecOp::SIZE ) + { + VT p0 = vop.load(row0+j-cn), p1 = vop.load(row0+j), p2 = vop.load(row0+j+cn); + VT p3 = vop.load(row1+j-cn), p4 = vop.load(row1+j), p5 = vop.load(row1+j+cn); + VT p6 = vop.load(row2+j-cn), p7 = vop.load(row2+j), p8 = vop.load(row2+j+cn); + + vop(p1, p2); vop(p4, p5); vop(p7, p8); vop(p0, p1); + vop(p3, p4); vop(p6, p7); vop(p1, p2); vop(p4, p5); + vop(p7, p8); vop(p0, p3); vop(p5, p8); vop(p4, p7); + vop(p3, p6); vop(p1, p4); vop(p2, p5); vop(p4, p7); + vop(p4, p2); vop(p6, p4); vop(p4, p2); + vop.store(dst+j, p4); + } + + limit = size.width; + } + } +#if CV_SIMD + vx_cleanup(); +#endif + } + else if( m == 5 ) + { + if( size.width == 1 || size.height == 1 ) + { + int len = size.width + size.height - 1; + int sdelta = size.height == 1 ? cn : sstep; + int sdelta0 = size.height == 1 ? 
0 : sstep - cn; + int ddelta = size.height == 1 ? cn : dstep; + + for( i = 0; i < len; i++, src += sdelta0, dst += ddelta ) + for( j = 0; j < cn; j++, src++ ) + { + int i1 = i > 0 ? -sdelta : 0; + int i0 = i > 1 ? -sdelta*2 : i1; + int i3 = i < len-1 ? sdelta : 0; + int i4 = i < len-2 ? sdelta*2 : i3; + WT p0 = src[i0], p1 = src[i1], p2 = src[0], p3 = src[i3], p4 = src[i4]; + + op(p0, p1); op(p3, p4); op(p2, p3); op(p3, p4); op(p0, p2); + op(p2, p4); op(p1, p3); op(p1, p2); + dst[j] = (T)p2; + } + return; + } + + size.width *= cn; + for( i = 0; i < size.height; i++, dst += dstep ) + { + const T* row[5]; + row[0] = src + std::max(i - 2, 0)*sstep; + row[1] = src + std::max(i - 1, 0)*sstep; + row[2] = src + i*sstep; + row[3] = src + std::min(i + 1, size.height-1)*sstep; + row[4] = src + std::min(i + 2, size.height-1)*sstep; + int limit = cn*2; + + for(j = 0;; ) + { + for( ; j < limit; j++ ) + { + WT p[25]; + int j1 = j >= cn ? j - cn : j; + int j0 = j >= cn*2 ? j - cn*2 : j1; + int j3 = j < size.width - cn ? j + cn : j; + int j4 = j < size.width - cn*2 ? 
j + cn*2 : j3; + for( k = 0; k < 5; k++ ) + { + const T* rowk = row[k]; + p[k*5] = rowk[j0]; p[k*5+1] = rowk[j1]; + p[k*5+2] = rowk[j]; p[k*5+3] = rowk[j3]; + p[k*5+4] = rowk[j4]; + } + + op(p[1], p[2]); op(p[0], p[1]); op(p[1], p[2]); op(p[4], p[5]); op(p[3], p[4]); + op(p[4], p[5]); op(p[0], p[3]); op(p[2], p[5]); op(p[2], p[3]); op(p[1], p[4]); + op(p[1], p[2]); op(p[3], p[4]); op(p[7], p[8]); op(p[6], p[7]); op(p[7], p[8]); + op(p[10], p[11]); op(p[9], p[10]); op(p[10], p[11]); op(p[6], p[9]); op(p[8], p[11]); + op(p[8], p[9]); op(p[7], p[10]); op(p[7], p[8]); op(p[9], p[10]); op(p[0], p[6]); + op(p[4], p[10]); op(p[4], p[6]); op(p[2], p[8]); op(p[2], p[4]); op(p[6], p[8]); + op(p[1], p[7]); op(p[5], p[11]); op(p[5], p[7]); op(p[3], p[9]); op(p[3], p[5]); + op(p[7], p[9]); op(p[1], p[2]); op(p[3], p[4]); op(p[5], p[6]); op(p[7], p[8]); + op(p[9], p[10]); op(p[13], p[14]); op(p[12], p[13]); op(p[13], p[14]); op(p[16], p[17]); + op(p[15], p[16]); op(p[16], p[17]); op(p[12], p[15]); op(p[14], p[17]); op(p[14], p[15]); + op(p[13], p[16]); op(p[13], p[14]); op(p[15], p[16]); op(p[19], p[20]); op(p[18], p[19]); + op(p[19], p[20]); op(p[21], p[22]); op(p[23], p[24]); op(p[21], p[23]); op(p[22], p[24]); + op(p[22], p[23]); op(p[18], p[21]); op(p[20], p[23]); op(p[20], p[21]); op(p[19], p[22]); + op(p[22], p[24]); op(p[19], p[20]); op(p[21], p[22]); op(p[23], p[24]); op(p[12], p[18]); + op(p[16], p[22]); op(p[16], p[18]); op(p[14], p[20]); op(p[20], p[24]); op(p[14], p[16]); + op(p[18], p[20]); op(p[22], p[24]); op(p[13], p[19]); op(p[17], p[23]); op(p[17], p[19]); + op(p[15], p[21]); op(p[15], p[17]); op(p[19], p[21]); op(p[13], p[14]); op(p[15], p[16]); + op(p[17], p[18]); op(p[19], p[20]); op(p[21], p[22]); op(p[23], p[24]); op(p[0], p[12]); + op(p[8], p[20]); op(p[8], p[12]); op(p[4], p[16]); op(p[16], p[24]); op(p[12], p[16]); + op(p[2], p[14]); op(p[10], p[22]); op(p[10], p[14]); op(p[6], p[18]); op(p[6], p[10]); + op(p[10], p[12]); op(p[1], p[13]); op(p[9], 
p[21]); op(p[9], p[13]); op(p[5], p[17]); + op(p[13], p[17]); op(p[3], p[15]); op(p[11], p[23]); op(p[11], p[15]); op(p[7], p[19]); + op(p[7], p[11]); op(p[11], p[13]); op(p[11], p[12]); + dst[j] = (T)p[12]; + } + + if( limit == size.width ) + break; + +#if CV_SIMD_WIDTH > 16 + for( ; j <= size.width - VecOp::WSIZE - cn*2; j += VecOp::WSIZE ) + { + WVT p[25]; + for( k = 0; k < 5; k++ ) + { + const T* rowk = row[k]; + p[k*5] = vop.wload(rowk+j-cn*2); p[k*5+1] = vop.wload(rowk+j-cn); + p[k*5+2] = vop.wload(rowk+j); p[k*5+3] = vop.wload(rowk+j+cn); + p[k*5+4] = vop.wload(rowk+j+cn*2); + } + + vop(p[1], p[2]); vop(p[0], p[1]); vop(p[1], p[2]); vop(p[4], p[5]); vop(p[3], p[4]); + vop(p[4], p[5]); vop(p[0], p[3]); vop(p[2], p[5]); vop(p[2], p[3]); vop(p[1], p[4]); + vop(p[1], p[2]); vop(p[3], p[4]); vop(p[7], p[8]); vop(p[6], p[7]); vop(p[7], p[8]); + vop(p[10], p[11]); vop(p[9], p[10]); vop(p[10], p[11]); vop(p[6], p[9]); vop(p[8], p[11]); + vop(p[8], p[9]); vop(p[7], p[10]); vop(p[7], p[8]); vop(p[9], p[10]); vop(p[0], p[6]); + vop(p[4], p[10]); vop(p[4], p[6]); vop(p[2], p[8]); vop(p[2], p[4]); vop(p[6], p[8]); + vop(p[1], p[7]); vop(p[5], p[11]); vop(p[5], p[7]); vop(p[3], p[9]); vop(p[3], p[5]); + vop(p[7], p[9]); vop(p[1], p[2]); vop(p[3], p[4]); vop(p[5], p[6]); vop(p[7], p[8]); + vop(p[9], p[10]); vop(p[13], p[14]); vop(p[12], p[13]); vop(p[13], p[14]); vop(p[16], p[17]); + vop(p[15], p[16]); vop(p[16], p[17]); vop(p[12], p[15]); vop(p[14], p[17]); vop(p[14], p[15]); + vop(p[13], p[16]); vop(p[13], p[14]); vop(p[15], p[16]); vop(p[19], p[20]); vop(p[18], p[19]); + vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[21], p[23]); vop(p[22], p[24]); + vop(p[22], p[23]); vop(p[18], p[21]); vop(p[20], p[23]); vop(p[20], p[21]); vop(p[19], p[22]); + vop(p[22], p[24]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[12], p[18]); + vop(p[16], p[22]); vop(p[16], p[18]); vop(p[14], p[20]); vop(p[20], p[24]); vop(p[14], p[16]); + vop(p[18], p[20]); 
vop(p[22], p[24]); vop(p[13], p[19]); vop(p[17], p[23]); vop(p[17], p[19]); + vop(p[15], p[21]); vop(p[15], p[17]); vop(p[19], p[21]); vop(p[13], p[14]); vop(p[15], p[16]); + vop(p[17], p[18]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[0], p[12]); + vop(p[8], p[20]); vop(p[8], p[12]); vop(p[4], p[16]); vop(p[16], p[24]); vop(p[12], p[16]); + vop(p[2], p[14]); vop(p[10], p[22]); vop(p[10], p[14]); vop(p[6], p[18]); vop(p[6], p[10]); + vop(p[10], p[12]); vop(p[1], p[13]); vop(p[9], p[21]); vop(p[9], p[13]); vop(p[5], p[17]); + vop(p[13], p[17]); vop(p[3], p[15]); vop(p[11], p[23]); vop(p[11], p[15]); vop(p[7], p[19]); + vop(p[7], p[11]); vop(p[11], p[13]); vop(p[11], p[12]); + vop.store(dst+j, p[12]); + } +#endif + for( ; j <= size.width - VecOp::SIZE - cn*2; j += VecOp::SIZE ) + { + VT p[25]; + for( k = 0; k < 5; k++ ) + { + const T* rowk = row[k]; + p[k*5] = vop.load(rowk+j-cn*2); p[k*5+1] = vop.load(rowk+j-cn); + p[k*5+2] = vop.load(rowk+j); p[k*5+3] = vop.load(rowk+j+cn); + p[k*5+4] = vop.load(rowk+j+cn*2); + } + + vop(p[1], p[2]); vop(p[0], p[1]); vop(p[1], p[2]); vop(p[4], p[5]); vop(p[3], p[4]); + vop(p[4], p[5]); vop(p[0], p[3]); vop(p[2], p[5]); vop(p[2], p[3]); vop(p[1], p[4]); + vop(p[1], p[2]); vop(p[3], p[4]); vop(p[7], p[8]); vop(p[6], p[7]); vop(p[7], p[8]); + vop(p[10], p[11]); vop(p[9], p[10]); vop(p[10], p[11]); vop(p[6], p[9]); vop(p[8], p[11]); + vop(p[8], p[9]); vop(p[7], p[10]); vop(p[7], p[8]); vop(p[9], p[10]); vop(p[0], p[6]); + vop(p[4], p[10]); vop(p[4], p[6]); vop(p[2], p[8]); vop(p[2], p[4]); vop(p[6], p[8]); + vop(p[1], p[7]); vop(p[5], p[11]); vop(p[5], p[7]); vop(p[3], p[9]); vop(p[3], p[5]); + vop(p[7], p[9]); vop(p[1], p[2]); vop(p[3], p[4]); vop(p[5], p[6]); vop(p[7], p[8]); + vop(p[9], p[10]); vop(p[13], p[14]); vop(p[12], p[13]); vop(p[13], p[14]); vop(p[16], p[17]); + vop(p[15], p[16]); vop(p[16], p[17]); vop(p[12], p[15]); vop(p[14], p[17]); vop(p[14], p[15]); + vop(p[13], p[16]); vop(p[13], p[14]); 
vop(p[15], p[16]); vop(p[19], p[20]); vop(p[18], p[19]); + vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[21], p[23]); vop(p[22], p[24]); + vop(p[22], p[23]); vop(p[18], p[21]); vop(p[20], p[23]); vop(p[20], p[21]); vop(p[19], p[22]); + vop(p[22], p[24]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[12], p[18]); + vop(p[16], p[22]); vop(p[16], p[18]); vop(p[14], p[20]); vop(p[20], p[24]); vop(p[14], p[16]); + vop(p[18], p[20]); vop(p[22], p[24]); vop(p[13], p[19]); vop(p[17], p[23]); vop(p[17], p[19]); + vop(p[15], p[21]); vop(p[15], p[17]); vop(p[19], p[21]); vop(p[13], p[14]); vop(p[15], p[16]); + vop(p[17], p[18]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[0], p[12]); + vop(p[8], p[20]); vop(p[8], p[12]); vop(p[4], p[16]); vop(p[16], p[24]); vop(p[12], p[16]); + vop(p[2], p[14]); vop(p[10], p[22]); vop(p[10], p[14]); vop(p[6], p[18]); vop(p[6], p[10]); + vop(p[10], p[12]); vop(p[1], p[13]); vop(p[9], p[21]); vop(p[9], p[13]); vop(p[5], p[17]); + vop(p[13], p[17]); vop(p[3], p[15]); vop(p[11], p[23]); vop(p[11], p[15]); vop(p[7], p[19]); + vop(p[7], p[11]); vop(p[11], p[13]); vop(p[11], p[12]); + vop.store(dst+j, p[12]); + } + + limit = size.width; + } + } +#if CV_SIMD + vx_cleanup(); +#endif + } +} + +#ifdef HAVE_OPENCL + +#define DIVUP(total, grain) ((total + grain - 1) / (grain)) + +static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m) +{ + size_t localsize[2] = { 16, 16 }; + size_t globalsize[2]; + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + + if ( !((depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F) && cn <= 4 && (m == 3 || m == 5)) ) + return false; + + Size imgSize = _src.size(); + bool useOptimized = (1 == cn) && + (size_t)imgSize.width >= localsize[0] * 8 && + (size_t)imgSize.height >= localsize[1] * 8 && + imgSize.width % 4 == 0 && + imgSize.height % 4 == 0 && + (ocl::Device::getDefault().isIntel()); + + cv::String kname = 
format( useOptimized ? "medianFilter%d_u" : "medianFilter%d", m) ; + cv::String kdefs = useOptimized ? + format("-D T=%s -D T1=%s -D T4=%s%d -D cn=%d -D USE_4OPT", ocl::typeToStr(type), + ocl::typeToStr(depth), ocl::typeToStr(depth), cn*4, cn) + : + format("-D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn) ; + + ocl::Kernel k(kname.c_str(), ocl::imgproc::medianFilter_oclsrc, kdefs.c_str() ); + + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(src.size(), type); + UMat dst = _dst.getUMat(); + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); + + if( useOptimized ) + { + globalsize[0] = DIVUP(src.cols / 4, localsize[0]) * localsize[0]; + globalsize[1] = DIVUP(src.rows / 4, localsize[1]) * localsize[1]; + } + else + { + globalsize[0] = (src.cols + localsize[0] + 2) / localsize[0] * localsize[0]; + globalsize[1] = (src.rows + localsize[1] - 1) / localsize[1] * localsize[1]; + } + + return k.run(2, globalsize, localsize, false); +} + +#undef DIVUP + +#endif + +#ifdef HAVE_OPENVX +namespace ovx { + template <> inline bool skipSmallImages(int w, int h) { return w*h < 1280 * 720; } +} +static bool openvx_medianFilter(InputArray _src, OutputArray _dst, int ksize) +{ + if (_src.type() != CV_8UC1 || _dst.type() != CV_8U +#ifndef VX_VERSION_1_1 + || ksize != 3 +#endif + ) + return false; + + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + + if ( +#ifdef VX_VERSION_1_1 + ksize != 3 ? 
ovx::skipSmallImages(src.cols, src.rows) : +#endif + ovx::skipSmallImages(src.cols, src.rows) + ) + return false; + + try + { + ivx::Context ctx = ovx::getOpenVXContext(); +#ifdef VX_VERSION_1_1 + if ((vx_size)ksize > ctx.nonlinearMaxDimension()) + return false; +#endif + + Mat a; + if (dst.data != src.data) + a = src; + else + src.copyTo(a); + + ivx::Image + ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), + ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, + ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); + + //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments + //since OpenVX standard says nothing about thread-safety for now + ivx::border_t prevBorder = ctx.immediateBorder(); + ctx.setImmediateBorder(VX_BORDER_REPLICATE); +#ifdef VX_VERSION_1_1 + if (ksize == 3) +#endif + { + ivx::IVX_CHECK_STATUS(vxuMedian3x3(ctx, ia, ib)); + } +#ifdef VX_VERSION_1_1 + else + { + ivx::Matrix mtx; + if(ksize == 5) + mtx = ivx::Matrix::createFromPattern(ctx, VX_PATTERN_BOX, ksize, ksize); + else + { + vx_size supportedSize; + ivx::IVX_CHECK_STATUS(vxQueryContext(ctx, VX_CONTEXT_NONLINEAR_MAX_DIMENSION, &supportedSize, sizeof(supportedSize))); + if ((vx_size)ksize > supportedSize) + { + ctx.setImmediateBorder(prevBorder); + return false; + } + Mat mask(ksize, ksize, CV_8UC1, Scalar(255)); + mtx = ivx::Matrix::create(ctx, VX_TYPE_UINT8, ksize, ksize); + mtx.copyFrom(mask); + } + ivx::IVX_CHECK_STATUS(vxuNonLinearFilter(ctx, VX_NONLINEAR_FILTER_MEDIAN, ia, mtx, ib)); + } +#endif + ctx.setImmediateBorder(prevBorder); + } + catch (ivx::RuntimeError & e) + { + VX_DbgThrow(e.what()); + } + catch (ivx::WrapperError & e) + { + VX_DbgThrow(e.what()); + } + + return true; +} +#endif + +#ifdef HAVE_IPP +static bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize) +{ + CV_INSTRUMENT_REGION_IPP(); + 
+#if IPP_VERSION_X100 < 201801 + // Degradations for big kernel + if(ksize > 7) + return false; +#endif + + { + int bufSize; + IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize); + IppDataType ippType = ippiGetDataType(src0.type()); + int channels = src0.channels(); + IppAutoBuffer buffer; + + if(src0.isSubmatrix()) + return false; + + Mat src; + if(dst.data != src0.data) + src = src0; + else + src0.copyTo(src); + + if(ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, ippType, channels, &bufSize) < 0) + return false; + + buffer.allocate(bufSize); + + switch(ippType) + { + case ipp8u: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 3) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 4) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + case ipp16u: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 3) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 4) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + case ipp16s: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, 
dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 3) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else if(channels == 4) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + case ipp32f: + if(channels == 1) + return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_32f_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; + else + return false; + default: + return false; + } + } +} +#endif +} + +void medianBlur( InputArray _src0, OutputArray _dst, int ksize ) +{ + CV_INSTRUMENT_REGION(); + + CV_Assert( (ksize % 2 == 1) && (_src0.dims() <= 2 )); + + if( ksize <= 1 || _src0.empty() ) + { + _src0.copyTo(_dst); + return; + } + + CV_OCL_RUN(_dst.isUMat(), + ocl_medianFilter(_src0,_dst, ksize)) + + Mat src0 = _src0.getMat(); + _dst.create( src0.size(), src0.type() ); + Mat dst = _dst.getMat(); + + CALL_HAL(medianBlur, cv_hal_medianBlur, src0.data, src0.step, dst.data, dst.step, src0.cols, src0.rows, src0.depth(), + src0.channels(), ksize); + + CV_OVX_RUN(true, + openvx_medianFilter(_src0, _dst, ksize)) + + CV_IPP_RUN_FAST(ipp_medianFilter(src0, dst, ksize)); + +#ifdef HAVE_TEGRA_OPTIMIZATION + if (tegra::useTegra() && tegra::medianBlur(src0, dst, ksize)) + return; +#endif + + bool useSortNet = ksize == 3 || (ksize == 5 +#if !(CV_SIMD) + && ( src0.depth() > CV_8U || src0.channels() == 2 || src0.channels() > 4 ) +#endif + ); + + Mat src; + if( useSortNet ) + { + if( dst.data != src0.data ) + src = src0; + else + src0.copyTo(src); + + if( src.depth() == CV_8U ) + medianBlur_SortNet( src, dst, ksize ); + else if( src.depth() == CV_16U ) + medianBlur_SortNet( src, dst, ksize ); + else if( src.depth() == CV_16S ) + medianBlur_SortNet( 
src, dst, ksize ); + else if( src.depth() == CV_32F ) + medianBlur_SortNet( src, dst, ksize ); + else + CV_Error(CV_StsUnsupportedFormat, ""); + + return; + } + else + { + cv::copyMakeBorder( src0, src, 0, 0, ksize/2, ksize/2, BORDER_REPLICATE|BORDER_ISOLATED); + + int cn = src0.channels(); + CV_Assert( src.depth() == CV_8U && (cn == 1 || cn == 3 || cn == 4) ); + + double img_size_mp = (double)(src0.total())/(1 << 20); + if( ksize <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)* + (CV_SIMD ? 1 : 3)) + medianBlur_8u_Om( src, dst, ksize ); + else + medianBlur_8u_O1( src, dst, ksize ); + } +} + +} + +/* End of file. */ diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index cb815e241a..4d64caac66 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -53,22 +53,6 @@ #include "filter.hpp" #include "fixedpoint.inl.hpp" -/* - * This file includes the code, contributed by Simon Perreault - * (the function icvMedianBlur_8u_O1) - * - * Constant-time median filtering -- http://nomis80.org/ctmf.html - * Copyright (C) 2006 Simon Perreault - * - * Contact: - * Laboratoire de vision et systemes numeriques - * Pavillon Adrien-Pouliot - * Universite Laval - * Sainte-Foy, Quebec, Canada - * G1K 7P4 - * - * perreaul@gel.ulaval.ca - */ namespace cv { @@ -1293,6 +1277,7 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth, return kernel.run(2, globalsize, localsize, false); } +#undef DIVUP #undef ROUNDUP #endif @@ -4154,1166 +4139,6 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType); } -/****************************************************************************************\ - Median Filter -\****************************************************************************************/ - -namespace cv -{ -typedef ushort HT; - -/** - * This structure represents a two-tier histogram. 
The first tier (known as the - * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level) - * is 8 bit wide. Pixels inserted in the fine level also get inserted into the - * coarse bucket designated by the 4 MSBs of the fine bucket value. - * - * The structure is aligned on 16 bits, which is a prerequisite for SIMD - * instructions. Each bucket is 16 bit wide, which means that extra care must be - * taken to prevent overflow. - */ -typedef struct -{ - HT coarse[16]; - HT fine[16][16]; -} Histogram; - -static void -medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize ) -{ -/** - * HOP is short for Histogram OPeration. This macro makes an operation \a op on - * histogram \a h for pixel value \a x. It takes care of handling both levels. - */ -#define HOP(h,x,op) \ - h.coarse[x>>4] op, \ - *((HT*)h.fine + x) op - -#define COP(c,j,x,op) \ - h_coarse[ 16*(n*c+j) + (x>>4) ] op, \ - h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op - - int cn = _dst.channels(), m = _dst.rows, r = (ksize-1)/2; - CV_Assert(cn > 0 && cn <= 4); - size_t sstep = _src.step, dstep = _dst.step; - Histogram CV_DECL_ALIGNED(16) H[4]; - HT CV_DECL_ALIGNED(16) luc[4][16]; - - int STRIPE_SIZE = std::min( _dst.cols, 512/cn ); - - std::vector _h_coarse(1 * 16 * (STRIPE_SIZE + 2*r) * cn + 16); - std::vector _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + 16); - HT* h_coarse = alignPtr(&_h_coarse[0], 16); - HT* h_fine = alignPtr(&_h_fine[0], 16); - - for( int x = 0; x < _dst.cols; x += STRIPE_SIZE ) - { - int i, j, k, c, n = std::min(_dst.cols - x, STRIPE_SIZE) + r*2; - const uchar* src = _src.ptr() + x*cn; - uchar* dst = _dst.ptr() + (x - r)*cn; - - memset( h_coarse, 0, 16*n*cn*sizeof(h_coarse[0]) ); - memset( h_fine, 0, 16*16*n*cn*sizeof(h_fine[0]) ); - - // First row initialization - for( c = 0; c < cn; c++ ) - { - for( j = 0; j < n; j++ ) - COP( c, j, src[cn*j+c], += (cv::HT)(r+2) ); - - for( i = 1; i < r; i++ ) - { - const uchar* p = src + sstep*std::min(i, m-1); - for ( j = 0; 
j < n; j++ ) - COP( c, j, p[cn*j+c], ++ ); - } - } - - for( i = 0; i < m; i++ ) - { - const uchar* p0 = src + sstep * std::max( 0, i-r-1 ); - const uchar* p1 = src + sstep * std::min( m-1, i+r ); - - memset( H, 0, cn*sizeof(H[0]) ); - memset( luc, 0, cn*sizeof(luc[0]) ); - for( c = 0; c < cn; c++ ) - { - // Update column histograms for the entire row. - for( j = 0; j < n; j++ ) - { - COP( c, j, p0[j*cn + c], -- ); - COP( c, j, p1[j*cn + c], ++ ); - } - - // First column initialization - for (k = 0; k < 16; ++k) - { -#if CV_SIMD256 - v_store(H[c].fine[k], v_mul_wrap(v256_load(h_fine + 16 * n*(16 * c + k)), v256_setall_u16(2 * r + 1)) + v256_load(H[c].fine[k])); -#elif CV_SIMD128 - v_store(H[c].fine[k], v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k)), v_setall_u16(2 * r + 1)) + v_load(H[c].fine[k])); - v_store(H[c].fine[k] + 8, v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k) + 8), v_setall_u16(2 * r + 1)) + v_load(H[c].fine[k] + 8)); -#else - for (int ind = 0; ind < 16; ++ind) - H[c].fine[k][ind] += (2 * r + 1) * h_fine[16 * n*(16 * c + k) + ind]; -#endif - } - -#if CV_SIMD256 - v_uint16x16 v_coarse = v256_load(H[c].coarse); -#elif CV_SIMD128 - v_uint16x8 v_coarsel = v_load(H[c].coarse); - v_uint16x8 v_coarseh = v_load(H[c].coarse + 8); -#endif - HT* px = h_coarse + 16 * n*c; - for( j = 0; j < 2*r; ++j, px += 16 ) - { -#if CV_SIMD256 - v_coarse += v256_load(px); -#elif CV_SIMD128 - v_coarsel += v_load(px); - v_coarseh += v_load(px + 8); -#else - for (int ind = 0; ind < 16; ++ind) - H[c].coarse[ind] += px[ind]; -#endif - } - - for( j = r; j < n-r; j++ ) - { - int t = 2*r*r + 2*r, b, sum = 0; - HT* segment; - - px = h_coarse + 16 * (n*c + std::min(j + r, n - 1)); -#if CV_SIMD256 - v_coarse += v256_load(px); - v_store(H[c].coarse, v_coarse); -#elif CV_SIMD128 - v_coarsel += v_load(px); - v_coarseh += v_load(px + 8); - v_store(H[c].coarse, v_coarsel); - v_store(H[c].coarse + 8, v_coarseh); -#else - for (int ind = 0; ind < 16; ++ind) - H[c].coarse[ind] += px[ind]; -#endif 
- - // Find median at coarse level - for ( k = 0; k < 16 ; ++k ) - { - sum += H[c].coarse[k]; - if ( sum > t ) - { - sum -= H[c].coarse[k]; - break; - } - } - CV_Assert( k < 16 ); - - /* Update corresponding histogram segment */ -#if CV_SIMD256 - v_uint16x16 v_fine; -#elif CV_SIMD128 - v_uint16x8 v_finel; - v_uint16x8 v_fineh; -#endif - if ( luc[c][k] <= j-r ) - { -#if CV_SIMD256 - v_fine = v256_setzero_u16(); -#elif CV_SIMD128 - v_finel = v_setzero_u16(); - v_fineh = v_setzero_u16(); -#else - memset(&H[c].fine[k], 0, 16 * sizeof(HT)); -#endif - px = h_fine + 16 * (n*(16 * c + k) + j - r); - for (luc[c][k] = cv::HT(j - r); luc[c][k] < MIN(j + r + 1, n); ++luc[c][k], px += 16) - { -#if CV_SIMD256 - v_fine += v256_load(px); -#elif CV_SIMD128 - v_finel += v_load(px); - v_fineh += v_load(px + 8); -#else - for (int ind = 0; ind < 16; ++ind) - H[c].fine[k][ind] += px[ind]; -#endif - } - - if ( luc[c][k] < j+r+1 ) - { - px = h_fine + 16 * (n*(16 * c + k) + (n - 1)); -#if CV_SIMD256 - v_fine += v_mul_wrap(v256_load(px), v256_setall_u16(j + r + 1 - n)); -#elif CV_SIMD128 - v_finel += v_mul_wrap(v_load(px), v_setall_u16(j + r + 1 - n)); - v_fineh += v_mul_wrap(v_load(px + 8), v_setall_u16(j + r + 1 - n)); -#else - for (int ind = 0; ind < 16; ++ind) - H[c].fine[k][ind] += (j + r + 1 - n) * px[ind]; -#endif - luc[c][k] = (HT)(j+r+1); - } - } - else - { -#if CV_SIMD256 - v_fine = v256_load(H[c].fine[k]); -#elif CV_SIMD128 - v_finel = v_load(H[c].fine[k]); - v_fineh = v_load(H[c].fine[k] + 8); -#endif - px = h_fine + 16*n*(16 * c + k); - for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) - { -#if CV_SIMD256 - v_fine += v256_load(px + 16 * MIN(luc[c][k], n - 1)) - v256_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0)); -#elif CV_SIMD128 - v_finel += v_load(px + 16 * MIN(luc[c][k], n - 1) ) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0)); - v_fineh += v_load(px + 16 * MIN(luc[c][k], n - 1) + 8) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0) + 8); -#else - for (int ind = 0; ind < 16; ++ind) 
- H[c].fine[k][ind] += px[16 * MIN(luc[c][k], n - 1) + ind] - px[16 * MAX(luc[c][k] - 2 * r - 1, 0) + ind]; -#endif - } - } - - px = h_coarse + 16 * (n*c + MAX(j - r, 0)); -#if CV_SIMD256 - v_store(H[c].fine[k], v_fine); - v_coarse -= v256_load(px); -#elif CV_SIMD128 - v_store(H[c].fine[k], v_finel); - v_store(H[c].fine[k] + 8, v_fineh); - v_coarsel -= v_load(px); - v_coarseh -= v_load(px + 8); -#else - for (int ind = 0; ind < 16; ++ind) - H[c].coarse[ind] -= px[ind]; -#endif - - /* Find median in segment */ - segment = H[c].fine[k]; - for ( b = 0; b < 16 ; b++ ) - { - sum += segment[b]; - if ( sum > t ) - { - dst[dstep*i+cn*j+c] = (uchar)(16*k + b); - break; - } - } - CV_Assert( b < 16 ); - } - } -#if CV_SIMD - vx_cleanup(); -#endif - } - } - -#undef HOP -#undef COP -} - -static void -medianBlur_8u_Om( const Mat& _src, Mat& _dst, int m ) -{ - #define N 16 - int zone0[4][N]; - int zone1[4][N*N]; - int x, y; - int n2 = m*m/2; - Size size = _dst.size(); - const uchar* src = _src.ptr(); - uchar* dst = _dst.ptr(); - int src_step = (int)_src.step, dst_step = (int)_dst.step; - int cn = _src.channels(); - const uchar* src_max = src + size.height*src_step; - CV_Assert(cn > 0 && cn <= 4); - - #define UPDATE_ACC01( pix, cn, op ) \ - { \ - int p = (pix); \ - zone1[cn][p] op; \ - zone0[cn][p >> 4] op; \ - } - - //CV_Assert( size.height >= nx && size.width >= nx ); - for( x = 0; x < size.width; x++, src += cn, dst += cn ) - { - uchar* dst_cur = dst; - const uchar* src_top = src; - const uchar* src_bottom = src; - int k, c; - int src_step1 = src_step, dst_step1 = dst_step; - - if( x % 2 != 0 ) - { - src_bottom = src_top += src_step*(size.height-1); - dst_cur += dst_step*(size.height-1); - src_step1 = -src_step1; - dst_step1 = -dst_step1; - } - - // init accumulator - memset( zone0, 0, sizeof(zone0[0])*cn ); - memset( zone1, 0, sizeof(zone1[0])*cn ); - - for( y = 0; y <= m/2; y++ ) - { - for( c = 0; c < cn; c++ ) - { - if( y > 0 ) - { - for( k = 0; k < m*cn; k += cn ) - 
UPDATE_ACC01( src_bottom[k+c], c, ++ ); - } - else - { - for( k = 0; k < m*cn; k += cn ) - UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 ); - } - } - - if( (src_step1 > 0 && y < size.height-1) || - (src_step1 < 0 && size.height-y-1 > 0) ) - src_bottom += src_step1; - } - - for( y = 0; y < size.height; y++, dst_cur += dst_step1 ) - { - // find median - for( c = 0; c < cn; c++ ) - { - int s = 0; - for( k = 0; ; k++ ) - { - int t = s + zone0[c][k]; - if( t > n2 ) break; - s = t; - } - - for( k *= N; ;k++ ) - { - s += zone1[c][k]; - if( s > n2 ) break; - } - - dst_cur[c] = (uchar)k; - } - - if( y+1 == size.height ) - break; - - if( cn == 1 ) - { - for( k = 0; k < m; k++ ) - { - int p = src_top[k]; - int q = src_bottom[k]; - zone1[0][p]--; - zone0[0][p>>4]--; - zone1[0][q]++; - zone0[0][q>>4]++; - } - } - else if( cn == 3 ) - { - for( k = 0; k < m*3; k += 3 ) - { - UPDATE_ACC01( src_top[k], 0, -- ); - UPDATE_ACC01( src_top[k+1], 1, -- ); - UPDATE_ACC01( src_top[k+2], 2, -- ); - - UPDATE_ACC01( src_bottom[k], 0, ++ ); - UPDATE_ACC01( src_bottom[k+1], 1, ++ ); - UPDATE_ACC01( src_bottom[k+2], 2, ++ ); - } - } - else - { - assert( cn == 4 ); - for( k = 0; k < m*4; k += 4 ) - { - UPDATE_ACC01( src_top[k], 0, -- ); - UPDATE_ACC01( src_top[k+1], 1, -- ); - UPDATE_ACC01( src_top[k+2], 2, -- ); - UPDATE_ACC01( src_top[k+3], 3, -- ); - - UPDATE_ACC01( src_bottom[k], 0, ++ ); - UPDATE_ACC01( src_bottom[k+1], 1, ++ ); - UPDATE_ACC01( src_bottom[k+2], 2, ++ ); - UPDATE_ACC01( src_bottom[k+3], 3, ++ ); - } - } - - if( (src_step1 > 0 && src_bottom + src_step1 < src_max) || - (src_step1 < 0 && src_bottom + src_step1 >= src) ) - src_bottom += src_step1; - - if( y >= m/2 ) - src_top += src_step1; - } - } -#undef N -#undef UPDATE_ACC -} - - -struct MinMax8u -{ - typedef uchar value_type; - typedef int arg_type; - enum { SIZE = 1 }; - arg_type load(const uchar* ptr) { return *ptr; } - void store(uchar* ptr, arg_type val) { *ptr = (uchar)val; } - void operator()(arg_type& a, arg_type& b) 
const - { - int t = CV_FAST_CAST_8U(a - b); - b += t; a -= t; - } -}; - -struct MinMax16u -{ - typedef ushort value_type; - typedef int arg_type; - enum { SIZE = 1 }; - arg_type load(const ushort* ptr) { return *ptr; } - void store(ushort* ptr, arg_type val) { *ptr = (ushort)val; } - void operator()(arg_type& a, arg_type& b) const - { - arg_type t = a; - a = std::min(a, b); - b = std::max(b, t); - } -}; - -struct MinMax16s -{ - typedef short value_type; - typedef int arg_type; - enum { SIZE = 1 }; - arg_type load(const short* ptr) { return *ptr; } - void store(short* ptr, arg_type val) { *ptr = (short)val; } - void operator()(arg_type& a, arg_type& b) const - { - arg_type t = a; - a = std::min(a, b); - b = std::max(b, t); - } -}; - -struct MinMax32f -{ - typedef float value_type; - typedef float arg_type; - enum { SIZE = 1 }; - arg_type load(const float* ptr) { return *ptr; } - void store(float* ptr, arg_type val) { *ptr = val; } - void operator()(arg_type& a, arg_type& b) const - { - arg_type t = a; - a = std::min(a, b); - b = std::max(b, t); - } -}; - -#if CV_SIMD - -struct MinMaxVec8u -{ - typedef uchar value_type; - typedef v_uint8x16 arg_type; - enum { SIZE = v_uint8x16::nlanes }; - arg_type load(const uchar* ptr) { return v_load(ptr); } - void store(uchar* ptr, const arg_type &val) { v_store(ptr, val); } - void operator()(arg_type& a, arg_type& b) const - { - arg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#if CV_SIMD_WIDTH > 16 - typedef v_uint8 warg_type; - enum { WSIZE = v_uint8::nlanes }; - warg_type wload(const uchar* ptr) { return vx_load(ptr); } - void store(uchar* ptr, const warg_type &val) { v_store(ptr, val); } - void operator()(warg_type& a, warg_type& b) const - { - warg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#endif -}; - - -struct MinMaxVec16u -{ - typedef ushort value_type; - typedef v_uint16x8 arg_type; - enum { SIZE = v_uint16x8::nlanes }; - arg_type load(const ushort* ptr) { return v_load(ptr); } - void 
store(ushort* ptr, const arg_type &val) { v_store(ptr, val); } - void operator()(arg_type& a, arg_type& b) const - { - arg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#if CV_SIMD_WIDTH > 16 - typedef v_uint16 warg_type; - enum { WSIZE = v_uint16::nlanes }; - warg_type wload(const ushort* ptr) { return vx_load(ptr); } - void store(ushort* ptr, const warg_type &val) { v_store(ptr, val); } - void operator()(warg_type& a, warg_type& b) const - { - warg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#endif -}; - - -struct MinMaxVec16s -{ - typedef short value_type; - typedef v_int16x8 arg_type; - enum { SIZE = v_int16x8::nlanes }; - arg_type load(const short* ptr) { return v_load(ptr); } - void store(short* ptr, const arg_type &val) { v_store(ptr, val); } - void operator()(arg_type& a, arg_type& b) const - { - arg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#if CV_SIMD_WIDTH > 16 - typedef v_int16 warg_type; - enum { WSIZE = v_int16::nlanes }; - warg_type wload(const short* ptr) { return vx_load(ptr); } - void store(short* ptr, const warg_type &val) { v_store(ptr, val); } - void operator()(warg_type& a, warg_type& b) const - { - warg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#endif -}; - - -struct MinMaxVec32f -{ - typedef float value_type; - typedef v_float32x4 arg_type; - enum { SIZE = v_float32x4::nlanes }; - arg_type load(const float* ptr) { return v_load(ptr); } - void store(float* ptr, const arg_type &val) { v_store(ptr, val); } - void operator()(arg_type& a, arg_type& b) const - { - arg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#if CV_SIMD_WIDTH > 16 - typedef v_float32 warg_type; - enum { WSIZE = v_float32::nlanes }; - warg_type wload(const float* ptr) { return vx_load(ptr); } - void store(float* ptr, const warg_type &val) { v_store(ptr, val); } - void operator()(warg_type& a, warg_type& b) const - { - warg_type t = a; - a = v_min(a, b); - b = v_max(b, t); - } -#endif -}; - -#else - -typedef MinMax8u 
MinMaxVec8u; -typedef MinMax16u MinMaxVec16u; -typedef MinMax16s MinMaxVec16s; -typedef MinMax32f MinMaxVec32f; - -#endif - -template -static void -medianBlur_SortNet( const Mat& _src, Mat& _dst, int m ) -{ - typedef typename Op::value_type T; - typedef typename Op::arg_type WT; - typedef typename VecOp::arg_type VT; - typedef typename VecOp::warg_type WVT; - - const T* src = _src.ptr(); - T* dst = _dst.ptr(); - int sstep = (int)(_src.step/sizeof(T)); - int dstep = (int)(_dst.step/sizeof(T)); - Size size = _dst.size(); - int i, j, k, cn = _src.channels(); - Op op; - VecOp vop; - - if( m == 3 ) - { - if( size.width == 1 || size.height == 1 ) - { - int len = size.width + size.height - 1; - int sdelta = size.height == 1 ? cn : sstep; - int sdelta0 = size.height == 1 ? 0 : sstep - cn; - int ddelta = size.height == 1 ? cn : dstep; - - for( i = 0; i < len; i++, src += sdelta0, dst += ddelta ) - for( j = 0; j < cn; j++, src++ ) - { - WT p0 = src[i > 0 ? -sdelta : 0]; - WT p1 = src[0]; - WT p2 = src[i < len - 1 ? sdelta : 0]; - - op(p0, p1); op(p1, p2); op(p0, p1); - dst[j] = (T)p1; - } - return; - } - - size.width *= cn; - for( i = 0; i < size.height; i++, dst += dstep ) - { - const T* row0 = src + std::max(i - 1, 0)*sstep; - const T* row1 = src + i*sstep; - const T* row2 = src + std::min(i + 1, size.height-1)*sstep; - int limit = cn; - - for(j = 0;; ) - { - for( ; j < limit; j++ ) - { - int j0 = j >= cn ? j - cn : j; - int j2 = j < size.width - cn ? 
j + cn : j; - WT p0 = row0[j0], p1 = row0[j], p2 = row0[j2]; - WT p3 = row1[j0], p4 = row1[j], p5 = row1[j2]; - WT p6 = row2[j0], p7 = row2[j], p8 = row2[j2]; - - op(p1, p2); op(p4, p5); op(p7, p8); op(p0, p1); - op(p3, p4); op(p6, p7); op(p1, p2); op(p4, p5); - op(p7, p8); op(p0, p3); op(p5, p8); op(p4, p7); - op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7); - op(p4, p2); op(p6, p4); op(p4, p2); - dst[j] = (T)p4; - } - - if( limit == size.width ) - break; - -#if CV_SIMD_WIDTH > 16 - for( ; j <= size.width - VecOp::WSIZE - cn; j += VecOp::WSIZE ) - { - WVT p0 = vop.wload(row0+j-cn), p1 = vop.wload(row0+j), p2 = vop.wload(row0+j+cn); - WVT p3 = vop.wload(row1+j-cn), p4 = vop.wload(row1+j), p5 = vop.wload(row1+j+cn); - WVT p6 = vop.wload(row2+j-cn), p7 = vop.wload(row2+j), p8 = vop.wload(row2+j+cn); - - vop(p1, p2); vop(p4, p5); vop(p7, p8); vop(p0, p1); - vop(p3, p4); vop(p6, p7); vop(p1, p2); vop(p4, p5); - vop(p7, p8); vop(p0, p3); vop(p5, p8); vop(p4, p7); - vop(p3, p6); vop(p1, p4); vop(p2, p5); vop(p4, p7); - vop(p4, p2); vop(p6, p4); vop(p4, p2); - vop.store(dst+j, p4); - } -#endif - for( ; j <= size.width - VecOp::SIZE - cn; j += VecOp::SIZE ) - { - VT p0 = vop.load(row0+j-cn), p1 = vop.load(row0+j), p2 = vop.load(row0+j+cn); - VT p3 = vop.load(row1+j-cn), p4 = vop.load(row1+j), p5 = vop.load(row1+j+cn); - VT p6 = vop.load(row2+j-cn), p7 = vop.load(row2+j), p8 = vop.load(row2+j+cn); - - vop(p1, p2); vop(p4, p5); vop(p7, p8); vop(p0, p1); - vop(p3, p4); vop(p6, p7); vop(p1, p2); vop(p4, p5); - vop(p7, p8); vop(p0, p3); vop(p5, p8); vop(p4, p7); - vop(p3, p6); vop(p1, p4); vop(p2, p5); vop(p4, p7); - vop(p4, p2); vop(p6, p4); vop(p4, p2); - vop.store(dst+j, p4); - } - - limit = size.width; - } - } -#if CV_SIMD - vx_cleanup(); -#endif - } - else if( m == 5 ) - { - if( size.width == 1 || size.height == 1 ) - { - int len = size.width + size.height - 1; - int sdelta = size.height == 1 ? cn : sstep; - int sdelta0 = size.height == 1 ? 
0 : sstep - cn; - int ddelta = size.height == 1 ? cn : dstep; - - for( i = 0; i < len; i++, src += sdelta0, dst += ddelta ) - for( j = 0; j < cn; j++, src++ ) - { - int i1 = i > 0 ? -sdelta : 0; - int i0 = i > 1 ? -sdelta*2 : i1; - int i3 = i < len-1 ? sdelta : 0; - int i4 = i < len-2 ? sdelta*2 : i3; - WT p0 = src[i0], p1 = src[i1], p2 = src[0], p3 = src[i3], p4 = src[i4]; - - op(p0, p1); op(p3, p4); op(p2, p3); op(p3, p4); op(p0, p2); - op(p2, p4); op(p1, p3); op(p1, p2); - dst[j] = (T)p2; - } - return; - } - - size.width *= cn; - for( i = 0; i < size.height; i++, dst += dstep ) - { - const T* row[5]; - row[0] = src + std::max(i - 2, 0)*sstep; - row[1] = src + std::max(i - 1, 0)*sstep; - row[2] = src + i*sstep; - row[3] = src + std::min(i + 1, size.height-1)*sstep; - row[4] = src + std::min(i + 2, size.height-1)*sstep; - int limit = cn*2; - - for(j = 0;; ) - { - for( ; j < limit; j++ ) - { - WT p[25]; - int j1 = j >= cn ? j - cn : j; - int j0 = j >= cn*2 ? j - cn*2 : j1; - int j3 = j < size.width - cn ? j + cn : j; - int j4 = j < size.width - cn*2 ? 
j + cn*2 : j3; - for( k = 0; k < 5; k++ ) - { - const T* rowk = row[k]; - p[k*5] = rowk[j0]; p[k*5+1] = rowk[j1]; - p[k*5+2] = rowk[j]; p[k*5+3] = rowk[j3]; - p[k*5+4] = rowk[j4]; - } - - op(p[1], p[2]); op(p[0], p[1]); op(p[1], p[2]); op(p[4], p[5]); op(p[3], p[4]); - op(p[4], p[5]); op(p[0], p[3]); op(p[2], p[5]); op(p[2], p[3]); op(p[1], p[4]); - op(p[1], p[2]); op(p[3], p[4]); op(p[7], p[8]); op(p[6], p[7]); op(p[7], p[8]); - op(p[10], p[11]); op(p[9], p[10]); op(p[10], p[11]); op(p[6], p[9]); op(p[8], p[11]); - op(p[8], p[9]); op(p[7], p[10]); op(p[7], p[8]); op(p[9], p[10]); op(p[0], p[6]); - op(p[4], p[10]); op(p[4], p[6]); op(p[2], p[8]); op(p[2], p[4]); op(p[6], p[8]); - op(p[1], p[7]); op(p[5], p[11]); op(p[5], p[7]); op(p[3], p[9]); op(p[3], p[5]); - op(p[7], p[9]); op(p[1], p[2]); op(p[3], p[4]); op(p[5], p[6]); op(p[7], p[8]); - op(p[9], p[10]); op(p[13], p[14]); op(p[12], p[13]); op(p[13], p[14]); op(p[16], p[17]); - op(p[15], p[16]); op(p[16], p[17]); op(p[12], p[15]); op(p[14], p[17]); op(p[14], p[15]); - op(p[13], p[16]); op(p[13], p[14]); op(p[15], p[16]); op(p[19], p[20]); op(p[18], p[19]); - op(p[19], p[20]); op(p[21], p[22]); op(p[23], p[24]); op(p[21], p[23]); op(p[22], p[24]); - op(p[22], p[23]); op(p[18], p[21]); op(p[20], p[23]); op(p[20], p[21]); op(p[19], p[22]); - op(p[22], p[24]); op(p[19], p[20]); op(p[21], p[22]); op(p[23], p[24]); op(p[12], p[18]); - op(p[16], p[22]); op(p[16], p[18]); op(p[14], p[20]); op(p[20], p[24]); op(p[14], p[16]); - op(p[18], p[20]); op(p[22], p[24]); op(p[13], p[19]); op(p[17], p[23]); op(p[17], p[19]); - op(p[15], p[21]); op(p[15], p[17]); op(p[19], p[21]); op(p[13], p[14]); op(p[15], p[16]); - op(p[17], p[18]); op(p[19], p[20]); op(p[21], p[22]); op(p[23], p[24]); op(p[0], p[12]); - op(p[8], p[20]); op(p[8], p[12]); op(p[4], p[16]); op(p[16], p[24]); op(p[12], p[16]); - op(p[2], p[14]); op(p[10], p[22]); op(p[10], p[14]); op(p[6], p[18]); op(p[6], p[10]); - op(p[10], p[12]); op(p[1], p[13]); op(p[9], 
p[21]); op(p[9], p[13]); op(p[5], p[17]); - op(p[13], p[17]); op(p[3], p[15]); op(p[11], p[23]); op(p[11], p[15]); op(p[7], p[19]); - op(p[7], p[11]); op(p[11], p[13]); op(p[11], p[12]); - dst[j] = (T)p[12]; - } - - if( limit == size.width ) - break; - -#if CV_SIMD_WIDTH > 16 - for( ; j <= size.width - VecOp::WSIZE - cn*2; j += VecOp::WSIZE ) - { - WVT p[25]; - for( k = 0; k < 5; k++ ) - { - const T* rowk = row[k]; - p[k*5] = vop.wload(rowk+j-cn*2); p[k*5+1] = vop.wload(rowk+j-cn); - p[k*5+2] = vop.wload(rowk+j); p[k*5+3] = vop.wload(rowk+j+cn); - p[k*5+4] = vop.wload(rowk+j+cn*2); - } - - vop(p[1], p[2]); vop(p[0], p[1]); vop(p[1], p[2]); vop(p[4], p[5]); vop(p[3], p[4]); - vop(p[4], p[5]); vop(p[0], p[3]); vop(p[2], p[5]); vop(p[2], p[3]); vop(p[1], p[4]); - vop(p[1], p[2]); vop(p[3], p[4]); vop(p[7], p[8]); vop(p[6], p[7]); vop(p[7], p[8]); - vop(p[10], p[11]); vop(p[9], p[10]); vop(p[10], p[11]); vop(p[6], p[9]); vop(p[8], p[11]); - vop(p[8], p[9]); vop(p[7], p[10]); vop(p[7], p[8]); vop(p[9], p[10]); vop(p[0], p[6]); - vop(p[4], p[10]); vop(p[4], p[6]); vop(p[2], p[8]); vop(p[2], p[4]); vop(p[6], p[8]); - vop(p[1], p[7]); vop(p[5], p[11]); vop(p[5], p[7]); vop(p[3], p[9]); vop(p[3], p[5]); - vop(p[7], p[9]); vop(p[1], p[2]); vop(p[3], p[4]); vop(p[5], p[6]); vop(p[7], p[8]); - vop(p[9], p[10]); vop(p[13], p[14]); vop(p[12], p[13]); vop(p[13], p[14]); vop(p[16], p[17]); - vop(p[15], p[16]); vop(p[16], p[17]); vop(p[12], p[15]); vop(p[14], p[17]); vop(p[14], p[15]); - vop(p[13], p[16]); vop(p[13], p[14]); vop(p[15], p[16]); vop(p[19], p[20]); vop(p[18], p[19]); - vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[21], p[23]); vop(p[22], p[24]); - vop(p[22], p[23]); vop(p[18], p[21]); vop(p[20], p[23]); vop(p[20], p[21]); vop(p[19], p[22]); - vop(p[22], p[24]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[12], p[18]); - vop(p[16], p[22]); vop(p[16], p[18]); vop(p[14], p[20]); vop(p[20], p[24]); vop(p[14], p[16]); - vop(p[18], p[20]); 
vop(p[22], p[24]); vop(p[13], p[19]); vop(p[17], p[23]); vop(p[17], p[19]); - vop(p[15], p[21]); vop(p[15], p[17]); vop(p[19], p[21]); vop(p[13], p[14]); vop(p[15], p[16]); - vop(p[17], p[18]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[0], p[12]); - vop(p[8], p[20]); vop(p[8], p[12]); vop(p[4], p[16]); vop(p[16], p[24]); vop(p[12], p[16]); - vop(p[2], p[14]); vop(p[10], p[22]); vop(p[10], p[14]); vop(p[6], p[18]); vop(p[6], p[10]); - vop(p[10], p[12]); vop(p[1], p[13]); vop(p[9], p[21]); vop(p[9], p[13]); vop(p[5], p[17]); - vop(p[13], p[17]); vop(p[3], p[15]); vop(p[11], p[23]); vop(p[11], p[15]); vop(p[7], p[19]); - vop(p[7], p[11]); vop(p[11], p[13]); vop(p[11], p[12]); - vop.store(dst+j, p[12]); - } -#endif - for( ; j <= size.width - VecOp::SIZE - cn*2; j += VecOp::SIZE ) - { - VT p[25]; - for( k = 0; k < 5; k++ ) - { - const T* rowk = row[k]; - p[k*5] = vop.load(rowk+j-cn*2); p[k*5+1] = vop.load(rowk+j-cn); - p[k*5+2] = vop.load(rowk+j); p[k*5+3] = vop.load(rowk+j+cn); - p[k*5+4] = vop.load(rowk+j+cn*2); - } - - vop(p[1], p[2]); vop(p[0], p[1]); vop(p[1], p[2]); vop(p[4], p[5]); vop(p[3], p[4]); - vop(p[4], p[5]); vop(p[0], p[3]); vop(p[2], p[5]); vop(p[2], p[3]); vop(p[1], p[4]); - vop(p[1], p[2]); vop(p[3], p[4]); vop(p[7], p[8]); vop(p[6], p[7]); vop(p[7], p[8]); - vop(p[10], p[11]); vop(p[9], p[10]); vop(p[10], p[11]); vop(p[6], p[9]); vop(p[8], p[11]); - vop(p[8], p[9]); vop(p[7], p[10]); vop(p[7], p[8]); vop(p[9], p[10]); vop(p[0], p[6]); - vop(p[4], p[10]); vop(p[4], p[6]); vop(p[2], p[8]); vop(p[2], p[4]); vop(p[6], p[8]); - vop(p[1], p[7]); vop(p[5], p[11]); vop(p[5], p[7]); vop(p[3], p[9]); vop(p[3], p[5]); - vop(p[7], p[9]); vop(p[1], p[2]); vop(p[3], p[4]); vop(p[5], p[6]); vop(p[7], p[8]); - vop(p[9], p[10]); vop(p[13], p[14]); vop(p[12], p[13]); vop(p[13], p[14]); vop(p[16], p[17]); - vop(p[15], p[16]); vop(p[16], p[17]); vop(p[12], p[15]); vop(p[14], p[17]); vop(p[14], p[15]); - vop(p[13], p[16]); vop(p[13], p[14]); 
vop(p[15], p[16]); vop(p[19], p[20]); vop(p[18], p[19]); - vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[21], p[23]); vop(p[22], p[24]); - vop(p[22], p[23]); vop(p[18], p[21]); vop(p[20], p[23]); vop(p[20], p[21]); vop(p[19], p[22]); - vop(p[22], p[24]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[12], p[18]); - vop(p[16], p[22]); vop(p[16], p[18]); vop(p[14], p[20]); vop(p[20], p[24]); vop(p[14], p[16]); - vop(p[18], p[20]); vop(p[22], p[24]); vop(p[13], p[19]); vop(p[17], p[23]); vop(p[17], p[19]); - vop(p[15], p[21]); vop(p[15], p[17]); vop(p[19], p[21]); vop(p[13], p[14]); vop(p[15], p[16]); - vop(p[17], p[18]); vop(p[19], p[20]); vop(p[21], p[22]); vop(p[23], p[24]); vop(p[0], p[12]); - vop(p[8], p[20]); vop(p[8], p[12]); vop(p[4], p[16]); vop(p[16], p[24]); vop(p[12], p[16]); - vop(p[2], p[14]); vop(p[10], p[22]); vop(p[10], p[14]); vop(p[6], p[18]); vop(p[6], p[10]); - vop(p[10], p[12]); vop(p[1], p[13]); vop(p[9], p[21]); vop(p[9], p[13]); vop(p[5], p[17]); - vop(p[13], p[17]); vop(p[3], p[15]); vop(p[11], p[23]); vop(p[11], p[15]); vop(p[7], p[19]); - vop(p[7], p[11]); vop(p[11], p[13]); vop(p[11], p[12]); - vop.store(dst+j, p[12]); - } - - limit = size.width; - } - } -#if CV_SIMD - vx_cleanup(); -#endif - } -} - -#ifdef HAVE_OPENCL - -static bool ocl_medianFilter(InputArray _src, OutputArray _dst, int m) -{ - size_t localsize[2] = { 16, 16 }; - size_t globalsize[2]; - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - - if ( !((depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F) && cn <= 4 && (m == 3 || m == 5)) ) - return false; - - Size imgSize = _src.size(); - bool useOptimized = (1 == cn) && - (size_t)imgSize.width >= localsize[0] * 8 && - (size_t)imgSize.height >= localsize[1] * 8 && - imgSize.width % 4 == 0 && - imgSize.height % 4 == 0 && - (ocl::Device::getDefault().isIntel()); - - cv::String kname = format( useOptimized ? 
"medianFilter%d_u" : "medianFilter%d", m) ; - cv::String kdefs = useOptimized ? - format("-D T=%s -D T1=%s -D T4=%s%d -D cn=%d -D USE_4OPT", ocl::typeToStr(type), - ocl::typeToStr(depth), ocl::typeToStr(depth), cn*4, cn) - : - format("-D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn) ; - - ocl::Kernel k(kname.c_str(), ocl::imgproc::medianFilter_oclsrc, kdefs.c_str() ); - - if (k.empty()) - return false; - - UMat src = _src.getUMat(); - _dst.create(src.size(), type); - UMat dst = _dst.getUMat(); - - k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); - - if( useOptimized ) - { - globalsize[0] = DIVUP(src.cols / 4, localsize[0]) * localsize[0]; - globalsize[1] = DIVUP(src.rows / 4, localsize[1]) * localsize[1]; - } - else - { - globalsize[0] = (src.cols + localsize[0] + 2) / localsize[0] * localsize[0]; - globalsize[1] = (src.rows + localsize[1] - 1) / localsize[1] * localsize[1]; - } - - return k.run(2, globalsize, localsize, false); -} - -#endif - -} - -#ifdef HAVE_OPENVX -namespace cv -{ - namespace ovx { - template <> inline bool skipSmallImages(int w, int h) { return w*h < 1280 * 720; } - } - static bool openvx_medianFilter(InputArray _src, OutputArray _dst, int ksize) - { - if (_src.type() != CV_8UC1 || _dst.type() != CV_8U -#ifndef VX_VERSION_1_1 - || ksize != 3 -#endif - ) - return false; - - Mat src = _src.getMat(); - Mat dst = _dst.getMat(); - - if ( -#ifdef VX_VERSION_1_1 - ksize != 3 ? 
ovx::skipSmallImages(src.cols, src.rows) : -#endif - ovx::skipSmallImages(src.cols, src.rows) - ) - return false; - - try - { - ivx::Context ctx = ovx::getOpenVXContext(); -#ifdef VX_VERSION_1_1 - if ((vx_size)ksize > ctx.nonlinearMaxDimension()) - return false; -#endif - - Mat a; - if (dst.data != src.data) - a = src; - else - src.copyTo(a); - - ivx::Image - ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data), - ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8, - ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data); - - //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments - //since OpenVX standard says nothing about thread-safety for now - ivx::border_t prevBorder = ctx.immediateBorder(); - ctx.setImmediateBorder(VX_BORDER_REPLICATE); -#ifdef VX_VERSION_1_1 - if (ksize == 3) -#endif - { - ivx::IVX_CHECK_STATUS(vxuMedian3x3(ctx, ia, ib)); - } -#ifdef VX_VERSION_1_1 - else - { - ivx::Matrix mtx; - if(ksize == 5) - mtx = ivx::Matrix::createFromPattern(ctx, VX_PATTERN_BOX, ksize, ksize); - else - { - vx_size supportedSize; - ivx::IVX_CHECK_STATUS(vxQueryContext(ctx, VX_CONTEXT_NONLINEAR_MAX_DIMENSION, &supportedSize, sizeof(supportedSize))); - if ((vx_size)ksize > supportedSize) - { - ctx.setImmediateBorder(prevBorder); - return false; - } - Mat mask(ksize, ksize, CV_8UC1, Scalar(255)); - mtx = ivx::Matrix::create(ctx, VX_TYPE_UINT8, ksize, ksize); - mtx.copyFrom(mask); - } - ivx::IVX_CHECK_STATUS(vxuNonLinearFilter(ctx, VX_NONLINEAR_FILTER_MEDIAN, ia, mtx, ib)); - } -#endif - ctx.setImmediateBorder(prevBorder); - } - catch (ivx::RuntimeError & e) - { - VX_DbgThrow(e.what()); - } - catch (ivx::WrapperError & e) - { - VX_DbgThrow(e.what()); - } - - return true; - } -} -#endif - -#ifdef HAVE_IPP -namespace cv -{ -static bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize) -{ - 
CV_INSTRUMENT_REGION_IPP(); - -#if IPP_VERSION_X100 < 201801 - // Degradations for big kernel - if(ksize > 7) - return false; -#endif - - { - int bufSize; - IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize); - IppDataType ippType = ippiGetDataType(src0.type()); - int channels = src0.channels(); - IppAutoBuffer buffer; - - if(src0.isSubmatrix()) - return false; - - Mat src; - if(dst.data != src0.data) - src = src0; - else - src0.copyTo(src); - - if(ippiFilterMedianBorderGetBufferSize(dstRoiSize, maskSize, ippType, channels, &bufSize) < 0) - return false; - - buffer.allocate(bufSize); - - switch(ippType) - { - case ipp8u: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 3) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 4) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_8u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - case ipp16u: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 3) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 4) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16u_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - case ipp16s: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C1R, src.ptr(), (int)src.step, 
dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 3) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C3R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else if(channels == 4) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_16s_C4R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - case ipp32f: - if(channels == 1) - return CV_INSTRUMENT_FUN_IPP(ippiFilterMedianBorder_32f_C1R, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, dstRoiSize, maskSize, ippBorderRepl, 0, buffer) >= 0; - else - return false; - default: - return false; - } - } -} -} -#endif - -void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize ) -{ - CV_INSTRUMENT_REGION(); - - CV_Assert( (ksize % 2 == 1) && (_src0.dims() <= 2 )); - - if( ksize <= 1 || _src0.empty() ) - { - _src0.copyTo(_dst); - return; - } - - CV_OCL_RUN(_dst.isUMat(), - ocl_medianFilter(_src0,_dst, ksize)) - - Mat src0 = _src0.getMat(); - _dst.create( src0.size(), src0.type() ); - Mat dst = _dst.getMat(); - - CALL_HAL(medianBlur, cv_hal_medianBlur, src0.data, src0.step, dst.data, dst.step, src0.cols, src0.rows, src0.depth(), - src0.channels(), ksize); - - CV_OVX_RUN(true, - openvx_medianFilter(_src0, _dst, ksize)) - - CV_IPP_RUN_FAST(ipp_medianFilter(src0, dst, ksize)); - -#ifdef HAVE_TEGRA_OPTIMIZATION - if (tegra::useTegra() && tegra::medianBlur(src0, dst, ksize)) - return; -#endif - - bool useSortNet = ksize == 3 || (ksize == 5 -#if !(CV_SIMD) - && ( src0.depth() > CV_8U || src0.channels() == 2 || src0.channels() > 4 ) -#endif - ); - - Mat src; - if( useSortNet ) - { - if( dst.data != src0.data ) - src = src0; - else - src0.copyTo(src); - - if( src.depth() == CV_8U ) - medianBlur_SortNet( src, dst, ksize ); - else if( src.depth() == CV_16U ) - medianBlur_SortNet( src, dst, ksize ); - else if( src.depth() == 
CV_16S ) - medianBlur_SortNet( src, dst, ksize ); - else if( src.depth() == CV_32F ) - medianBlur_SortNet( src, dst, ksize ); - else - CV_Error(CV_StsUnsupportedFormat, ""); - - return; - } - else - { - cv::copyMakeBorder( src0, src, 0, 0, ksize/2, ksize/2, BORDER_REPLICATE|BORDER_ISOLATED); - - int cn = src0.channels(); - CV_Assert( src.depth() == CV_8U && (cn == 1 || cn == 3 || cn == 4) ); - - double img_size_mp = (double)(src0.total())/(1 << 20); - if( ksize <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)* - (CV_SIMD ? 1 : 3)) - medianBlur_8u_Om( src, dst, ksize ); - else - medianBlur_8u_O1( src, dst, ksize ); - } -} - /****************************************************************************************\ Bilateral Filtering \****************************************************************************************/ From ce00d38bd918e229ce5967a824e493a57d883111 Mon Sep 17 00:00:00 2001 From: fegorsch Date: Fri, 2 Nov 2018 15:13:46 +0100 Subject: [PATCH 07/14] Add test for symmetric circles with clustering --- modules/calib3d/test/test_chesscorners.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/modules/calib3d/test/test_chesscorners.cpp b/modules/calib3d/test/test_chesscorners.cpp index e55d069de0..73e91e1942 100644 --- a/modules/calib3d/test/test_chesscorners.cpp +++ b/modules/calib3d/test/test_chesscorners.cpp @@ -468,5 +468,24 @@ TEST(Calib3d_AsymmetricCirclesPatternDetector, accuracy) { CV_ChessboardDetector TEST(Calib3d_AsymmetricCirclesPatternDetectorWithClustering, accuracy) { CV_ChessboardDetectorTest test( ASYMMETRIC_CIRCLES_GRID, CALIB_CB_CLUSTERING ); test.safe_run(); } #endif +TEST(Calib3d_CirclesPatternDetectorWithClustering, accuracy) +{ + cv::String dataDir = string(TS::ptr()->get_data_path()) + "cv/cameracalibration/circles/"; + + cv::Mat expected; + FileStorage fs(dataDir + "circles_corners15.dat", FileStorage::READ); + fs["corners"] >> expected; + fs.release(); + + cv::Mat image = cv::imread(dataDir + 
"circles15.png"); + + std::vector centers; + cv::findCirclesGrid(image, Size(10, 8), centers, CALIB_CB_SYMMETRIC_GRID | CALIB_CB_CLUSTERING); + ASSERT_EQ(expected.total(), centers.size()); + + double error = calcError(centers, expected); + ASSERT_LE(error, precise_success_error_level); +} + }} // namespace /* End of file. */ From c6d2f0399a706028f476f3d08358ea787503020d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 3 Nov 2018 02:17:17 +0000 Subject: [PATCH 08/14] videoio: fix build of standalone ffmpeg plugin --- modules/videoio/src/cap_ffmpeg_impl.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 3694bfb8e8..17fc67e9cf 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -48,7 +48,10 @@ #include #include -#define OPENCV_FOURCC(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24)) +#ifndef __OPENCV_BUILD +#define CV_FOURCC(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24)) +#endif + #define CALC_FFMPEG_VERSION(a,b,c) ( a<<16 | b<<8 | c ) #if defined _MSC_VER && _MSC_VER >= 1200 @@ -1197,7 +1200,7 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const return codec_tag; } - return (double) OPENCV_FOURCC(codec_fourcc[0], codec_fourcc[1], codec_fourcc[2], codec_fourcc[3]); + return (double) CV_FOURCC(codec_fourcc[0], codec_fourcc[1], codec_fourcc[2], codec_fourcc[3]); case CV_FFMPEG_CAP_PROP_SAR_NUM: return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).num; case CV_FFMPEG_CAP_PROP_SAR_DEN: From 2007e1d8a3d670153a94409b19588a5db12f1591 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 3 Nov 2018 03:08:49 +0000 Subject: [PATCH 09/14] ffmpeg: update 3.4.5 (OpenCV 3.4 branch) ffmpeg 3.4.2 -> 3.4.5 --- 3rdparty/ffmpeg/ffmpeg.cmake | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/3rdparty/ffmpeg/ffmpeg.cmake b/3rdparty/ffmpeg/ffmpeg.cmake index c8ac9d5e53..6349721723 100644 --- a/3rdparty/ffmpeg/ffmpeg.cmake +++ b/3rdparty/ffmpeg/ffmpeg.cmake @@ -1,8 +1,8 @@ -# Binaries branch name: ffmpeg/3.4_20180608 -# Binaries were created for OpenCV: f5ddbbf65937d8f44e481e4ee1082961821f5c62 -ocv_update(FFMPEG_BINARIES_COMMIT "8041bd6f5ad37045c258904ba3030bb3442e3911") -ocv_update(FFMPEG_FILE_HASH_BIN32 "fa5a2a4e2f37defcb95bde8ed145c2b3") -ocv_update(FFMPEG_FILE_HASH_BIN64 "2cc08fc4fef8199fe80e0f126684834f") +# Binaries branch name: ffmpeg/3.4_20181103 +# Binaries were created for OpenCV: c6d2f0399a706028f476f3d08358ea787503020d +ocv_update(FFMPEG_BINARIES_COMMIT "fe71c0ad807fdc33c2178e48e488f1e9b177c39a") +ocv_update(FFMPEG_FILE_HASH_BIN32 "41b81bb9a50cabd4bea385f7b50a069a") +ocv_update(FFMPEG_FILE_HASH_BIN64 "a9ea7dbbc8e5afd08e00e223a831b578") ocv_update(FFMPEG_FILE_HASH_CMAKE "3b90f67f4b429e77d3da36698cef700c") function(download_win_ffmpeg script_var) From 79dc0ed175ad81a835e532dd706130f276d5163d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 4 Nov 2018 01:41:06 +0000 Subject: [PATCH 10/14] docs: intro formatting update, minor cleanup --- doc/Doxyfile.in | 4 +- modules/core/doc/intro.markdown | 133 +++++++++--------- modules/core/include/opencv2/core/base.hpp | 2 +- modules/core/include/opencv2/core/utility.hpp | 2 +- 4 files changed, 72 insertions(+), 69 deletions(-) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 8f9d02d811..50f492623f 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -107,7 +107,7 @@ RECURSIVE = YES EXCLUDE = EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = *.inl.hpp *.impl.hpp *_detail.hpp */cudev/**/detail/*.hpp *.m */opencl/runtime/* -EXCLUDE_SYMBOLS = cv::DataType<*> cv::traits::* int void CV__* +EXCLUDE_SYMBOLS = cv::DataType<*> cv::traits::* int void CV__* T __CV* EXAMPLE_PATH = @CMAKE_DOXYGEN_EXAMPLE_PATH@ EXAMPLE_PATTERNS = * EXAMPLE_RECURSIVE = YES @@ -250,6 +250,8 
@@ PREDEFINED = __cplusplus=1 \ CV_DEFAULT(x)=" = x" \ CV_NEON=1 \ CV_SSE2=1 \ + CV__DEBUG_NS_BEGIN= \ + CV__DEBUG_NS_END= \ CV_DEPRECATED= EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES diff --git a/modules/core/doc/intro.markdown b/modules/core/doc/intro.markdown index a1a2946f5a..901c11d706 100644 --- a/modules/core/doc/intro.markdown +++ b/modules/core/doc/intro.markdown @@ -3,27 +3,26 @@ Introduction {#intro} OpenCV (Open Source Computer Vision Library: ) is an open-source BSD-licensed library that includes several hundreds of computer vision algorithms. The document describes the -so-called OpenCV 2.x API, which is essentially a C++ API, as opposed to the C-based OpenCV 1.x API. -The latter is described in opencv1x.pdf. +so-called OpenCV 2.x API, which is essentially a C++ API, as opposed to the C-based OpenCV 1.x API +(C API is deprecated and not tested with "C" compiler since OpenCV 2.4 releases) OpenCV has a modular structure, which means that the package includes several shared or static libraries. The following modules are available: -- @ref core - a compact module defining basic data structures, including the dense +- @ref core (**core**) - a compact module defining basic data structures, including the dense multi-dimensional array Mat and basic functions used by all other modules. -- @ref imgproc - an image processing module that includes linear and non-linear image filtering, +- @ref imgproc (**imgproc**) - an image processing module that includes linear and non-linear image filtering, geometrical image transformations (resize, affine and perspective warping, generic table-based remapping), color space conversion, histograms, and so on. -- **video** - a video analysis module that includes motion estimation, background subtraction, +- @ref video (**video**) - a video analysis module that includes motion estimation, background subtraction, and object tracking algorithms. 
-- **calib3d** - basic multiple-view geometry algorithms, single and stereo camera calibration, +- @ref calib3d (**calib3d**) - basic multiple-view geometry algorithms, single and stereo camera calibration, object pose estimation, stereo correspondence algorithms, and elements of 3D reconstruction. -- **features2d** - salient feature detectors, descriptors, and descriptor matchers. -- **objdetect** - detection of objects and instances of the predefined classes (for example, +- @ref features2d (**features2d**) - salient feature detectors, descriptors, and descriptor matchers. +- @ref objdetect (**objdetect**) - detection of objects and instances of the predefined classes (for example, faces, eyes, mugs, people, cars, and so on). -- **highgui** - an easy-to-use interface to simple UI capabilities. -- @ref videoio - an easy-to-use interface to video capturing and video codecs. -- **gpu** - GPU-accelerated algorithms from different OpenCV modules. +- @ref highgui (**highgui**) - an easy-to-use interface to simple UI capabilities. +- @ref videoio (**videoio**) - an easy-to-use interface to video capturing and video codecs. - ... some other helper modules, such as FLANN and Google test wrappers, Python bindings, and others. @@ -35,36 +34,37 @@ API Concepts ### cv Namespace -All the OpenCV classes and functions are placed into the cv namespace. Therefore, to access this -functionality from your code, use the cv:: specifier or using namespace cv; directive: -@code +All the OpenCV classes and functions are placed into the `cv` namespace. Therefore, to access this +functionality from your code, use the `cv::` specifier or `using namespace cv;` directive: + +```.cpp #include "opencv2/core.hpp" ... -cv::Mat H = cv::findHomography(points1, points2, CV_RANSAC, 5); +cv::Mat H = cv::findHomography(points1, points2, cv::RANSAC, 5); ... -@endcode +``` or : -~~~ +```.cpp #include "opencv2/core.hpp" using namespace cv; ... 
- Mat H = findHomography(points1, points2, CV_RANSAC, 5 ); + Mat H = findHomography(points1, points2, RANSAC, 5 ); ... -~~~ +``` Some of the current or future OpenCV external names may conflict with STL or other libraries. In this case, use explicit namespace specifiers to resolve the name conflicts: -@code +```.cpp Mat a(100, 100, CV_32F); randu(a, Scalar::all(1), Scalar::all(std::rand())); cv::log(a, a); a /= std::log(2.); -@endcode +``` ### Automatic Memory Management OpenCV handles all the memory automatically. -First of all, std::vector, Mat, and other data structures used by the functions and methods have +First of all, std::vector, cv::Mat, and other data structures used by the functions and methods have destructors that deallocate the underlying memory buffers when needed. This means that the destructors do not always deallocate the buffers as in case of Mat. They take into account possible data sharing. A destructor decrements the reference counter associated with the matrix data buffer. @@ -73,7 +73,7 @@ structures refer to the same buffer. Similarly, when a Mat instance is copied, n really copied. Instead, the reference counter is incremented to memorize that there is another owner of the same data. There is also the Mat::clone method that creates a full copy of the matrix data. See the example below: -@code +```.cpp // create a big 8Mb matrix Mat A(1000, 1000, CV_64F); @@ -98,24 +98,24 @@ See the example below: // finally, make a full copy of C. As a result, the big modified // matrix will be deallocated, since it is not referenced by anyone C = C.clone(); -@endcode +``` You see that the use of Mat and other basic structures is simple. But what about high-level classes or even user data types created without taking automatic memory management into account? For them, -OpenCV offers the Ptr template class that is similar to std::shared\_ptr from C++11. So, instead of +OpenCV offers the cv::Ptr template class that is similar to std::shared_ptr from C++11. 
So, instead of using plain pointers: -@code +```.cpp T* ptr = new T(...); -@endcode +``` you can use: -@code +```.cpp Ptr ptr(new T(...)); -@endcode +``` or: -@code +```.cpp Ptr ptr = makePtr(...); -@endcode -Ptr\ encapsulates a pointer to a T instance and a reference counter associated with the pointer. -See the Ptr description for details. +``` +`Ptr` encapsulates a pointer to a T instance and a reference counter associated with the pointer. +See the cv::Ptr description for details. ### Automatic Allocation of the Output Data @@ -126,7 +126,7 @@ size and type of the output arrays are determined from the size and type of inpu the functions take extra parameters that help to figure out the output array properties. Example: -@code +```.cpp #include "opencv2/imgproc.hpp" #include "opencv2/highgui.hpp" @@ -138,7 +138,7 @@ Example: if(!cap.isOpened()) return -1; Mat frame, edges; - namedWindow("edges",1); + namedWindow("edges", WINDOW_AUTOSIZE); for(;;) { cap >> frame; @@ -150,11 +150,11 @@ Example: } return 0; } -@endcode -The array frame is automatically allocated by the \>\> operator since the video frame resolution and +``` +The array frame is automatically allocated by the `>>` operator since the video frame resolution and the bit-depth is known to the video capturing module. The array edges is automatically allocated by the cvtColor function. It has the same size and the bit-depth as the input array. The number of -channels is 1 because the color conversion code COLOR\_BGR2GRAY is passed, which means a color to +channels is 1 because the color conversion code cv::COLOR_BGR2GRAY is passed, which means a color to grayscale conversion. Note that frame and edges are allocated only once during the first execution of the loop body since all the next video frames have the same resolution. If you somehow change the video resolution, the arrays are automatically reallocated. 
@@ -184,11 +184,11 @@ within the 0..255 range: \f[I(x,y)= \min ( \max (\textrm{round}(r), 0), 255)\f] Similar rules are applied to 8-bit signed, 16-bit signed and unsigned types. This semantics is used -everywhere in the library. In C++ code, it is done using the saturate\_cast\<\> functions that +everywhere in the library. In C++ code, it is done using the `cv::saturate_cast<>` functions that resemble standard C++ cast operations. See below the implementation of the formula provided above: -@code +```.cpp I.at(y, x) = saturate_cast(r); -@endcode +``` where cv::uchar is an OpenCV 8-bit unsigned integer type. In the optimized SIMD code, such SSE2 instructions as paddusb, packuswb, and so on are used. They help achieve exactly the same behavior as in C++ code. @@ -206,7 +206,7 @@ Because of this and also to simplify development of bindings for other languages Matlab that do not have templates at all or have limited template capabilities, the current OpenCV implementation is based on polymorphism and runtime dispatching over templates. In those places where runtime dispatching would be too slow (like pixel access operators), impossible (generic -Ptr\<\> implementation), or just very inconvenient (saturate\_cast\<\>()) the current implementation +`cv::Ptr<>` implementation), or just very inconvenient (`cv::saturate_cast<>()`) the current implementation introduces small template classes, methods, and functions. Anywhere else in the current OpenCV version the use of templates is limited. @@ -223,25 +223,25 @@ is, array elements should have one of the following types: - a tuple of several elements where all elements have the same type (one of the above). An array whose elements are such tuples, are called multi-channel arrays, as opposite to the single-channel arrays, whose elements are scalar values. The maximum possible number of - channels is defined by the CV\_CN\_MAX constant, which is currently set to 512. 
+ channels is defined by the #CV_CN_MAX constant, which is currently set to 512. For these basic types, the following enumeration is applied: -@code +```.cpp enum { CV_8U=0, CV_8S=1, CV_16U=2, CV_16S=3, CV_32S=4, CV_32F=5, CV_64F=6 }; -@endcode +``` Multi-channel (n-channel) types can be specified using the following options: -- CV_8UC1 ... CV_64FC4 constants (for a number of channels from 1 to 4) +- #CV_8UC1 ... #CV_64FC4 constants (for a number of channels from 1 to 4) - CV_8UC(n) ... CV_64FC(n) or CV_MAKETYPE(CV_8U, n) ... CV_MAKETYPE(CV_64F, n) macros when the number of channels is more than 4 or unknown at the compilation time. -@note `CV_32FC1 == CV_32F, CV_32FC2 == CV_32FC(2) == CV_MAKETYPE(CV_32F, 2)`, and -`CV_MAKETYPE(depth, n) == ((depth&7) + ((n-1)<<3)``. This means that the constant type is formed from the +@note `#CV_32FC1 == #CV_32F, #CV_32FC2 == #CV_32FC(2) == #CV_MAKETYPE(CV_32F, 2)`, and +`#CV_MAKETYPE(depth, n) == ((depth&7) + ((n-1)<<3)`. This means that the constant type is formed from the depth, taking the lowest 3 bits, and the number of channels minus 1, taking the next -`log2(CV_CN_MAX)`` bits. +`log2(CV_CN_MAX)` bits. Examples: -@code +```.cpp Mat mtx(3, 3, CV_32F); // make a 3x3 floating-point matrix Mat cmtx(10, 1, CV_64FC2); // make a 10x1 2-channel floating-point // matrix (10-element complex vector) @@ -250,7 +250,7 @@ Examples: Mat grayscale(image.size(), CV_MAKETYPE(image.depth(), 1)); // make a 1-channel image of // the same size and same // channel type as img -@endcode +``` Arrays with more complex elements cannot be constructed or processed using OpenCV. Furthermore, each function or method can handle only a subset of all possible array types. Usually, the more complex the algorithm is, the smaller the supported subset of formats is. See below typical examples of such @@ -270,13 +270,13 @@ extended in future based on user requests. Many OpenCV functions process dense 2-dimensional or multi-dimensional numerical arrays. 
Usually, such functions take cppMat as parameters, but in some cases it's more convenient to use -std::vector\<\> (for a point set, for example) or Matx\<\> (for 3x3 homography matrix and such). To +`std::vector<>` (for a point set, for example) or `cv::Matx<>` (for 3x3 homography matrix and such). To avoid many duplicates in the API, special "proxy" classes have been introduced. The base "proxy" -class is InputArray. It is used for passing read-only arrays on a function input. The derived from -InputArray class OutputArray is used to specify an output array for a function. Normally, you should +class is cv::InputArray. It is used for passing read-only arrays on a function input. The derived from +InputArray class cv::OutputArray is used to specify an output array for a function. Normally, you should not care of those intermediate types (and you should not declare variables of those types explicitly) - it will all just work automatically. You can assume that instead of -InputArray/OutputArray you can always use Mat, std::vector\<\>, Matx\<\>, Vec\<\> or Scalar. When a +InputArray/OutputArray you can always use `Mat`, `std::vector<>`, `cv::Matx<>`, `cv::Vec<>` or `cv::Scalar`. When a function has an optional input or output array, and you do not have or do not want one, pass cv::noArray(). @@ -291,28 +291,29 @@ The exceptions can be instances of the cv::Exception class or its derivatives. I cv::Exception is a derivative of std::exception. So it can be gracefully handled in the code using other standard C++ library components. -The exception is typically thrown either using the CV\_Error(errcode, description) macro, or its -printf-like CV\_Error\_(errcode, printf-spec, (printf-args)) variant, or using the -CV\_Assert(condition) macro that checks the condition and throws an exception when it is not -satisfied. 
For performance-critical code, there is CV\_DbgAssert(condition) that is only retained in +The exception is typically thrown either using the `#CV_Error(errcode, description)` macro, or its +printf-like `#CV_Error_(errcode, (printf-spec, printf-args))` variant, or using the +#CV_Assert(condition) macro that checks the condition and throws an exception when it is not +satisfied. For performance-critical code, there is #CV_DbgAssert(condition) that is only retained in the Debug configuration. Due to the automatic memory management, all the intermediate buffers are automatically deallocated in case of a sudden error. You only need to add a try statement to catch exceptions, if needed: : -@code +```.cpp try { ... // call OpenCV } - catch( cv::Exception& e ) + catch (const cv::Exception& e) { const char* err_msg = e.what(); std::cout << "exception caught: " << err_msg << std::endl; } -@endcode +``` ### Multi-threading and Re-enterability -The current OpenCV implementation is fully re-enterable. That is, the same function, the same -*constant* method of a class instance, or the same *non-constant* method of different class -instances can be called from different threads. Also, the same cv::Mat can be used in different -threads because the reference-counting operations use the architecture-specific atomic instructions. +The current OpenCV implementation is fully re-enterable. +That is, the same function or the same methods of different class instances +can be called from different threads. +Also, the same Mat can be used in different threads +because the reference-counting operations use the architecture-specific atomic instructions. 
diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index b688a39638..31cd7a8202 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -352,7 +352,7 @@ This macro can be used to construct an error message on-fly to include some dyna for example: @code // note the extra parentheses around the formatted text message - CV_Error_( CV_StsOutOfRange, + CV_Error_(Error::StsOutOfRange, ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue)); @endcode @param code one of Error::Code diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index a15bbff096..c576c56b86 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -519,7 +519,7 @@ static inline size_t divUp(size_t a, unsigned int b) /** @brief Enables or disables the optimized code. -The function can be used to dynamically turn on and off optimized code (code that uses SSE2, AVX, +The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2, and other instructions on the platforms that support it). It sets a global flag that is further checked by OpenCV functions. 
Since the flag is not checked in the inner OpenCV loops, it is only safe to call the function on the very top level in your application where you can be sure that no From 513210286355e1988a39974b0963c70dcfc330c7 Mon Sep 17 00:00:00 2001 From: LaurentBerger Date: Sun, 4 Nov 2018 21:30:31 +0100 Subject: [PATCH 11/14] typo in kmeans doc --- modules/core/include/opencv2/core.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 5f2fbe2028..8e61322164 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -2962,7 +2962,7 @@ An example on K-means clustering /** @brief Finds centers of clusters and groups input samples around the clusters. The function kmeans implements a k-means algorithm that finds the centers of cluster_count clusters -and groups the input samples around the clusters. As an output, \f$\texttt{labels}_i\f$ contains a +and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$ contains a 0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix. 
@note From cae2a5356320025401dac7e6b92e68d12016ae9c Mon Sep 17 00:00:00 2001 From: berak Date: Tue, 6 Nov 2018 11:43:58 +0100 Subject: [PATCH 12/14] highgui: fix broken waitKey() condition in window_w32 --- modules/highgui/src/window_w32.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index a4ec2d51b6..e67fb4e187 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -1976,7 +1976,7 @@ cvWaitKey( int delay ) MSG message; int is_processed = 0; - if( delay <= 0 ) + if( (delay <= 0) && hg_windows) GetMessage(&message, 0, 0, 0); else if( PeekMessage(&message, 0, 0, 0, PM_REMOVE) == FALSE ) { From eaee63d96b63d17db6f2d5e519119786513e0cdf Mon Sep 17 00:00:00 2001 From: berak Date: Sat, 3 Nov 2018 09:15:27 +0100 Subject: [PATCH 13/14] java: add converters, tests for MatOfRotatedRect --- .../src/java/org/opencv/utils/Converters.java | 39 +++++++++++++++++++ .../src/org/opencv/test/OpenCVTestCase.java | 19 +++++++++ .../org/opencv/test/utils/ConvertersTest.java | 29 +++++++++++++- .../src/org/opencv/test/OpenCVTestCase.java | 19 +++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) diff --git a/modules/java/generator/src/java/org/opencv/utils/Converters.java b/modules/java/generator/src/java/org/opencv/utils/Converters.java index c0575a6665..9faf2ecee9 100644 --- a/modules/java/generator/src/java/org/opencv/utils/Converters.java +++ b/modules/java/generator/src/java/org/opencv/utils/Converters.java @@ -13,7 +13,9 @@ import org.opencv.core.MatOfPoint2f; import org.opencv.core.MatOfPoint3f; import org.opencv.core.Point; import org.opencv.core.Point3; +import org.opencv.core.Size; import org.opencv.core.Rect; +import org.opencv.core.RotatedRect; import org.opencv.core.Rect2d; import org.opencv.core.DMatch; import org.opencv.core.KeyPoint; @@ -770,4 +772,41 @@ public class Converters { } mats.clear(); } + + public static Mat 
vector_RotatedRect_to_Mat(List<RotatedRect> rs) { + Mat res; + int count = (rs != null) ? rs.size() : 0; + if (count > 0) { + res = new Mat(count, 1, CvType.CV_32FC(5)); + float[] buff = new float[5 * count]; + for (int i = 0; i < count; i++) { + RotatedRect r = rs.get(i); + buff[5 * i] = (float)r.center.x; + buff[5 * i + 1] = (float)r.center.y; + buff[5 * i + 2] = (float)r.size.width; + buff[5 * i + 3] = (float)r.size.height; + buff[5 * i + 4] = (float)r.angle; + } + res.put(0, 0, buff); + } else { + res = new Mat(); + } + return res; + } + + public static void Mat_to_vector_RotatedRect(Mat m, List<RotatedRect> rs) { + if (rs == null) + throw new java.lang.IllegalArgumentException("rs == null"); + int count = m.rows(); + if (CvType.CV_32FC(5) != m.type() || m.cols() != 1) + throw new java.lang.IllegalArgumentException( + "CvType.CV_32FC5 != m.type() || m.cols()!=1\n" + m); + + rs.clear(); + float[] buff = new float[5 * count]; + m.get(0, 0, buff); + for (int i = 0; i < count; i++) { + rs.add(new RotatedRect(new Point(buff[5 * i], buff[5 * i + 1]), new Size(buff[5 * i + 2], buff[5 * i + 3]), buff[5 * i + 4])); + } + } } diff --git a/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java b/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java index 2cd2b86155..c3af0b343b 100644 --- a/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java +++ b/modules/java/test/android_test/src/org/opencv/test/OpenCVTestCase.java @@ -17,6 +17,7 @@ import org.opencv.core.Mat; import org.opencv.core.Point; import org.opencv.core.Point3; import org.opencv.core.Rect; +import org.opencv.core.RotatedRect; import org.opencv.core.Scalar; import org.opencv.core.Size; import org.opencv.core.DMatch; @@ -336,6 +337,15 @@ public class OpenCVTestCase extends TestCase { assertRectEquals(list1.get(i), list2.get(i)); } + public static void assertListRotatedRectEquals(List<RotatedRect> list1, List<RotatedRect> list2) { + if (list1.size() != list2.size()) { + throw new UnsupportedOperationException(); +
} + + for (int i = 0; i < list1.size(); i++) + assertRotatedRectEquals(list1.get(i), list2.get(i)); + } + public static void assertRectEquals(Rect expected, Rect actual) { String msg = "expected:<" + expected + "> but was:<" + actual + ">"; assertEquals(msg, expected.x, actual.x); @@ -344,6 +354,15 @@ public class OpenCVTestCase extends TestCase { assertEquals(msg, expected.height, actual.height); } + public static void assertRotatedRectEquals(RotatedRect expected, RotatedRect actual) { + String msg = "expected:<" + expected + "> but was:<" + actual + ">"; + assertEquals(msg, expected.center.x, actual.center.x); + assertEquals(msg, expected.center.y, actual.center.y); + assertEquals(msg, expected.size.width, actual.size.width); + assertEquals(msg, expected.size.height, actual.size.height); + assertEquals(msg, expected.angle, actual.angle); + } + public static void assertMatEqual(Mat m1, Mat m2) { compareMats(m1, m2, true); } diff --git a/modules/java/test/common_test/src/org/opencv/test/utils/ConvertersTest.java b/modules/java/test/common_test/src/org/opencv/test/utils/ConvertersTest.java index 117bbd8083..54d2736c46 100644 --- a/modules/java/test/common_test/src/org/opencv/test/utils/ConvertersTest.java +++ b/modules/java/test/common_test/src/org/opencv/test/utils/ConvertersTest.java @@ -4,7 +4,9 @@ import org.opencv.core.CvType; import org.opencv.core.Mat; import org.opencv.core.Point; import org.opencv.core.Point3; +import org.opencv.core.Size; import org.opencv.core.Rect; +import org.opencv.core.RotatedRect; import org.opencv.core.DMatch; import org.opencv.core.KeyPoint; import org.opencv.test.OpenCVTestCase; @@ -222,6 +224,19 @@ public class ConvertersTest extends OpenCVTestCase { assertListRectEquals(truth, rectangles); } + public void testMat_to_vector_RotatedRect() { + Mat src = new Mat(2, 1, CvType.CV_32FC(5)); + src.put(0, 0, 2, 2, 5, 2, 7, + 0, 6, 4, 1, 3); + List<RotatedRect> rectangles = new ArrayList<RotatedRect>(); + + Converters.Mat_to_vector_RotatedRect(src, rectangles); +
List<RotatedRect> truth = new ArrayList<RotatedRect>(); + truth.add(new RotatedRect(new Point(2, 2), new Size(5, 2), 7)); + truth.add(new RotatedRect(new Point(0, 6), new Size(4, 1), 3)); + assertListRotatedRectEquals(truth, rectangles); + } + public void testMat_to_vector_uchar() { Mat src = new Mat(3, 1, CvType.CV_8UC1); src.put(0, 0, 2, 4, 3); @@ -465,6 +480,19 @@ public class ConvertersTest extends OpenCVTestCase { assertMatEqual(truth, dst); } + public void testVector_RotatedRect_to_Mat() { + List<RotatedRect> rectangles = new ArrayList<RotatedRect>(); + rectangles.add(new RotatedRect(new Point(2, 2), new Size(5, 2), 7)); + rectangles.add(new RotatedRect(new Point(0, 0), new Size(6, 4), 3)); + + Mat dst = Converters.vector_RotatedRect_to_Mat(rectangles); + + Mat truth = new Mat(2, 1, CvType.CV_32FC(5)); + truth.put(0, 0, 2, 2, 5, 2, 7, + 0, 0, 6, 4, 3); + assertMatEqual(truth, dst, EPS); + } + public void testVector_uchar_to_Mat() { List bytes = new ArrayList(); byte value1 = 1; @@ -498,5 +526,4 @@ public class ConvertersTest extends OpenCVTestCase { fail("Not yet implemented"); } - } diff --git a/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java b/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java index f369bb1783..a66206e223 100644 --- a/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java +++ b/modules/java/test/pure_test/src/org/opencv/test/OpenCVTestCase.java @@ -20,6 +20,7 @@ import org.opencv.core.Mat; import org.opencv.core.Point; import org.opencv.core.Point3; import org.opencv.core.Rect; +import org.opencv.core.RotatedRect; import org.opencv.core.Scalar; import org.opencv.core.Size; import org.opencv.core.DMatch; @@ -362,6 +363,15 @@ public class OpenCVTestCase extends TestCase { assertRectEquals(list1.get(i), list2.get(i)); } + public static void assertListRotatedRectEquals(List<RotatedRect> list1, List<RotatedRect> list2) { + if (list1.size() != list2.size()) { + throw new UnsupportedOperationException(); + } + + for (int i = 0; i < list1.size(); i++) +
assertRotatedRectEquals(list1.get(i), list2.get(i)); + } + public static void assertRectEquals(Rect expected, Rect actual) { String msg = "expected:<" + expected + "> but was:<" + actual + ">"; assertEquals(msg, expected.x, actual.x); @@ -370,6 +380,15 @@ public class OpenCVTestCase extends TestCase { assertEquals(msg, expected.height, actual.height); } + public static void assertRotatedRectEquals(RotatedRect expected, RotatedRect actual) { + String msg = "expected:<" + expected + "> but was:<" + actual + ">"; + assertEquals(msg, expected.center.x, actual.center.x); + assertEquals(msg, expected.center.y, actual.center.y); + assertEquals(msg, expected.size.width, actual.size.width); + assertEquals(msg, expected.size.height, actual.size.height); + assertEquals(msg, expected.angle, actual.angle); + } + public static void assertMatEqual(Mat m1, Mat m2) { compareMats(m1, m2, true); } From cceeca3052bf1e38dd48dd422e9273c39bce76ce Mon Sep 17 00:00:00 2001 From: lqy123000 Date: Wed, 7 Nov 2018 00:13:48 +0800 Subject: [PATCH 14/14] Merge pull request #12916 from lqy123000:bugfix_templmatch * avoid rounding errors * imgproc: replace condition in matchTemplate --- modules/imgproc/src/templmatch.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp index 1dabdb0b05..b5a08f087a 100644 --- a/modules/imgproc/src/templmatch.cpp +++ b/modules/imgproc/src/templmatch.cpp @@ -947,7 +947,12 @@ static void common_matchTemplate( Mat& img, Mat& templ, Mat& result, int method, if( isNormed ) { - t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm; + double diff2 = MAX(wndSum2 - wndMean2, 0); + if (diff2 <= std::min(0.5, 10 * FLT_EPSILON * wndSum2)) + t = 0; // avoid rounding errors + else + t = std::sqrt(diff2)*templNorm; + if( fabs(num) < t ) num /= t; else if( fabs(num) < t*1.125 )