Added edge-aware filters.

pull/52/head
vludv 11 years ago
parent 079ff5c06d
commit b0e450d2ed
  1. 4
      modules/ximgproc/CMakeLists.txt
  2. 227
      modules/ximgproc/doc/edge_aware_filters.rst
  3. 10
      modules/ximgproc/doc/ximgproc.rst
  4. 136
      modules/ximgproc/include/opencv2/edge_filter.hpp
  5. 55
      modules/ximgproc/perf/perf_adaptive_manifold.cpp
  6. 52
      modules/ximgproc/perf/perf_domain_transform.cpp
  7. 43
      modules/ximgproc/perf/perf_guided_filter.cpp
  8. 3
      modules/ximgproc/perf/perf_main.cpp
  9. 10
      modules/ximgproc/perf/perf_precomp.hpp
  10. 9
      modules/ximgproc/samples/CMakeLists.txt
  11. 181
      modules/ximgproc/samples/live_demo.cpp
  12. 764
      modules/ximgproc/src/adaptive_manifold_filter_n.cpp
  13. 48
      modules/ximgproc/src/domain_transform.cpp
  14. 174
      modules/ximgproc/src/dtfilter_cpu.cpp
  15. 255
      modules/ximgproc/src/dtfilter_cpu.hpp
  16. 590
      modules/ximgproc/src/dtfilter_cpu.inl.hpp
  17. 829
      modules/ximgproc/src/dtfilter_ocl.cpp
  18. 112
      modules/ximgproc/src/dtfilter_ocl.hpp
  19. 512
      modules/ximgproc/src/edgeaware_filters_common.cpp
  20. 57
      modules/ximgproc/src/edgeaware_filters_common.hpp
  21. 753
      modules/ximgproc/src/guided_filter.cpp
  22. 349
      modules/ximgproc/src/guided_filter_ocl.cpp
  23. 8328
      modules/ximgproc/src/opencl/OpenCLKernel.hpp
  24. 250
      modules/ximgproc/src/opencl/dtfilter_dt.cl
  25. 427
      modules/ximgproc/src/opencl/dtfilter_flt.cl
  26. 15
      modules/ximgproc/src/precomp.hpp
  27. 224
      modules/ximgproc/src/weighted_median_filter.cpp
  28. 173
      modules/ximgproc/test/test_adaptive_manifold.cpp
  29. 944
      modules/ximgproc/test/test_adaptive_manifold_ref_impl.cpp
  30. 219
      modules/ximgproc/test/test_domain_transform.cpp
  31. 361
      modules/ximgproc/test/test_guided_filter.cpp
  32. 3
      modules/ximgproc/test/test_main.cpp
  33. 13
      modules/ximgproc/test/test_precomp.hpp
  34. BIN
      modules/ximgproc/testdata/edgefilter/amf/kodim23_amf_ss15_sr0.3_outliers_ref.png
  35. BIN
      modules/ximgproc/testdata/edgefilter/amf/kodim23_amf_ss30_sr0.1_ref.png
  36. BIN
      modules/ximgproc/testdata/edgefilter/amf/kodim23_amf_ss50_sr0.3_ref.png
  37. BIN
      modules/ximgproc/testdata/edgefilter/amf/kodim23_amf_ss50_sr0.5_outliers_ref.png
  38. BIN
      modules/ximgproc/testdata/edgefilter/amf/kodim23_amf_ss5_sr0.1_outliers_ref.png
  39. BIN
      modules/ximgproc/testdata/edgefilter/amf/kodim23_amf_ss5_sr0.3_ref.png
  40. BIN
      modules/ximgproc/testdata/edgefilter/dt/authors_statue_IC_ss30_sc0.2.png
  41. BIN
      modules/ximgproc/testdata/edgefilter/dt/authors_statue_NC_ss30_sc0.2.png
  42. BIN
      modules/ximgproc/testdata/edgefilter/dt/authors_statue_RF_ss30_sc0.2.png
  43. BIN
      modules/ximgproc/testdata/edgefilter/kodim23.png
  44. BIN
      modules/ximgproc/testdata/edgefilter/statue.png

@ -0,0 +1,4 @@
set(the_description "Extended image processing module. It includes edge-aware filters and etc.")
ocv_define_module(ximgproc opencv_imgproc opencv_core opencv_highgui)
target_link_libraries(opencv_ximgproc)

@ -0,0 +1,227 @@
.. highlight:: cpp
Domain Transform filter
====================================
This section describes interface for Domain Transform filter.
For more details about this filter see [Gastal11]_ and References_.
DTFilter
------------------------------------
.. ocv:class:: DTFilter : public Algorithm
Interface for realizations of Domain Transform filter.
createDTFilter
------------------------------------
Factory method, create instance of :ocv:class:`DTFilter` and produce initialization routines.
.. ocv:function:: Ptr<DTFilter> createDTFilter(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3)
.. ocv:pyfunction:: cv2.createDTFilter(guide, sigmaSpatial, sigmaColor[, mode[, numIters]]) -> instance
:param guide: guided image (used to build transformed distance, which describes edge structure of guided image).
:param sigmaSpatial: :math:`{\sigma}_H` parameter in the original article, it's similar to the sigma in the coordinate space into :ocv:func:`bilateralFilter`.
:param sigmaColor: :math:`{\sigma}_r` parameter in the original article, it's similar to the sigma in the color space into :ocv:func:`bilateralFilter`.
:param mode: one form three modes ``DTF_NC``, ``DTF_RF`` and ``DTF_IC`` which corresponds to three modes for filtering 2D signals in the article.
:param numIters: optional number of iterations used for filtering, 3 is quite enough.
For more details about Domain Transform filter parameters, see the original article [Gastal11]_ and `Domain Transform filter homepage <http://www.inf.ufrgs.br/~eslgastal/DomainTransform/>`_.
DTFilter::filter
------------------------------------
Produce domain transform filtering operation on source image.
.. ocv:function:: void DTFilter::filter(InputArray src, OutputArray dst, int dDepth = -1)
.. ocv:pyfunction:: cv2.DTFilter.filter(src, dst[, dDepth]) -> None
:param src: filtering image with unsigned 8-bit or floating 32-bit depth and up to 4 channels.
:param dst: destination image.
:param dDepth: optional depth of the output image. ``dDepth`` can be set to -1, which will be equivalent to ``src.depth()``.
dtFilter
------------------------------------
Simple one-line Domain Transform filter call.
If you have multiple images to filter with the same guided image then use :ocv:class:`DTFilter` interface to avoid extra computations on initialization stage.
.. ocv:function:: void dtFilter(InputArray guide, InputArray src, OutputArray dst, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3)
.. ocv:pyfunction:: cv2.dtFilter(guide, src, sigmaSpatial, sigmaColor[, mode[, numIters]]) -> None
:param guide: guided image (also called as joint image) with unsigned 8-bit or floating 32-bit depth and up to 4 channels.
:param src: filtering image with unsigned 8-bit or floating 32-bit depth and up to 4 channels.
:param sigmaSpatial: :math:`{\sigma}_H` parameter in the original article, it's similar to the sigma in the coordinate space into :ocv:func:`bilateralFilter`.
:param sigmaColor: :math:`{\sigma}_r` parameter in the original article, it's similar to the sigma in the color space into :ocv:func:`bilateralFilter`.
:param mode: one form three modes ``DTF_NC``, ``DTF_RF`` and ``DTF_IC`` which corresponds to three modes for filtering 2D signals in the article.
:param numIters: optional number of iterations used for filtering, 3 is quite enough.
.. seealso:: :ocv:func:`bilateralFilter`, :ocv:func:`guidedFilter`, :ocv:func:`amFilter`
Guided Filter
====================================
This section describes interface for Guided Filter.
For more details about this filter see [Kaiming10]_ and References_.
GuidedFilter
------------------------------------
.. ocv:class:: GuidedFilter : public Algorithm
Interface for realizations of Guided Filter.
createGuidedFilter
------------------------------------
Factory method, create instance of :ocv:class:`GuidedFilter` and produce initialization routines.
.. ocv:function:: Ptr<GuidedFilter> createGuidedFilter(InputArray guide, int radius, double eps)
.. ocv:pyfunction:: cv2.createGuidedFilter(guide, radius, eps) -> instance
:param guide: guided image (or array of images) with up to 3 channels, if it have more then 3 channels then only first 3 channels will be used.
:param radius: radius of Guided Filter.
:param eps: regularization term of Guided Filter. :math:`{eps}^2` is similar to the sigma in the color space into :ocv:func:`bilateralFilter`.
For more details about Guided Filter parameters, see the original article [Kaiming10]_.
GuidedFilter::filter
------------------------------------
Apply Guided Filter to the filtering image.
.. ocv:function:: void GuidedFilter::filter(InputArray src, OutputArray dst, int dDepth = -1)
.. ocv:pyfunction:: cv2.GuidedFilter.filter(src, dst[, dDepth]) -> None
:param src: filtering image with any numbers of channels.
:param dst: output image.
:param dDepth: optional depth of the output image. ``dDepth`` can be set to -1, which will be equivalent to ``src.depth()``.
guidedFilter
------------------------------------
Simple one-line Guided Filter call.
If you have multiple images to filter with the same guided image then use :ocv:class:`GuidedFilter` interface to avoid extra computations on initialization stage.
.. ocv:function:: void guidedFilter(InputArray guide, InputArray src, OutputArray dst, int radius, double eps, int dDepth = -1)
.. ocv:pyfunction:: cv2.guidedFilter(guide, src, dst, radius, eps, [, dDepth]) -> None
:param guide: guided image (or array of images) with up to 3 channels, if it have more then 3 channels then only first 3 channels will be used.
:param src: filtering image with any numbers of channels.
:param dst: output image.
:param radius: radius of Guided Filter.
:param eps: regularization term of Guided Filter. :math:`{eps}^2` is similar to the sigma in the color space into :ocv:func:`bilateralFilter`.
:param dDepth: optional depth of the output image.
.. seealso:: :ocv:func:`bilateralFilter`, :ocv:func:`dtFilter`, :ocv:func:`amFilter`
Adaptive Manifold Filter
====================================
This section describes interface for Adaptive Manifold Filter.
For more details about this filter see [Gastal12]_ and References_.
AdaptiveManifoldFilter
------------------------------------
.. ocv:class:: AdaptiveManifoldFilter : public Algorithm
Interface for Adaptive Manifold Filter realizations.
Below listed optional parameters which may be set up with :ocv:func:`Algorithm::set` function.
.. ocv:member:: double sigma_s = 16.0
Spatial standard deviation.
.. ocv:member:: double sigma_r = 0.2
Color space standard deviation.
.. ocv:member:: int tree_height = -1
Height of the manifold tree (default = -1 : automatically computed).
.. ocv:member:: int num_pca_iterations = 1
Number of iterations to computed the eigenvector.
.. ocv:member:: bool adjust_outliers = false
Specify adjust outliers using Eq. 9 or not.
.. ocv:member:: bool use_RNG = true
Specify use random number generator to compute eigenvector or not.
createAMFilter
------------------------------------
Factory method, create instance of :ocv:class:`AdaptiveManifoldFilter` and produce some initialization routines.
.. ocv:function:: Ptr<AdaptiveManifoldFilter> createAMFilter(double sigma_s, double sigma_r, bool adjust_outliers = false)
.. ocv:pyfunction:: cv2.createAMFilter(sigma_s, sigma_r, adjust_outliers) -> instance
:param sigma_s: spatial standard deviation.
:param sigma_r: color space standard deviation, it is similar to the sigma in the color space into :ocv:func:`bilateralFilter`.
:param adjust_outliers: optional, specify perform outliers adjust operation or not, (Eq. 9) in the original paper.
For more details about Adaptive Manifold Filter parameters, see the original article [Gastal12]_.
.. note::
Joint images with `CV_8U` and `CV_16U` depth converted to images with `CV_32F` depth and [0; 1] color range before processing.
Hence color space sigma `sigma_r` must be in [0; 1] range, unlike same sigmas in :ocv:func:`bilateralFilter` and :ocv:func:`dtFilter` functions.
AdaptiveManifoldFilter::filter
------------------------------------
Apply high-dimensional filtering using adaptive manifolds.
.. ocv:function:: void AdaptiveManifoldFilter::filter(InputArray src, OutputArray dst, InputArray joint = noArray())
.. ocv:pyfunction:: cv2.AdaptiveManifoldFilter.filter(src, dst[, joint]) -> None
:param src: filtering image with any numbers of channels.
:param dst: output image.
:param joint: optional joint (also called as guided) image with any numbers of channels.
amFilter
------------------------------------
Simple one-line Adaptive Manifold Filter call.
.. ocv:function:: void amFilter(InputArray joint, InputArray src, OutputArray dst, double sigma_s, double sigma_r, bool adjust_outliers = false)
.. ocv:pyfunction:: cv2.amFilter(joint, src, dst, sigma_s, sigma_r, [, adjust_outliers]) -> None
:param joint: joint (also called as guided) image or array of images with any numbers of channels.
:param src: filtering image with any numbers of channels.
:param dst: output image.
:param sigma_s: spatial standard deviation.
:param sigma_r: color space standard deviation, it is similar to the sigma in the color space into :ocv:func:`bilateralFilter`.
:param adjust_outliers: optional, specify perform outliers adjust operation or not, (Eq. 9) in the original paper.
.. note::
Joint images with `CV_8U` and `CV_16U` depth converted to images with `CV_32F` depth and [0; 1] color range before processing.
Hence color space sigma `sigma_r` must be in [0; 1] range, unlike same sigmas in :ocv:func:`bilateralFilter` and :ocv:func:`dtFilter` functions.
.. seealso:: :ocv:func:`bilateralFilter`, :ocv:func:`dtFilter`, :ocv:func:`guidedFilter`
References
==========
.. [Gastal11] E. Gastal and M. Oliveira, "Domain Transform for Edge-Aware Image and Video Processing", Proceedings of SIGGRAPH, 2011, vol. 30, pp. 69:1 - 69:12.
The paper is available `online <http://www.inf.ufrgs.br/~eslgastal/DomainTransform/>`_.
.. [Gastal12] E. Gastal and M. Oliveira, "Adaptive manifolds for real-time high-dimensional filtering," Proceedings of SIGGRAPH, 2012, vol. 31, pp. 33:1 - 33:13.
The paper is available `online <http://inf.ufrgs.br/~eslgastal/AdaptiveManifolds/>`_.
.. [Kaiming10] Kaiming He et. al., "Guided Image Filtering," ECCV 2010, pp. 1 - 14.
The paper is available `online <http://research.microsoft.com/en-us/um/people/kahe/eccv10/>`_.
.. [Ziyang13] Ziyang Ma et al., "Constant Time Weighted Median Filtering for Stereo Matching and Beyond," ICCV, 2013, pp. 49 - 56.
The paper is available `online <http://www.cv-foundation.org/openaccess/content_iccv_2013/papers/Ma_Constant_Time_Weighted_2013_ICCV_paper.pdf>`_.

@ -0,0 +1,10 @@
********************************
ximgproc. Extended image processing module.
********************************
.. highlight:: cpp
.. toctree::
:maxdepth: 2
edge_aware_filters

@ -0,0 +1,136 @@
/*
* Software License Agreement (BSD License)
*
* Copyright (c) 2009, Willow Garage, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Willow Garage, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef __OPENCV_EDGEFILTER_HPP__
#define __OPENCV_EDGEFILTER_HPP__
#ifdef __cplusplus
#include <opencv2/core.hpp>
namespace cv
{
enum EdgeAwareFiltersList
{
DTF_NC,
DTF_IC,
DTF_RF,
GUIDED_FILTER,
AM_FILTER
};
/*Interface for DT filters*/
class CV_EXPORTS_W DTFilter : public Algorithm
{
public:
CV_WRAP virtual void filter(InputArray src, OutputArray dst, int dDepth = -1) = 0;
};
typedef Ptr<DTFilter> DTFilterPtr;
/*Fabric function for DT filters*/
CV_EXPORTS_W
Ptr<DTFilter> createDTFilter(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
/*One-line DT filter call*/
CV_EXPORTS_W
void dtFilter(InputArray guide, InputArray src, OutputArray dst, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/*Interface for Guided Filter*/
class CV_EXPORTS_W GuidedFilter : public Algorithm
{
public:
CV_WRAP virtual void filter(InputArray src, OutputArray dst, int dDepth = -1) = 0;
};
/*Fabric function for Guided Filter*/
CV_EXPORTS_W Ptr<GuidedFilter> createGuidedFilter(InputArray guide, int radius, double eps);
/*One-line Guided Filter call*/
CV_EXPORTS_W void guidedFilter(InputArray guide, InputArray src, OutputArray dst, int radius, double eps, int dDepth = -1);
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
class CV_EXPORTS_W AdaptiveManifoldFilter : public Algorithm
{
public:
/**
* @brief Apply High-dimensional filtering using adaptive manifolds
* @param src Input image to be filtered.
* @param dst Adaptive-manifold filter response.
* @param src_joint Image for joint filtering (optional).
*/
CV_WRAP virtual void filter(InputArray src, OutputArray dst, InputArray joint = noArray()) = 0;
CV_WRAP virtual void collectGarbage() = 0;
static Ptr<AdaptiveManifoldFilter> create();
};
//Fabric function for AM filter algorithm
CV_EXPORTS_W Ptr<AdaptiveManifoldFilter> createAMFilter(double sigma_s, double sigma_r, bool adjust_outliers = false);
//One-line Adaptive Manifold filter call
CV_EXPORTS_W void amFilter(InputArray joint, InputArray src, OutputArray dst, double sigma_s, double sigma_r, bool adjust_outliers = false);
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/*Interface for Weighted Median Filter*/
class CV_EXPORTS_W WeightedMedian : public Algorithm
{
public:
CV_WRAP virtual void filter(InputArray src, OutputArray dst, double rangeQuantizer = 1, int dDepth = -1) = 0;
};
/*Fabric function for Weighted Median Filter*/
CV_EXPORTS_W
Ptr<WeightedMedian> createWeightedMedianFilter(InputArray guide, double spatialSize, double colorSize, int filterType = DTF_NC);
/*One-line Weighted Median Filter call*/
CV_EXPORTS_W void weightedMedianFilter(InputArray guide, InputArray src, OutputArray dst, double spatialSize, double colorSize, int filterType = DTF_NC, double rangeQuantizer = 1);
}
#endif
#endif

@ -0,0 +1,55 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace std::tr1;
using namespace cv;
using namespace perf;
using namespace testing;
namespace cvtest
{
typedef tuple<bool, Size, int, int, MatType> AMPerfTestParam;
typedef TestBaseWithParam<AMPerfTestParam> AdaptiveManifoldPerfTest;
PERF_TEST_P( AdaptiveManifoldPerfTest, perf,
Combine(
Values(true, false), //adjust_outliers flag
Values(sz1080p, sz720p), //size
Values(1, 3, 8), //joint channels num
Values(1, 3), //source channels num
Values(CV_8U, CV_32F) //source and joint depth
)
)
{
AMPerfTestParam params = GetParam();
bool adjustOutliers = get<0>(params);
Size sz = get<1>(params);
int jointCnNum = get<2>(params);
int srcCnNum = get<3>(params);
int depth = get<4>(params);
Mat joint(sz, CV_MAKE_TYPE(depth, jointCnNum));
Mat src(sz, CV_MAKE_TYPE(depth, srcCnNum));
Mat dst(sz, CV_MAKE_TYPE(depth, srcCnNum));
cv::setNumThreads(cv::getNumberOfCPUs());
declare.in(joint, src, WARMUP_RNG).out(dst).tbb_threads(cv::getNumberOfCPUs());
double sigma_s = 16;
double sigma_r = 0.5;
TEST_CYCLE_N(3)
{
Mat res;
amFilter(joint, src, res, sigma_s, sigma_r, adjustOutliers);
//at 5th cycle sigma_s will be five times more and tree depth will be 5
sigma_s *= 1.38;
sigma_r /= 1.38;
}
SANITY_CHECK(dst);
}
}

@ -0,0 +1,52 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace std::tr1;
using namespace cv;
using namespace perf;
using namespace testing;
namespace cvtest
{
CV_ENUM(GuideMatType, CV_8UC1, CV_8UC3, CV_32FC1, CV_32FC3) //reduced set
CV_ENUM(SourceMatType, CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4) //full supported set
CV_ENUM(DTFMode, DTF_NC, DTF_IC, DTF_RF)
typedef tuple<GuideMatType, SourceMatType, Size, double, double, DTFMode> DTTestParams;
typedef TestBaseWithParam<DTTestParams> DomainTransformTest;
PERF_TEST_P( DomainTransformTest, perf,
Combine(
GuideMatType::all(),
SourceMatType::all(),
Values(szVGA, sz720p),
Values(10.0, 80.0),
Values(30.0, 50.0),
DTFMode::all()
)
)
{
int guideType = get<0>(GetParam());
int srcType = get<1>(GetParam());
Size size = get<2>(GetParam());
double sigmaSpatial = get<3>(GetParam());
double sigmaColor = get<4>(GetParam());
int dtfType = get<5>(GetParam());
Mat guide(size, guideType);
Mat src(size, srcType);
Mat dst(size, srcType);
declare.in(guide, src, WARMUP_RNG).out(dst).tbb_threads(cv::getNumberOfCPUs());
cv::setNumThreads(cv::getNumberOfCPUs());
TEST_CYCLE_N(5)
{
dtFilter(guide, src, dst, sigmaSpatial, sigmaColor, dtfType);
}
SANITY_CHECK(dst);
}
}

@ -0,0 +1,43 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace std::tr1;
using namespace cv;
using namespace perf;
using namespace testing;
namespace cvtest
{
CV_ENUM(GuideTypes, CV_8UC1, CV_8UC2, CV_8UC3, CV_32FC1, CV_32FC2, CV_32FC3);
CV_ENUM(SrcTypes, CV_8UC1, CV_8UC3, CV_32FC1, CV_32FC3);
typedef tuple<GuideTypes, SrcTypes, Size> GFParams;
typedef TestBaseWithParam<GFParams> GuidedFilterPerfTest;
PERF_TEST_P( GuidedFilterPerfTest, perf, Combine(GuideTypes::all(), SrcTypes::all(), Values(sz1080p, sz2K)) )
{
RNG rng(0);
GFParams params = GetParam();
int guideType = get<0>(params);
int srcType = get<1>(params);
Size sz = get<2>(params);
Mat guide(sz, guideType);
Mat src(sz, srcType);
Mat dst(sz, srcType);
declare.in(guide, src, WARMUP_RNG).out(dst).tbb_threads(cv::getNumberOfCPUs());
cv::setNumThreads(cv::getNumberOfCPUs());
TEST_CYCLE_N(3)
{
int radius = rng.uniform(5, 30);
double eps = rng.uniform(0.1, 1e5);
guidedFilter(guide, src, dst, radius, eps);
}
SANITY_CHECK(dst);
}
}

@ -0,0 +1,3 @@
#include "perf_precomp.hpp"
CV_PERF_TEST_MAIN(edgefilter)

@ -0,0 +1,10 @@
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include <opencv2/ts.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/edge_filter.hpp>
#endif

@ -0,0 +1,9 @@
cmake_minimum_required(VERSION 2.8)
project(live_demo)
find_package(OpenCV 3.0 REQUIRED)
set(SOURCES live_demo.cpp)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(live_demo ${SOURCES} ${HEADERS})
target_link_libraries(live_demo ${OpenCV_LIBS})

@ -0,0 +1,181 @@
#include <opencv2/core.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/edge_filter.hpp>
using namespace cv;
#include <iostream>
using namespace std;
typedef void(*FilteringOperation)(const Mat& src, Mat& dst);
//current mode (filtering operation example)
FilteringOperation g_filterOp = NULL;
//common sliders for every mode
int g_sigmaColor = 25;
int g_sigmaSpatial = 10;
//for Stylizing mode
int g_edgesGamma = 100;
//for Details Enhancement mode
int g_contrastBase = 100;
int g_detailsLevel = 100;
int g_numberOfCPUs = cv::getNumberOfCPUs();
//trivial filter
void filterDoNothing(const Mat& frame, Mat& dst)
{
frame.copyTo(dst);
}
//simple edge-aware blurring
void filterBlurring(const Mat& frame, Mat& dst)
{
dtFilter(frame, frame, dst, g_sigmaSpatial, g_sigmaColor, DTF_RF);
}
//stylizing filter
void filterStylize(const Mat& frame, Mat& dst)
{
//blur frame
Mat filtered;
dtFilter(frame, frame, filtered, g_sigmaSpatial, g_sigmaColor, DTF_NC);
//compute grayscale blurred frame
Mat filteredGray;
cvtColor(filtered, filteredGray, COLOR_BGR2GRAY);
//find gradients of blurred image
Mat gradX, gradY;
Sobel(filteredGray, gradX, CV_32F, 1, 0, 3, 1.0/255);
Sobel(filteredGray, gradY, CV_32F, 0, 1, 3, 1.0/255);
//compute magnitude of gradient and fit it accordingly the gamma parameter
Mat gradMagnitude;
magnitude(gradX, gradY, gradMagnitude);
cv::pow(gradMagnitude, g_edgesGamma/100.0, gradMagnitude);
//multiply a blurred frame to the value inversely proportional to the magnitude
Mat multiplier = 1.0/(1.0 + gradMagnitude);
cvtColor(multiplier, multiplier, COLOR_GRAY2BGR);
multiply(filtered, multiplier, dst, 1, dst.type());
}
void filterDetailEnhancement(const Mat& frame8u, Mat& dst)
{
Mat frame;
frame8u.convertTo(frame, CV_32F, 1.0/255);
//Decompose image to 3 Lab channels
Mat frameLab, frameLabCn[3];
cvtColor(frame, frameLab, COLOR_BGR2Lab);
split(frameLab, frameLabCn);
//Generate progressively smoother versions of the lightness channel
Mat layer0 = frameLabCn[0]; //first channel is original lightness
Mat layer1, layer2;
dtFilter(layer0, layer0, layer1, g_sigmaSpatial, g_sigmaColor, DTF_IC);
dtFilter(layer1, layer1, layer2, 2*g_sigmaSpatial, g_sigmaColor, DTF_IC);
//Compute detail layers
Mat detailLayer1 = layer0 - layer1;
Mat detailLayer2 = layer1 - layer2;
double cBase = g_contrastBase / 100.0;
double cDetails1 = g_detailsLevel / 100.0;
double cDetails2 = 2.0 - g_detailsLevel / 100.0;
//Generate lightness
double meanLigtness = mean(frameLabCn[0])[0];
frameLabCn[0] = cBase*(layer2 - meanLigtness) + meanLigtness; //fit contrast of base (most blurred) layer
frameLabCn[0] += cDetails1*detailLayer1; //add weighted sum of detail layers to new lightness
frameLabCn[0] += cDetails2*detailLayer2; //
//Update new lightness
merge(frameLabCn, 3, frameLab);
cvtColor(frameLab, frame, COLOR_Lab2BGR);
frame.convertTo(dst, CV_8U, 255);
}
void splitScreen(const Mat& rawFrame, Mat& outputFrame, Mat& srcFrame, Mat& processedFrame)
{
int h = rawFrame.rows;
int w = rawFrame.cols;
int cn = rawFrame.channels();
outputFrame.create(h, 2 * w, CV_MAKE_TYPE(CV_8U, cn));
srcFrame = outputFrame(Range::all(), Range(0, w));
processedFrame = outputFrame(Range::all(), Range(w, 2*w));
rawFrame.convertTo(srcFrame, srcFrame.type());
}
void changeModeCallback(int state, void *filter)
{
if (state == 1)
g_filterOp = (FilteringOperation) filter;
}
void changeNumberOfCpuCallback(int count, void *data)
{
count = std::max(1, count);
cv::setNumThreads(count);
g_numberOfCPUs = count;
}
int main(int argc, char **argv)
{
VideoCapture cap(0);
if (!cap.isOpened())
{
cerr << "Capture device was not found" << endl;
return -1;
}
namedWindow("Demo");
displayOverlay("Demo", "Press Ctrl+P to show property window", 5000);
//Thread trackbar
cv::setNumThreads(g_numberOfCPUs); //speedup filtering
createTrackbar("Threads", NULL, &g_numberOfCPUs, cv::getNumberOfCPUs(), changeNumberOfCpuCallback);
//Buttons to choose different modes
createButton("Mode Details Enhancement", changeModeCallback, filterDetailEnhancement, QT_RADIOBOX, true);
createButton("Mode Stylizing", changeModeCallback, filterStylize, QT_RADIOBOX, false);
createButton("Mode Blurring", changeModeCallback, filterBlurring, QT_RADIOBOX, false);
createButton("Mode DoNothing", changeModeCallback, filterDoNothing, QT_RADIOBOX, false);
//sliders for Details Enhancement mode
g_filterOp = filterDetailEnhancement; //set Details Enhancement as default filter
createTrackbar("Detail contrast", NULL, &g_contrastBase, 200);
createTrackbar("Detail level" , NULL, &g_detailsLevel, 200);
//sliders for Stylizing mode
createTrackbar("Style gamma", NULL, &g_edgesGamma, 300);
//sliders for every mode
createTrackbar("Sigma Spatial", NULL, &g_sigmaSpatial, 200);
createTrackbar("Sigma Color" , NULL, &g_sigmaColor, 200);
Mat rawFrame, outputFrame;
Mat srcFrame, processedFrame;
for (;;)
{
do
{
cap >> rawFrame;
} while (rawFrame.empty());
splitScreen(rawFrame, outputFrame, srcFrame, processedFrame);
g_filterOp(srcFrame, processedFrame);
imshow("Demo", outputFrame);
if (waitKey(1) == 27) break;
}
return 0;
}

@ -0,0 +1,764 @@
#include "precomp.hpp"
#include <opencv2/core/private.hpp>
#include <cmath>
#include <cstring>
#include <opencv2/edge_filter.hpp>
#include "edgeaware_filters_common.hpp"
using namespace cv::eaf;
using std::numeric_limits;
using std::vector;
using namespace cv;
#ifndef SQR
#define SQR(x) ((x)*(x))
#endif
namespace
{
void computeEigenVector(const Mat1f& X, const Mat1b& mask, Mat1f& dst, int num_pca_iterations, const Mat1f& rand_vec);
inline double Log2(double n)
{
return log(n) / log(2.0);
}
inline double floor_to_power_of_two(double r)
{
return pow(2.0, floor(Log2(r)));
}
inline int computeManifoldTreeHeight(double sigma_s, double sigma_r)
{
const double Hs = floor(Log2(sigma_s)) - 1.0;
const double Lr = 1.0 - sigma_r;
return max(2, static_cast<int>(ceil(Hs * Lr)));
}
static void splitChannels(InputArrayOfArrays src, vector<Mat>& dst)
{
CV_Assert(src.isMat() || src.isUMat() || src.isMatVector() || src.isUMatVector());
if ( src.isMat() || src.isUMat() )
{
split(src, dst);
}
else
{
Size sz;
int depth, totalCnNum;
checkSameSizeAndDepth(src, sz, depth);
totalCnNum = getTotalNumberOfChannels(src);
dst.resize(totalCnNum);
vector<int> fromTo(2 * totalCnNum);
for (int i = 0; i < totalCnNum; i++)
{
fromTo[i * 2 + 0] = i;
fromTo[i * 2 + 1] = i;
dst[i].create(sz, CV_MAKE_TYPE(depth, 1));
}
mixChannels(src, dst, fromTo);
}
}
class AdaptiveManifoldFilterN : public AdaptiveManifoldFilter
{
public:
AlgorithmInfo* info() const;
AdaptiveManifoldFilterN();
void filter(InputArray src, OutputArray dst, InputArray joint);
void collectGarbage();
protected:
bool adjust_outliers_;
double sigma_s_;
double sigma_r_;
int tree_height_;
int num_pca_iterations_;
bool useRNG;
private:
Size srcSize;
Size smallSize;
int jointCnNum;
int srcCnNum;
vector<Mat> jointCn;
vector<Mat> srcCn;
vector<Mat> etaFull;
vector<Mat> sum_w_ki_Psi_blur_;
Mat sum_w_ki_Psi_blur_0_;
Mat w_k;
Mat Psi_splat_0_small;
vector<Mat> Psi_splat_small;
Mat1f minDistToManifoldSquared;
int curTreeHeight;
float sigma_r_over_sqrt_2;
RNG rnd;
private: /*inline functions*/
double getNormalizer(int depth)
{
double normalizer = 1.0;
if (depth == CV_8U)
normalizer = 1.0 / 0xFF;
else if (depth == CV_16U)
normalizer = 1.0 / 0xFFFF;
return normalizer;
}
double getResizeRatio()
{
double df = min(sigma_s_ / 4.0, 256.0 * sigma_r_);
df = floor_to_power_of_two(df);
df = max(1.0, df);
return df;
}
Size getSmallSize()
{
double df = getResizeRatio();
return Size( cvRound(srcSize.width * (1.0/df)), cvRound(srcSize.height*(1.0/df)) ) ;
}
void downsample(InputArray src, OutputArray dst)
{
if (src.isMatVector())
{
vector<Mat>& srcv = *static_cast< vector<Mat>* >(src.getObj());
vector<Mat>& dstv = *static_cast< vector<Mat>* >(dst.getObj());
dstv.resize(srcv.size());
for (int i = 0; i < (int)srcv.size(); i++)
downsample(srcv[i], dstv[i]);
}
else
{
double df = getResizeRatio();
CV_DbgAssert(src.empty() || src.size() == srcSize);
resize(src, dst, Size(), 1.0 / df, 1.0 / df, INTER_LINEAR);
CV_DbgAssert(dst.size() == smallSize);
}
}
void upsample(InputArray src, OutputArray dst)
{
if (src.isMatVector())
{
vector<Mat>& srcv = *static_cast< vector<Mat>* >(src.getObj());
vector<Mat>& dstv = *static_cast< vector<Mat>* >(dst.getObj());
dstv.resize(srcv.size());
for (int i = 0; i < (int)srcv.size(); i++)
upsample(srcv[i], dstv[i]);
}
else
{
CV_DbgAssert(src.empty() || src.size() == smallSize);
resize(src, dst, srcSize, 0, 0);
}
}
private:
void initBuffers(InputArray src_, InputArray joint_);
void initSrcAndJoint(InputArray src_, InputArray joint_);
void buildManifoldsAndPerformFiltering(vector<Mat>& eta, Mat1b& cluster, int treeLevel);
void gatherResult(InputArray src_, OutputArray dst_);
void compute_w_k(vector<Mat>& etak, Mat& dst, float sigma, int curTreeLevel);
void computeClusters(Mat1b& cluster, Mat1b& cluster_minus, Mat1b& cluster_plus);
void computeEta(Mat& teta, Mat1b& cluster, vector<Mat>& etaDst);
static void h_filter(const Mat1f& src, Mat& dst, float sigma);
static void RFFilterPass(vector<Mat>& joint, vector<Mat>& Psi_splat, Mat& Psi_splat_0, vector<Mat>& Psi_splat_dst, Mat& Psi_splat_0_dst, float ss, float sr);
static void computeDTHor(vector<Mat>& srcCn, Mat& dst, float ss, float sr);
static void computeDTVer(vector<Mat>& srcCn, Mat& dst, float ss, float sr);
};
CV_INIT_ALGORITHM(AdaptiveManifoldFilterN, "AdaptiveManifoldFilter",
obj.info()->addParam(obj, "sigma_s", obj.sigma_s_, false, 0, 0, "Filter spatial standard deviation");
obj.info()->addParam(obj, "sigma_r", obj.sigma_r_, false, 0, 0, "Filter range standard deviation");
obj.info()->addParam(obj, "tree_height", obj.tree_height_, false, 0, 0, "Height of the manifold tree (default = -1 : automatically computed)");
obj.info()->addParam(obj, "num_pca_iterations", obj.num_pca_iterations_, false, 0, 0, "Number of iterations to computed the eigenvector v1");
obj.info()->addParam(obj, "adjust_outliers", obj.adjust_outliers_, false, 0, 0, "Specify adjust outliers using Eq. 9 or not");
obj.info()->addParam(obj, "use_RNG", obj.useRNG, false, 0, 0, "Specify use random to compute eigenvector or not");)
AdaptiveManifoldFilterN::AdaptiveManifoldFilterN()
{
sigma_s_ = 16.0;
sigma_r_ = 0.2;
tree_height_ = -1;
num_pca_iterations_ = 1;
adjust_outliers_ = false;
useRNG = true;
}
void AdaptiveManifoldFilterN::initBuffers(InputArray src_, InputArray joint_)
{
initSrcAndJoint(src_, joint_);
jointCn.resize(jointCnNum);
Psi_splat_small.resize(jointCnNum);
for (int i = 0; i < jointCnNum; i++)
{
//jointCn[i].create(srcSize, CV_32FC1);
Psi_splat_small[i].create(smallSize, CV_32FC1);
}
srcCn.resize(srcCnNum);
sum_w_ki_Psi_blur_.resize(srcCnNum);
for (int i = 0; i < srcCnNum; i++)
{
//srcCn[i].create(srcSize, CV_32FC1);
sum_w_ki_Psi_blur_[i] = Mat::zeros(srcSize, CV_32FC1);
}
sum_w_ki_Psi_blur_0_ = Mat::zeros(srcSize, CV_32FC1);
w_k.create(srcSize, CV_32FC1);
Psi_splat_0_small.create(smallSize, CV_32FC1);
if (adjust_outliers_)
minDistToManifoldSquared.create(srcSize);
}
void AdaptiveManifoldFilterN::initSrcAndJoint(InputArray src_, InputArray joint_)
{
srcSize = src_.size();
smallSize = getSmallSize();
srcCnNum = src_.channels();
split(src_, srcCn);
if (src_.depth() != CV_32F)
{
for (int i = 0; i < srcCnNum; i++)
srcCn[i].convertTo(srcCn[i], CV_32F);
}
if (joint_.empty() || joint_.getObj() == src_.getObj())
{
jointCnNum = srcCnNum;
if (src_.depth() == CV_32F)
{
jointCn = srcCn;
}
else
{
jointCn.resize(jointCnNum);
for (int i = 0; i < jointCnNum; i++)
srcCn[i].convertTo(jointCn[i], CV_32F, getNormalizer(src_.depth()));
}
}
else
{
splitChannels(joint_, jointCn);
jointCnNum = (int)jointCn.size();
int jointDepth = jointCn[0].depth();
Size jointSize = jointCn[0].size();
CV_Assert( jointSize == srcSize && (jointDepth == CV_8U || jointDepth == CV_16U || jointDepth == CV_32F) );
if (jointDepth != CV_32F)
{
for (int i = 0; i < jointCnNum; i++)
jointCn[i].convertTo(jointCn[i], CV_32F, getNormalizer(jointDepth));
}
}
}
void AdaptiveManifoldFilterN::filter(InputArray src, OutputArray dst, InputArray joint)
{
CV_Assert(sigma_s_ >= 1 && (sigma_r_ > 0 && sigma_r_ <= 1));
num_pca_iterations_ = std::max(1, num_pca_iterations_);
initBuffers(src, joint);
curTreeHeight = tree_height_ <= 0 ? computeManifoldTreeHeight(sigma_s_, sigma_r_) : tree_height_;
sigma_r_over_sqrt_2 = (float) (sigma_r_ / sqrt(2.0));
const double seedCoef = jointCn[0].at<float>(srcSize.height/2, srcSize.width/2);
const uint64 baseCoef = numeric_limits<uint64>::max() / 0xFFFF;
rnd.state = static_cast<int64>(baseCoef*seedCoef);
Mat1b cluster0(srcSize, 0xFF);
vector<Mat> eta0(jointCnNum);
for (int i = 0; i < jointCnNum; i++)
h_filter(jointCn[i], eta0[i], (float)sigma_s_);
buildManifoldsAndPerformFiltering(eta0, cluster0, 1);
gatherResult(src, dst);
}
void AdaptiveManifoldFilterN::gatherResult(InputArray src_, OutputArray dst_)
{
int dDepth = src_.depth();
vector<Mat> dstCn(srcCnNum);
if (!adjust_outliers_)
{
for (int i = 0; i < srcCnNum; i++)
divide(sum_w_ki_Psi_blur_[i], sum_w_ki_Psi_blur_0_, dstCn[i], 1.0, dDepth);
merge(dstCn, dst_);
}
else
{
Mat1f& alpha = minDistToManifoldSquared;
double sigmaMember = -0.5 / (sigma_r_*sigma_r_);
multiply(minDistToManifoldSquared, sigmaMember, minDistToManifoldSquared);
cv::exp(minDistToManifoldSquared, alpha);
for (int i = 0; i < srcCnNum; i++)
{
Mat& f = srcCn[i];
Mat& g = dstCn[i];
divide(sum_w_ki_Psi_blur_[i], sum_w_ki_Psi_blur_0_, g);
subtract(g, f, g);
multiply(alpha, g, g);
add(g, f, g);
g.convertTo(g, dDepth);
}
merge(dstCn, dst_);
}
}
void AdaptiveManifoldFilterN::buildManifoldsAndPerformFiltering(vector<Mat>& eta, Mat1b& cluster, int treeLevel)
{
CV_DbgAssert((int)eta.size() == jointCnNum);
//splatting
Size etaSize = eta[0].size();
CV_DbgAssert(etaSize == srcSize || etaSize == smallSize);
if (etaSize == srcSize)
{
compute_w_k(eta, w_k, sigma_r_over_sqrt_2, treeLevel);
etaFull = eta;
downsample(eta, eta);
}
else
{
upsample(eta, etaFull);
compute_w_k(etaFull, w_k, sigma_r_over_sqrt_2, treeLevel);
}
//blurring
Psi_splat_small.resize(srcCnNum);
for (int si = 0; si < srcCnNum; si++)
{
Mat tmp;
multiply(srcCn[si], w_k, tmp);
downsample(tmp, Psi_splat_small[si]);
}
downsample(w_k, Psi_splat_0_small);
vector<Mat>& Psi_splat_small_blur = Psi_splat_small;
Mat& Psi_splat_0_small_blur = Psi_splat_0_small;
float rf_ss = (float)(sigma_s_ / getResizeRatio());
float rf_sr = (float)(sigma_r_over_sqrt_2);
RFFilterPass(eta, Psi_splat_small, Psi_splat_0_small, Psi_splat_small_blur, Psi_splat_0_small_blur, rf_ss, rf_sr);
//slicing
{
Mat tmp;
for (int i = 0; i < srcCnNum; i++)
{
upsample(Psi_splat_small_blur[i], tmp);
multiply(tmp, w_k, tmp);
add(sum_w_ki_Psi_blur_[i], tmp, sum_w_ki_Psi_blur_[i]);
}
upsample(Psi_splat_0_small_blur, tmp);
multiply(tmp, w_k, tmp);
add(sum_w_ki_Psi_blur_0_, tmp, sum_w_ki_Psi_blur_0_);
}
//build new manifolds
if (treeLevel < curTreeHeight)
{
Mat1b cluster_minus, cluster_plus;
computeClusters(cluster, cluster_minus, cluster_plus);
vector<Mat> eta_minus(jointCnNum), eta_plus(jointCnNum);
{
Mat1f teta = 1.0 - w_k;
computeEta(teta, cluster_minus, eta_minus);
computeEta(teta, cluster_plus, eta_plus);
}
//free memory to continue deep recursion
eta.clear();
cluster.release();
buildManifoldsAndPerformFiltering(eta_minus, cluster_minus, treeLevel + 1);
buildManifoldsAndPerformFiltering(eta_plus, cluster_plus, treeLevel + 1);
}
}
void AdaptiveManifoldFilterN::collectGarbage()
{
srcCn.clear();
jointCn.clear();
etaFull.clear();
sum_w_ki_Psi_blur_.clear();
Psi_splat_small.clear();
sum_w_ki_Psi_blur_0_.release();
w_k.release();
Psi_splat_0_small.release();
minDistToManifoldSquared.release();
}
void AdaptiveManifoldFilterN::h_filter(const Mat1f& src, Mat& dst, float sigma)
{
CV_DbgAssert(src.depth() == CV_32F);
const float a = exp(-sqrt(2.0f) / sigma);
dst.create(src.size(), CV_32FC1);
for (int y = 0; y < src.rows; ++y)
{
const float* src_row = src[y];
float* dst_row = dst.ptr<float>(y);
dst_row[0] = src_row[0];
for (int x = 1; x < src.cols; ++x)
{
dst_row[x] = src_row[x] + a * (dst_row[x - 1] - src_row[x]);
}
for (int x = src.cols - 2; x >= 0; --x)
{
dst_row[x] = dst_row[x] + a * (dst_row[x + 1] - dst_row[x]);
}
}
for (int y = 1; y < src.rows; ++y)
{
float* dst_cur_row = dst.ptr<float>(y);
float* dst_prev_row = dst.ptr<float>(y-1);
rf_vert_row_pass(dst_cur_row, dst_prev_row, a, src.cols);
}
for (int y = src.rows - 2; y >= 0; --y)
{
float* dst_cur_row = dst.ptr<float>(y);
float* dst_prev_row = dst.ptr<float>(y+1);
rf_vert_row_pass(dst_cur_row, dst_prev_row, a, src.cols);
}
}
void AdaptiveManifoldFilterN::compute_w_k(vector<Mat>& etak, Mat& dst, float sigma, int curTreeLevel)
{
CV_DbgAssert((int)etak.size() == jointCnNum);
dst.create(srcSize, CV_32FC1);
float argConst = -0.5f / (sigma*sigma);
for (int i = 0; i < srcSize.height; i++)
{
float *dstRow = dst.ptr<float>(i);
for (int cn = 0; cn < jointCnNum; cn++)
{
float *eta_kCnRow = etak[cn].ptr<float>(i);
float *jointCnRow = jointCn[cn].ptr<float>(i);
if (cn == 0)
{
sqr_dif(dstRow, eta_kCnRow, jointCnRow, srcSize.width);
}
else
{
add_sqr_dif(dstRow, eta_kCnRow, jointCnRow, srcSize.width);
}
}
if (adjust_outliers_)
{
float *minDistRow = minDistToManifoldSquared.ptr<float>(i);
if (curTreeLevel != 1)
{
min_(minDistRow, minDistRow, dstRow, srcSize.width);
}
else
{
std::memcpy(minDistRow, dstRow, srcSize.width*sizeof(float));
}
}
mul(dstRow, dstRow, argConst, srcSize.width);
//Exp_32f(dstRow, dstRow, srcSize.width);
}
cv::exp(dst, dst);
}
void AdaptiveManifoldFilterN::computeDTHor(vector<Mat>& srcCn, Mat& dst, float sigma_s, float sigma_r)
{
int cnNum = (int)srcCn.size();
int h = srcCn[0].rows;
int w = srcCn[0].cols;
float sigmaRatioSqr = (float) SQR(sigma_s / sigma_r);
float lnAlpha = (float) (-sqrt(2.0) / sigma_s);
dst.create(h, w-1, CV_32F);
for (int i = 0; i < h; i++)
{
float *dstRow = dst.ptr<float>(i);
for (int cn = 0; cn < cnNum; cn++)
{
float *curCnRow = srcCn[cn].ptr<float>(i);
if (cn == 0)
sqr_dif(dstRow, curCnRow, curCnRow + 1, w - 1);
else
add_sqr_dif(dstRow, curCnRow, curCnRow + 1, w - 1);
}
mad(dstRow, dstRow, sigmaRatioSqr, 1.0f, w - 1);
sqrt_(dstRow, dstRow, w - 1);
mul(dstRow, dstRow, lnAlpha, w - 1);
//Exp_32f(dstRow, dstRow, w - 1);
}
cv::exp(dst, dst);
}
void AdaptiveManifoldFilterN::computeDTVer(vector<Mat>& srcCn, Mat& dst, float sigma_s, float sigma_r)
{
int cnNum = (int)srcCn.size();
int h = srcCn[0].rows;
int w = srcCn[0].cols;
dst.create(h-1, w, CV_32F);
float sigmaRatioSqr = (float) SQR(sigma_s / sigma_r);
float lnAlpha = (float) (-sqrt(2.0) / sigma_s);
for (int i = 0; i < h-1; i++)
{
float *dstRow = dst.ptr<float>(i);
for (int cn = 0; cn < cnNum; cn++)
{
float *srcRow1 = srcCn[cn].ptr<float>(i);
float *srcRow2 = srcCn[cn].ptr<float>(i+1);
if (cn == 0)
sqr_dif(dstRow, srcRow1, srcRow2, w);
else
add_sqr_dif(dstRow, srcRow1, srcRow2, w);
}
mad(dstRow, dstRow, sigmaRatioSqr, 1.0f, w);
sqrt_(dstRow, dstRow, w);
mul(dstRow, dstRow, lnAlpha, w);
//Exp_32f(dstRow, dstRow, w);
}
cv::exp(dst, dst);
}
void AdaptiveManifoldFilterN::RFFilterPass(vector<Mat>& joint, vector<Mat>& Psi_splat, Mat& Psi_splat_0, vector<Mat>& Psi_splat_dst, Mat& Psi_splat_0_dst, float ss, float sr)
{
int h = joint[0].rows;
int w = joint[0].cols;
int cnNum = (int)Psi_splat.size();
Mat adth, adtv;
computeDTHor(joint, adth, ss, sr);
computeDTVer(joint, adtv, ss, sr);
Psi_splat_0_dst.create(h, w, CV_32FC1);
Psi_splat_dst.resize(cnNum);
for (int cn = 0; cn < cnNum; cn++)
Psi_splat_dst[cn].create(h, w, CV_32FC1);
Ptr<DTFilter> dtf = createDTFilterRF(adth, adtv, ss, sr, 1);
for (int cn = 0; cn < cnNum; cn++)
dtf->filter(Psi_splat[cn], Psi_splat_dst[cn]);
dtf->filter(Psi_splat_0, Psi_splat_0_dst);
}
void AdaptiveManifoldFilterN::computeClusters(Mat1b& cluster, Mat1b& cluster_minus, Mat1b& cluster_plus)
{
Mat difEtaSrc;
{
vector<Mat> eta_difCn(jointCnNum);
for (int i = 0; i < jointCnNum; i++)
subtract(jointCn[i], etaFull[i], eta_difCn[i]);
merge(eta_difCn, difEtaSrc);
difEtaSrc = difEtaSrc.reshape(1, difEtaSrc.total());
CV_DbgAssert(difEtaSrc.cols == jointCnNum);
}
Mat1f initVec(1, jointCnNum);
if (useRNG)
{
rnd.fill(initVec, RNG::UNIFORM, -0.5, 0.5);
}
else
{
for (int i = 0; i < initVec.total(); i++)
initVec(0, i) = (i % 2 == 0) ? 0.5f : -0.5f;
}
Mat1f eigenVec(1, jointCnNum);
computeEigenVector(difEtaSrc, cluster, eigenVec, num_pca_iterations_, initVec);
Mat1f difOreientation;
gemm(difEtaSrc, eigenVec, 1, noArray(), 0, difOreientation, GEMM_2_T);
difOreientation = difOreientation.reshape(1, srcSize.height);
CV_DbgAssert(difOreientation.size() == srcSize);
compare(difOreientation, 0, cluster_minus, CMP_LT);
bitwise_and(cluster_minus, cluster, cluster_minus);
compare(difOreientation, 0, cluster_plus, CMP_GE);
bitwise_and(cluster_plus, cluster, cluster_plus);
}
void AdaptiveManifoldFilterN::computeEta(Mat& teta, Mat1b& cluster, vector<Mat>& etaDst)
{
CV_DbgAssert(teta.size() == srcSize && cluster.size() == srcSize);
Mat1f tetaMasked = Mat1f::zeros(srcSize);
teta.copyTo(tetaMasked, cluster);
float sigma_s = (float)(sigma_s_ / getResizeRatio());
Mat1f tetaMaskedBlur;
downsample(tetaMasked, tetaMaskedBlur);
h_filter(tetaMaskedBlur, tetaMaskedBlur, sigma_s);
Mat mul;
etaDst.resize(jointCnNum);
for (int i = 0; i < jointCnNum; i++)
{
multiply(tetaMasked, jointCn[i], mul);
downsample(mul, etaDst[i]);
h_filter(etaDst[i], etaDst[i], sigma_s);
divide(etaDst[i], tetaMaskedBlur, etaDst[i]);
}
}
void computeEigenVector(const Mat1f& X, const Mat1b& mask, Mat1f& dst, int num_pca_iterations, const Mat1f& rand_vec)
{
CV_DbgAssert( X.cols == rand_vec.cols );
CV_DbgAssert( X.rows == mask.size().area() );
CV_DbgAssert( rand_vec.rows == 1 );
dst.create(rand_vec.size());
rand_vec.copyTo(dst);
Mat1f t(X.size());
float* dst_row = dst[0];
for (int i = 0; i < num_pca_iterations; ++i)
{
t.setTo(Scalar::all(0));
for (int y = 0, ind = 0; y < mask.rows; ++y)
{
const uchar* mask_row = mask[y];
for (int x = 0; x < mask.cols; ++x, ++ind)
{
if (mask_row[x])
{
const float* X_row = X[ind];
float* t_row = t[ind];
float dots = 0.0;
for (int c = 0; c < X.cols; ++c)
dots += dst_row[c] * X_row[c];
for (int c = 0; c < X.cols; ++c)
t_row[c] = dots * X_row[c];
}
}
}
dst.setTo(0.0);
for (int i = 0; i < X.rows; ++i)
{
const float* t_row = t[i];
for (int c = 0; c < X.cols; ++c)
{
dst_row[c] += t_row[c];
}
}
}
double n = norm(dst);
divide(dst, n, dst);
}
}
Ptr<AdaptiveManifoldFilter> cv::AdaptiveManifoldFilter::create()
{
return Ptr<AdaptiveManifoldFilter>(new AdaptiveManifoldFilterN());
}
CV_EXPORTS_W Ptr<AdaptiveManifoldFilter> cv::createAMFilter(double sigma_s, double sigma_r, bool adjust_outliers)
{
Ptr<AdaptiveManifoldFilter> amf(new AdaptiveManifoldFilterN());
amf->set("sigma_s", sigma_s);
amf->set("sigma_r", sigma_r);
amf->set("adjust_outliers", adjust_outliers);
return amf;
}
CV_EXPORTS_W void cv::amFilter(InputArray joint, InputArray src, OutputArray dst, double sigma_s, double sigma_r, bool adjust_outliers)
{
Ptr<AdaptiveManifoldFilter> amf = createAMFilter(sigma_s, sigma_r, adjust_outliers);
amf->filter(src, dst, joint);
}

@ -0,0 +1,48 @@
#include "precomp.hpp"
#include "dtfilter_cpu.hpp"
#include "dtfilter_ocl.hpp"
namespace cv
{
static bool dtUseOpenCLVersion(InputArray guide, InputArray src, int mode, int numIters)
{
return
false && guide.isUMat() && ocl::useOpenCL() &&
(guide.cols() >= 256 && guide.rows() >= 256) &&
(guide.depth() == CV_32F || guide.depth() == CV_8U) &&
(src.depth() == CV_32F || src.depth() == CV_8U) &&
(numIters <= 3);
}
CV_EXPORTS_W
Ptr<DTFilter> createDTFilter(InputArray guide, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
return Ptr<DTFilter>(DTFilterCPU::create(guide, sigmaSpatial, sigmaColor, mode, numIters));
}
CV_EXPORTS_W
void dtFilter(InputArray guide, InputArray src, OutputArray dst, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
if (dtUseOpenCLVersion(guide, src, mode, numIters))
{
Ptr<DTFilterOCL> dtf = DTFilterOCL::create(guide, sigmaSpatial, sigmaColor, mode, numIters);
dtf->setSingleFilterCall(true);
dtf->filter(src, dst);
}
else
{
Ptr<DTFilterCPU> dtf = DTFilterCPU::create(guide, sigmaSpatial, sigmaColor, mode, numIters);
dtf->setSingleFilterCall(true);
dtf->filter(src, dst);
}
}
CV_EXPORTS_W
Ptr<DTFilter> createDTFilterOCL(InputArray guide, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
Ptr<DTFilterOCL> dtf = DTFilterOCL::create(guide, sigmaSpatial, sigmaColor, mode, numIters);
return Ptr<DTFilter>(dtf);
}
}

@ -0,0 +1,174 @@
#include "precomp.hpp"
#include "dtfilter_cpu.hpp"
namespace cv
{
typedef Vec<uchar, 1> Vec1b;
typedef Vec<float, 1> Vec1f;
Ptr<DTFilterCPU> DTFilterCPU::create(InputArray guide, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
Ptr<DTFilterCPU> dtf(new DTFilterCPU());
dtf->init(guide, sigmaSpatial, sigmaColor, mode, numIters);
return dtf;
}
Ptr<DTFilterCPU> DTFilterCPU::createRF(InputArray adistHor, InputArray adistVert, double sigmaSpatial, double sigmaColor, int numIters /*= 3*/)
{
Mat adh = adistHor.getMat();
Mat adv = adistVert.getMat();
CV_Assert(adh.type() == CV_32FC1 && adv.type() == CV_32FC1 && adh.rows == adv.rows + 1 && adh.cols == adv.cols - 1);
Ptr<DTFilterCPU> dtf(new DTFilterCPU());
dtf->release();
dtf->mode = DTF_RF;
dtf->numIters = std::max(1, numIters);
dtf->h = adh.rows;
dtf->w = adh.cols + 1;
dtf->sigmaSpatial = std::max(1.0f, (float)sigmaSpatial);
dtf->sigmaColor = std::max(0.01f, (float)sigmaColor);
dtf->a0distHor = adh;
dtf->a0distVert = adv;
return dtf;
}
void DTFilterCPU::init(InputArray guide_, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
Mat guide = guide_.getMat();
int cn = guide.channels();
int depth = guide.depth();
CV_Assert(cn <= 4);
CV_Assert((depth == CV_8U || depth == CV_32F) && !guide.empty());
#define CREATE_DTF(Vect) init_<Vect>(guide, sigmaSpatial, sigmaColor, mode, numIters);
if (cn == 1)
{
if (depth == CV_8U)
CREATE_DTF(Vec1b);
if (depth == CV_32F)
CREATE_DTF(Vec1f);
}
else if (cn == 2)
{
if (depth == CV_8U)
CREATE_DTF(Vec2b);
if (depth == CV_32F)
CREATE_DTF(Vec2f);
}
else if (cn == 3)
{
if (depth == CV_8U)
CREATE_DTF(Vec3b);
if (depth == CV_32F)
CREATE_DTF(Vec3f);
}
else if (cn == 4)
{
if (depth == CV_8U)
CREATE_DTF(Vec4b);
if (depth == CV_32F)
CREATE_DTF(Vec4f);
}
#undef CREATE_DTF
}
void DTFilterCPU::filter(InputArray src_, OutputArray dst_, int dDepth)
{
Mat src = src_.getMat();
dst_.create(src.size(), src.type());
Mat& dst = dst_.getMatRef();
int cn = src.channels();
int depth = src.depth();
CV_Assert(cn <= 4 && (depth == CV_8U || depth == CV_32F));
if (cn == 1)
{
if (depth == CV_8U)
filter_<Vec1b>(src, dst, dDepth);
if (depth == CV_32F)
filter_<Vec1f>(src, dst, dDepth);
}
else if (cn == 2)
{
if (depth == CV_8U)
filter_<Vec2b>(src, dst, dDepth);
if (depth == CV_32F)
filter_<Vec2f>(src, dst, dDepth);
}
else if (cn == 3)
{
if (depth == CV_8U)
filter_<Vec3b>(src, dst, dDepth);
if (depth == CV_32F)
filter_<Vec3f>(src, dst, dDepth);
}
else if (cn == 4)
{
if (depth == CV_8U)
filter_<Vec4b>(src, dst, dDepth);
if (depth == CV_32F)
filter_<Vec4f>(src, dst, dDepth);
}
}
void DTFilterCPU::setSingleFilterCall(bool value)
{
singleFilterCall = value;
}
void DTFilterCPU::release()
{
if (mode == -1) return;
idistHor.release();
idistVert.release();
distHor.release();
distVert.release();
a0distHor.release();
a0distVert.release();
adistHor.release();
adistVert.release();
}
Mat DTFilterCPU::getWExtendedMat(int h, int w, int type, int brdleft /*= 0*/, int brdRight /*= 0*/, int cacheAlign /*= 0*/)
{
int wrapperCols = w + brdleft + brdRight;
if (cacheAlign > 0)
wrapperCols += ((wrapperCols + cacheAlign-1) / cacheAlign)*cacheAlign;
Mat mat(h, wrapperCols, type);
return mat(Range::all(), Range(brdleft, w + brdleft));
}
Range DTFilterCPU::getWorkRangeByThread(const Range& itemsRange, const Range& rangeThread, int declaredNumThreads)
{
if (declaredNumThreads <= 0)
declaredNumThreads = cv::getNumThreads();
int chunk = itemsRange.size() / declaredNumThreads;
int start = itemsRange.start + chunk * rangeThread.start;
int end = itemsRange.start + ((rangeThread.end >= declaredNumThreads) ? itemsRange.size() : chunk * rangeThread.end);
return Range(start, end);
}
Range DTFilterCPU::getWorkRangeByThread(int items, const Range& rangeThread, int declaredNumThreads)
{
return getWorkRangeByThread(Range(0, items), rangeThread, declaredNumThreads);
}
}

@ -0,0 +1,255 @@
#ifndef __OPENCV_DTFILTER_HPP__
#define __OPENCV_DTFILTER_HPP__
#include "precomp.hpp"
#ifdef _MSC_VER
#pragma warning(disable: 4512)
#pragma warning(disable: 4127)
#endif
#define CV_GET_NUM_THREAD_WORKS_PROPERLY
#undef CV_GET_NUM_THREAD_WORKS_PROPERLY
namespace cv
{
class DTFilterCPU : public DTFilter
{
public: /*Non-template methods*/
static Ptr<DTFilterCPU> create(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
static Ptr<DTFilterCPU> createRF(InputArray adistHor, InputArray adistVert, double sigmaSpatial, double sigmaColor, int numIters = 3);
void filter(InputArray src, OutputArray dst, int dDepth = -1);
void setSingleFilterCall(bool value);
public: /*Template methods*/
/*Use this static methods instead of constructor*/
template<typename GuideVec>
static DTFilterCPU* create_p_(const Mat& guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
template<typename GuideVec>
static DTFilterCPU create_(const Mat& guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
template<typename GuideVec>
void init_(Mat& guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
template<typename SrcVec>
void filter_(const Mat& src, Mat& dst, int dDepth = -1);
protected: /*Typedefs declarations*/
typedef float IDistType;
typedef Vec<IDistType, 1> IDistVec;
typedef float DistType;
typedef Vec<DistType, 1> DistVec;
typedef float WorkType;
public: /*Members declarations*/
int h, w, mode;
float sigmaSpatial, sigmaColor;
bool singleFilterCall;
int numFilterCalls;
Mat idistHor, idistVert;
Mat distHor, distVert;
Mat a0distHor, a0distVert;
Mat adistHor, adistVert;
int numIters;
protected: /*Functions declarations*/
DTFilterCPU() : mode(-1), singleFilterCall(false), numFilterCalls(0) {}
void init(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
void release();
template<typename GuideVec>
inline IDistType getTransformedDistance(const GuideVec &l, const GuideVec &r)
{
return (IDistType)(1.0f + sigmaSpatial / sigmaColor * norm1<IDistType>(l, r));
}
inline double getIterSigmaH(int iterNum)
{
return sigmaSpatial * std::pow(2.0, numIters - iterNum) / sqrt(std::pow(4.0, numIters) - 1);
}
inline IDistType getIterRadius(int iterNum)
{
return (IDistType)(3.0*getIterSigmaH(iterNum));
}
inline float getIterAlpha(int iterNum)
{
return (float)std::exp(-std::sqrt(2.0 / 3.0) / getIterSigmaH(iterNum));
}
protected: /*Wrappers for parallelization*/
template <typename WorkVec>
struct FilterNC_horPass : public ParallelLoopBody
{
Mat &src, &idist, &dst;
float radius;
FilterNC_horPass(Mat& src_, Mat& idist_, Mat& dst_);
void operator() (const Range& range) const;
};
template <typename WorkVec>
struct FilterIC_horPass : public ParallelLoopBody
{
Mat &src, &idist, &dist, &dst, isrcBuf;
float radius;
FilterIC_horPass(Mat& src_, Mat& idist_, Mat& dist_, Mat& dst_);
void operator() (const Range& range) const;
};
template <typename WorkVec>
struct FilterRF_horPass : public ParallelLoopBody
{
Mat &res, &alphaD;
int iteration;
FilterRF_horPass(Mat& res_, Mat& alphaD_, int iteration_);
void operator() (const Range& range) const;
Range getRange() const { return Range(0, res.rows); }
};
template <typename WorkVec>
struct FilterRF_vertPass : public ParallelLoopBody
{
Mat &res, &alphaD;
int iteration;
FilterRF_vertPass(Mat& res_, Mat& alphaD_, int iteration_);
void operator() (const Range& range) const;
#ifdef CV_GET_NUM_THREAD_WORKS_PROPERLY
Range getRange() const { return Range(0, cv::getNumThreads()); }
#else
Range getRange() const { return Range(0, res.cols); }
#endif
};
template <typename GuideVec>
struct ComputeIDTHor_ParBody: public ParallelLoopBody
{
DTFilterCPU &dtf;
Mat &guide, &dst;
ComputeIDTHor_ParBody(DTFilterCPU& dtf_, Mat& guide_, Mat& dst_);
void operator() (const Range& range) const;
Range getRange() { return Range(0, guide.rows); }
};
template <typename GuideVec>
struct ComputeDTandIDTHor_ParBody : public ParallelLoopBody
{
DTFilterCPU &dtf;
Mat &guide, &dist, &idist;
IDistType maxRadius;
ComputeDTandIDTHor_ParBody(DTFilterCPU& dtf_, Mat& guide_, Mat& dist_, Mat& idist_);
void operator() (const Range& range) const;
Range getRange() { return Range(0, guide.rows); }
};
template <typename GuideVec>
struct ComputeA0DTHor_ParBody : public ParallelLoopBody
{
DTFilterCPU &dtf;
Mat &guide;
float lna;
ComputeA0DTHor_ParBody(DTFilterCPU& dtf_, Mat& guide_);
void operator() (const Range& range) const;
Range getRange() { return Range(0, guide.rows); }
~ComputeA0DTHor_ParBody();
};
template <typename GuideVec>
struct ComputeA0DTVert_ParBody : public ParallelLoopBody
{
DTFilterCPU &dtf;
Mat &guide;
float lna;
ComputeA0DTVert_ParBody(DTFilterCPU& dtf_, Mat& guide_);
void operator() (const Range& range) const;
Range getRange() const { return Range(0, guide.rows - 1); }
~ComputeA0DTVert_ParBody();
};
protected: /*Auxiliary implementation functions*/
static Range getWorkRangeByThread(const Range& itemsRange, const Range& rangeThread, int maxThreads = 0);
static Range getWorkRangeByThread(int items, const Range& rangeThread, int maxThreads = 0);
template<typename SrcVec>
static void prepareSrcImg_IC(const Mat& src, Mat& inner, Mat& outer);
static Mat getWExtendedMat(int h, int w, int type, int brdleft = 0, int brdRight = 0, int cacheAlign = 0);
template<typename SrcVec, typename SrcWorkVec>
static void integrateSparseRow(const SrcVec *src, const float *dist, SrcWorkVec *dst, int cols);
template<typename SrcVec, typename SrcWorkVec>
static void integrateRow(const SrcVec *src, SrcWorkVec *dst, int cols);
inline static int getLeftBound(IDistType *idist, int pos, IDistType searchValue)
{
while (idist[pos] < searchValue)
pos++;
return pos;
}
inline static int getRightBound(IDistType *idist, int pos, IDistType searchValue)
{
while (idist[pos + 1] < searchValue)
pos++;
return pos;
}
template <typename T, typename T1, typename T2, int n>
inline static T norm1(const cv::Vec<T1, n>& v1, const cv::Vec<T2, n>& v2)
{
T sum = (T) 0;
for (int i = 0; i < n; i++)
sum += std::abs( (T)v1[i] - (T)v2[i] );
return sum;
}
};
/*One-line template wrappers for DT call*/
template<typename GuideVec, typename SrcVec>
void domainTransformFilter( const Mat_<GuideVec>& guide,
const Mat_<SrcVec>& source,
Mat& dst,
double sigmaSpatial, double sigmaColor,
int mode = cv::DTF_NC, int numPasses = 3
);
template<typename GuideVec, typename SrcVec>
void domainTransformFilter( const Mat& guide,
const Mat& source,
Mat& dst,
double sigmaSpatial, double sigmaColor,
int mode = cv::DTF_NC, int numPasses = 3
);
}
#include "dtfilter_cpu.inl.hpp"
#endif

@ -0,0 +1,590 @@
#ifndef __OPENCV_DTFILTER_INL_HPP__
#define __OPENCV_DTFILTER_INL_HPP__
#include "precomp.hpp"
#include <iostream>
#include "edgeaware_filters_common.hpp"
using namespace cv::eaf;
#define NC_USE_INTEGRAL_SRC
//#undef NC_USE_INTEGRAL_SRC
namespace cv
{
template<typename GuideVec>
DTFilterCPU DTFilterCPU::create_(const Mat& guide, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
DTFilterCPU dtf;
dtf.init_<GuideVec>(guide, sigmaSpatial, sigmaColor, mode, numIters);
return dtf;
}
template<typename GuideVec>
DTFilterCPU* DTFilterCPU::create_p_(const Mat& guide, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
DTFilterCPU* dtf = new DTFilterCPU();
dtf->init_<GuideVec>(guide, sigmaSpatial, sigmaColor, mode, numIters);
return dtf;
}
template<typename GuideVec>
void DTFilterCPU::init_(Mat& guide, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
CV_Assert(guide.type() == cv::DataType<GuideVec>::type);
this->release();
this->h = guide.rows;
this->w = guide.cols;
this->sigmaSpatial = std::max(1.0f, (float)sigmaSpatial);
this->sigmaColor = std::max(0.01f, (float)sigmaColor);
this->mode = mode;
this->numIters = std::max(1, numIters);
switch (this->mode)
{
case DTF_NC:
{
{
ComputeIDTHor_ParBody<GuideVec> horBody(*this, guide, idistHor);
parallel_for_(horBody.getRange(), horBody);
}
{
Mat guideT = guide.t();
ComputeIDTHor_ParBody<GuideVec> horBody(*this, guideT, idistVert);
parallel_for_(horBody.getRange(), horBody);
}
}
break;
case DTF_IC:
{
{
ComputeDTandIDTHor_ParBody<GuideVec> horBody(*this, guide, distHor, idistHor);
parallel_for_(horBody.getRange(), horBody);
}
{
Mat guideT = guide.t();
ComputeDTandIDTHor_ParBody<GuideVec> horBody(*this, guideT, distVert, idistVert);
parallel_for_(horBody.getRange(), horBody);
}
}
break;
case DTF_RF:
{
ComputeA0DTHor_ParBody<GuideVec> horBody(*this, guide);
ComputeA0DTVert_ParBody<GuideVec> vertBody(*this, guide);
parallel_for_(horBody.getRange(), horBody);
parallel_for_(vertBody.getRange(), vertBody);
}
break;
default:
CV_Error(Error::StsBadFlag, "Incorrect DT filter mode");
break;
}
}
template <typename SrcVec>
void DTFilterCPU::filter_(const Mat& src, Mat& dst, int dDepth)
{
typedef typename DataType<Vec<WorkType, SrcVec::channels> >::vec_type WorkVec;
CV_Assert( src.type() == SrcVec::type );
if ( src.cols != w || src.rows != h )
{
CV_Error(Error::StsBadSize, "Size of filtering image must be equal to size of guide image");
}
if (singleFilterCall)
{
CV_Assert(numFilterCalls == 0);
}
numFilterCalls++;
Mat res;
if (dDepth == -1) dDepth = src.depth();
//small optimization to avoid extra copying of data
bool useDstAsRes = (dDepth == WorkVec::depth && (mode == DTF_NC || mode == DTF_RF));
if (useDstAsRes)
{
dst.create(h, w, WorkVec::type);
res = dst;
}
if (mode == DTF_NC)
{
Mat resT(src.cols, src.rows, WorkVec::type);
src.convertTo(res, WorkVec::type);
FilterNC_horPass<WorkVec> horParBody(res, idistHor, resT);
FilterNC_horPass<WorkVec> vertParBody(resT, idistVert, res);
for (int iter = 1; iter <= numIters; iter++)
{
horParBody.radius = vertParBody.radius = getIterRadius(iter);
parallel_for_(Range(0, res.rows), horParBody);
parallel_for_(Range(0, resT.rows), vertParBody);
}
}
else if (mode == DTF_IC)
{
Mat resT;
prepareSrcImg_IC<WorkVec>(src, res, resT);
FilterIC_horPass<WorkVec> horParBody(res, idistHor, distHor, resT);
FilterIC_horPass<WorkVec> vertParBody(resT, idistVert, distVert, res);
for (int iter = 1; iter <= numIters; iter++)
{
horParBody.radius = vertParBody.radius = getIterRadius(iter);
parallel_for_(Range(0, res.rows), horParBody);
parallel_for_(Range(0, resT.rows), vertParBody);
}
}
else if (mode == DTF_RF)
{
src.convertTo(res, WorkVec::type);
for (int iter = 1; iter <= numIters; iter++)
{
if (!singleFilterCall && iter == 2)
{
a0distHor.copyTo(adistHor);
a0distVert.copyTo(adistVert);
}
bool useA0DT = (singleFilterCall || iter == 1);
Mat& a0dHor = (useA0DT) ? a0distHor : adistHor;
Mat& a0dVert = (useA0DT) ? a0distVert : adistVert;
FilterRF_horPass<WorkVec> horParBody(res, a0dHor, iter);
FilterRF_vertPass<WorkVec> vertParBody(res, a0dVert, iter);
parallel_for_(horParBody.getRange(), horParBody);
parallel_for_(vertParBody.getRange(), vertParBody);
}
}
if (!useDstAsRes)
{
res.convertTo(dst, dDepth);
}
}
template<typename SrcVec, typename SrcWorkVec>
void DTFilterCPU::integrateRow(const SrcVec *src, SrcWorkVec *dst, int cols)
{
SrcWorkVec sum = SrcWorkVec::all(0);
dst[0] = sum;
for (int j = 0; j < cols; j++)
{
sum += SrcWorkVec(src[j]);
dst[j + 1] = sum;
}
}
template<typename SrcVec, typename SrcWorkVec>
void DTFilterCPU::integrateSparseRow(const SrcVec *src, const float *dist, SrcWorkVec *dst, int cols)
{
SrcWorkVec sum = SrcWorkVec::all(0);
dst[0] = sum;
for (int j = 0; j < cols-1; j++)
{
sum += dist[j] * 0.5f * (SrcWorkVec(src[j]) + SrcWorkVec(src[j+1]));
dst[j + 1] = sum;
}
}
template<typename WorkVec>
void DTFilterCPU::prepareSrcImg_IC(const Mat& src, Mat& dst, Mat& dstT)
{
Mat dstOut(src.rows, src.cols + 2, WorkVec::type);
Mat dstOutT(src.cols, src.rows + 2, WorkVec::type);
dst = dstOut(Range::all(), Range(1, src.cols+1));
dstT = dstOutT(Range::all(), Range(1, src.rows+1));
src.convertTo(dst, WorkVec::type);
WorkVec *line;
int ri = dstOut.cols - 1;
for (int i = 0; i < src.rows; i++)
{
line = dstOut.ptr<WorkVec>(i);
line[0] = line[1];
line[ri] = line[ri - 1];
}
WorkVec *topLine = dst.ptr<WorkVec>(0);
WorkVec *bottomLine = dst.ptr<WorkVec>(dst.rows - 1);
ri = dstOutT.cols - 1;
for (int i = 0; i < src.cols; i++)
{
line = dstOutT.ptr<WorkVec>(i);
line[0] = topLine[i];
line[ri] = bottomLine[i];
}
}
template <typename WorkVec>
DTFilterCPU::FilterNC_horPass<WorkVec>::FilterNC_horPass(Mat& src_, Mat& idist_, Mat& dst_)
: src(src_), idist(idist_), dst(dst_), radius(1.0f)
{
CV_DbgAssert(src.type() == WorkVec::type && dst.type() == WorkVec::type && dst.rows == src.cols && dst.cols == src.rows);
}
template <typename WorkVec>
void DTFilterCPU::FilterNC_horPass<WorkVec>::operator()(const Range& range) const
{
#ifdef NC_USE_INTEGRAL_SRC
std::vector<WorkVec> isrcBuf(src.cols + 1);
WorkVec *isrcLine = &isrcBuf[0];
#endif
for (int i = range.start; i < range.end; i++)
{
const WorkVec *srcLine = src.ptr<WorkVec>(i);
IDistType *idistLine = idist.ptr<IDistType>(i);
int leftBound = 0, rightBound = 0;
WorkVec sum;
#ifdef NC_USE_INTEGRAL_SRC
integrateRow(srcLine, isrcLine, src.cols);
#else
sum = srcLine[0];
#endif
for (int j = 0; j < src.cols; j++)
{
IDistType curVal = idistLine[j];
#ifdef NC_USE_INTEGRAL_SRC
leftBound = getLeftBound(idistLine, leftBound, curVal - radius);
rightBound = getRightBound(idistLine, rightBound, curVal + radius);
sum = (isrcLine[rightBound + 1] - isrcLine[leftBound]);
#else
while (idistLine[leftBound] < curVal - radius)
{
sum -= srcLine[leftBound];
leftBound++;
}
while (idistLine[rightBound + 1] < curVal + radius)
{
rightBound++;
sum += srcLine[rightBound];
}
#endif
dst.at<WorkVec>(j, i) = sum / (float)(rightBound + 1 - leftBound);
}
}
}
template <typename WorkVec>
DTFilterCPU::FilterIC_horPass<WorkVec>::FilterIC_horPass(Mat& src_, Mat& idist_, Mat& dist_, Mat& dst_)
: src(src_), idist(idist_), dist(dist_), dst(dst_), radius(1.0f)
{
CV_DbgAssert(src.type() == WorkVec::type && dst.type() == WorkVec::type && dst.rows == src.cols && dst.cols == src.rows);
#ifdef CV_GET_NUM_THREAD_WORKS_PROPERLY
isrcBuf.create(cv::getNumThreads(), src.cols + 1, WorkVec::type);
#else
isrcBuf.create(src.rows, src.cols + 1, WorkVec::type);
#endif
}
template <typename WorkVec>
void DTFilterCPU::FilterIC_horPass<WorkVec>::operator()(const Range& range) const
{
#ifdef CV_GET_NUM_THREAD_WORKS_PROPERLY
WorkVec *isrcLine = const_cast<WorkVec*>( isrcBuf.ptr<WorkVec>(cv::getThreadNum()) );
#else
WorkVec *isrcLine = const_cast<WorkVec*>( isrcBuf.ptr<WorkVec>(range.start) );
#endif
for (int i = range.start; i < range.end; i++)
{
WorkVec *srcLine = src.ptr<WorkVec>(i);
DistType *distLine = dist.ptr<DistType>(i);
IDistType *idistLine = idist.ptr<IDistType>(i);
integrateSparseRow(srcLine, distLine, isrcLine, src.cols);
int leftBound = 0, rightBound = 0;
WorkVec sumL, sumR, sumC;
srcLine[-1] = srcLine[0];
srcLine[src.cols] = srcLine[src.cols - 1];
for (int j = 0; j < src.cols; j++)
{
IDistType curVal = idistLine[j];
IDistType valueLeft = curVal - radius;
IDistType valueRight = curVal + radius;
leftBound = getLeftBound(idistLine, leftBound, valueLeft);
rightBound = getRightBound(idistLine, rightBound, valueRight);
float areaL = idistLine[leftBound] - valueLeft;
float areaR = valueRight - idistLine[rightBound];
float dl = areaL / distLine[leftBound - 1];
float dr = areaR / distLine[rightBound];
sumL = 0.5f*areaL*(dl*srcLine[leftBound - 1] + (2.0f - dl)*srcLine[leftBound]);
sumR = 0.5f*areaR*((2.0f - dr)*srcLine[rightBound] + dr*srcLine[rightBound + 1]);
sumC = isrcLine[rightBound] - isrcLine[leftBound];
dst.at<WorkVec>(j, i) = (sumL + sumC + sumR) / (2.0f * radius);
}
}
}
template <typename WorkVec>
DTFilterCPU::FilterRF_horPass<WorkVec>::FilterRF_horPass(Mat& res_, Mat& alphaD_, int iteration_)
: res(res_), alphaD(alphaD_), iteration(iteration_)
{
CV_DbgAssert(res.type() == WorkVec::type);
CV_DbgAssert(res.type() == WorkVec::type && res.size() == res.size());
}
template <typename WorkVec>
void DTFilterCPU::FilterRF_horPass<WorkVec>::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
WorkVec *dstLine = res.ptr<WorkVec>(i);
DistType *adLine = alphaD.ptr<DistType>(i);
int j;
if (iteration > 1)
{
for (j = res.cols - 2; j >= 0; j--)
adLine[j] *= adLine[j];
}
for (j = 1; j < res.cols; j++)
{
dstLine[j] += adLine[j-1] * (dstLine[j-1] - dstLine[j]);
}
for (j = res.cols - 2; j >= 0; j--)
{
dstLine[j] += adLine[j] * (dstLine[j+1] - dstLine[j]);
}
}
}
template <typename WorkVec>
DTFilterCPU::FilterRF_vertPass<WorkVec>::FilterRF_vertPass(Mat& res_, Mat& alphaD_, int iteration_)
: res(res_), alphaD(alphaD_), iteration(iteration_)
{
CV_DbgAssert(res.type() == WorkVec::type);
CV_DbgAssert(res.type() == WorkVec::type && res.size() == res.size());
}
template <typename WorkVec>
void DTFilterCPU::FilterRF_vertPass<WorkVec>::operator()(const Range& range) const
{
#ifdef CV_GET_NUM_THREAD_WORKS_PROPERLY
Range rcols = getWorkRangeByThread(res.cols, range);
#else
Range rcols = range;
#endif
for (int i = 1; i < res.rows; i++)
{
WorkVec *curRow = res.ptr<WorkVec>(i);
WorkVec *prevRow = res.ptr<WorkVec>(i - 1);
DistType *adRow = alphaD.ptr<DistType>(i - 1);
if (iteration > 1)
{
for (int j = rcols.start; j < rcols.end; j++)
adRow[j] *= adRow[j];
}
for (int j = rcols.start; j < rcols.end; j++)
{
curRow[j] += adRow[j] * (prevRow[j] - curRow[j]);
}
}
for (int i = res.rows - 2; i >= 0; i--)
{
WorkVec *prevRow = res.ptr<WorkVec>(i + 1);
WorkVec *curRow = res.ptr<WorkVec>(i);
DistType *adRow = alphaD.ptr<DistType>(i);
for (int j = rcols.start; j < rcols.end; j++)
{
curRow[j] += adRow[j] * (prevRow[j] - curRow[j]);
}
}
}
template <typename GuideVec>
DTFilterCPU::ComputeIDTHor_ParBody<GuideVec>::ComputeIDTHor_ParBody(DTFilterCPU& dtf_, Mat& guide_, Mat& dst_)
: dtf(dtf_), guide(guide_), dst(dst_)
{
dst.create(guide.rows, guide.cols + 1, IDistVec::type);
}
template <typename GuideVec>
void DTFilterCPU::ComputeIDTHor_ParBody<GuideVec>::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
const GuideVec *guideLine = guide.ptr<GuideVec>(i);
IDistType *idistLine = dst.ptr<IDistType>(i);
IDistType curDist = (IDistType)0;
idistLine[0] = (IDistType)0;
for (int j = 1; j < guide.cols; j++)
{
curDist += dtf.getTransformedDistance(guideLine[j-1], guideLine[j]);
idistLine[j] = curDist;
}
idistLine[guide.cols] = std::numeric_limits<IDistType>::max();
}
}
template <typename GuideVec>
DTFilterCPU::ComputeDTandIDTHor_ParBody<GuideVec>::ComputeDTandIDTHor_ParBody(DTFilterCPU& dtf_, Mat& guide_, Mat& dist_, Mat& idist_)
: dtf(dtf_), guide(guide_), dist(dist_), idist(idist_)
{
dist = getWExtendedMat(guide.rows, guide.cols, IDistVec::type, 1, 1);
idist = getWExtendedMat(guide.rows, guide.cols + 1, IDistVec::type);
maxRadius = dtf.getIterRadius(1);
}
template <typename GuideVec>
void DTFilterCPU::ComputeDTandIDTHor_ParBody<GuideVec>::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
const GuideVec *guideLine = guide.ptr<GuideVec>(i);
DistType *distLine = dist.ptr<DistType>(i);
IDistType *idistLine = idist.ptr<IDistType>(i);
DistType curDist;
IDistType curIDist = (IDistType)0;
int j;
distLine[-1] = maxRadius;
//idistLine[-1] = curIDist - maxRadius;
idistLine[0] = curIDist;
for (j = 0; j < guide.cols-1; j++)
{
curDist = (DistType) dtf.getTransformedDistance(guideLine[j], guideLine[j + 1]);
curIDist += curDist;
distLine[j] = curDist;
idistLine[j + 1] = curIDist;
}
idistLine[j + 1] = curIDist + maxRadius;
distLine[j] = maxRadius;
}
}
template <typename GuideVec>
DTFilterCPU::ComputeA0DTHor_ParBody<GuideVec>::ComputeA0DTHor_ParBody(DTFilterCPU& dtf_, Mat& guide_)
: dtf(dtf_), guide(guide_)
{
dtf.a0distHor.create(guide.rows, guide.cols - 1, DistVec::type);
lna = std::log(dtf.getIterAlpha(1));
}
template <typename GuideVec>
void DTFilterCPU::ComputeA0DTHor_ParBody<GuideVec>::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
const GuideVec *guideLine = guide.ptr<GuideVec>(i);
DistType *dstLine = dtf.a0distHor.ptr<DistType>(i);
for (int j = 0; j < guide.cols - 1; j++)
{
DistType d = (DistType)dtf.getTransformedDistance(guideLine[j], guideLine[j + 1]);
dstLine[j] = lna*d;
}
}
}
template <typename GuideVec>
DTFilterCPU::ComputeA0DTHor_ParBody<GuideVec>::~ComputeA0DTHor_ParBody()
{
cv::exp(dtf.a0distHor, dtf.a0distHor);
}
template <typename GuideVec>
DTFilterCPU::ComputeA0DTVert_ParBody<GuideVec>::ComputeA0DTVert_ParBody(DTFilterCPU& dtf_, Mat& guide_)
: dtf(dtf_), guide(guide_)
{
dtf.a0distVert.create(guide.rows - 1, guide.cols, DistVec::type);
lna = std::log(dtf.getIterAlpha(1));
}
template <typename GuideVec>
void DTFilterCPU::ComputeA0DTVert_ParBody<GuideVec>::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
DistType *dstLine = dtf.a0distVert.ptr<DistType>(i);
GuideVec *guideRow1 = guide.ptr<GuideVec>(i);
GuideVec *guideRow2 = guide.ptr<GuideVec>(i+1);
for (int j = 0; j < guide.cols; j++)
{
DistType d = (DistType)dtf.getTransformedDistance(guideRow1[j], guideRow2[j]);
dstLine[j] = lna*d;
}
}
}
template <typename GuideVec>
DTFilterCPU::ComputeA0DTVert_ParBody<GuideVec>::~ComputeA0DTVert_ParBody()
{
cv::exp(dtf.a0distVert, dtf.a0distVert);
}
template<typename GuideVec, typename SrcVec>
void domainTransformFilter( const Mat_<GuideVec>& guide,
const Mat_<SrcVec>& source,
Mat& dst,
double sigmaSpatial, double sigmaColor,
int mode, int numPasses
)
{
DTFilterCPU *dtf = DTFilterCPU::create_p_<GuideVec>(guide, sigmaSpatial, sigmaColor, mode, numPasses);
dtf->filter_<SrcVec>(source, dst);
delete dtf;
}
template<typename GuideVec, typename SrcVec>
void domainTransformFilter( const Mat& guide,
const Mat& source,
Mat& dst,
double sigmaSpatial, double sigmaColor,
int mode, int numPasses
)
{
DTFilterCPU *dtf = DTFilterCPU::create_p_<GuideVec>(guide, sigmaSpatial, sigmaColor, mode, numPasses);
dtf->filter_<SrcVec>(source, dst);
delete dtf;
}
}
#endif

@ -0,0 +1,829 @@
#include "precomp.hpp"
#include "dtfilter_ocl.hpp"
#include "modules/edgefilter/opencl_kernels.hpp"
//#include <opencv2/highgui.hpp>
using namespace std;
#define MEASURE_TIME(counter, code) { counter -= getTickCount(); {code;} counter += getTickCount(); }
#define MEASURE_KER_TIME(code) { kernelsTime -= getTickCount(); code; kernelsTime += getTickCount(); }
namespace cv
{
int64 DTFilterOCL::totalKernelsTime = 0;
Ptr<DTFilterOCL> DTFilterOCL::create(InputArray guide, double sigmaSpatial, double sigmaColor, int mode, int numIters)
{
DTFilterOCL *p = new DTFilterOCL();
p->init(guide, sigmaSpatial, sigmaColor, mode, numIters);
return Ptr<DTFilterOCL>(p);
}
void DTFilterOCL::init(InputArray guide_, double sigmaSpatial_, double sigmaColor_, int mode_, int numIters_)
{
CV_Assert( guide_.channels() <= 4 && (guide_.depth() == CV_8U || guide_.depth() == CV_32F) );
CV_Assert( numIters_ <= 3);
mode = mode_;
numIters = numIters_;
sigmaSpatial = std::max(1.00f, (float)sigmaSpatial_);
sigmaColor = std::max(0.01f, (float)sigmaColor_);
kernelsTime = 0;
singleFilterCall = false;
h = guide_.rows();
w = guide_.cols();
NC_ocl_implememtation = ALG_INDEX;
UMat guide = guide_.getUMat();
initProgram();
setBuildOptionsDT(guide);
if (mode == DTF_NC)
{
initDT_NC(guide);
}
else if (mode == DTF_RF)
{
initDT_RF(guide);
}
else if (mode == DTF_IC)
{
initDT_IC(guide);
}
else
{
CV_Error(Error::StsBadFlag, "Incorrect DT filter mode");
}
}
void DTFilterOCL::initDT_NC(UMat &guide)
{
bool useIDT = false;
if (useIDT)
{
UMat guideT;
MEASURE_KER_TIME( transpose(guide, guideT); );
computeIDT(guide, guideT);
if (NC_ocl_implememtation == ALG_INDEX)
{
computeIndexMat(idistHor, distIndexHor);
computeIndexMat(idistVert, distIndexVert);
idistHor.release();
idistVert.release();
}
}
else
{
UMat dsitHorWrapper, distVertWrapper;
computeDT(guide, dsitHorWrapper, distVertWrapper);
distHor = dsitHorWrapper(Range::all(), Range(1, w + 1));
MEASURE_KER_TIME( transpose(distVertWrapper, distVertWrapper); );
distVert = distVertWrapper(Range::all(), Range(1, h + 1));
if (NC_ocl_implememtation == ALG_INDEX)
{
computeIndexMat(distHor, distIndexHor, false);
computeIndexMat(distVert, distIndexVert, false);
distHor.release();
distVert.release();
}
}
}
void DTFilterOCL::initDT_RF(UMat &guide)
{
float sigmaRatio = (float)(sigmaSpatial / sigmaColor);
float alpha1 = getIterAlpha(1);
ocl::Kernel kerHor("compute_a0DT_vert", kerProgSrcDT, buildOptionsDT);
ocl::Kernel kerVert("compute_a0DT_vert", kerProgSrcDT, buildOptionsDT);
CV_Assert(!kerHor.empty() && !kerVert.empty());
UMat guideT;
transpose(guide, guideT);
a0distHor.create(guide.cols - 1, guide.rows, CV_32FC1);
a0distVert.create(guide.rows - 1, guide.cols, CV_32FC1);
kerHor.args(
ocl::KernelArg::ReadOnly(guideT),
ocl::KernelArg::ReadWriteNoSize(a0distHor),
sigmaRatio, alpha1);
kerVert.args(
ocl::KernelArg::ReadOnly(guide),
ocl::KernelArg::ReadWriteNoSize(a0distVert),
sigmaRatio, alpha1);
size_t globalSizeHor[] = { guide.cols - 1, guide.rows };
size_t globalSizeVert[] = { guide.rows - 1, guide.cols };
size_t localSizeHor[] = { 1, getMaxWorkGropSize() };
size_t localSizeVert[] = { 1, getMaxWorkGropSize() };
bool sync = true;
MEASURE_KER_TIME(kerHor.run(2, globalSizeHor, localSizeHor, sync));
MEASURE_KER_TIME(kerVert.run(2, globalSizeVert, localSizeVert, sync));
}
void DTFilterOCL::initDT_IC(UMat& guide)
{
UMat distHorWrapper, distVertWrapper;
computeDT(guide, distHorWrapper, distVertWrapper);
distHor = distHorWrapper(Range::all(), Range(1, w + 1));
distVert = distVertWrapper(Range(1, 1 + h), Range::all());
UMat distHorWrapperT;
MEASURE_KER_TIME( transpose(distHorWrapper, distHorWrapperT); );
distHorT = distHorWrapperT(Range(1, w + 1), Range::all());
UMat distVertWrapperT;
MEASURE_KER_TIME( transpose(distVertWrapper, distVertWrapperT); );
distVertT = distVertWrapperT(Range::all(), Range(1, h + 1));
computeIndexAndTailMat(distHor, distIndexHor, tailHor);
computeIndexAndTailMat(distVertT, distIndexVert, tailVert);
}
void DTFilterOCL::filter(InputArray src_, OutputArray dst_, int dDepth)
{
CV_Assert( src_.channels() <= 4 && (src_.depth() == CV_8U || src_.depth() == CV_32F) );
CV_Assert( src_.cols() == w && src_.rows() == h );
setBuildOptionsFlt(src_);
if (dDepth == -1) dDepth = src_.depth();
if (mode == DTF_NC)
{
UMat src = UMatAligned(h, w, workType);
UMat dst;
MEASURE_KER_TIME( src_.getUMat().convertTo(src, workType) );
if (NC_ocl_implememtation == ALG_PER_ROW)
{
filterNC_perRowAlg(src, dst);
}
else if (NC_ocl_implememtation == ALG_PER_PIXEL)
{
filterNC_PerPixelAlg(src, dst);
}
else if (NC_ocl_implememtation == ALG_INDEX)
{
filterNC_IndexAlg(src, dst);
}
dst.convertTo(dst_, dDepth);
}
else if (mode == DTF_RF)
{
filterRF(src_, dst_, dDepth);
}
else if (mode == DTF_IC)
{
filterIC(src_, dst_, dDepth);
}
else
{
CV_Error(Error::StsBadFlag, "Incorrect DT filter mode");
}
}
void DTFilterOCL::initProgram()
{
//CV_Assert(ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU);
kerProgSrcDT = ocl::edgefilter::dtfilter_dt_oclsrc;
kerProgSrcFilter = ocl::edgefilter::dtfilter_flt_oclsrc;
}
DTFilterOCL::DTFilterOCL()
: a0distHor(distHor), a0distVert(distVert)
{
}
DTFilterOCL::~DTFilterOCL()
{
totalKernelsTime += kernelsTime;
//std::cout << "Kernels time " << kernelsTime / getTickFrequency() << " sec.\n";
//std::cout << "Total kernels time " << totalKernelsTime / getTickFrequency() << " sec.\n";
}
void DTFilterOCL::integrateCols(UMat& src, UMat& isrc)
{
CV_Assert(src.depth() == CV_32F);
int sizeof_float4 = 16;
int reqChuncks4f = (src.cols*src.elemSize() + sizeof_float4 - 1) / sizeof_float4;
int maxChuncks4f = src.step.p[0] / sizeof_float4;
CV_Assert(reqChuncks4f <= maxChuncks4f);
createAligned(isrc, src.rows + 1, src.cols, src.type(), sizeof_float4, USAGE_ALLOCATE_DEVICE_MEMORY);
ocl::Kernel ker = ocl::Kernel("integrate_cols_4f", kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!ker.empty());
ker.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(isrc), (int)src.rows, reqChuncks4f);
size_t globKerSize4f[] = { reqChuncks4f };
bool oclKerStatus;
MEASURE_KER_TIME( oclKerStatus = ker.run(1, globKerSize4f, NULL, true); );
CV_Assert(oclKerStatus);
}
void DTFilterOCL::computeIndexMat(UMat& _dist, UMat& distIndex, bool useIDT)
{
distIndex.create(_dist.rows*numIters, 2*_dist.cols, CV_32SC1);
const char *kernelName = (useIDT) ? "find_conv_bounds_by_idt" : "find_conv_bounds_by_dt";
ocl::Kernel ker = ocl::Kernel(kernelName, kerProgSrcDT, buildOptionsDT);
ker.args(
ocl::KernelArg::ReadOnly(_dist),
ocl::KernelArg::WriteOnlyNoSize(distIndex),
getIterRadius(1), getIterRadius(2), getIterRadius(3)
);
size_t globSize[] = { _dist.rows, _dist.cols };
size_t localSize[] = { 1, getMaxWorkGropSize() };
bool oclKernelStatus;
MEASURE_KER_TIME(oclKernelStatus = ker.run(2, globSize, localSize, true));
CV_Assert(oclKernelStatus);
}
void DTFilterOCL::computeIndexAndTailMat(UMat& dist, UMat& distIndex, UMat& tail)
{
distIndex.create(dist.rows*numIters, 2*dist.cols, CV_32SC1);
tail.create(dist.rows*numIters, 2*dist.cols, CV_32FC1);
ocl::Kernel ker = ocl::Kernel("find_conv_bounds_and_tails", kerProgSrcDT, buildOptionsDT);
CV_Assert(!ker.empty());
ker.args(
ocl::KernelArg::ReadOnly(dist),
ocl::KernelArg::WriteOnlyNoSize(distIndex),
ocl::KernelArg::WriteOnlyNoSize(tail),
getIterRadius(1), getIterRadius(2), getIterRadius(3)
);
size_t globSize[] = {dist.rows, dist.cols};
size_t localSize[] = {1, getMaxWorkGropSize()};
bool oclKerStatus;
MEASURE_KER_TIME( oclKerStatus = ker.run(2, globSize, localSize, true); );
CV_Assert(oclKerStatus);
}
int DTFilterOCL::getMaxWorkGropSize()
{
int maxWorkGropSize = ocl::Device::getDefault().maxWorkGroupSize();
return (maxWorkGropSize <= 0) ? 256 : maxWorkGropSize;
}
int DTFilterOCL::getCacheLineSize()
{
int lineSize = ocl::Device::getDefault().globalMemCacheLineSize();
return (lineSize <= 0) ? 128 : lineSize;
}
void DTFilterOCL::filterRF(InputArray src_, OutputArray dst_, int dDepth)
{
UMat res = UMatAligned(h, w, workType, getCacheLineSize());
UMat resT = UMatAligned(w, h, workType, getCacheLineSize());
UMat adistHor, adistVert;
MEASURE_KER_TIME(src_.getUMat().convertTo(res, workType));
if (singleFilterCall || numIters == 1)
{
adistHor = a0distHor;
adistVert = a0distVert;
}
else
{
a0distHor.copyTo(adistHor);
a0distVert.copyTo(adistVert);
}
if (true)
{
filterRF_blockAlg(res, resT, adistHor, adistVert);
}
else
{
filterRF_naiveAlg(res, resT, adistHor, adistVert);
}
res.convertTo(dst_, dDepth);
}
void DTFilterOCL::filterRF_blockAlg(UMat& res, UMat& resT, UMat& adistHor, UMat& adistVert)
{
resT.create(res.cols, res.rows, res.type());
UMat weights = UMatAligned(res.rows, res.cols, CV_32FC1, getCacheLineSize(), USAGE_ALLOCATE_DEVICE_MEMORY);
UMat weightsT = UMatAligned(res.cols, res.rows, CV_32FC1, getCacheLineSize(), USAGE_ALLOCATE_DEVICE_MEMORY);
for (int iter = 1; iter <= numIters; iter++)
{
transpose(res, resT);
filterRF_iter_vert_pass(resT, adistHor, weightsT);
transpose(resT, res);
filterRF_iter_vert_pass(res, adistVert, weights);
if (iter < numIters)
{
cv::pow(adistHor, 2, adistHor);
cv::pow(adistVert, 2, adistVert);
}
}
}
void DTFilterOCL::filterRF_iter_vert_pass(UMat& res, UMat& adist, UMat& weights)
{
int blockSize = (int)std::sqrt((float)res.rows);
int blocksCount = (res.rows + blockSize-1)/blockSize;
bool sync = true;
for (int passId = 0; passId <= 1; passId++)
{
{
char *knames[] = {"filter_RF_block_init_fwd", "filter_RF_block_init_bwd"};
ocl::Kernel kerInit(knames[passId], kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!kerInit.empty());
kerInit.args(
ocl::KernelArg::ReadWrite(res),
ocl::KernelArg::ReadWriteNoSize(adist),
ocl::KernelArg::WriteOnlyNoSize(weights),
blockSize);
size_t globSize[] = { blocksCount, res.cols };
size_t localSize[] = { 1, getMaxWorkGropSize() };
CV_Assert(kerInit.run(2, globSize, localSize, sync));
//imwrite(String(knames[passId]) + ".png", res.t());
}
{
const char *knames[] = {"filter_RF_block_fill_borders_fwd", "filter_RF_block_fill_borders_bwd"};
ocl::Kernel kerFillBorders(knames[passId], kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!kerFillBorders.empty());
kerFillBorders.args(
ocl::KernelArg::ReadWrite(res),
ocl::KernelArg::ReadOnlyNoSize(weights),
blockSize);
size_t globSize[] = { res.cols };
CV_Assert(kerFillBorders.run(1, globSize, NULL, sync));
//imwrite(String(knames[passId]) + ".png", res.t());
}
{
const char *knames[] = {"filter_RF_block_fill_fwd", "filter_RF_block_fill_bwd"};
ocl::Kernel kerFill(knames[passId], kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!kerFill.empty());
kerFill.args(
ocl::KernelArg::ReadWrite(res),
ocl::KernelArg::ReadOnlyNoSize(weights),
blockSize);
size_t globSize[] = { res.rows - blockSize, res.cols };
size_t localSize[] = { 1, getMaxWorkGropSize() };
CV_Assert(kerFill.run(2, globSize, localSize, sync));
//imwrite(String(knames[passId]) + ".png", res.t());
}
}
}
void DTFilterOCL::filterRF_naiveAlg(UMat& res, UMat& resT, UMat& adistHor, UMat& adistVert)
{
ocl::Kernel kerHor("filter_RF_vert", kerProgSrcFilter, buildOptionsFlt);
ocl::Kernel kerVert("filter_RF_vert", kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!kerHor.empty() && !kerVert.empty());
size_t globSizeHor[] = { h };
size_t globSizeVert[] = { w };
bool syncQueue = true;
for (int iter = 1; iter <= numIters; iter++)
{
bool statusHorPass, statusVertPass;
int write_a0 = (iter < numIters);
MEASURE_KER_TIME(transpose(res, resT));
kerHor.args(ocl::KernelArg::ReadWrite(resT), ocl::KernelArg::ReadWriteNoSize(adistHor), write_a0);
MEASURE_KER_TIME(statusHorPass = kerHor.run(1, globSizeHor, NULL, syncQueue));
MEASURE_KER_TIME(transpose(resT, res));
kerVert.args(ocl::KernelArg::ReadWrite(res), ocl::KernelArg::ReadWriteNoSize(adistVert), write_a0);
MEASURE_KER_TIME(statusVertPass = kerVert.run(1, globSizeVert, NULL, syncQueue));
CV_Assert(statusHorPass && statusVertPass);
}
}
void DTFilterOCL::filterNC_IndexAlg(UMat& src, UMat& dst)
{
CV_Assert(src.rows == h && src.cols == w);
UMat srcT = UMatAligned(w, h, workType);
UMat isrc, isrcT;
ocl::Kernel kerFiltHor = ocl::Kernel("filter_NC_hor_by_bounds", kerProgSrcFilter, buildOptionsFlt);
ocl::Kernel kerFiltVert = ocl::Kernel("filter_NC_hor_by_bounds", kerProgSrcFilter, buildOptionsFlt);
size_t globSizeHor[] = {h, w};
size_t globSizeVert[] = {w, h};
size_t localSize[] = {1, getMaxWorkGropSize()};
bool sync = true;
MEASURE_KER_TIME( transpose(src, srcT) );
for (int iter = 0; iter < numIters; iter++)
{
bool kerStatus = true;
MEASURE_KER_TIME( integrateCols(srcT, isrcT) );
MEASURE_KER_TIME( transpose(isrcT, isrc) );
kerFiltHor.args(
ocl::KernelArg::ReadOnlyNoSize(isrc),
ocl::KernelArg::ReadOnlyNoSize(distIndexHor),
ocl::KernelArg::WriteOnly(src),
iter
);
MEASURE_KER_TIME( kerStatus &= kerFiltHor.run(2, globSizeHor, localSize, sync) );
MEASURE_KER_TIME( integrateCols(src, isrc) );
MEASURE_KER_TIME( transpose(isrc, isrcT) );
kerFiltVert.args(
ocl::KernelArg::ReadOnlyNoSize(isrcT),
ocl::KernelArg::ReadOnlyNoSize(distIndexVert),
ocl::KernelArg::WriteOnly(srcT),
iter
);
MEASURE_KER_TIME( kerStatus &= kerFiltVert.run(2, globSizeVert, localSize, sync) );
CV_Assert(kerStatus);
}
MEASURE_KER_TIME( transpose(srcT, dst) );
}
void DTFilterOCL::filterNC_perRowAlg(UMat& src, UMat& dst)
{
dst.create(h, w, workType);
UMat& res = dst;
UMat srcT = UMat(w, h, workType);
UMat resT = UMat(w, h, workType);
MEASURE_KER_TIME(transpose(src, srcT));
ocl::Kernel kerHor("filter_NC_vert", kerProgSrcFilter, buildOptionsFlt);
ocl::Kernel kerVert("filter_NC_vert", kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!kerHor.empty() && !kerVert.empty());
size_t globSizeHor[] = { h };
size_t globSizeVert[] = { w };
bool syncQueue = true;
for (int iter = 1; iter <= numIters; iter++)
{
bool statusHorPass, statusVertPass;
float radius = getIterRadius(iter);
kerHor.args(
ocl::KernelArg::ReadOnly(srcT),
ocl::KernelArg::ReadOnlyNoSize(idistHor),
ocl::KernelArg::WriteOnlyNoSize(resT),
radius
);
MEASURE_KER_TIME(statusHorPass = kerHor.run(1, globSizeHor, NULL, syncQueue));
MEASURE_KER_TIME(transpose(resT, src));
kerVert.args(
ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::ReadOnlyNoSize(idistVert),
ocl::KernelArg::WriteOnlyNoSize(res),
radius
);
MEASURE_KER_TIME(statusVertPass = kerVert.run(1, globSizeVert, NULL, syncQueue));
if (iter < numIters)
MEASURE_KER_TIME(transpose(res, srcT));
CV_Assert(statusHorPass && statusVertPass);
}
}
void DTFilterOCL::filterNC_PerPixelAlg(UMat& src, UMat& dst)
{
dst = src;
UMat res(h, w, workType);
UMat srcT(w, h, workType), resT(w, h, workType);
ocl::Kernel kerHor("filter_NC_hor", kerProgSrcFilter);
ocl::Kernel kerVert("filter_NC_hor", kerProgSrcFilter);
CV_Assert(!kerHor.empty() && !kerVert.empty());
size_t globSizeHor[] = { h, w };
size_t globSizeVert[] = { w, h };
size_t localSize[] = { 1, getMaxWorkGropSize() };
bool sync = true;
for (int iter = 1; iter <= numIters; iter++)
{
bool kerStatus = true;
float radius = getIterRadius(iter);
kerHor.args(
ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::ReadOnlyNoSize(idistHor),
ocl::KernelArg::WriteOnlyNoSize(res),
radius
);
MEASURE_KER_TIME(kerStatus &= kerHor.run(2, globSizeHor, localSize, sync));
MEASURE_KER_TIME(transpose(res, srcT));
kerVert.args(
ocl::KernelArg::ReadOnly(srcT),
ocl::KernelArg::ReadOnlyNoSize(idistVert),
ocl::KernelArg::WriteOnlyNoSize(resT),
radius
);
MEASURE_KER_TIME(kerStatus &= kerVert.run(2, globSizeVert, localSize, sync));
MEASURE_KER_TIME(transpose(resT, src));
CV_Assert(kerStatus);
}
}
void DTFilterOCL::filterIC(InputArray src_, OutputArray dst_, int dDepth)
{
UMat srcWrapper(h + 1, w + 2, workType);
UMat srcTWrapper(w + 1, h + 2, workType);
UMat src = srcWrapper(Range(1, 1 + h), Range(1, 1 + w));
UMat srcT = srcTWrapper(Range(1, 1 + w), Range(1, 1 + h));
UMat isrc = UMatAligned(h, w + 1, workType);
UMat isrcT = UMatAligned(w + 1, h, workType);
UMat res = UMatAligned(h, w, workType);
UMat resT = UMatAligned(w, h, workType);
MEASURE_KER_TIME( src_.getUMat().convertTo(src, workType) );
ocl::Kernel kerHor = ocl::Kernel("filter_IC_hor", kerProgSrcFilter, buildOptionsFlt);
ocl::Kernel kerVert = ocl::Kernel("filter_IC_hor", kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!kerHor.empty() && !kerVert.empty());
size_t globSizeHor[] = {h, w};
size_t globSizeVert[] = {w, h};
size_t localSize[] = {1, getMaxWorkGropSize()};
bool sync = true;
MEASURE_KER_TIME( transpose(src, resT) );
for (int iter = 0; iter < numIters; iter++)
{
bool oclKerStatus = true;
float curIterRadius = getIterRadius(iter);
integrateColsIC(resT, distHorT, isrcT);
MEASURE_KER_TIME( transpose(isrcT, isrc); );
if (iter != 0) MEASURE_KER_TIME( transpose(resT, src); );
duplicateVerticalBorders(srcWrapper);
kerHor.args(
ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::ReadOnlyNoSize(isrc),
ocl::KernelArg::ReadOnlyNoSize(distIndexHor),
ocl::KernelArg::ReadOnlyNoSize(tailHor),
ocl::KernelArg::ReadOnlyNoSize(distHor),
ocl::KernelArg::WriteOnlyNoSize(res),
curIterRadius, iter
);
MEASURE_KER_TIME( oclKerStatus &= kerHor.run(2, globSizeHor, localSize, sync); );
integrateColsIC(res, distVert, isrc);
MEASURE_KER_TIME( transpose(isrc, isrcT); );
MEASURE_KER_TIME( transpose(res, srcT); );
duplicateVerticalBorders(srcTWrapper);
kerVert.args(
ocl::KernelArg::ReadOnly(srcT),
ocl::KernelArg::ReadOnlyNoSize(isrcT),
ocl::KernelArg::ReadOnlyNoSize(distIndexVert),
ocl::KernelArg::ReadOnlyNoSize(tailVert),
ocl::KernelArg::ReadOnlyNoSize(distVertT),
ocl::KernelArg::WriteOnlyNoSize(resT),
curIterRadius, iter
);
MEASURE_KER_TIME( oclKerStatus &= kerVert.run(2, globSizeVert, localSize, sync); );
CV_Assert(oclKerStatus);
}
if (dDepth == resT.depth())
{
transpose(resT, dst_);
}
else
{
transpose(resT, res);
res.convertTo(dst_, dDepth);
}
}
void DTFilterOCL::setBuildOptionsFlt(InputArray src_)
{
int cn = src_.channels();
//int depth = src_.depth();
workType = CV_MAKE_TYPE(CV_32F, cn);
buildOptionsFlt = format(
"-D cn=%d "
"-D SrcVec=%s "
"-D NUM_ITERS=%d "
,
cn,
ocl::typeToStr(workType),
numIters
);
//std::cout << "build options flt.: " << buildOptionsFlt.c_str() << "\n";
}
void DTFilterOCL::setBuildOptionsDT(UMat &guide)
{
int gType = guide.type();
int gcn = guide.channels();
int gDepth = guide.depth();
char strBuf[40];
buildOptionsDT = format(
"-D NUM_ITERS=%d "
"-D cn=%d "
"-D GuideType=%s "
"-D GuideVec=%s "
,
numIters,
gcn,
ocl::typeToStr(gDepth),
ocl::typeToStr(gType)
);
if (gDepth != CV_32F)
{
buildOptionsDT += format("-D convert_guide=%s ", ocl::convertTypeStr(gDepth, CV_32F, gcn, strBuf));
}
//cout << "buildOptions:" << buildOptionsDT.c_str() << "\n";
}
void DTFilterOCL::computeDT(UMat& guide, UMat& distHorOuter, UMat& distVertOuter)
{
ocl::Kernel kerHor = ocl::Kernel("compute_dt_hor", kerProgSrcDT, buildOptionsDT);
ocl::Kernel kerVert = ocl::Kernel("compute_dt_vert", kerProgSrcDT, buildOptionsDT);
CV_Assert(!kerHor.empty() && !kerVert.empty());
distHorOuter.create(h, w + 1, CV_32FC1);
distVertOuter.create(h + 1, w, CV_32FC1);
UMat distHor = distHorOuter(Range::all(), Range(1, w + 1));
UMat distVert = distVertOuter(Range(1, h + 1), Range::all());
float maxRadius = 1.1f*getIterRadius(1);
float sigmaRatio = (float)(sigmaSpatial/sigmaColor);
kerHor.args(
ocl::KernelArg::ReadOnly(guide),
ocl::KernelArg::WriteOnlyNoSize(distHor),
sigmaRatio,
maxRadius
);
kerVert.args(
ocl::KernelArg::ReadOnly(guide),
ocl::KernelArg::WriteOnlyNoSize(distVert),
sigmaRatio,
maxRadius
);
size_t globSizeHor[] = {h, w + 1};
size_t globSizeVert[] = {h + 1, w};
size_t localSize[] = {1, getMaxWorkGropSize()};
bool sync = true;
bool oclKernelStatus = true;
MEASURE_KER_TIME( oclKernelStatus &= kerHor.run(2, globSizeHor, localSize, sync); );
MEASURE_KER_TIME( oclKernelStatus &= kerVert.run(2, globSizeVert, localSize, sync); );
CV_Assert(oclKernelStatus);
}
void DTFilterOCL::computeIDT(UMat& guide, UMat& guideT)
{
ocl::Kernel kerHor("compute_idt_vert", kerProgSrcDT, buildOptionsDT);
ocl::Kernel kerVert("compute_idt_vert", kerProgSrcDT, buildOptionsDT);
CV_Assert(!kerHor.empty() && !kerVert.empty());
UMat idistHorWrap(w + 2, h, CV_32FC1);
UMat idistVertWrap(h + 2, w, CV_32FC1);
idistHor = idistHorWrap(Range(1, 1 + w), Range::all());
idistVert = idistVertWrap(Range(1, 1 + h), Range::all());
float sigmaRatio = (float)(sigmaSpatial / sigmaColor);
kerHor.args(ocl::KernelArg::ReadOnly(guideT),
ocl::KernelArg::WriteOnlyNoSize(idistHor),
sigmaRatio);
kerVert.args(ocl::KernelArg::ReadOnly(guide),
ocl::KernelArg::WriteOnlyNoSize(idistVert),
sigmaRatio);
size_t globalSizeHor[] = { guide.rows };
size_t globalSizeVert[] = { guide.cols };
bool sync = true;
MEASURE_KER_TIME(kerHor.run(1, globalSizeHor, NULL, sync));
MEASURE_KER_TIME(kerVert.run(1, globalSizeVert, NULL, sync));
if (NC_ocl_implememtation != ALG_PER_ROW)
{
MEASURE_KER_TIME(transpose(idistHorWrap, idistHorWrap));
MEASURE_KER_TIME(transpose(idistVertWrap, idistVertWrap));
idistHor = idistHorWrap(Range::all(), Range(1, 1 + w));
idistVert = idistVertWrap(Range::all(), Range(1, 1 + h));
}
else
{
idistHor = idistHorWrap(Range(1, 1 + w), Range::all());
idistVert = idistVertWrap(Range(1, 1 + h), Range::all());
}
}
void DTFilterOCL::duplicateVerticalBorders(UMat& srcOuter)
{
Range rangeRows(0, srcOuter.rows);
{
UMat leftBorderSrc = srcOuter(rangeRows, Range(1, 2));
UMat leftBorderDst = srcOuter(rangeRows, Range(0, 1));
leftBorderSrc.copyTo(leftBorderDst);
}
{
UMat rightBorderSrc = srcOuter(rangeRows, Range(srcOuter.cols - 2, srcOuter.cols - 1));
UMat rightBorderDst = srcOuter(rangeRows, Range(srcOuter.cols - 1, srcOuter.cols));
rightBorderSrc.copyTo(rightBorderDst);
}
}
void DTFilterOCL::integrateColsIC(UMat& src, UMat& dist, UMat& isrc)
{
CV_Assert(src.size() == dist.size() && src.depth() == CV_32F);
isrc.create(src.rows + 1, src.cols, workType);
ocl::Kernel ker("integrate_cols_with_dist", kerProgSrcFilter, buildOptionsFlt);
CV_Assert(!ker.empty());
ker.args(
ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::ReadOnlyNoSize(dist),
ocl::KernelArg::WriteOnlyNoSize(isrc)
);
size_t globSize[] = {src.cols};
size_t localSize[] = {getMaxWorkGropSize()};
bool oclKernelStatus;
MEASURE_KER_TIME( oclKernelStatus = ker.run(1, globSize, localSize, true) );
CV_Assert(oclKernelStatus);
}
cv::UMat DTFilterOCL::UMatAligned(int rows, int cols, int type, int align, int usageFlags)
{
int depth = CV_MAT_DEPTH(type);
int cn = CV_MAT_CN(type);
int elemSize1 = CV_ELEM_SIZE1(type);
int elemSize = CV_ELEM_SIZE(type);
int lineSize = ((cols*elemSize + align-1) / align)*align;
CV_Assert(lineSize % elemSize1 == 0);
UMat dst(rows, lineSize/elemSize1, CV_MAKE_TYPE(depth, 1), usageFlags);
return dst(Range::all(), Range(0, cols*cn)).reshape(cn);
}
void DTFilterOCL::createAligned(UMat& src, int rows, int cols, int type, int align, int usageFlags)
{
if (src.empty() || src.rows != rows || src.cols != cols || src.type() != type)
{
if (CV_ELEM_SIZE(type)*cols % align == 0)
{
src.create(rows, cols, type, (UMatUsageFlags)usageFlags);
}
else
{
src = UMatAligned(rows, cols, type, align, usageFlags);
}
}
}
void DTFilterOCL::setSingleFilterCall(bool flag)
{
this->singleFilterCall = flag;
}
}

@ -0,0 +1,112 @@
#pragma once
#include "precomp.hpp"
namespace cv
{
class DTFilterOCL : public DTFilter
{
public:
static Ptr<DTFilterOCL> create(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
void filter(InputArray src, OutputArray dst, int dDepth = -1);
void setSingleFilterCall(bool flag);
~DTFilterOCL();
protected: /*Members declarations*/
UMat idistHor, idistVert;
UMat distHor, distVert;
UMat distHorT, distVertT;
UMat &a0distHor, &a0distVert; //synonyms of distHor distVert
UMat distIndexHor, distIndexVert;
UMat tailVert, tailHor;
int mode, numIters;
float sigmaSpatial, sigmaColor;
bool singleFilterCall;
int h, w;
int workType;
double meanDist;
static int64 totalKernelsTime;
int64 kernelsTime;
ocl::ProgramSource kerProgSrcDT;
ocl::ProgramSource kerProgSrcFilter;
cv::String buildOptionsFlt;
cv::String buildOptionsDT;
int NC_ocl_implememtation;
enum NCOclImplementation
{
ALG_PER_ROW,
ALG_PER_PIXEL,
ALG_INDEX,
};
protected: /*Functions declarations*/
DTFilterOCL();
void init(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);
inline double getIterSigmaH(int iterNum)
{
return sigmaSpatial * std::pow(2.0, numIters - iterNum) / sqrt(std::pow(4.0, numIters) - 1);
}
inline float getIterRadius(int iterNum)
{
return (float)(3.0*getIterSigmaH(iterNum));
}
inline float getIterAlpha(int iterNum)
{
return (float)std::exp(-std::sqrt(2.0 / 3.0) / getIterSigmaH(iterNum));
}
int getMaxWorkGropSize();
int getCacheLineSize();
void initProgram();
void setBuildOptionsFlt(InputArray src_);
void setBuildOptionsDT(UMat& guide);
void initDT_NC(UMat& guide);
void initDT_RF(UMat& guide);
void initDT_IC(UMat& guide);
void computeDT(UMat& guide, UMat& distHorOuter, UMat& distVertOuter);
void computeIDT(UMat& guide, UMat& guideT);
void filterNC_IndexAlg(UMat& src, UMat& dst);
void filterNC_perRowAlg(UMat& src, UMat& dst);
void filterNC_PerPixelAlg(UMat& src, UMat& dst);
void filterIC(InputArray src_, OutputArray dst_, int dDepth);
void filterRF(InputArray src_, OutputArray dst_, int dDepth);
void filterRF_naiveAlg(UMat& res, UMat& resT, UMat& adistHor, UMat& adistVert);
void filterRF_blockAlg(UMat& res, UMat& resT, UMat& adistHor, UMat& adistVert);
void filterRF_iter_vert_pass(UMat& res, UMat& adist, UMat& weights);
void computeIndexMat(UMat& idist, UMat& distIndex, bool useIDT = true);
void integrateCols(UMat& src, UMat& isrc);
void computeIndexAndTailMat(UMat& dist, UMat& distIndex, UMat& tail);
void duplicateVerticalBorders(UMat& srcOuter);
void integrateColsIC(UMat& src, UMat& dist, UMat& isrc);
static UMat UMatAligned(int rows, int cols, int type, int align = 16, int useageFlags = USAGE_DEFAULT);
static void createAligned(UMat& src, int rows, int cols, int type, int align = 16, int usageFlags = USAGE_DEFAULT);
};
}

@ -0,0 +1,512 @@
#include "precomp.hpp"
#include "edgeaware_filters_common.hpp"
#include "dtfilter_cpu.hpp"
#include <opencv2/core/cvdef.h>
#include <opencv2/core/utility.hpp>
#include <cmath>
using namespace std;
#ifndef SQR
#define SQR(x) ((x)*(x))
#endif
#if defined(CV_SSE)
static volatile bool CPU_SUPPORT_SSE1 = cv::checkHardwareSupport(CV_CPU_SSE);
#endif
#ifdef CV_SSE2
static volatile bool CPU_SUPPORT_SSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
#endif
namespace cv
{
Ptr<DTFilter> createDTFilterRF(InputArray adistHor, InputArray adistVert, double sigmaSpatial, double sigmaColor, int numIters)
{
return Ptr<DTFilter>(DTFilterCPU::createRF(adistHor, adistVert, sigmaSpatial, sigmaColor, numIters));
}
int getTotalNumberOfChannels(InputArrayOfArrays src)
{
CV_Assert(src.isMat() || src.isUMat() || src.isMatVector() || src.isUMatVector());
if (src.isMat() || src.isUMat())
{
return src.channels();
}
else if (src.isMatVector())
{
int cnNum = 0;
const vector<Mat>& srcv = *static_cast<const vector<Mat>*>(src.getObj());
for (unsigned i = 0; i < srcv.size(); i++)
cnNum += srcv[i].channels();
return cnNum;
}
else if (src.isUMatVector())
{
int cnNum = 0;
const vector<UMat>& srcv = *static_cast<const vector<UMat>*>(src.getObj());
for (unsigned i = 0; i < srcv.size(); i++)
cnNum += srcv[i].channels();
return cnNum;
}
else
{
return 0;
}
}
void checkSameSizeAndDepth(InputArrayOfArrays src, Size &sz, int &depth)
{
CV_Assert(src.isMat() || src.isUMat() || src.isMatVector() || src.isUMatVector());
if (src.isMat() || src.isUMat())
{
CV_Assert(!src.empty());
sz = src.size();
depth = src.depth();
}
else if (src.isMatVector())
{
const vector<Mat>& srcv = *static_cast<const vector<Mat>*>(src.getObj());
CV_Assert(srcv.size() > 0);
for (unsigned i = 0; i < srcv.size(); i++)
{
CV_Assert(srcv[i].depth() == srcv[0].depth());
CV_Assert(srcv[i].size() == srcv[0].size());
}
sz = srcv[0].size();
depth = srcv[0].depth();
}
else if (src.isUMatVector())
{
const vector<UMat>& srcv = *static_cast<const vector<UMat>*>(src.getObj());
CV_Assert(srcv.size() > 0);
for (unsigned i = 0; i < srcv.size(); i++)
{
CV_Assert(srcv[i].depth() == srcv[0].depth());
CV_Assert(srcv[i].size() == srcv[0].size());
}
sz = srcv[0].size();
depth = srcv[0].depth();
}
}
namespace eaf
{
inline float getFloatSignBit()
{
union
{
int signInt;
float signFloat;
};
signInt = 0x80000000;
return signFloat;
}
void add_(register float *dst, register float *src1, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b;
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
b = _mm_loadu_ps(dst + j);
b = _mm_add_ps(b, a);
_mm_storeu_ps(dst + j, b);
}
}
#endif
for (; j < w; j++)
dst[j] += src1[j];
}
void mul(register float *dst, register float *src1, register float *src2, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b;
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
b = _mm_loadu_ps(src2 + j);
b = _mm_mul_ps(a, b);
_mm_storeu_ps(dst + j, b);
}
}
#endif
for (; j < w; j++)
dst[j] = src1[j] * src2[j];
}
void mul(register float *dst, register float *src1, float src2, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b;
b = _mm_set_ps1(src2);
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
a = _mm_mul_ps(a, b);
_mm_storeu_ps(dst + j, a);
}
}
#endif
for (; j < w; j++)
dst[j] = src1[j]*src2;
}
void mad(register float *dst, register float *src1, float alpha, float beta, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b, c;
a = _mm_set_ps1(alpha);
b = _mm_set_ps1(beta);
for (; j < w - 3; j += 4)
{
c = _mm_loadu_ps(src1 + j);
c = _mm_mul_ps(c, a);
c = _mm_add_ps(c, b);
_mm_storeu_ps(dst + j, c);
}
}
#endif
for (; j < w; j++)
dst[j] = alpha*src1[j] + beta;
}
void sqr_(register float *dst, register float *src1, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a;
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
a = _mm_mul_ps(a, a);
_mm_storeu_ps(dst + j, a);
}
}
#endif
for (; j < w; j++)
dst[j] = src1[j] * src1[j];
}
void sqr_dif(register float *dst, register float *src1, register float *src2, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 d;
for (; j < w - 3; j += 4)
{
d = _mm_sub_ps(_mm_loadu_ps(src1 + j), _mm_loadu_ps(src2 + j));
d = _mm_mul_ps(d, d);
_mm_storeu_ps(dst + j, d);
}
}
#endif
for (; j < w; j++)
dst[j] = (src1[j] - src2[j])*(src1[j] - src2[j]);
}
void add_mul(register float *dst, register float *src1, register float *src2, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b, c;
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
b = _mm_loadu_ps(src2 + j);
b = _mm_mul_ps(b, a);
c = _mm_loadu_ps(dst + j);
c = _mm_add_ps(c, b);
_mm_storeu_ps(dst + j, c);
}
}
#endif
for (; j < w; j++)
{
dst[j] += src1[j] * src2[j];
}
}
void add_sqr(register float *dst, register float *src1, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, c;
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
a = _mm_mul_ps(a, a);
c = _mm_loadu_ps(dst + j);
c = _mm_add_ps(c, a);
_mm_storeu_ps(dst + j, c);
}
}
#endif
for (; j < w; j++)
{
dst[j] += src1[j] * src1[j];
}
}
void add_sqr_dif(register float *dst, register float *src1, register float *src2, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, d;
for (; j < w - 3; j += 4)
{
d = _mm_sub_ps(_mm_loadu_ps(src1 + j), _mm_loadu_ps(src2 + j));
d = _mm_mul_ps(d, d);
a = _mm_loadu_ps(dst + j);
a = _mm_add_ps(a, d);
_mm_storeu_ps(dst + j, a);
}
}
#endif
for (; j < w; j++)
{
dst[j] += (src1[j] - src2[j])*(src1[j] - src2[j]);
}
}
void sub_mul(register float *dst, register float *src1, register float *src2, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b, c;
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
b = _mm_loadu_ps(src2 + j);
b = _mm_mul_ps(b, a);
c = _mm_loadu_ps(dst + j);
c = _mm_sub_ps(c, b);
_mm_storeu_ps(dst + j, c);
}
}
#endif
for (; j < w; j++)
dst[j] -= src1[j] * src2[j];
}
void sub_mad(register float *dst, register float *src1, register float *src2, float c0, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b, c;
__m128 cnst = _mm_set_ps1(c0);
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
b = _mm_loadu_ps(src2 + j);
b = _mm_mul_ps(b, a);
c = _mm_loadu_ps(dst + j);
c = _mm_sub_ps(c, cnst);
c = _mm_sub_ps(c, b);
_mm_storeu_ps(dst + j, c);
}
}
#endif
for (; j < w; j++)
dst[j] -= src1[j] * src2[j] + c0;
}
void det_2x2(register float *dst, register float *a00, register float *a01, register float *a10, register float *a11, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b;
for (; j < w - 3; j += 4)
{
a = _mm_mul_ps(_mm_loadu_ps(a00 + j), _mm_loadu_ps(a11 + j));
b = _mm_mul_ps(_mm_loadu_ps(a01 + j), _mm_loadu_ps(a10 + j));
a = _mm_sub_ps(a, b);
_mm_storeu_ps(dst + j, a);
}
}
#endif
for (; j < w; j++)
dst[j] = a00[j]*a11[j] - a01[j]*a10[j];
}
void div_det_2x2(register float *a00, register float *a01, register float *a11, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
const __m128 SIGN_MASK = _mm_set_ps1(getFloatSignBit());
__m128 a, b, _a00, _a01, _a11;
for (; j < w - 3; j += 4)
{
_a00 = _mm_loadu_ps(a00 + j);
_a11 = _mm_loadu_ps(a11 + j);
a = _mm_mul_ps(_a00, _a11);
_a01 = _mm_loadu_ps(a01 + j);
_a01 = _mm_xor_ps(_a01, SIGN_MASK);
b = _mm_mul_ps(_a01, _a01);
a = _mm_sub_ps(a, b);
_a01 = _mm_div_ps(_a01, a);
_a00 = _mm_div_ps(_a00, a);
_a11 = _mm_div_ps(_a11, a);
_mm_storeu_ps(a01 + j, _a01);
_mm_storeu_ps(a00 + j, _a00);
_mm_storeu_ps(a11 + j, _a11);
}
}
#endif
for (; j < w; j++)
{
float det = a00[j] * a11[j] - a01[j] * a01[j];
a00[j] /= det;
a11[j] /= det;
a01[j] /= -det;
}
}
void div_1x(register float *a1, register float *b1, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 _a1, _b1;
for (; j < w - 3; j += 4)
{
_b1 = _mm_loadu_ps(b1 + j);
_a1 = _mm_loadu_ps(a1 + j);
_mm_storeu_ps(a1 + j, _mm_div_ps(_a1, _b1));
}
}
#endif
for (; j < w; j++)
{
a1[j] /= b1[j];
}
}
void inv_self(register float *src, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a;
for (; j < w - 3; j += 4)
{
a = _mm_rcp_ps(_mm_loadu_ps(src + j));
_mm_storeu_ps(src + j, a);
}
}
#endif
for (; j < w; j++)
{
src[j] = 1.0f / src[j];
}
}
void sqrt_(register float *dst, register float *src, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a;
for (; j < w - 3; j += 4)
{
a = _mm_sqrt_ps(_mm_loadu_ps(src + j));
_mm_storeu_ps(dst + j, a);
}
}
#endif
for (; j < w; j++)
dst[j] = sqrt(src[j]);
}
void min_(register float *dst, register float *src1, register float *src2, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 a, b;
for (; j < w - 3; j += 4)
{
a = _mm_loadu_ps(src1 + j);
b = _mm_loadu_ps(src2 + j);
b = _mm_min_ps(b, a);
_mm_storeu_ps(dst + j, b);
}
}
#endif
for (; j < w; j++)
dst[j] = std::min(src1[j], src2[j]);
}
void rf_vert_row_pass(register float *curRow, register float *prevRow, float alphaVal, int w)
{
register int j = 0;
#ifdef CV_SSE
if (CPU_SUPPORT_SSE1)
{
__m128 cur, prev, res;
__m128 alpha = _mm_set_ps1(alphaVal);
for (; j < w - 3; j += 4)
{
cur = _mm_loadu_ps(curRow + j);
prev = _mm_loadu_ps(prevRow + j);
res = _mm_mul_ps(alpha, _mm_sub_ps(prev, cur));
res = _mm_add_ps(res, cur);
_mm_storeu_ps(curRow + j, res);
}
}
#endif
for (; j < w; j++)
curRow[j] += alphaVal*(prevRow[j] - curRow[j]);
}
} //end of cv::eaf
} //end of cv

@ -0,0 +1,57 @@
#ifndef __EDGEAWAREFILTERS_COMMON_HPP__
#define __EDGEAWAREFILTERS_COMMON_HPP__
#ifdef __cplusplus
namespace cv
{
Ptr<DTFilter> createDTFilterRF(InputArray adistHor, InputArray adistVert, double sigmaSpatial, double sigmaColor, int numIters);
int getTotalNumberOfChannels(InputArrayOfArrays src);
void checkSameSizeAndDepth(InputArrayOfArrays src, Size &sz, int &depth);
namespace eaf
{
void add_(register float *dst, register float *src1, int w);
void mul(register float *dst, register float *src1, register float *src2, int w);
void mul(register float *dst, register float *src1, float src2, int w);
//dst = alpha*src + beta
void mad(register float *dst, register float *src1, float alpha, float beta, int w);
void add_mul(register float *dst, register float *src1, register float *src2, int w);
void sub_mul(register float *dst, register float *src1, register float *src2, int w);
void sub_mad(register float *dst, register float *src1, register float *src2, float c0, int w);
void det_2x2(register float *dst, register float *a00, register float *a01, register float *a10, register float *a11, int w);
void div_det_2x2(register float *a00, register float *a01, register float *a11, int w);
void div_1x(register float *a1, register float *b1, int w);
void inv_self(register float *src, int w);
void sqr_(register float *dst, register float *src1, int w);
void sqrt_(register float *dst, register float *src, int w);
void sqr_dif(register float *dst, register float *src1, register float *src2, int w);
void add_sqr_dif(register float *dst, register float *src1, register float *src2, int w);
void add_sqr(register float *dst, register float *src1, int w);
void min_(register float *dst, register float *src1, register float *src2, int w);
void rf_vert_row_pass(register float *curRow, register float *prevRow, float alphaVal, int w);
}
}
#endif
#endif

@ -0,0 +1,753 @@
#include "precomp.hpp"
#include <vector>
#include <iostream>
using std::vector;
#include "edgeaware_filters_common.hpp"
using namespace cv::eaf;
#ifdef _MSC_VER
# pragma warning(disable: 4512)
#endif
namespace cv
{
template <typename T>
struct SymArray2D
{
vector<T> vec;
int sz;
SymArray2D()
{
sz = 0;
}
void create(int sz_)
{
CV_DbgAssert(sz_ > 0);
sz = sz_;
vec.resize(total());
}
inline T& operator()(int i, int j)
{
CV_DbgAssert(i >= 0 && i < sz && j >= 0 && j < sz);
if (i < j) std::swap(i, j);
return vec[i*(i+1)/2 + j];
}
inline T& operator()(int i)
{
return vec[i];
}
int total() const
{
return sz*(sz + 1)/2;
}
void release()
{
vec.clear();
sz = 0;
}
};
template <typename XMat>
static void splitFirstNChannels(InputArrayOfArrays src, vector<XMat>& dst, int maxDstCn)
{
CV_Assert(src.isMat() || src.isUMat() || src.isMatVector() || src.isUMatVector());
if ( (src.isMat() || src.isUMat()) && src.channels() == maxDstCn )
{
split(src, dst);
}
else
{
Size sz;
int depth, totalCnNum;
checkSameSizeAndDepth(src, sz, depth);
totalCnNum = std::min(maxDstCn, getTotalNumberOfChannels(src));
dst.resize(totalCnNum);
vector<int> fromTo(2*totalCnNum);
for (int i = 0; i < totalCnNum; i++)
{
fromTo[i*2 + 0] = i;
fromTo[i*2 + 1] = i;
dst[i].create(sz, CV_MAKE_TYPE(depth, 1));
}
mixChannels(src, dst, fromTo);
}
}
class GuidedFilterImpl : public GuidedFilter
{
public:
static Ptr<GuidedFilterImpl> create(InputArray guide, int radius, double eps);
void filter(InputArray src, OutputArray dst, int dDepth = -1);
protected:
int radius;
double eps;
int h, w;
vector<Mat> guideCn;
vector<Mat> guideCnMean;
SymArray2D<Mat> covarsInv;
int gCnNum;
protected:
GuidedFilterImpl() {}
void init(InputArray guide, int radius, double eps);
void computeCovGuide(SymArray2D<Mat>& covars);
void computeCovGuideAndSrc(vector<Mat>& srcCn, vector<Mat>& srcCnMean, vector<vector<Mat> >& cov);
void getWalkPattern(int eid, int &cn1, int &cn2);
inline void meanFilter(Mat& src, Mat& dst)
{
boxFilter(src, dst, CV_32F, Size(2 * radius + 1, 2 * radius + 1), cv::Point(-1, -1), true, BORDER_REFLECT);
}
inline void convertToWorkType(Mat& src, Mat& dst)
{
src.convertTo(dst, CV_32F);
}
private: /*Routines to parallelize boxFilter and convertTo*/
typedef void (GuidedFilterImpl::*TransformFunc)(Mat& src, Mat& dst);
struct GFTransform_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
mutable vector<Mat*> src;
mutable vector<Mat*> dst;
TransformFunc func;
GFTransform_ParBody(GuidedFilterImpl& gf_, vector<Mat>& srcv, vector<Mat>& dstv, TransformFunc func_);
GFTransform_ParBody(GuidedFilterImpl& gf_, vector<vector<Mat> >& srcvv, vector<vector<Mat> >& dstvv, TransformFunc func_);
void operator () (const Range& range) const;
Range getRange() const
{
return Range(0, src.size());
}
};
template<typename V>
void parConvertToWorkType(V &src, V &dst)
{
GFTransform_ParBody pb(*this, src, dst, &GuidedFilterImpl::convertToWorkType);
parallel_for_(pb.getRange(), pb);
}
template<typename V>
void parMeanFilter(V &src, V &dst)
{
GFTransform_ParBody pb(*this, src, dst, &GuidedFilterImpl::meanFilter);
parallel_for_(pb.getRange(), pb);
}
private: /*Parallel body classes*/
inline void runParBody(const ParallelLoopBody& pb)
{
parallel_for_(Range(0, h), pb);
}
struct MulChannelsGuide_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
SymArray2D<Mat> &covars;
MulChannelsGuide_ParBody(GuidedFilterImpl& gf_, SymArray2D<Mat>& covars_)
: gf(gf_), covars(covars_) {}
void operator () (const Range& range) const;
};
struct ComputeCovGuideFromChannelsMul_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
SymArray2D<Mat> &covars;
ComputeCovGuideFromChannelsMul_ParBody(GuidedFilterImpl& gf_, SymArray2D<Mat>& covars_)
: gf(gf_), covars(covars_) {}
void operator () (const Range& range) const;
};
struct ComputeCovGuideInv_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
SymArray2D<Mat> &covars;
ComputeCovGuideInv_ParBody(GuidedFilterImpl& gf_, SymArray2D<Mat>& covars_);
void operator () (const Range& range) const;
};
struct MulChannelsGuideAndSrc_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
vector<vector<Mat> > &cov;
vector<Mat> &srcCn;
MulChannelsGuideAndSrc_ParBody(GuidedFilterImpl& gf_, vector<Mat>& srcCn_, vector<vector<Mat> >& cov_)
: gf(gf_), srcCn(srcCn_), cov(cov_) {}
void operator () (const Range& range) const;
};
struct ComputeCovFromSrcChannelsMul_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
vector<vector<Mat> > &cov;
vector<Mat> &srcCnMean;
ComputeCovFromSrcChannelsMul_ParBody(GuidedFilterImpl& gf_, vector<Mat>& srcCnMean_, vector<vector<Mat> >& cov_)
: gf(gf_), srcCnMean(srcCnMean_), cov(cov_) {}
void operator () (const Range& range) const;
};
struct ComputeAlpha_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
vector<vector<Mat> > &alpha;
vector<vector<Mat> > &covSrc;
ComputeAlpha_ParBody(GuidedFilterImpl& gf_, vector<vector<Mat> >& alpha_, vector<vector<Mat> >& covSrc_)
: gf(gf_), alpha(alpha_), covSrc(covSrc_) {}
void operator () (const Range& range) const;
};
struct ComputeBeta_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
vector<vector<Mat> > &alpha;
vector<Mat> &srcCnMean;
vector<Mat> &beta;
ComputeBeta_ParBody(GuidedFilterImpl& gf_, vector<vector<Mat> >& alpha_, vector<Mat>& srcCnMean_, vector<Mat>& beta_)
: gf(gf_), alpha(alpha_), srcCnMean(srcCnMean_), beta(beta_) {}
void operator () (const Range& range) const;
};
struct ApplyTransform_ParBody : public ParallelLoopBody
{
GuidedFilterImpl &gf;
vector<vector<Mat> > &alpha;
vector<Mat> &beta;
ApplyTransform_ParBody(GuidedFilterImpl& gf_, vector<vector<Mat> >& alpha_, vector<Mat>& beta_)
: gf(gf_), alpha(alpha_), beta(beta_) {}
void operator () (const Range& range) const;
};
};
void GuidedFilterImpl::MulChannelsGuide_ParBody::operator()(const Range& range) const
{
int total = covars.total();
for (int i = range.start; i < range.end; i++)
{
int c1, c2;
float *cov, *guide1, *guide2;
for (int k = 0; k < total; k++)
{
gf.getWalkPattern(k, c1, c2);
guide1 = gf.guideCn[c1].ptr<float>(i);
guide2 = gf.guideCn[c2].ptr<float>(i);
cov = covars(c1, c2).ptr<float>(i);
mul(cov, guide1, guide2, gf.w);
}
}
}
void GuidedFilterImpl::ComputeCovGuideFromChannelsMul_ParBody::operator()(const Range& range) const
{
int total = covars.total();
float diagSummand = (float)(gf.eps);
for (int i = range.start; i < range.end; i++)
{
int c1, c2;
float *cov, *guide1, *guide2;
for (int k = 0; k < total; k++)
{
gf.getWalkPattern(k, c1, c2);
guide1 = gf.guideCnMean[c1].ptr<float>(i);
guide2 = gf.guideCnMean[c2].ptr<float>(i);
cov = covars(c1, c2).ptr<float>(i);
if (c1 != c2)
{
sub_mul(cov, guide1, guide2, gf.w);
}
else
{
sub_mad(cov, guide1, guide2, -diagSummand, gf.w);
}
}
}
}
GuidedFilterImpl::ComputeCovGuideInv_ParBody::ComputeCovGuideInv_ParBody(GuidedFilterImpl& gf_, SymArray2D<Mat>& covars_)
: gf(gf_), covars(covars_)
{
gf.covarsInv.create(gf.gCnNum);
if (gf.gCnNum == 3)
{
for (int k = 0; k < 2; k++)
for (int l = 0; l < 3; l++)
gf.covarsInv(k, l).create(gf.h, gf.w, CV_32FC1);
////trick to avoid memory allocation
gf.covarsInv(2, 0).create(gf.h, gf.w, CV_32FC1);
gf.covarsInv(2, 1) = covars(2, 1);
gf.covarsInv(2, 2) = covars(2, 2);
return;
}
if (gf.gCnNum == 2)
{
gf.covarsInv(0, 0) = covars(1, 1);
gf.covarsInv(0, 1) = covars(0, 1);
gf.covarsInv(1, 1) = covars(0, 0);
return;
}
if (gf.gCnNum == 1)
{
gf.covarsInv(0, 0) = covars(0, 0);
return;
}
}
void GuidedFilterImpl::ComputeCovGuideInv_ParBody::operator()(const Range& range) const
{
if (gf.gCnNum == 3)
{
vector<float> covarsDet(gf.w);
float *det = &covarsDet[0];
for (int i = range.start; i < range.end; i++)
{
for (int k = 0; k < 3; k++)
for (int l = 0; l <= k; l++)
{
float *dst = gf.covarsInv(k, l).ptr<float>(i);
float *a00 = covars((k + 1) % 3, (l + 1) % 3).ptr<float>(i);
float *a01 = covars((k + 1) % 3, (l + 2) % 3).ptr<float>(i);
float *a10 = covars((k + 2) % 3, (l + 1) % 3).ptr<float>(i);
float *a11 = covars((k + 2) % 3, (l + 2) % 3).ptr<float>(i);
det_2x2(dst, a00, a01, a10, a11, gf.w);
}
for (int k = 0; k < 3; k++)
{
register float *a = covars(k, 0).ptr<float>(i);
register float *ac = gf.covarsInv(k, 0).ptr<float>(i);
if (k == 0)
mul(det, a, ac, gf.w);
else
add_mul(det, a, ac, gf.w);
}
for (int k = 0; k < gf.covarsInv.total(); k += 1)
{
div_1x(gf.covarsInv(k).ptr<float>(i), det, gf.w);
}
}
return;
}
if (gf.gCnNum == 2)
{
for (int i = range.start; i < range.end; i++)
{
float *a00 = gf.covarsInv(0, 0).ptr<float>(i);
float *a10 = gf.covarsInv(1, 0).ptr<float>(i);
float *a11 = gf.covarsInv(1, 1).ptr<float>(i);
div_det_2x2(a00, a10, a11, gf.w);
}
return;
}
if (gf.gCnNum == 1)
{
//divide(1.0, covars(0, 0)(range, Range::all()), gf.covarsInv(0, 0)(range, Range::all()));
//return;
for (int i = range.start; i < range.end; i++)
{
float *res = covars(0, 0).ptr<float>(i);
inv_self(res, gf.w);
}
return;
}
}
void GuidedFilterImpl::MulChannelsGuideAndSrc_ParBody::operator()(const Range& range) const
{
int srcCnNum = (int)srcCn.size();
for (int i = range.start; i < range.end; i++)
{
for (int si = 0; si < srcCnNum; si++)
{
int step = (si % 2) * 2 - 1;
int start = (si % 2) ? 0 : gf.gCnNum - 1;
int end = (si % 2) ? gf.gCnNum : -1;
float *srcLine = srcCn[si].ptr<float>(i);
for (int gi = start; gi != end; gi += step)
{
float *guideLine = gf.guideCn[gi].ptr<float>(i);
float *dstLine = cov[si][gi].ptr<float>(i);
mul(dstLine, srcLine, guideLine, gf.w);
}
}
}
}
void GuidedFilterImpl::ComputeCovFromSrcChannelsMul_ParBody::operator()(const Range& range) const
{
int srcCnNum = (int)srcCnMean.size();
for (int i = range.start; i < range.end; i++)
{
for (int si = 0; si < srcCnNum; si++)
{
int step = (si % 2) * 2 - 1;
int start = (si % 2) ? 0 : gf.gCnNum - 1;
int end = (si % 2) ? gf.gCnNum : -1;
register float *srcMeanLine = srcCnMean[si].ptr<float>(i);
for (int gi = start; gi != end; gi += step)
{
float *guideMeanLine = gf.guideCnMean[gi].ptr<float>(i);
float *covLine = cov[si][gi].ptr<float>(i);
sub_mul(covLine, srcMeanLine, guideMeanLine, gf.w);
}
}
}
}
void GuidedFilterImpl::ComputeAlpha_ParBody::operator()(const Range& range) const
{
int srcCnNum = (int)covSrc.size();
for (int i = range.start; i < range.end; i++)
{
for (int si = 0; si < srcCnNum; si++)
{
for (int gi = 0; gi < gf.gCnNum; gi++)
{
float *y, *A, *dstAlpha;
dstAlpha = alpha[si][gi].ptr<float>(i);
for (int k = 0; k < gf.gCnNum; k++)
{
y = covSrc[si][k].ptr<float>(i);
A = gf.covarsInv(gi, k).ptr<float>(i);
if (k == 0)
{
mul(dstAlpha, A, y, gf.w);
}
else
{
add_mul(dstAlpha, A, y, gf.w);
}
}
}
}
}
}
void GuidedFilterImpl::ComputeBeta_ParBody::operator()(const Range& range) const
{
int srcCnNum = (int)srcCnMean.size();
CV_DbgAssert(&srcCnMean == &beta);
for (int i = range.start; i < range.end; i++)
{
float *_g[4];
for (int gi = 0; gi < gf.gCnNum; gi++)
_g[gi] = gf.guideCnMean[gi].ptr<float>(i);
float *betaDst, *g, *a;
for (int si = 0; si < srcCnNum; si++)
{
betaDst = beta[si].ptr<float>(i);
for (int gi = 0; gi < gf.gCnNum; gi++)
{
a = alpha[si][gi].ptr<float>(i);
g = _g[gi];
sub_mul(betaDst, a, g, gf.w);
}
}
}
}
void GuidedFilterImpl::ApplyTransform_ParBody::operator()(const Range& range) const
{
int srcCnNum = (int)alpha.size();
for (int i = range.start; i < range.end; i++)
{
float *_g[4];
for (int gi = 0; gi < gf.gCnNum; gi++)
_g[gi] = gf.guideCn[gi].ptr<float>(i);
float *betaDst, *g, *a;
for (int si = 0; si < srcCnNum; si++)
{
betaDst = beta[si].ptr<float>(i);
for (int gi = 0; gi < gf.gCnNum; gi++)
{
a = alpha[si][gi].ptr<float>(i);
g = _g[gi];
add_mul(betaDst, a, g, gf.w);
}
}
}
}
GuidedFilterImpl::GFTransform_ParBody::GFTransform_ParBody(GuidedFilterImpl& gf_, vector<Mat>& srcv, vector<Mat>& dstv, TransformFunc func_)
: gf(gf_), func(func_)
{
CV_DbgAssert(srcv.size() == dstv.size());
src.resize(srcv.size());
dst.resize(srcv.size());
for (int i = 0; i < (int)srcv.size(); i++)
{
src[i] = &srcv[i];
dst[i] = &dstv[i];
}
}
GuidedFilterImpl::GFTransform_ParBody::GFTransform_ParBody(GuidedFilterImpl& gf_, vector<vector<Mat> >& srcvv, vector<vector<Mat> >& dstvv, TransformFunc func_)
: gf(gf_), func(func_)
{
CV_DbgAssert(srcvv.size() == dstvv.size());
int n = (int)srcvv.size();
int total = 0;
for (int i = 0; i < n; i++)
{
CV_DbgAssert(srcvv[i].size() == dstvv[i].size());
total += (int)srcvv[i].size();
}
src.resize(total);
dst.resize(total);
int k = 0;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < (int)srcvv[i].size(); j++)
{
src[k] = &srcvv[i][j];
dst[k] = &dstvv[i][j];
k++;
}
}
}
void GuidedFilterImpl::GFTransform_ParBody::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
(gf.*func)(*src[i], *dst[i]);
}
}
void GuidedFilterImpl::getWalkPattern(int eid, int &cn1, int &cn2)
{
static int wdata[] = {
0, -1, -1, -1, -1, -1,
0, -1, -1, -1, -1, -1,
0, 0, 1, -1, -1, -1,
0, 1, 1, -1, -1, -1,
0, 0, 0, 2, 1, 1,
0, 1, 2, 2, 2, 1,
};
cn1 = wdata[6 * 2 * (gCnNum-1) + eid];
cn2 = wdata[6 * 2 * (gCnNum-1) + 6 + eid];
}
Ptr<GuidedFilterImpl> GuidedFilterImpl::create(InputArray guide, int radius, double eps)
{
GuidedFilterImpl *gf = new GuidedFilterImpl();
gf->init(guide, radius, eps);
return Ptr<GuidedFilterImpl>(gf);
}
void GuidedFilterImpl::init(InputArray guide, int radius_, double eps_)
{
CV_Assert( !guide.empty() && radius_ >= 0 && eps_ >= 0 );
CV_Assert( (guide.depth() == CV_32F || guide.depth() == CV_8U || guide.depth() == CV_16U) && (guide.channels() <= 3) );
radius = radius_;
eps = eps_;
splitFirstNChannels(guide, guideCn, 3);
gCnNum = (int)guideCn.size();
h = guideCn[0].rows;
w = guideCn[0].cols;
guideCnMean.resize(gCnNum);
parConvertToWorkType(guideCn, guideCn);
parMeanFilter(guideCn, guideCnMean);
SymArray2D<Mat> covars;
computeCovGuide(covars);
runParBody(ComputeCovGuideInv_ParBody(*this, covars));
covars.release();
}
void GuidedFilterImpl::computeCovGuide(SymArray2D<Mat>& covars)
{
covars.create(gCnNum);
for (int i = 0; i < covars.total(); i++)
covars(i).create(h, w, CV_32FC1);
runParBody(MulChannelsGuide_ParBody(*this, covars));
parMeanFilter(covars.vec, covars.vec);
runParBody(ComputeCovGuideFromChannelsMul_ParBody(*this, covars));
}
void GuidedFilterImpl::filter(InputArray src, OutputArray dst, int dDepth /*= -1*/)
{
CV_Assert( !src.empty() && (src.depth() == CV_32F || src.depth() == CV_8U) );
if (src.rows() != h || src.cols() != w)
{
CV_Error(Error::StsBadSize, "Size of filtering image must be equal to size of guide image");
return;
}
if (dDepth == -1) dDepth = src.depth();
int srcCnNum = src.channels();
vector<Mat> srcCn(srcCnNum);
vector<Mat>& srcCnMean = srcCn;
split(src, srcCn);
if (src.depth() != CV_32F)
{
parConvertToWorkType(srcCn, srcCn);
}
vector<vector<Mat> > covSrcGuide(srcCnNum);
computeCovGuideAndSrc(srcCn, srcCnMean, covSrcGuide);
vector<vector<Mat> > alpha(srcCnNum);
for (int si = 0; si < srcCnNum; si++)
{
alpha[si].resize(gCnNum);
for (int gi = 0; gi < gCnNum; gi++)
alpha[si][gi].create(h, w, CV_32FC1);
}
runParBody(ComputeAlpha_ParBody(*this, alpha, covSrcGuide));
covSrcGuide.clear();
vector<Mat>& beta = srcCnMean;
runParBody(ComputeBeta_ParBody(*this, alpha, srcCnMean, beta));
parMeanFilter(beta, beta);
parMeanFilter(alpha, alpha);
runParBody(ApplyTransform_ParBody(*this, alpha, beta));
if (dDepth != CV_32F)
{
for (int i = 0; i < srcCnNum; i++)
beta[i].convertTo(beta[i], dDepth);
}
merge(beta, dst);
}
void GuidedFilterImpl::computeCovGuideAndSrc(vector<Mat>& srcCn, vector<Mat>& srcCnMean, vector<vector<Mat> >& cov)
{
int srcCnNum = (int)srcCn.size();
cov.resize(srcCnNum);
for (int i = 0; i < srcCnNum; i++)
{
cov[i].resize(gCnNum);
for (int j = 0; j < gCnNum; j++)
cov[i][j].create(h, w, CV_32FC1);
}
runParBody(MulChannelsGuideAndSrc_ParBody(*this, srcCn, cov));
parMeanFilter(srcCn, srcCnMean);
parMeanFilter(cov, cov);
runParBody(ComputeCovFromSrcChannelsMul_ParBody(*this, srcCnMean, cov));
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
CV_EXPORTS_W
Ptr<GuidedFilter> createGuidedFilter(InputArray guide, int radius, double eps)
{
return Ptr<GuidedFilter>(GuidedFilterImpl::create(guide, radius, eps));
}
CV_EXPORTS_W
void guidedFilter(InputArray guide, InputArray src, OutputArray dst, int radius, double eps, int dDepth)
{
Ptr<GuidedFilter> gf = createGuidedFilter(guide, radius, eps);
gf->filter(src, dst, dDepth);
}
}

@ -0,0 +1,349 @@
#include "precomp.hpp"
#include <vector>
#include <iostream>
using std::vector;
#ifdef _MSC_VER
# pragma warning(disable: 4512)
#endif
namespace cv
{
template <typename T>
struct SymArray2D
{
vector<T> vec;
int sz;
SymArray2D()
{
sz = 0;
}
void create(int sz_)
{
CV_DbgAssert(sz_ > 0);
sz = sz_;
vec.resize(total());
}
inline T& operator()(int i, int j)
{
CV_DbgAssert(i >= 0 && i < sz && j >= 0 && j < sz);
if (i < j) std::swap(i, j);
return vec[i*(i+1)/2 + j];
}
inline T& operator()(int i)
{
return vec[i];
}
int total() const
{
return sz*(sz + 1)/2;
}
void release()
{
vec.clear();
sz = 0;
}
};
class GuidedFilterImplOCL : public GuidedFilter
{
Size sz;
int radius;
double eps;
SymArray2D<UMat> covGuide;
SymArray2D<UMat> covGuideInv;
int guideCnNum;
vector<UMat> guideCn;
vector<UMat> guideCnMean;
inline void meanFilter(UMat& src, UMat& dst);
public:
GuidedFilterImplOCL(InputArray guide, int radius, double eps);
void filter(InputArray src, OutputArray dst, int dDepth);
void computeCovGuide();
void computeCovGuideInv();
void computeCovSrcGuide(vector<UMat>& srcCn, vector<UMat>& srcCnMean, vector<vector<UMat> >& covSrcGuide);
void computeAlpha(vector<vector<UMat> >& covSrcGuide, vector<vector<UMat> >& alpha);
void computeBeta(vector<vector<UMat> >&alpha, vector<UMat>& srcCnMean, vector<UMat>& beta);
void applyTransform(vector<vector<UMat> >& alpha, vector<UMat>& beta);
};
void GuidedFilterImplOCL::meanFilter(UMat& src, UMat& dst)
{
boxFilter(src, dst, CV_32F, Size(2 * radius + 1, 2 * radius + 1), Point(-1, -1), true, BORDER_REFLECT);
}
GuidedFilterImplOCL::GuidedFilterImplOCL(InputArray guide, int radius_, double eps_)
{
CV_Assert(!guide.empty() && guide.channels() <= 3);
CV_Assert(radius_ >= 0 && eps_ >= 0);
radius = radius_;
eps = eps_;
guideCnNum = guide.channels();
guideCn.resize(guideCnNum);
guideCnMean.resize(guideCnNum);
if (guide.depth() == CV_32F)
{
split(guide, guideCn);
}
else
{
UMat buf;
guide.getUMat().convertTo(buf, CV_32F);
split(buf, guideCn);
}
for (int i = 0; i < guideCnNum; i++)
{
meanFilter(guideCn[i], guideCnMean[i]);
}
computeCovGuide();
computeCovGuideInv();
}
void GuidedFilterImplOCL::computeCovGuide()
{
covGuide.create(guideCnNum);
UMat buf;
for (int i = 0; i < guideCnNum; i++)
for (int j = 0; j <= i; j++)
{
multiply(guideCn[i], guideCn[j], covGuide(i, j));
meanFilter(covGuide(i, j), covGuide(i, j));
multiply(guideCnMean[i], guideCnMean[j], buf);
subtract(covGuide(i, j), buf, covGuide(i, j));
//add regulariztion term
if (i == j)
covGuide(i, j).convertTo(covGuide(i, j), covGuide(i, j).depth(), 1.0, eps);
}
}
void GuidedFilterImplOCL::computeCovGuideInv()
{
covGuideInv.create(guideCnNum);
if (guideCnNum == 3)
{
//for (int l = 0; l < 3; l++)
// covGuideInv(2, l) = covGuide(2, l);
UMat det, tmp1, tmp2;
for (int l = 0; l < 3; l++)
{
int l1 = (l + 1) % 3;
int l2 = (l + 2) % 3;
multiply(covGuide(1, l1), covGuide(2, l2), tmp1);
multiply(covGuide(1, l2), covGuide(2, l1), tmp2);
if (l == 0)
{
subtract(tmp1, tmp2, det);
}
else
{
add(det, tmp1, det);
subtract(det, tmp2, det);
}
}
for (int k = 0; k < 3; k++)
for (int l = 0; l <= k; l++)
{
int k1 = (k + 1) % 3, l1 = (l + 1) % 3;
int k2 = (k + 2) % 3, l2 = (l + 2) % 3;
multiply(covGuide(k1, l1), covGuide(k2, l2), tmp1);
multiply(covGuide(k1, l2), covGuide(k2, l1), tmp2);
subtract(tmp1, tmp2, covGuideInv(k, l));
divide(covGuideInv(k, l), det, covGuideInv(k, l));
}
}
else if (guideCnNum == 2)
{
covGuideInv(0, 0) = covGuide(1, 1);
covGuideInv(1, 1) = covGuide(0, 0);
covGuideInv(0, 1) = covGuide(0, 1);
UMat det, tmp;
multiply(covGuide(0, 0), covGuide(1, 1), det);
multiply(covGuide(0, 1), covGuide(1, 0), tmp);
subtract(det, tmp, det);
tmp.release();
divide(covGuideInv(0, 0), det, covGuideInv(0, 0));
divide(covGuideInv(1, 1), det, covGuideInv(1, 1));
divide(covGuideInv(0, 1), det, covGuideInv(0, 1), -1);
}
else
{
covGuideInv(0, 0) = covGuide(0, 0);
divide(1.0, covGuide(0, 0), covGuideInv(0, 0));
}
covGuide.release();
}
void GuidedFilterImplOCL::filter(InputArray src, OutputArray dst, int dDepth)
{
if (dDepth == -1) dDepth = src.depth();
int srcCnNum = src.channels();
vector<UMat> srcCn(srcCnNum);
vector<UMat> srcCnMean(srcCnNum);
if (src.depth() != CV_32F)
{
UMat tmp;
src.getUMat().convertTo(tmp, CV_32F);
split(tmp, srcCn);
}
else
{
split(src, srcCn);
}
for (int i = 0; i < srcCnNum; i++)
meanFilter(srcCn[i], srcCnMean[i]);
vector<vector<UMat> > covSrcGuide(srcCnNum);
computeCovSrcGuide(srcCn, srcCnMean, covSrcGuide);
vector<vector<UMat> > alpha(srcCnNum);
vector<UMat>& beta = srcCnMean;
computeAlpha(covSrcGuide, alpha);
computeBeta(alpha, srcCnMean, beta);
//if (true)
//{
// for (int i = 0; i < srcCnNum; i++)
// {
// Mat alphaViz;
// merge(alpha[i], alphaViz);
// imwrite("res/alpha" + format("%d", i) + ".png", alphaViz * 100);
// }
// Mat betaViz;
// merge(beta, betaViz);
// imwrite("res/beta.png", betaViz + Scalar::all(127));
//}
for (int i = 0; i < srcCnNum; i++)
for (int j = 0; j < guideCnNum; j++)
meanFilter(alpha[i][j], alpha[i][j]);
applyTransform(alpha, beta);
if (dDepth != CV_32F)
{
for (int i = 0; i < srcCnNum; i++)
beta[i].convertTo(beta[i], dDepth);
}
merge(beta, dst);
}
void GuidedFilterImplOCL::computeCovSrcGuide(vector<UMat>& srcCn, vector<UMat>& srcCnMean, vector<vector<UMat> >& covSrcGuide)
{
int srcCnNum = (int)srcCn.size();
covSrcGuide.resize(srcCnNum);
UMat buf;
for (int i = 0; i < srcCnNum; i++)
{
covSrcGuide[i].resize(guideCnNum);
for (int j = 0; j < guideCnNum; j++)
{
UMat& cov = covSrcGuide[i][j];
multiply(srcCn[i], guideCn[i], cov);
meanFilter(cov, cov);
multiply(srcCnMean[i], guideCnMean[j], buf);
subtract(cov, buf, cov);
}
}
}
void GuidedFilterImplOCL::computeAlpha(vector<vector<UMat> >& covSrcGuide, vector<vector<UMat> >& alpha)
{
int srcCnNum = (int)covSrcGuide.size();
alpha.resize(srcCnNum);
UMat buf;
for (int i = 0; i < srcCnNum; i++)
{
alpha[i].resize(guideCnNum);
for (int k = 0; k < guideCnNum; k++)
{
multiply(covGuideInv(k, 0), covSrcGuide[i][0], alpha[i][k]);
for (int l = 1; l < guideCnNum; l++)
{
multiply(covGuideInv(k, l), covSrcGuide[i][l], buf);
add(buf, alpha[i][k], alpha[i][k]);
}
}
}
}
void GuidedFilterImplOCL::computeBeta(vector<vector<UMat> >& alpha, vector<UMat>& srcCnMean, vector<UMat>& beta)
{
int srcCnNum = (int)srcCnMean.size();
CV_Assert(&beta == &srcCnMean);
UMat buf;
for (int i = 0; i < srcCnNum; i++)
{
multiply(alpha[i][0], guideCnMean[0], beta[i]);
for (int j = 1; j < guideCnNum; j++)
{
multiply(alpha[i][j], guideCnMean[j], buf);
subtract(beta[i], buf, beta[i]);
}
meanFilter(beta[i], beta[i]);
}
}
void GuidedFilterImplOCL::applyTransform(vector<vector<UMat> >& alpha, vector<UMat>& beta)
{
int srcCnNum = (int)beta.size();
UMat buf;
for (int i = 0; i < srcCnNum; i++)
{
for (int j = 0; j < guideCnNum; j++)
{
multiply(guideCn[j], alpha[i][j], buf);
add(beta[i], buf, beta[i]);
}
}
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,250 @@
#if defined(_MSC_VER) //for correct syntax highlighting
#include "OpenCLKernel.hpp"
#define cn 3
#define GuideType uchar
#define GuideVec uchar3
#define convert_guide convert_float3
#define NUM_ITERS 3
#endif
#if !defined(convert_guide)
#define convert_guide(a) (a)
#endif
#if cn==3
#define ELEM_SIZE (3*sizeof(GuideType))
#define loadGuide(addr) vload3(0, (__global const GuideType *)(addr))
#else
#define ELEM_SIZE sizeof(GuideVec)
#define loadGuide(addr) ( *(__global const GuideVec*)(addr) )
#endif
#define storeDist(val, addr) *(__global float*)(addr) = val
#if cn == 1
#define SUM(a) (a)
#elif cn == 2
#define SUM(a) (a.x + a.y)
#elif cn == 3
#define SUM(a) (a.x + a.y + a.z)
#elif cn == 4
#define SUM(a) (a.x + a.y + a.z + a.w)
#else
#error "cn should be <= 4"
#endif
#define NORM(a, b) SUM(fabs(convert_guide(a) - convert_guide(b)))
#define DT(a, b, sigmaRatios) (1.0f + (sigmaRatios)*NORM(a, b))
#define getFloat(addr, index) ( *(__global const float*)(addr + (index)*sizeof(float)) )
#define storeFloat(val, addr, index) ( *(__global float*)(addr + (index)*sizeof(float)) = val )
#define PtrAdd(addr, bytesNum, Type) ((__global Type *)((__global uchar*)(addr) + bytesNum))
#define PtrAddConst(addr, bytesNum, Type) ((__global const Type *)((__global const uchar*)(addr) + bytesNum))
__kernel void find_conv_bounds_by_idt(__global const uchar *idist, int idist_step, int idist_offset, int rows, int cols,
__global uchar *bounds, int bounds_step, int bounds_offset,
float radius1, float radius2, float radius3)
{
int i = get_global_id(0);
int j = get_global_id(1);
if (!(i >= 0 && i < rows) || !(j >= 0 && j < cols))
return;
idist += mad24(i, idist_step, idist_offset);
__global int *bound = bounds + mad24((NUM_ITERS-1)*rows + i, bounds_step, mad24(j, 2*(int)sizeof(int), bounds_offset));
float center_idt_val = getFloat(idist, j);
float radius[] = {radius1, radius2, radius3};
int low_bound = j;
int high_bound = j;
float search_idt_val;
for (int iter = NUM_ITERS - 1; iter >= 0; iter--)
{
search_idt_val = center_idt_val - radius[iter];
while (search_idt_val < getFloat(idist, low_bound-1))
low_bound--;
bound[0] = low_bound;
search_idt_val = center_idt_val + radius[iter];
while (getFloat(idist, high_bound + 1) < search_idt_val)
high_bound++;
bound[1] = high_bound;
bound = PtrAdd(bound, -rows*bounds_step, int);
}
}
__kernel void find_conv_bounds_by_dt(__global const uchar *dist, int dist_step, int dist_offset, int rows, int cols,
__global uchar *bounds, int bounds_step, int bounds_offset,
float radius1, float radius2, float radius3)
{
int i = get_global_id(0);
int j = get_global_id(1);
if (!(i >= 0 && i < rows) || !(j >= 0 && j < cols))
return;
dist += mad24(i, dist_step, dist_offset);
int rowsOffset = mad24(NUM_ITERS - 1, rows, i);
__global int *bound = PtrAdd(bounds, mad24(rowsOffset, bounds_step, mad24(j, 2*(int)sizeof(int), bounds_offset)), int);
float radius[] = {radius1, radius2, radius3};
int low_bound = j;
int high_bound = j;
float val, cur_radius, low_dt_val = 0.0f, high_dt_val = 0.0f;
for (int iter = NUM_ITERS - 1; iter >= 0; iter--)
{
cur_radius = radius[iter];
while (cur_radius > (val = low_dt_val + getFloat(dist, low_bound - 1)) )
{
low_dt_val = val;
low_bound--;
}
bound[0] = low_bound;
while (cur_radius > (val = high_dt_val + getFloat(dist, high_bound)) )
{
high_dt_val = val;
high_bound++;
}
bound[1] = high_bound;
bound = PtrAdd(bound, -rows*bounds_step, int);
}
}
__kernel void find_conv_bounds_and_tails(__global const uchar *dist, int dist_step, int dist_offset, int rows, int cols,
__global uchar *bounds, int bounds_step, int bounds_offset,
__global float *tailmat, int tail_step, int tail_offset,
float radius1, float radius2, float radius3)
{
int i = get_global_id(0);
int j = get_global_id(1);
if (!(i >= 0 && i < rows) || !(j >= 0 && j < cols))
return;
dist += mad24(i, dist_step, dist_offset);
int rowsOffset = mad24(NUM_ITERS - 1, rows, i);
__global int *bound = PtrAdd(bounds, mad24(rowsOffset, bounds_step, mad24(j, 2*(int)sizeof(int), bounds_offset)), int);
__global float *tail = PtrAdd(tailmat, mad24(rowsOffset, tail_step, mad24(j, 2*(int)sizeof(float), tail_offset)), float);
float radius[] = {radius1, radius2, radius3};
int low_bound = j;
int high_bound = j;
float val, cur_radius, low_dt_val = 0.0f, high_dt_val = 0.0f;
for (int iter = NUM_ITERS - 1; iter >= 0; iter--)
{
cur_radius = radius[iter];
while (cur_radius > (val = low_dt_val + getFloat(dist, low_bound - 1)) )
{
low_dt_val = val;
low_bound--;
}
bound[0] = low_bound;
tail[0] = (cur_radius - low_dt_val);
while (cur_radius > (val = high_dt_val + getFloat(dist, high_bound)) )
{
high_dt_val = val;
high_bound++;
}
bound[1] = high_bound;
tail[1] = (cur_radius - high_dt_val);
bound = PtrAdd(bound, -rows*bounds_step, int);
tail = PtrAdd(tail, -rows*tail_step, float);
}
}
__kernel void compute_dt_hor(__global const uchar *src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar *dst, int dst_step, int dst_offset,
float sigma_ratio, float max_radius)
{
int i = get_global_id(0);
int j = get_global_id(1) - 1;
if (!(i >= 0 && i < src_rows && j >= -1 && j < src_cols))
return;
src += mad24(i, src_step, mad24(j, (int)ELEM_SIZE, src_offset));
dst += mad24(i, dst_step, mad24(j, (int)sizeof(float) , dst_offset));
float dist;
if (j == -1 || j == src_cols - 1)
dist = max_radius;
else
dist = DT(loadGuide(src), loadGuide(src + ELEM_SIZE), sigma_ratio);
storeDist(dist, dst);
}
__kernel void compute_dt_vert(__global const uchar *src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar *dst, int dst_step, int dst_offset,
float sigma_ratio, float max_radius)
{
int i = get_global_id(0) - 1;
int j = get_global_id(1);
if (!(i >= -1 && i < src_rows && j >= 0 && j < src_cols))
return;
src += mad24(i, src_step, mad24(j, (int)ELEM_SIZE, src_offset));
dst += mad24(i, dst_step, mad24(j, (int)sizeof(float) , dst_offset));
float dist;
if (i == -1 || i == src_rows - 1)
dist = max_radius;
else
dist = DT(loadGuide(src), loadGuide(src + src_step), sigma_ratio);
storeDist(dist, dst);
}
__kernel void compute_idt_vert(__global const uchar *src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar *dst, int dst_step, int dst_offset,
float sigma_ratio)
{
int j = get_global_id(0);
if (!(j >= 0 && j < src_cols))
return;
int i;
float idist = 0;
__global const uchar *src_col = src + mad24(j, (int)ELEM_SIZE, src_offset);
__global uchar *dst_col = dst + mad24(j, (int)sizeof(float), dst_offset);
storeFloat(-FLT_MAX, dst_col + (-1)*dst_step, 0);
storeFloat(0.0f, dst_col + 0*dst_step, 0);
for (i = 1; i < src_rows; i++, src_col += src_step)
{
idist += DT(loadGuide(src_col), loadGuide(src_col + src_step), sigma_ratio);
storeFloat(idist, dst_col + i*dst_step, 0);
}
storeFloat(FLT_MAX, dst_col + i*dst_step, 0);
}
__kernel void compute_a0DT_vert(__global const uchar *src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar *dst, int dst_step, int dst_offset,
float sigma_ratio, float alpha)
{
int i = get_global_id(0);
int j = get_global_id(1);
if (!(i >= 0 && i < src_rows-1 && j >= 0 && j < src_cols))
return;
src += mad24(i, src_step, mad24(j, (int)ELEM_SIZE, src_offset));
dst += mad24(i, dst_step, mad24(j, (int)sizeof(float) , dst_offset));
float dist = DT(loadGuide(src), loadGuide(src + src_step), sigma_ratio);
storeDist(native_powr(alpha, dist), dst);
}

@ -0,0 +1,427 @@
#if defined(_MSC_VER) //for correct syntax highlighting
#include "OpenCLKernel.hpp"
#define cn 3
#define SrcVec float3
#define NUM_ITERS 3
#endif
#define getFloatn(n, addr, index) vload##n(index, (__global const float*)(addr))
#define storeFloatn(n, val, addr, index) vstore##n(val, index, (__global float*)(addr))
#define PtrAdd(addr, bytesNum, Type) ((__global Type *)((__global uchar*)(addr) + bytesNum))
#define PtrAddConst(addr, bytesNum, Type) ((__global const Type *)((__global const uchar*)(addr) + bytesNum))
#if cn==3
#define ELEM_SIZE (int)(3*sizeof(float))
#define getPix(addr, index) vload3(index, (__global const float*)(addr))
#define storePix(val, addr, index) vstore3(val, index, (__global float*)(addr))
#else
#define ELEM_SIZE (int)sizeof(SrcVec)
#define getPix(addr, index) ( *(__global const SrcVec*)(addr + (index)*ELEM_SIZE) )
#define storePix(val, addr, index) ( *(__global SrcVec*)(addr + (index)*ELEM_SIZE) = val )
#endif
#define getFloat(addr, index) ( *(__global const float*)(addr + (index)*sizeof(float)) )
#define storeFloat(val, addr, index) ( *(__global float*)(addr + (index)*sizeof(float)) = val )
#define getInt(addr, index) ( *(__global const int*)(addr + (index)*sizeof(int)) )
#define storeInt(val, addr, index) ( *(__global int*)(addr + (index)*sizeof(int)) = val )
#define getFloat4(addr, index) getFloatn(4, addr, index)
#define storeFloat4(val, addr, index) storeFloatn(4, val, addr, index)
#define NC_USE_INTEGRAL_SRC
#undef NC_USE_INTEGRAL_SRC
__kernel void integrate_cols_4f(__global uchar *src, int src_step, int src_offset,
__global uchar *isrc, int isrc_step, int isrc_offset,
int rows, int col_chunks)
{
int j = get_global_id(0);
if ( !(j >= 0 && j < col_chunks) )
return;
src += mad24(j, (int)sizeof(float4), src_offset);
isrc += mad24(j, (int)sizeof(float4), isrc_offset);
float4 sum = 0;
storeFloat4(0, isrc, 0);
isrc += isrc_step;
for (int i = 0; i < rows; i++, src += src_step, isrc += isrc_step)
{
sum += getFloat4(src, 0);
storeFloat4(sum, isrc, 0);
}
}
__kernel void integrate_cols_with_dist(__global const uchar *src, int src_step, int src_offset,
int src_rows, int src_cols,
__global const uchar *dist, int dist_step, int dist_offset,
__global uchar *isrc, int isrc_step, int isrc_offset)
{
int j = get_global_id(0);
if ( !(j >= 0 && j < src_cols) )
return;
src += mad24(j, ELEM_SIZE, src_offset);
isrc += mad24(j, ELEM_SIZE, isrc_offset);
dist += mad24(j, (int)sizeof(float), dist_offset);
SrcVec sum = 0;
storePix(0, isrc, 0);
isrc += isrc_step;
for (int i = 0; i < src_rows; i++, src += src_step, isrc += isrc_step, dist += dist_step)
{
sum += 0.5f * (getPix(src, 0) + getPix(src + src_step, 0)) * getFloat(dist, 0);
storePix(sum, isrc, 0);
}
}
__kernel void filter_IC_hor(__global const uchar *src, int src_step, int src_offset, int rows, int cols,
__global const uchar *isrc, int isrc_step, int isrc_offset,
__global const int *bounds, int bounds_step, int bounds_offset,
__global const float *tail, int tail_step, int tail_offset,
__global float *dist, int dist_step, int dist_offset,
__global uchar *dst, int dst_step, int dst_offset,
float radius, int iterNum
)
{
int i = get_global_id(0);
int j = get_global_id(1);
if (!(i >= 0 && i < rows) || !(j >= 0 && j < cols))
return;
dst += mad24(i, dst_step, dst_offset);
isrc += mad24(i, isrc_step, isrc_offset);
src += mad24(i, src_step, src_offset);
int ioffset = mad24(iterNum*rows + i, bounds_step, mad24(j, 2*(int)sizeof(int), bounds_offset));
bounds = PtrAddConst(bounds, ioffset, int);
int il = bounds[0];
int ir = bounds[1];
int toffset = mad24(iterNum*rows + i, tail_step, mad24(j, 2*(int)sizeof(float), tail_offset));
tail = PtrAddConst(tail, toffset, float);
float tl = tail[0];
float tr = tail[1];
dist = PtrAdd(dist, mad24(i, dist_step, dist_offset), float);
float dl = tl / dist[il-1];
float dr = tr / dist[ir];
SrcVec res;
res = getPix(isrc, ir) - getPix(isrc, il); //center part
res += 0.5f*tl * ( dl*getPix(src, il - 1) + (2.0f - dl)*getPix(src, il) ); //left tail
res += 0.5f*tr * ( dr*getPix(src, ir + 1) + (2.0f - dr)*getPix(src, ir) ); //right tail
res /= radius;
storePix(res, dst, j);
}
__kernel void filter_NC_hor_by_bounds(__global const uchar *isrc, int isrc_step, int isrc_offset,
__global const uchar *bounds, int bounds_step, int bounds_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols,
int iterNum)
{
int i = get_global_id(0);
int j = get_global_id(1);
if (!(i >= 0 && i < rows) || !(j >= 0 && j < cols))
return;
dst += mad24(i, dst_step, dst_offset);
isrc += mad24(i, isrc_step, isrc_offset);
int ioffset = mad24(iterNum*rows + i, bounds_step, mad24(j, 2*(int)sizeof(int), bounds_offset));
__global const int *index = PtrAddConst(bounds, ioffset, int);
int li = index[0];
int hi = index[1];
SrcVec res = (getPix(isrc, hi+1) - getPix(isrc, li)) / (float)(hi - li + 1);
storePix(res, dst, j);
}
__kernel void filter_NC_hor(__global uchar *src, int src_step, int src_offset, int src_rows, int src_cols,
__global const uchar *idist, int idist_step, int idist_offset,
__global uchar *dst, int dst_step, int dst_offset,
float radius)
{
int i = get_global_id(0);
int j = get_global_id(1);
if (!(i >= 0 && i < src_rows) || !(j >= 0 && j < src_cols))
return;
src += mad24(i, src_step, src_offset);
idist += mad24(i, idist_step, idist_offset);
dst += mad24(i, dst_step, dst_offset);
int low_bound = j;
int high_bound = j;
float cur_idt_val = getFloat(idist, j);
float low_idt_val = cur_idt_val - radius;
float high_idt_val = cur_idt_val + radius;
SrcVec sum = getPix(src, j);
while (low_bound > 0 && low_idt_val < getFloat(idist, low_bound-1))
{
low_bound--;
sum += getPix(src, low_bound);
}
while (getFloat(idist, high_bound + 1) < high_idt_val)
{
high_bound++;
sum += getPix(src, high_bound);
}
storePix(sum / (float)(high_bound - low_bound + 1), dst, j);
}
__kernel void filter_NC_vert(__global const uchar *src, int src_step, int src_offset, int src_rows, int src_cols,
__global const uchar *idist, int idist_step, int idist_offset,
__global uchar *dst, int dst_step, int dst_offset,
float radius)
{
int j = get_global_id(0);
if (!(j >= 0 && j < src_cols))
return;
__global const uchar *src_col = src + mad24(j, (int)ELEM_SIZE, src_offset);
__global uchar *dst_col = dst + mad24(j, (int)ELEM_SIZE, dst_offset);
__global const uchar *idist_col = idist + mad24(j, (int)sizeof(float), idist_offset);
int low_bound = 0, high_bound = 0;
SrcVec sum = getPix(src_col, 0);
SrcVec res;
for (int i = 0; i < src_rows; i++)
{
float cur_idt_value = getFloat(idist_col + i*idist_step, 0);
float low_idt_value = cur_idt_value - radius;
float high_idt_value = cur_idt_value + radius;
while (getFloat(idist_col + low_bound*idist_step, 0) < low_idt_value)
{
sum -= getPix(src_col + low_bound*src_step, 0);
low_bound++;
}
while (getFloat(idist_col + (high_bound + 1)*idist_step, 0) < high_idt_value)
{
high_bound++;
sum += getPix(src_col + high_bound*src_step, 0);
}
res = sum / (float)(high_bound - low_bound + 1);
storePix(res, dst_col + i*dst_step, 0);
}
}
__kernel void filter_RF_vert(__global uchar *res, int res_step, int res_offset, int res_rows, int res_cols,
__global uchar *adist, int adist_step, int adist_offset,
int write_new_a_dist)
{
int j = get_global_id(0);
if (!(j >= 0 && j < res_cols))
return;
res += mad24(j, (int)ELEM_SIZE, res_offset);
adist += mad24(j, (int)sizeof(float), adist_offset);
SrcVec cur_val;
float ad;
int i;
cur_val = getPix(res + 0*res_step, 0);
for (i = 1; i < res_rows; i++)
{
ad = getFloat(adist + (i-1)*adist_step, 0);
cur_val = (1.0f - ad)*getPix(res + i*res_step, 0) + ad*cur_val;
storePix(cur_val, res + i*res_step, 0);
}
for (i = res_rows - 2; i >= 0; i--)
{
ad = getFloat(adist + i*adist_step, 0);
cur_val = (1.0f - ad)*getPix(res + i*res_step, 0) + ad*cur_val;
storePix(cur_val, res + i*res_step, 0);
if (write_new_a_dist)
storeFloat(ad*ad, adist + i*adist_step, 0);
}
}
#define genMinMaxPtrs(ptr, h) __global const uchar *ptr##_min = (__global const uchar *)ptr;\
__global const uchar *ptr##_max = (__global const uchar *)ptr + mad24(h, ptr##_step, ptr##_offset);
#define checkPtr(ptr, bound) (bound##_min <= (__global const uchar *)(ptr) && (__global const uchar *)(ptr) < bound##_max)
__kernel void filter_RF_block_init_fwd(__global uchar *res, int res_step, int res_offset, int rows, int cols,
__global uchar *adist, int adist_step, int adist_offset,
__global uchar *weights, int weights_step, int weights_offset,
int blockSize)
{
int bid = get_global_id(0);
int j = get_global_id(1);
if (j < 0 || j >= cols) return;
int startRow = max(1, bid*blockSize); //skip first row
int endRow = min(rows, (bid + 1)*blockSize);
res += mad24(j, ELEM_SIZE, mad24(startRow, res_step, res_offset));;
adist += mad24(j, (int) sizeof(float), mad24(startRow-1, adist_step, adist_offset));
weights += mad24(j, (int) sizeof(float), mad24(startRow, weights_step, weights_offset));
SrcVec cur_val = (startRow != 1) ? 0 : getPix(res - res_step, 0);
float weight0 = 1.0f;
for (int i = startRow; i < endRow; i++)
{
float ad = getFloat(adist, 0);
cur_val = (1.0f - ad)*getPix(res, 0) + ad*cur_val;
storePix(cur_val, res, 0);
weight0 *= ad;
storeFloat(weight0, weights, 0);
res += res_step;
adist += adist_step;
weights += weights_step;
}
}
__kernel void filter_RF_block_fill_borders_fwd(__global uchar *res, int res_step, int res_offset, int rows, int cols,
__global uchar *weights, int weights_step, int weights_offset,
int blockSize)
{
int j = get_global_id(0);
if (j < 0 || j >= cols) return;
int startRow = 2*blockSize - 1;
res += mad24(j, ELEM_SIZE, mad24(startRow, res_step, res_offset));
weights += mad24(j, (int) sizeof(float), mad24(startRow, weights_step, weights_offset));
int res_step_block = blockSize*res_step;
int weights_step_block = blockSize*weights_step;
SrcVec prev_pix = getPix(res - res_step_block, 0);
for (int i = startRow; i < rows; i += blockSize)
{
prev_pix = getPix(res, 0) + getFloat(weights, 0)*prev_pix;
storePix(prev_pix, res, 0);
res += res_step_block;
weights += weights_step_block;
}
}
__kernel void filter_RF_block_fill_fwd(__global uchar *res, int res_step, int res_offset, int rows, int cols,
__global uchar *weights, int weights_step, int weights_offset,
int blockSize)
{
int i = get_global_id(0) + blockSize;
int j = get_global_id(1);
if (j < 0 || j >= cols || i < 0 || i >= rows) return;
if (i % blockSize == blockSize - 1) return; //to avoid rewriting bound pixels
int bid = i / blockSize;
int ref_row_id = bid*blockSize - 1;
__global uchar *res_ref_row = res + mad24(ref_row_id, res_step, res_offset);
__global uchar *res_dst_row = res + mad24(i, res_step, res_offset);
__global float *weights_row = PtrAdd(weights, mad24(i, weights_step, weights_offset), float);
storePix(getPix(res_dst_row, j) + weights_row[j]*getPix(res_ref_row, j), res_dst_row, j);
}
__kernel void filter_RF_block_init_bwd(__global uchar *res, int res_step, int res_offset, int rows, int cols,
__global uchar *adist, int adist_step, int adist_offset,
__global uchar *weights, int weights_step, int weights_offset,
int blockSize)
{
int bid = get_global_id(0);
int j = get_global_id(1);
if (j < 0 || j >= cols) return;
int startRow = rows-1 - max(1, bid*blockSize);
int endRow = max(-1, rows-1 - (bid+1)*blockSize);
res += mad24(j, ELEM_SIZE, mad24(startRow, res_step, res_offset));;
adist += mad24(j, (int) sizeof(float), mad24(startRow, adist_step, adist_offset));
weights += mad24(j, (int) sizeof(float), mad24(startRow, weights_step, weights_offset));
SrcVec cur_val = (startRow != rows-2) ? 0 : getPix(res + res_step, 0);
float weight0 = 1.0f;
for (int i = startRow; i > endRow; i--)
{
float ad = getFloat(adist, 0);
cur_val = (1.0f - ad)*getPix(res, 0) + ad*cur_val;
storePix(cur_val, res, 0);
weight0 *= ad;
storeFloat(weight0, weights, 0);
res -= res_step;
adist -= adist_step;
weights -= weights_step;
}
}
__kernel void filter_RF_block_fill_borders_bwd(__global uchar *res, int res_step, int res_offset, int rows, int cols,
__global uchar *weights, int weights_step, int weights_offset,
int blockSize)
{
int j = get_global_id(0);
if (j < 0 || j >= cols) return;
int startRow = rows-1 - (2*blockSize - 1);
res += mad24(j, ELEM_SIZE, mad24(startRow, res_step, res_offset));
weights += mad24(j, (int) sizeof(float), mad24(startRow, weights_step, weights_offset));
int res_step_block = blockSize*res_step;
int weights_step_block = blockSize*weights_step;
SrcVec prev_pix = getPix(res + res_step_block, 0);
for (int i = startRow; i >= 0; i -= blockSize)
{
prev_pix = getPix(res, 0) + getFloat(weights, 0)*prev_pix;
storePix(prev_pix, res, 0);
res -= blockSize*res_step;
weights -= blockSize*weights_step;
}
}
__kernel void filter_RF_block_fill_bwd(__global uchar *res, int res_step, int res_offset, int rows, int cols,
__global uchar *weights, int weights_step, int weights_offset,
int blockSize)
{
int i = get_global_id(0) + blockSize;
int j = get_global_id(1);
if (j < 0 || j >= cols || i < 0 || i >= rows) return;
if (i % blockSize == blockSize - 1) return; //to avoid rewriting bound pixels
int bid = i / blockSize;
int ref_row_id = rows-1 - (bid*blockSize - 1);
int dst_row_id = rows-1 - i;
__global uchar *res_ref_row = res + mad24(ref_row_id, res_step, res_offset);
__global uchar *res_dst_row = res + mad24(dst_row_id, res_step, res_offset);
__global float *weights_row = PtrAdd(weights, mad24(dst_row_id, weights_step, weights_offset), float);
storePix(getPix(res_dst_row, j) + weights_row[j]*getPix(res_ref_row, j), res_dst_row, j);
}

@ -0,0 +1,15 @@
#ifndef _OPENCV_EDGEFILTER_PRECOMP_HPP_
#define _OPENCV_EDGEFILTER_PRECOMP_HPP_
#include <opencv2/edge_filter.hpp>
#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/core/base.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/core/cvdef.h>
#include <opencv2/core/core_c.h>
#include <opencv2/core/private.hpp>
#include <opencv2/imgproc.hpp>
#include <limits>
#endif

@ -0,0 +1,224 @@
#include "precomp.hpp"
#include <vector>
using std::vector;
namespace cv
{
class WeightedMedianImpl : public WeightedMedian
{
public:
void filter(InputArray src, OutputArray dst, double rangeQuantizer, int dDepth);
WeightedMedianImpl(InputArray guide, double spatialSize, double colorSize, int convFilter);
protected:
Ptr<DTFilter> dtf;
Ptr<GuidedFilter> gf;
Size sz;
bool isDT;
class UpdateMedian_ParBody : public ParallelLoopBody
{
Mat &sum, &reached, &res;
float medianLevel;
int curLevelValue;
public:
UpdateMedian_ParBody(Mat& sum_, Mat& reached_, Mat& res_, int curLevelValue_, float medianLevel_ = 127.5f)
: sum(sum_), reached(reached_), res(res_), curLevelValue(curLevelValue_), medianLevel(medianLevel_) {}
void operator() (const Range& range) const;
};
class ComputeMedian_ParBody : public ParallelLoopBody
{
vector<Mat>& layersRes;
Mat &sum, &res;
int layersStart, layerSize;
public:
ComputeMedian_ParBody(vector<Mat>& layersRes_, Mat& sum_, Mat& res_, int layersStart_, int layerSize_)
:layersRes(layersRes_), sum(sum_), res(res_), layersStart(layersStart_), layerSize(layerSize_) {}
void operator() (const Range& range) const;
};
};
WeightedMedianImpl::WeightedMedianImpl(InputArray guide, double spatialSize, double colorSize, int filterType)
{
isDT = (filterType == DTF_NC || filterType == DTF_IC || filterType == DTF_RF);
if (isDT)
{
dtf = createDTFilter(guide, spatialSize, colorSize, filterType, 3);
}
else if (filterType == GUIDED_FILTER)
{
gf = createGuidedFilter(guide, cvRound(spatialSize), colorSize);
}
else
{
CV_Error(Error::StsBadFlag, "Unsupported type of edge aware filter (only guided filter and domain transform allowed)");
}
sz = guide.size();
}
void WeightedMedianImpl::filter(InputArray src, OutputArray dst, double rangeQuantizer, int dDepth)
{
CV_Assert(src.size() == sz && src.depth() == CV_8U);
int layerSize = cvRound(rangeQuantizer);
int srcCnNum = src.channels();
vector<Mat> srcCn(srcCnNum);
vector<Mat> resCn(srcCnNum);
if (srcCnNum == 1)
srcCn[0] = src.getMat();
else
split(src, srcCn);
Mat reached, sum;
vector<Mat> resLayers;
for (int cnId = 0; cnId < srcCnNum; cnId++)
{
double minVal, maxVal;
minMaxLoc(srcCn[cnId], &minVal, &maxVal);
int layerStart = (int)minVal;
int layerEnd = cvRound(maxVal);
int layersCount = (layerEnd - layerStart) / layerSize;
sum = Mat::zeros(sz, CV_32FC1);
if (isDT)
{
resCn[cnId] = Mat::zeros(sz, CV_8UC1);
reached = Mat::zeros(sz, CV_8UC1);
}
else
{
resLayers.resize(layersCount);
}
Mat layer, layerFiltered;
for (int layerId = 0; layerId < layersCount; layerId++)
{
int curLevelVal = layerStart + layerId*layerSize;
if (src.depth() == CV_8U && layerSize == 1)
{
layer = (srcCn[cnId] == layerStart + layerId);
}
else
{
layer = (srcCn[cnId] >= curLevelVal) & (srcCn[cnId] < curLevelVal + layerSize);
}
if (isDT)
{
dtf->filter(layer, layerFiltered, CV_32F);
add(layerFiltered, sum, sum);
UpdateMedian_ParBody updateMedian(sum, reached, resCn[cnId], curLevelVal);
parallel_for_(Range(0, sz.height), updateMedian);
}
else
{
gf->filter(layer, resLayers[layerId], CV_32F);
add(sum, resLayers[layerId], sum);
}
}
if (!isDT)
{
resCn[cnId].create(sz, CV_8UC1);
ComputeMedian_ParBody computeMedian(resLayers, sum, resCn[cnId], layerStart, layerSize);
parallel_for_(Range(0, sz.height), computeMedian);
}
}
if (dDepth == -1) dDepth = src.depth();
if (dDepth == CV_8U)
{
merge(resCn, dst);
}
else
{
Mat res;
merge(resCn, res);
res.convertTo(dst, dDepth);
}
}
void WeightedMedianImpl::UpdateMedian_ParBody::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
float *sumLine = sum.ptr<float>(i);
uchar *resLine = res.ptr<uchar>(i);
uchar *reachedLine = reached.ptr<uchar>(i);
for (int j = 0; j < sum.cols; j++)
{
if (reachedLine[j])
{
continue;
}
else if (sumLine[j] >= medianLevel)
{
resLine[j] = (uchar) curLevelValue;
reachedLine[j] = 0xFF;
}
}
}
}
void WeightedMedianImpl::ComputeMedian_ParBody::operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
float *sumLine = sum.ptr<float>(i);
uchar *resLine = res.ptr<uchar>(i);
for (int j = 0; j < sum.cols; j++)
{
float curSum = 0.0f;
float medianSum = sumLine[j] / 2.0f;
int l = 0;
for (l = 0; l < (int)layersRes.size(); l++)
{
if (curSum >= medianSum)
break;
curSum += layersRes[l].at<float>(i, j);
}
resLine[j] = (uchar) (layersStart + l*layerSize);
}
}
}
CV_EXPORTS_W
Ptr<WeightedMedian> createWeightedMedianFilter(InputArray guide, double spatialSize, double colorSize, int filterType)
{
return Ptr<WeightedMedian>(new WeightedMedianImpl(guide, spatialSize, colorSize, filterType));
}
CV_EXPORTS_W void weightedMedianFilter(InputArray guide, InputArray src, OutputArray dst, double spatialSize, double colorSize, int filterType, double rangeQuantizer)
{
WeightedMedian *wm = new WeightedMedianImpl(guide, spatialSize, colorSize, filterType);
wm->filter(src, dst, rangeQuantizer);
delete wm;
}
}

@ -0,0 +1,173 @@
#include "test_precomp.hpp"
using namespace std;
using namespace std::tr1;
using namespace cv;
using namespace testing;
#ifndef SQR
#define SQR(x) ((x)*(x))
#endif
namespace cvtest
{
static string getOpenCVExtraDir()
{
return cvtest::TS::ptr()->get_data_path();
}
static void checkSimilarity(InputArray res, InputArray ref)
{
double normInf = cvtest::norm(res, ref, NORM_INF);
double normL2 = cvtest::norm(res, ref, NORM_L2) / res.total();
EXPECT_LE(normInf, 1);
EXPECT_LE(normL2, 1.0 / 64);
}
TEST(AdaptiveManifoldTest, SplatSurfaceAccuracy)
{
RNG rnd(0);
for (int i = 0; i < 10; i++)
{
Size sz(rnd.uniform(512, 1024), rnd.uniform(512, 1024));
int guideCn = rnd.uniform(1, 8);
Mat guide(sz, CV_MAKE_TYPE(CV_32F, guideCn));
randu(guide, 0, 1);
Scalar surfaceValue;
int srcCn = rnd.uniform(1, 4);
rnd.fill(surfaceValue, RNG::UNIFORM, 0, 255);
Mat src(sz, CV_MAKE_TYPE(CV_8U, srcCn), surfaceValue);
double sigma_s = rnd.uniform(1.0, 50.0);
double sigma_r = rnd.uniform(0.1, 0.9);
Mat res;
amFilter(guide, src, res, sigma_s, sigma_r, false);
double normInf = cvtest::norm(src, res, NORM_INF);
EXPECT_EQ(normInf, 0);
}
}
TEST(AdaptiveManifoldTest, AuthorsReferenceAccuracy)
{
String srcImgPath = "cv/edgefilter/kodim23.png";
String refPaths[] =
{
"cv/edgefilter/amf/kodim23_amf_ss5_sr0.3_ref.png",
"cv/edgefilter/amf/kodim23_amf_ss30_sr0.1_ref.png",
"cv/edgefilter/amf/kodim23_amf_ss50_sr0.3_ref.png"
};
pair<double, double> refParams[] =
{
make_pair(5.0, 0.3),
make_pair(30.0, 0.1),
make_pair(50.0, 0.3)
};
String refOutliersPaths[] =
{
"cv/edgefilter/amf/kodim23_amf_ss5_sr0.1_outliers_ref.png",
"cv/edgefilter/amf/kodim23_amf_ss15_sr0.3_outliers_ref.png",
"cv/edgefilter/amf/kodim23_amf_ss50_sr0.5_outliers_ref.png"
};
pair<double, double> refOutliersParams[] =
{
make_pair(5.0, 0.1),
make_pair(15.0, 0.3),
make_pair(50.0, 0.5),
};
Mat srcImg = imread(getOpenCVExtraDir() + srcImgPath);
ASSERT_TRUE(!srcImg.empty());
for (int i = 0; i < 3; i++)
{
Mat refRes = imread(getOpenCVExtraDir() + refPaths[i]);
double sigma_s = refParams[i].first;
double sigma_r = refParams[i].second;
ASSERT_TRUE(!refRes.empty());
Mat res;
Ptr<AdaptiveManifoldFilter> amf = createAMFilter(sigma_s, sigma_r, false);
amf->setBool("use_RNG", false);
amf->filter(srcImg, res, srcImg);
amf->collectGarbage();
checkSimilarity(res, refRes);
}
for (int i = 0; i < 3; i++)
{
Mat refRes = imread(getOpenCVExtraDir() + refOutliersPaths[i]);
double sigma_s = refOutliersParams[i].first;
double sigma_r = refOutliersParams[i].second;
ASSERT_TRUE(!refRes.empty());
Mat res;
Ptr<AdaptiveManifoldFilter> amf = createAMFilter(sigma_s, sigma_r, true);
amf->setBool("use_RNG", false);
amf->filter(srcImg, res, srcImg);
amf->collectGarbage();
checkSimilarity(res, refRes);
}
}
typedef tuple<string, string> AMRefTestParams;
typedef TestWithParam<AMRefTestParams> AdaptiveManifoldRefImplTest;
Ptr<AdaptiveManifoldFilter> createAMFilterRefImpl(double sigma_s, double sigma_r, bool adjust_outliers = false);
TEST_P(AdaptiveManifoldRefImplTest, RefImplAccuracy)
{
AMRefTestParams params = GetParam();
string guideFileName = get<0>(params);
string srcFileName = get<1>(params);
Mat guide = imread(getOpenCVExtraDir() + guideFileName);
Mat src = imread(getOpenCVExtraDir() + srcFileName);
ASSERT_TRUE(!guide.empty() && !src.empty());
int seed = 10 * guideFileName.length() + srcFileName.length();
RNG rnd(seed);
//inconsistent downsample/upsample operations in reference implementation
Size dstSize((guide.cols + 15) & ~15, (guide.rows + 15) & ~15);
resize(guide, guide, dstSize);
resize(src, src, dstSize);
for (int iter = 0; iter < 6; iter++)
{
double sigma_s = rnd.uniform(1.0, 50.0);
double sigma_r = rnd.uniform(0.1, 0.9);
bool adjust_outliers = (iter % 2 == 0);
Mat res;
amFilter(guide, src, res, sigma_s, sigma_r, adjust_outliers);
Mat resRef;
Ptr<AdaptiveManifoldFilter> amf = createAMFilterRefImpl(sigma_s, sigma_r, adjust_outliers);
amf->filter(src, resRef, guide);
checkSimilarity(res, resRef);
}
}
INSTANTIATE_TEST_CASE_P(TypicalSet, AdaptiveManifoldRefImplTest,
Combine(
Values("cv/shared/lena.png", "cv/edgefilter/kodim23.png", "cv/npr/test4.png"),
Values("cv/shared/lena.png", "cv/edgefilter/kodim23.png", "cv/npr/test4.png")
));
}

@ -0,0 +1,944 @@
/*
* The MIT License(MIT)
*
* Copyright(c) 2013 Vladislav Vinogradov
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files(the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "precomp.hpp"
#include <opencv2/core/private.hpp>
#include <cmath>
using std::numeric_limits;
using namespace cv;
namespace
{
struct Buf
{
Mat_<Point3f> eta_1;
Mat_<uchar> cluster_1;
Mat_<Point3f> tilde_dst;
Mat_<float> alpha;
Mat_<Point3f> diff;
Mat_<Point3f> dst;
Mat_<float> V;
Mat_<Point3f> dIcdx;
Mat_<Point3f> dIcdy;
Mat_<float> dIdx;
Mat_<float> dIdy;
Mat_<float> dHdx;
Mat_<float> dVdy;
Mat_<float> t;
Mat_<float> theta_masked;
Mat_<Point3f> mul;
Mat_<Point3f> numerator;
Mat_<float> denominator;
Mat_<Point3f> numerator_filtered;
Mat_<float> denominator_filtered;
Mat_<Point3f> X;
Mat_<Point3f> eta_k_small;
Mat_<Point3f> eta_k_big;
Mat_<Point3f> X_squared;
Mat_<float> pixel_dist_to_manifold_squared;
Mat_<float> gaussian_distance_weights;
Mat_<Point3f> Psi_splat;
Mat_<Vec4f> Psi_splat_joined;
Mat_<Vec4f> Psi_splat_joined_resized;
Mat_<Vec4f> blurred_projected_values;
Mat_<Point3f> w_ki_Psi_blur;
Mat_<float> w_ki_Psi_blur_0;
Mat_<Point3f> w_ki_Psi_blur_resized;
Mat_<float> w_ki_Psi_blur_0_resized;
Mat_<float> rand_vec;
Mat_<float> v1;
Mat_<float> Nx_v1_mult;
Mat_<float> theta;
std::vector<Mat_<Point3f> > eta_minus;
std::vector<Mat_<uchar> > cluster_minus;
std::vector<Mat_<Point3f> > eta_plus;
std::vector<Mat_<uchar> > cluster_plus;
void release();
};
void Buf::release()
{
eta_1.release();
cluster_1.release();
tilde_dst.release();
alpha.release();
diff.release();
dst.release();
V.release();
dIcdx.release();
dIcdy.release();
dIdx.release();
dIdy.release();
dHdx.release();
dVdy.release();
t.release();
theta_masked.release();
mul.release();
numerator.release();
denominator.release();
numerator_filtered.release();
denominator_filtered.release();
X.release();
eta_k_small.release();
eta_k_big.release();
X_squared.release();
pixel_dist_to_manifold_squared.release();
gaussian_distance_weights.release();
Psi_splat.release();
Psi_splat_joined.release();
Psi_splat_joined_resized.release();
blurred_projected_values.release();
w_ki_Psi_blur.release();
w_ki_Psi_blur_0.release();
w_ki_Psi_blur_resized.release();
w_ki_Psi_blur_0_resized.release();
rand_vec.release();
v1.release();
Nx_v1_mult.release();
theta.release();
eta_minus.clear();
cluster_minus.clear();
eta_plus.clear();
cluster_plus.clear();
}
class AdaptiveManifoldFilterRefImpl : public AdaptiveManifoldFilter
{
public:
AlgorithmInfo* info() const;
AdaptiveManifoldFilterRefImpl();
void filter(InputArray src, OutputArray dst, InputArray joint);
void collectGarbage();
protected:
bool adjust_outliers_;
double sigma_s_;
double sigma_r_;
int tree_height_;
int num_pca_iterations_;
bool useRNG;
private:
void buildManifoldsAndPerformFiltering(const Mat_<Point3f>& eta_k, const Mat_<uchar>& cluster_k, int current_tree_level);
Buf buf_;
Mat_<Point3f> src_f_;
Mat_<Point3f> src_joint_f_;
Mat_<Point3f> sum_w_ki_Psi_blur_;
Mat_<float> sum_w_ki_Psi_blur_0_;
Mat_<float> min_pixel_dist_to_manifold_squared_;
RNG rng_;
int cur_tree_height_;
float sigma_r_over_sqrt_2_;
};
AdaptiveManifoldFilterRefImpl::AdaptiveManifoldFilterRefImpl()
{
sigma_s_ = 16.0;
sigma_r_ = 0.2;
tree_height_ = -1;
num_pca_iterations_ = 1;
adjust_outliers_ = false;
useRNG = true;
}
void AdaptiveManifoldFilterRefImpl::collectGarbage()
{
buf_.release();
src_f_.release();
src_joint_f_.release();
sum_w_ki_Psi_blur_.release();
sum_w_ki_Psi_blur_0_.release();
min_pixel_dist_to_manifold_squared_.release();
}
CV_INIT_ALGORITHM(AdaptiveManifoldFilterRefImpl, "AdaptiveManifoldFilterRefImpl",
obj.info()->addParam(obj, "sigma_s", obj.sigma_s_, false, 0, 0, "Filter spatial standard deviation");
obj.info()->addParam(obj, "sigma_r", obj.sigma_r_, false, 0, 0, "Filter range standard deviation");
obj.info()->addParam(obj, "tree_height", obj.tree_height_, false, 0, 0, "Height of the manifold tree (default = -1 : automatically computed)");
obj.info()->addParam(obj, "num_pca_iterations", obj.num_pca_iterations_, false, 0, 0, "Number of iterations to computed the eigenvector v1");
obj.info()->addParam(obj, "adjust_outliers", obj.adjust_outliers_, false, 0, 0, "Specify supress outliers using Eq. 9 or not");
obj.info()->addParam(obj, "use_RNG", obj.useRNG, false, 0, 0, "Specify use random to compute eigenvector or not");)
inline double Log2(double n)
{
return log(n) / log(2.0);
}
inline int computeManifoldTreeHeight(double sigma_s, double sigma_r)
{
const double Hs = floor(Log2(sigma_s)) - 1.0;
const double Lr = 1.0 - sigma_r;
return max(2, static_cast<int>(ceil(Hs * Lr)));
}
void ensureSizeIsEnough(int rows, int cols, int type, Mat& m)
{
if (m.empty() || m.type() != type || m.data != m.datastart)
m.create(rows, cols, type);
else
{
const size_t esz = m.elemSize();
const ptrdiff_t delta2 = m.dataend - m.datastart;
const size_t minstep = m.cols * esz;
Size wholeSize;
wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / m.step + 1), m.rows);
wholeSize.width = std::max(static_cast<int>((delta2 - m.step * (wholeSize.height - 1)) / esz), m.cols);
if (wholeSize.height < rows || wholeSize.width < cols)
m.create(rows, cols, type);
else
{
m.cols = cols;
m.rows = rows;
}
}
}
inline void ensureSizeIsEnough(Size size, int type, Mat& m)
{
ensureSizeIsEnough(size.height, size.width, type, m);
}
template <typename T>
inline void ensureSizeIsEnough(int rows, int cols, Mat_<T>& m)
{
if (m.empty() || m.data != m.datastart)
m.create(rows, cols);
else
{
const size_t esz = m.elemSize();
const ptrdiff_t delta2 = m.dataend - m.datastart;
const size_t minstep = m.cols * esz;
Size wholeSize;
wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / m.step + 1), m.rows);
wholeSize.width = std::max(static_cast<int>((delta2 - m.step * (wholeSize.height - 1)) / esz), m.cols);
if (wholeSize.height < rows || wholeSize.width < cols)
m.create(rows, cols);
else
{
m.cols = cols;
m.rows = rows;
}
}
}
template <typename T>
inline void ensureSizeIsEnough(Size size, Mat_<T>& m)
{
ensureSizeIsEnough(size.height, size.width, m);
}
template <typename T>
void h_filter(const Mat_<T>& src, Mat_<T>& dst, float sigma)
{
CV_DbgAssert( src.depth() == CV_32F );
const float a = exp(-sqrt(2.0f) / sigma);
ensureSizeIsEnough(src.size(), dst);
for (int y = 0; y < src.rows; ++y)
{
const T* src_row = src[y];
T* dst_row = dst[y];
dst_row[0] = src_row[0];
for (int x = 1; x < src.cols; ++x)
{
//dst_row[x] = src_row[x] + a * (src_row[x - 1] - src_row[x]);
dst_row[x] = src_row[x] + a * (dst_row[x - 1] - src_row[x]); //!!!
}
for (int x = src.cols - 2; x >= 0; --x)
{
dst_row[x] = dst_row[x] + a * (dst_row[x + 1] - dst_row[x]);
}
}
for (int y = 1; y < src.rows; ++y)
{
T* dst_cur_row = dst[y];
T* dst_prev_row = dst[y - 1];
for (int x = 0; x < src.cols; ++x)
{
dst_cur_row[x] = dst_cur_row[x] + a * (dst_prev_row[x] - dst_cur_row[x]);
}
}
for (int y = src.rows - 2; y >= 0; --y)
{
T* dst_cur_row = dst[y];
T* dst_prev_row = dst[y + 1];
for (int x = 0; x < src.cols; ++x)
{
dst_cur_row[x] = dst_cur_row[x] + a * (dst_prev_row[x] - dst_cur_row[x]);
}
}
}
template <typename T>
void rdivide(const Mat_<T>& a, const Mat_<float>& b, Mat_<T>& dst)
{
CV_DbgAssert( a.depth() == CV_32F );
CV_DbgAssert( a.size() == b.size() );
ensureSizeIsEnough(a.size(), dst);
dst.setTo(0);
for (int y = 0; y < a.rows; ++y)
{
const T* a_row = a[y];
const float* b_row = b[y];
T* dst_row = dst[y];
for (int x = 0; x < a.cols; ++x)
{
//if (b_row[x] > numeric_limits<float>::epsilon())
dst_row[x] = a_row[x] * (1.0f / b_row[x]);
}
}
}
template <typename T>
void times(const Mat_<T>& a, const Mat_<float>& b, Mat_<T>& dst)
{
CV_DbgAssert( a.depth() == CV_32F );
CV_DbgAssert( a.size() == b.size() );
ensureSizeIsEnough(a.size(), dst);
for (int y = 0; y < a.rows; ++y)
{
const T* a_row = a[y];
const float* b_row = b[y];
T* dst_row = dst[y];
for (int x = 0; x < a.cols; ++x)
{
dst_row[x] = a_row[x] * b_row[x];
}
}
}
void AdaptiveManifoldFilterRefImpl::filter(InputArray _src, OutputArray _dst, InputArray _joint)
{
const Mat src = _src.getMat();
const Mat src_joint = _joint.getMat();
const Size srcSize = src.size();
CV_Assert( src.type() == CV_8UC3 );
CV_Assert( src_joint.empty() || (src_joint.type() == src.type() && src_joint.size() == srcSize) );
ensureSizeIsEnough(srcSize, src_f_);
src.convertTo(src_f_, src_f_.type(), 1.0 / 255.0);
ensureSizeIsEnough(srcSize, sum_w_ki_Psi_blur_);
sum_w_ki_Psi_blur_.setTo(Scalar::all(0));
ensureSizeIsEnough(srcSize, sum_w_ki_Psi_blur_0_);
sum_w_ki_Psi_blur_0_.setTo(Scalar::all(0));
ensureSizeIsEnough(srcSize, min_pixel_dist_to_manifold_squared_);
min_pixel_dist_to_manifold_squared_.setTo(Scalar::all(numeric_limits<float>::max()));
// If the tree_height was not specified, compute it using Eq. (10) of our paper.
cur_tree_height_ = tree_height_ > 0 ? tree_height_ : computeManifoldTreeHeight(sigma_s_, sigma_r_);
// If no joint signal was specified, use the original signal
ensureSizeIsEnough(srcSize, src_joint_f_);
if (src_joint.empty())
src_f_.copyTo(src_joint_f_);
else
src_joint.convertTo(src_joint_f_, src_joint_f_.type(), 1.0 / 255.0);
// Use the center pixel as seed to random number generation.
const double seedCoef = src_joint_f_(src_joint_f_.rows / 2, src_joint_f_.cols / 2).x;
const uint64 baseCoef = numeric_limits<uint64>::max() / 0xFFFF;
rng_.state = static_cast<uint64>(baseCoef*seedCoef);
// Dividing the covariance matrix by 2 is equivalent to dividing the standard deviations by sqrt(2).
sigma_r_over_sqrt_2_ = static_cast<float>(sigma_r_ / sqrt(2.0));
// Algorithm 1, Step 1: compute the first manifold by low-pass filtering.
h_filter(src_joint_f_, buf_.eta_1, static_cast<float>(sigma_s_));
ensureSizeIsEnough(srcSize, buf_.cluster_1);
buf_.cluster_1.setTo(Scalar::all(1));
buf_.eta_minus.resize(cur_tree_height_);
buf_.cluster_minus.resize(cur_tree_height_);
buf_.eta_plus.resize(cur_tree_height_);
buf_.cluster_plus.resize(cur_tree_height_);
buildManifoldsAndPerformFiltering(buf_.eta_1, buf_.cluster_1, 1);
// Compute the filter response by normalized convolution -- Eq. (4)
rdivide(sum_w_ki_Psi_blur_, sum_w_ki_Psi_blur_0_, buf_.tilde_dst);
if (!adjust_outliers_)
{
buf_.tilde_dst.convertTo(_dst, CV_8U, 255.0);
}
else
{
// Adjust the filter response for outlier pixels -- Eq. (10)
ensureSizeIsEnough(srcSize, buf_.alpha);
exp(min_pixel_dist_to_manifold_squared_ * (-0.5 / sigma_r_ / sigma_r_), buf_.alpha);
ensureSizeIsEnough(srcSize, buf_.diff);
subtract(buf_.tilde_dst, src_f_, buf_.diff);
times(buf_.diff, buf_.alpha, buf_.diff);
ensureSizeIsEnough(srcSize, buf_.dst);
add(src_f_, buf_.diff, buf_.dst);
buf_.dst.convertTo(_dst, CV_8U, 255.0);
}
}
inline double floor_to_power_of_two(double r)
{
return pow(2.0, floor(Log2(r)));
}
void channelsSum(const Mat_<Point3f>& src, Mat_<float>& dst)
{
ensureSizeIsEnough(src.size(), dst);
for (int y = 0; y < src.rows; ++y)
{
const Point3f* src_row = src[y];
float* dst_row = dst[y];
for (int x = 0; x < src.cols; ++x)
{
const Point3f src_val = src_row[x];
dst_row[x] = src_val.x + src_val.y + src_val.z;
}
}
}
void phi(const Mat_<float>& src, Mat_<float>& dst, float sigma)
{
ensureSizeIsEnough(src.size(), dst);
for (int y = 0; y < dst.rows; ++y)
{
const float* src_row = src[y];
float* dst_row = dst[y];
for (int x = 0; x < dst.cols; ++x)
{
dst_row[x] = exp(-0.5f * src_row[x] / sigma / sigma);
}
}
}
void catCn(const Mat_<Point3f>& a, const Mat_<float>& b, Mat_<Vec4f>& dst)
{
ensureSizeIsEnough(a.size(), dst);
for (int y = 0; y < a.rows; ++y)
{
const Point3f* a_row = a[y];
const float* b_row = b[y];
Vec4f* dst_row = dst[y];
for (int x = 0; x < a.cols; ++x)
{
const Point3f a_val = a_row[x];
const float b_val = b_row[x];
dst_row[x] = Vec4f(a_val.x, a_val.y, a_val.z, b_val);
}
}
}
void diffY(const Mat_<Point3f>& src, Mat_<Point3f>& dst)
{
ensureSizeIsEnough(src.rows - 1, src.cols, dst);
for (int y = 0; y < src.rows - 1; ++y)
{
const Point3f* src_cur_row = src[y];
const Point3f* src_next_row = src[y + 1];
Point3f* dst_row = dst[y];
for (int x = 0; x < src.cols; ++x)
{
dst_row[x] = src_next_row[x] - src_cur_row[x];
}
}
}
void diffX(const Mat_<Point3f>& src, Mat_<Point3f>& dst)
{
ensureSizeIsEnough(src.rows, src.cols - 1, dst);
for (int y = 0; y < src.rows; ++y)
{
const Point3f* src_row = src[y];
Point3f* dst_row = dst[y];
for (int x = 0; x < src.cols - 1; ++x)
{
dst_row[x] = src_row[x + 1] - src_row[x];
}
}
}
void TransformedDomainRecursiveFilter(const Mat_<Vec4f>& I, const Mat_<float>& DH, const Mat_<float>& DV, Mat_<Vec4f>& dst, float sigma, Buf& buf)
{
CV_DbgAssert( I.size() == DH.size() );
const float a = exp(-sqrt(2.0f) / sigma);
ensureSizeIsEnough(I.size(), dst);
I.copyTo(dst);
ensureSizeIsEnough(DH.size(), buf.V);
for (int y = 0; y < DH.rows; ++y)
{
const float* D_row = DH[y];
float* V_row = buf.V[y];
for (int x = 0; x < DH.cols; ++x)
{
V_row[x] = pow(a, D_row[x]);
}
}
for (int y = 0; y < I.rows; ++y)
{
const float* V_row = buf.V[y];
Vec4f* dst_row = dst[y];
for (int x = 1; x < I.cols; ++x)
{
Vec4f dst_cur_val = dst_row[x];
const Vec4f dst_prev_val = dst_row[x - 1];
const float V_val = V_row[x];
dst_cur_val[0] += V_val * (dst_prev_val[0] - dst_cur_val[0]);
dst_cur_val[1] += V_val * (dst_prev_val[1] - dst_cur_val[1]);
dst_cur_val[2] += V_val * (dst_prev_val[2] - dst_cur_val[2]);
dst_cur_val[3] += V_val * (dst_prev_val[3] - dst_cur_val[3]);
dst_row[x] = dst_cur_val;
}
for (int x = I.cols - 2; x >= 0; --x)
{
Vec4f dst_cur_val = dst_row[x];
const Vec4f dst_prev_val = dst_row[x + 1];
//const float V_val = V_row[x];
const float V_val = V_row[x+1];
dst_cur_val[0] += V_val * (dst_prev_val[0] - dst_cur_val[0]);
dst_cur_val[1] += V_val * (dst_prev_val[1] - dst_cur_val[1]);
dst_cur_val[2] += V_val * (dst_prev_val[2] - dst_cur_val[2]);
dst_cur_val[3] += V_val * (dst_prev_val[3] - dst_cur_val[3]);
dst_row[x] = dst_cur_val;
}
}
for (int y = 0; y < DV.rows; ++y)
{
const float* D_row = DV[y];
float* V_row = buf.V[y];
for (int x = 0; x < DV.cols; ++x)
{
V_row[x] = pow(a, D_row[x]);
}
}
for (int y = 1; y < I.rows; ++y)
{
const float* V_row = buf.V[y];
Vec4f* dst_cur_row = dst[y];
Vec4f* dst_prev_row = dst[y - 1];
for (int x = 0; x < I.cols; ++x)
{
Vec4f dst_cur_val = dst_cur_row[x];
const Vec4f dst_prev_val = dst_prev_row[x];
const float V_val = V_row[x];
dst_cur_val[0] += V_val * (dst_prev_val[0] - dst_cur_val[0]);
dst_cur_val[1] += V_val * (dst_prev_val[1] - dst_cur_val[1]);
dst_cur_val[2] += V_val * (dst_prev_val[2] - dst_cur_val[2]);
dst_cur_val[3] += V_val * (dst_prev_val[3] - dst_cur_val[3]);
dst_cur_row[x] = dst_cur_val;
}
}
for (int y = I.rows - 2; y >= 0; --y)
{
//const float* V_row = buf.V[y];
const float* V_row = buf.V[y + 1];
Vec4f* dst_cur_row = dst[y];
Vec4f* dst_prev_row = dst[y + 1];
for (int x = 0; x < I.cols; ++x)
{
Vec4f dst_cur_val = dst_cur_row[x];
const Vec4f dst_prev_val = dst_prev_row[x];
const float V_val = V_row[x];
dst_cur_val[0] += V_val * (dst_prev_val[0] - dst_cur_val[0]);
dst_cur_val[1] += V_val * (dst_prev_val[1] - dst_cur_val[1]);
dst_cur_val[2] += V_val * (dst_prev_val[2] - dst_cur_val[2]);
dst_cur_val[3] += V_val * (dst_prev_val[3] - dst_cur_val[3]);
dst_cur_row[x] = dst_cur_val;
}
}
}
void RF_filter(const Mat_<Vec4f>& src, const Mat_<Point3f>& src_joint, Mat_<Vec4f>& dst, float sigma_s, float sigma_r, Buf& buf)
{
CV_DbgAssert( src_joint.size() == src.size() );
diffX(src_joint, buf.dIcdx);
diffY(src_joint, buf.dIcdy);
ensureSizeIsEnough(src.size(), buf.dIdx);
buf.dIdx.setTo(Scalar::all(0));
for (int y = 0; y < src.rows; ++y)
{
const Point3f* dIcdx_row = buf.dIcdx[y];
float* dIdx_row = buf.dIdx[y];
for (int x = 1; x < src.cols; ++x)
{
const Point3f val = dIcdx_row[x - 1];
dIdx_row[x] = val.dot(val);
}
}
ensureSizeIsEnough(src.size(), buf.dIdy);
buf.dIdy.setTo(Scalar::all(0));
for (int y = 1; y < src.rows; ++y)
{
const Point3f* dIcdy_row = buf.dIcdy[y - 1];
float* dIdy_row = buf.dIdy[y];
for (int x = 0; x < src.cols; ++x)
{
const Point3f val = dIcdy_row[x];
dIdy_row[x] = val.dot(val);
}
}
ensureSizeIsEnough(buf.dIdx.size(), buf.dHdx);
buf.dIdx.convertTo(buf.dHdx, buf.dHdx.type(), (sigma_s / sigma_r) * (sigma_s / sigma_r), (sigma_s / sigma_s) * (sigma_s / sigma_s));
sqrt(buf.dHdx, buf.dHdx);
ensureSizeIsEnough(buf.dIdy.size(), buf.dVdy);
buf.dIdy.convertTo(buf.dVdy, buf.dVdy.type(), (sigma_s / sigma_r) * (sigma_s / sigma_r), (sigma_s / sigma_s) * (sigma_s / sigma_s));
sqrt(buf.dVdy, buf.dVdy);
ensureSizeIsEnough(src.size(), dst);
src.copyTo(dst);
TransformedDomainRecursiveFilter(src, buf.dHdx, buf.dVdy, dst, sigma_s, buf);
}
void split_3_1(const Mat_<Vec4f>& src, Mat_<Point3f>& dst1, Mat_<float>& dst2)
{
ensureSizeIsEnough(src.size(), dst1);
ensureSizeIsEnough(src.size(), dst2);
for (int y = 0; y < src.rows; ++y)
{
const Vec4f* src_row = src[y];
Point3f* dst1_row = dst1[y];
float* dst2_row = dst2[y];
for (int x = 0; x < src.cols; ++x)
{
Vec4f val = src_row[x];
dst1_row[x] = Point3f(val[0], val[1], val[2]);
dst2_row[x] = val[3];
}
}
}
void computeEigenVector(const Mat_<float>& X, const Mat_<uchar>& mask, Mat_<float>& dst, int num_pca_iterations, const Mat_<float>& rand_vec, Buf& buf)
{
CV_DbgAssert( X.cols == rand_vec.cols );
CV_DbgAssert( X.rows == mask.size().area() );
CV_DbgAssert( rand_vec.rows == 1 );
ensureSizeIsEnough(rand_vec.size(), dst);
rand_vec.copyTo(dst);
ensureSizeIsEnough(X.size(), buf.t);
float* dst_row = dst[0];
for (int i = 0; i < num_pca_iterations; ++i)
{
buf.t.setTo(Scalar::all(0));
for (int y = 0, ind = 0; y < mask.rows; ++y)
{
const uchar* mask_row = mask[y];
for (int x = 0; x < mask.cols; ++x, ++ind)
{
if (mask_row[x])
{
const float* X_row = X[ind];
float* t_row = buf.t[ind];
float dots = 0.0;
for (int c = 0; c < X.cols; ++c)
dots += dst_row[c] * X_row[c];
for (int c = 0; c < X.cols; ++c)
t_row[c] = dots * X_row[c];
}
}
}
dst.setTo(0.0);
for (int i = 0; i < X.rows; ++i)
{
const float* t_row = buf.t[i];
for (int c = 0; c < X.cols; ++c)
{
dst_row[c] += t_row[c];
}
}
}
double n = norm(dst);
divide(dst, n, dst);
}
void calcEta(const Mat_<Point3f>& src_joint_f, const Mat_<float>& theta, const Mat_<uchar>& cluster, Mat_<Point3f>& dst, float sigma_s, float df, Buf& buf)
{
ensureSizeIsEnough(theta.size(), buf.theta_masked);
buf.theta_masked.setTo(Scalar::all(0));
theta.copyTo(buf.theta_masked, cluster);
times(src_joint_f, buf.theta_masked, buf.mul);
const Size nsz = Size(saturate_cast<int>(buf.mul.cols * (1.0 / df)), saturate_cast<int>(buf.mul.rows * (1.0 / df)));
ensureSizeIsEnough(nsz, buf.numerator);
resize(buf.mul, buf.numerator, Size(), 1.0 / df, 1.0 / df);
ensureSizeIsEnough(nsz, buf.denominator);
resize(buf.theta_masked, buf.denominator, Size(), 1.0 / df, 1.0 / df);
h_filter(buf.numerator, buf.numerator_filtered, sigma_s / df);
h_filter(buf.denominator, buf.denominator_filtered, sigma_s / df);
rdivide(buf.numerator_filtered, buf.denominator_filtered, dst);
}
void AdaptiveManifoldFilterRefImpl::buildManifoldsAndPerformFiltering(const Mat_<Point3f>& eta_k, const Mat_<uchar>& cluster_k, int current_tree_level)
{
// Compute downsampling factor
double df = min(sigma_s_ / 4.0, 256.0 * sigma_r_);
df = floor_to_power_of_two(df);
df = max(1.0, df);
// Splatting: project the pixel values onto the current manifold eta_k
if (eta_k.rows == src_joint_f_.rows)
{
ensureSizeIsEnough(src_joint_f_.size(), buf_.X);
subtract(src_joint_f_, eta_k, buf_.X);
const Size nsz = Size(saturate_cast<int>(eta_k.cols * (1.0 / df)), saturate_cast<int>(eta_k.rows * (1.0 / df)));
ensureSizeIsEnough(nsz, buf_.eta_k_small);
resize(eta_k, buf_.eta_k_small, Size(), 1.0 / df, 1.0 / df);
}
else
{
ensureSizeIsEnough(eta_k.size(), buf_.eta_k_small);
eta_k.copyTo(buf_.eta_k_small);
ensureSizeIsEnough(src_joint_f_.size(), buf_.eta_k_big);
resize(eta_k, buf_.eta_k_big, src_joint_f_.size());
ensureSizeIsEnough(src_joint_f_.size(), buf_.X);
subtract(src_joint_f_, buf_.eta_k_big, buf_.X);
}
// Project pixel colors onto the manifold -- Eq. (3), Eq. (5)
ensureSizeIsEnough(buf_.X.size(), buf_.X_squared);
multiply(buf_.X, buf_.X, buf_.X_squared);
channelsSum(buf_.X_squared, buf_.pixel_dist_to_manifold_squared);
phi(buf_.pixel_dist_to_manifold_squared, buf_.gaussian_distance_weights, sigma_r_over_sqrt_2_);
times(src_f_, buf_.gaussian_distance_weights, buf_.Psi_splat);
const Mat_<float>& Psi_splat_0 = buf_.gaussian_distance_weights;
// Save min distance to later perform adjustment of outliers -- Eq. (10)
if (adjust_outliers_)
{
cv::min(_InputArray(min_pixel_dist_to_manifold_squared_), _InputArray(buf_.pixel_dist_to_manifold_squared), _OutputArray(min_pixel_dist_to_manifold_squared_));
}
// Blurring: perform filtering over the current manifold eta_k
catCn(buf_.Psi_splat, Psi_splat_0, buf_.Psi_splat_joined);
ensureSizeIsEnough(buf_.eta_k_small.size(), buf_.Psi_splat_joined_resized);
resize(buf_.Psi_splat_joined, buf_.Psi_splat_joined_resized, buf_.eta_k_small.size());
RF_filter(buf_.Psi_splat_joined_resized, buf_.eta_k_small, buf_.blurred_projected_values, static_cast<float>(sigma_s_ / df), sigma_r_over_sqrt_2_, buf_);
split_3_1(buf_.blurred_projected_values, buf_.w_ki_Psi_blur, buf_.w_ki_Psi_blur_0);
// Slicing: gather blurred values from the manifold
// Since we perform splatting and slicing at the same points over the manifolds,
// the interpolation weights are equal to the gaussian weights used for splatting.
const Mat_<float>& w_ki = buf_.gaussian_distance_weights;
ensureSizeIsEnough(src_f_.size(), buf_.w_ki_Psi_blur_resized);
resize(buf_.w_ki_Psi_blur, buf_.w_ki_Psi_blur_resized, src_f_.size());
times(buf_.w_ki_Psi_blur_resized, w_ki, buf_.w_ki_Psi_blur_resized);
add(sum_w_ki_Psi_blur_, buf_.w_ki_Psi_blur_resized, sum_w_ki_Psi_blur_);
ensureSizeIsEnough(src_f_.size(), buf_.w_ki_Psi_blur_0_resized);
resize(buf_.w_ki_Psi_blur_0, buf_.w_ki_Psi_blur_0_resized, src_f_.size());
times(buf_.w_ki_Psi_blur_0_resized, w_ki, buf_.w_ki_Psi_blur_0_resized);
add(sum_w_ki_Psi_blur_0_, buf_.w_ki_Psi_blur_0_resized, sum_w_ki_Psi_blur_0_);
// Compute two new manifolds eta_minus and eta_plus
if (current_tree_level < cur_tree_height_)
{
// Algorithm 1, Step 2: compute the eigenvector v1
const Mat_<float> nX(src_joint_f_.size().area(), 3, (float*) buf_.X.data);
ensureSizeIsEnough(1, nX.cols, buf_.rand_vec);
if (useRNG)
{
rng_.fill(buf_.rand_vec, RNG::UNIFORM, -0.5, 0.5);
}
else
{
for (int i = 0; i < buf_.rand_vec.total(); i++)
buf_.rand_vec(0, i) = (i % 2 == 0) ? 0.5f : -0.5f;
}
computeEigenVector(nX, cluster_k, buf_.v1, num_pca_iterations_, buf_.rand_vec, buf_);
// Algorithm 1, Step 3: Segment pixels into two clusters -- Eq. (6)
ensureSizeIsEnough(nX.rows, buf_.v1.rows, buf_.Nx_v1_mult);
gemm(nX, buf_.v1, 1.0, noArray(), 0.0, buf_.Nx_v1_mult, GEMM_2_T);
const Mat_<float> dot(src_joint_f_.rows, src_joint_f_.cols, (float*) buf_.Nx_v1_mult.data);
Mat_<uchar>& cluster_minus = buf_.cluster_minus[current_tree_level];
ensureSizeIsEnough(dot.size(), cluster_minus);
compare(dot, 0, cluster_minus, CMP_LT);
bitwise_and(cluster_minus, cluster_k, cluster_minus);
Mat_<uchar>& cluster_plus = buf_.cluster_plus[current_tree_level];
ensureSizeIsEnough(dot.size(), cluster_plus);
//compare(dot, 0, cluster_plus, CMP_GT);
compare(dot, 0, cluster_plus, CMP_GE);
bitwise_and(cluster_plus, cluster_k, cluster_plus);
// Algorithm 1, Step 4: Compute new manifolds by weighted low-pass filtering -- Eq. (7-8)
ensureSizeIsEnough(w_ki.size(), buf_.theta);
buf_.theta.setTo(Scalar::all(1.0));
subtract(buf_.theta, w_ki, buf_.theta);
Mat_<Point3f>& eta_minus = buf_.eta_minus[current_tree_level];
calcEta(src_joint_f_, buf_.theta, cluster_minus, eta_minus, sigma_s_, df, buf_);
Mat_<Point3f>& eta_plus = buf_.eta_plus[current_tree_level];
calcEta(src_joint_f_, buf_.theta, cluster_plus, eta_plus, sigma_s_, df, buf_);
// Algorithm 1, Step 5: recursively build more manifolds.
buildManifoldsAndPerformFiltering(eta_minus, cluster_minus, current_tree_level + 1);
buildManifoldsAndPerformFiltering(eta_plus, cluster_plus, current_tree_level + 1);
}
}
}
namespace cvtest
{
Ptr<AdaptiveManifoldFilter> createAMFilterRefImpl(double sigma_s, double sigma_r, bool adjust_outliers)
{
Ptr<AdaptiveManifoldFilter> amf(new AdaptiveManifoldFilterRefImpl());
amf->set("sigma_s", sigma_s);
amf->set("sigma_r", sigma_r);
amf->set("adjust_outliers", adjust_outliers);
return amf;
}
}

@ -0,0 +1,219 @@
#include "test_precomp.hpp"
using namespace std;
using namespace std::tr1;
using namespace cv;
using namespace testing;
using namespace perf;
namespace cvtest
{
static string getOpenCVExtraDir()
{
return cvtest::TS::ptr()->get_data_path();
}
CV_ENUM(SupportedTypes, CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4);
CV_ENUM(ModeType, DTF_NC, DTF_IC, DTF_RF)
typedef tuple<Size, ModeType, SupportedTypes, SupportedTypes> DTParams;
Mat convertTypeAndSize(Mat src, int dstType, Size dstSize)
{
Mat dst;
CV_Assert(src.channels() == 3);
int dstChannels = CV_MAT_CN(dstType);
if (dstChannels == 1)
{
cvtColor(src, dst, COLOR_BGR2GRAY);
}
else if (dstChannels == 2)
{
Mat srcCn[3];
split(src, srcCn);
merge(srcCn, 2, dst);
}
else if (dstChannels == 3)
{
dst = src.clone();
}
else if (dstChannels == 4)
{
Mat srcCn[4];
split(src, srcCn);
srcCn[3] = srcCn[0].clone();
merge(srcCn, 4, dst);
}
dst.convertTo(dst, dstType);
resize(dst, dst, dstSize);
return dst;
}
TEST(DomainTransformTest, SplatSurfaceAccuracy)
{
static int dtModes[] = {DTF_NC, DTF_RF, DTF_IC};
RNG rnd(0);
for (int i = 0; i < 15; i++)
{
Size sz(rnd.uniform(512, 1024), rnd.uniform(512, 1024));
int guideCn = rnd.uniform(1, 4);
Mat guide(sz, CV_MAKE_TYPE(CV_32F, guideCn));
randu(guide, 0, 255);
Scalar surfaceValue;
int srcCn = rnd.uniform(1, 4);
rnd.fill(surfaceValue, RNG::UNIFORM, 0, 255);
Mat src(sz, CV_MAKE_TYPE(CV_8U, srcCn), surfaceValue);
double sigma_s = rnd.uniform(1.0, 100.0);
double sigma_r = rnd.uniform(1.0, 100.0);
int mode = dtModes[i%3];
Mat res;
dtFilter(guide, src, res, sigma_s, sigma_r, mode, 1);
double normL1 = cvtest::norm(src, res, NORM_L1)/src.total()/src.channels();
EXPECT_LE(normL1, 1.0/64);
}
}
typedef TestWithParam<DTParams> DomainTransformTest;
TEST_P(DomainTransformTest, MultiThreadReproducibility)
{
if (cv::getNumberOfCPUs() == 1)
return;
double MAX_DIF = 1.0;
double MAX_MEAN_DIF = 1.0 / 256.0;
int loopsCount = 2;
RNG rng(0);
DTParams params = GetParam();
Size size = get<0>(params);
int mode = get<1>(params);
int guideType = get<2>(params);
int srcType = get<3>(params);
Mat original = imread(getOpenCVExtraDir() + "cv/edgefilter/statue.png");
Mat guide = convertTypeAndSize(original, guideType, size);
Mat src = convertTypeAndSize(original, srcType, size);
for (int iter = 0; iter <= loopsCount; iter++)
{
double ss = rng.uniform(0.0, 100.0);
double sc = rng.uniform(0.0, 100.0);
cv::setNumThreads(cv::getNumberOfCPUs());
Mat resMultithread;
dtFilter(guide, src, resMultithread, ss, sc, mode);
cv::setNumThreads(1);
Mat resSingleThread;
dtFilter(guide, src, resSingleThread, ss, sc, mode);
EXPECT_LE(cv::norm(resSingleThread, resMultithread, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(resSingleThread, resMultithread, NORM_L1), MAX_MEAN_DIF*src.total());
}
}
INSTANTIATE_TEST_CASE_P(FullSet, DomainTransformTest,
Combine(Values(szODD, szQVGA), ModeType::all(), SupportedTypes::all(), SupportedTypes::all())
);
template<typename SrcVec>
Mat getChessMat1px(Size sz, double whiteIntensity = 255)
{
typedef typename DataType<SrcVec>::channel_type SrcType;
Mat dst(sz, DataType<SrcVec>::type);
SrcVec black = SrcVec::all(0);
SrcVec white = SrcVec::all((SrcType)whiteIntensity);
for (int i = 0; i < dst.rows; i++)
for (int j = 0; j < dst.cols; j++)
dst.at<SrcVec>(i, j) = ((i + j) % 2) ? white : black;
return dst;
}
TEST(DomainTransformTest, ChessBoard_NC_accuracy)
{
RNG rng(0);
double MAX_DIF = 1;
Size sz = szVGA;
double ss = 80;
double sc = 60;
Mat srcb = randomMat(rng, sz, CV_8UC4, 0, 255, true);
Mat srcf = randomMat(rng, sz, CV_32FC4, 0, 255, true);
Mat chessb = getChessMat1px<Vec3b>(sz);
Mat dstb, dstf;
dtFilter(chessb, srcb.clone(), dstb, ss, sc, DTF_NC);
dtFilter(chessb, srcf.clone(), dstf, ss, sc, DTF_NC);
EXPECT_LE(cv::norm(srcb, dstb, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(srcf, dstf, NORM_INF), MAX_DIF);
}
TEST(DomainTransformTest, BoxFilter_NC_accuracy)
{
double MAX_DIF = 1;
int radius = 5;
double sc = 1.0;
double ss = 1.01*radius / sqrt(3.0);
Mat src = imread(getOpenCVExtraDir() + "cv/edgefilter/statue.png");
ASSERT_TRUE(!src.empty());
Mat1b guide(src.size(), 200);
Mat res_dt, res_box;
blur(src, res_box, Size(2 * radius + 1, 2 * radius + 1));
dtFilter(guide, src, res_dt, ss, sc, DTF_NC, 1);
EXPECT_LE(cv::norm(res_dt, res_box, NORM_L2), MAX_DIF*src.total());
}
TEST(DomainTransformTest, AuthorReferenceAccuracy)
{
string dir = getOpenCVExtraDir() + "cv/edgefilter";
double ss = 30;
double sc = 0.2 * 255;
Mat src = imread(dir + "/statue.png");
Mat ref_NC = imread(dir + "/dt/authors_statue_NC_ss30_sc0.2.png");
Mat ref_IC = imread(dir + "/dt/authors_statue_IC_ss30_sc0.2.png");
Mat ref_RF = imread(dir + "/dt/authors_statue_RF_ss30_sc0.2.png");
ASSERT_FALSE(src.empty());
ASSERT_FALSE(ref_NC.empty());
ASSERT_FALSE(ref_IC.empty());
ASSERT_FALSE(ref_RF.empty());
cv::setNumThreads(cv::getNumberOfCPUs());
Mat res_NC, res_IC, res_RF;
dtFilter(src, src, res_NC, ss, sc, DTF_NC);
dtFilter(src, src, res_IC, ss, sc, DTF_IC);
dtFilter(src, src, res_RF, ss, sc, DTF_RF);
double totalMaxError = 1.0/64.0*src.total();
EXPECT_LE(cvtest::norm(res_NC, ref_NC, NORM_L2), totalMaxError);
EXPECT_LE(cvtest::norm(res_NC, ref_NC, NORM_INF), 1);
EXPECT_LE(cvtest::norm(res_IC, ref_IC, NORM_L2), totalMaxError);
EXPECT_LE(cvtest::norm(res_IC, ref_IC, NORM_INF), 1);
EXPECT_LE(cvtest::norm(res_RF, ref_RF, NORM_L2), totalMaxError);
EXPECT_LE(cvtest::norm(res_IC, ref_IC, NORM_INF), 1);
}
}

@ -0,0 +1,361 @@
#include "test_precomp.hpp"
using namespace std;
using namespace std::tr1;
using namespace cv;
using namespace testing;
#ifndef SQR
#define SQR(x) ((x)*(x))
#endif
namespace cvtest
{
static string getOpenCVExtraDir()
{
return cvtest::TS::ptr()->get_data_path();
}
static Mat convertTypeAndSize(Mat src, int dstType, Size dstSize)
{
Mat dst;
int srcCnNum = src.channels();
int dstCnNum = CV_MAT_CN(dstType);
CV_Assert(srcCnNum == 3);
if (srcCnNum == dstCnNum)
{
src.copyTo(dst);
}
else if (dstCnNum == 1 && srcCnNum == 3)
{
cvtColor(src, dst, COLOR_BGR2GRAY);
}
else if (dstCnNum == 1 && srcCnNum == 4)
{
cvtColor(src, dst, COLOR_BGRA2GRAY);
}
else
{
vector<Mat> srcCn;
split(src, srcCn);
srcCn.resize(dstCnNum);
uint64 seed = 10000 * src.rows + 1000 * src.cols + 100 * dstSize.height + 10 * dstSize.width + dstType;
RNG rnd(seed);
for (int i = srcCnNum; i < dstCnNum; i++)
{
Mat& donor = srcCn[i % srcCnNum];
double minVal, maxVal;
minMaxLoc(donor, &minVal, &maxVal);
Mat randItem(src.size(), CV_MAKE_TYPE(src.depth(), 1));
randn(randItem, 0, (maxVal - minVal) / 100);
add(donor, randItem, srcCn[i]);
}
merge(srcCn, dst);
}
dst.convertTo(dst, dstType);
resize(dst, dst, dstSize);
return dst;
}
class GuidedFilterRefImpl : public GuidedFilter
{
int rad, height, width, chNum;
Mat det;
Mat *channels, *exps, **vars, **A;
double eps;
void meanFilter(const Mat &src, Mat & dst);
void computeCovGuide();
void computeCovGuideInv();
void applyTransform(int cNum, Mat *Ichannels, Mat *beta, Mat **alpha, int dDepth);
void computeCovGuideAndSrc(int cNum, Mat **vars_I, Mat *Ichannels, Mat *exp_I);
void computeBeta(int cNum, Mat *beta, Mat *exp_I, Mat **alpha);
void computeAlpha(int cNum, Mat **alpha, Mat **vars_I);
public:
GuidedFilterRefImpl(InputArray guide_, int rad, double eps);
void filter(InputArray src, OutputArray dst, int dDepth = -1);
~GuidedFilterRefImpl();
};
void GuidedFilterRefImpl::meanFilter(const Mat &src, Mat & dst)
{
boxFilter(src, dst, CV_32F, Size(2 * rad + 1, 2 * rad + 1), Point(-1, -1), true, BORDER_REFLECT);
}
GuidedFilterRefImpl::GuidedFilterRefImpl(InputArray _guide, int _rad, double _eps) :
height(_guide.rows()), width(_guide.cols()), chNum(_guide.channels()), rad(_rad), eps(_eps)
{
Mat guide = _guide.getMat();
CV_Assert(chNum > 0 && chNum <= 3);
channels = new Mat[chNum];
exps = new Mat[chNum];
A = new Mat *[chNum];
vars = new Mat *[chNum];
for (int i = 0; i < chNum; ++i)
{
A[i] = new Mat[chNum];
vars[i] = new Mat[chNum];
}
split(guide, channels);
for (int i = 0; i < chNum; ++i)
{
channels[i].convertTo(channels[i], CV_32F);
meanFilter(channels[i], exps[i]);
}
computeCovGuide();
computeCovGuideInv();
}
void GuidedFilterRefImpl::computeCovGuide()
{
static const int pY[] = { 0, 0, 1, 0, 1, 2 };
static const int pX[] = { 0, 1, 1, 2, 2, 2 };
int numOfIterations = (SQR(chNum) - chNum) / 2 + chNum;
for (int k = 0; k < numOfIterations; ++k)
{
int i = pY[k], j = pX[k];
vars[i][j] = channels[i].mul(channels[j]);
meanFilter(vars[i][j], vars[i][j]);
vars[i][j] -= exps[i].mul(exps[j]);
if (i == j)
vars[i][j] += eps * Mat::ones(height, width, CV_32F);
else
vars[j][i] = vars[i][j];
}
}
void GuidedFilterRefImpl::computeCovGuideInv()
{
static const int pY[] = { 0, 0, 1, 0, 1, 2 };
static const int pX[] = { 0, 1, 1, 2, 2, 2 };
int numOfIterations = (SQR(chNum) - chNum) / 2 + chNum;
if (chNum == 3)
{
for (int k = 0; k < numOfIterations; ++k){
int i = pY[k], i1 = (pY[k] + 1) % 3, i2 = (pY[k] + 2) % 3;
int j = pX[k], j1 = (pX[k] + 1) % 3, j2 = (pX[k] + 2) % 3;
A[i][j] = vars[i1][j1].mul(vars[i2][j2])
- vars[i1][j2].mul(vars[i2][j1]);
}
}
else if (chNum == 2)
{
A[0][0] = vars[1][1];
A[1][1] = vars[0][0];
A[0][1] = -vars[0][1];
}
else if (chNum == 1)
A[0][0] = Mat::ones(height, width, CV_32F);
for (int i = 0; i < chNum; ++i)
for (int j = 0; j < i; ++j)
A[i][j] = A[j][i];
det = vars[0][0].mul(A[0][0]);
for (int k = 0; k < chNum - 1; ++k)
det += vars[0][k + 1].mul(A[0][k + 1]);
}
GuidedFilterRefImpl::~GuidedFilterRefImpl(){
delete [] channels;
delete [] exps;
for (int i = 0; i < chNum; ++i)
{
delete [] A[i];
delete [] vars[i];
}
delete [] A;
delete [] vars;
}
void GuidedFilterRefImpl::filter(InputArray src_, OutputArray dst_, int dDepth)
{
if (dDepth == -1) dDepth = src_.depth();
dst_.create(height, width, src_.type());
Mat src = src_.getMat();
Mat dst = dst_.getMat();
int cNum = src.channels();
CV_Assert(height == src.rows && width == src.cols);
Mat *Ichannels, *exp_I, **vars_I, **alpha, *beta;
Ichannels = new Mat[cNum];
exp_I = new Mat[cNum];
beta = new Mat[cNum];
vars_I = new Mat *[chNum];
alpha = new Mat *[chNum];
for (int i = 0; i < chNum; ++i){
vars_I[i] = new Mat[cNum];
alpha[i] = new Mat[cNum];
}
split(src, Ichannels);
for (int i = 0; i < cNum; ++i)
{
Ichannels[i].convertTo(Ichannels[i], CV_32F);
meanFilter(Ichannels[i], exp_I[i]);
}
computeCovGuideAndSrc(cNum, vars_I, Ichannels, exp_I);
computeAlpha(cNum, alpha, vars_I);
computeBeta(cNum, beta, exp_I, alpha);
for (int i = 0; i < chNum + 1; ++i)
for (int j = 0; j < cNum; ++j)
if (i < chNum)
meanFilter(alpha[i][j], alpha[i][j]);
else
meanFilter(beta[j], beta[j]);
applyTransform(cNum, Ichannels, beta, alpha, dDepth);
merge(Ichannels, cNum, dst);
delete [] Ichannels;
delete [] exp_I;
delete [] beta;
for (int i = 0; i < chNum; ++i)
{
delete [] vars_I[i];
delete [] alpha[i];
}
delete [] vars_I;
delete [] alpha;
}
void GuidedFilterRefImpl::computeAlpha(int cNum, Mat **alpha, Mat **vars_I)
{
for (int i = 0; i < chNum; ++i)
for (int j = 0; j < cNum; ++j)
{
alpha[i][j] = vars_I[0][j].mul(A[i][0]);
for (int k = 1; k < chNum; ++k)
alpha[i][j] += vars_I[k][j].mul(A[i][k]);
alpha[i][j] /= det;
}
}
void GuidedFilterRefImpl::computeBeta(int cNum, Mat *beta, Mat *exp_I, Mat **alpha)
{
for (int i = 0; i < cNum; ++i)
{
beta[i] = exp_I[i];
for (int j = 0; j < chNum; ++j)
beta[i] -= alpha[j][i].mul(exps[j]);
}
}
void GuidedFilterRefImpl::computeCovGuideAndSrc(int cNum, Mat **vars_I, Mat *Ichannels, Mat *exp_I)
{
for (int i = 0; i < chNum; ++i)
for (int j = 0; j < cNum; ++j)
{
vars_I[i][j] = channels[i].mul(Ichannels[j]);
meanFilter(vars_I[i][j], vars_I[i][j]);
vars_I[i][j] -= exp_I[j].mul(exps[i]);
}
}
void GuidedFilterRefImpl::applyTransform(int cNum, Mat *Ichannels, Mat *beta, Mat **alpha, int dDepth)
{
for (int i = 0; i < cNum; ++i)
{
Ichannels[i] = beta[i];
for (int j = 0; j < chNum; ++j)
Ichannels[i] += alpha[j][i].mul(channels[j]);
Ichannels[i].convertTo(Ichannels[i], dDepth);
}
}
typedef tuple<int, int, string, string> GFParams;
typedef TestWithParam<GFParams> GuidedFilterTest;
TEST_P(GuidedFilterTest, accuracy)
{
GFParams params = GetParam();
int guideCnNum = get<0>(params);
int srcCnNum = get<1>(params);
string guideFileName = get<2>(params);
string srcFileName = get<3>(params);
int seed = 100 * guideCnNum + 50 * srcCnNum + 5*guideFileName.length() + srcFileName.length();
RNG rng(seed);
Mat guide = imread(getOpenCVExtraDir() + guideFileName);
Mat src = imread(getOpenCVExtraDir() + srcFileName);
ASSERT_TRUE(!guide.empty() && !src.empty());
Size dstSize(guide.cols + 1 + rng.uniform(0, 3), guide.rows);
guide = convertTypeAndSize(guide, CV_MAKE_TYPE(guide.depth(), guideCnNum), dstSize);
src = convertTypeAndSize(src, CV_MAKE_TYPE(src.depth(), srcCnNum), dstSize);
for (int iter = 0; iter < 3; iter++)
{
int radius = rng.uniform(0, 50);
double eps = rng.uniform(0.0, SQR(255.0));
cv::setNumThreads(cv::getNumberOfCPUs());
Mat res;
Ptr<GuidedFilter> gf = createGuidedFilter(guide, radius, eps);
gf->filter(src, res);
cv::setNumThreads(1);
Mat resRef;
Ptr<GuidedFilter> gfRef(new GuidedFilterRefImpl(guide, radius, eps));
gfRef->filter(src, resRef);
double normInf = cv::norm(res, resRef, NORM_INF);
double normL2 = cv::norm(res, resRef, NORM_L2) / guide.total();
EXPECT_LE(normInf, 1.0);
EXPECT_LE(normL2, 1.0/64.0);
}
}
INSTANTIATE_TEST_CASE_P(TypicalSet, GuidedFilterTest,
Combine(
Values(1, 2, 3),
Values(1, 2, 3),
Values("cv/shared/lena.png", "cv/shared/baboon.png", "cv/npr/test2.png"),
Values("cv/shared/lena.png", "cv/shared/baboon.png", "cv/npr/test2.png")
));
}

@ -0,0 +1,3 @@
#include "test_precomp.hpp"
CV_TEST_MAIN("")

@ -0,0 +1,13 @@
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
#include <opencv2/ts.hpp>
#include <opencv2/ts/ts_perf.hpp>
#include <opencv2/core.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/edge_filter.hpp>
#endif

Binary file not shown.

After

Width:  |  Height:  |  Size: 285 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 274 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 255 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 343 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 291 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 396 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 417 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 452 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 549 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 666 KiB

Loading…
Cancel
Save