Merge pull request #26617 from CodeLinaro:xuezha_2ndPost

FastCV-based HAL for OpenCV acceleration 2ndpost-1 #26617

### Detailed description:

- Add parallel support for cv_hal_sobel
- Add cv_hal_gaussianBlurBinomial and parallel support
- Add cv_hal_addWeighted8u and parallel support
- Add cv_hal_warpPerspective and parallel support

Requires binary from [opencv/opencv_3rdparty#90](https://github.com/opencv/opencv_3rdparty/pull/90)
Related patch to opencv_contrib: [opencv/opencv_contrib#3844](https://github.com/opencv/opencv_contrib/pull/3844)
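For context, here is a minimal usage sketch (not part of this patch) of OpenCV calls that can be dispatched to the new FastCV HAL entries when OpenCV is built with this HAL enabled. Whether the FastCV path is actually taken depends on the checks inside each HAL function (8-bit single-channel input, supported kernel sizes, border types, and so on).

```cpp
// Hypothetical usage sketch: these cv:: calls map to the HAL entries added here
// (cv_hal_sobel, cv_hal_gaussianBlurBinomial, cv_hal_addWeighted8u, cv_hal_warpPerspective)
// when the input satisfies the constraints checked in the FastCV HAL implementations.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    cv::Mat src1(480, 640, CV_8UC1, cv::Scalar(100));
    cv::Mat src2(480, 640, CV_8UC1, cv::Scalar(50));
    cv::Mat dst;

    cv::Sobel(src1, dst, CV_16S, 1, 0, 3);                        // cv_hal_sobel
    cv::GaussianBlur(src1, dst, cv::Size(5, 5), 0);               // cv_hal_gaussianBlurBinomial
    cv::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst);             // cv_hal_addWeighted8u

    cv::Mat M = cv::Mat::eye(3, 3, CV_64F);
    cv::warpPerspective(src1, dst, M, src1.size(), cv::INTER_NEAREST); // cv_hal_warpPerspective
    return 0;
}
```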

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
Commit 1c28a98b34 (parent 23f6a9ee3e), authored by quic-xuezha and committed via GitHub.
Files changed:

1. 3rdparty/fastcv/fastcv.cmake (14 lines changed)
2. 3rdparty/fastcv/include/fastcv_hal_core.hpp (25 lines changed)
3. 3rdparty/fastcv/include/fastcv_hal_imgproc.hpp (73 lines changed)
4. 3rdparty/fastcv/include/fastcv_hal_utils.hpp (5 lines changed)
5. 3rdparty/fastcv/src/fastcv_hal_core.cpp (98 lines changed)
6. 3rdparty/fastcv/src/fastcv_hal_imgproc.cpp (506 lines changed)

@@ -1,23 +1,23 @@
function(download_fastcv root_dir)
# Commit SHA in the opencv_3rdparty repo
-set(FASTCV_COMMIT "b8f0d48fa9dbebb0237d3e0abd206f9930c89db6")
+set(FASTCV_COMMIT "dc5d58018f3af915a8d209386d2c58c0501c0f2c")
# Define actual FastCV versions
if(ANDROID)
if(AARCH64)
message(STATUS "Download FastCV for Android aarch64")
set(FCV_PACKAGE_NAME "fastcv_android_aarch64_2024_10_24.tgz")
set(FCV_PACKAGE_HASH "14486af00dc0282dac591dc9ccdd957e")
set(FCV_PACKAGE_NAME "fastcv_android_aarch64_2024_12_11.tgz")
set(FCV_PACKAGE_HASH "9dac41e86597305f846212dae31a4a88")
else()
message(STATUS "Download FastCV for Android armv7")
set(FCV_PACKAGE_NAME "fastcv_android_arm32_2024_10_24.tgz")
set(FCV_PACKAGE_HASH "b5afadd5a5b55f8f6c2e7361f225fa21")
set(FCV_PACKAGE_NAME "fastcv_android_arm32_2024_12_11.tgz")
set(FCV_PACKAGE_HASH "fe2d30334180b17e3031eee92aac43b6")
endif()
elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
if(AARCH64)
set(FCV_PACKAGE_NAME "fastcv_linux_aarch64_2024_10_24.tgz")
set(FCV_PACKAGE_HASH "d15c7b77f2d3577ba46bd94e6cf15230")
set(FCV_PACKAGE_NAME "fastcv_linux_aarch64_2024_12_11.tgz")
set(FCV_PACKAGE_HASH "7b33ad833e6f15ab6d4ec64fa3c17acd")
else()
message("FastCV: fastcv lib for 32-bit Linux is not supported for now!")
endif()

@@ -24,6 +24,8 @@
#define cv_hal_flip fastcv_hal_flip
#undef cv_hal_rotate90
#define cv_hal_rotate90 fastcv_hal_rotate
#undef cv_hal_addWeighted8u
#define cv_hal_addWeighted8u fastcv_hal_addWeighted8u
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief look-up table transform of an array.
@@ -152,4 +154,27 @@ int fastcv_hal_rotate(
size_t dst_step,
int angle);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Weighted sum of two arrays using the formula: dst[i] = a * src1[i] + b * src2[i] + c
/// @param src1_data first source image data
/// @param src1_step first source image step
/// @param src2_data second source image data
/// @param src2_step second source image step
/// @param dst_data destination image data
/// @param dst_step destination image step
/// @param width width of the images
/// @param height height of the images
/// @param scalars Coefficients a, b, and c
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int fastcv_hal_addWeighted8u(
const uchar* src1_data,
size_t src1_step,
const uchar* src2_data,
size_t src2_step,
uchar* dst_data,
size_t dst_step,
int width,
int height,
const double scalars[3]);
#endif
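For illustration only (not part of this patch), a minimal sketch of calling the new HAL entry declared above directly; the include path, buffer sizes, and coefficient values below are assumptions.
// --- illustrative sketch (not part of the patch) ---
#include <vector>
#include "fastcv_hal_core.hpp"   // assumed include path for the declaration above

void addWeighted_sketch()
{
    const int width = 640, height = 480;
    // Tightly packed CV_8UC1 buffers, so the step equals the width in bytes.
    std::vector<unsigned char> src1(width * height, 100), src2(width * height, 50), dst(width * height);
    const double scalars[3] = {0.5, 0.5, 10.0};   // a, b, c in dst[i] = a*src1[i] + b*src2[i] + c
    fastcv_hal_addWeighted8u(src1.data(), width, src2.data(), width,
                             dst.data(), width, width, height, scalars);
}
// --- end of sketch ---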

@@ -12,10 +12,14 @@
#define cv_hal_medianBlur fastcv_hal_medianBlur
#undef cv_hal_sobel
#define cv_hal_sobel fastcv_hal_sobel
#undef cv_hal_boxFilter
#define cv_hal_boxFilter fastcv_hal_boxFilter
#undef cv_hal_adaptiveThreshold
#define cv_hal_adaptiveThreshold fastcv_hal_adaptiveThreshold
#undef cv_hal_gaussianBlurBinomial
#define cv_hal_gaussianBlurBinomial fastcv_hal_gaussianBlurBinomial
#undef cv_hal_warpPerspective
#define cv_hal_warpPerspective fastcv_hal_warpPerspective
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Calculate medianBlur filter
@@ -148,4 +152,69 @@ int fastcv_hal_adaptiveThreshold(
int blockSize,
double C);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Blurs an image using a Gaussian filter.
/// @param src_data Source image data
/// @param src_step Source image step
/// @param dst_data Destination image data
/// @param dst_step Destination image step
/// @param width Source image width
/// @param height Source image height
/// @param depth Depth of source and destination image
/// @param cn Number of channels
/// @param margin_left Left margin of the source image
/// @param margin_top Top margin of the source image
/// @param margin_right Right margin of the source image
/// @param margin_bottom Bottom margin of the source image
/// @param ksize Kernel size
/// @param border_type Border type
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int fastcv_hal_gaussianBlurBinomial(
const uchar* src_data,
size_t src_step,
uchar* dst_data,
size_t dst_step,
int width,
int height,
int depth,
int cn,
size_t margin_left,
size_t margin_top,
size_t margin_right,
size_t margin_bottom,
size_t ksize,
int border_type);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Applies a perspective transformation to an image.
///
/// @param src_type Source and destination image type
/// @param src_data Source image data
/// @param src_step Source image step
/// @param src_width Source image width
/// @param src_height Source image height
/// @param dst_data Destination image data
/// @param dst_step Destination image step
/// @param dst_width Destination image width
/// @param dst_height Destination image height
/// @param M 3x3 matrix with transform coefficients
/// @param interpolation Interpolation mode (CV_HAL_INTER_NEAREST, ...)
/// @param border_type Border processing mode (CV_HAL_BORDER_REFLECT, ...)
/// @param border_value Values to use for CV_HAL_BORDER_CONSTANT mode
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int fastcv_hal_warpPerspective(
int src_type,
const uchar* src_data,
size_t src_step,
int src_width,
int src_height,
uchar* dst_data,
size_t dst_step,
int dst_width,
int dst_height,
const double M[9],
int interpolation,
int border_type,
const double border_value[4]);
#endif
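Similarly, for illustration only (not part of this patch), a sketch of exercising the warpPerspective HAL entry above with an identity transform, so the output should match the input; the include path is an assumption, while CV_HAL_INTER_NEAREST and CV_HAL_BORDER_CONSTANT come from OpenCV's HAL interface header.
// --- illustrative sketch (not part of the patch) ---
#include <vector>
#include "fastcv_hal_imgproc.hpp"   // assumed include path for the declarations above

void warpPerspective_sketch()
{
    const int w = 320, h = 240;
    std::vector<unsigned char> src(w * h, 0), dst(w * h, 0);
    const double M[9] = {1, 0, 0,  0, 1, 0,  0, 0, 1};      // 3x3 identity transform
    const double border_value[4] = {0, 0, 0, 0};
    fastcv_hal_warpPerspective(CV_8UC1, src.data(), w, w, h,
                               dst.data(), w, w, h, M,
                               CV_HAL_INTER_NEAREST, CV_HAL_BORDER_CONSTANT, border_value);
}
// --- end of sketch ---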

@@ -29,7 +29,7 @@
status == FASTCV_EHWGPU) \
{ \
CV_LOG_DEBUG(NULL, "FastCV status:"<<getFastCVErrorString(status) \
-<<"Switching to default OpenCV solution!"); \
+<<", Switching to default OpenCV solution!"); \
return CV_HAL_ERROR_NOT_IMPLEMENTED; \
} \
else \
@@ -38,7 +38,7 @@
return CV_HAL_ERROR_UNKNOWN; \
} \
}
#define CV_HAL_RETURN_NOT_IMPLEMENTED(reason) \
{ \
CV_LOG_DEBUG(NULL,"Switching to default OpenCV\nInfo: "<<reason); \
@@ -47,6 +47,7 @@
#define FCV_KernelSize_SHIFT 3
#define FCV_MAKETYPE(ksize,depth) ((ksize<<FCV_KernelSize_SHIFT) + depth)
#define FCV_CMP_EQ(val1,val2) (fabs(val1 - val2) < FLT_EPSILON)
const char* getFastCVErrorString(int status);
const char* borderToString(int border);

@@ -38,15 +38,15 @@ private:
};
int fastcv_hal_lut(
const uchar* src_data,
size_t src_step,
size_t src_type,
const uchar* lut_data,
size_t lut_channel_size,
size_t lut_channels,
uchar* dst_data,
size_t dst_step,
int width,
int height)
{
if((width*height)<=(320*240))
@@ -69,10 +69,10 @@ int fastcv_hal_lut(
}
int fastcv_hal_normHammingDiff8u(
const uchar* a,
const uchar* b,
int n,
int cellSize,
int* result)
{
fcvStatus status;
@@ -169,15 +169,15 @@ int fastcv_hal_transpose2d(
switch (element_size)
{
case 1:
status = fcvTransposeu8_v2(src_data, src_width, src_height, src_step,
dst_data, dst_step);
break;
case 2:
status = fcvTransposeu16_v2((const uint16_t*)src_data, src_width, src_height,
src_step, (uint16_t*)dst_data, dst_step);
break;
case 4:
status = fcvTransposef32_v2((const float32_t*)src_data, src_width, src_height,
src_step, (float32_t*)dst_data, dst_step);
break;
default:
@@ -205,18 +205,18 @@ int fastcv_hal_meanStdDev(
if(src_type != CV_8UC1)
{
CV_HAL_RETURN_NOT_IMPLEMENTED("src type not supported");
}
else if(mask != nullptr)
{
CV_HAL_RETURN_NOT_IMPLEMENTED("mask not supported");
}
else if(mean_val == nullptr && stddev_val == nullptr)
{
CV_HAL_RETURN_NOT_IMPLEMENTED("null ptr for mean and stddev");
}
float32_t mean, variance;
fcvStatus status = fcvImageIntensityStats_v2(src_data, src_step, 0, 0, width, height,
&mean, &variance, FASTCV_BIASED_VARIANCE_ESTIMATOR);
@@ -278,7 +278,7 @@ int fastcv_hal_flip(
status = fcvFlipRGB888u8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data, dst_step, dir);
else
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Data type:%d is not supported, Switching to default OpenCV solution!", src_type));
CV_HAL_RETURN(status, hal_flip);
}
@@ -294,7 +294,7 @@ int fastcv_hal_rotate(
{
if((src_width*src_height)<(120*80))
CV_HAL_RETURN_NOT_IMPLEMENTED("Switching to default OpenCV solution for lower resolution!");
fcvStatus status;
fcvRotateDegree degree;
@@ -324,11 +324,63 @@ int fastcv_hal_rotate(
status = fcvRotateImageu8(src_data, src_width, src_height, src_step, dst_data, dst_step, degree);
break;
case CV_8UC2:
status = fcvRotateImageInterleavedu8((uint8_t*)src_data, src_width, src_height, src_step, (uint8_t*)dst_data,
dst_step, degree);
break;
default:
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("src_type:%d is not supported", src_type));
}
CV_HAL_RETURN(status, hal_rotate);
}
int fastcv_hal_addWeighted8u(
const uchar* src1_data,
size_t src1_step,
const uchar* src2_data,
size_t src2_step,
uchar* dst_data,
size_t dst_step,
int width,
int height,
const double scalars[3])
{
if( (scalars[0] < -128.0f) || (scalars[0] >= 128.0f) ||
(scalars[1] < -128.0f) || (scalars[1] >= 128.0f) ||
(scalars[2] < -(1<<23))|| (scalars[2] >= 1<<23))
CV_HAL_RETURN_NOT_IMPLEMENTED(
cv::format("Alpha:%f,Beta:%f,Gamma:%f is not supported because it's too large or too small\n",
scalars[0],scalars[1],scalars[2]));
INITIALIZATION_CHECK;
fcvStatus status = FASTCV_SUCCESS;
if (height == 1)
{
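// For a single-row image, split the work along the width; each chunk is processed as a rangeWidth x 1 image.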
src1_step = width*sizeof(uchar);
src2_step = width*sizeof(uchar);
dst_step = width*sizeof(uchar);
cv::parallel_for_(cv::Range(0, width), [&](const cv::Range &range){
int rangeWidth = range.end - range.start;
const uint8_t *src1 = src1_data + range.start;
const uint8_t *src2 = src2_data + range.start;
uint8_t *dst = dst_data + range.start;
fcvAddWeightedu8_v2(src1, rangeWidth, height, src1_step, src2, src2_step,
scalars[0], scalars[1], scalars[2], dst, dst_step);
});
}
else
{
cv::parallel_for_(cv::Range(0, height), [&](const cv::Range &range){
int rangeHeight = range.end - range.start;
const uint8_t *src1 = src1_data + range.start * src1_step;
const uint8_t *src2 = src2_data + range.start * src2_step;
uint8_t *dst = dst_data + range.start * dst_step;
fcvAddWeightedu8_v2(src1, width, rangeHeight, src1_step, src2, src2_step,
scalars[0], scalars[1], scalars[2], dst, dst_step);
});
}
CV_HAL_RETURN(status, hal_addWeighted8u_v2);
}

@@ -34,7 +34,7 @@ int fastcv_hal_medianBlur(
INITIALIZATION_CHECK;
-fcvStatus status;
+fcvStatus status = FASTCV_SUCCESS;
int fcvFuncType = FCV_MAKETYPE(ksize,depth);
switch (fcvFuncType)
@@ -52,6 +52,101 @@ int fastcv_hal_medianBlur(
CV_HAL_RETURN(status, hal_medianBlur);
}
class FcvSobelLoop_Invoker : public cv::ParallelLoopBody
{
public:
FcvSobelLoop_Invoker(const cv::Mat& _src, cv::Mat& _dst, int _dx, int _dy, int _ksize, fcvBorderType _fcvBorder,
int _fcvBorderValue) : cv::ParallelLoopBody(), src(_src), dst(_dst), dx(_dx), dy(_dy), ksize(_ksize),
fcvBorder(_fcvBorder), fcvBorderValue(_fcvBorderValue)
{
width = src.cols;
height = src.rows;
halfKernelSize = ksize/2;
fcvFuncType = FCV_MAKETYPE(ksize,src.depth());
}
virtual void operator()(const cv::Range& range) const CV_OVERRIDE
{
int topLines = 0;
int rangeHeight = range.end-range.start;
int paddedHeight = rangeHeight;
// Include extra rows above/below the stripe so the filter has valid neighbours at the stripe boundaries.
if(range.start > 0)
{
topLines += halfKernelSize;
paddedHeight += halfKernelSize;
}
if(range.end < height)
{
paddedHeight += halfKernelSize;
}
cv::Mat srcPadded = src(cv::Rect(0, range.start-topLines, width, paddedHeight));
cv::Mat dstPadded = cv::Mat(paddedHeight, width, dst.depth());
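// dstPadded is a scratch output for the whole padded stripe; only its central rangeHeight rows are copied back to dst below.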
int16_t *dxBuffer = nullptr, *dyBuffer = nullptr;
if ((dx == 1) && (dy == 0))
{
dxBuffer = (int16_t*)dstPadded.data;
}
else if ((dx == 0) && (dy == 1))
{
dyBuffer = (int16_t*)dstPadded.data;
}
switch (fcvFuncType)
{
case FCV_MAKETYPE(3,CV_8U):
{
fcvFilterSobel3x3u8s16(srcPadded.data, width, paddedHeight, srcPadded.step, dxBuffer, dyBuffer, dstPadded.step,
fcvBorder, 0);
break;
}
case FCV_MAKETYPE(5,CV_8U):
{
fcvFilterSobel5x5u8s16(srcPadded.data, width, paddedHeight, srcPadded.step, dxBuffer, dyBuffer, dstPadded.step,
fcvBorder, 0);
break;
}
case FCV_MAKETYPE(7,CV_8U):
{
fcvFilterSobel7x7u8s16(srcPadded.data, width, paddedHeight, srcPadded.step, dxBuffer, dyBuffer, dstPadded.step,
fcvBorder, 0);
break;
}
default:
CV_Error(cv::Error::StsBadArg, cv::format("Ksize:%d, src_depth:%s is not supported",
ksize, cv::depthToString(src.depth())));
break;
}
// Only copy center part back to output image and ignore the padded lines
cv::Mat temp1 = dstPadded(cv::Rect(0, topLines, width, rangeHeight));
cv::Mat temp2 = dst(cv::Rect(0, range.start, width, rangeHeight));
temp1.copyTo(temp2);
}
private:
const cv::Mat& src;
cv::Mat& dst;
int width;
int height;
int dx;
int dy;
int ksize;
int halfKernelSize;
int fcvFuncType;
fcvBorderType fcvBorder;
int fcvBorderValue;
FcvSobelLoop_Invoker(const FcvSobelLoop_Invoker &); // = delete;
const FcvSobelLoop_Invoker& operator= (const FcvSobelLoop_Invoker &); // = delete;
};
int fastcv_hal_sobel(
const uchar* src_data,
size_t src_step,
@@ -73,10 +168,13 @@ int fastcv_hal_sobel(
double delta,
int border_type)
{
-if(scale != 1.0f || delta != 0.0f)
+if (!(FCV_CMP_EQ(scale, 1.0f) && FCV_CMP_EQ(delta, 0.0f)))
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Scale:%f, delta:%f is not supported", scale, delta));
// Only first-order derivatives in a single direction are supported: (dx==1 && dy==0) || (dx==0 && dy==1)
if ((dx + dy == 0) || (dx + dy > 1))
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Dx:%d Dy:%d is not supported",dx, dy));
// Do not support inplace case
if (src_data == dst_data)
CV_HAL_RETURN_NOT_IMPLEMENTED("Inplace is not supported");
@@ -89,10 +187,6 @@ int fastcv_hal_sobel(
if (cn != 1)
CV_HAL_RETURN_NOT_IMPLEMENTED("Multi-channels is not supported");
-// Do not support for ROI case
-if((margin_left!=0) || (margin_top != 0) || (margin_right != 0) || (margin_bottom !=0))
-CV_HAL_RETURN_NOT_IMPLEMENTED("ROI is not supported");
// 1. When ksize <= 0, OpenCV will use Scharr derivatives
// 2. When ksize == 1, OpenCV will use a 3x1 or 1x3 kernel (no Gaussian smoothing is done)
// FastCV does not support either of the above cases
@@ -103,26 +197,16 @@ int fastcv_hal_sobel(
if (dst_depth != CV_16S)
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Dst depth:%s is not supported", cv::depthToString(dst_depth)));
-INITIALIZATION_CHECK;
// Only support following ksize and src_depth as input
if ((FCV_MAKETYPE(ksize,src_depth) != FCV_MAKETYPE(3, CV_8U)) &&
(FCV_MAKETYPE(ksize,src_depth) != FCV_MAKETYPE(5, CV_8U)) &&
(FCV_MAKETYPE(ksize,src_depth) != FCV_MAKETYPE(7, CV_8U)))
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Ksize:%d, src_depth:%s is not supported", ksize, cv::depthToString(src_depth)));
-// Only support one direction derivatives and the order is 1.(dx=1 && dy=0)||(dx=0 && dy=1)
-int16_t *dxBuffer, *dyBuffer;
-if ((dx == 1) && (dy == 0))
-{
-dxBuffer = (int16_t*)dst_data;
-dyBuffer = NULL;
-}
-else if ((dx == 0) && (dy == 1))
-{
-dxBuffer = NULL;
-dyBuffer = (int16_t*)dst_data;
-}
-else
-CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Dx:%d Dy:%d is not supported",dx, dy));
+INITIALIZATION_CHECK;
-fcvStatus status;
-fcvBorderType fcvBorder;
+fcvStatus status = FASTCV_SUCCESS;
+fcvBorderType fcvBorder = FASTCV_BORDER_CONSTANT;
switch (border_type)
{
@@ -141,28 +225,89 @@ int fastcv_hal_sobel(
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Border type:%s is not supported", borderToString(border_type)));
}
-int fcvFuncType = FCV_MAKETYPE(ksize,src_depth);
cv::Mat src = cv::Mat(height, width, CV_MAKE_TYPE(src_depth, 1), (void*)src_data, src_step);
cv::Mat dst = cv::Mat(height, width, CV_MAKE_TYPE(dst_depth, 1), (void*)dst_data, dst_step);
-switch (fcvFuncType)
if (margin_left || margin_top || margin_right || margin_bottom)
{
-case FCV_MAKETYPE(3,CV_8U):
// Extend the processing region into the available ROI margins so border pixels are computed from real neighbouring data.
int paddedHeight = height, paddedWidth = width, startX = 0, startY = 0;
if(margin_left != 0)
{
-status = fcvFilterSobel3x3u8s16(src_data, width, height, src_step, dxBuffer, dyBuffer, dst_step, fcvBorder, 0);
-break;
src_data -= ksize/2;
paddedWidth += ksize/2;
startX = ksize/2;
}
-case FCV_MAKETYPE(5,CV_8U):
if(margin_top != 0)
{
-status = fcvFilterSobel5x5u8s16(src_data, width, height, src_step, dxBuffer, dyBuffer, dst_step, fcvBorder, 0);
-break;
src_data -= (ksize/2) * src_step;
paddedHeight += ksize/2;
startY = ksize/2;
}
-case FCV_MAKETYPE(7,CV_8U):
if(margin_right != 0)
{
-status = fcvFilterSobel7x7u8s16(src_data, width, height, src_step, dxBuffer, dyBuffer, dst_step, fcvBorder, 0);
-break;
paddedWidth += ksize/2;
}
-default:
-CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Ksize:%d, src_depth:%s, border type:%s is not supported",
-ksize, cv::depthToString(src_depth), borderToString(border_type)));
if(margin_bottom != 0)
{
paddedHeight += ksize/2;
}
cv::Mat padded(paddedHeight, paddedWidth, CV_16SC1); // 16-bit buffer: the Sobel output written below is int16_t
int16_t *dxBuffer = nullptr, *dyBuffer = nullptr;
if ((dx == 1) && (dy == 0))
{
dxBuffer = (int16_t*)padded.data;
dyBuffer = NULL;
}
else if ((dx == 0) && (dy == 1))
{
dxBuffer = NULL;
dyBuffer = (int16_t*)padded.data;
}
int fcvFuncType = FCV_MAKETYPE(ksize, src_depth);
switch (fcvFuncType)
{
case FCV_MAKETYPE(3,CV_8U):
{
status = fcvFilterSobel3x3u8s16(src_data, paddedWidth, paddedHeight, src_step, dxBuffer, dyBuffer, padded.step,
fcvBorder, 0);
break;
}
case FCV_MAKETYPE(5,CV_8U):
{
status = fcvFilterSobel5x5u8s16(src_data, paddedWidth, paddedHeight, src_step, dxBuffer, dyBuffer, padded.step,
fcvBorder, 0);
break;
}
case FCV_MAKETYPE(7,CV_8U):
{
status = fcvFilterSobel7x7u8s16(src_data, paddedWidth, paddedHeight, src_step, dxBuffer, dyBuffer, padded.step,
fcvBorder, 0);
break;
}
default:
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Ksize:%d, src_depth:%s is not supported",
ksize, cv::depthToString(src_depth)));
break;
}
cv::Mat temp1 = padded(cv::Rect(startX, startY, width, height));
temp1.copyTo(dst);
}
else
{
int nThreads = cv::getNumThreads();
int nStripes = nThreads > 1 ? 3*nThreads : 1;
cv::parallel_for_(cv::Range(0, height), FcvSobelLoop_Invoker(src, dst, dx, dy, ksize, fcvBorder, 0), nStripes);
}
CV_HAL_RETURN(status, hal_sobel);
@@ -316,3 +461,286 @@ int fastcv_hal_adaptiveThreshold(
CV_HAL_RETURN(status,hal_adaptiveThreshold);
}
class FcvGaussianBlurLoop_Invoker : public cv::ParallelLoopBody
{
public:
FcvGaussianBlurLoop_Invoker(const cv::Mat& _src, cv::Mat& _dst, int _ksize, fcvBorderType _fcvBorder, int _fcvBorderValue) :
cv::ParallelLoopBody(), src(_src),dst(_dst), ksize(_ksize), fcvBorder(_fcvBorder), fcvBorderValue(_fcvBorderValue)
{
width = src.cols;
height = src.rows;
halfKernelSize = ksize / 2;
fcvFuncType = FCV_MAKETYPE(ksize, src.depth());
}
virtual void operator()(const cv::Range& range) const CV_OVERRIDE
{
int topLines = 0;
int rangeHeight = range.end-range.start;
int paddedHeight = rangeHeight;
// Include extra rows above/below the stripe so the filter has valid neighbours at the stripe boundaries.
if(range.start != 0)
{
topLines += halfKernelSize;
paddedHeight += halfKernelSize;
}
if(range.end != height)
{
paddedHeight += halfKernelSize;
}
const cv::Mat srcPadded = src(cv::Rect(0, range.start - topLines, width, paddedHeight));
cv::Mat dstPadded = cv::Mat(paddedHeight, width, CV_8U);
if (fcvFuncType == FCV_MAKETYPE(3,CV_8U))
fcvFilterGaussian3x3u8_v4(srcPadded.data, width, paddedHeight, srcPadded.step, dstPadded.data, dstPadded.step,
fcvBorder, 0);
else if (fcvFuncType == FCV_MAKETYPE(5,CV_8U))
fcvFilterGaussian5x5u8_v3(srcPadded.data, width, paddedHeight, srcPadded.step, dstPadded.data, dstPadded.step,
fcvBorder, 0);
// Only copy center part back to output image and ignore the padded lines
cv::Mat temp1 = dstPadded(cv::Rect(0, topLines, width, rangeHeight));
cv::Mat temp2 = dst(cv::Rect(0, range.start, width, rangeHeight));
temp1.copyTo(temp2);
}
private:
const cv::Mat& src;
cv::Mat& dst;
int width;
int height;
const int ksize;
int halfKernelSize;
int fcvFuncType;
fcvBorderType fcvBorder;
int fcvBorderValue;
FcvGaussianBlurLoop_Invoker(const FcvGaussianBlurLoop_Invoker &); // = delete;
const FcvGaussianBlurLoop_Invoker& operator= (const FcvGaussianBlurLoop_Invoker &); // = delete;
};
int fastcv_hal_gaussianBlurBinomial(
const uchar* src_data,
size_t src_step,
uchar* dst_data,
size_t dst_step,
int width,
int height,
int depth,
int cn,
size_t margin_left,
size_t margin_top,
size_t margin_right,
size_t margin_bottom,
size_t ksize,
int border_type)
{
// Do not support inplace case
if (src_data == dst_data)
CV_HAL_RETURN_NOT_IMPLEMENTED("Inplace is not supported");
// The input image width and height should be greater than the kernel size
if (((size_t)height <= ksize) || ((size_t)width <= ksize))
CV_HAL_RETURN_NOT_IMPLEMENTED("Input image size should be larger than kernel size");
// The input channel should be 1
if (cn != 1)
CV_HAL_RETURN_NOT_IMPLEMENTED("Multi-channels is not supported");
// Do not support for ROI case
if((margin_left!=0) || (margin_top != 0) || (margin_right != 0) || (margin_bottom !=0))
CV_HAL_RETURN_NOT_IMPLEMENTED("ROI is not supported");
INITIALIZATION_CHECK;
fcvStatus status = FASTCV_SUCCESS;
fcvBorderType fcvBorder = fcvBorderType::FASTCV_BORDER_UNDEFINED;
int fcvFuncType = FCV_MAKETYPE(ksize,depth);
switch (border_type)
{
case cv::BorderTypes::BORDER_REPLICATE:
{
fcvBorder = fcvBorderType::FASTCV_BORDER_REPLICATE;
break;
}
// For a constant border there is no border value parameter; OpenCV's default border value of 0 is used
case cv::BorderTypes::BORDER_CONSTANT:
{
fcvBorder = fcvBorderType::FASTCV_BORDER_CONSTANT;
break;
}
case cv::BorderTypes::BORDER_REFLECT:
{
fcvBorder = fcvBorderType::FASTCV_BORDER_REFLECT;
break;
}
case cv::BorderTypes::BORDER_REFLECT_101:
{
fcvBorder = fcvBorderType::FASTCV_BORDER_REFLECT_V2;
break;
}
default:
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Border type:%s is not supported", borderToString(border_type)));
}
int nThreads = cv::getNumThreads();
int nStripes = (nThreads > 1) ? ((height > 60) ? 3 * nThreads : 1) : 1;
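// Use roughly three stripes per thread for load balancing; images with 60 rows or fewer are processed as a single stripe.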
switch (fcvFuncType)
{
case FCV_MAKETYPE(3,CV_8U):
case FCV_MAKETYPE(5,CV_8U):
{
cv::Mat src = cv::Mat(height, width, CV_8UC1, (void*)src_data, src_step);
cv::Mat dst = cv::Mat(height, width, CV_8UC1, (void*)dst_data, dst_step);
cv::parallel_for_(cv::Range(0, height), FcvGaussianBlurLoop_Invoker(src, dst, ksize, fcvBorder, 0), nStripes);
break;
}
default:
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Ksize:%d, depth:%s is not supported", (int)ksize, cv::depthToString(depth)));
}
CV_HAL_RETURN(status, hal_gaussianBlurBinomial);
}
class FcvWarpPerspectiveLoop_Invoker : public cv::ParallelLoopBody
{
public:
FcvWarpPerspectiveLoop_Invoker(const uchar* _src_data, int _src_width, int _src_height, size_t _src_step, uchar* _dst_data,
int _dst_width, int _dst_height, size_t _dst_step, int _type, const double* _M,
fcvInterpolationType _fcvInterpolation, fcvBorderType _fcvBorder, int _fcvBorderValue) :
cv::ParallelLoopBody(), src_data(_src_data), src_width(_src_width), src_height(_src_height), src_step(_src_step),
dst_data(_dst_data), dst_width(_dst_width), dst_height(_dst_height), dst_step(_dst_step), type(_type),
M(_M), fcvInterpolation(_fcvInterpolation),fcvBorder(_fcvBorder),
fcvBorderValue(_fcvBorderValue) {}
virtual void operator()(const cv::Range& range) const CV_OVERRIDE
{
uchar* dst = dst_data + range.start*dst_step;
int rangeHeight = range.end - range.start;
float rangeMatrix[9];
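// Fold the stripe's starting row into the translation terms so that each stripe,
// whose destination rows are re-indexed from 0, still maps to the correct source coordinates.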
rangeMatrix[0] = (float)(M[0]);
rangeMatrix[1] = (float)(M[1]);
rangeMatrix[2] = (float)(M[2]+range.start*M[1]);
rangeMatrix[3] = (float)(M[3]);
rangeMatrix[4] = (float)(M[4]);
rangeMatrix[5] = (float)(M[5]+range.start*M[4]);
rangeMatrix[6] = (float)(M[6]);
rangeMatrix[7] = (float)(M[7]);
rangeMatrix[8] = (float)(M[8]+range.start*M[7]);
fcvWarpPerspectiveu8_v5(src_data, src_width, src_height, src_step, CV_MAT_CN(type), dst, dst_width, rangeHeight,
dst_step, rangeMatrix, fcvInterpolation, fcvBorder, fcvBorderValue);
}
private:
const uchar* src_data;
const int src_width;
const int src_height;
const size_t src_step;
uchar* dst_data;
const int dst_width;
const int dst_height;
const size_t dst_step;
const int type;
const double* M;
fcvInterpolationType fcvInterpolation;
fcvBorderType fcvBorder;
int fcvBorderValue;
FcvWarpPerspectiveLoop_Invoker(const FcvWarpPerspectiveLoop_Invoker &); // = delete;
const FcvWarpPerspectiveLoop_Invoker& operator= (const FcvWarpPerspectiveLoop_Invoker &); // = delete;
};
int fastcv_hal_warpPerspective(
int src_type,
const uchar* src_data,
size_t src_step,
int src_width,
int src_height,
uchar* dst_data,
size_t dst_step,
int dst_width,
int dst_height,
const double M[9],
int interpolation,
int border_type,
const double border_value[4])
{
// Do not support inplace case
if (src_data == dst_data)
CV_HAL_RETURN_NOT_IMPLEMENTED("Inplace is not supported");
// The input channel should be 1
if (CV_MAT_CN(src_type) != 1)
CV_HAL_RETURN_NOT_IMPLEMENTED("Multi-channels is not supported");
INITIALIZATION_CHECK;
fcvStatus status = FASTCV_SUCCESS;
fcvBorderType fcvBorder;
uint8_t fcvBorderValue = 0;
fcvInterpolationType fcvInterpolation;
switch (border_type)
{
case cv::BorderTypes::BORDER_CONSTANT:
{
if ((border_value[0] == border_value[1]) &&
(border_value[0] == border_value[2]) &&
(border_value[0] == border_value[3]))
{
fcvBorder = fcvBorderType::FASTCV_BORDER_CONSTANT;
fcvBorderValue = static_cast<uint8_t>(border_value[0]);
break;
}
else
CV_HAL_RETURN_NOT_IMPLEMENTED("Different border value is not supported");
}
case cv::BorderTypes::BORDER_REPLICATE:
{
fcvBorder = fcvBorderType::FASTCV_BORDER_REPLICATE;
break;
}
case cv::BorderTypes::BORDER_TRANSPARENT:
{
fcvBorder = fcvBorderType::FASTCV_BORDER_UNDEFINED;
break;
}
default:
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Border type:%s is not supported", borderToString(border_type)));
}
switch(interpolation)
{
case cv::InterpolationFlags::INTER_NEAREST:
{
fcvInterpolation = FASTCV_INTERPOLATION_TYPE_NEAREST_NEIGHBOR;
break;
}
default:
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Interpolation type:%s is not supported",
interpolationToString(interpolation)));
}
int nThreads = cv::getNumThreads();
int nStripes = nThreads > 1 ? 3*nThreads : 1;
if(CV_MAT_DEPTH(src_type) == CV_8U)
{
cv::parallel_for_(cv::Range(0, dst_height),
FcvWarpPerspectiveLoop_Invoker(src_data, src_width, src_height, src_step, dst_data, dst_width, dst_height,
dst_step, src_type, M, fcvInterpolation, fcvBorder, fcvBorderValue), nStripes);
}
else
CV_HAL_RETURN_NOT_IMPLEMENTED(cv::format("Src type:%s is not supported", cv::typeToString(src_type).c_str()));
CV_HAL_RETURN(status, hal_warpPerspective);
}