Merge pull request #26327 from asmorkalov:as/drop_convertFp16

Finally dropped convertFp16 function in favor of cv::Mat::convertTo() #26327

Partially addresses https://github.com/opencv/opencv/issues/24909
Related PR to contrib: https://github.com/opencv/opencv_contrib/pull/3812

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is an accuracy test, performance test and test data in the opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
4 months ago · 9f0c3f5b2b
parent a40ceff215
commit 9f0c3f5b2b
8 changed files with 26 additions and 231 deletions
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -492,20 +492,6 @@ For example:
 CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
                                  double alpha = 1, double beta = 0);

-/** @brief Converts an array to half precision floating number.
-
-This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). CV_16S format is used to represent FP16 data.
-There are two use modes (src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array has to have type of CV_32F or
-CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error.
-The format of half precision floating point is defined in IEEE 754-2008.
-
-@param src input array.
-@param dst output array.
-
-@deprecated Use Mat::convertTo with CV_16F instead.
-*/
-CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst);
-
 /** @brief Performs a look-up table transform of an array.

 The function LUT fills the output array with values from the look-up table. Indices of the entries
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -1324,7 +1324,7 @@ CV_EXPORTS_W void printShortCudaDeviceInfo(int device);
 @param _src input array.
 @param _dst output array.
 @param stream Stream for the asynchronous version.
-@sa convertFp16
+@sa cv::Mat::convertTo
 */
 CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());

--- a/modules/core/perf/opencl/perf_matop.cpp
+++ b/modules/core/perf/opencl/perf_matop.cpp
@@ -81,7 +81,6 @@ OCL_PERF_TEST_P(ConvertToFixture, ConvertTo,
 }


-//#define RUN_CONVERTFP16
 static Size convertFP16_srcSize(4000, 4000);

 OCL_PERF_TEST(Core, ConvertFP32FP16MatMat)
@@ -97,11 +96,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16MatMat)
    Mat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
@@ -119,11 +114,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16MatUMat)
    UMat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
@@ -141,11 +132,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16UMatMat)
    Mat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
@@ -163,11 +150,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16UMatUMat)
    UMat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
@@ -185,11 +168,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32MatMat)
    Mat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
@@ -207,11 +186,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32MatUMat)
    UMat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
@@ -229,11 +204,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32UMatMat)
    Mat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
@@ -251,11 +222,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32UMatUMat)
    UMat dst(srcSize, dtype);
    declare.in(src, WARMUP_RNG).out(dst);

-#ifdef RUN_CONVERTFP16
-    OCL_TEST_CYCLE() convertFp16(src, dst);
-#else
    OCL_TEST_CYCLE() src.convertTo(dst, dtype);
-#endif

    SANITY_CHECK_NOTHING();
 }
--- a/modules/core/src/convert.dispatch.cpp
+++ b/modules/core/src/convert.dispatch.cpp
@@ -53,34 +53,6 @@ BinaryFunc getConvertFunc(int sdepth, int ddepth)
 }

 #ifdef HAVE_OPENCL
-static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int sdepth, int ddepth )
-{
-    int type = _src.type(), cn = CV_MAT_CN(type);
-
-    _dst.createSameSize( _src, CV_MAKETYPE(ddepth, cn) );
-    int kercn = 1;
-    int rowsPerWI = 1;
-    String build_opt = format("-D HALF_SUPPORT -D srcT=%s -D dstT=%s -D rowsPerWI=%d%s",
-                              sdepth == CV_32F ? "float" : "half",
-                              sdepth == CV_32F ? "half" : "float",
-                              rowsPerWI,
-                              sdepth == CV_32F ? " -D FLOAT_TO_HALF " : "");
-    ocl::Kernel k(sdepth == CV_32F ? "convertFp16_FP32_to_FP16" : "convertFp16_FP16_to_FP32", ocl::core::halfconvert_oclsrc, build_opt);
-    if (k.empty())
-        return false;
-
-    UMat src = _src.getUMat();
-    UMat dst = _dst.getUMat();
-
-    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
-    dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
-
-    k.args(srcarg, dstarg);
-
-    size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
-    return k.run(2, globalsize, NULL, false);
-}
-
 static bool ocl_convertTo(InputArray src_, OutputArray dst_, int ddepth, bool noScale, double alpha, double beta)
 {
    CV_INSTRUMENT_REGION();
@@ -254,65 +226,4 @@ void UMat::convertTo(OutputArray dst, int type_, double alpha, double beta) cons
    (void)src;
 }

-//==================================================================================================
-
-void convertFp16(InputArray _src, OutputArray _dst)
-{
-    CV_INSTRUMENT_REGION();
-
-    int sdepth = _src.depth(), ddepth = 0;
-    BinaryFunc func = 0;
-
-    switch( sdepth )
-    {
-    case CV_32F:
-        if(_dst.fixedType())
-        {
-            ddepth = _dst.depth();
-            CV_Assert(ddepth == CV_16S || ddepth == CV_16F);
-            CV_Assert(_dst.channels() == _src.channels());
-        }
-        else
-            ddepth =  CV_16S;
-        func = getConvertFunc(CV_32F, CV_16F);
-        break;
-    case CV_16S:
-    case CV_16F:
-        ddepth = CV_32F;
-        func = getConvertFunc(CV_16F, CV_32F);
-        break;
-    default:
-        CV_Error(Error::StsUnsupportedFormat, "Unsupported input depth");
-        return;
-    }
-
-    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
-               ocl_convertFp16(_src, _dst, sdepth, ddepth))
-
-    Mat src = _src.getMat();
-
-    int type = CV_MAKETYPE(ddepth, src.channels());
-    _dst.create( src.dims, src.size, type );
-    Mat dst = _dst.getMat();
-    int cn = src.channels();
-
-    CV_Assert( func != 0 );
-
-    if( src.dims <= 2 )
-    {
-        Size sz = getContinuousSize2D(src, dst, cn);
-        func( src.data, src.step, 0, 0, dst.data, dst.step, sz, 0);
-    }
-    else
-    {
-        const Mat* arrays[] = {&src, &dst, 0};
-        uchar* ptrs[2] = {};
-        NAryMatIterator it(arrays, ptrs);
-        Size sz((int)(it.size*cn), 1);
-
-        for( size_t i = 0; i < it.nplanes; i++, ++it )
-            func(ptrs[0], 0, 0, 0, ptrs[1], 0, sz, 0);
-    }
-}
-
 } // namespace cv
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -597,7 +597,10 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub
        {convertToScale<double, uchar>, convertToScale<double, schar>, convertToScale<double, ushort>, convertToScale<double, short>, convertToScale<double, int>, convertToScale<double, float>, convertToScale<double, double>}
    };

-    funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), alpha, beta, stream);
+    func_t func = funcs[sdepth][ddepth];
+    CV_Assert(func);
+
+    func(src.reshape(1), dst.reshape(1), alpha, beta, stream);
 }

 void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
@@ -608,9 +611,9 @@ void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
    switch(src.depth())
    {
    case CV_32F:
-        ddepth = CV_16S;
+        ddepth = CV_16F;
        break;
-    case CV_16S:
+    case CV_16F:
        ddepth = CV_32F;
        break;
    default:
@@ -625,11 +628,13 @@ void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
    static const func_t funcs[] =
    {
        0, 0, 0,
-        convertScaleHalf<float, short>, 0, convertScaleHalf<short, float>,
-        0, 0,
+        0, 0, convertScaleHalf<short, float>,
+        0, convertScaleHalf<float, short>, 0,
    };

-    funcs[ddepth](src.reshape(1), dst.reshape(1), stream);
+    func_t func = funcs[ddepth];
+    CV_Assert(func);
+    func(src.reshape(1), dst.reshape(1), stream);
 }

 #endif
--- a/modules/core/src/opencl/halfconvert.cl
+++ b/modules/core/src/opencl/halfconvert.cl
@@ -1,82 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the copyright holders or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifdef HALF_SUPPORT
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16:enable
-#endif
-#endif
-
-__kernel void
-#ifdef FLOAT_TO_HALF
-    convertFp16_FP32_to_FP16
-#else
-    convertFp16_FP16_to_FP32
-#endif
-(
-    __global const uchar * srcptr, int src_step, int src_offset,
-    __global uchar * dstptr, int dst_step, int dst_offset,
-    int dst_rows, int dst_cols
-)
-{
-    int x = get_global_id(0);
-    int y0 = get_global_id(1) * rowsPerWI;
-
-    if (x < dst_cols)
-    {
-        int src_index = mad24(y0, src_step, mad24(x, (int)sizeof(srcT), src_offset));
-        int dst_index = mad24(y0, dst_step, mad24(x, (int)sizeof(dstT), dst_offset));
-
-        for (int y = y0, y1 = min(dst_rows, y0 + rowsPerWI); y < y1; ++y, src_index += src_step, dst_index += dst_step)
-        {
-            __global const srcT * src = (__global const srcT *)(srcptr + src_index);
-            __global dstT * dst = (__global dstT *)(dstptr + dst_index);
-
-#ifdef FLOAT_TO_HALF
-            vstore_half(src[0], 0, dst);
-#else
-            dst[0] = vload_half(0, src);
-#endif
-        }
-    }
-}
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -1672,9 +1672,9 @@ OCL_TEST_P(ConvertFp16, Mat)
    for (int j = 0; j < test_loop_times; j++)
    {
        generateTestData();
-
-        OCL_OFF(cv::convertFp16(src_roi, dst_roi));
-        OCL_ON(cv::convertFp16(usrc_roi, udst_roi));
+        int cvt_code = src_roi.depth() == CV_32F ? CV_16F : CV_32F;
+        OCL_OFF(src_roi.convertTo(dst_roi, cvt_code));
+        OCL_ON(usrc_roi.convertTo(udst_roi, cvt_code));

        Near(1);
    }
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -924,8 +924,16 @@ struct ConvertScaleFp16Op : public BaseElemWiseOp
    void op(const vector<Mat>& src, Mat& dst, const Mat&)
    {
        Mat m;
-        convertFp16(src[0], m);
-        convertFp16(m, dst);
+        if (src[0].depth() == CV_32F)
+        {
+            src[0].convertTo(m, CV_16F);
+            m.convertTo(dst, CV_32F);
+        }
+        else
+        {
+            src[0].convertTo(m, CV_32F);
+            m.convertTo(dst, CV_16F);
+        }
    }
    void refop(const vector<Mat>& src, Mat& dst, const Mat&)
    {
@@ -935,7 +943,7 @@ struct ConvertScaleFp16Op : public BaseElemWiseOp
    {
        // 0: FP32 -> FP16 -> FP32
        // 1: FP16 -> FP32 -> FP16
-        int srctype = (nextRange & 1) == 0 ? CV_32F : CV_16S;
+        int srctype = (nextRange & 1) == 0 ? CV_32F : CV_16F;
        return srctype;
    }
    void getValueRange(int, double& minval, double& maxval)