Merge pull request #26327 from asmorkalov:as/drop_convertFp16

Finally dropped convertFp16 function in favor of cv::Mat::convertTo() #26327 

Partially addresses https://github.com/opencv/opencv/issues/24909
Related PR to contrib: https://github.com/opencv/opencv_contrib/pull/3812

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There are accuracy tests, performance tests, and test data in the opencv_extra repository, if applicable
      The patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/26358/head
Alexander Smorkalov 4 months ago committed by GitHub
parent a40ceff215
commit 9f0c3f5b2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 14
      modules/core/include/opencv2/core.hpp
  2. 2
      modules/core/include/opencv2/core/cuda.hpp
  3. 33
      modules/core/perf/opencl/perf_matop.cpp
  4. 89
      modules/core/src/convert.dispatch.cpp
  5. 17
      modules/core/src/cuda/gpu_mat.cu
  6. 82
      modules/core/src/opencl/halfconvert.cl
  7. 6
      modules/core/test/ocl/test_arithm.cpp
  8. 14
      modules/core/test/test_arithm.cpp

@ -492,20 +492,6 @@ For example:
CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
double alpha = 1, double beta = 0);
/** @brief Converts an array to half precision floating number.
This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). CV_16S format is used to represent FP16 data.
There are two use modes (src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array has to have type of CV_32F or
CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error.
The format of half precision floating point is defined in IEEE 754-2008.
@param src input array.
@param dst output array.
@deprecated Use Mat::convertTo with CV_16F instead.
*/
CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst);
/** @brief Performs a look-up table transform of an array.
The function LUT fills the output array with values from the look-up table. Indices of the entries

@ -1324,7 +1324,7 @@ CV_EXPORTS_W void printShortCudaDeviceInfo(int device);
@param _src input array.
@param _dst output array.
@param stream Stream for the asynchronous version.
@sa convertFp16
@sa cv::Mat::convertTo
*/
CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());

@ -81,7 +81,6 @@ OCL_PERF_TEST_P(ConvertToFixture, ConvertTo,
}
//#define RUN_CONVERTFP16
static Size convertFP16_srcSize(4000, 4000);
OCL_PERF_TEST(Core, ConvertFP32FP16MatMat)
@ -97,11 +96,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16MatMat)
Mat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}
@ -119,11 +114,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16MatUMat)
UMat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}
@ -141,11 +132,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16UMatMat)
Mat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}
@ -163,11 +150,7 @@ OCL_PERF_TEST(Core, ConvertFP32FP16UMatUMat)
UMat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}
@ -185,11 +168,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32MatMat)
Mat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}
@ -207,11 +186,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32MatUMat)
UMat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}
@ -229,11 +204,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32UMatMat)
Mat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}
@ -251,11 +222,7 @@ OCL_PERF_TEST(Core, ConvertFP16FP32UMatUMat)
UMat dst(srcSize, dtype);
declare.in(src, WARMUP_RNG).out(dst);
#ifdef RUN_CONVERTFP16
OCL_TEST_CYCLE() convertFp16(src, dst);
#else
OCL_TEST_CYCLE() src.convertTo(dst, dtype);
#endif
SANITY_CHECK_NOTHING();
}

@ -53,34 +53,6 @@ BinaryFunc getConvertFunc(int sdepth, int ddepth)
}
#ifdef HAVE_OPENCL
// OpenCL path for the FP32 <-> FP16 conversion.
//
// Allocates _dst with the same size as _src and type (ddepth, channels of _src), builds the
// direction-specific kernel from the halfconvert OpenCL source and launches it over a 2D range.
//
// _src   - input array; its depth is passed separately as sdepth.
// _dst   - output array, (re)created here.
// sdepth - source depth; CV_32F selects the float -> half kernel, anything else the half -> float one.
// ddepth - depth used for the destination type.
//
// Returns false when the kernel object is empty after construction; otherwise returns the
// result of the kernel launch.
static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int sdepth, int ddepth )
{
    const int channels = CV_MAT_CN(_src.type());
    _dst.createSameSize( _src, CV_MAKETYPE(ddepth, channels) );

    // Both granularities are fixed to 1: one element and one row per work-item.
    const int elemsPerItem = 1;
    const int rowsPerItem = 1;

    // Every direction-dependent choice below hinges on whether the source is FP32.
    const bool fromFloat = (sdepth == CV_32F);
    const char* srcTypeName = fromFloat ? "float" : "half";
    const char* dstTypeName = fromFloat ? "half" : "float";
    const char* directionFlag = fromFloat ? " -D FLOAT_TO_HALF " : "";
    const char* kernelName = fromFloat ? "convertFp16_FP32_to_FP16" : "convertFp16_FP16_to_FP32";

    String options = format("-D HALF_SUPPORT -D srcT=%s -D dstT=%s -D rowsPerWI=%d%s",
                            srcTypeName, dstTypeName, rowsPerItem, directionFlag);

    ocl::Kernel kernel(kernelName, ocl::core::halfconvert_oclsrc, options);
    if (kernel.empty())
        return false;

    UMat usrc = _src.getUMat();
    UMat udst = _dst.getUMat();
    kernel.args(ocl::KernelArg::ReadOnlyNoSize(usrc),
                ocl::KernelArg::WriteOnly(udst, channels, elemsPerItem));

    // Dimension 0 covers every scalar element of a row (cols * channels), dimension 1 the rows.
    size_t work[2];
    work[0] = (size_t)usrc.cols * channels / elemsPerItem;
    work[1] = ((size_t)usrc.rows + rowsPerItem - 1) / rowsPerItem;
    return kernel.run(2, work, NULL, false);
}
static bool ocl_convertTo(InputArray src_, OutputArray dst_, int ddepth, bool noScale, double alpha, double beta)
{
CV_INSTRUMENT_REGION();
@ -254,65 +226,4 @@ void UMat::convertTo(OutputArray dst, int type_, double alpha, double beta) cons
(void)src;
}
//==================================================================================================
// Converts an array between FP32 and FP16 element representations.
//
// Source depth CV_32F: the result is half precision. The destination depth is CV_16S unless
// _dst has a fixed type, in which case that type must be CV_16S or CV_16F and have the same
// channel count as _src.
// Source depth CV_16S or CV_16F: the result is CV_32F.
// Any other source depth raises Error::StsUnsupportedFormat.
//
// _src - input array.
// _dst - output array; created here with the dimensions and size of _src.
void convertFp16(InputArray _src, OutputArray _dst)
{
    CV_INSTRUMENT_REGION();

    const int sdepth = _src.depth();
    int ddepth = 0;
    BinaryFunc func = 0;

    if( sdepth == CV_32F )
    {
        // Default to CV_16S storage; a fixed-type destination overrides it after validation.
        ddepth = CV_16S;
        if( _dst.fixedType() )
        {
            ddepth = _dst.depth();
            CV_Assert(ddepth == CV_16S || ddepth == CV_16F);
            CV_Assert(_dst.channels() == _src.channels());
        }
        func = getConvertFunc(CV_32F, CV_16F);
    }
    else if( sdepth == CV_16S || sdepth == CV_16F )
    {
        ddepth = CV_32F;
        func = getConvertFunc(CV_16F, CV_32F);
    }
    else
    {
        CV_Error(Error::StsUnsupportedFormat, "Unsupported input depth");
        return;
    }

    // NOTE(review): CV_OCL_RUN is defined outside this file; presumably it returns from this
    // function when the OpenCL implementation succeeds - confirm against the macro definition.
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_convertFp16(_src, _dst, sdepth, ddepth))

    Mat src = _src.getMat();
    const int cn = src.channels();
    _dst.create( src.dims, src.size, CV_MAKETYPE(ddepth, cn) );
    Mat dst = _dst.getMat();
    CV_Assert( func != 0 );

    if( src.dims > 2 )
    {
        // N-dimensional case: convert plane by plane, each plane treated as a single row
        // of it.size * cn scalar elements.
        const Mat* arrays[] = {&src, &dst, 0};
        uchar* ptrs[2] = {};
        NAryMatIterator it(arrays, ptrs);
        Size planeSize((int)(it.size*cn), 1);
        for( size_t plane = 0; plane < it.nplanes; ++plane, ++it )
            func(ptrs[0], 0, 0, 0, ptrs[1], 0, planeSize, 0);
        return;
    }

    // Up to two dimensions: a single call over the whole matrix.
    Size sz = getContinuousSize2D(src, dst, cn);
    func( src.data, src.step, 0, 0, dst.data, dst.step, sz, 0);
}
} // namespace cv

@ -597,7 +597,10 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub
{convertToScale<double, uchar>, convertToScale<double, schar>, convertToScale<double, ushort>, convertToScale<double, short>, convertToScale<double, int>, convertToScale<double, float>, convertToScale<double, double>}
};
funcs[sdepth][ddepth](src.reshape(1), dst.reshape(1), alpha, beta, stream);
func_t func = funcs[sdepth][ddepth];
CV_Assert(func);
func(src.reshape(1), dst.reshape(1), alpha, beta, stream);
}
void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
@ -608,9 +611,9 @@ void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
switch(src.depth())
{
case CV_32F:
ddepth = CV_16S;
ddepth = CV_16F;
break;
case CV_16S:
case CV_16F:
ddepth = CV_32F;
break;
default:
@ -625,11 +628,13 @@ void cv::cuda::convertFp16(InputArray _src, OutputArray _dst, Stream& stream)
static const func_t funcs[] =
{
0, 0, 0,
convertScaleHalf<float, short>, 0, convertScaleHalf<short, float>,
0, 0,
0, 0, convertScaleHalf<short, float>,
0, convertScaleHalf<float, short>, 0,
};
funcs[ddepth](src.reshape(1), dst.reshape(1), stream);
func_t func = funcs[ddepth];
CV_Assert(func);
func(src.reshape(1), dst.reshape(1), stream);
}
#endif

@ -1,82 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef HALF_SUPPORT
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16:enable
#endif
#endif
// Element-wise conversion kernel between float and half storage.
//
// The kernel name is chosen at build time: convertFp16_FP32_to_FP16 when FLOAT_TO_HALF is
// defined, convertFp16_FP16_to_FP32 otherwise.
// srcT, dstT and rowsPerWI are not defined in this file and must be supplied as build-time macros.
//
// srcptr/dstptr are byte pointers; the step and offset arguments are added to them directly,
// so they are applied as byte quantities. dst_rows/dst_cols bound the range of work done.
__kernel void
#ifdef FLOAT_TO_HALF
convertFp16_FP32_to_FP16
#else
convertFp16_FP16_to_FP32
#endif
(
__global const uchar * srcptr, int src_step, int src_offset,
__global uchar * dstptr, int dst_step, int dst_offset,
int dst_rows, int dst_cols
)
{
// x: element index within a row; y0: first row handled by this work-item.
int x = get_global_id(0);
int y0 = get_global_id(1) * rowsPerWI;
// Work-items past the last destination column do nothing.
if (x < dst_cols)
{
// Byte offsets of element (y0, x) in the source and destination buffers.
int src_index = mad24(y0, src_step, mad24(x, (int)sizeof(srcT), src_offset));
int dst_index = mad24(y0, dst_step, mad24(x, (int)sizeof(dstT), dst_offset));
// Process up to rowsPerWI rows, clamped to dst_rows, advancing one row step per iteration.
for (int y = y0, y1 = min(dst_rows, y0 + rowsPerWI); y < y1; ++y, src_index += src_step, dst_index += dst_step)
{
__global const srcT * src = (__global const srcT *)(srcptr + src_index);
__global dstT * dst = (__global dstT *)(dstptr + dst_index);
#ifdef FLOAT_TO_HALF
// Store the source value into dst as a half (OpenCL built-in vstore_half).
vstore_half(src[0], 0, dst);
#else
// Read a half from src and write the loaded value to dst (OpenCL built-in vload_half).
dst[0] = vload_half(0, src);
#endif
}
}
}

@ -1672,9 +1672,9 @@ OCL_TEST_P(ConvertFp16, Mat)
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::convertFp16(src_roi, dst_roi));
OCL_ON(cv::convertFp16(usrc_roi, udst_roi));
int cvt_code = src_roi.depth() == CV_32F ? CV_16F : CV_32F;
OCL_OFF(src_roi.convertTo(dst_roi, cvt_code));
OCL_ON(usrc_roi.convertTo(udst_roi, cvt_code));
Near(1);
}

@ -924,8 +924,16 @@ struct ConvertScaleFp16Op : public BaseElemWiseOp
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
Mat m;
convertFp16(src[0], m);
convertFp16(m, dst);
if (src[0].depth() == CV_32F)
{
src[0].convertTo(m, CV_16F);
m.convertTo(dst, CV_32F);
}
else
{
src[0].convertTo(m, CV_32F);
m.convertTo(dst, CV_16F);
}
}
void refop(const vector<Mat>& src, Mat& dst, const Mat&)
{
@ -935,7 +943,7 @@ struct ConvertScaleFp16Op : public BaseElemWiseOp
{
// 0: FP32 -> FP16 -> FP32
// 1: FP16 -> FP32 -> FP16
int srctype = (nextRange & 1) == 0 ? CV_32F : CV_16S;
int srctype = (nextRange & 1) == 0 ? CV_32F : CV_16F;
return srctype;
}
void getValueRange(int, double& minval, double& maxval)

Loading…
Cancel
Save