removed NPP implementation

12 years ago · f826bd8bce
parent e1397f6c1f
commit f826bd8bce
2 changed files with 33 additions and 122 deletions
--- a/modules/gpu/src/resize.cpp
+++ b/modules/gpu/src/resize.cpp
@ -44,18 +44,7 @@
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
+void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }
 {
    (void)src;
    (void)dst;
    (void)dsize;
    (void)fx;
    (void)fy;
    (void)interpolation;
    (void)s;
    throw_nogpu();
 }
 #else // HAVE_CUDA
@ -69,94 +58,57 @@ namespace cv { namespace gpu { namespace device
    }
 }}}
-void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
+void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
 {
-    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
+    using namespace ::cv::gpu::device::imgproc;
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
+
-            || interpolation == INTER_CUBIC || interpolation == INTER_AREA);
+    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
-    CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
+    static const func_t funcs[6][4] =
    {
        {resize_gpu<uchar>      , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3>     , resize_gpu<uchar4>     },
        {0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/  , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
        {resize_gpu<ushort>     , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3>    , resize_gpu<ushort4>    },
        {resize_gpu<short>      , 0 /*resize_gpu<short2>*/ , resize_gpu<short3>     , resize_gpu<short4>     },
        {0 /*resize_gpu<int>*/  , 0 /*resize_gpu<int2>*/   , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
        {resize_gpu<float>      , 0 /*resize_gpu<float2>*/ , resize_gpu<float3>     , resize_gpu<float4>     }
    };
    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
    CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
    if (dsize == Size())
    {
        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
    }
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }
-    if (dsize != dst.size())
+
-        dst.create(dsize, src.type());
+    dst.create(dsize, src.type());
    if (dsize == src.size())
    {
-        if (s)
+        if (stream)
-            s.enqueueCopy(src, dst);
+            stream.enqueueCopy(src, dst);
        else
            src.copyTo(dst);
        return;
    }
-    cudaStream_t stream = StreamAccessor::getStream(s);
+    const func_t func = funcs[src.depth()][src.channels() - 1];
    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
    Size wholeSize;
    Point ofs;
    src.locateROI(wholeSize, ofs);
    PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step);
-    bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
+    func(src, wholeSrc, ofs.x, ofs.y, static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, StreamAccessor::getStream(stream));
    useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);
    if (useNpp)
    {
        typedef NppStatus (*func_t)(const Npp8u * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u * pDst, int nDstStep, NppiSize dstROISize,
                                    double xFactor, double yFactor, int eInterpolation);
        const func_t funcs[4] = { nppiResize_8u_C1R, 0, 0, nppiResize_8u_C4R };
        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
        NppiSize srcsz;
        srcsz.width  = wholeSize.width;
        srcsz.height = wholeSize.height;
        NppiRect srcrect;
        srcrect.x = ofs.x;
        srcrect.y = ofs.y;
        srcrect.width  = src.cols;
        srcrect.height = src.rows;
        NppiSize dstsz;
        dstsz.width  = dst.cols;
        dstsz.height = dst.rows;
        NppStreamHandler h(stream);
        nppSafeCall( funcs[src.channels() - 1](src.datastart, srcsz, static_cast<int>(src.step), srcrect,
                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else
    {
        using namespace ::cv::gpu::device::imgproc;
        typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        static const func_t funcs[6][4] =
        {
            {resize_gpu<uchar>      , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3>     , resize_gpu<uchar4>     },
            {0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/  , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
            {resize_gpu<ushort>     , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3>    , resize_gpu<ushort4>    },
            {resize_gpu<short>      , 0 /*resize_gpu<short2>*/ , resize_gpu<short3>     , resize_gpu<short4>     },
            {0 /*resize_gpu<int>*/  , 0 /*resize_gpu<int2>*/   , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
            {resize_gpu<float>      , 0 /*resize_gpu<float2>*/ , resize_gpu<float3>     , resize_gpu<float4>     }
        };
        const func_t func = funcs[src.depth()][src.channels() - 1];
        CV_Assert(func != 0);
        func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
            static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
    }
 }
 #endif // HAVE_CUDA
--- a/modules/gpu/test/test_resize.cpp
+++ b/modules/gpu/test/test_resize.cpp
@ -155,7 +155,7 @@ GPU_TEST_P(Resize, Accuracy)
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(0.3, 0.5, 1.5, 2.0),
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
    WHOLE_SUBMAT));
@ -201,50 +201,9 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy)
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(0.3, 0.5),
-    testing::Values(Interpolation(cv::INTER_AREA), Interpolation(cv::INTER_NEAREST)),  //, Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_AREA)),
    WHOLE_SUBMAT));
 ///////////////////////////////////////////////////////////////////
 // Test NPP
 PARAM_TEST_CASE(ResizeNPP, cv::gpu::DeviceInfo, MatType, double, Interpolation)
 {
    cv::gpu::DeviceInfo devInfo;
    double coeff;
    int interpolation;
    int type;
    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        type = GET_PARAM(1);
        coeff = GET_PARAM(2);
        interpolation = GET_PARAM(3);
        cv::gpu::setDevice(devInfo.deviceID());
    }
 };
 GPU_TEST_P(ResizeNPP, Accuracy)
 {
    cv::Mat src = readImageType("stereobp/aloe-L.png", type);
    ASSERT_FALSE(src.empty());
    cv::gpu::GpuMat dst;
    cv::gpu::resize(loadMat(src), dst, cv::Size(), coeff, coeff, interpolation);
    cv::Mat dst_gold;
    resizeGold(src, dst_gold, coeff, coeff, interpolation);
    EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-1);
 }
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeNPP, testing::Combine(
    ALL_DEVICES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
    testing::Values(0.3, 0.5, 1.5, 2.0),
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR))));
 #endif // HAVE_CUDA