added more types support for gpu separable filters

pull/328/head
Vladislav Vinogradov 12 years ago
parent 281d036fcf
commit e446903aac
  1. 53
      modules/gpu/src/cuda/column_filter.0.cu
  2. 53
      modules/gpu/src/cuda/column_filter.1.cu
  3. 53
      modules/gpu/src/cuda/column_filter.10.cu
  4. 53
      modules/gpu/src/cuda/column_filter.11.cu
  5. 53
      modules/gpu/src/cuda/column_filter.12.cu
  6. 53
      modules/gpu/src/cuda/column_filter.13.cu
  7. 53
      modules/gpu/src/cuda/column_filter.14.cu
  8. 53
      modules/gpu/src/cuda/column_filter.2.cu
  9. 53
      modules/gpu/src/cuda/column_filter.3.cu
  10. 53
      modules/gpu/src/cuda/column_filter.4.cu
  11. 53
      modules/gpu/src/cuda/column_filter.5.cu
  12. 53
      modules/gpu/src/cuda/column_filter.6.cu
  13. 53
      modules/gpu/src/cuda/column_filter.7.cu
  14. 53
      modules/gpu/src/cuda/column_filter.8.cu
  15. 53
      modules/gpu/src/cuda/column_filter.9.cu
  16. 391
      modules/gpu/src/cuda/column_filter.cu
  17. 373
      modules/gpu/src/cuda/column_filter.h
  18. 53
      modules/gpu/src/cuda/row_filter.0.cu
  19. 53
      modules/gpu/src/cuda/row_filter.1.cu
  20. 53
      modules/gpu/src/cuda/row_filter.10.cu
  21. 53
      modules/gpu/src/cuda/row_filter.11.cu
  22. 53
      modules/gpu/src/cuda/row_filter.12.cu
  23. 53
      modules/gpu/src/cuda/row_filter.13.cu
  24. 53
      modules/gpu/src/cuda/row_filter.14.cu
  25. 53
      modules/gpu/src/cuda/row_filter.2.cu
  26. 53
      modules/gpu/src/cuda/row_filter.3.cu
  27. 53
      modules/gpu/src/cuda/row_filter.4.cu
  28. 53
      modules/gpu/src/cuda/row_filter.5.cu
  29. 53
      modules/gpu/src/cuda/row_filter.6.cu
  30. 53
      modules/gpu/src/cuda/row_filter.7.cu
  31. 53
      modules/gpu/src/cuda/row_filter.8.cu
  32. 53
      modules/gpu/src/cuda/row_filter.9.cu
  33. 390
      modules/gpu/src/cuda/row_filter.cu
  34. 372
      modules/gpu/src/cuda/row_filter.h
  35. 160
      modules/gpu/src/filtering.cpp

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float, uchar>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float3, uchar3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float, unsigned short>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float3, ushort3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float4, ushort4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float3, int3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float4, int4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float, int>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float, short>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "column_filter.h"
namespace filter
{
template void linearColumn<float4, short4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -1,391 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"
namespace cv { namespace gpu { namespace device
{
namespace column_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
}
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = KSIZE <= 16 ? 1 : 2;
#else
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 2;
const int HALO_SIZE = 2;
#endif
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];
const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
if (x >= src.cols)
return;
const T* src_col = src.ptr() + x;
const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y;
if (blockIdx.y > 0)
{
//Upper halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x));
}
else
{
//Upper halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step));
}
if (blockIdx.y + 2 < gridDim.y)
{
//Main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x));
//Lower halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x));
}
else
{
//Main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step));
//Lower halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step));
}
__syncthreads();
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
{
const int y = yStart + j * BLOCK_DIM_Y;
if (y < src.rows)
{
sum_t sum = VecTraits<sum_t>::all(0);
#pragma unroll
for (int k = 0; k < KSIZE; ++k)
sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
dst(y, x) = saturate_cast<D>(sum);
}
}
}
template <int KSIZE, typename T, typename D, template<typename> class B>
void linearColumnFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
{
int BLOCK_DIM_X;
int BLOCK_DIM_Y;
int PATCH_PER_BLOCK;
if (cc >= 20)
{
BLOCK_DIM_X = 16;
BLOCK_DIM_Y = 16;
PATCH_PER_BLOCK = 4;
}
else
{
BLOCK_DIM_X = 16;
BLOCK_DIM_Y = 8;
PATCH_PER_BLOCK = 2;
}
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));
B<T> brd(src.rows);
linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T, typename D>
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
{
0,
linearColumnFilter_caller< 1, T, D, BrdColReflect101>,
linearColumnFilter_caller< 2, T, D, BrdColReflect101>,
linearColumnFilter_caller< 3, T, D, BrdColReflect101>,
linearColumnFilter_caller< 4, T, D, BrdColReflect101>,
linearColumnFilter_caller< 5, T, D, BrdColReflect101>,
linearColumnFilter_caller< 6, T, D, BrdColReflect101>,
linearColumnFilter_caller< 7, T, D, BrdColReflect101>,
linearColumnFilter_caller< 8, T, D, BrdColReflect101>,
linearColumnFilter_caller< 9, T, D, BrdColReflect101>,
linearColumnFilter_caller<10, T, D, BrdColReflect101>,
linearColumnFilter_caller<11, T, D, BrdColReflect101>,
linearColumnFilter_caller<12, T, D, BrdColReflect101>,
linearColumnFilter_caller<13, T, D, BrdColReflect101>,
linearColumnFilter_caller<14, T, D, BrdColReflect101>,
linearColumnFilter_caller<15, T, D, BrdColReflect101>,
linearColumnFilter_caller<16, T, D, BrdColReflect101>,
linearColumnFilter_caller<17, T, D, BrdColReflect101>,
linearColumnFilter_caller<18, T, D, BrdColReflect101>,
linearColumnFilter_caller<19, T, D, BrdColReflect101>,
linearColumnFilter_caller<20, T, D, BrdColReflect101>,
linearColumnFilter_caller<21, T, D, BrdColReflect101>,
linearColumnFilter_caller<22, T, D, BrdColReflect101>,
linearColumnFilter_caller<23, T, D, BrdColReflect101>,
linearColumnFilter_caller<24, T, D, BrdColReflect101>,
linearColumnFilter_caller<25, T, D, BrdColReflect101>,
linearColumnFilter_caller<26, T, D, BrdColReflect101>,
linearColumnFilter_caller<27, T, D, BrdColReflect101>,
linearColumnFilter_caller<28, T, D, BrdColReflect101>,
linearColumnFilter_caller<29, T, D, BrdColReflect101>,
linearColumnFilter_caller<30, T, D, BrdColReflect101>,
linearColumnFilter_caller<31, T, D, BrdColReflect101>,
linearColumnFilter_caller<32, T, D, BrdColReflect101>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColReplicate>,
linearColumnFilter_caller< 2, T, D, BrdColReplicate>,
linearColumnFilter_caller< 3, T, D, BrdColReplicate>,
linearColumnFilter_caller< 4, T, D, BrdColReplicate>,
linearColumnFilter_caller< 5, T, D, BrdColReplicate>,
linearColumnFilter_caller< 6, T, D, BrdColReplicate>,
linearColumnFilter_caller< 7, T, D, BrdColReplicate>,
linearColumnFilter_caller< 8, T, D, BrdColReplicate>,
linearColumnFilter_caller< 9, T, D, BrdColReplicate>,
linearColumnFilter_caller<10, T, D, BrdColReplicate>,
linearColumnFilter_caller<11, T, D, BrdColReplicate>,
linearColumnFilter_caller<12, T, D, BrdColReplicate>,
linearColumnFilter_caller<13, T, D, BrdColReplicate>,
linearColumnFilter_caller<14, T, D, BrdColReplicate>,
linearColumnFilter_caller<15, T, D, BrdColReplicate>,
linearColumnFilter_caller<16, T, D, BrdColReplicate>,
linearColumnFilter_caller<17, T, D, BrdColReplicate>,
linearColumnFilter_caller<18, T, D, BrdColReplicate>,
linearColumnFilter_caller<19, T, D, BrdColReplicate>,
linearColumnFilter_caller<20, T, D, BrdColReplicate>,
linearColumnFilter_caller<21, T, D, BrdColReplicate>,
linearColumnFilter_caller<22, T, D, BrdColReplicate>,
linearColumnFilter_caller<23, T, D, BrdColReplicate>,
linearColumnFilter_caller<24, T, D, BrdColReplicate>,
linearColumnFilter_caller<25, T, D, BrdColReplicate>,
linearColumnFilter_caller<26, T, D, BrdColReplicate>,
linearColumnFilter_caller<27, T, D, BrdColReplicate>,
linearColumnFilter_caller<28, T, D, BrdColReplicate>,
linearColumnFilter_caller<29, T, D, BrdColReplicate>,
linearColumnFilter_caller<30, T, D, BrdColReplicate>,
linearColumnFilter_caller<31, T, D, BrdColReplicate>,
linearColumnFilter_caller<32, T, D, BrdColReplicate>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColConstant>,
linearColumnFilter_caller< 2, T, D, BrdColConstant>,
linearColumnFilter_caller< 3, T, D, BrdColConstant>,
linearColumnFilter_caller< 4, T, D, BrdColConstant>,
linearColumnFilter_caller< 5, T, D, BrdColConstant>,
linearColumnFilter_caller< 6, T, D, BrdColConstant>,
linearColumnFilter_caller< 7, T, D, BrdColConstant>,
linearColumnFilter_caller< 8, T, D, BrdColConstant>,
linearColumnFilter_caller< 9, T, D, BrdColConstant>,
linearColumnFilter_caller<10, T, D, BrdColConstant>,
linearColumnFilter_caller<11, T, D, BrdColConstant>,
linearColumnFilter_caller<12, T, D, BrdColConstant>,
linearColumnFilter_caller<13, T, D, BrdColConstant>,
linearColumnFilter_caller<14, T, D, BrdColConstant>,
linearColumnFilter_caller<15, T, D, BrdColConstant>,
linearColumnFilter_caller<16, T, D, BrdColConstant>,
linearColumnFilter_caller<17, T, D, BrdColConstant>,
linearColumnFilter_caller<18, T, D, BrdColConstant>,
linearColumnFilter_caller<19, T, D, BrdColConstant>,
linearColumnFilter_caller<20, T, D, BrdColConstant>,
linearColumnFilter_caller<21, T, D, BrdColConstant>,
linearColumnFilter_caller<22, T, D, BrdColConstant>,
linearColumnFilter_caller<23, T, D, BrdColConstant>,
linearColumnFilter_caller<24, T, D, BrdColConstant>,
linearColumnFilter_caller<25, T, D, BrdColConstant>,
linearColumnFilter_caller<26, T, D, BrdColConstant>,
linearColumnFilter_caller<27, T, D, BrdColConstant>,
linearColumnFilter_caller<28, T, D, BrdColConstant>,
linearColumnFilter_caller<29, T, D, BrdColConstant>,
linearColumnFilter_caller<30, T, D, BrdColConstant>,
linearColumnFilter_caller<31, T, D, BrdColConstant>,
linearColumnFilter_caller<32, T, D, BrdColConstant>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColReflect>,
linearColumnFilter_caller< 2, T, D, BrdColReflect>,
linearColumnFilter_caller< 3, T, D, BrdColReflect>,
linearColumnFilter_caller< 4, T, D, BrdColReflect>,
linearColumnFilter_caller< 5, T, D, BrdColReflect>,
linearColumnFilter_caller< 6, T, D, BrdColReflect>,
linearColumnFilter_caller< 7, T, D, BrdColReflect>,
linearColumnFilter_caller< 8, T, D, BrdColReflect>,
linearColumnFilter_caller< 9, T, D, BrdColReflect>,
linearColumnFilter_caller<10, T, D, BrdColReflect>,
linearColumnFilter_caller<11, T, D, BrdColReflect>,
linearColumnFilter_caller<12, T, D, BrdColReflect>,
linearColumnFilter_caller<13, T, D, BrdColReflect>,
linearColumnFilter_caller<14, T, D, BrdColReflect>,
linearColumnFilter_caller<15, T, D, BrdColReflect>,
linearColumnFilter_caller<16, T, D, BrdColReflect>,
linearColumnFilter_caller<17, T, D, BrdColReflect>,
linearColumnFilter_caller<18, T, D, BrdColReflect>,
linearColumnFilter_caller<19, T, D, BrdColReflect>,
linearColumnFilter_caller<20, T, D, BrdColReflect>,
linearColumnFilter_caller<21, T, D, BrdColReflect>,
linearColumnFilter_caller<22, T, D, BrdColReflect>,
linearColumnFilter_caller<23, T, D, BrdColReflect>,
linearColumnFilter_caller<24, T, D, BrdColReflect>,
linearColumnFilter_caller<25, T, D, BrdColReflect>,
linearColumnFilter_caller<26, T, D, BrdColReflect>,
linearColumnFilter_caller<27, T, D, BrdColReflect>,
linearColumnFilter_caller<28, T, D, BrdColReflect>,
linearColumnFilter_caller<29, T, D, BrdColReflect>,
linearColumnFilter_caller<30, T, D, BrdColReflect>,
linearColumnFilter_caller<31, T, D, BrdColReflect>,
linearColumnFilter_caller<32, T, D, BrdColReflect>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColWrap>,
linearColumnFilter_caller< 2, T, D, BrdColWrap>,
linearColumnFilter_caller< 3, T, D, BrdColWrap>,
linearColumnFilter_caller< 4, T, D, BrdColWrap>,
linearColumnFilter_caller< 5, T, D, BrdColWrap>,
linearColumnFilter_caller< 6, T, D, BrdColWrap>,
linearColumnFilter_caller< 7, T, D, BrdColWrap>,
linearColumnFilter_caller< 8, T, D, BrdColWrap>,
linearColumnFilter_caller< 9, T, D, BrdColWrap>,
linearColumnFilter_caller<10, T, D, BrdColWrap>,
linearColumnFilter_caller<11, T, D, BrdColWrap>,
linearColumnFilter_caller<12, T, D, BrdColWrap>,
linearColumnFilter_caller<13, T, D, BrdColWrap>,
linearColumnFilter_caller<14, T, D, BrdColWrap>,
linearColumnFilter_caller<15, T, D, BrdColWrap>,
linearColumnFilter_caller<16, T, D, BrdColWrap>,
linearColumnFilter_caller<17, T, D, BrdColWrap>,
linearColumnFilter_caller<18, T, D, BrdColWrap>,
linearColumnFilter_caller<19, T, D, BrdColWrap>,
linearColumnFilter_caller<20, T, D, BrdColWrap>,
linearColumnFilter_caller<21, T, D, BrdColWrap>,
linearColumnFilter_caller<22, T, D, BrdColWrap>,
linearColumnFilter_caller<23, T, D, BrdColWrap>,
linearColumnFilter_caller<24, T, D, BrdColWrap>,
linearColumnFilter_caller<25, T, D, BrdColWrap>,
linearColumnFilter_caller<26, T, D, BrdColWrap>,
linearColumnFilter_caller<27, T, D, BrdColWrap>,
linearColumnFilter_caller<28, T, D, BrdColWrap>,
linearColumnFilter_caller<29, T, D, BrdColWrap>,
linearColumnFilter_caller<30, T, D, BrdColWrap>,
linearColumnFilter_caller<31, T, D, BrdColWrap>,
linearColumnFilter_caller<32, T, D, BrdColWrap>
}
};
loadKernel(kernel, ksize, stream);
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, uchar3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} // namespace column_filter
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */

@ -0,0 +1,373 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace column_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = KSIZE <= 16 ? 1 : 2;
#else
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 2;
const int HALO_SIZE = 2;
#endif
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];
const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
if (x >= src.cols)
return;
const T* src_col = src.ptr() + x;
const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y;
if (blockIdx.y > 0)
{
//Upper halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x));
}
else
{
//Upper halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step));
}
if (blockIdx.y + 2 < gridDim.y)
{
//Main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x));
//Lower halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x));
}
else
{
//Main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step));
//Lower halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step));
}
__syncthreads();
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
{
const int y = yStart + j * BLOCK_DIM_Y;
if (y < src.rows)
{
sum_t sum = VecTraits<sum_t>::all(0);
#pragma unroll
for (int k = 0; k < KSIZE; ++k)
sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
dst(y, x) = saturate_cast<D>(sum);
}
}
}
template <int KSIZE, typename T, typename D, template<typename> class B>
void caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
{
int BLOCK_DIM_X;
int BLOCK_DIM_Y;
int PATCH_PER_BLOCK;
if (cc >= 20)
{
BLOCK_DIM_X = 16;
BLOCK_DIM_Y = 16;
PATCH_PER_BLOCK = 4;
}
else
{
BLOCK_DIM_X = 16;
BLOCK_DIM_Y = 8;
PATCH_PER_BLOCK = 2;
}
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));
B<T> brd(src.rows);
linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
namespace filter
{
template <typename T, typename D>
void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
{
0,
column_filter::caller< 1, T, D, BrdColReflect101>,
column_filter::caller< 2, T, D, BrdColReflect101>,
column_filter::caller< 3, T, D, BrdColReflect101>,
column_filter::caller< 4, T, D, BrdColReflect101>,
column_filter::caller< 5, T, D, BrdColReflect101>,
column_filter::caller< 6, T, D, BrdColReflect101>,
column_filter::caller< 7, T, D, BrdColReflect101>,
column_filter::caller< 8, T, D, BrdColReflect101>,
column_filter::caller< 9, T, D, BrdColReflect101>,
column_filter::caller<10, T, D, BrdColReflect101>,
column_filter::caller<11, T, D, BrdColReflect101>,
column_filter::caller<12, T, D, BrdColReflect101>,
column_filter::caller<13, T, D, BrdColReflect101>,
column_filter::caller<14, T, D, BrdColReflect101>,
column_filter::caller<15, T, D, BrdColReflect101>,
column_filter::caller<16, T, D, BrdColReflect101>,
column_filter::caller<17, T, D, BrdColReflect101>,
column_filter::caller<18, T, D, BrdColReflect101>,
column_filter::caller<19, T, D, BrdColReflect101>,
column_filter::caller<20, T, D, BrdColReflect101>,
column_filter::caller<21, T, D, BrdColReflect101>,
column_filter::caller<22, T, D, BrdColReflect101>,
column_filter::caller<23, T, D, BrdColReflect101>,
column_filter::caller<24, T, D, BrdColReflect101>,
column_filter::caller<25, T, D, BrdColReflect101>,
column_filter::caller<26, T, D, BrdColReflect101>,
column_filter::caller<27, T, D, BrdColReflect101>,
column_filter::caller<28, T, D, BrdColReflect101>,
column_filter::caller<29, T, D, BrdColReflect101>,
column_filter::caller<30, T, D, BrdColReflect101>,
column_filter::caller<31, T, D, BrdColReflect101>,
column_filter::caller<32, T, D, BrdColReflect101>
},
{
0,
column_filter::caller< 1, T, D, BrdColReplicate>,
column_filter::caller< 2, T, D, BrdColReplicate>,
column_filter::caller< 3, T, D, BrdColReplicate>,
column_filter::caller< 4, T, D, BrdColReplicate>,
column_filter::caller< 5, T, D, BrdColReplicate>,
column_filter::caller< 6, T, D, BrdColReplicate>,
column_filter::caller< 7, T, D, BrdColReplicate>,
column_filter::caller< 8, T, D, BrdColReplicate>,
column_filter::caller< 9, T, D, BrdColReplicate>,
column_filter::caller<10, T, D, BrdColReplicate>,
column_filter::caller<11, T, D, BrdColReplicate>,
column_filter::caller<12, T, D, BrdColReplicate>,
column_filter::caller<13, T, D, BrdColReplicate>,
column_filter::caller<14, T, D, BrdColReplicate>,
column_filter::caller<15, T, D, BrdColReplicate>,
column_filter::caller<16, T, D, BrdColReplicate>,
column_filter::caller<17, T, D, BrdColReplicate>,
column_filter::caller<18, T, D, BrdColReplicate>,
column_filter::caller<19, T, D, BrdColReplicate>,
column_filter::caller<20, T, D, BrdColReplicate>,
column_filter::caller<21, T, D, BrdColReplicate>,
column_filter::caller<22, T, D, BrdColReplicate>,
column_filter::caller<23, T, D, BrdColReplicate>,
column_filter::caller<24, T, D, BrdColReplicate>,
column_filter::caller<25, T, D, BrdColReplicate>,
column_filter::caller<26, T, D, BrdColReplicate>,
column_filter::caller<27, T, D, BrdColReplicate>,
column_filter::caller<28, T, D, BrdColReplicate>,
column_filter::caller<29, T, D, BrdColReplicate>,
column_filter::caller<30, T, D, BrdColReplicate>,
column_filter::caller<31, T, D, BrdColReplicate>,
column_filter::caller<32, T, D, BrdColReplicate>
},
{
0,
column_filter::caller< 1, T, D, BrdColConstant>,
column_filter::caller< 2, T, D, BrdColConstant>,
column_filter::caller< 3, T, D, BrdColConstant>,
column_filter::caller< 4, T, D, BrdColConstant>,
column_filter::caller< 5, T, D, BrdColConstant>,
column_filter::caller< 6, T, D, BrdColConstant>,
column_filter::caller< 7, T, D, BrdColConstant>,
column_filter::caller< 8, T, D, BrdColConstant>,
column_filter::caller< 9, T, D, BrdColConstant>,
column_filter::caller<10, T, D, BrdColConstant>,
column_filter::caller<11, T, D, BrdColConstant>,
column_filter::caller<12, T, D, BrdColConstant>,
column_filter::caller<13, T, D, BrdColConstant>,
column_filter::caller<14, T, D, BrdColConstant>,
column_filter::caller<15, T, D, BrdColConstant>,
column_filter::caller<16, T, D, BrdColConstant>,
column_filter::caller<17, T, D, BrdColConstant>,
column_filter::caller<18, T, D, BrdColConstant>,
column_filter::caller<19, T, D, BrdColConstant>,
column_filter::caller<20, T, D, BrdColConstant>,
column_filter::caller<21, T, D, BrdColConstant>,
column_filter::caller<22, T, D, BrdColConstant>,
column_filter::caller<23, T, D, BrdColConstant>,
column_filter::caller<24, T, D, BrdColConstant>,
column_filter::caller<25, T, D, BrdColConstant>,
column_filter::caller<26, T, D, BrdColConstant>,
column_filter::caller<27, T, D, BrdColConstant>,
column_filter::caller<28, T, D, BrdColConstant>,
column_filter::caller<29, T, D, BrdColConstant>,
column_filter::caller<30, T, D, BrdColConstant>,
column_filter::caller<31, T, D, BrdColConstant>,
column_filter::caller<32, T, D, BrdColConstant>
},
{
0,
column_filter::caller< 1, T, D, BrdColReflect>,
column_filter::caller< 2, T, D, BrdColReflect>,
column_filter::caller< 3, T, D, BrdColReflect>,
column_filter::caller< 4, T, D, BrdColReflect>,
column_filter::caller< 5, T, D, BrdColReflect>,
column_filter::caller< 6, T, D, BrdColReflect>,
column_filter::caller< 7, T, D, BrdColReflect>,
column_filter::caller< 8, T, D, BrdColReflect>,
column_filter::caller< 9, T, D, BrdColReflect>,
column_filter::caller<10, T, D, BrdColReflect>,
column_filter::caller<11, T, D, BrdColReflect>,
column_filter::caller<12, T, D, BrdColReflect>,
column_filter::caller<13, T, D, BrdColReflect>,
column_filter::caller<14, T, D, BrdColReflect>,
column_filter::caller<15, T, D, BrdColReflect>,
column_filter::caller<16, T, D, BrdColReflect>,
column_filter::caller<17, T, D, BrdColReflect>,
column_filter::caller<18, T, D, BrdColReflect>,
column_filter::caller<19, T, D, BrdColReflect>,
column_filter::caller<20, T, D, BrdColReflect>,
column_filter::caller<21, T, D, BrdColReflect>,
column_filter::caller<22, T, D, BrdColReflect>,
column_filter::caller<23, T, D, BrdColReflect>,
column_filter::caller<24, T, D, BrdColReflect>,
column_filter::caller<25, T, D, BrdColReflect>,
column_filter::caller<26, T, D, BrdColReflect>,
column_filter::caller<27, T, D, BrdColReflect>,
column_filter::caller<28, T, D, BrdColReflect>,
column_filter::caller<29, T, D, BrdColReflect>,
column_filter::caller<30, T, D, BrdColReflect>,
column_filter::caller<31, T, D, BrdColReflect>,
column_filter::caller<32, T, D, BrdColReflect>
},
{
0,
column_filter::caller< 1, T, D, BrdColWrap>,
column_filter::caller< 2, T, D, BrdColWrap>,
column_filter::caller< 3, T, D, BrdColWrap>,
column_filter::caller< 4, T, D, BrdColWrap>,
column_filter::caller< 5, T, D, BrdColWrap>,
column_filter::caller< 6, T, D, BrdColWrap>,
column_filter::caller< 7, T, D, BrdColWrap>,
column_filter::caller< 8, T, D, BrdColWrap>,
column_filter::caller< 9, T, D, BrdColWrap>,
column_filter::caller<10, T, D, BrdColWrap>,
column_filter::caller<11, T, D, BrdColWrap>,
column_filter::caller<12, T, D, BrdColWrap>,
column_filter::caller<13, T, D, BrdColWrap>,
column_filter::caller<14, T, D, BrdColWrap>,
column_filter::caller<15, T, D, BrdColWrap>,
column_filter::caller<16, T, D, BrdColWrap>,
column_filter::caller<17, T, D, BrdColWrap>,
column_filter::caller<18, T, D, BrdColWrap>,
column_filter::caller<19, T, D, BrdColWrap>,
column_filter::caller<20, T, D, BrdColWrap>,
column_filter::caller<21, T, D, BrdColWrap>,
column_filter::caller<22, T, D, BrdColWrap>,
column_filter::caller<23, T, D, BrdColWrap>,
column_filter::caller<24, T, D, BrdColWrap>,
column_filter::caller<25, T, D, BrdColWrap>,
column_filter::caller<26, T, D, BrdColWrap>,
column_filter::caller<27, T, D, BrdColWrap>,
column_filter::caller<28, T, D, BrdColWrap>,
column_filter::caller<29, T, D, BrdColWrap>,
column_filter::caller<30, T, D, BrdColWrap>,
column_filter::caller<31, T, D, BrdColWrap>,
column_filter::caller<32, T, D, BrdColWrap>
}
};
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
}

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<uchar, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<uchar3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<unsigned short, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<ushort3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<ushort4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<int3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<int4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<int, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<float, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<float3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<float4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<short, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "row_filter.h"
namespace filter
{
template void linearRow<short4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
#endif /* CUDA_DISABLER */

@ -1,390 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"
namespace cv { namespace gpu { namespace device
{
namespace row_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
}
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = 1;
#else
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 4;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = 1;
#endif
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (y >= src.rows)
return;
const T* src_row = src.ptr(y);
const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;
if (blockIdx.x > 0)
{
//Load left halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]);
}
else
{
//Load left halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row));
}
if (blockIdx.x + 2 < gridDim.x)
{
//Load main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]);
//Load right halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]);
}
else
{
//Load main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));
//Load right halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
}
__syncthreads();
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
{
const int x = xStart + j * BLOCK_DIM_X;
if (x < src.cols)
{
sum_t sum = VecTraits<sum_t>::all(0);
#pragma unroll
for (int k = 0; k < KSIZE; ++k)
sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
dst(y, x) = saturate_cast<D>(sum);
}
}
}
template <int KSIZE, typename T, typename D, template<typename> class B>
void linearRowFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
{
int BLOCK_DIM_X;
int BLOCK_DIM_Y;
int PATCH_PER_BLOCK;
if (cc >= 20)
{
BLOCK_DIM_X = 32;
BLOCK_DIM_Y = 8;
PATCH_PER_BLOCK = 4;
}
else
{
BLOCK_DIM_X = 32;
BLOCK_DIM_Y = 4;
PATCH_PER_BLOCK = 4;
}
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X * PATCH_PER_BLOCK), divUp(src.rows, BLOCK_DIM_Y));
B<T> brd(src.cols);
linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T, typename D>
void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
{
0,
linearRowFilter_caller< 1, T, D, BrdRowReflect101>,
linearRowFilter_caller< 2, T, D, BrdRowReflect101>,
linearRowFilter_caller< 3, T, D, BrdRowReflect101>,
linearRowFilter_caller< 4, T, D, BrdRowReflect101>,
linearRowFilter_caller< 5, T, D, BrdRowReflect101>,
linearRowFilter_caller< 6, T, D, BrdRowReflect101>,
linearRowFilter_caller< 7, T, D, BrdRowReflect101>,
linearRowFilter_caller< 8, T, D, BrdRowReflect101>,
linearRowFilter_caller< 9, T, D, BrdRowReflect101>,
linearRowFilter_caller<10, T, D, BrdRowReflect101>,
linearRowFilter_caller<11, T, D, BrdRowReflect101>,
linearRowFilter_caller<12, T, D, BrdRowReflect101>,
linearRowFilter_caller<13, T, D, BrdRowReflect101>,
linearRowFilter_caller<14, T, D, BrdRowReflect101>,
linearRowFilter_caller<15, T, D, BrdRowReflect101>,
linearRowFilter_caller<16, T, D, BrdRowReflect101>,
linearRowFilter_caller<17, T, D, BrdRowReflect101>,
linearRowFilter_caller<18, T, D, BrdRowReflect101>,
linearRowFilter_caller<19, T, D, BrdRowReflect101>,
linearRowFilter_caller<20, T, D, BrdRowReflect101>,
linearRowFilter_caller<21, T, D, BrdRowReflect101>,
linearRowFilter_caller<22, T, D, BrdRowReflect101>,
linearRowFilter_caller<23, T, D, BrdRowReflect101>,
linearRowFilter_caller<24, T, D, BrdRowReflect101>,
linearRowFilter_caller<25, T, D, BrdRowReflect101>,
linearRowFilter_caller<26, T, D, BrdRowReflect101>,
linearRowFilter_caller<27, T, D, BrdRowReflect101>,
linearRowFilter_caller<28, T, D, BrdRowReflect101>,
linearRowFilter_caller<29, T, D, BrdRowReflect101>,
linearRowFilter_caller<30, T, D, BrdRowReflect101>,
linearRowFilter_caller<31, T, D, BrdRowReflect101>,
linearRowFilter_caller<32, T, D, BrdRowReflect101>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowReplicate>,
linearRowFilter_caller< 2, T, D, BrdRowReplicate>,
linearRowFilter_caller< 3, T, D, BrdRowReplicate>,
linearRowFilter_caller< 4, T, D, BrdRowReplicate>,
linearRowFilter_caller< 5, T, D, BrdRowReplicate>,
linearRowFilter_caller< 6, T, D, BrdRowReplicate>,
linearRowFilter_caller< 7, T, D, BrdRowReplicate>,
linearRowFilter_caller< 8, T, D, BrdRowReplicate>,
linearRowFilter_caller< 9, T, D, BrdRowReplicate>,
linearRowFilter_caller<10, T, D, BrdRowReplicate>,
linearRowFilter_caller<11, T, D, BrdRowReplicate>,
linearRowFilter_caller<12, T, D, BrdRowReplicate>,
linearRowFilter_caller<13, T, D, BrdRowReplicate>,
linearRowFilter_caller<14, T, D, BrdRowReplicate>,
linearRowFilter_caller<15, T, D, BrdRowReplicate>,
linearRowFilter_caller<16, T, D, BrdRowReplicate>,
linearRowFilter_caller<17, T, D, BrdRowReplicate>,
linearRowFilter_caller<18, T, D, BrdRowReplicate>,
linearRowFilter_caller<19, T, D, BrdRowReplicate>,
linearRowFilter_caller<20, T, D, BrdRowReplicate>,
linearRowFilter_caller<21, T, D, BrdRowReplicate>,
linearRowFilter_caller<22, T, D, BrdRowReplicate>,
linearRowFilter_caller<23, T, D, BrdRowReplicate>,
linearRowFilter_caller<24, T, D, BrdRowReplicate>,
linearRowFilter_caller<25, T, D, BrdRowReplicate>,
linearRowFilter_caller<26, T, D, BrdRowReplicate>,
linearRowFilter_caller<27, T, D, BrdRowReplicate>,
linearRowFilter_caller<28, T, D, BrdRowReplicate>,
linearRowFilter_caller<29, T, D, BrdRowReplicate>,
linearRowFilter_caller<30, T, D, BrdRowReplicate>,
linearRowFilter_caller<31, T, D, BrdRowReplicate>,
linearRowFilter_caller<32, T, D, BrdRowReplicate>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowConstant>,
linearRowFilter_caller< 2, T, D, BrdRowConstant>,
linearRowFilter_caller< 3, T, D, BrdRowConstant>,
linearRowFilter_caller< 4, T, D, BrdRowConstant>,
linearRowFilter_caller< 5, T, D, BrdRowConstant>,
linearRowFilter_caller< 6, T, D, BrdRowConstant>,
linearRowFilter_caller< 7, T, D, BrdRowConstant>,
linearRowFilter_caller< 8, T, D, BrdRowConstant>,
linearRowFilter_caller< 9, T, D, BrdRowConstant>,
linearRowFilter_caller<10, T, D, BrdRowConstant>,
linearRowFilter_caller<11, T, D, BrdRowConstant>,
linearRowFilter_caller<12, T, D, BrdRowConstant>,
linearRowFilter_caller<13, T, D, BrdRowConstant>,
linearRowFilter_caller<14, T, D, BrdRowConstant>,
linearRowFilter_caller<15, T, D, BrdRowConstant>,
linearRowFilter_caller<16, T, D, BrdRowConstant>,
linearRowFilter_caller<17, T, D, BrdRowConstant>,
linearRowFilter_caller<18, T, D, BrdRowConstant>,
linearRowFilter_caller<19, T, D, BrdRowConstant>,
linearRowFilter_caller<20, T, D, BrdRowConstant>,
linearRowFilter_caller<21, T, D, BrdRowConstant>,
linearRowFilter_caller<22, T, D, BrdRowConstant>,
linearRowFilter_caller<23, T, D, BrdRowConstant>,
linearRowFilter_caller<24, T, D, BrdRowConstant>,
linearRowFilter_caller<25, T, D, BrdRowConstant>,
linearRowFilter_caller<26, T, D, BrdRowConstant>,
linearRowFilter_caller<27, T, D, BrdRowConstant>,
linearRowFilter_caller<28, T, D, BrdRowConstant>,
linearRowFilter_caller<29, T, D, BrdRowConstant>,
linearRowFilter_caller<30, T, D, BrdRowConstant>,
linearRowFilter_caller<31, T, D, BrdRowConstant>,
linearRowFilter_caller<32, T, D, BrdRowConstant>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowReflect>,
linearRowFilter_caller< 2, T, D, BrdRowReflect>,
linearRowFilter_caller< 3, T, D, BrdRowReflect>,
linearRowFilter_caller< 4, T, D, BrdRowReflect>,
linearRowFilter_caller< 5, T, D, BrdRowReflect>,
linearRowFilter_caller< 6, T, D, BrdRowReflect>,
linearRowFilter_caller< 7, T, D, BrdRowReflect>,
linearRowFilter_caller< 8, T, D, BrdRowReflect>,
linearRowFilter_caller< 9, T, D, BrdRowReflect>,
linearRowFilter_caller<10, T, D, BrdRowReflect>,
linearRowFilter_caller<11, T, D, BrdRowReflect>,
linearRowFilter_caller<12, T, D, BrdRowReflect>,
linearRowFilter_caller<13, T, D, BrdRowReflect>,
linearRowFilter_caller<14, T, D, BrdRowReflect>,
linearRowFilter_caller<15, T, D, BrdRowReflect>,
linearRowFilter_caller<16, T, D, BrdRowReflect>,
linearRowFilter_caller<17, T, D, BrdRowReflect>,
linearRowFilter_caller<18, T, D, BrdRowReflect>,
linearRowFilter_caller<19, T, D, BrdRowReflect>,
linearRowFilter_caller<20, T, D, BrdRowReflect>,
linearRowFilter_caller<21, T, D, BrdRowReflect>,
linearRowFilter_caller<22, T, D, BrdRowReflect>,
linearRowFilter_caller<23, T, D, BrdRowReflect>,
linearRowFilter_caller<24, T, D, BrdRowReflect>,
linearRowFilter_caller<25, T, D, BrdRowReflect>,
linearRowFilter_caller<26, T, D, BrdRowReflect>,
linearRowFilter_caller<27, T, D, BrdRowReflect>,
linearRowFilter_caller<28, T, D, BrdRowReflect>,
linearRowFilter_caller<29, T, D, BrdRowReflect>,
linearRowFilter_caller<30, T, D, BrdRowReflect>,
linearRowFilter_caller<31, T, D, BrdRowReflect>,
linearRowFilter_caller<32, T, D, BrdRowReflect>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowWrap>,
linearRowFilter_caller< 2, T, D, BrdRowWrap>,
linearRowFilter_caller< 3, T, D, BrdRowWrap>,
linearRowFilter_caller< 4, T, D, BrdRowWrap>,
linearRowFilter_caller< 5, T, D, BrdRowWrap>,
linearRowFilter_caller< 6, T, D, BrdRowWrap>,
linearRowFilter_caller< 7, T, D, BrdRowWrap>,
linearRowFilter_caller< 8, T, D, BrdRowWrap>,
linearRowFilter_caller< 9, T, D, BrdRowWrap>,
linearRowFilter_caller<10, T, D, BrdRowWrap>,
linearRowFilter_caller<11, T, D, BrdRowWrap>,
linearRowFilter_caller<12, T, D, BrdRowWrap>,
linearRowFilter_caller<13, T, D, BrdRowWrap>,
linearRowFilter_caller<14, T, D, BrdRowWrap>,
linearRowFilter_caller<15, T, D, BrdRowWrap>,
linearRowFilter_caller<16, T, D, BrdRowWrap>,
linearRowFilter_caller<17, T, D, BrdRowWrap>,
linearRowFilter_caller<18, T, D, BrdRowWrap>,
linearRowFilter_caller<19, T, D, BrdRowWrap>,
linearRowFilter_caller<20, T, D, BrdRowWrap>,
linearRowFilter_caller<21, T, D, BrdRowWrap>,
linearRowFilter_caller<22, T, D, BrdRowWrap>,
linearRowFilter_caller<23, T, D, BrdRowWrap>,
linearRowFilter_caller<24, T, D, BrdRowWrap>,
linearRowFilter_caller<25, T, D, BrdRowWrap>,
linearRowFilter_caller<26, T, D, BrdRowWrap>,
linearRowFilter_caller<27, T, D, BrdRowWrap>,
linearRowFilter_caller<28, T, D, BrdRowWrap>,
linearRowFilter_caller<29, T, D, BrdRowWrap>,
linearRowFilter_caller<30, T, D, BrdRowWrap>,
linearRowFilter_caller<31, T, D, BrdRowWrap>,
linearRowFilter_caller<32, T, D, BrdRowWrap>
}
};
loadKernel(kernel, ksize, stream);
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<uchar3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<int , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<float3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<float4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} // namespace row_filter
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */

@ -0,0 +1,372 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace row_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = 1;
#else
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 4;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = 1;
#endif
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (y >= src.rows)
return;
const T* src_row = src.ptr(y);
const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;
if (blockIdx.x > 0)
{
//Load left halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]);
}
else
{
//Load left halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row));
}
if (blockIdx.x + 2 < gridDim.x)
{
//Load main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]);
//Load right halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]);
}
else
{
//Load main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));
//Load right halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
}
__syncthreads();
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
{
const int x = xStart + j * BLOCK_DIM_X;
if (x < src.cols)
{
sum_t sum = VecTraits<sum_t>::all(0);
#pragma unroll
for (int k = 0; k < KSIZE; ++k)
sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
dst(y, x) = saturate_cast<D>(sum);
}
}
}
template <int KSIZE, typename T, typename D, template<typename> class B>
void caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
{
int BLOCK_DIM_X;
int BLOCK_DIM_Y;
int PATCH_PER_BLOCK;
if (cc >= 20)
{
BLOCK_DIM_X = 32;
BLOCK_DIM_Y = 8;
PATCH_PER_BLOCK = 4;
}
else
{
BLOCK_DIM_X = 32;
BLOCK_DIM_Y = 4;
PATCH_PER_BLOCK = 4;
}
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X * PATCH_PER_BLOCK), divUp(src.rows, BLOCK_DIM_Y));
B<T> brd(src.cols);
linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
namespace filter
{
template <typename T, typename D>
void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
{
0,
row_filter::caller< 1, T, D, BrdRowReflect101>,
row_filter::caller< 2, T, D, BrdRowReflect101>,
row_filter::caller< 3, T, D, BrdRowReflect101>,
row_filter::caller< 4, T, D, BrdRowReflect101>,
row_filter::caller< 5, T, D, BrdRowReflect101>,
row_filter::caller< 6, T, D, BrdRowReflect101>,
row_filter::caller< 7, T, D, BrdRowReflect101>,
row_filter::caller< 8, T, D, BrdRowReflect101>,
row_filter::caller< 9, T, D, BrdRowReflect101>,
row_filter::caller<10, T, D, BrdRowReflect101>,
row_filter::caller<11, T, D, BrdRowReflect101>,
row_filter::caller<12, T, D, BrdRowReflect101>,
row_filter::caller<13, T, D, BrdRowReflect101>,
row_filter::caller<14, T, D, BrdRowReflect101>,
row_filter::caller<15, T, D, BrdRowReflect101>,
row_filter::caller<16, T, D, BrdRowReflect101>,
row_filter::caller<17, T, D, BrdRowReflect101>,
row_filter::caller<18, T, D, BrdRowReflect101>,
row_filter::caller<19, T, D, BrdRowReflect101>,
row_filter::caller<20, T, D, BrdRowReflect101>,
row_filter::caller<21, T, D, BrdRowReflect101>,
row_filter::caller<22, T, D, BrdRowReflect101>,
row_filter::caller<23, T, D, BrdRowReflect101>,
row_filter::caller<24, T, D, BrdRowReflect101>,
row_filter::caller<25, T, D, BrdRowReflect101>,
row_filter::caller<26, T, D, BrdRowReflect101>,
row_filter::caller<27, T, D, BrdRowReflect101>,
row_filter::caller<28, T, D, BrdRowReflect101>,
row_filter::caller<29, T, D, BrdRowReflect101>,
row_filter::caller<30, T, D, BrdRowReflect101>,
row_filter::caller<31, T, D, BrdRowReflect101>,
row_filter::caller<32, T, D, BrdRowReflect101>
},
{
0,
row_filter::caller< 1, T, D, BrdRowReplicate>,
row_filter::caller< 2, T, D, BrdRowReplicate>,
row_filter::caller< 3, T, D, BrdRowReplicate>,
row_filter::caller< 4, T, D, BrdRowReplicate>,
row_filter::caller< 5, T, D, BrdRowReplicate>,
row_filter::caller< 6, T, D, BrdRowReplicate>,
row_filter::caller< 7, T, D, BrdRowReplicate>,
row_filter::caller< 8, T, D, BrdRowReplicate>,
row_filter::caller< 9, T, D, BrdRowReplicate>,
row_filter::caller<10, T, D, BrdRowReplicate>,
row_filter::caller<11, T, D, BrdRowReplicate>,
row_filter::caller<12, T, D, BrdRowReplicate>,
row_filter::caller<13, T, D, BrdRowReplicate>,
row_filter::caller<14, T, D, BrdRowReplicate>,
row_filter::caller<15, T, D, BrdRowReplicate>,
row_filter::caller<16, T, D, BrdRowReplicate>,
row_filter::caller<17, T, D, BrdRowReplicate>,
row_filter::caller<18, T, D, BrdRowReplicate>,
row_filter::caller<19, T, D, BrdRowReplicate>,
row_filter::caller<20, T, D, BrdRowReplicate>,
row_filter::caller<21, T, D, BrdRowReplicate>,
row_filter::caller<22, T, D, BrdRowReplicate>,
row_filter::caller<23, T, D, BrdRowReplicate>,
row_filter::caller<24, T, D, BrdRowReplicate>,
row_filter::caller<25, T, D, BrdRowReplicate>,
row_filter::caller<26, T, D, BrdRowReplicate>,
row_filter::caller<27, T, D, BrdRowReplicate>,
row_filter::caller<28, T, D, BrdRowReplicate>,
row_filter::caller<29, T, D, BrdRowReplicate>,
row_filter::caller<30, T, D, BrdRowReplicate>,
row_filter::caller<31, T, D, BrdRowReplicate>,
row_filter::caller<32, T, D, BrdRowReplicate>
},
{
0,
row_filter::caller< 1, T, D, BrdRowConstant>,
row_filter::caller< 2, T, D, BrdRowConstant>,
row_filter::caller< 3, T, D, BrdRowConstant>,
row_filter::caller< 4, T, D, BrdRowConstant>,
row_filter::caller< 5, T, D, BrdRowConstant>,
row_filter::caller< 6, T, D, BrdRowConstant>,
row_filter::caller< 7, T, D, BrdRowConstant>,
row_filter::caller< 8, T, D, BrdRowConstant>,
row_filter::caller< 9, T, D, BrdRowConstant>,
row_filter::caller<10, T, D, BrdRowConstant>,
row_filter::caller<11, T, D, BrdRowConstant>,
row_filter::caller<12, T, D, BrdRowConstant>,
row_filter::caller<13, T, D, BrdRowConstant>,
row_filter::caller<14, T, D, BrdRowConstant>,
row_filter::caller<15, T, D, BrdRowConstant>,
row_filter::caller<16, T, D, BrdRowConstant>,
row_filter::caller<17, T, D, BrdRowConstant>,
row_filter::caller<18, T, D, BrdRowConstant>,
row_filter::caller<19, T, D, BrdRowConstant>,
row_filter::caller<20, T, D, BrdRowConstant>,
row_filter::caller<21, T, D, BrdRowConstant>,
row_filter::caller<22, T, D, BrdRowConstant>,
row_filter::caller<23, T, D, BrdRowConstant>,
row_filter::caller<24, T, D, BrdRowConstant>,
row_filter::caller<25, T, D, BrdRowConstant>,
row_filter::caller<26, T, D, BrdRowConstant>,
row_filter::caller<27, T, D, BrdRowConstant>,
row_filter::caller<28, T, D, BrdRowConstant>,
row_filter::caller<29, T, D, BrdRowConstant>,
row_filter::caller<30, T, D, BrdRowConstant>,
row_filter::caller<31, T, D, BrdRowConstant>,
row_filter::caller<32, T, D, BrdRowConstant>
},
{
0,
row_filter::caller< 1, T, D, BrdRowReflect>,
row_filter::caller< 2, T, D, BrdRowReflect>,
row_filter::caller< 3, T, D, BrdRowReflect>,
row_filter::caller< 4, T, D, BrdRowReflect>,
row_filter::caller< 5, T, D, BrdRowReflect>,
row_filter::caller< 6, T, D, BrdRowReflect>,
row_filter::caller< 7, T, D, BrdRowReflect>,
row_filter::caller< 8, T, D, BrdRowReflect>,
row_filter::caller< 9, T, D, BrdRowReflect>,
row_filter::caller<10, T, D, BrdRowReflect>,
row_filter::caller<11, T, D, BrdRowReflect>,
row_filter::caller<12, T, D, BrdRowReflect>,
row_filter::caller<13, T, D, BrdRowReflect>,
row_filter::caller<14, T, D, BrdRowReflect>,
row_filter::caller<15, T, D, BrdRowReflect>,
row_filter::caller<16, T, D, BrdRowReflect>,
row_filter::caller<17, T, D, BrdRowReflect>,
row_filter::caller<18, T, D, BrdRowReflect>,
row_filter::caller<19, T, D, BrdRowReflect>,
row_filter::caller<20, T, D, BrdRowReflect>,
row_filter::caller<21, T, D, BrdRowReflect>,
row_filter::caller<22, T, D, BrdRowReflect>,
row_filter::caller<23, T, D, BrdRowReflect>,
row_filter::caller<24, T, D, BrdRowReflect>,
row_filter::caller<25, T, D, BrdRowReflect>,
row_filter::caller<26, T, D, BrdRowReflect>,
row_filter::caller<27, T, D, BrdRowReflect>,
row_filter::caller<28, T, D, BrdRowReflect>,
row_filter::caller<29, T, D, BrdRowReflect>,
row_filter::caller<30, T, D, BrdRowReflect>,
row_filter::caller<31, T, D, BrdRowReflect>,
row_filter::caller<32, T, D, BrdRowReflect>
},
{
0,
row_filter::caller< 1, T, D, BrdRowWrap>,
row_filter::caller< 2, T, D, BrdRowWrap>,
row_filter::caller< 3, T, D, BrdRowWrap>,
row_filter::caller< 4, T, D, BrdRowWrap>,
row_filter::caller< 5, T, D, BrdRowWrap>,
row_filter::caller< 6, T, D, BrdRowWrap>,
row_filter::caller< 7, T, D, BrdRowWrap>,
row_filter::caller< 8, T, D, BrdRowWrap>,
row_filter::caller< 9, T, D, BrdRowWrap>,
row_filter::caller<10, T, D, BrdRowWrap>,
row_filter::caller<11, T, D, BrdRowWrap>,
row_filter::caller<12, T, D, BrdRowWrap>,
row_filter::caller<13, T, D, BrdRowWrap>,
row_filter::caller<14, T, D, BrdRowWrap>,
row_filter::caller<15, T, D, BrdRowWrap>,
row_filter::caller<16, T, D, BrdRowWrap>,
row_filter::caller<17, T, D, BrdRowWrap>,
row_filter::caller<18, T, D, BrdRowWrap>,
row_filter::caller<19, T, D, BrdRowWrap>,
row_filter::caller<20, T, D, BrdRowWrap>,
row_filter::caller<21, T, D, BrdRowWrap>,
row_filter::caller<22, T, D, BrdRowWrap>,
row_filter::caller<23, T, D, BrdRowWrap>,
row_filter::caller<24, T, D, BrdRowWrap>,
row_filter::caller<25, T, D, BrdRowWrap>,
row_filter::caller<26, T, D, BrdRowWrap>,
row_filter::caller<27, T, D, BrdRowWrap>,
row_filter::caller<28, T, D, BrdRowWrap>,
row_filter::caller<29, T, D, BrdRowWrap>,
row_filter::caller<30, T, D, BrdRowWrap>,
row_filter::caller<31, T, D, BrdRowWrap>,
row_filter::caller<32, T, D, BrdRowWrap>
}
};
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
}

@ -830,20 +830,14 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter
namespace cv { namespace gpu { namespace device
namespace filter
{
namespace row_filter
{
template <typename T, typename D>
void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
template <typename T, typename D>
void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
namespace column_filter
{
template <typename T, typename D>
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
}}}
template <typename T, typename D>
void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}
namespace
{
@ -899,72 +893,56 @@ namespace
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
{
using namespace ::cv::gpu::device::row_filter;
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
static const gpuFilter1D_t funcs[7][4] =
{
{filter::linearRow<uchar, float>, 0, filter::linearRow<uchar3, float3>, filter::linearRow<uchar4, float4>},
{0, 0, 0, 0},
{filter::linearRow<ushort, float>, 0, filter::linearRow<ushort3, float3>, filter::linearRow<ushort4, float4>},
{filter::linearRow<short, float>, 0, filter::linearRow<short3, float3>, filter::linearRow<short4, float4>},
{filter::linearRow<int, float>, 0, filter::linearRow<int3, float3>, filter::linearRow<int4, float4>},
{filter::linearRow<float, float>, 0, filter::linearRow<float3, float3>, filter::linearRow<float4, float4>},
{0, 0, 0, 0}
};
static const nppFilter1D_t npp_funcs[] =
{
0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R
};
if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4))
{
CV_Assert(borderType == BORDER_CONSTANT);
CV_Assert( borderType == BORDER_CONSTANT );
GpuMat gpu_row_krnl;
int nDivisor;
normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nDivisor, true);
int ksize = gpu_row_krnl.cols;
const int ksize = gpu_row_krnl.cols;
normalizeAnchor(anchor, ksize);
return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor,
nppFilter1D_callers[CV_MAT_CN(srcType)]));
return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor, npp_funcs[CV_MAT_CN(srcType)]));
}
CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
CV_Assert( borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
CV_Assert( tryConvertToGpuBorderType(borderType, gpuBorderType) );
CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4);
const int sdepth = CV_MAT_DEPTH(srcType);
const int cn = CV_MAT_CN(srcType);
CV_Assert( sdepth <= CV_64F && cn <= 4 );
CV_Assert( CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(bufType) == cn );
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));
const gpuFilter1D_t func = funcs[sdepth][cn - 1];
CV_Assert( func != 0 );
GpuMat gpu_row_krnl;
normalizeKernel(rowKernel, gpu_row_krnl, CV_32F);
int ksize = gpu_row_krnl.cols;
CV_Assert(ksize > 0 && ksize <= 32);
const int ksize = gpu_row_krnl.cols;
CV_Assert( ksize > 0 && ksize <= 32 );
normalizeAnchor(anchor, ksize);
gpuFilter1D_t func = 0;
switch (srcType)
{
case CV_8UC1:
func = linearRowFilter_gpu<uchar, float>;
break;
case CV_8UC3:
func = linearRowFilter_gpu<uchar3, float3>;
break;
case CV_8UC4:
func = linearRowFilter_gpu<uchar4, float4>;
break;
case CV_16SC3:
func = linearRowFilter_gpu<short3, float3>;
break;
case CV_32SC1:
func = linearRowFilter_gpu<int, float>;
break;
case CV_32FC1:
func = linearRowFilter_gpu<float, float>;
break;
case CV_32FC3:
func = linearRowFilter_gpu<float3, float3>;
break;
case CV_32FC4:
func = linearRowFilter_gpu<float4, float4>;
break;
}
return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, gpu_row_krnl, func, gpuBorderType));
}
@ -1020,72 +998,56 @@ namespace
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
{
using namespace ::cv::gpu::device::column_filter;
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
static const gpuFilter1D_t funcs[7][4] =
{
{filter::linearColumn<float, uchar>, 0, filter::linearColumn<float3, uchar3>, filter::linearColumn<float4, uchar4>},
{0, 0, 0, 0},
{filter::linearColumn<float, ushort>, 0, filter::linearColumn<float3, ushort3>, filter::linearColumn<float4, ushort4>},
{filter::linearColumn<float, short>, 0, filter::linearColumn<float3, short3>, filter::linearColumn<float4, short4>},
{filter::linearColumn<float, int>, 0, filter::linearColumn<float3, int3>, filter::linearColumn<float4, int4>},
{filter::linearColumn<float, float>, 0, filter::linearColumn<float3, float3>, filter::linearColumn<float4, float4>},
{0, 0, 0, 0}
};
static const nppFilter1D_t npp_funcs[] =
{
0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R
};
if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4))
{
CV_Assert(borderType == BORDER_CONSTANT);
CV_Assert( borderType == BORDER_CONSTANT );
GpuMat gpu_col_krnl;
int nDivisor;
normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nDivisor, true);
int ksize = gpu_col_krnl.cols;
const int ksize = gpu_col_krnl.cols;
normalizeAnchor(anchor, ksize);
return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor,
nppFilter1D_callers[CV_MAT_CN(bufType)]));
return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor, npp_funcs[CV_MAT_CN(bufType)]));
}
CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
CV_Assert( borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
CV_Assert( tryConvertToGpuBorderType(borderType, gpuBorderType) );
CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC3 || dstType == CV_8UC4 || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1 || dstType == CV_32FC3 || dstType == CV_32FC4);
const int ddepth = CV_MAT_DEPTH(dstType);
const int cn = CV_MAT_CN(dstType);
CV_Assert( ddepth <= CV_64F && cn <= 4 );
CV_Assert( CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(bufType) == cn );
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
gpuFilter1D_t func = funcs[ddepth][cn - 1];
CV_Assert( func != 0 );
GpuMat gpu_col_krnl;
normalizeKernel(columnKernel, gpu_col_krnl, CV_32F);
int ksize = gpu_col_krnl.cols;
const int ksize = gpu_col_krnl.cols;
CV_Assert(ksize > 0 && ksize <= 32);
normalizeAnchor(anchor, ksize);
gpuFilter1D_t func = 0;
switch (dstType)
{
case CV_8UC1:
func = linearColumnFilter_gpu<float, uchar>;
break;
case CV_8UC3:
func = linearColumnFilter_gpu<float3, uchar3>;
break;
case CV_8UC4:
func = linearColumnFilter_gpu<float4, uchar4>;
break;
case CV_16SC3:
func = linearColumnFilter_gpu<float3, short3>;
break;
case CV_32SC1:
func = linearColumnFilter_gpu<float, int>;
break;
case CV_32FC1:
func = linearColumnFilter_gpu<float, float>;
break;
case CV_32FC3:
func = linearColumnFilter_gpu<float3, float3>;
break;
case CV_32FC4:
func = linearColumnFilter_gpu<float4, float4>;
break;
}
return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, gpu_col_krnl, func, gpuBorderType));
}

Loading…
Cancel
Save