Merge pull request #1824 from vpisarev:ocl_experiments5

pull/1841/head
Andrey Pavlenko 11 years ago committed by OpenCV Buildbot
commit 8d1a8df1a9
  1. 5
      cmake/OpenCVModule.cmake
  2. 29
      cmake/cl2cpp.cmake
  3. 1
      modules/bioinspired/src/precomp.hpp
  4. 2
      modules/bioinspired/src/retina_ocl.cpp
  5. 4
      modules/core/include/opencv2/core.hpp
  6. 14
      modules/core/include/opencv2/core/mat.hpp
  7. 6
      modules/core/include/opencv2/core/mat.inl.hpp
  8. 133
      modules/core/include/opencv2/core/ocl.hpp
  9. 60
      modules/core/include/opencv2/core/ocl_genbase.hpp
  10. 460
      modules/core/src/arithm.cpp
  11. 15
      modules/core/src/copy.cpp
  12. 285
      modules/core/src/matrix.cpp
  13. 309
      modules/core/src/ocl.cpp
  14. 303
      modules/core/src/opencl/arithm.cl
  15. 73
      modules/core/src/opencl/copyset.cl
  16. 28
      modules/core/src/precomp.hpp
  17. 55
      modules/core/src/umatrix.cpp
  18. 73
      modules/core/test/test_umat.cpp
  19. 6
      modules/highgui/doc/reading_and_writing_images_and_video.rst
  20. 5
      modules/highgui/include/opencv2/highgui.hpp
  21. 10
      modules/highgui/src/cap.cpp
  22. 128
      modules/imgproc/src/color.cpp
  23. 62
      modules/imgproc/src/imgwarp.cpp
  24. 306
      modules/imgproc/src/opencl/cvtcolor.cl
  25. 151
      modules/imgproc/src/opencl/resize.cl
  26. 1
      modules/imgproc/src/precomp.hpp
  27. 82
      modules/imgproc/test/test_imgproc_umat.cpp
  28. 2
      modules/nonfree/src/precomp.hpp
  29. 20
      modules/nonfree/src/surf.ocl.cpp
  30. 4
      modules/objdetect/doc/cascade_classification.rst
  31. 6
      modules/objdetect/include/opencv2/objdetect.hpp
  32. 9
      modules/objdetect/src/cascadedetect.cpp
  33. 1
      modules/objdetect/src/precomp.hpp
  34. 8
      modules/ocl/include/opencv2/ocl/private/util.hpp
  35. 22
      modules/photo/test/test_hdr.cpp
  36. 2
      modules/superres/src/btv_l1_ocl.cpp
  37. 1
      modules/superres/src/precomp.hpp
  38. 276
      samples/cpp/ufacedetect.cpp
  39. 47
      samples/ocl/facedetect.cpp

@ -499,11 +499,12 @@ macro(ocv_glob_module_sources)
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
file(GLOB cl_kernels "src/opencl/*.cl")
if(HAVE_opencv_ocl AND cl_kernels)
if(cl_kernels)
ocv_include_directories(${OPENCL_INCLUDE_DIRS})
string(REGEX REPLACE "opencv_" "" the_module_barename "${the_module}")
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp"
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
COMMAND ${CMAKE_COMMAND} -DMODULE_NAME="${the_module_barename}" -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")

@ -4,6 +4,15 @@ list(SORT cl_list)
string(REPLACE ".cpp" ".hpp" OUTPUT_HPP "${OUTPUT}")
get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME)
# Decide how the generated declarations are wrapped.
# Kernels of the "ocl" module are emitted directly inside cv::ocl (no extra
# namespace, new_mode left untouched). Every other module gets its own nested
# namespace cv::ocl::<MODULE_NAME> and switches on new_mode, which makes the
# foreach() loop below also emit the ProgramSource2 objects.
set(nested_namespace_start "")
set(nested_namespace_end "")
if(NOT "${MODULE_NAME}" STREQUAL "ocl")
  set(new_mode ON)
  set(nested_namespace_start "namespace ${MODULE_NAME}\n{")
  set(nested_namespace_end "}")
endif()
set(STR_CPP "// This file is auto-generated. Do not edit!
#include \"precomp.hpp\"
@ -13,16 +22,19 @@ namespace cv
{
namespace ocl
{
${nested_namespace_start}
")
set(STR_HPP "// This file is auto-generated. Do not edit!
#include \"opencv2/ocl/private/util.hpp\"
#include \"opencv2/core/ocl_genbase.hpp\"
namespace cv
{
namespace ocl
{
${nested_namespace_start}
")
@ -49,12 +61,19 @@ foreach(cl ${cl_list})
string(MD5 hash "${lines}")
set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n")
set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n")
set(STR_CPP_DECL "const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n")
set(STR_HPP_DECL "extern const struct ProgramEntry ${cl_filename};\n")
if(new_mode)
set(STR_CPP_DECL "${STR_CPP_DECL}ProgramSource2 ${cl_filename}_oclsrc(${cl_filename}.programStr);\n")
set(STR_HPP_DECL "${STR_HPP_DECL}extern ProgramSource2 ${cl_filename}_oclsrc;\n")
endif()
set(STR_CPP "${STR_CPP}${STR_CPP_DECL}")
set(STR_HPP "${STR_HPP}${STR_HPP_DECL}")
endforeach()
set(STR_CPP "${STR_CPP}}\n}\n")
set(STR_HPP "${STR_HPP}}\n}\n")
set(STR_CPP "${STR_CPP}}\n${nested_namespace_end}}\n")
set(STR_HPP "${STR_HPP}}\n${nested_namespace_end}}\n")
file(WRITE "${OUTPUT}" "${STR_CPP}")

@ -47,6 +47,7 @@
#include "opencv2/bioinspired.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#include <valarray>

@ -56,6 +56,8 @@
namespace cv
{
static ocl::ProgramEntry retina_kernel = ocl::bioinspired::retina_kernel;
namespace bioinspired
{
namespace ocl

@ -347,6 +347,10 @@ CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst);
CV_EXPORTS void min(const Mat& src1, const Mat& src2, Mat& dst);
//! computes per-element maximum of two arrays (dst = max(src1, src2))
CV_EXPORTS void max(const Mat& src1, const Mat& src2, Mat& dst);
//! computes per-element minimum of two arrays (dst = min(src1, src2))
CV_EXPORTS void min(const UMat& src1, const UMat& src2, UMat& dst);
//! computes per-element maximum of two arrays (dst = max(src1, src2))
CV_EXPORTS void max(const UMat& src1, const UMat& src2, UMat& dst);
//! computes square root of each matrix element (dst = src**0.5)
CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst);

@ -58,6 +58,8 @@ namespace cv
enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
class CV_EXPORTS _OutputArray;
//////////////////////// Input/Output Array Arguments /////////////////////////////////
/*!
@ -116,12 +118,22 @@ public:
void* getObj() const;
virtual int kind() const;
virtual int dims(int i=-1) const;
virtual Size size(int i=-1) const;
virtual int sizend(int* sz, int i=-1) const;
virtual bool sameSize(const _InputArray& arr) const;
virtual size_t total(int i=-1) const;
virtual int type(int i=-1) const;
virtual int depth(int i=-1) const;
virtual int channels(int i=-1) const;
virtual bool isContinuous(int i=-1) const;
virtual bool empty() const;
virtual void copyTo(const _OutputArray& arr) const;
bool isMat() const;
bool isUMat() const;
bool isMatVectot() const;
bool isUMatVector() const;
bool isMatx();
virtual ~_InputArray();
@ -197,8 +209,10 @@ public:
virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void createSameSize(const _InputArray& arr, int mtype) const;
virtual void release() const;
virtual void clear() const;
virtual void setTo(const _InputArray& value) const;
};

@ -108,6 +108,12 @@ inline _InputArray::_InputArray(const cuda::CudaMem& cuda_mem)
inline _InputArray::~_InputArray() {}
// Convenience predicates over kind(): each one reports whether the wrapped
// object is of the corresponding container kind.

// True when the array wraps a single Mat.
inline bool _InputArray::isMat() const
{
    return kind() == _InputArray::MAT;
}

// True when the array wraps a single UMat.
inline bool _InputArray::isUMat() const
{
    return kind() == _InputArray::UMAT;
}

// True when the array wraps a std::vector of Mat.
inline bool _InputArray::isMatVectot() const
{
    return kind() == _InputArray::STD_VECTOR_MAT;
}

// True when the array wraps a std::vector of UMat.
inline bool _InputArray::isUMatVector() const
{
    return kind() == _InputArray::STD_VECTOR_UMAT;
}

// True when the array wraps a Matx.
// NOTE(review): unlike the predicates above this member is not const-qualified;
// the qualifier has to match the in-class declaration, so it is kept as is here.
inline bool _InputArray::isMatx()
{
    return kind() == _InputArray::MATX;
}
////////////////////////////////////////////////////////////////////////////////////////
inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }

@ -49,13 +49,13 @@ namespace cv { namespace ocl {
CV_EXPORTS bool haveOpenCL();
CV_EXPORTS bool useOpenCL();
CV_EXPORTS void setUseOpenCL(bool flag);
CV_EXPORTS void finish();
CV_EXPORTS void finish2();
class CV_EXPORTS Context;
class CV_EXPORTS Context2;
class CV_EXPORTS Device;
class CV_EXPORTS Kernel;
class CV_EXPORTS Program;
class CV_EXPORTS ProgramSource;
class CV_EXPORTS ProgramSource2;
class CV_EXPORTS Queue;
class CV_EXPORTS Device
@ -199,22 +199,22 @@ protected:
};
class CV_EXPORTS Context
class CV_EXPORTS Context2
{
public:
Context();
explicit Context(int dtype);
~Context();
Context(const Context& c);
Context& operator = (const Context& c);
Context2();
explicit Context2(int dtype);
~Context2();
Context2(const Context2& c);
Context2& operator = (const Context2& c);
bool create(int dtype);
size_t ndevices() const;
const Device& device(size_t idx) const;
Program getProg(const ProgramSource& prog,
Program getProg(const ProgramSource2& prog,
const String& buildopt, String& errmsg);
static Context& getDefault();
static Context2& getDefault();
void* ptr() const;
protected:
struct Impl;
@ -226,12 +226,12 @@ class CV_EXPORTS Queue
{
public:
Queue();
explicit Queue(const Context& c, const Device& d=Device());
explicit Queue(const Context2& c, const Device& d=Device());
~Queue();
Queue(const Queue& q);
Queue& operator = (const Queue& q);
bool create(const Context& c=Context(), const Device& d=Device());
bool create(const Context2& c=Context2(), const Device& d=Device());
void finish();
void* ptr() const;
static Queue& getDefault();
@ -245,41 +245,55 @@ protected:
class CV_EXPORTS KernelArg
{
public:
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8 };
KernelArg(int _flags, UMat* _m, void* _obj=0, size_t _sz=0);
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, NO_SIZE=256 };
KernelArg(int _flags, UMat* _m, int wscale=1, const void* _obj=0, size_t _sz=0);
KernelArg();
static KernelArg Local() { return KernelArg(LOCAL, 0); }
static KernelArg ReadOnly(const UMat& m) { return KernelArg(READ_ONLY, (UMat*)&m); }
static KernelArg WriteOnly(const UMat& m) { return KernelArg(WRITE_ONLY, (UMat*)&m); }
static KernelArg ReadWrite(const UMat& m, int wscale=1)
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale); }
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1)
{ return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale); }
static KernelArg ReadOnly(const UMat& m, int wscale=1)
{ return KernelArg(READ_ONLY, (UMat*)&m, wscale); }
static KernelArg WriteOnly(const UMat& m, int wscale=1)
{ return KernelArg(WRITE_ONLY, (UMat*)&m, wscale); }
static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1)
{ return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale); }
static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1)
{ return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale); }
static KernelArg Constant(const Mat& m);
template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
{ return KernelArg(CONSTANT, 0, (void*)arr, n); }
{ return KernelArg(CONSTANT, 0, 1, (void*)arr, n); }
int flags;
UMat* m;
void* obj;
const void* obj;
size_t sz;
int wscale;
};
class CV_EXPORTS Kernel
{
public:
Kernel();
Kernel(const char* kname, const Program& prog);
Kernel(const char* kname, const ProgramSource& prog,
const String& buildopts, String& errmsg);
Kernel(const char* kname, const ProgramSource2& prog,
const String& buildopts, String* errmsg=0);
~Kernel();
Kernel(const Kernel& k);
Kernel& operator = (const Kernel& k);
bool empty() const;
bool create(const char* kname, const Program& prog);
bool create(const char* kname, const ProgramSource& prog,
const String& buildopts, String& errmsg);
bool create(const char* kname, const ProgramSource2& prog,
const String& buildopts, String* errmsg=0);
void set(int i, const void* value, size_t sz);
void set(int i, const UMat& m);
void set(int i, const KernelArg& arg);
template<typename _Tp> void set(int i, const _Tp& value)
int set(int i, const void* value, size_t sz);
int set(int i, const UMat& m);
int set(int i, const KernelArg& arg);
template<typename _Tp> int set(int i, const _Tp& value)
{ return set(i, &value, sizeof(value)); }
template<typename _Tp0>
@ -291,26 +305,27 @@ public:
template<typename _Tp0, typename _Tp1>
Kernel& args(const _Tp0& a0, const _Tp1& a1)
{
set(0, a0); set(1, a1); return *this;
int i = set(0, a0); set(i, a1); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
{
set(0, a0); set(1, a1); set(2, a2); return *this;
int i = set(0, a0); i = set(i, a1); set(i, a2); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
const _Tp3& a3, const _Tp4& a4)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
i = set(i, a3); set(i, a4); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2,
@ -318,8 +333,8 @@ public:
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
{
set(0, a0); set(1, a1); set(2, a2);
set(3, a3); set(4, a4); set(5, a5); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
i = set(i, a3); i = set(i, a4); set(i, a5); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
@ -327,8 +342,8 @@ public:
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3);
set(4, a4); set(5, a5); set(6, a6); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
@ -336,8 +351,8 @@ public:
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3);
set(4, a4); set(5, a5); set(6, a6); set(7, a7); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
@ -346,8 +361,8 @@ public:
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4);
set(5, a5); set(6, a6); set(7, a7); set(8, a8); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
@ -356,8 +371,8 @@ public:
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
set(6, a6); set(7, a7); set(8, a8); set(9, a9); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
@ -367,8 +382,8 @@ public:
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
@ -378,13 +393,13 @@ public:
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
{
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); set(11, a11); return *this;
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
}
void run(int dims, size_t offset[], size_t globalsize[],
bool run(int dims, size_t globalsize[],
size_t localsize[], bool sync, const Queue& q=Queue());
void runTask(bool sync, const Queue& q=Queue());
bool runTask(bool sync, const Queue& q=Queue());
size_t workGroupSize() const;
bool compileWorkGroupSize(size_t wsz[]) const;
@ -401,7 +416,7 @@ class CV_EXPORTS Program
{
public:
Program();
Program(const ProgramSource& src,
Program(const ProgramSource2& src,
const String& buildflags, String& errmsg);
explicit Program(const String& buf);
Program(const Program& prog);
@ -409,12 +424,12 @@ public:
Program& operator = (const Program& prog);
~Program();
bool create(const ProgramSource& src,
bool create(const ProgramSource2& src,
const String& buildflags, String& errmsg);
bool read(const String& buf, const String& buildflags);
bool write(String& buf) const;
const ProgramSource& source() const;
const ProgramSource2& source() const;
void* ptr() const;
String getPrefix() const;
@ -426,17 +441,17 @@ protected:
};
class CV_EXPORTS ProgramSource
class CV_EXPORTS ProgramSource2
{
public:
typedef uint64 hash_t;
ProgramSource();
explicit ProgramSource(const String& prog);
explicit ProgramSource(const char* prog);
~ProgramSource();
ProgramSource(const ProgramSource& prog);
ProgramSource& operator = (const ProgramSource& prog);
ProgramSource2();
explicit ProgramSource2(const String& prog);
explicit ProgramSource2(const char* prog);
~ProgramSource2();
ProgramSource2(const ProgramSource2& prog);
ProgramSource2& operator = (const ProgramSource2& prog);
const String& source() const;
hash_t hash() const;
@ -446,6 +461,10 @@ protected:
Impl* p;
};
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t);
}}
#endif

@ -0,0 +1,60 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_OPENCL_GENBASE_HPP__
#define __OPENCV_OPENCL_GENBASE_HPP__
namespace cv
{
namespace ocl
{
// Plain descriptor of one OpenCL program embedded into the library.
// Instances are emitted by cmake/cl2cpp.cmake into the generated
// opencl_kernels.cpp as aggregate initializers {name, programStr, programHash},
// so the field order below must stay in sync with that script.
struct ProgramEntry
{
// Identifier of the program; cl2cpp.cmake fills it with the .cl file name.
const char* name;
// Text of the OpenCL program as embedded by cl2cpp.cmake.
const char* programStr;
// MD5 digest string that cl2cpp.cmake computes over the embedded program text.
const char* programHash;
};
}
}
#endif

@ -47,6 +47,7 @@
// */
#include "precomp.hpp"
#include "opencl_kernels.hpp"
namespace cv
{
@ -911,33 +912,112 @@ void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t bl
scbuf[i] = scbuf[i - esz];
}
static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, const BinaryFunc* tab, bool bitwise)
// Identifiers of the element-wise operations that have an OpenCL implementation.
// The numeric values are used as indices into oclop2str[] below, so the two
// lists must be kept in the same order.
enum
{
    OCL_OP_ADD         = 0,
    OCL_OP_SUB         = 1,
    OCL_OP_RSUB        = 2,
    OCL_OP_ABSDIFF     = 3,
    OCL_OP_MUL         = 4,
    OCL_OP_MUL_SCALE   = 5,
    OCL_OP_DIV_SCALE   = 6,
    OCL_OP_RECIP_SCALE = 7,
    OCL_OP_ADDW        = 8,
    OCL_OP_AND         = 9,
    OCL_OP_OR          = 10,
    OCL_OP_XOR         = 11,
    OCL_OP_NOT         = 12,
    OCL_OP_MIN         = 13,
    OCL_OP_MAX         = 14
};

// Preprocessor symbol passed to the kernel build (via "-D <symbol>") for each
// OCL_OP_* value; the list is terminated by a null pointer.
static const char* oclop2str[] =
{
    "OP_ADD",
    "OP_SUB",
    "OP_RSUB",
    "OP_ABSDIFF",
    "OP_MUL",
    "OP_MUL_SCALE",
    "OP_DIV_SCALE",
    "OP_RECIP_SCALE",
    "OP_ADDW",
    "OP_AND",
    "OP_OR",
    "OP_XOR",
    "OP_NOT",
    "OP_MIN",
    "OP_MAX",
    0
};
// Tries to execute an element-wise binary/bitwise operation with the OpenCL
// kernel "KF" built from ocl::core::arithm_oclsrc.
//
//   _src1, _src2 - operands; when haveScalar is true _src2 is read as a scalar
//                  (and is not read at all for OCL_OP_NOT)
//   _dst         - destination; it is only fetched with getUMat() here, not
//                  created by this function
//   _mask        - optional mask (empty means "no mask")
//   bitwise      - selects ocl::memopTypeToStr() instead of ocl::typeToStr()
//                  for the dstT build option
//   oclop        - one of OCL_OP_*; used as an index into oclop2str[]
//   haveScalar   - selects the UNARY_OP kernel flavour with a scalar argument
//
// Returns false when the operation is not handled here (negative oclop, more
// than 4 channels together with a mask or scalar, or an empty kernel);
// otherwise returns the result of Kernel::run().
static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, bool bitwise, int oclop, bool haveScalar )
{
int kind1 = _src1.kind(), kind2 = _src2.kind();
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
bool haveMask = !_mask.empty();
int srctype = _src1.type();
int srcdepth = CV_MAT_DEPTH(srctype);
int cn = CV_MAT_CN(srctype);
// The masked and scalar kernel flavours work on whole cn-channel elements,
// which is only attempted for up to 4 channels.
if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) )
return false;
UMat src1 = _src1.getUMat(), src2;
UMat dst = _dst.getUMat(), mask = _mask.getUMat();
char opts[1024];
// Channel count seen by the kernel: the real one when a mask or scalar is
// involved, otherwise 1 (the row is then processed as cols*cn single values).
int kercn = haveMask || haveScalar ? cn : 1;
// Build options: "[MASK_]UNARY_OP" or "[MASK_]BINARY_OP", the operation symbol
// and the element type the kernel works with.
sprintf(opts, "-D %s%s -D %s -D dstT=%s",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop],
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)));
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
if( k.empty() )
return false;
// Width scale handed to the kernel arguments: cn when channels are flattened
// (kercn == 1), 1 otherwise.
int cscale = cn/kercn;
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale);
// With a mask the destination is passed read-write (presumably so that
// elements outside the mask keep their previous values - confirm against
// arithm.cl); without a mask it is write-only.
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) :
ocl::KernelArg::WriteOnly(dst, cscale);
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
if( haveScalar )
{
size_t esz = CV_ELEM_SIZE(srctype);
// Scalar operand converted to the source type; stays all-zero for OCL_OP_NOT,
// where _src2 is not read.
double buf[4] = {0,0,0,0};
if( oclop != OCL_OP_NOT )
{
Mat src2sc = _src2.getMat();
convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1);
}
// Raw argument: esz bytes taken from buf, no UMat attached.
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);
if( !haveMask )
k.args(src1arg, dstarg, scalararg);
else
k.args(src1arg, maskarg, dstarg, scalararg);
}
else
{
src2 = _src2.getUMat();
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);
if( !haveMask )
k.args(src1arg, src2arg, dstarg);
else
k.args(src1arg, src2arg, maskarg, dstarg);
}
// 2D global size: cols*(cn/kercn) by rows; no local size is given and the
// sync flag is false.
size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows };
return k.run(2, globalsize, 0, false);
}
static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, const BinaryFunc* tab,
bool bitwise, int oclop )
{
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
int kind1 = psrc1->kind(), kind2 = psrc2->kind();
int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
int dims1 = psrc1->dims(), dims2 = psrc2->dims();
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
bool haveMask = !_mask.empty(), haveScalar = false;
BinaryFunc func;
int c;
if( src1.dims <= 2 && src2.dims <= 2 && kind1 == kind2 &&
src1.size() == src2.size() && src1.type() == src2.type() && !haveMask )
if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask )
{
_dst.create(src1.size(), src1.type());
Mat dst = _dst.getMat();
_dst.create(sz1, type1);
if( use_opencl && ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false) )
return;
if( bitwise )
{
func = *tab;
c = (int)src1.elemSize();
cn = (int)CV_ELEM_SIZE(type1);
}
else
{
func = tab[src1.depth()];
c = src1.channels();
}
func = tab[depth1];
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
Size sz = getContinuousSize(src1, src2, dst);
size_t len = sz.width*(size_t)c;
size_t len = sz.width*(size_t)cn;
if( len == (size_t)(int)len )
{
sz.width = (int)len;
@ -946,56 +1026,67 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
}
}
if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
src1.size != src2.size || src1.type() != src2.type() )
if( oclop == OCL_OP_NOT )
haveScalar = true;
else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
!psrc1->sameSize(*psrc2) || type1 != type2 )
{
if( checkScalar(src1, src2.type(), kind1, kind2) )
if( checkScalar(*psrc1, type2, kind1, kind2) )
{
// src1 is a scalar; swap it with src2
swap(src1, src2);
else if( !checkScalar(src2, src1.type(), kind2, kind1) )
swap(psrc1, psrc2);
swap(type1, type2);
swap(depth1, depth2);
swap(cn, cn2);
swap(sz1, sz2);
}
else if( !checkScalar(*psrc2, type1, kind2, kind1) )
CV_Error( CV_StsUnmatchedSizes,
"The operation is neither 'array op array' (where arrays have the same size and type), "
"nor 'array op scalar', nor 'scalar op array'" );
haveScalar = true;
}
else
{
CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 );
}
size_t esz = src1.elemSize();
size_t esz = CV_ELEM_SIZE(type1);
size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
int cn = src1.channels();
BinaryFunc copymask = 0;
Mat mask;
bool reallocate = false;
if( haveMask )
{
mask = _mask.getMat();
CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) );
CV_Assert( mask.size == src1.size );
int mtype = _mask.type();
CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1));
copymask = getCopyMaskFunc(esz);
Mat tdst = _dst.getMat();
reallocate = tdst.size != src1.size || tdst.type() != src1.type();
reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1;
}
AutoBuffer<uchar> _buf;
uchar *scbuf = 0, *maskbuf = 0;
_dst.create(src1.dims, src1.size, src1.type());
Mat dst = _dst.getMat();
_dst.createSameSize(*psrc1, type1);
// if this is mask operation and dst has been reallocated,
// we have to
// we have to clear the destination
if( haveMask && reallocate )
dst = Scalar::all(0);
_dst.setTo(0.);
if( use_opencl && ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar ))
return;
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat();
Mat dst = _dst.getMat(), mask = _mask.getMat();
if( bitwise )
{
func = *tab;
c = (int)esz;
cn = (int)esz;
}
else
{
func = tab[src1.depth()];
c = cn;
func = tab[depth1];
}
if( !haveScalar )
@ -1006,8 +1097,8 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
NAryMatIterator it(arrays, ptrs);
size_t total = it.size, blocksize = total;
if( blocksize*c > INT_MAX )
blocksize = INT_MAX/c;
if( blocksize*cn > INT_MAX )
blocksize = INT_MAX/cn;
if( haveMask )
{
@ -1022,7 +1113,7 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
{
int bsz = (int)MIN(total - j, blocksize);
func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*c, 1), 0 );
func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*cn, 1), 0 );
if( haveMask )
{
copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
@ -1054,7 +1145,7 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
{
int bsz = (int)MIN(total - j, blocksize);
func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*c, 1), 0 );
func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*cn, 1), 0 );
if( haveMask )
{
copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
@ -1101,47 +1192,59 @@ static BinaryFunc* getMinTab()
void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(and8u);
binary_op(a, b, c, mask, &f, true);
binary_op(a, b, c, mask, &f, true, OCL_OP_AND);
}
void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(or8u);
binary_op(a, b, c, mask, &f, true);
binary_op(a, b, c, mask, &f, true, OCL_OP_OR);
}
void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(xor8u);
binary_op(a, b, c, mask, &f, true);
binary_op(a, b, c, mask, &f, true, OCL_OP_XOR);
}
void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
{
BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(not8u);
binary_op(a, a, c, mask, &f, true);
binary_op(a, a, c, mask, &f, true, OCL_OP_NOT);
}
void cv::max( InputArray src1, InputArray src2, OutputArray dst )
{
binary_op(src1, src2, dst, noArray(), getMaxTab(), false );
binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
}
void cv::min( InputArray src1, InputArray src2, OutputArray dst )
{
binary_op(src1, src2, dst, noArray(), getMinTab(), false );
binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN );
}
void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
{
OutputArray _dst(dst);
binary_op(src1, src2, _dst, noArray(), getMaxTab(), false );
binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
}
void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
{
OutputArray _dst(dst);
binary_op(src1, src2, _dst, noArray(), getMinTab(), false );
binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
}
void cv::max(const UMat& src1, const UMat& src2, UMat& dst)
{
OutputArray _dst(dst);
binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
}
void cv::min(const UMat& src1, const UMat& src2, UMat& dst)
{
OutputArray _dst(dst);
binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
}
@ -1171,73 +1274,213 @@ static int actualScalarDepth(const double* data, int len)
CV_32S;
}
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false, void* usrdata=0)
static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, int wtype,
void* usrdata, int oclop,
bool haveScalar )
{
int kind1 = _src1.kind(), kind2 = _src2.kind();
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
bool haveMask = !_mask.empty();
bool reallocate = false;
bool src1Scalar = checkScalar(src1, src2.type(), kind1, kind2);
bool src2Scalar = checkScalar(src2, src1.type(), kind2, kind1);
if( (haveMask || haveScalar) && cn > 4 )
return false;
if( (kind1 == kind2 || src1.channels() == 1) && src1.dims <= 2 && src2.dims <= 2 &&
src1.size() == src2.size() && src1.type() == src2.type() &&
!haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == src1.depth())) ||
(_dst.fixedType() && _dst.type() == _src1.type())) &&
int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = CV_MAT_DEPTH(wtype);
wtype = CV_MAKETYPE(wdepth, cn);
int type2 = haveScalar ? _src2.type() : wtype, depth2 = CV_MAT_DEPTH(type2);
UMat src1 = _src1.getUMat(), src2;
UMat dst = _dst.getUMat(), mask = _mask.getUMat();
char opts[1024];
int kercn = haveMask || haveScalar ? cn : 1;
if( (depth1 == depth2 || haveScalar) && ddepth == depth1 && wdepth == depth1 )
{
    // Sources, working type and destination all share one depth, so the
    // build options only need the operation name and the destination type.
    const char* oclopstr = oclop2str[oclop];
    if( wdepth <= CV_16S )
    {
        // For depths up to CV_16S, add/sub/rsub are replaced by their
        // *_SAT operation names.
        oclopstr = oclop == OCL_OP_ADD ? "OCL_OP_ADD_SAT" :
                   oclop == OCL_OP_SUB ? "OCL_OP_SUB_SAT" :
                   oclop == OCL_OP_RSUB ? "OCL_OP_RSUB_SAT" : oclopstr;
    }
    // Pass the selected name (oclopstr); using oclop2str[oclop] here would
    // silently discard the *_SAT selection made above.
    sprintf(opts, "-D %s%s -D %s -D dstT=%s",
            (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
            oclopstr, ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)));
}
else
{
char cvtstr[3][32];
sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s "
"-D dstT=%s -D workT=%s -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]));
}
const uchar* usrdata_p = (const uchar*)usrdata;
const double* usrdata_d = (const double*)usrdata;
float usrdata_f[3];
int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
if( n > 0 && wdepth == CV_32F )
{
for( i = 0; i < n; i++ )
usrdata_f[i] = (float)usrdata_d[i];
usrdata_p = (const uchar*)usrdata_f;
}
size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
if( k.empty() )
return false;
int cscale = cn/kercn;
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale);
ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) :
ocl::KernelArg::WriteOnly(dst, cscale);
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
if( haveScalar )
{
size_t esz = CV_ELEM_SIZE(wtype);
double buf[4]={0,0,0,0};
Mat src2sc = _src2.getMat();
if( !src2sc.empty() )
{
convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1);
}
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);
if( !haveMask )
k.args(src1arg, dstarg, scalararg);
else
k.args(src1arg, maskarg, dstarg, scalararg);
}
else
{
src2 = _src2.getUMat();
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);
if( !haveMask )
{
if(n == 0)
k.args(src1arg, src2arg, dstarg);
else if(n == 1)
k.args(src1arg, src2arg, dstarg,
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz));
else if(n == 3)
k.args(src1arg, src2arg, dstarg,
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz),
ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
else
CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
}
else
{
k.args(src1arg, src2arg, maskarg, dstarg);
}
}
size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows };
return k.run(2, globalsize, 0, false);
}
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false,
void* usrdata=0, int oclop=-1 )
{
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
int kind1 = psrc1->kind(), kind2 = psrc2->kind();
bool haveMask = !_mask.empty();
bool reallocate = false;
int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 &&
!haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) ||
(_dst.fixedType() && _dst.type() == type1)) &&
((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
{
_dst.create(src1.size(), src1.type());
Mat dst = _dst.getMat();
_dst.createSameSize(*psrc1, type1);
if( use_opencl &&
ocl_arithm_op(*psrc1, *psrc2, _dst, _mask,
(!usrdata ? type1 : std::max(depth1, CV_32F)),
usrdata, oclop, false))
return;
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
Size sz = getContinuousSize(src1, src2, dst, src1.channels());
tab[src1.depth()](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata);
tab[depth1](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata);
return;
}
bool haveScalar = false, swapped12 = false;
int depth2 = src2.depth();
if( src1.size != src2.size || src1.channels() != src2.channels() ||
if( dims1 != dims2 || sz1 != sz2 || cn != cn2 ||
((kind1 == _InputArray::MATX || kind2 == _InputArray::MATX) &&
src1.cols == 1 && src2.rows == 4) )
(sz1 == Size(1,4) || sz2 == Size(1,4))) )
{
if( checkScalar(src1, src2.type(), kind1, kind2) )
if( checkScalar(*psrc1, type2, kind1, kind2) )
{
// src1 is a scalar; swap it with src2
swap(src1, src2);
swap(psrc1, psrc2);
swap(sz1, sz2);
swap(type1, type2);
swap(depth1, depth2);
swap(cn, cn2);
swap(dims1, dims2);
swapped12 = true;
if( oclop == OCL_OP_SUB )
oclop = OCL_OP_RSUB;
}
else if( !checkScalar(src2, src1.type(), kind2, kind1) )
else if( !checkScalar(*psrc2, type1, kind2, kind1) )
CV_Error( CV_StsUnmatchedSizes,
"The operation is neither 'array op array' (where arrays have the same size and the same number of channels), "
"The operation is neither 'array op array' "
"(where arrays have the same size and the same number of channels), "
"nor 'array op scalar', nor 'scalar op array'" );
haveScalar = true;
CV_Assert(src2.type() == CV_64F && (src2.rows == 4 || src2.rows == 1));
CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));
if (!muldiv)
{
depth2 = actualScalarDepth(src2.ptr<double>(), src1.channels());
if( depth2 == CV_64F && (src1.depth() < CV_32S || src1.depth() == CV_32F) )
Mat sc = psrc2->getMat();
depth2 = actualScalarDepth(sc.ptr<double>(), cn);
if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) )
depth2 = CV_32F;
}
else
depth2 = CV_64F;
}
int cn = src1.channels(), depth1 = src1.depth(), wtype;
BinaryFunc cvtsrc1 = 0, cvtsrc2 = 0, cvtdst = 0;
if( dtype < 0 )
{
if( _dst.fixedType() )
dtype = _dst.type();
else
{
if( !haveScalar && src1.type() != src2.type() )
if( !haveScalar && type1 != type2 )
CV_Error(CV_StsBadArg,
"When the input arrays in add/subtract/multiply/divide functions have different types, "
"the output array type must be explicitly specified");
dtype = src1.type();
dtype = type1;
}
}
dtype = CV_MAT_DEPTH(dtype);
@ -1262,39 +1505,41 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
wtype = std::max(wtype, dtype);
}
cvtsrc1 = depth1 == wtype ? 0 : getConvertFunc(depth1, wtype);
cvtsrc2 = depth2 == depth1 ? cvtsrc1 : depth2 == wtype ? 0 : getConvertFunc(depth2, wtype);
cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
dtype = CV_MAKETYPE(dtype, cn);
wtype = CV_MAKETYPE(wtype, cn);
size_t esz1 = src1.elemSize(), esz2 = src2.elemSize();
size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
BinaryFunc copymask = 0;
Mat mask;
if( haveMask )
{
mask = _mask.getMat();
CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) );
CV_Assert( mask.size == src1.size );
copymask = getCopyMaskFunc(dsz);
Mat tdst = _dst.getMat();
reallocate = tdst.size != src1.size || tdst.type() != dtype;
int mtype = _mask.type();
CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) );
reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype;
}
AutoBuffer<uchar> _buf;
uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
size_t bufesz = (cvtsrc1 ? wsz : 0) + (cvtsrc2 || haveScalar ? wsz : 0) + (cvtdst ? wsz : 0) + (haveMask ? dsz : 0);
_dst.createSameSize(*psrc1, dtype);
if( reallocate )
_dst.setTo(0.);
_dst.create(src1.dims, src1.size, dtype);
Mat dst = _dst.getMat();
if( use_opencl &&
ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
usrdata, oclop, haveScalar))
return;
if( haveMask && reallocate )
dst = Scalar::all(0);
BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
BinaryFunc copymask = getCopyMaskFunc(dsz);
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat();
AutoBuffer<uchar> _buf;
uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
size_t bufesz = (cvtsrc1 ? wsz : 0) +
(cvtsrc2 || haveScalar ? wsz : 0) +
(cvtdst ? wsz : 0) +
(haveMask ? dsz : 0);
BinaryFunc func = tab[CV_MAT_DEPTH(wtype)];
if( !haveScalar )
@ -1476,7 +1721,7 @@ static BinaryFunc* getAbsDiffTab()
void cv::add( InputArray src1, InputArray src2, OutputArray dst,
InputArray mask, int dtype )
{
arithm_op(src1, src2, dst, mask, dtype, getAddTab() );
arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
}
void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
@ -1511,12 +1756,12 @@ void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
}
}
#endif
arithm_op(src1, src2, dst, mask, dtype, getSubTab() );
arithm_op(src1, src2, dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB );
}
void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
{
arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab());
arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
}
/****************************************************************************************\
@ -1847,19 +2092,20 @@ static BinaryFunc* getRecipTab()
void cv::multiply(InputArray src1, InputArray src2,
OutputArray dst, double scale, int dtype)
{
arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(), true, &scale);
arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
true, &scale, scale == 1. ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
}
void cv::divide(InputArray src1, InputArray src2,
OutputArray dst, double scale, int dtype)
{
arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale);
arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
}
void cv::divide(double scale, InputArray src2,
OutputArray dst, int dtype)
{
arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale);
arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
}
/****************************************************************************************\
@ -2020,7 +2266,7 @@ void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
double beta, double gamma, OutputArray dst, int dtype )
{
double scalars[] = {alpha, beta, gamma};
arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars);
arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
}

@ -220,6 +220,21 @@ void Mat::copyTo( OutputArray _dst ) const
return;
}
if( _dst.isUMat() )
{
_dst.create( dims, size.p, type() );
UMat dst = _dst.getUMat();
size_t i, sz[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
for( i = 0; i < (size_t)dims; i++ )
sz[i] = size.p[i];
sz[dims-1] *= esz;
dst.ndoffset(dstofs);
dstofs[dims-1] *= esz;
dst.u->currAllocator->upload(dst.u, data, dims, sz, dstofs, dst.step.p, step.p);
return;
}
if( dims <= 2 )
{
_dst.create( rows, cols, type() );

@ -1436,6 +1436,181 @@ Size _InputArray::size(int i) const
}
}
// Reports the dimensionality of the wrapped array and, when arrsz is not
// null, writes the extent of every dimension into arrsz.
//
// MAT and UMAT require i < 0; STD_VECTOR_MAT / STD_VECTOR_UMAT with i >= 0
// describe the i-th element of the vector. An empty (NONE) array yields 0
// and leaves arrsz untouched. Every other case is answered via size(i) and
// reported as 2 dimensions in (height, width) order.
int _InputArray::sizend(int* arrsz, int i) const
{
    const int k = kind();
    int ndims = 0;
    const int* extents = 0;   // per-dimension sizes of the selected n-dim array

    if( k == NONE )
        return 0;

    if( k == MAT )
    {
        CV_Assert( i < 0 );
        const Mat& m = *(const Mat*)obj;
        ndims = m.dims;
        extents = m.size.p;
    }
    else if( k == UMAT )
    {
        CV_Assert( i < 0 );
        const UMat& m = *(const UMat*)obj;
        ndims = m.dims;
        extents = m.size.p;
    }
    else if( k == STD_VECTOR_MAT && i >= 0 )
    {
        const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
        CV_Assert( i < (int)vv.size() );
        const Mat& m = vv[i];
        ndims = m.dims;
        extents = m.size.p;
    }
    else if( k == STD_VECTOR_UMAT && i >= 0 )
    {
        const std::vector<UMat>& vv = *(const std::vector<UMat>*)obj;
        CV_Assert( i < (int)vv.size() );
        const UMat& m = vv[i];
        ndims = m.dims;
        extents = m.size.p;
    }
    else
    {
        // Generic fallback: size(i) is always evaluated, even when the
        // caller does not want the extents.
        Size sz2d = size(i);
        if( arrsz )
        {
            arrsz[0] = sz2d.height;
            arrsz[1] = sz2d.width;
        }
        return 2;
    }

    if( arrsz )
    {
        for( int j = 0; j < ndims; j++ )
            arrsz[j] = extents[j];
    }
    return ndims;
}
bool _InputArray::sameSize(const _InputArray& arr) const
{
int k1 = kind(), k2 = arr.kind();
Size sz1;
if( k1 == MAT )
{
const Mat* m = ((const Mat*)obj);
if( k2 == MAT )
return m->size == ((const Mat*)arr.obj)->size;
if( k2 == UMAT )
return m->size == ((const UMat*)arr.obj)->size;
if( m->dims > 2 )
return false;
sz1 = m->size();
}
else if( k1 == UMAT )
{
const UMat* m = ((const UMat*)obj);
if( k2 == MAT )
return m->size == ((const Mat*)arr.obj)->size;
if( k2 == UMAT )
return m->size == ((const UMat*)arr.obj)->size;
if( m->dims > 2 )
return false;
sz1 = m->size();
}
else
sz1 = size();
if( arr.dims() > 2 )
return false;
return sz1 == arr.size();
}
// Returns the number of dimensions of the wrapped array.
//
// For single-array kinds i must be negative. For vector-of-array kinds
// (STD_VECTOR_VECTOR, STD_VECTOR_MAT, STD_VECTOR_UMAT) a negative i describes
// the vector itself (1 dimension) and a non-negative i selects an element.
// An empty (NONE) array has 0 dimensions.
int _InputArray::dims(int i) const
{
    int k = kind();

    if( k == MAT )
    {
        CV_Assert( i < 0 );
        return ((const Mat*)obj)->dims;
    }

    if( k == EXPR )
    {
        CV_Assert( i < 0 );
        return ((const MatExpr*)obj)->a.dims;
    }

    if( k == UMAT )
    {
        CV_Assert( i < 0 );
        return ((const UMat*)obj)->dims;
    }

    if( k == MATX )
    {
        CV_Assert( i < 0 );
        return 2;
    }

    if( k == STD_VECTOR )
    {
        CV_Assert( i < 0 );
        return 2;
    }

    if( k == NONE )
        return 0;

    if( k == STD_VECTOR_VECTOR )
    {
        const std::vector<std::vector<uchar> >& vv = *(const std::vector<std::vector<uchar> >*)obj;
        if( i < 0 )
            return 1;
        CV_Assert( i < (int)vv.size() );
        return 2;
    }

    if( k == STD_VECTOR_MAT )
    {
        const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
        if( i < 0 )
            return 1;
        CV_Assert( i < (int)vv.size() );

        return vv[i].dims;
    }

    // Mirror the STD_VECTOR_MAT handling for vectors of UMat; without this
    // branch such inputs would hit the CUDA_MEM assertion at the bottom.
    if( k == STD_VECTOR_UMAT )
    {
        const std::vector<UMat>& vv = *(const std::vector<UMat>*)obj;
        if( i < 0 )
            return 1;
        CV_Assert( i < (int)vv.size() );

        return vv[i].dims;
    }

    if( k == OPENGL_BUFFER )
    {
        CV_Assert( i < 0 );
        return 2;
    }

    if( k == GPU_MAT )
    {
        CV_Assert( i < 0 );
        return 2;
    }

    if( k == OCL_MAT )
    {
        return 2;
    }

    // Only CUDA_MEM is left; any other kind is a programming error.
    CV_Assert( k == CUDA_MEM );
    //if( k == CUDA_MEM )
    {
        CV_Assert( i < 0 );
        return 2;
    }
}
size_t _InputArray::total(int i) const
{
int k = kind();
@ -1570,6 +1745,61 @@ bool _InputArray::empty() const
return ((const cuda::CudaMem*)obj)->empty();
}
bool _InputArray::isContinuous(int i) const
{
int k = kind();
if( k == MAT )
return i < 0 ? ((const Mat*)obj)->isContinuous() : true;
if( k == UMAT )
return i < 0 ? ((const UMat*)obj)->isContinuous() : true;
if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR)
return true;
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
CV_Assert((size_t)i < vv.size());
return vv[i].isContinuous();
}
if( k == STD_VECTOR_UMAT )
{
const std::vector<UMat>& vv = *(const std::vector<UMat>*)obj;
CV_Assert((size_t)i < vv.size());
return vv[i].isContinuous();
}
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
return false;
}
void _InputArray::copyTo(const _OutputArray& arr) const
{
int k = kind();
if( k == NONE )
arr.release();
else if( k == MAT || k == MATX || k == STD_VECTOR )
{
Mat m = getMat();
m.copyTo(arr);
}
else if( k == EXPR )
{
const MatExpr& e = *((MatExpr*)obj);
if( arr.kind() == MAT )
arr.getMatRef() = e;
else
Mat(e).copyTo(arr);
}
else if( k == UMAT )
((UMat*)obj)->copyTo(arr);
else
CV_Error(Error::StsNotImplemented, "");
}
bool _OutputArray::fixedSize() const
{
@ -1665,7 +1895,7 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
void _OutputArray::create(int d, const int* sizes, int mtype, int i,
bool allowTransposed, int fixedDepthMask) const
{
int k = kind();
@ -1683,7 +1913,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
m.release();
}
if( dims == 2 && m.dims == 2 && m.data &&
if( d == 2 && m.dims == 2 && m.data &&
m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] )
return;
}
@ -1697,11 +1927,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
}
if(fixedSize())
{
CV_Assert(m.dims == dims);
for(int j = 0; j < dims; ++j)
CV_Assert(m.dims == d);
for(int j = 0; j < d; ++j)
CV_Assert(m.size[j] == sizes[j]);
}
m.create(dims, sizes, mtype);
m.create(d, sizes, mtype);
return;
}
@ -1717,7 +1947,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
m.release();
}
if( dims == 2 && m.dims == 2 && !m.empty() &&
if( d == 2 && m.dims == 2 && !m.empty() &&
m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] )
return;
}
@ -1731,11 +1961,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
}
if(fixedSize())
{
CV_Assert(m.dims == dims);
for(int j = 0; j < dims; ++j)
CV_Assert(m.dims == d);
for(int j = 0; j < d; ++j)
CV_Assert(m.size[j] == sizes[j]);
}
m.create(dims, sizes, mtype);
m.create(d, sizes, mtype);
return;
}
@ -1744,14 +1974,14 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
CV_Assert( i < 0 );
int type0 = CV_MAT_TYPE(flags);
CV_Assert( mtype == type0 || (CV_MAT_CN(mtype) == 1 && ((1 << type0) & fixedDepthMask) != 0) );
CV_Assert( dims == 2 && ((sizes[0] == sz.height && sizes[1] == sz.width) ||
CV_Assert( d == 2 && ((sizes[0] == sz.height && sizes[1] == sz.width) ||
(allowTransposed && sizes[0] == sz.width && sizes[1] == sz.height)));
return;
}
if( k == STD_VECTOR || k == STD_VECTOR_VECTOR )
{
CV_Assert( dims == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
CV_Assert( d == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
size_t len = sizes[0]*sizes[1] > 0 ? sizes[0] + sizes[1] - 1 : 0;
std::vector<uchar>* v = (std::vector<uchar>*)obj;
@ -1843,7 +2073,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
if( i < 0 )
{
CV_Assert( dims == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
CV_Assert( d == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) );
size_t len = sizes[0]*sizes[1] > 0 ? sizes[0] + sizes[1] - 1 : 0, len0 = v.size();
CV_Assert(!fixedSize() || len == len0);
@ -1873,7 +2103,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
m.release();
}
if( dims == 2 && m.dims == 2 && m.data &&
if( d == 2 && m.dims == 2 && m.data &&
m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] )
return;
}
@ -1887,18 +2117,24 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
}
if(fixedSize())
{
CV_Assert(m.dims == dims);
for(int j = 0; j < dims; ++j)
CV_Assert(m.dims == d);
for(int j = 0; j < d; ++j)
CV_Assert(m.size[j] == sizes[j]);
}
m.create(dims, sizes, mtype);
m.create(d, sizes, mtype);
return;
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
void _OutputArray::createSameSize(const _InputArray& arr, int mtype) const
{
int arrsz[CV_MAX_DIM], d = arr.sizend(arrsz);
create(d, arrsz, mtype);
}
void _OutputArray::release() const
{
CV_Assert(!fixedSize());
@ -2010,6 +2246,23 @@ cuda::CudaMem& _OutputArray::getCudaMemRef() const
return *(cuda::CudaMem*)obj;
}
void _OutputArray::setTo(const _InputArray& arr) const
{
int k = kind();
if( k == NONE )
;
else if( k == MAT || k == MATX || k == STD_VECTOR )
{
Mat m = getMat();
m.setTo(arr);
}
else if( k == UMAT )
((UMat*)obj)->setTo(arr);
else
CV_Error(Error::StsNotImplemented, "");
}
static _InputOutputArray _none;
InputOutputArray noArray() { return _none; }

@ -114,8 +114,13 @@ typedef struct _cl_sampler * cl_sampler;
typedef int cl_int;
typedef unsigned cl_uint;
typedef long cl_long;
typedef unsigned long cl_ulong;
#if defined (_WIN32) && defined(_MSC_VER)
typedef __int64 cl_long;
typedef unsigned __int64 cl_ulong;
#else
typedef long cl_long;
typedef unsigned long cl_ulong;
#endif
typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
typedef cl_ulong cl_bitfield;
@ -592,9 +597,16 @@ static void* initOpenCLAndLoad(const char* funcname)
{
if(!initialized)
{
handle = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY);
const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
"/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
handle = dlopen(oclpath, RTLD_LAZY);
initialized = true;
g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
if( g_haveOpenCL )
fprintf(stderr, "Succesffuly loaded OpenCL v1.1+ runtime from %s\n", oclpath);
else
fprintf(stderr, "Failed to load OpenCL runtime\n");
}
if(!handle)
return 0;
@ -1212,16 +1224,13 @@ namespace cv { namespace ocl {
struct UMat2D
{
UMat2D(const UMat& m, int accessFlags)
UMat2D(const UMat& m)
{
CV_Assert(m.dims == 2);
data = (cl_mem)m.handle(accessFlags);
offset = m.offset;
step = m.step;
rows = m.rows;
cols = m.cols;
}
cl_mem data;
size_t offset;
size_t step;
int rows;
@ -1230,10 +1239,8 @@ struct UMat2D
struct UMat3D
{
UMat3D(const UMat& m, int accessFlags)
UMat3D(const UMat& m)
{
CV_Assert(m.dims == 3);
data = (cl_mem)m.handle(accessFlags);
offset = m.offset;
step = m.step.p[1];
slicestep = m.step.p[0];
@ -1241,7 +1248,6 @@ struct UMat3D
rows = m.size.p[1];
cols = m.size.p[2];
}
cl_mem data;
size_t offset;
size_t slicestep;
size_t step;
@ -1315,7 +1321,7 @@ void setUseOpenCL(bool flag)
}
}
void finish()
void finish2()
{
Queue::getDefault().finish();
}
@ -1528,7 +1534,7 @@ String Device::OpenCLVersion() const
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
String Device::driverVersion() const
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
int Device::type() const
{ return p ? p->getProp<cl_device_type, int>(CL_DEVICE_TYPE) : 0; }
@ -1705,14 +1711,14 @@ size_t Device::profilingTimerResolution() const
const Device& Device::getDefault()
{
const Context& ctx = Context::getDefault();
const Context2& ctx = Context2::getDefault();
int idx = TLSData::get()->device;
return ctx.device(idx);
}
/////////////////////////////////////////////////////////////////////////////////////////
struct Context::Impl
struct Context2::Impl
{
Impl(int dtype0)
{
@ -1777,7 +1783,7 @@ struct Context::Impl
devices.clear();
}
Program getProg(const ProgramSource& src,
Program getProg(const ProgramSource2& src,
const String& buildflags, String& errmsg)
{
String prefix = Program::getPrefix(buildflags);
@ -1787,7 +1793,8 @@ struct Context::Impl
return it->second;
//String filename = format("%08x%08x_%08x%08x.clb2",
Program prog(src, buildflags, errmsg);
phash.insert(std::pair<HashKey,Program>(k, prog));
if(prog.ptr())
phash.insert(std::pair<HashKey,Program>(k, prog));
return prog;
}
@ -1797,7 +1804,7 @@ struct Context::Impl
std::vector<Device> devices;
bool initialized;
typedef ProgramSource::hash_t hash_t;
typedef ProgramSource2::hash_t hash_t;
struct HashKey
{
@ -1812,18 +1819,18 @@ struct Context::Impl
};
Context::Context()
// Creates an empty context handle (no implementation attached).
Context2::Context2()
    : p(0)
{
}
Context::Context(int dtype)
// Starts from an empty handle and then attempts to create a context for
// the given device type; the result of create() is not checked here.
Context2::Context2(int dtype)
    : p(0)
{
    create(dtype);
}
bool Context::create(int dtype0)
bool Context2::create(int dtype0)
{
if( !haveOpenCL() )
return false;
@ -1838,19 +1845,19 @@ bool Context::create(int dtype0)
return p != 0;
}
Context::~Context()
// Drops this handle's reference to the shared implementation.
// A default-constructed or failed-to-create context has p == 0, so the
// pointer must be checked before release() is called on it.
Context2::~Context2()
{
    if( p )
    {
        p->release();
        p = 0;
    }
}
Context::Context(const Context& c)
// Copy constructor: shares c's implementation and adds a reference to it
// when there is one.
Context2::Context2(const Context2& c)
    : p((Impl*)c.p)
{
    if( p )
        p->addref();
}
Context& Context::operator = (const Context& c)
Context2& Context2::operator = (const Context2& c)
{
Impl* newp = (Impl*)c.p;
if(newp)
@ -1861,30 +1868,30 @@ Context& Context::operator = (const Context& c)
return *this;
}
void* Context::ptr() const
// Returns the stored context handle, or 0 for an empty context.
// Queue::Impl tests the result for null on a possibly-empty context, so
// this must not dereference a null implementation pointer.
void* Context2::ptr() const
{
    return p ? p->handle : 0;
}
size_t Context::ndevices() const
// Number of devices in this context; 0 for an empty context.
size_t Context2::ndevices() const
{
    if( !p )
        return 0;
    return p->devices.size();
}
const Device& Context::device(size_t idx) const
// Returns the idx-th device of this context. For an empty context or an
// out-of-range index a shared default-constructed Device is returned.
const Device& Context2::device(size_t idx) const
{
    static Device dummy;
    if( p && idx < p->devices.size() )
        return p->devices[idx];
    return dummy;
}
Context& Context::getDefault()
Context2& Context2::getDefault()
{
static Context ctx;
static Context2 ctx;
if( !ctx.p && haveOpenCL() )
{
// do not create new Context right away.
// do not create new Context2 right away.
// First, try to retrieve existing context of the same type.
// In its turn, Platform::getContext() may call Context::create()
// In its turn, Platform::getContext() may call Context2::create()
// if there is no such context.
ctx.create(Device::TYPE_ACCELERATOR);
if(!ctx.p)
@ -1898,7 +1905,7 @@ Context& Context::getDefault()
return ctx;
}
Program Context::getProg(const ProgramSource& prog,
Program Context2::getProg(const ProgramSource2& prog,
const String& buildopts, String& errmsg)
{
return p ? p->getProg(prog, buildopts, errmsg) : Program();
@ -1906,14 +1913,14 @@ Program Context::getProg(const ProgramSource& prog,
struct Queue::Impl
{
Impl(const Context& c, const Device& d)
Impl(const Context2& c, const Device& d)
{
refcount = 1;
const Context* pc = &c;
const Context2* pc = &c;
cl_context ch = (cl_context)pc->ptr();
if( !ch )
{
pc = &Context::getDefault();
pc = &Context2::getDefault();
ch = (cl_context)pc->ptr();
}
cl_device_id dh = (cl_device_id)d.ptr();
@ -1943,7 +1950,7 @@ Queue::Queue()
p = 0;
}
Queue::Queue(const Context& c, const Device& d)
Queue::Queue(const Context2& c, const Device& d)
{
p = 0;
create(c, d);
@ -1973,7 +1980,7 @@ Queue::~Queue()
p->release();
}
bool Queue::create(const Context& c, const Device& d)
bool Queue::create(const Context2& c, const Device& d)
{
if(p)
p->release();
@ -1996,7 +2003,7 @@ Queue& Queue::getDefault()
{
Queue& q = TLSData::get()->oclQueue;
if( !q.p )
q.create(Context::getDefault());
q.create(Context2::getDefault());
return q;
}
@ -2008,15 +2015,20 @@ static cl_command_queue getQueue(const Queue& q)
return qq;
}
KernelArg::KernelArg(int _flags, UMat* _m, void* _obj, size_t _sz)
: flags(_flags), m(_m), obj(_obj), sz(_sz)
// Default argument: no flags, no UMat, no raw data, width scale of 1.
KernelArg::KernelArg()
    : flags(0),
      m(0),
      obj(0),
      sz(0),
      wscale(1)
{
}
// Stores the flags, the optional UMat, the width scale factor and the
// optional raw data pointer with its size in bytes, exactly as given.
KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, const void* _obj, size_t _sz)
    : flags(_flags),
      m(_m),
      obj(_obj),
      sz(_sz),
      wscale(_wscale)
{
}
KernelArg KernelArg::Constant(const Mat& m)
{
CV_Assert(m.isContinuous());
return KernelArg(CONSTANT, 0, m.data, m.total()*m.elemSize());
return KernelArg(CONSTANT, 0, 1, m.data, m.total()*m.elemSize());
}
@ -2099,8 +2111,8 @@ Kernel::Kernel(const char* kname, const Program& prog)
create(kname, prog);
}
Kernel::Kernel(const char* kname, const ProgramSource& src,
const String& buildopts, String& errmsg)
Kernel::Kernel(const char* kname, const ProgramSource2& src,
const String& buildopts, String* errmsg)
{
p = 0;
create(kname, src, buildopts, errmsg);
@ -2143,15 +2155,17 @@ bool Kernel::create(const char* kname, const Program& prog)
return p != 0;
}
bool Kernel::create(const char* kname, const ProgramSource& src,
const String& buildopts, String& errmsg)
bool Kernel::create(const char* kname, const ProgramSource2& src,
const String& buildopts, String* errmsg)
{
if(p)
{
p->release();
p = 0;
}
const Program& prog = Context::getDefault().getProg(src, buildopts, errmsg);
String tempmsg;
if( !errmsg ) errmsg = &tempmsg;
const Program& prog = Context2::getDefault().getProg(src, buildopts, *errmsg);
return create(kname, prog);
}
@ -2160,55 +2174,91 @@ void* Kernel::ptr() const
return p ? p->handle : 0;
}
void Kernel::set(int i, const void* value, size_t sz)
bool Kernel::empty() const
{
CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
return ptr() == 0;
}
// Sets kernel argument i to the sz bytes at value.
//
// Returns the index of the next argument (i + 1) on success, or -1 when the
// kernel is empty or clSetKernelArg reports an error. Setting argument 0
// first calls cleanupUMats() on the implementation.
int Kernel::set(int i, const void* value, size_t sz)
{
    CV_Assert(i >= 0);
    // The implementation pointer must be checked before cleanupUMats() is
    // called through it; an empty kernel has p == 0.
    if( !p )
        return -1;
    if( i == 0 )
        p->cleanupUMats();
    if( !p->handle || clSetKernelArg(p->handle, (cl_uint)i, sz, value) < 0 )
        return -1;
    return i+1;
}
void Kernel::set(int i, const UMat& m)
int Kernel::set(int i, const UMat& m)
{
set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
}
void Kernel::set(int i, const KernelArg& arg)
int Kernel::set(int i, const KernelArg& arg)
{
CV_Assert( p && p->handle );
CV_Assert( i >= 0 );
// Check the implementation pointer before calling through it: an empty
// kernel has p == 0 and must yield -1 rather than dereference null.
if( !p )
    return -1;
if( i == 0 )
    p->cleanupUMats();
if( !p->handle )
    return -1;
if( arg.m )
{
int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
cl_mem h = (cl_mem)arg.m->handle(accessFlags);
if( arg.m->dims <= 2 )
{
UMat2D u2d(*arg.m, accessFlags);
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d);
UMat2D u2d(*arg.m);
clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
i += 3;
if( !(arg.flags & KernelArg::NO_SIZE) )
{
int cols = u2d.cols*arg.wscale;
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.cols), &cols);
i += 2;
}
}
else
{
UMat3D u3d(*arg.m, accessFlags);
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d);
UMat3D u3d(*arg.m);
clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
i += 4;
if( !(arg.flags & KernelArg::NO_SIZE) )
{
    // Size arguments for a 3-D array: slices, rows, then the column count
    // multiplied by the argument's width scale.
    int cols = u3d.cols*arg.wscale;
    // The first of the three must carry the slice count; passing &u3d.rows
    // here would hand the kernel the row count twice.
    clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
    clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
    clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
    i += 3;
}
}
p->addUMat(*arg.m);
return i;
}
else
{
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
}
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
return i+1;
}
void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
bool Kernel::run(int dims, size_t globalsize[], size_t localsize[],
bool sync, const Queue& q)
{
CV_Assert(p && p->handle && p->e == 0);
if(!p || !p->handle || p->e != 0)
return false;
cl_command_queue qq = getQueue(q);
clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
offset, globalsize, localsize, 0, 0,
sync ? 0 : &p->e);
if( sync )
size_t offset[CV_MAX_DIM] = {0};
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
offset, globalsize, localsize, 0, 0,
sync ? 0 : &p->e);
if( sync || retval < 0 )
{
clFinish(qq);
p->cleanupUMats();
@ -2218,14 +2268,17 @@ void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsiz
p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
}
return retval >= 0;
}
void Kernel::runTask(bool sync, const Queue& q)
bool Kernel::runTask(bool sync, const Queue& q)
{
CV_Assert(p && p->handle && p->e == 0);
if(!p || !p->handle || p->e != 0)
return false;
cl_command_queue qq = getQueue(q);
clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
if( sync )
cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
if( sync || retval < 0 )
{
clFinish(qq);
p->cleanupUMats();
@ -2235,6 +2288,7 @@ void Kernel::runTask(bool sync, const Queue& q)
p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
}
return retval >= 0;
}
@ -2273,11 +2327,11 @@ size_t Kernel::localMemSize() const
struct Program::Impl
{
Impl(const ProgramSource& _src,
Impl(const ProgramSource2& _src,
const String& _buildflags, String& errmsg)
{
refcount = 1;
const Context& ctx = Context::getDefault();
const Context2& ctx = Context2::getDefault();
src = _src;
buildflags = _buildflags;
const String& srcstr = src.source();
@ -2293,17 +2347,20 @@ struct Program::Impl
void** deviceList = deviceListBuf;
for( i = 0; i < n; i++ )
deviceList[i] = ctx.device(i).ptr();
printf("Building the OpenCL program ...\n");
retval = clBuildProgram(handle, n,
(const cl_device_id*)deviceList,
buildflags.c_str(), 0, 0);
if( retval == CL_BUILD_PROGRAM_FAILURE )
{
char buf[1024];
char buf[1<<16];
size_t retsz = 0;
clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], CL_PROGRAM_BUILD_LOG,
sizeof(buf)-16, buf, &retsz);
errmsg = String(buf);
CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str()));
}
CV_Assert(retval >= 0);
}
}
@ -2315,7 +2372,7 @@ struct Program::Impl
if(_buf.empty())
return;
String prefix0 = Program::getPrefix(buildflags);
const Context& ctx = Context::getDefault();
const Context2& ctx = Context2::getDefault();
const Device& dev = Device::getDefault();
const char* pos0 = _buf.c_str();
const char* pos1 = strchr(pos0, '\n');
@ -2366,7 +2423,7 @@ struct Program::Impl
IMPLEMENT_REFCOUNTABLE();
ProgramSource src;
ProgramSource2 src;
String buildflags;
cl_program handle;
};
@ -2374,7 +2431,7 @@ struct Program::Impl
Program::Program() { p = 0; }
Program::Program(const ProgramSource& src,
Program::Program(const ProgramSource2& src,
const String& buildflags, String& errmsg)
{
p = 0;
@ -2405,7 +2462,7 @@ Program::~Program()
p->release();
}
bool Program::create(const ProgramSource& src,
bool Program::create(const ProgramSource2& src,
const String& buildflags, String& errmsg)
{
if(p)
@ -2419,9 +2476,9 @@ bool Program::create(const ProgramSource& src,
return p != 0;
}
const ProgramSource& Program::source() const
const ProgramSource2& Program::source() const
{
static ProgramSource dummy;
static ProgramSource2 dummy;
return p ? p->src : dummy;
}
@ -2455,7 +2512,7 @@ String Program::getPrefix() const
String Program::getPrefix(const String& buildflags)
{
const Context& ctx = Context::getDefault();
const Context2& ctx = Context2::getDefault();
const Device& dev = ctx.device(0);
return format("name=%s\ndriver=%s\nbuildflags=%s\n",
dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
@ -2463,7 +2520,7 @@ String Program::getPrefix(const String& buildflags)
////////////////////////////////////////////////////////////////////////////////////////
struct ProgramSource::Impl
struct ProgramSource2::Impl
{
Impl(const char* _src)
{
@ -2482,39 +2539,39 @@ struct ProgramSource::Impl
IMPLEMENT_REFCOUNTABLE();
String src;
ProgramSource::hash_t h;
ProgramSource2::hash_t h;
};
ProgramSource::ProgramSource()
ProgramSource2::ProgramSource2()
{
p = 0;
}
ProgramSource::ProgramSource(const char* prog)
ProgramSource2::ProgramSource2(const char* prog)
{
p = new Impl(prog);
}
ProgramSource::ProgramSource(const String& prog)
ProgramSource2::ProgramSource2(const String& prog)
{
p = new Impl(prog);
}
ProgramSource::~ProgramSource()
ProgramSource2::~ProgramSource2()
{
if(p)
p->release();
}
ProgramSource::ProgramSource(const ProgramSource& prog)
ProgramSource2::ProgramSource2(const ProgramSource2& prog)
{
p = prog.p;
if(p)
p->addref();
}
ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
ProgramSource2& ProgramSource2::operator = (const ProgramSource2& prog)
{
Impl* newp = (Impl*)prog.p;
if(newp)
@ -2525,13 +2582,13 @@ ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
return *this;
}
const String& ProgramSource::source() const
const String& ProgramSource2::source() const
{
static String dummy;
return p ? p->src : dummy;
}
ProgramSource::hash_t ProgramSource::hash() const
ProgramSource2::hash_t ProgramSource2::hash() const
{
return p ? p->h : 0;
}
@ -2551,7 +2608,7 @@ public:
return u;
}
void getBestFlags(const Context& ctx, int& createFlags, int& flags0) const
void getBestFlags(const Context2& ctx, int& createFlags, int& flags0) const
{
const Device& dev = ctx.device(0);
createFlags = CL_MEM_READ_WRITE;
@ -2574,7 +2631,7 @@ public:
total *= sizes[i];
}
Context& ctx = Context::getDefault();
Context2& ctx = Context2::getDefault();
int createFlags = 0, flags0 = 0;
getBestFlags(ctx, createFlags, flags0);
@ -2603,7 +2660,7 @@ public:
if(u->handle == 0)
{
CV_Assert(u->origdata != 0);
Context& ctx = Context::getDefault();
Context2& ctx = Context2::getDefault();
int createFlags = 0, flags0 = 0;
getBestFlags(ctx, createFlags, flags0);
@ -2848,7 +2905,6 @@ public:
new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) >= 0 );
}
clFinish(q);
}
void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
@ -2890,6 +2946,9 @@ public:
if( iscontinuous )
{
int crc = 0;
for( size_t i = 0; i < total; i++ )
crc ^= ((uchar*)srcptr)[i];
CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 );
}
@ -2949,10 +3008,11 @@ public:
}
else
{
CV_Assert( clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
cl_int retval;
CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
new_srcofs, new_dstofs, new_sz,
new_srcstep[0], new_srcstep[1], new_dststep[0], new_dststep[1],
0, 0, 0) >= 0 );
0, 0, 0)) >= 0 );
}
dst->markHostCopyObsolete(true);
@ -2969,4 +3029,61 @@ MatAllocator* getOpenCLAllocator()
return &allocator;
}
// Maps an OpenCV matrix type code to the name of the matching OpenCL C type
// (scalar or 2/3/4-component vector). Returns "?" when the type has more
// than four channels or when its depth row in the table is unassigned.
const char* typeToStr(int t)
{
    // Rows are selected by CV_MAT_DEPTH(t), columns by the channel count (1..4).
    static const char* names[8][4] =
    {
        { "uchar",  "uchar2",  "uchar3",  "uchar4"  },
        { "char",   "char2",   "char3",   "char4"   },
        { "ushort", "ushort2", "ushort3", "ushort4" },
        { "short",  "short2",  "short3",  "short4"  },
        { "int",    "int2",    "int3",    "int4"    },
        { "float",  "float2",  "float3",  "float4"  },
        { "double", "double2", "double3", "double4" },
        { "?",      "?",       "?",       "?"       }
    };

    const int channels = CV_MAT_CN(t);
    if( channels > 4 )
        return "?";

    return names[CV_MAT_DEPTH(t)][channels - 1];
}
// Maps an OpenCV matrix type code to the OpenCL C type name listed for it in
// the table below. Unlike typeToStr(), pairs of depth rows share one integer
// type name (e.g. the first two rows both yield "uchar..."), and the seventh
// row yields "long...". Returns "?" for more than four channels or for the
// unassigned last depth row.
const char* memopTypeToStr(int t)
{
    // Rows are selected by CV_MAT_DEPTH(t), columns by the channel count (1..4).
    static const char* names[8][4] =
    {
        { "uchar",  "uchar2",  "uchar3",  "uchar4"  },
        { "uchar",  "uchar2",  "uchar3",  "uchar4"  },
        { "ushort", "ushort2", "ushort3", "ushort4" },
        { "ushort", "ushort2", "ushort3", "ushort4" },
        { "int",    "int2",    "int3",    "int4"    },
        { "int",    "int2",    "int3",    "int4"    },
        { "long",   "long2",   "long3",   "long4"   },
        { "?",      "?",       "?",       "?"       }
    };

    const int channels = CV_MAT_CN(t);
    if( channels > 4 )
        return "?";

    return names[CV_MAT_DEPTH(t)][channels - 1];
}
const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
{
if( sdepth == ddepth )
return "noconvert";
const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
if( ddepth >= CV_32F ||
(ddepth == CV_32S && sdepth < CV_32S) ||
(ddepth == CV_16S && sdepth <= CV_8S) ||
(ddepth == CV_16U && sdepth == CV_8U))
{
sprintf(buf, "convert_%s", typestr);
}
else if( sdepth >= CV_32F )
{
sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
}
else
{
sprintf(buf, "convert_%s_sat", typestr);
}
return buf;
}
}}

@ -0,0 +1,303 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
Usage:
after compiling this program user gets a single kernel called KF.
the following flags should be passed:
1) one of "-D BINARY_OP", "-D UNARY_OP", "-D MASK_BINARY_OP" or "-D MASK_UNARY_OP"
2) the actual operation performed, one of "-D OP_...", see below the list of operations.
2a) "-D dstDepth=<destination depth> [-D cn=<num channels>]"
for some operations, like min/max/and/or/xor it's enough
2b) "-D srcDepth1=<source1 depth> -D srcDepth2=<source2 depth> -D dstDepth=<destination depth>
-D workDepth=<work depth> [-D cn=<num channels>]" - for mixed-type operations
*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
// Depth codes made available to the kernel source.
#define CV_32S 4
#define CV_32F 5

// Element accessors used by the PROCESS_ELEM macros.
// They rely on the kernel-local names dstptr/dst_index, srcptr1/src1_index and
// srcptr2/src2_index. The buffers are __global pointers, so the casts must keep
// the __global qualifier: OpenCL C does not allow a cast that moves a pointer
// into a different address space (an unqualified pointer type inside a kernel
// is __private).
#define dstelem *(__global dstT*)(dstptr + dst_index)
#define noconvert(x) x

#ifndef workT
// Single-type mode: sources, work type and destination are all dstT,
// so no conversion is required.
#define srcT1 dstT
#define srcT2 dstT
#define workT dstT
#define srcelem1 *(__global const dstT*)(srcptr1 + src1_index)
#define srcelem2 *(__global const dstT*)(srcptr2 + src2_index)
#define convertToDT noconvert
#else
// Mixed-type mode: srcT1/srcT2/convertToWT1/convertToWT2/convertToDT are
// expected to be supplied through the build options.
#define srcelem1 convertToWT1(*(__global const srcT1*)(srcptr1 + src1_index))
#define srcelem2 convertToWT2(*(__global const srcT2*)(srcptr2 + src2_index))
#endif
// EXTRA_PARAMS is appended to the kernel parameter list. It is empty by
// default; operations that need scalar arguments redefine it below.
#define EXTRA_PARAMS

// PROCESS_ELEM is the per-element statement executed by the kernel. Exactly one
// OP_... macro must be defined through the build options to select it.

#if defined OP_ADD_SAT
#define PROCESS_ELEM dstelem = add_sat(srcelem1, srcelem2)

#elif defined OP_ADD
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2)

#elif defined OP_SUB_SAT
#define PROCESS_ELEM dstelem = sub_sat(srcelem1, srcelem2)

#elif defined OP_SUB
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 - srcelem2)

#elif defined OP_RSUB_SAT
#define PROCESS_ELEM dstelem = sub_sat(srcelem2, srcelem1)

#elif defined OP_RSUB
#define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1)

#elif defined OP_ABSDIFF
#define PROCESS_ELEM dstelem = abs_diff(srcelem1, srcelem2)

#elif defined OP_AND
#define PROCESS_ELEM dstelem = srcelem1 & srcelem2

#elif defined OP_OR
#define PROCESS_ELEM dstelem = srcelem1 | srcelem2

#elif defined OP_XOR
#define PROCESS_ELEM dstelem = srcelem1 ^ srcelem2

#elif defined OP_NOT
#define PROCESS_ELEM dstelem = ~srcelem1

#elif defined OP_MIN
#define PROCESS_ELEM dstelem = min(srcelem1, srcelem2)

#elif defined OP_MAX
#define PROCESS_ELEM dstelem = max(srcelem1, srcelem2)

#elif defined OP_MUL
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2)

#elif defined OP_MUL_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT scale
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2 * scale)

// The division variants write zero where the divisor is zero.
#elif defined OP_DIV
#define PROCESS_ELEM \
    workT e2 = srcelem2, zero = (workT)(0); \
    dstelem = convertToDT(e2 != zero ? srcelem1 / e2 : zero)

#elif defined OP_DIV_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT scale
#define PROCESS_ELEM \
    workT e2 = srcelem2, zero = (workT)(0); \
    dstelem = convertToDT(e2 != zero ? srcelem1 * scale / e2 : zero)

#elif defined OP_RECIP_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT scale
#define PROCESS_ELEM \
    workT e1 = srcelem1, zero = (workT)(0); \
    dstelem = convertToDT(e1 != zero ? scale / e1 : zero)

#elif defined OP_ADDW
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha, workT beta, workT gamma
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma)

#elif defined OP_MAG
#define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2)

// Phase is shifted into the non-negative range by adding a full turn.
#elif defined OP_PHASE_RADIANS
#define PROCESS_ELEM \
    workT tmp = atan2(srcelem2, srcelem1); \
    if(tmp < 0) tmp += 6.283185307179586232; \
    dstelem = tmp

#elif defined OP_PHASE_DEGREES
#define PROCESS_ELEM \
    workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465; \
    if(tmp < 0) tmp += 360; \
    dstelem = tmp

#elif defined OP_EXP
#define PROCESS_ELEM dstelem = exp(srcelem1)

#elif defined OP_SQRT
#define PROCESS_ELEM dstelem = sqrt(srcelem1)

// log() takes a floating-point argument, so the magnitude must be computed
// with fabs(); abs() is an integer-only built-in in OpenCL C.
#elif defined OP_LOG
#define PROCESS_ELEM dstelem = log(fabs(srcelem1))

// CMP_OPERATOR is expected to be supplied through the build options.
#elif defined OP_CMP
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)

#elif defined OP_CONVERT
#define PROCESS_ELEM dstelem = convertToDT(srcelem1)

#elif defined OP_CONVERT_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha, workT beta
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta)

#else
#error "unknown op type"
#endif
// In the unary kernels there is no second source array. For operations that
// still take two operands, the second operand becomes a scalar kernel
// parameter that is literally named srcelem2, so PROCESS_ELEM keeps working.
//
// The saturated operations are selected with OP_ADD_SAT / OP_SUB_SAT /
// OP_RSUB_SAT in the op list, so those names must be tested here; the
// OP_SAT_* spellings are kept as well for backward compatibility.
#if defined UNARY_OP || defined MASK_UNARY_OP
#undef srcelem2
#if defined OP_AND || defined OP_OR || defined OP_XOR || \
    defined OP_ADD || defined OP_ADD_SAT || defined OP_SAT_ADD || \
    defined OP_SUB || defined OP_SUB_SAT || defined OP_SAT_SUB || \
    defined OP_RSUB || defined OP_RSUB_SAT || defined OP_SAT_RSUB || \
    defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT srcelem2
#endif
#endif
#if defined BINARY_OP
// Binary element-wise kernel: combines one element of each source array into
// the destination using PROCESS_ELEM. Steps and offsets are byte quantities
// added to the uchar base pointers.
// The local names src1_index, src2_index and dst_index are referenced by the
// srcelem1/srcelem2/dstelem macros and must not be renamed.
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
                 __global const uchar* srcptr2, int srcstep2, int srcoffset2,
                 __global uchar* dstptr, int dststep, int dstoffset,
                 int rows, int cols EXTRA_PARAMS )
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Work-items outside the rows x cols area have nothing to process.
    if (x >= cols || y >= rows)
        return;

    int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
    int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2);
    int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);

    PROCESS_ELEM;
}
#elif defined MASK_BINARY_OP
// Masked binary element-wise kernel: same as the binary kernel, but an element
// is processed only where the corresponding mask byte is non-zero.
// The local names src1_index, src2_index and dst_index are referenced by the
// srcelem1/srcelem2/dstelem macros and must not be renamed.
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
                 __global const uchar* srcptr2, int srcstep2, int srcoffset2,
                 __global const uchar* mask, int maskstep, int maskoffset,
                 __global uchar* dstptr, int dststep, int dstoffset,
                 int rows, int cols EXTRA_PARAMS )
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Work-items outside the rows x cols area have nothing to process.
    if (x >= cols || y >= rows)
        return;

    // One mask byte per element; zero means "leave the destination untouched".
    const int mask_pos = mad24(y, maskstep, x + maskoffset);
    if (!mask[mask_pos])
        return;

    int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
    int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2);
    int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);

    PROCESS_ELEM;
}
#elif defined UNARY_OP
// Unary element-wise kernel: applies PROCESS_ELEM to one source array. Any
// scalar operands arrive through EXTRA_PARAMS.
// The local names src1_index and dst_index are referenced by the
// srcelem1/dstelem macros and must not be renamed.
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
                 __global uchar* dstptr, int dststep, int dstoffset,
                 int rows, int cols EXTRA_PARAMS )
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Work-items outside the rows x cols area have nothing to process.
    if (x >= cols || y >= rows)
        return;

    int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
    int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);

    PROCESS_ELEM;
}
#elif defined MASK_UNARY_OP
// Masked unary element-wise kernel: same as the unary kernel, but an element
// is processed only where the corresponding mask byte is non-zero.
// The local names src1_index and dst_index are referenced by the
// srcelem1/dstelem macros and must not be renamed.
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
                 __global const uchar* mask, int maskstep, int maskoffset,
                 __global uchar* dstptr, int dststep, int dstoffset,
                 int rows, int cols EXTRA_PARAMS )
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Work-items outside the rows x cols area have nothing to process.
    if (x >= cols || y >= rows)
        return;

    // One mask byte per element; zero means "leave the destination untouched".
    const int mask_pos = mad24(y, maskstep, x + maskoffset);
    if (!mask[mask_pos])
        return;

    int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1);
    int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset);

    PROCESS_ELEM;
}
#else
#error "Unknown operation type"
#endif

@ -0,0 +1,73 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Writes `value` into every destination element whose mask byte is non-zero.
// dstT is supplied through the build options; steps and offsets are byte
// quantities added to the uchar base pointers.
// NOTE(review): in OpenCL C a 3-component vector type has the size of the
// 4-component one, so sizeof(dstT) does not match a tightly packed 3-channel
// host element - confirm callers do not use this kernel for such data.
__kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
                      __global uchar* dstptr, int dststep, int dstoffset,
                      int rows, int cols, dstT value )
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < cols && y < rows)
    {
        int mask_index = mad24(y, maskstep, x + maskoffset);
        if( mask[mask_index] )
        {
            int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset);
            // The cast keeps the __global address space of dstptr; casting to a
            // plain (private) dstT* is not permitted by OpenCL C.
            *(__global dstT*)(dstptr + dst_index) = value;
        }
    }
}
// Writes `value` into every element of the rows x cols destination area.
// dstT is supplied through the build options; the step and offset are byte
// quantities added to the uchar base pointer.
// NOTE(review): in OpenCL C a 3-component vector type has the size of the
// 4-component one, so sizeof(dstT) does not match a tightly packed 3-channel
// host element - confirm callers do not use this kernel for such data.
__kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
                  int rows, int cols, dstT value )
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < cols && y < rows)
    {
        int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset);
        // The cast keeps the __global address space of dstptr; casting to a
        // plain (private) dstT* is not permitted by OpenCL C.
        *(__global dstT*)(dstptr + dst_index) = value;
    }
}

@ -205,13 +205,30 @@ enum { BLOCK_SIZE = 1024 };
inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind)
{
if( sc.dims > 2 || (sc.cols != 1 && sc.rows != 1) || !sc.isContinuous() )
if( sc.dims > 2 || !sc.isContinuous() )
return false;
Size sz = sc.size();
if(sz.width != 1 && sz.height != 1)
return false;
int cn = CV_MAT_CN(atype);
if( akind == _InputArray::MATX && sckind != _InputArray::MATX )
return false;
return sc.size() == Size(1, 1) || sc.size() == Size(1, cn) || sc.size() == Size(cn, 1) ||
(sc.size() == Size(1, 4) && sc.type() == CV_64F && cn <= 4);
return sz == Size(1, 1) || sz == Size(1, cn) || sz == Size(cn, 1) ||
(sz == Size(1, 4) && sc.type() == CV_64F && cn <= 4);
}
// Decides whether `sc` may be treated as a per-channel scalar operand for an
// array of type `atype`.
//   sc     - candidate scalar array
//   atype  - type of the array operand (only its channel count is used)
//   sckind - kind of `sc`, akind - kind of the array operand
// Returns true only for a continuous, at most 2-dimensional single row or
// column holding 1 or cn elements, or a 1x4 CV_64F array when cn <= 4.
inline bool checkScalar(InputArray sc, int atype, int sckind, int akind)
{
    const bool layoutOk = sc.dims() <= 2 && sc.isContinuous();
    if( !layoutOk )
        return false;

    const Size extent = sc.size();
    const bool singleRowOrColumn = extent.width == 1 || extent.height == 1;
    if( !singleRowOrColumn )
        return false;

    // A Matx array operand is only paired with a Matx scalar.
    if( akind == _InputArray::MATX && sckind != _InputArray::MATX )
        return false;

    const int channels = CV_MAT_CN(atype);
    if( extent == Size(1, 1) || extent == Size(1, channels) || extent == Size(channels, 1) )
        return true;

    return extent == Size(1, 4) && sc.type() == CV_64F && channels <= 4;
}
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize );
@ -227,7 +244,10 @@ struct TLSData
static TLSData* get();
};
namespace ocl { MatAllocator* getOpenCLAllocator(); }
namespace ocl
{
MatAllocator* getOpenCLAllocator();
}
}

@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
///////////////////////////////// UMat implementation ///////////////////////////////
@ -174,8 +175,8 @@ static void updateContinuityFlag(UMat& m)
break;
}
uint64 t = (uint64)m.step[0]*m.size[0];
if( j <= i && t == (size_t)t )
uint64 total = (uint64)m.step[0]*m.size[0];
if( j <= i && total == (size_t)total )
m.flags |= UMat::CONTINUOUS_FLAG;
else
m.flags &= ~UMat::CONTINUOUS_FLAG;
@ -197,6 +198,7 @@ UMat Mat::getUMat(int accessFlags) const
if(!u)
return hdr;
UMat::getStdAllocator()->allocate(u, accessFlags);
hdr.flags = flags;
setSize(hdr, dims, size.p, step.p);
finalizeHdr(hdr);
hdr.u = u;
@ -548,7 +550,8 @@ Mat UMat::getMat(int accessFlags) const
CV_Assert(u->data != 0);
Mat hdr(dims, size.p, type(), u->data + offset, step.p);
hdr.u = u;
hdr.datastart = hdr.data = u->data;
hdr.datastart = u->data;
hdr.data = hdr.datastart + offset;
hdr.datalimit = hdr.dataend = u->data + u->size;
CV_XADD(&hdr.u->refcount, 1);
return hdr;
@ -617,7 +620,7 @@ void UMat::copyTo(OutputArray _dst) const
void* dsthandle = dst.handle(ACCESS_WRITE);
if( srchandle == dsthandle && dst.offset == offset )
return;
ndoffset(dstofs);
dst.ndoffset(dstofs);
CV_Assert(u->currAllocator == dst.u->currAllocator);
u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
}
@ -633,6 +636,50 @@ void UMat::convertTo(OutputArray, int, double, double) const
CV_Error(Error::StsNotImplemented, "");
}
// Sets all elements (or only those selected by a non-empty mask) to `_value`.
//
// For arrays with at most 2 dimensions and 1, 2 or 4 channels the operation is
// attempted with the "set"/"setMask" OpenCL kernels when OpenCL is enabled.
// If no kernel is available or the launch fails, the array is mapped to a Mat
// and Mat::setTo does the work. Returns *this.
UMat& UMat::setTo(InputArray _value, InputArray _mask)
{
    bool haveMask = !_mask.empty();
    int tp = type(), cn = CV_MAT_CN(tp);

    // 3-channel data is excluded from the OpenCL path: the kernels address
    // elements with sizeof(dstT), and an OpenCL 3-component vector occupies the
    // space of 4 components, which matches neither the host element stride nor
    // the CV_ELEM_SIZE(tp) bytes passed for the scalar argument.
    if( dims <= 2 && cn <= 4 && cn != 3 && ocl::useOpenCL() )
    {
        Mat value = _value.getMat();
        CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );

        // Scalar converted to the array type; 4 doubles cover the largest
        // element handled here.
        double buf[4];
        convertAndUnrollScalar(value, tp, (uchar*)buf, 1);

        char opts[1024];
        sprintf(opts, "-D dstT=%s", ocl::memopTypeToStr(tp));

        ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
        if( !setK.empty() )
        {
            ocl::KernelArg scalararg(0, 0, 0, buf, CV_ELEM_SIZE(tp));
            UMat mask;

            if( haveMask )
            {
                mask = _mask.getUMat();
                CV_Assert( mask.size() == size() && mask.type() == CV_8U );
                ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
                // Unmasked elements must keep their values, hence read-write access.
                ocl::KernelArg dstarg = ocl::KernelArg::ReadWrite(*this);
                setK.args(maskarg, dstarg, scalararg);
            }
            else
            {
                ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this);
                setK.args(dstarg, scalararg);
            }

            size_t globalsize[] = { (size_t)cols, (size_t)rows };
            if( setK.run(2, globalsize, 0, false) )
                return *this;
        }
    }

    // CPU fallback.
    Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
    m.setTo(_value, _mask);
    return *this;
}
UMat& UMat::operator = (const Scalar&)
{
CV_Error(Error::StsNotImplemented, "");

@ -91,11 +91,11 @@ bool CV_UMatTest::TestUMat()
{
try
{
Mat a(100, 100, CV_16S), b;
Mat a(100, 100, CV_16SC2), b, c;
randu(a, Scalar::all(-100), Scalar::all(100));
Rect roi(1, 3, 10, 20);
Mat ra(a, roi), rb;
UMat ua, ura;
Rect roi(1, 3, 5, 4);
Mat ra(a, roi), rb, rc, rc0;
UMat ua, ura, ub, urb, uc, urc;
a.copyTo(ua);
ua.copyTo(b);
CHECK_DIFF(a, b);
@ -112,6 +112,71 @@ bool CV_UMatTest::TestUMat()
}
ra.copyTo(rb);
CHECK_DIFF(ra, rb);
b = a.clone();
ra = a(roi);
rb = b(roi);
randu(b, Scalar::all(-100), Scalar::all(100));
b.copyTo(ub);
urb = ub(roi);
/*std::cout << "==============================================\nbefore op (CPU):\n";
std::cout << "ra: " << ra << std::endl;
std::cout << "rb: " << rb << std::endl;*/
ra.copyTo(ura);
rb.copyTo(urb);
ra.release();
rb.release();
ura.copyTo(ra);
urb.copyTo(rb);
/*std::cout << "==============================================\nbefore op (GPU):\n";
std::cout << "ra: " << ra << std::endl;
std::cout << "rb: " << rb << std::endl;*/
cv::max(ra, rb, rc);
cv::max(ura, urb, urc);
urc.copyTo(rc0);
/*std::cout << "==============================================\nafter op:\n";
std::cout << "rc: " << rc << std::endl;
std::cout << "rc0: " << rc0 << std::endl;*/
CHECK_DIFF(rc0, rc);
{
UMat tmp = rc0.getUMat(ACCESS_WRITE);
cv::max(ura, urb, tmp);
}
CHECK_DIFF(rc0, rc);
ura.copyTo(urc);
cv::max(urc, urb, urc);
urc.copyTo(rc0);
CHECK_DIFF(rc0, rc);
rc = ra ^ rb;
cv::bitwise_xor(ura, urb, urc);
urc.copyTo(rc0);
/*std::cout << "==============================================\nafter op:\n";
std::cout << "ra: " << rc0 << std::endl;
std::cout << "rc: " << rc << std::endl;*/
CHECK_DIFF(rc0, rc);
rc = ra + rb;
cv::add(ura, urb, urc);
urc.copyTo(rc0);
CHECK_DIFF(rc0, rc);
cv::subtract(ra, Scalar::all(5), rc);
cv::subtract(ura, Scalar::all(5), urc);
urc.copyTo(rc0);
CHECK_DIFF(rc0, rc);
}
catch (const test_excep& e)
{

@ -320,7 +320,7 @@ VideoCapture::retrieve
----------------------
Decodes and returns the grabbed video frame.
.. ocv:function:: bool VideoCapture::retrieve( Mat& image, int flag=0 )
.. ocv:function:: bool VideoCapture::retrieve( OutputArray image, int flag=0 )
.. ocv:pyfunction:: cv2.VideoCapture.retrieve([image[, flag]]) -> retval, image
@ -337,7 +337,9 @@ Grabs, decodes and returns the next video frame.
.. ocv:function:: VideoCapture& VideoCapture::operator >> (Mat& image)
.. ocv:function:: bool VideoCapture::read(Mat& image)
.. ocv:function:: VideoCapture& VideoCapture::operator >> (UMat& image)
.. ocv:function:: bool VideoCapture::read(OutputArray image)
.. ocv:pyfunction:: cv2.VideoCapture.read([image]) -> retval, image

@ -511,9 +511,10 @@ public:
CV_WRAP virtual void release();
CV_WRAP virtual bool grab();
CV_WRAP virtual bool retrieve(CV_OUT Mat& image, int flag = 0);
CV_WRAP virtual bool retrieve(OutputArray image, int flag = 0);
virtual VideoCapture& operator >> (CV_OUT Mat& image);
CV_WRAP virtual bool read(CV_OUT Mat& image);
virtual VideoCapture& operator >> (CV_OUT UMat& image);
CV_WRAP virtual bool read(OutputArray image);
CV_WRAP virtual bool set(int propId, double value);
CV_WRAP virtual double get(int propId);

@ -515,7 +515,7 @@ bool VideoCapture::grab()
return cvGrabFrame(cap) != 0;
}
bool VideoCapture::retrieve(Mat& image, int channel)
bool VideoCapture::retrieve(OutputArray image, int channel)
{
IplImage* _img = cvRetrieveFrame(cap, channel);
if( !_img )
@ -533,7 +533,7 @@ bool VideoCapture::retrieve(Mat& image, int channel)
return true;
}
bool VideoCapture::read(Mat& image)
bool VideoCapture::read(OutputArray image)
{
if(grab())
retrieve(image);
@ -548,6 +548,12 @@ VideoCapture& VideoCapture::operator >> (Mat& image)
return *this;
}
// Stream-style frame read into a UMat: forwards to read(image) and returns
// *this so that reads can be chained. The boolean result of read() is
// discarded here.
VideoCapture& VideoCapture::operator >> (UMat& image)
{
read(image);
return *this;
}
bool VideoCapture::set(int propId, double value)
{
return cvSetCaptureProperty(cap, propId, value) != 0;

@ -90,6 +90,7 @@
\**********************************************************************************/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include <limits>
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
@ -2687,6 +2688,125 @@ struct mRGBA2RGBA
}
};
// OpenCL implementation of cvtColor for the subset of conversion codes handled
// in the switch below.
//
//   _src, _dst - source and destination arrays
//   code       - color conversion code
//   dcn        - requested number of destination channels (used by YUV2BGR/RGB,
//                overwritten for the other supported codes)
//
// Returns true only when a kernel was created for `code` and was successfully
// enqueued. For every other outcome it returns false, so that the caller falls
// back to the CPU implementation.
static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
    // Must start as false: codes without a kernel leave `k` empty and the
    // function has then done no work at all.
    bool ok = false;
    UMat src = _src.getUMat(), dst;
    Size sz = src.size(), dstSz = sz;
    int scn = src.channels(), depth = src.depth(), bidx;
    size_t globalsize[] = { (size_t)src.cols, (size_t)src.rows };
    ocl::Kernel k;

    if(depth != CV_8U && depth != CV_16U && depth != CV_32F)
        return false;

    // bidx convention used throughout: 0 for the BGR(A) variants of a code,
    // 2 for the RGB(A) variants.
    switch (code)
    {
    /*
     Not ported to OpenCL yet:
     case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
     case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
     case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
     case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
     case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
     case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
     */
    case COLOR_BGR2GRAY:
    case COLOR_BGRA2GRAY:
    case COLOR_RGB2GRAY:
    case COLOR_RGBA2GRAY:
    {
        CV_Assert(scn == 3 || scn == 4);
        bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
        dcn = 1;
        k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc,
                 format("-D depth=%d -D scn=%d -D dcn=1 -D bidx=%d", depth, scn, bidx));
        break;
    }
    case COLOR_GRAY2BGR:
    case COLOR_GRAY2BGRA:
    {
        CV_Assert(scn == 1);
        dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
        k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc,
                 format("-D depth=%d -D scn=1 -D dcn=%d", depth, dcn));
        break;
    }
    case COLOR_BGR2YUV:
    case COLOR_RGB2YUV:
    {
        CV_Assert(scn == 3 || scn == 4);
        // BGR input -> 0, as in the GRAY, YCrCb and NV12 cases.
        bidx = code == COLOR_BGR2YUV ? 0 : 2;
        dcn = 3;
        k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc,
                 format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx));
        break;
    }
    case COLOR_YUV2BGR:
    case COLOR_YUV2RGB:
    {
        if(dcn < 0) dcn = 3;
        CV_Assert(dcn == 3 || dcn == 4);
        // BGR output -> 0, as in the GRAY, YCrCb and NV12 cases.
        bidx = code == COLOR_YUV2BGR ? 0 : 2;
        k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc,
                 format("-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d", depth, dcn, bidx));
        break;
    }
    case COLOR_YUV2RGB_NV12:
    case COLOR_YUV2BGR_NV12:
    case COLOR_YUV2RGBA_NV12:
    case COLOR_YUV2BGRA_NV12:
    {
        CV_Assert( scn == 1 );
        CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
        dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3;
        bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2;

        // The source has 3/2 times the rows of the destination image.
        dstSz = Size(sz.width, sz.height * 2 / 3);
        // NOTE(review): every other launch in this function uses
        // { columns, rows } as the global size; here dimension 0 gets the
        // height and dimension 1 the width - confirm against the
        // YUV2RGBA_NV12 kernel.
        globalsize[0] = dstSz.height/2;
        globalsize[1] = dstSz.width/2;
        k.create("YUV2RGBA_NV12", ocl::imgproc::cvtcolor_oclsrc,
                 format("-D depth=0 -D scn=1 -D dcn=%d -D bidx=%d", dcn, bidx));
        break;
    }
    case COLOR_BGR2YCrCb:
    case COLOR_RGB2YCrCb:
    {
        CV_Assert(scn == 3 || scn == 4);
        bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
        dcn = 3;
        k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc,
                 format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx));
        break;
    }
    case COLOR_YCrCb2BGR:
    case COLOR_YCrCb2RGB:
        // No kernel yet: `k` stays empty and the CPU path is used.
        break;
    /*
     Not ported to OpenCL yet:
     case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
     case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
     case COLOR_BGR2XYZ: case COLOR_RGB2XYZ:
     case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
     case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
     case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
     case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
     case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
     */
    default:
        ;
    }

    if( !k.empty() )
    {
        _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
        dst = _dst.getUMat();
        k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
        ok = k.run(2, globalsize, 0, false);
    }
    return ok;
}
}//namespace cv
//////////////////////////////////////////////////////////////////////////////////////////
@ -2695,9 +2815,15 @@ struct mRGBA2RGBA
void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
bool use_opencl = ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT;
int stype = _src.type();
int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx;
if( use_opencl && ocl_cvtColor(_src, _dst, code, dcn) )
return;
Mat src = _src.getMat(), dst;
Size sz = src.size();
int scn = src.channels(), depth = src.depth(), bidx;
CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );

@ -47,6 +47,7 @@
// */
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include <iostream>
#include <vector>
@ -1901,8 +1902,45 @@ private:
};
#endif
// OpenCL implementation of resize.
//
// Handles arrays with up to 4 channels for INTER_NEAREST (any depth) and
// INTER_LINEAR (CV_8U and CV_32F only). `dsize` is the destination size and
// fx/fy are the scale factors whose reciprocals are passed to the kernel.
//
// Returns false when the combination is not supported, when the kernel could
// not be created, or when the launch failed; the caller is expected to fall
// back to the CPU implementation in that case.
static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
                        double fx, double fy, int interpolation)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    if( !(cn <= 4 &&
          (interpolation == INTER_NEAREST ||
           (interpolation == INTER_LINEAR && (depth == CV_8U || depth == CV_32F)))) )
        return false;

    UMat src = _src.getUMat();
    _dst.create(dsize, type);
    UMat dst = _dst.getUMat();

    ocl::Kernel k;
    if (interpolation == INTER_LINEAR)
    {
        // 8-bit data is interpolated in 32-bit integers, float data in floats.
        int wdepth = depth == CV_8U ? CV_32S : CV_32F;
        int wtype = CV_MAKETYPE(wdepth, cn);
        char buf[2][32];
        // `depth` is an int and must be formatted with %d; passing it to %s
        // would make the formatter treat the number as a string pointer.
        k.create("resizeLN", ocl::imgproc::resize_oclsrc,
                 format("-D INTER_LINEAR -D depth=%d -D PIXTYPE=%s -D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s",
                        depth, ocl::typeToStr(type), ocl::typeToStr(wtype),
                        ocl::convertTypeStr(depth, wdepth, cn, buf[0]),
                        ocl::convertTypeStr(wdepth, depth, cn, buf[1])));
    }
    else if (interpolation == INTER_NEAREST)
    {
        k.create("resizeNN", ocl::imgproc::resize_oclsrc,
                 format("-D INTER_NEAREST -D PIXTYPE=%s", ocl::memopTypeToStr(type) ));
    }

    if( k.empty() )
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
           (float)(1./fx), (float)(1./fy));

    size_t globalsize[] = { (size_t)dst.cols, (size_t)dst.rows };
    return k.run(2, globalsize, 0, false);
}
}
//////////////////////////////////////////////////////////////////////////////////////////
@ -2013,26 +2051,30 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
resizeArea_<double, double>, 0
};
Mat src = _src.getMat();
Size ssize = src.size();
Size ssize = _src.size();
CV_Assert( ssize.area() > 0 );
CV_Assert( dsize.area() || (inv_scale_x > 0 && inv_scale_y > 0) );
if( !dsize.area() )
CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) );
if( dsize.area() == 0 )
{
dsize = Size(saturate_cast<int>(src.cols*inv_scale_x),
saturate_cast<int>(src.rows*inv_scale_y));
CV_Assert( dsize.area() );
dsize = Size(saturate_cast<int>(ssize.width*inv_scale_x),
saturate_cast<int>(ssize.height*inv_scale_y));
CV_Assert( dsize.area() > 0 );
}
else
{
inv_scale_x = (double)dsize.width/src.cols;
inv_scale_y = (double)dsize.height/src.rows;
inv_scale_x = (double)dsize.width/ssize.width;
inv_scale_y = (double)dsize.height/ssize.height;
}
if( ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT &&
ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation) )
return;
Mat src = _src.getMat();
_dst.create(dsize, src.type());
Mat dst = _dst.getMat();
#ifdef HAVE_TEGRA_OPTIMIZATION
if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation))
return;

@ -0,0 +1,306 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/**************************************PUBLICFUNC*************************************/
// Enable the double-precision extension when DOUBLE_SUPPORT is defined.
#if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
// Per-depth configuration. "depth" is expected to be defined as a build option
// (the numeric depth code: 0, 2 or 5, see the #error text below). For each supported depth:
//   DATA_TYPE  - element type of one channel
//   MAX_NUM    - maximum channel value (used for the alpha channel)
//   HALF_MAX   - mid-range value (used as the chroma offset)
//   SAT_CAST   - saturating conversion to DATA_TYPE (identity for float)
//   DEPTH_n    - marker macro so kernels can test the depth with #if defined
#if depth == 0
#define DATA_TYPE uchar
#define MAX_NUM 255
#define HALF_MAX 128
#define SAT_CAST(num) convert_uchar_sat(num)
#define DEPTH_0
#elif depth == 2
#define DATA_TYPE ushort
#define MAX_NUM 65535
#define HALF_MAX 32768
#define SAT_CAST(num) convert_ushort_sat(num)
#define DEPTH_2
#elif depth == 5
#define DATA_TYPE float
#define MAX_NUM 1.0f
#define HALF_MAX 0.5f
#define SAT_CAST(num) (num)
#define DEPTH_5
#else
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#endif
// Fixed-point descale: adds half of 2^n before shifting right by n bits, i.e. a rounding shift.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
// Fixed-point constants. R2Y/G2Y/B2Y are the luma weights 0.299/0.587/0.114 scaled by 2^yuv_shift.
// xyz_shift and BLOCK_SIZE are not referenced by the kernels shown below.
enum
{
yuv_shift = 14,
xyz_shift = 12,
R2Y = 4899,
G2Y = 9617,
B2Y = 1868,
BLOCK_SIZE = 256
};
// Size in bytes of one source / destination pixel; scn and dcn (channel counts)
// are expected to be defined as build options as well.
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
///////////////////////////////////// RGB <-> GRAY //////////////////////////////////////
// Converts one RGB/BGR(A) pixel per work-item to a single gray value.
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first pixel
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// rows, cols               - image size; work-items outside it do nothing
//
// bidx (build option) is the index of the blue channel in the source pixel, so
// bidx^2 is the index of the red channel.
__kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
                       __global uchar* dstptr, int dststep, int dstoffset,
                       int rows, int cols)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        // The typed views must stay in the __global address space: an unqualified
        // pointer is __private in OpenCL C 1.x and cannot alias a __global buffer.
        __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
        __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
#if defined (DEPTH_5)
        dst[0] = src[bidx] * 0.114f + src[1] * 0.587f + src[(bidx^2)] * 0.299f;
#else
        // Integer depths use the fixed-point weights with a rounding shift.
        dst[0] = (DATA_TYPE)CV_DESCALE((src[bidx] * B2Y + src[1] * G2Y + src[(bidx^2)] * R2Y), yuv_shift);
#endif
    }
}
// Expands one gray pixel per work-item to 3 or 4 identical colour channels.
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first pixel
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// rows, cols               - image size; work-items outside it do nothing
//
// When dcn == 4 the alpha channel is set to MAX_NUM.
__kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
                       __global uchar* dstptr, int dststep, int dstoffset,
                       int rows, int cols)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        // Typed views must keep the __global address space of the underlying buffers.
        __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
        __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
        DATA_TYPE val = src[0];
        dst[0] = dst[1] = dst[2] = val;
#if dcn == 4
        dst[3] = MAX_NUM;
#endif
    }
}
///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
// RGB -> YUV coefficients, ordered { B->Y, G->Y, R->Y, U scale, V scale }.
// The integer table holds the same values in fixed point (scaled by 2^yuv_shift).
__constant float c_RGB2YUVCoeffs_f[5]  = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
__constant int   c_RGB2YUVCoeffs_i[5]  = { B2Y, G2Y, R2Y, 8061, 14369 };

// Converts one RGB/BGR(A) pixel per work-item to YUV.
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first pixel
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// rows, cols               - image size; work-items outside it do nothing
//
// bidx (build option) is the index of the blue channel in the source pixel.
__kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
                      __global uchar* dstptr, int dststep, int dstoffset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        // Typed views must keep the __global address space of the underlying buffers.
        __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
        __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
        DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2];

#if defined (DEPTH_5)
        __constant float * coeffs = c_RGB2YUVCoeffs_f;
        const DATA_TYPE Y = b * coeffs[0] + g * coeffs[1] + r * coeffs[2];
        const DATA_TYPE U = (b - Y) * coeffs[3] + HALF_MAX;
        const DATA_TYPE V = (r - Y) * coeffs[4] + HALF_MAX;
#else
        __constant int * coeffs = c_RGB2YUVCoeffs_i;
        // Chroma offset pre-scaled so it can be added before the rounding shift.
        const int delta = HALF_MAX * (1 << yuv_shift);
        const int Y = CV_DESCALE(b * coeffs[0] + g * coeffs[1] + r * coeffs[2], yuv_shift);
        const int U = CV_DESCALE((b - Y) * coeffs[3] + delta, yuv_shift);
        const int V = CV_DESCALE((r - Y) * coeffs[4] + delta, yuv_shift);
#endif

        dst[0] = SAT_CAST( Y );
        dst[1] = SAT_CAST( U );
        dst[2] = SAT_CAST( V );
    }
}
// YUV -> RGB coefficients, ordered { U->B, U->G, V->G, V->R }.
// The integer table holds the same values in fixed point (scaled by 2^yuv_shift).
// Both tables have exactly four entries; the array size now matches the initializer.
__constant float c_YUV2RGBCoeffs_f[4] = { 2.032f, -0.395f, -0.581f, 1.140f };
__constant int   c_YUV2RGBCoeffs_i[4] = { 33292, -6472, -9519, 18678 };

// Converts one YUV pixel per work-item to RGB/BGR(A).
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first pixel
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// rows, cols               - image size; work-items outside it do nothing
//
// bidx (build option) is the index of the blue channel in the destination pixel.
// When dcn == 4 the alpha channel is set to MAX_NUM.
__kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
                      __global uchar* dstptr, int dststep, int dstoffset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        // Typed views must keep the __global address space of the underlying buffers.
        __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
        __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
        DATA_TYPE Y = src[0], U = src[1], V = src[2];

#if defined (DEPTH_5)
        __constant float * coeffs = c_YUV2RGBCoeffs_f;
        const float r = Y + (V - HALF_MAX) * coeffs[3];
        const float g = Y + (V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1];
        const float b = Y + (U - HALF_MAX) * coeffs[0];
#else
        __constant int * coeffs = c_YUV2RGBCoeffs_i;
        const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift);
        const int g = Y + CV_DESCALE((V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1], yuv_shift);
        const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift);
#endif

        dst[bidx] = SAT_CAST( b );
        dst[1] = SAT_CAST( g );
        dst[bidx^2] = SAT_CAST( r );
#if dcn == 4
        dst[3] = MAX_NUM;
#endif
    }
}
// Fixed-point conversion constants, scaled by 2^ITUR_BT_601_SHIFT.
__constant int ITUR_BT_601_CY = 1220542;
__constant int ITUR_BT_601_CUB = 2116026;
__constant int ITUR_BT_601_CUG = 409993;
__constant int ITUR_BT_601_CVG = 852492;
__constant int ITUR_BT_601_CVR = 1673527;
__constant int ITUR_BT_601_SHIFT = 20;

// Writes one output pixel at pix from a raw luma sample and the chroma
// contributions (ruv/guv/buv, which already include the rounding term).
// The alpha channel is set to 255 when dcn == 4.
void storeNV12Pixel(__global uchar* pix, int luma, int ruv, int guv, int buv)
{
    const int scaledLuma = max(0, luma - 16) * ITUR_BT_601_CY;

    pix[2 - bidx] = convert_uchar_sat((scaledLuma + ruv) >> ITUR_BT_601_SHIFT);
    pix[1]        = convert_uchar_sat((scaledLuma + guv) >> ITUR_BT_601_SHIFT);
    pix[bidx]     = convert_uchar_sat((scaledLuma + buv) >> ITUR_BT_601_SHIFT);
#if dcn == 4
    pix[3] = 255;
#endif
}

// Converts a 2x2 block of pixels per work-item: four luma samples share one
// pair of chroma samples read from the rows that follow the luma plane.
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first sample
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// rows, cols               - image size; work-items with x >= cols/2 or y >= rows/2 do nothing
__kernel void YUV2RGBA_NV12(__global const uchar* srcptr, int srcstep, int srcoffset,
                            __global uchar* dstptr, int dststep, int dstoffset,
                            int rows, int cols)
{
    const int blockX = get_global_id(0); // max_x = width / 2
    const int blockY = get_global_id(1); // max_y = height/ 2

    if (blockY >= rows / 2 || blockX >= cols / 2)
        return;

    __global const uchar* lumaPtr   = srcptr + mad24(blockY << 1, srcstep, (blockX << 1) + srcoffset);
    __global const uchar* chromaPtr = srcptr + mad24(rows + blockY, srcstep, (blockX << 1) + srcoffset);
    __global uchar* upperRow = dstptr + mad24(blockY << 1, dststep, blockX*(dcn*2) + dstoffset);
    __global uchar* lowerRow = dstptr + mad24((blockY << 1) + 1, dststep, blockX*(dcn*2) + dstoffset);

    // All source samples are read before anything is written.
    const int lumaTL = lumaPtr[0];
    const int lumaTR = lumaPtr[1];
    const int lumaBL = lumaPtr[srcstep];
    const int lumaBR = lumaPtr[srcstep + 1];

    const int U = chromaPtr[0] - 128;
    const int V = chromaPtr[1] - 128;

    // Chroma contribution per colour channel, with the rounding term folded in.
    const int rounding = 1 << (ITUR_BT_601_SHIFT - 1);
    const int ruv = rounding + ITUR_BT_601_CVR * V;
    const int guv = rounding - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U;
    const int buv = rounding + ITUR_BT_601_CUB * U;

    storeNV12Pixel(upperRow,       lumaTL, ruv, guv, buv);
    storeNV12Pixel(upperRow + dcn, lumaTR, ruv, guv, buv);
    storeNV12Pixel(lowerRow,       lumaBL, ruv, guv, buv);
    storeNV12Pixel(lowerRow + dcn, lumaBR, ruv, guv, buv);
}
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////

// RGB -> YCrCb coefficients, ordered { R->Y, G->Y, B->Y, Cr scale, Cb scale }.
// The integer table holds the same values in fixed point (scaled by 2^yuv_shift).
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
__constant int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};

// Converts one RGB/BGR(A) pixel per work-item to YCrCb.
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first pixel
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// rows, cols               - image size; work-items outside it do nothing
//
// bidx (build option) is the index of the blue channel in the source pixel.
__kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset,
                        __global uchar* dstptr, int dststep, int dstoffset,
                        int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (y < rows && x < cols)
    {
        // Typed views must keep the __global address space of the underlying buffers.
        __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes));
        __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes));
        DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2];

        // The tables are ordered R, G, B, so red pairs with coeffs[0] and blue with
        // coeffs[2]; pairing them the other way round swaps the red and blue luma weights.
#if defined (DEPTH_5)
        __constant float * coeffs = c_RGB2YCrCbCoeffs_f;
        const DATA_TYPE Y = r * coeffs[0] + g * coeffs[1] + b * coeffs[2];
        const DATA_TYPE Cr = (r - Y) * coeffs[3] + HALF_MAX;
        const DATA_TYPE Cb = (b - Y) * coeffs[4] + HALF_MAX;
#else
        __constant int * coeffs = c_RGB2YCrCbCoeffs_i;
        // Chroma offset pre-scaled so it can be added before the rounding shift.
        const int delta = HALF_MAX * (1 << yuv_shift);
        const int Y = CV_DESCALE(r * coeffs[0] + g * coeffs[1] + b * coeffs[2], yuv_shift);
        const int Cr = CV_DESCALE((r - Y) * coeffs[3] + delta, yuv_shift);
        const int Cb = CV_DESCALE((b - Y) * coeffs[4] + delta, yuv_shift);
#endif

        dst[0] = SAT_CAST( Y );
        dst[1] = SAT_CAST( Cr );
        dst[2] = SAT_CAST( Cb );
    }
}

@ -0,0 +1,151 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Ying, zhangying913@gmail.com
// Niko Li, newlife20080214@gmail.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// resize kernel
// Currently, CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported.
// We shall support other types later if necessary.
// F is the floating-point type used for source-coordinate computation in resizeNN:
// double when DOUBLE_SUPPORT is defined, float otherwise.
#if defined DOUBLE_SUPPORT
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#define F double
#else
#define F float
#endif
// Fixed-point precision of a single interpolation weight used by the 8-bit path of resizeLN.
#define INTER_RESIZE_COEF_BITS 11
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
// A bilinear term multiplies two weights, so the accumulated value is shifted back by twice the weight precision.
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
// Not referenced by the kernels shown below.
#define CAST_SCALE (1.0f/(1<<CAST_BITS))
// Next index along an axis of length l, clamped to the last valid index.
#define INC(x,l) min(x+1,l-1)
// Size of one pixel in bytes; PIXTYPE is expected to be defined as a build option.
#define PIXSIZE ((int)sizeof(PIXTYPE))
// Identity conversion. NOTE(review): presumably the name substituted for convertToWT / convertToDT
// when no type conversion is required - confirm against the host-side helper that generates it.
#define noconvert(x) (x)
#if defined INTER_LINEAR
// Bilinear resize: one destination pixel per work-item.
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first pixel
// srcrows, srccols         - source image size
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// dstrows, dstcols         - destination image size; work-items outside it do nothing
// ifx, ify                 - inverse scale factors (destination -> source coordinates)
//
// Build options: PIXTYPE (pixel type), WORKTYPE (accumulator type), convertToWT /
// convertToDT (conversions between them) and depth (0 selects the fixed-point path).
__kernel void resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset,
                       int srcrows, int srccols,
                       __global uchar* dstptr, int dststep, int dstoffset,
                       int dstrows, int dstcols,
                       float ifx, float ify)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    // Nothing is written for work-items outside the destination, so skip them up front.
    if (dx >= dstcols || dy >= dstrows)
        return;

    // Map the destination pixel centre to source coordinates.
    float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
    int x = floor(sx), y = floor(sy);

    float u = sx - x, v = sy - y;

    // Clamp to the source image; at the border the fractional weight is dropped.
    if ( x<0 ) x=0,u=0;
    if ( x>=srccols ) x=srccols-1,u=0;
    if ( y<0 ) y=0,v=0;
    if ( y>=srcrows ) y=srcrows-1,v=0;

    // Right / bottom neighbours, clamped to the last column / row.
    int y_ = INC(y,srcrows);
    int x_ = INC(x,srccols);

    // The four neighbouring source pixels, read through __global-qualified typed
    // pointers (an unqualified pointer would be __private in OpenCL C 1.x).
    WORKTYPE data0 = convertToWT(*(__global const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)));
    WORKTYPE data1 = convertToWT(*(__global const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE)));
    WORKTYPE data2 = convertToWT(*(__global const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE)));
    WORKTYPE data3 = convertToWT(*(__global const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE)));

#if depth == 0
    // Fixed-point path: weights are scaled by INTER_RESIZE_COEF_SCALE.
    u = u * INTER_RESIZE_COEF_SCALE;
    v = v * INTER_RESIZE_COEF_SCALE;

    int U = rint(u);
    int V = rint(v);
    int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
    int V1 = rint(INTER_RESIZE_COEF_SCALE - v);

    WORKTYPE val = mul24((WORKTYPE)mul24(U1, V1), data0) + mul24((WORKTYPE)mul24(U, V1), data1) +
                   mul24((WORKTYPE)mul24(U1, V), data2) + mul24((WORKTYPE)mul24(U, V), data3);

    // Rounding shift back from the doubled weight precision.
    PIXTYPE uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
#else
    float u1 = 1.f-u;
    float v1 = 1.f-v;

    // Weighted sum of the four neighbours read above (data0..data3).
    PIXTYPE uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v * data2 + u * v * data3;
#endif

    __global PIXTYPE* dst = (__global PIXTYPE*)(dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE));
    dst[0] = uval;
}
#elif defined INTER_NEAREST
// Nearest-neighbour resize: one destination pixel per work-item.
//
// srcptr/srcstep/srcoffset - source buffer, row stride in bytes, byte offset of the first pixel
// srcrows, srccols         - source image size
// dstptr/dststep/dstoffset - destination buffer, row stride in bytes, byte offset of the first pixel
// dstrows, dstcols         - destination image size; work-items outside it do nothing
// ifx, ify                 - inverse scale factors (destination -> source coordinates)
__kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset,
                       int srcrows, int srccols,
                       __global uchar* dstptr, int dststep, int dstoffset,
                       int dstrows, int dstcols,
                       float ifx, float ify)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);

    if( dx < dstcols && dy < dstrows )
    {
        F s1 = dx*ifx;
        F s2 = dy*ify;
        // Truncate towards zero and clamp to the last source column / row.
        int sx = min(convert_int_rtz(s1), srccols-1);
        int sy = min(convert_int_rtz(s2), srcrows-1);

        // Typed views must keep the __global address space of the underlying buffers.
        __global PIXTYPE* dst = (__global PIXTYPE*)(dstptr +
                                mad24(dy, dststep, dstoffset + dx*PIXSIZE));
        __global const PIXTYPE* src = (__global const PIXTYPE*)(srcptr +
                                      mad24(sy, srcstep, srcoffset + sx*PIXSIZE));

        dst[0] = src[0];
    }
}
#endif

@ -48,6 +48,7 @@
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#include <math.h>
#include <assert.h>

@ -0,0 +1,82 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include <string>
using namespace cv;
using namespace std;
// Smoke test that runs the same cvtColor -> resize -> equalizeHist pipeline on
// Mat and on UMat inputs. The two results are not compared; the test reports OK
// when both pipelines complete.
class CV_ImgprocUMatTest : public cvtest::BaseTest
{
public:
    CV_ImgprocUMatTest() {}
    ~CV_ImgprocUMatTest() {}
protected:
    void run(int)
    {
        string imgpath = string(ts->get_data_path()) + "shared/lena.png";
        Mat img = imread(imgpath, 1), gray, smallimg, result;

        // Report missing test data explicitly instead of failing inside cvtColor.
        if( img.empty() )
        {
            ts->printf(cvtest::TS::LOG, "Could not load test image %s\n", imgpath.c_str());
            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
            return;
        }

        UMat uimg = img.getUMat(ACCESS_READ), ugray, usmallimg, uresult;

        // Mat pipeline.
        cvtColor(img, gray, COLOR_BGR2GRAY);
        resize(gray, smallimg, Size(), 0.75, 0.75, INTER_LINEAR);
        equalizeHist(smallimg, result);

        // The same pipeline on UMat.
        cvtColor(uimg, ugray, COLOR_BGR2GRAY);
        resize(ugray, usmallimg, Size(), 0.75, 0.75, INTER_LINEAR);
        equalizeHist(usmallimg, uresult);

#if 0
        imshow("orig", uimg);
        imshow("small", usmallimg);
        imshow("equalized gray", uresult);
        waitKey();
        destroyWindow("orig");
        destroyWindow("small");
        destroyWindow("equalized gray");
#endif

        ts->set_failed_test_info(cvtest::TS::OK);
    }
};

TEST(Imgproc_UMat, regression) { CV_ImgprocUMatTest test; test.safe_run(); }

@ -52,6 +52,8 @@
#include "opencv2/nonfree/cuda.hpp"
#include "opencv2/core/private.cuda.hpp"
#include "opencv2/core/ocl.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDAARITHM

@ -51,6 +51,8 @@
using namespace cv;
using namespace cv::ocl;
static ProgramEntry surfprog = cv::ocl::nonfree::surf;
namespace cv
{
namespace ocl
@ -499,7 +501,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
1
};
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
@ -545,7 +547,7 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
1
};
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
@ -570,7 +572,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
size_t localThreads[3] = {3, 3, 3};
size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
@ -597,7 +599,7 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
size_t localThreads[3] = {32, 4, 1};
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
@ -614,7 +616,7 @@ void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
size_t localThreads[3] = {256, 1, 1};
size_t globalThreads[3] = {saturate_cast<size_t>(nFeatures), 1, 1};
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
@ -654,7 +656,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors64";
@ -668,7 +670,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
@ -697,7 +699,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors128";
@ -711,7 +713,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
}

@ -188,8 +188,8 @@ CascadeClassifier::detectMultiScale
---------------------------------------
Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles.
.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, vector<int>& numDetections, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
.. ocv:function:: void CascadeClassifier::detectMultiScale( InputArray image, vector<Rect>& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
.. ocv:function:: void CascadeClassifier::detectMultiScale( InputArray image, vector<Rect>& objects, vector<int>& numDetections, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size())
.. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize]]]]]) -> objects
.. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize[, outputRejectLevels]]]]]]) -> objects, rejectLevels, levelWeights

@ -159,14 +159,14 @@ public:
CV_WRAP virtual bool empty() const;
CV_WRAP bool load( const String& filename );
virtual bool read( const FileNode& node );
CV_WRAP virtual void detectMultiScale( const Mat& image,
CV_WRAP virtual void detectMultiScale( InputArray image,
CV_OUT std::vector<Rect>& objects,
double scaleFactor = 1.1,
int minNeighbors = 3, int flags = 0,
Size minSize = Size(),
Size maxSize = Size() );
CV_WRAP virtual void detectMultiScale( const Mat& image,
CV_WRAP virtual void detectMultiScale( InputArray image,
CV_OUT std::vector<Rect>& objects,
CV_OUT std::vector<int>& numDetections,
double scaleFactor=1.1,
@ -174,7 +174,7 @@ public:
Size minSize=Size(),
Size maxSize=Size() );
CV_WRAP virtual void detectMultiScale( const Mat& image,
CV_WRAP virtual void detectMultiScale( InputArray image,
CV_OUT std::vector<Rect>& objects,
CV_OUT std::vector<int>& rejectLevels,
CV_OUT std::vector<double>& levelWeights,

@ -1154,13 +1154,14 @@ void CascadeClassifier::detectMultiScaleNoGrouping( const Mat& image, std::vecto
}
}
void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& objects,
void CascadeClassifier::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
std::vector<int>& rejectLevels,
std::vector<double>& levelWeights,
double scaleFactor, int minNeighbors,
int flags, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels )
{
Mat image = _image.getMat();
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
if( empty() )
@ -1188,21 +1189,23 @@ void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& o
}
}
void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& objects,
// Convenience overload: forwards to the variant that also reports reject levels
// and level weights, and discards those two outputs.
void CascadeClassifier::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
                                          double scaleFactor, int minNeighbors,
                                          int flags, Size minObjectSize, Size maxObjectSize)
{
    // Scratch outputs required by the full overload; never returned to the caller.
    std::vector<int> discardedLevels;
    std::vector<double> discardedWeights;

    Mat image = _image.getMat();
    detectMultiScale( image, objects, discardedLevels, discardedWeights,
                      scaleFactor, minNeighbors, flags,
                      minObjectSize, maxObjectSize );
}
void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& objects,
void CascadeClassifier::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
std::vector<int>& numDetections, double scaleFactor,
int minNeighbors, int flags, Size minObjectSize,
Size maxObjectSize )
{
Mat image = _image.getMat();
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
if( empty() )

@ -49,6 +49,7 @@
#include "opencv2/ml.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/ocl.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_HIGHGUI

@ -47,6 +47,7 @@
#define __OPENCV_OCL_PRIVATE_UTIL__
#include "opencv2/ocl/cl_runtime/cl_runtime.hpp"
#include "opencv2/core/ocl_genbase.hpp"
#include "opencv2/ocl.hpp"
@ -55,13 +56,6 @@ namespace cv
namespace ocl
{
struct ProgramEntry
{
const char* name;
const char* programStr;
const char* programHash;
};
inline cl_device_id getClDeviceID(const Context *ctx)
{
return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());

@ -50,11 +50,11 @@ void loadImage(string path, Mat &img)
ASSERT_FALSE(img.empty()) << "Could not load input image " << path;
}
void checkEqual(Mat img0, Mat img1, double threshold)
void checkEqual(Mat img0, Mat img1, double threshold, const string& name)
{
double max = 1.0;
minMaxLoc(abs(img0 - img1), NULL, &max);
ASSERT_FALSE(max > threshold) << max;
ASSERT_FALSE(max > threshold) << "max=" << max << " threshold=" << threshold << " method=" << name;
}
static vector<float> DEFAULT_VECTOR;
@ -98,31 +98,31 @@ TEST(Photo_Tonemap, regression)
linear->process(img, result);
loadImage(test_path + "linear.png", expected);
result.convertTo(result, CV_8UC3, 255);
checkEqual(result, expected, 3);
checkEqual(result, expected, 3, "Simple");
Ptr<TonemapDrago> drago = createTonemapDrago(gamma);
drago->process(img, result);
loadImage(test_path + "drago.png", expected);
result.convertTo(result, CV_8UC3, 255);
checkEqual(result, expected, 3);
checkEqual(result, expected, 3, "Drago");
Ptr<TonemapDurand> durand = createTonemapDurand(gamma);
durand->process(img, result);
loadImage(test_path + "durand.png", expected);
result.convertTo(result, CV_8UC3, 255);
checkEqual(result, expected, 3);
checkEqual(result, expected, 3, "Durand");
Ptr<TonemapReinhard> reinhard = createTonemapReinhard(gamma);
reinhard->process(img, result);
loadImage(test_path + "reinhard.png", expected);
result.convertTo(result, CV_8UC3, 255);
checkEqual(result, expected, 3);
checkEqual(result, expected, 3, "Reinhard");
Ptr<TonemapMantiuk> mantiuk = createTonemapMantiuk(gamma);
mantiuk->process(img, result);
loadImage(test_path + "mantiuk.png", expected);
result.convertTo(result, CV_8UC3, 255);
checkEqual(result, expected, 3);
checkEqual(result, expected, 3, "Mantiuk");
}
TEST(Photo_AlignMTB, regression)
@ -165,7 +165,7 @@ TEST(Photo_MergeMertens, regression)
loadImage(test_path + "merge/mertens.png", expected);
merge->process(images, result);
result.convertTo(result, CV_8UC3, 255);
checkEqual(expected, result, 3);
checkEqual(expected, result, 3, "Mertens");
}
TEST(Photo_MergeDebevec, regression)
@ -188,7 +188,7 @@ TEST(Photo_MergeDebevec, regression)
map->process(result, result);
map->process(expected, expected);
checkEqual(expected, result, 1e-2f);
checkEqual(expected, result, 1e-2f, "Debevec");
}
TEST(Photo_MergeRobertson, regression)
@ -208,7 +208,7 @@ TEST(Photo_MergeRobertson, regression)
map->process(result, result);
map->process(expected, expected);
checkEqual(expected, result, 1e-2f);
checkEqual(expected, result, 1e-2f, "MergeRobertson");
}
TEST(Photo_CalibrateDebevec, regression)
@ -242,5 +242,5 @@ TEST(Photo_CalibrateRobertson, regression)
Ptr<CalibrateRobertson> calibrate = createCalibrateRobertson();
calibrate->process(images, response, times);
checkEqual(expected, response, 1e-3f);
checkEqual(expected, response, 1e-3f, "CalibrateRobertson");
}

@ -64,6 +64,8 @@ using namespace cv::ocl;
using namespace cv::superres;
using namespace cv::superres::detail;
static ProgramEntry superres_btvl1 = cv::ocl::superres::superres_btvl1;
namespace cv
{
namespace ocl

@ -56,6 +56,7 @@
#include "opencv2/core/private.hpp"
#include "opencv2/core/private.cuda.hpp"
#include "opencv2/core/ocl.hpp"
#ifdef HAVE_OPENCV_CUDAARITHM
# include "opencv2/cudaarithm.hpp"

@ -0,0 +1,276 @@
#include "opencv2/objdetect.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/ocl.hpp"
#include <cctype>
#include <iostream>
#include <iterator>
#include <stdio.h>
using namespace std;
using namespace cv;
// Prints the sample's description, command-line usage and the OpenCV
// version it was built against to standard output.
static void help()
{
    // The fixed part of the banner; adjacent literals are concatenated by the compiler.
    static const char* const usageText =
        "\nThis program demonstrates the cascade recognizer. Now you can use Haar or LBP features.\n"
        "This classifier can recognize many kinds of rigid objects, once the appropriate classifier is trained.\n"
        "It's most known use is for faces.\n"
        "Usage:\n"
        "./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n"
        " [--nested-cascade[=nested_cascade_path this an optional secondary classifier such as eyes]]\n"
        " [--scale=<image scale greater or equal to 1, try 1.3 for example>]\n"
        " [--try-flip]\n"
        " [filename|camera_index]\n\n"
        "see facedetect.cmd for one call:\n"
        "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --nested-cascade=\"../../data/haarcascades/haarcascade_eye.xml\" --scale=1.3\n\n"
        "During execution:\n\tHit any key to quit.\n";

    cout << usageText;
    cout << "\tUsing OpenCV version " << CV_VERSION << "\n";
    cout << endl; // trailing blank line + flush, as a single statement did before
}
// Forward declaration; the definition follows main().
// Runs `cascade` (and `nestedCascade` when it is not empty) on `img`, draws the
// detections onto `canvas` and shows it in the "result" window.
void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
CascadeClassifier& nestedCascade,
double scale, bool tryflip );
// Default classifier files. main() overwrites these when --cascade=<path> or
// --nested-cascade=<path> is given on the command line.
string cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml";
string nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml";
int main( int argc, const char** argv )
{
VideoCapture capture;
UMat frame, image;
Mat canvas;
const string scaleOpt = "--scale=";
size_t scaleOptLen = scaleOpt.length();
const string cascadeOpt = "--cascade=";
size_t cascadeOptLen = cascadeOpt.length();
const string nestedCascadeOpt = "--nested-cascade";
size_t nestedCascadeOptLen = nestedCascadeOpt.length();
const string tryFlipOpt = "--try-flip";
size_t tryFlipOptLen = tryFlipOpt.length();
String inputName;
bool tryflip = false;
help();
CascadeClassifier cascade, nestedCascade;
double scale = 1;
for( int i = 1; i < argc; i++ )
{
cout << "Processing " << i << " " << argv[i] << endl;
if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 )
{
cascadeName.assign( argv[i] + cascadeOptLen );
cout << " from which we have cascadeName= " << cascadeName << endl;
}
else if( nestedCascadeOpt.compare( 0, nestedCascadeOptLen, argv[i], nestedCascadeOptLen ) == 0 )
{
if( argv[i][nestedCascadeOpt.length()] == '=' )
nestedCascadeName.assign( argv[i] + nestedCascadeOpt.length() + 1 );
if( !nestedCascade.load( nestedCascadeName ) )
cerr << "WARNING: Could not load classifier cascade for nested objects" << endl;
}
else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 )
{
if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale > 1 )
scale = 1;
cout << " from which we read scale = " << scale << endl;
}
else if( tryFlipOpt.compare( 0, tryFlipOptLen, argv[i], tryFlipOptLen ) == 0 )
{
tryflip = true;
cout << " will try to flip image horizontally to detect assymetric objects\n";
}
else if( argv[i][0] == '-' )
{
cerr << "WARNING: Unknown option %s" << argv[i] << endl;
}
else
inputName = argv[i];
}
if( !cascade.load( cascadeName ) )
{
cerr << "ERROR: Could not load classifier cascade" << endl;
help();
return -1;
}
if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
{
int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0';
if(!capture.open(c))
cout << "Capture from camera #" << c << " didn't work" << endl;
}
else
{
if( inputName.empty() )
inputName = "lena.jpg";
image = imread( inputName, 1 ).getUMat(ACCESS_READ);
if( image.empty() )
{
if(!capture.open( inputName ))
cout << "Could not read " << inputName << endl;
}
}
namedWindow( "result", 1 );
if( capture.isOpened() )
{
cout << "Video capturing has been started ..." << endl;
for(;;)
{
capture >> frame;
if( frame.empty() )
break;
detectAndDraw( frame, canvas, cascade, nestedCascade, scale, tryflip );
if( waitKey( 10 ) >= 0 )
break;
}
}
else
{
cout << "Detecting face(s) in " << inputName << endl;
if( !image.empty() )
{
detectAndDraw( image, canvas, cascade, nestedCascade, scale, tryflip );
waitKey(0);
}
else if( !inputName.empty() )
{
/* assume it is a text file containing the
list of the image filenames to be processed - one per line */
FILE* f = fopen( inputName.c_str(), "rt" );
if( f )
{
char buf[1000+1];
while( fgets( buf, 1000, f ) )
{
int len = (int)strlen(buf), c;
while( len > 0 && isspace(buf[len-1]) )
len--;
buf[len] = '\0';
cout << "file " << buf << endl;
image = imread( buf, 1 ).getUMat(ACCESS_READ);
if( !image.empty() )
{
detectAndDraw( image, canvas, cascade, nestedCascade, scale, tryflip );
c = waitKey(0);
if( c == 27 || c == 'q' || c == 'Q' )
break;
}
else
{
cerr << "Aw snap, couldn't read image " << buf << endl;
}
}
fclose(f);
}
}
}
return 0;
}
/**
 * Detects objects in `img`, draws them and displays the result.
 *
 * The input is converted to grayscale, resized by `scale0` (used as both fx
 * and fy of resize()), histogram-equalized and passed to `cascade`. When
 * `tryflip` is set, detection is repeated on a horizontally mirrored copy and
 * those hits are mapped back into unmirrored coordinates. If `nestedCascade`
 * is not empty it is run inside every detected rectangle.
 *
 * @param img            BGR input image.
 * @param canvas         receives the BGR rendering of the resized, equalized image
 *                       with the detections drawn on it; shown in the "result" window.
 * @param cascade        primary classifier.
 * @param nestedCascade  optional secondary classifier (skipped when empty).
 * @param scale0         resize factor applied to the grayscale image.
 * @param tryflip        also search the mirrored image.
 */
void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
                    CascadeClassifier& nestedCascade,
                    double scale0, bool tryflip )
{
    int i = 0;
    double t = 0;
    // Detections are reported in smallImg coordinates and the canvas is built
    // from smallImg, so drawing needs no additional scaling.
    const double scale = 1;
    vector<Rect> faces, faces2;
    const static Scalar colors[] =
    {
        Scalar(0,0,255),
        Scalar(0,128,255),
        Scalar(0,255,255),
        Scalar(0,255,0),
        Scalar(255,128,0),
        Scalar(255,255,0),
        Scalar(255,0,0),
        Scalar(255,0,255)
    };
    // Kept static so the buffers can be reused between calls.
    static UMat gray, smallImg, flippedImg;

    t = (double)getTickCount();

    cvtColor( img, gray, COLOR_BGR2GRAY );
    resize( gray, smallImg, Size(), scale0, scale0, INTER_LINEAR );
    equalizeHist( smallImg, smallImg );

    // Other flags that may be OR-ed in: CASCADE_FIND_BIGGEST_OBJECT, CASCADE_DO_ROUGH_SEARCH.
    cascade.detectMultiScale( smallImg, faces,
                              1.1, 2, 0
                              |CASCADE_SCALE_IMAGE
                              ,
                              Size(30, 30) );
    if( tryflip )
    {
        // Mirror into a separate buffer: smallImg must stay unmirrored because the
        // canvas and the nested-cascade ROIs below use unmirrored coordinates.
        flip(smallImg, flippedImg, 1);
        cascade.detectMultiScale( flippedImg, faces2,
                                  1.1, 2, 0
                                  |CASCADE_SCALE_IMAGE
                                  ,
                                  Size(30, 30) );
        for( vector<Rect>::const_iterator r = faces2.begin(); r != faces2.end(); r++ )
        {
            // Map the rectangle back from mirrored to original x coordinates.
            faces.push_back(Rect(flippedImg.cols - r->x - r->width, r->y, r->width, r->height));
        }
    }
    t = (double)getTickCount() - t;

    // Build the drawing surface once, outside the timed section.
    cvtColor(smallImg, canvas, COLOR_GRAY2BGR);

    double fps = getTickFrequency()/t;

    putText(canvas, format("OpenCL: %s, fps: %.1f", ocl::useOpenCL() ? "ON" : "OFF", fps), Point(250, 50),
            FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);

    for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
    {
        vector<Rect> nestedObjects;
        Point center;
        Scalar color = colors[i%8];
        int radius;

        // Roughly square detections are drawn as circles, the rest as rectangles.
        double aspect_ratio = (double)r->width/r->height;
        if( 0.75 < aspect_ratio && aspect_ratio < 1.3 )
        {
            center.x = cvRound((r->x + r->width*0.5)*scale);
            center.y = cvRound((r->y + r->height*0.5)*scale);
            radius = cvRound((r->width + r->height)*0.25*scale);
            circle( canvas, center, radius, color, 3, 8, 0 );
        }
        else
            rectangle( canvas, Point(cvRound(r->x*scale), cvRound(r->y*scale)),
                       Point(cvRound((r->x + r->width-1)*scale), cvRound((r->y + r->height-1)*scale)),
                       color, 3, 8, 0);

        if( nestedCascade.empty() )
            continue;

        UMat smallImgROI = smallImg(*r);
        // Other flags that may be OR-ed in: CASCADE_FIND_BIGGEST_OBJECT,
        // CASCADE_DO_ROUGH_SEARCH, CASCADE_DO_CANNY_PRUNING.
        nestedCascade.detectMultiScale( smallImgROI, nestedObjects,
                                        1.1, 2, 0
                                        |CASCADE_SCALE_IMAGE
                                        ,
                                        Size(30, 30) );
        for( vector<Rect>::const_iterator nr = nestedObjects.begin(); nr != nestedObjects.end(); nr++ )
        {
            // Nested hits are relative to the ROI; offset them by the parent rectangle.
            center.x = cvRound((r->x + nr->x + nr->width*0.5)*scale);
            center.y = cvRound((r->y + nr->y + nr->height*0.5)*scale);
            radius = cvRound((nr->width + nr->height)*0.25*scale);
            circle( canvas, center, radius, color, 3, 8, 0 );
        }
    }
    imshow( "result", canvas );
}

@ -11,7 +11,7 @@
using namespace std;
using namespace cv;
#define LOOP_NUM 10
#define LOOP_NUM 1
const static Scalar colors[] = { CV_RGB(0,0,255),
CV_RGB(0,128,255),
@ -46,12 +46,12 @@ static double getTime()
static void detect( Mat& img, vector<Rect>& faces,
ocl::OclCascadeClassifier& cascade,
double scale, bool calTime);
double scale);
static void detectCPU( Mat& img, vector<Rect>& faces,
CascadeClassifier& cascade,
double scale, bool calTime);
double scale);
static void Draw(Mat& img, vector<Rect>& faces, double scale);
@ -83,7 +83,7 @@ int main( int argc, const char** argv )
}
CvCapture* capture = 0;
Mat frame, frameCopy, image;
Mat frame, frameCopy0, frameCopy, image;
bool useCPU = cmd.get<bool>("s");
string inputName = cmd.get<string>("i");
@ -129,16 +129,21 @@ int main( int argc, const char** argv )
if( frame.empty() )
break;
if( iplImg->origin == IPL_ORIGIN_TL )
frame.copyTo( frameCopy );
frame.copyTo( frameCopy0 );
else
flip( frame, frameCopy, 0 );
flip( frame, frameCopy0, 0 );
if( scale == 1)
frameCopy0.copyTo(frameCopy);
else
resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR);
work_end = 0;
if(useCPU)
detectCPU(frameCopy, faces, cpu_cascade, scale, false);
detectCPU(frameCopy, faces, cpu_cascade, 1);
else
detect(frameCopy, faces, cascade, scale, false);
detect(frameCopy, faces, cascade, 1);
Draw(frameCopy, faces, scale);
Draw(frameCopy, faces, 1);
if( waitKey( 10 ) >= 0 )
break;
}
@ -150,17 +155,19 @@ int main( int argc, const char** argv )
vector<Rect> faces;
vector<Rect> ref_rst;
double accuracy = 0.;
detectCPU(image, ref_rst, cpu_cascade, scale);
work_end = 0;
for(int i = 0; i <= LOOP_NUM; i ++)
{
cout << "loop" << i << endl;
if(useCPU)
detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
detectCPU(image, faces, cpu_cascade, scale);
else
{
detect(image, faces, cascade, scale, i==0?false:true);
detect(image, faces, cascade, scale);
if(i == 0)
{
detectCPU(image, ref_rst, cpu_cascade, scale, false);
accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
}
}
@ -184,11 +191,11 @@ int main( int argc, const char** argv )
void detect( Mat& img, vector<Rect>& faces,
ocl::OclCascadeClassifier& cascade,
double scale, bool calTime)
double scale)
{
ocl::oclMat image(img);
ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
if(calTime) workBegin();
workBegin();
ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
ocl::equalizeHist( smallImg, smallImg );
@ -197,14 +204,14 @@ void detect( Mat& img, vector<Rect>& faces,
3, 0
|CASCADE_SCALE_IMAGE
, Size(30,30), Size(0, 0) );
if(calTime) workEnd();
workEnd();
}
void detectCPU( Mat& img, vector<Rect>& faces,
CascadeClassifier& cascade,
double scale, bool calTime)
double scale)
{
if(calTime) workBegin();
workBegin();
Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
cvtColor(img, cpu_gray, COLOR_BGR2GRAY);
resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
@ -212,13 +219,15 @@ void detectCPU( Mat& img, vector<Rect>& faces,
cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
3, 0 | CASCADE_SCALE_IMAGE,
Size(30, 30), Size(0, 0));
if(calTime) workEnd();
workEnd();
}
void Draw(Mat& img, vector<Rect>& faces, double scale)
{
int i = 0;
putText(img, format("fps: %.1f", 1000./getTime()), Point(450, 50),
FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
{
Point center;
@ -229,7 +238,7 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
radius = cvRound((r->width + r->height)*0.25*scale);
circle( img, center, radius, color, 3, 8, 0 );
}
imwrite( outputName, img );
//imwrite( outputName, img );
if(abs(scale-1.0)>.001)
{
resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));

Loading…
Cancel
Save