Merge pull request #2921 from ilya-lavrenov:tapi_memop

pull/3161/head
Alexander Alekhin 10 years ago
commit 6d253a2068
  1. 11
      modules/core/src/copy.cpp
  2. 8
      modules/core/src/ocl.cpp

@ -615,9 +615,15 @@ enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
{
CV_Assert(flipCode >= -1 && flipCode <= 1);
const ocl::Device & dev = ocl::Device::getDefault();
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4);
bool doubleSupport = dev.doubleFPConfig() > 0;
if (!doubleSupport && depth == CV_64F)
kercn = cn;
if (cn > 4)
return false;
@ -629,14 +635,13 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
else
kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;
ocl::Device dev = ocl::Device::getDefault();
int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
format( "-D T=%s -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
ocl::memopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
ocl::memopTypeToStr(depth), cn, pxPerWIy, kercn));
kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::memopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
kercn != cn ? ocl::typeToStr(depth) : ocl::memopTypeToStr(depth), cn, pxPerWIy, kercn));
if (k.empty())
return false;

@ -4360,10 +4360,10 @@ const char* memopTypeToStr(int type)
{
static const char* tab[] =
{
"uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
"char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
"ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
"short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
"uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
"char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
"ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
"short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
"int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
"int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
"ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",

Loading…
Cancel
Save