improved cv::ocl::predictOptimalVectorWidth

pull/2956/head
Ilya Lavrenov 11 years ago
parent a350b76738
commit 2c6b7a52e9
  1. 17
      modules/core/include/opencv2/core/ocl.hpp
  2. 43
      modules/core/src/ocl.cpp

@ -598,9 +598,24 @@ CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t); CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL); CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info); CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
enum OclVectorStrategy
{
// all matrices have its own vector width
OCL_VECTOR_OWN = 0,
// all matrices have maximal vector width among all matrices
// (useful for cases when matrices have different data types)
OCL_VECTOR_MAX = 1,
// default strategy
OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
};
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);

@ -4451,42 +4451,45 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
if (!src.empty()) \ if (!src.empty()) \
{ \ { \
CV_Assert(src.isMat() || src.isUMat()); \ CV_Assert(src.isMat() || src.isUMat()); \
int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
Size csize = src.size(); \ Size csize = src.size(); \
cols.push_back(ccn * csize.width); \ int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
if (ctype != type) \ ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
if (cwidth < ckercn || ckercn <= 0) \
return 1; \
cols.push_back(cwidth); \
if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
return 1; \ return 1; \
offsets.push_back(src.offset()); \ offsets.push_back(src.offset()); \
steps.push_back(src.step()); \ steps.push_back(src.step()); \
dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
} \ } \
} \ } \
while ((void)0, 0) while ((void)0, 0)
int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
InputArray src4, InputArray src5, InputArray src6, InputArray src4, InputArray src5, InputArray src6,
InputArray src7, InputArray src8, InputArray src9) InputArray src7, InputArray src8, InputArray src9,
OclVectorStrategy strat)
{ {
int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz1 = CV_ELEM_SIZE1(depth);
Size ssize = src1.size();
const ocl::Device & d = ocl::Device::getDefault(); const ocl::Device & d = ocl::Device::getDefault();
int ref_type = src1.type();
int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(), int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
d.preferredVectorWidthShort(), d.preferredVectorWidthShort(), d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(), d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
d.preferredVectorWidthDouble(), -1 }, kercn = vectorWidths[depth]; d.preferredVectorWidthDouble(), -1 };
// if the device says don't use vectors // if the device says don't use vectors
if (vectorWidths[0] == 1) if (vectorWidths[0] == 1)
{ {
// it's heuristic // it's heuristic
int vectorWidthsOthers[] = { 16, 16, 8, 8, 1, 1, 1, -1 }; vectorWidths[0] = vectorWidths[1] = 4;
kercn = vectorWidthsOthers[depth]; vectorWidths[2] = vectorWidths[3] = 2;
vectorWidths[4] = vectorWidths[5] = vectorWidths[6] = 4;
} }
if (ssize.width * cn < kercn || kercn <= 0)
return 1;
std::vector<size_t> offsets, steps, cols; std::vector<size_t> offsets, steps, cols;
std::vector<int> dividers;
PROCESS_SRC(src1); PROCESS_SRC(src1);
PROCESS_SRC(src2); PROCESS_SRC(src2);
PROCESS_SRC(src3); PROCESS_SRC(src3);
@ -4498,23 +4501,21 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
PROCESS_SRC(src9); PROCESS_SRC(src9);
size_t size = offsets.size(); size_t size = offsets.size();
int wsz = kercn * esz1;
std::vector<int> dividers(size, wsz);
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0) while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
dividers[i] >>= 1; dividers[i] >>= 1;
// default strategy // default strategy
for (size_t i = 0; i < size; ++i) int kercn = *std::min_element(dividers.begin(), dividers.end());
if (dividers[i] != wsz)
{
kercn = 1;
break;
}
// another strategy // another strategy
// width = *std::min_element(dividers.begin(), dividers.end()); // for (size_t i = 0; i < size; ++i)
// if (dividers[i] != wsz)
// {
// kercn = 1;
// break;
// }
return kercn; return kercn;
} }

Loading…
Cancel
Save