Merge pull request #7618 from lupustr3:pvlasov/instrumentation_update

pull/7625/head
Alexander Alekhin 8 years ago
commit b2fa82ffcd
  1. 66
      modules/core/include/opencv2/core/private.hpp
  2. 49
      modules/core/include/opencv2/core/utility.hpp
  3. 6
      modules/core/src/dxt.cpp
  4. 4
      modules/core/src/ocl.cpp
  5. 42
      modules/core/src/parallel.cpp
  6. 2
      modules/core/src/stat.cpp
  7. 98
      modules/core/src/system.cpp
  8. 2
      modules/imgproc/src/canny.cpp
  9. 2
      modules/imgproc/src/color.cpp
  10. 2
      modules/imgproc/src/histogram.cpp
  11. 8
      modules/imgproc/src/imgwarp.cpp
  12. 2
      modules/imgproc/src/smooth.cpp
  13. 81
      modules/ts/src/ts_perf.cpp

@ -457,10 +457,11 @@ class InstrStruct
public: public:
InstrStruct() InstrStruct()
{ {
useInstr = false; useInstr = false;
enableMapping = true; flags = FLAGS_MAPPING;
maxDepth = 0;
rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN); rootNode.m_payload = NodeData("ROOT", NULL, 0, NULL, false, TYPE_GENERAL, IMPL_PLAIN);
tlsStruct.get()->pCurrentNode = &rootNode; tlsStruct.get()->pCurrentNode = &rootNode;
} }
@ -468,7 +469,8 @@ public:
Mutex mutexCount; Mutex mutexCount;
bool useInstr; bool useInstr;
bool enableMapping; int flags;
int maxDepth;
InstrNode rootNode; InstrNode rootNode;
TLSData<InstrTLSStruct> tlsStruct; TLSData<InstrTLSStruct> tlsStruct;
}; };
@ -476,7 +478,7 @@ public:
class CV_EXPORTS IntrumentationRegion class CV_EXPORTS IntrumentationRegion
{ {
public: public:
IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN); IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
~IntrumentationRegion(); ~IntrumentationRegion();
private: private:
@ -484,20 +486,28 @@ private:
uint64 m_regionTicks; uint64 m_regionTicks;
}; };
InstrStruct& getInstrumentStruct(); CV_EXPORTS InstrStruct& getInstrumentStruct();
InstrTLSStruct& getInstrumentTLSStruct(); InstrTLSStruct& getInstrumentTLSStruct();
CV_EXPORTS InstrNode* getCurrentNode(); CV_EXPORTS InstrNode* getCurrentNode();
} }
} }
///// General instrumentation #ifdef _WIN32
#define CV_INSTRUMENT_GET_RETURN_ADDRESS _ReturnAddress()
#else
#define CV_INSTRUMENT_GET_RETURN_ADDRESS __builtin_extract_return_addr(__builtin_return_address(0))
#endif
// Instrument region // Instrument region
#define CV_INSTRUMENT_REGION_META(NAME, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, TYPE, IMPL); #define CV_INSTRUMENT_REGION_META(NAME, ALWAYS_EXPAND, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, CV_INSTRUMENT_GET_RETURN_ADDRESS, ALWAYS_EXPAND, TYPE, IMPL);
// Instrument a region with a caller-supplied name.
// CV_INSTRUMENT_GET_RETURN_ADDRESS is evaluated inside an immediately-invoked
// lambda, so the address stored for the region is the lambda's return address
// (a location at the macro expansion site) instead of the return address of
// the enclosing function.
// ALWAYS_EXPAND, TYPE and IMPL are forwarded to the region constructor rather
// than being silently replaced by fixed values.
#define CV_INSTRUMENT_REGION_CUSTOM_META(NAME, ALWAYS_EXPAND, TYPE, IMPL)\
    void *__curr_address__ = [&]() {return CV_INSTRUMENT_GET_RETURN_ADDRESS;}();\
    ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, __curr_address__, ALWAYS_EXPAND, TYPE, IMPL);
// Instrument functions with non-void return type // Instrument functions with non-void return type
#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\ #define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
{\ {\
if(::cv::instr::useInstrumentation()){\ if(::cv::instr::useInstrumentation()){\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\ ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
try{\ try{\
auto status = ((FUN)(__VA_ARGS__));\ auto status = ((FUN)(__VA_ARGS__));\
if(ERROR_COND){\ if(ERROR_COND){\
@ -518,7 +528,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\ #define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
{\ {\
if(::cv::instr::useInstrumentation()){\ if(::cv::instr::useInstrumentation()){\
::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\ ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\
try{\ try{\
(FUN)(__VA_ARGS__);\ (FUN)(__VA_ARGS__);\
}catch(...){\ }catch(...){\
@ -531,17 +541,19 @@ CV_EXPORTS InstrNode* getCurrentNode();
}\ }\
}()) }())
// Instrumentation information marker // Instrumentation information marker
#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);} #define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, NULL, false, ::cv::instr::TYPE_MARKER, IMPL);}
///// General instrumentation ///// General instrumentation
// General OpenCV region instrumentation macro // General OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) #define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Parallel OpenCV region instrumentation macro // Custom OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) #define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Instrumentation for parallel_for_ or other regions which fork and gather threads.
// Passes `true` as the always-expand argument of CV_INSTRUMENT_REGION_META.
// No trailing semicolon: CV_INSTRUMENT_REGION_META already terminates its own
// statement, and the sibling region macros do not add one either.
#define CV_INSTRUMENT_REGION_MT_FORK() CV_INSTRUMENT_REGION_META(__FUNCTION__, true, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
///// IPP instrumentation ///// IPP instrumentation
// Wrapper region instrumentation macro // Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP) #define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
// Function instrumentation macro // Function instrumentation macro
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__) #define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
// Diagnostic markers // Diagnostic markers
@ -549,26 +561,28 @@ CV_EXPORTS InstrNode* getCurrentNode();
///// OpenCL instrumentation ///// OpenCL instrumentation
// Wrapper region instrumentation macro // Wrapper region instrumentation macro
#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) #define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
#define CV_INSTRUMENT_REGION_OPENCL_(NAME) CV_INSTRUMENT_REGION_META(NAME, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) // OpenCL kernel compilation wrapper
// Function instrumentation macro #define CV_INSTRUMENT_REGION_OPENCL_COMPILE(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__) // OpenCL kernel run wrapper
#define CV_INSTRUMENT_REGION_OPENCL_RUN(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL)
// Diagnostic markers // Diagnostic markers
#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME) #define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
#else #else
#define CV_INSTRUMENT_REGION_META(...) #define CV_INSTRUMENT_REGION_META(...)
#define CV_INSTRUMENT_REGION() #define CV_INSTRUMENT_REGION()
#define CV_INSTRUMENT_REGION_MT() #define CV_INSTRUMENT_REGION_NAME(...)
#define CV_INSTRUMENT_REGION_MT_FORK()
#define CV_INSTRUMENT_REGION_IPP() #define CV_INSTRUMENT_REGION_IPP()
#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__)) #define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
#define CV_INSTRUMENT_MARK_IPP(NAME) #define CV_INSTRUMENT_MARK_IPP(...)
#define CV_INSTRUMENT_REGION_OPENCL() #define CV_INSTRUMENT_REGION_OPENCL()
#define CV_INSTRUMENT_REGION_OPENCL_(...) #define CV_INSTRUMENT_REGION_OPENCL_COMPILE(...)
#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__)) #define CV_INSTRUMENT_REGION_OPENCL_RUN(...)
#define CV_INSTRUMENT_MARK_OPENCL(NAME) #define CV_INSTRUMENT_MARK_OPENCL(...)
#endif #endif
//! @endcond //! @endcond

@ -1029,7 +1029,7 @@ public:
Node<OBJECT>* findChild(OBJECT& payload) const Node<OBJECT>* findChild(OBJECT& payload) const
{ {
for(int i = 0; i < this->m_childs.size(); i++) for(size_t i = 0; i < this->m_childs.size(); i++)
{ {
if(this->m_childs[i]->m_payload == payload) if(this->m_childs[i]->m_payload == payload)
return this->m_childs[i]; return this->m_childs[i];
@ -1039,10 +1039,10 @@ public:
int findChild(Node<OBJECT> *pNode) const int findChild(Node<OBJECT> *pNode) const
{ {
for (int i = 0; i < this->m_childs.size(); i++) for (size_t i = 0; i < this->m_childs.size(); i++)
{ {
if(this->m_childs[i] == pNode) if(this->m_childs[i] == pNode)
return i; return (int)i;
} }
return -1; return -1;
} }
@ -1059,7 +1059,7 @@ public:
void removeChilds() void removeChilds()
{ {
for(int i = 0; i < m_childs.size(); i++) for(size_t i = 0; i < m_childs.size(); i++)
{ {
m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
delete m_childs[i]; delete m_childs[i];
@ -1067,6 +1067,14 @@ public:
m_childs.clear(); m_childs.clear();
} }
int getDepth()
{
int count = 0;
Node *pParent = m_pParent;
while(pParent) count++, pParent = pParent->m_pParent;
return count;
}
public: public:
OBJECT m_payload; OBJECT m_payload;
Node<OBJECT>* m_pParent; Node<OBJECT>* m_pParent;
@ -1094,10 +1102,19 @@ enum IMPL
IMPL_OPENCL, IMPL_OPENCL,
}; };
struct NodeDataTls
{
NodeDataTls()
{
m_ticksTotal = 0;
}
uint64 m_ticksTotal;
};
class CV_EXPORTS NodeData class CV_EXPORTS NodeData
{ {
public: public:
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
NodeData(NodeData &ref); NodeData(NodeData &ref);
~NodeData(); ~NodeData();
NodeData& operator=(const NodeData&); NodeData& operator=(const NodeData&);
@ -1107,17 +1124,18 @@ public:
cv::instr::IMPL m_implType; cv::instr::IMPL m_implType;
const char* m_fileName; const char* m_fileName;
int m_lineNum; int m_lineNum;
void* m_retAddress;
bool m_alwaysExpand;
bool m_funError;
volatile int m_counter; volatile int m_counter;
volatile uint64 m_ticksTotal; volatile uint64 m_ticksTotal;
TLSData<NodeDataTls> m_tls;
int m_threads;
// No synchronization // No synchronization
double getTotalMs() const { return (double)m_ticksTotal * 1000. / cv::getTickFrequency(); } double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
// No synchronization double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
double getMeanMs() const { return (double)m_ticksTotal * 1000. / (m_counter * cv::getTickFrequency()); }
bool m_funError;
bool m_stopPoint;
}; };
bool operator==(const NodeData& lhs, const NodeData& rhs); bool operator==(const NodeData& lhs, const NodeData& rhs);
@ -1134,8 +1152,9 @@ CV_EXPORTS void resetTrace();
enum FLAGS enum FLAGS
{ {
FLAGS_NONE = 0, FLAGS_NONE = 0,
FLAGS_MAPPING = 1 << 0, FLAGS_MAPPING = 0x01,
FLAGS_EXPAND_SAME_NAMES = 0x02,
}; };
CV_EXPORTS void setFlags(FLAGS modeFlags); CV_EXPORTS void setFlags(FLAGS modeFlags);

@ -1564,8 +1564,6 @@ public:
virtual void operator()(const Range& range) const virtual void operator()(const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP();
IppStatus status; IppStatus status;
Ipp8u* pBuffer = 0; Ipp8u* pBuffer = 0;
Ipp8u* pMemInit= 0; Ipp8u* pMemInit= 0;
@ -1647,8 +1645,6 @@ public:
virtual void operator()(const Range& range) const virtual void operator()(const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP();
IppStatus status; IppStatus status;
Ipp8u* pBuffer = 0; Ipp8u* pBuffer = 0;
Ipp8u* pMemInit= 0; Ipp8u* pMemInit= 0;
@ -3809,8 +3805,6 @@ public:
virtual void operator()(const Range& range) const virtual void operator()(const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP()
if(*ok == false) if(*ok == false)
return; return;

@ -3450,7 +3450,7 @@ int Kernel::set(int i, const KernelArg& arg)
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
bool sync, const Queue& q) bool sync, const Queue& q)
{ {
CV_INSTRUMENT_REGION_META(p->name.c_str(), instr::TYPE_FUN, instr::IMPL_OPENCL); CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
if(!p || !p->handle || p->e != 0) if(!p || !p->handle || p->e != 0)
return false; return false;
@ -3563,7 +3563,7 @@ struct Program::Impl
Impl(const ProgramSource& _src, Impl(const ProgramSource& _src,
const String& _buildflags, String& errmsg) const String& _buildflags, String& errmsg)
{ {
CV_INSTRUMENT_REGION_OPENCL_(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str()); CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
refcount = 1; refcount = 1;
const Context& ctx = Context::getDefault(); const Context& ctx = Context::getDefault();
src = _src; src = _src;

@ -144,7 +144,33 @@ namespace cv
namespace namespace
{ {
#ifdef CV_PARALLEL_FRAMEWORK #ifdef CV_PARALLEL_FRAMEWORK
class ParallelLoopBodyWrapper #ifdef ENABLE_INSTRUMENTATION
// Reconciles the per-thread tick counters of pNode and, recursively, of every
// node below it.
// For each gathered thread slot holding a non-zero tick count, that count is
// subtracted from the node total and the slot is reset to zero; afterwards
// only the largest of those counts is added back. m_threads is raised to the
// number of contributing slots if that number is larger than its current value.
static void SyncNodes(cv::instr::InstrNode *pNode)
{
    cv::instr::NodeData &payload = pNode->m_payload;

    std::vector<cv::instr::NodeDataTls*> perThread;
    payload.m_tls.gather(perThread);

    uint64 longest = 0;
    int activeThreads = 0;
    for(size_t t = 0; t < perThread.size(); t++)
    {
        cv::instr::NodeDataTls *pSlot = perThread[t];
        if(!pSlot || !pSlot->m_ticksTotal)
            continue; // empty slot or nothing recorded since the last reset

        if(longest < pSlot->m_ticksTotal)
            longest = pSlot->m_ticksTotal;
        payload.m_ticksTotal -= pSlot->m_ticksTotal;
        pSlot->m_ticksTotal = 0;
        activeThreads++;
    }

    payload.m_ticksTotal += longest;
    payload.m_threads = (payload.m_threads < activeThreads) ? activeThreads : payload.m_threads;

    for(size_t c = 0; c < pNode->m_childs.size(); c++)
        SyncNodes(pNode->m_childs[c]);
}
#endif
class ParallelLoopBodyWrapper : public cv::ParallelLoopBody
{ {
public: public:
ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
@ -159,6 +185,13 @@ namespace
pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode; pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
#endif #endif
} }
#ifdef ENABLE_INSTRUMENTATION
// On destruction, run SyncNodes() on every child of pThreadRoot (the
// instrumentation node captured from the TLS struct in the constructor).
// NOTE(review): pThreadRoot is dereferenced unconditionally — confirm it can
// never be null when instrumentation is compiled in.
~ParallelLoopBodyWrapper()
{
for(size_t i = 0; i < pThreadRoot->m_childs.size(); i++)
SyncNodes(pThreadRoot->m_childs[i]);
}
#endif
void operator()(const cv::Range& sr) const void operator()(const cv::Range& sr) const
{ {
#ifdef ENABLE_INSTRUMENTATION #ifdef ENABLE_INSTRUMENTATION
@ -167,6 +200,7 @@ namespace
pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
} }
#endif #endif
CV_INSTRUMENT_REGION()
cv::Range r; cv::Range r;
r.start = (int)(wholeRange.start + r.start = (int)(wholeRange.start +
@ -267,7 +301,9 @@ static SchedPtr pplScheduler;
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{ {
CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_MT_FORK()
if (range.empty())
return;
#ifdef CV_PARALLEL_FRAMEWORK #ifdef CV_PARALLEL_FRAMEWORK
@ -326,7 +362,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
#elif defined HAVE_PTHREADS_PF #elif defined HAVE_PTHREADS_PF
parallel_for_pthreads(range, body, nstripes); parallel_for_pthreads(pbody.stripeRange(), pbody, pbody.stripeRange().size());
#else #else

@ -1597,7 +1597,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
size_t globalsize = groups * wgs; size_t globalsize = groups * wgs;
if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false)) if(!k.run(1, &globalsize, &wgs, false))
return false; return false;
typedef Scalar (* part_sum)(Mat m); typedef Scalar (* part_sum)(Mat m);

@ -1340,7 +1340,7 @@ void resetTrace()
void setFlags(FLAGS modeFlags) void setFlags(FLAGS modeFlags)
{ {
#ifdef ENABLE_INSTRUMENTATION #ifdef ENABLE_INSTRUMENTATION
getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING); getInstrumentStruct().flags = modeFlags;
#else #else
CV_UNUSED(modeFlags); CV_UNUSED(modeFlags);
#endif #endif
@ -1348,31 +1348,27 @@ void setFlags(FLAGS modeFlags)
FLAGS getFlags() FLAGS getFlags()
{ {
#ifdef ENABLE_INSTRUMENTATION #ifdef ENABLE_INSTRUMENTATION
int flags = 0; return (FLAGS)getInstrumentStruct().flags;
if(getInstrumentStruct().enableMapping)
flags |= FLAGS_MAPPING;
return (FLAGS)flags;
#else #else
return (FLAGS)0; return (FLAGS)0;
#endif #endif
} }
NodeData::NodeData(const char* funName, const char* fileName, int lineNum, cv::instr::TYPE instrType, cv::instr::IMPL implType) NodeData::NodeData(const char* funName, const char* fileName, int lineNum, void* retAddress, bool alwaysExpand, cv::instr::TYPE instrType, cv::instr::IMPL implType)
{ {
m_instrType = TYPE_GENERAL; m_funName = funName;
m_implType = IMPL_PLAIN; m_instrType = instrType;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
m_retAddress = retAddress;
m_alwaysExpand = alwaysExpand;
m_funName = funName; m_threads = 1;
m_instrType = instrType; m_counter = 0;
m_implType = implType;
m_fileName = fileName;
m_lineNum = lineNum;
m_counter = 0;
m_ticksTotal = 0; m_ticksTotal = 0;
m_funError = false; m_funError = false;
m_stopPoint = false;
} }
NodeData::NodeData(NodeData &ref) NodeData::NodeData(NodeData &ref)
{ {
@ -1380,15 +1376,20 @@ NodeData::NodeData(NodeData &ref)
} }
NodeData& NodeData::operator=(const NodeData &right) NodeData& NodeData::operator=(const NodeData &right)
{ {
this->m_funName = right.m_funName; this->m_funName = right.m_funName;
this->m_instrType = right.m_instrType; this->m_instrType = right.m_instrType;
this->m_implType = right.m_implType; this->m_implType = right.m_implType;
this->m_fileName = right.m_fileName; this->m_fileName = right.m_fileName;
this->m_lineNum = right.m_lineNum; this->m_lineNum = right.m_lineNum;
this->m_retAddress = right.m_retAddress;
this->m_alwaysExpand = right.m_alwaysExpand;
this->m_threads = right.m_threads;
this->m_counter = right.m_counter; this->m_counter = right.m_counter;
this->m_ticksTotal = right.m_ticksTotal; this->m_ticksTotal = right.m_ticksTotal;
this->m_funError = right.m_funError; this->m_funError = right.m_funError;
this->m_stopPoint = right.m_stopPoint;
return *this; return *this;
} }
NodeData::~NodeData() NodeData::~NodeData()
@ -1397,7 +1398,10 @@ NodeData::~NodeData()
bool operator==(const NodeData& left, const NodeData& right) bool operator==(const NodeData& left, const NodeData& right)
{ {
if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName) if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
return true; {
if(left.m_retAddress == right.m_retAddress || !(cv::instr::getFlags()&cv::instr::FLAGS_EXPAND_SAME_NAMES || left.m_alwaysExpand))
return true;
}
return false; return false;
} }
@ -1418,7 +1422,7 @@ InstrNode* getCurrentNode()
return getInstrumentTLSStruct().pCurrentNode; return getInstrumentTLSStruct().pCurrentNode;
} }
IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType, IMPL implType) IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType, IMPL implType)
{ {
m_disabled = false; m_disabled = false;
m_regionTicks = 0; m_regionTicks = 0;
@ -1435,14 +1439,17 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file
return; return;
} }
m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint; int depth = pTLS->pCurrentNode->getDepth();
if(m_disabled) if(pStruct->maxDepth && pStruct->maxDepth <= depth)
{
m_disabled = true;
return; return;
}
NodeData payload(funName, fileName, lineNum, instrType, implType); NodeData payload(funName, fileName, lineNum, retAddress, alwaysExpand, instrType, implType);
Node<NodeData>* pChild = NULL; Node<NodeData>* pChild = NULL;
if(pStruct->enableMapping) if(pStruct->flags&FLAGS_MAPPING)
{ {
// Critical section // Critical section
cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
@ -1458,7 +1465,7 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file
pChild = pTLS->pCurrentNode->findChild(payload); pChild = pTLS->pCurrentNode->findChild(payload);
if(!pChild) if(!pChild)
{ {
pTLS->pCurrentNode->m_payload.m_stopPoint = true; m_disabled = true;
return; return;
} }
} }
@ -1476,28 +1483,23 @@ IntrumentationRegion::~IntrumentationRegion()
if(!m_disabled) if(!m_disabled)
{ {
InstrTLSStruct *pTLS = &getInstrumentTLSStruct(); InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
if(pTLS->pCurrentNode->m_payload.m_stopPoint)
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
{ {
pTLS->pCurrentNode->m_payload.m_stopPoint = false; cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
} }
else
{
if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
(pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
{
cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
}
uint64 ticks = (getTickCount() - m_regionTicks); uint64 ticks = (getTickCount() - m_regionTicks);
{ {
cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
pTLS->pCurrentNode->m_payload.m_counter++; pTLS->pCurrentNode->m_payload.m_counter++;
pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks; pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
} pTLS->pCurrentNode->m_payload.m_tls.get()->m_ticksTotal += ticks;
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
} }
pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
} }
} }
} }

@ -142,6 +142,8 @@ template <bool useCustomDeriv>
static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh, static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh,
int aperture_size, bool L2gradient, int cn, const Size & size) int aperture_size, bool L2gradient, int cn, const Size & size)
{ {
CV_INSTRUMENT_REGION_OPENCL()
UMat map; UMat map;
const ocl::Device &dev = ocl::Device::getDefault(); const ocl::Device &dev = ocl::Device::getDefault();

@ -259,8 +259,6 @@ public:
virtual void operator()(const Range& range) const virtual void operator()(const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP();
const void *yS = src_data + src_step * range.start; const void *yS = src_data + src_step * range.start;
void *yD = dst_data + dst_step * range.start; void *yD = dst_data + dst_step * range.start;
if( !cvt(yS, static_cast<int>(src_step), yD, static_cast<int>(dst_step), width, range.end - range.start) ) if( !cvt(yS, static_cast<int>(src_step), yD, static_cast<int>(dst_step), width, range.end - range.start) )

@ -1188,8 +1188,6 @@ public:
virtual void operator() (const Range & range) const virtual void operator() (const Range & range) const
{ {
CV_INSTRUMENT_REGION_IPP()
Ipp32s levelNum = histSize + 1; Ipp32s levelNum = histSize + 1;
Mat phist(hist->size(), hist->type(), Scalar::all(0)); Mat phist(hist->size(), hist->type(), Scalar::all(0));
#if IPP_VERSION_X100 >= 900 #if IPP_VERSION_X100 >= 900

@ -2795,8 +2795,6 @@ public:
virtual void operator() (const Range& range) const virtual void operator() (const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP()
if (*ok == false) if (*ok == false)
return; return;
@ -4772,8 +4770,6 @@ public:
virtual void operator() (const Range & range) const virtual void operator() (const Range & range) const
{ {
CV_INSTRUMENT_REGION_IPP()
IppiRect srcRoiRect = { 0, 0, src.cols, src.rows }; IppiRect srcRoiRect = { 0, 0, src.cols, src.rows };
Mat dstRoi = dst.rowRange(range); Mat dstRoi = dst.rowRange(range);
IppiSize dstRoiSize = ippiSize(dstRoi.size()); IppiSize dstRoiSize = ippiSize(dstRoi.size());
@ -5609,8 +5605,6 @@ public:
virtual void operator() (const Range& range) const virtual void operator() (const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP()
IppiSize srcsize = { src.cols, src.rows }; IppiSize srcsize = { src.cols, src.rows };
IppiRect srcroi = { 0, 0, src.cols, src.rows }; IppiRect srcroi = { 0, 0, src.cols, src.rows };
IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start }; IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
@ -6254,8 +6248,6 @@ public:
virtual void operator() (const Range& range) const virtual void operator() (const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP()
IppiSize srcsize = {src.cols, src.rows}; IppiSize srcsize = {src.cols, src.rows};
IppiRect srcroi = {0, 0, src.cols, src.rows}; IppiRect srcroi = {0, 0, src.cols, src.rows};
IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start}; IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};

@ -3368,8 +3368,6 @@ public:
virtual void operator() (const Range& range) const virtual void operator() (const Range& range) const
{ {
CV_INSTRUMENT_REGION_IPP()
int d = radius * 2 + 1; int d = radius * 2 + 1;
IppiSize kernel = {d, d}; IppiSize kernel = {d, d};
IppiSize roi={dst.cols, range.end - range.start}; IppiSize roi={dst.cols, range.end - range.start};

@ -46,7 +46,7 @@ static bool param_verify_sanity;
static bool param_collect_impl; static bool param_collect_impl;
#endif #endif
#ifdef ENABLE_INSTRUMENTATION #ifdef ENABLE_INSTRUMENTATION
static bool param_instrument; static int param_instrument;
#endif #endif
extern bool test_ipp_check; extern bool test_ipp_check;
@ -744,7 +744,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
} }
} }
// Check if parents have more childs // Check if parents have more childes
std::vector<cv::instr::InstrNode*> cache; std::vector<cv::instr::InstrNode*> cache;
cv::instr::InstrNode *pTmpNode = pNode; cv::instr::InstrNode *pTmpNode = pNode;
while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot) while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot)
@ -756,7 +756,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
{ {
if(cache[i]->m_pParent) if(cache[i]->m_pParent)
{ {
if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1) if(cache[i]->m_pParent->findChild(cache[i]) == (int)cache[i]->m_pParent->m_childs.size()-1)
printf(" "); printf(" ");
else else
printf("| "); printf("| ");
@ -810,48 +810,39 @@ static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoo
if(pNode->m_pParent) if(pNode->m_pParent)
{ {
printf(" - C:%d", pNode->m_payload.m_counter); printf(" - TC:%d C:%d", pNode->m_payload.m_threads, pNode->m_payload.m_counter);
printf(" T:%.4fms", pNode->m_payload.getMeanMs()); printf(" T:%.2fms", pNode->m_payload.getTotalMs());
if(pNode->m_pParent->m_pParent) if(pNode->m_pParent->m_pParent)
printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode)); printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
} }
printf("\n"); printf("\n");
// Group childes
std::vector<cv::String> groups;
{ {
bool bFound = false; // Group childes by name
for(size_t i = 0; i < pNode->m_childs.size(); i++) for(size_t i = 1; i < pNode->m_childs.size(); i++)
{ {
bFound = false; if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[i]->m_payload.m_funName )
for(size_t j = 0; j < groups.size(); j++) continue;
for(size_t j = i+1; j < pNode->m_childs.size(); j++)
{ {
if(groups[j] == pNode->m_childs[i]->m_payload.m_funName) if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[j]->m_payload.m_funName )
{ {
bFound = true; cv::swap(pNode->m_childs[i], pNode->m_childs[j]);
break; i++;
} }
} }
if(!bFound)
groups.push_back(pNode->m_childs[i]->m_payload.m_funName);
} }
} }
for(size_t g = 0; g < groups.size(); g++) for(size_t i = 0; i < pNode->m_childs.size(); i++)
{ {
for(size_t i = 0; i < pNode->m_childs.size(); i++) printShift(pNode->m_childs[i], pRoot);
{
if(pNode->m_childs[i]->m_payload.m_funName == groups[g])
{
printShift(pNode->m_childs[i], pRoot);
if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i])) if(i == pNode->m_childs.size()-1)
printf("\\---"); printf("\\---");
else else
printf("|---"); printf("|---");
printNodeRec(pNode->m_childs[i], pRoot); printNodeRec(pNode->m_childs[i], pRoot);
}
}
} }
} }
@ -871,7 +862,7 @@ static cv::String nodeToString(cv::instr::InstrNode *pNode)
else else
{ {
string = "#"; string = "#";
string += std::to_string(pNode->m_payload.m_instrType); string += std::to_string((int)pNode->m_payload.m_instrType);
string += pNode->m_payload.m_funName; string += pNode->m_payload.m_funName;
string += " - L:"; string += " - L:";
string += to_string_with_precision(calcLocalWeight(pNode)); string += to_string_with_precision(calcLocalWeight(pNode));
@ -931,19 +922,16 @@ static uint64 getTotalTime()
void InstumentData::printTree() void InstumentData::printTree()
{ {
if(cv::instr::getTrace()->m_childs.size()) printf("[ TRACE ]\n");
{ printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
printf("[ TRACE ]\n");
printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
#ifdef HAVE_IPP #ifdef HAVE_IPP
printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())); printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
#endif #endif
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())); printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
#endif #endif
printf("\n[/TRACE ]\n"); printf("\n[/TRACE ]\n");
fflush(stdout); fflush(stdout);
}
} }
#endif #endif
@ -994,7 +982,7 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
"{ perf_collect_impl |false |collect info about executed implementations}" "{ perf_collect_impl |false |collect info about executed implementations}"
#endif #endif
#ifdef ENABLE_INSTRUMENTATION #ifdef ENABLE_INSTRUMENTATION
"{ perf_instrument |false |instrument code to collect implementations trace}" "{ perf_instrument |0 |instrument code to collect implementations trace: 1 - perform instrumentation; 2 - separate functions with the same name }"
#endif #endif
"{ help h |false |print help info}" "{ help h |false |print help info}"
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
@ -1048,7 +1036,7 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
param_collect_impl = args.get<bool>("perf_collect_impl"); param_collect_impl = args.get<bool>("perf_collect_impl");
#endif #endif
#ifdef ENABLE_INSTRUMENTATION #ifdef ENABLE_INSTRUMENTATION
param_instrument = args.get<bool>("perf_instrument"); param_instrument = args.get<int>("perf_instrument");
#endif #endif
#ifdef ANDROID #ifdef ANDROID
param_affinity_mask = args.get<int>("perf_affinity_mask"); param_affinity_mask = args.get<int>("perf_affinity_mask");
@ -1081,8 +1069,12 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
cv::setUseCollection(0); cv::setUseCollection(0);
#endif #endif
#ifdef ENABLE_INSTRUMENTATION #ifdef ENABLE_INSTRUMENTATION
if(param_instrument) if(param_instrument > 0)
{
if(param_instrument == 2)
cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_EXPAND_SAME_NAMES);
cv::instr::setUseInstrumentation(true); cv::instr::setUseInstrumentation(true);
}
else else
cv::instr::setUseInstrumentation(false); cv::instr::setUseInstrumentation(false);
#endif #endif
@ -1856,6 +1848,11 @@ void TestBase::TearDown()
if (HasFailure()) if (HasFailure())
{ {
reportMetrics(false); reportMetrics(false);
#ifdef ENABLE_INSTRUMENTATION
if(cv::instr::useInstrumentation())
InstumentData::printTree();
#endif
return; return;
} }
} }

Loading…
Cancel
Save