mirror of https://github.com/opencv/opencv.git
Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1146 lines
34 KiB
1146 lines
34 KiB
// This file is part of OpenCV project. |
|
// It is subject to the license terms in the LICENSE file found in the top-level directory |
|
// of this distribution and at http://opencv.org/license.html. |
|
|
|
#include "precomp.hpp" |
|
|
|
#include <opencv2/core/utils/trace.hpp> |
|
#include <opencv2/core/utils/trace.private.hpp> |
|
#include <opencv2/core/utils/configuration.private.hpp> |
|
|
|
#include <opencv2/core/opencl/ocl_defs.hpp> |
|
|
|
#include <cstdarg> // va_start |
|
|
|
#include <sstream> |
|
#include <ostream> |
|
#include <fstream> |
|
|
|
#if 0 |
|
#define CV_LOG(...) CV_LOG_INFO(NULL, __VA_ARGS__) |
|
#else |
|
#define CV_LOG(...) {} |
|
#endif |
|
|
|
#if 0 |
|
#define CV_LOG_ITT(...) CV_LOG_INFO(NULL, __VA_ARGS__) |
|
#else |
|
#define CV_LOG_ITT(...) {} |
|
#endif |
|
|
|
#if 1 |
|
#define CV_LOG_TRACE_BAILOUT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) |
|
#else |
|
#define CV_LOG_TRACE_BAILOUT(...) {} |
|
#endif |
|
|
|
#if 0 |
|
#define CV_LOG_PARALLEL(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) |
|
#else |
|
#define CV_LOG_PARALLEL(...) {} |
|
#endif |
|
|
|
#if 0 |
|
#define CV_LOG_CTX_STAT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) |
|
#else |
|
#define CV_LOG_CTX_STAT(...) {} |
|
#endif |
|
|
|
#if 0 |
|
#define CV_LOG_SKIP(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) |
|
#else |
|
#define CV_LOG_SKIP(...) {} |
|
#endif |
|
|
|
namespace cv { |
|
namespace utils { |
|
namespace trace { |
|
namespace details { |
|
|
|
#ifdef OPENCV_TRACE |
|
|
|
#ifdef _MSC_VER |
|
#pragma warning(disable:4065) // switch statement contains 'default' but no 'case' labels |
|
#endif |
|
|
|
static int64 g_zero_timestamp = 0; |
|
|
|
static int64 getTimestamp() |
|
{ |
|
int64 t = getTickCount(); |
|
static double tick_to_ns = 1e9 / getTickFrequency(); |
|
return (int64)((t - g_zero_timestamp) * tick_to_ns); |
|
} |
|
|
|
// TODO lazy configuration flags |
|
static bool param_traceEnable = utils::getConfigurationParameterBool("OPENCV_TRACE", false); |
|
|
|
static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_DEPTH_OPENCV", 1); |
|
static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN_OPENCV", 1000); |
|
static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN", 10000); |
|
static cv::String param_traceLocation = utils::getConfigurationParameterString("OPENCV_TRACE_LOCATION", "OpenCVTrace"); |
|
|
|
#ifdef HAVE_OPENCL |
|
static bool param_synchronizeOpenCL = utils::getConfigurationParameterBool("OPENCV_TRACE_SYNC_OPENCL", false); |
|
#endif |
|
|
|
#ifdef OPENCV_WITH_ITT |
|
static bool param_ITT_registerParentScope = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_PARENT", false); |
|
#endif |
|
|
|
static const char* _spaces(int count) |
|
{ |
|
static const char buf[64] = |
|
" "; |
|
return &buf[63 - (count & 63)]; |
|
} |
|
|
|
/** |
|
* Text-based trace messages |
|
*/ |
|
class TraceMessage |
|
{ |
|
public: |
|
char buffer[1024]; |
|
size_t len; |
|
bool hasError; |
|
|
|
TraceMessage() : |
|
len(0), |
|
hasError(false) |
|
{} |
|
|
|
bool printf(const char* format, ...) |
|
{ |
|
char* buf = &buffer[len]; |
|
size_t sz = sizeof(buffer) - len; |
|
va_list ap; |
|
va_start(ap, format); |
|
int n = cv_vsnprintf(buf, (int)sz, format, ap); |
|
va_end(ap); |
|
if (n < 0 || (size_t)n > sz) |
|
{ |
|
hasError = true; |
|
return false; |
|
} |
|
len += n; |
|
return true; |
|
} |
|
|
|
bool formatlocation(const Region::LocationStaticStorage& location) |
|
{ |
|
return this->printf("l,%lld,\"%s\",%d,\"%s\",0x%llX\n", |
|
(long long int)(*location.ppExtra)->global_location_id, |
|
location.filename, |
|
location.line, |
|
location.name, |
|
(long long int)(location.flags & ~0xF0000000)); |
|
} |
|
bool formatRegionEnter(const Region& region) |
|
{ |
|
bool ok = this->printf("b,%d,%lld,%lld,%lld", |
|
(int)region.pImpl->threadID, |
|
(long long int)region.pImpl->beginTimestamp, |
|
(long long int)((*region.pImpl->location.ppExtra)->global_location_id), |
|
(long long int)region.pImpl->global_region_id); |
|
if (region.pImpl->parentRegion && region.pImpl->parentRegion->pImpl) |
|
{ |
|
if (region.pImpl->parentRegion->pImpl->threadID != region.pImpl->threadID) |
|
ok &= this->printf(",parentThread=%d,parent=%lld", |
|
(int)region.pImpl->parentRegion->pImpl->threadID, |
|
(long long int)region.pImpl->parentRegion->pImpl->global_region_id); |
|
} |
|
ok &= this->printf("\n"); |
|
return ok; |
|
} |
|
bool formatRegionLeave(const Region& region, const RegionStatistics& result) |
|
{ |
|
CV_DbgAssert(region.pImpl->endTimestamp - region.pImpl->beginTimestamp == result.duration); |
|
bool ok = this->printf("e,%d,%lld,%lld,%lld,%lld", |
|
(int)region.pImpl->threadID, |
|
(long long int)region.pImpl->endTimestamp, |
|
(long long int)(*region.pImpl->location.ppExtra)->global_location_id, |
|
(long long int)region.pImpl->global_region_id, |
|
(long long int)result.duration); |
|
if (result.currentSkippedRegions) |
|
ok &= this->printf(",skip=%d", (int)result.currentSkippedRegions); |
|
#ifdef HAVE_IPP |
|
if (result.durationImplIPP) |
|
ok &= this->printf(",tIPP=%lld", (long long int)result.durationImplIPP); |
|
#endif |
|
#ifdef HAVE_OPENCL |
|
if (result.durationImplOpenCL) |
|
ok &= this->printf(",tOCL=%lld", (long long int)result.durationImplOpenCL); |
|
#endif |
|
#ifdef HAVE_OPENVX |
|
if (result.durationImplOpenVX) |
|
ok &= this->printf(",tOVX=%lld", (long long int)result.durationImplOpenVX); |
|
#endif |
|
ok &= this->printf("\n"); |
|
return ok; |
|
} |
|
bool recordRegionArg(const Region& region, const TraceArg& arg, const char* value) |
|
{ |
|
return this->printf("a,%d,%lld,%lld,\"%s\",\"%s\"\n", |
|
region.pImpl->threadID, |
|
(long long int)region.pImpl->beginTimestamp, |
|
(long long int)region.pImpl->global_region_id, |
|
arg.name, |
|
value); |
|
} |
|
}; |
|
|
|
|
|
#ifdef OPENCV_WITH_ITT |
|
static __itt_domain* domain = NULL; |
|
|
|
static bool isITTEnabled() |
|
{ |
|
static volatile bool isInitialized = false; |
|
static bool isEnabled = false; |
|
if (!isInitialized) |
|
{ |
|
cv::AutoLock lock(cv::getInitializationMutex()); |
|
if (!isInitialized) |
|
{ |
|
bool param_traceITTEnable = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_ENABLE", true); |
|
if (param_traceITTEnable) |
|
{ |
|
isEnabled = !!(__itt_api_version()); |
|
CV_LOG_ITT("ITT is " << (isEnabled ? "enabled" : "disabled")); |
|
domain = __itt_domain_create("OpenCVTrace"); |
|
} |
|
else |
|
{ |
|
CV_LOG_ITT("ITT is disabled through OpenCV parameter"); |
|
isEnabled = false; |
|
} |
|
isInitialized = true; |
|
} |
|
} |
|
return isEnabled; |
|
} |
|
#endif |
|
|
|
|
|
Region::LocationExtraData::LocationExtraData(const LocationStaticStorage& location) |
|
{ |
|
CV_UNUSED(location); |
|
static int g_location_id_counter = 0; |
|
global_location_id = CV_XADD(&g_location_id_counter, 1) + 1; |
|
CV_LOG("Register location: " << global_location_id << " (" << (void*)&location << ")" |
|
<< std::endl << " file: " << location.filename |
|
<< std::endl << " line: " << location.line |
|
<< std::endl << " name: " << location.name); |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
// Caching is not required here, because there is builtin cache. |
|
// https://software.intel.com/en-us/node/544203: |
|
// Consecutive calls to __itt_string_handle_create with the same name return the same value. |
|
ittHandle_name = __itt_string_handle_create(location.name); |
|
ittHandle_filename = __itt_string_handle_create(location.filename); |
|
} |
|
else |
|
{ |
|
ittHandle_name = 0; |
|
ittHandle_filename = 0; |
|
} |
|
#endif |
|
} |
|
|
|
/*static*/ Region::LocationExtraData* Region::LocationExtraData::init(const Region::LocationStaticStorage& location) |
|
{ |
|
LocationExtraData** pLocationExtra = location.ppExtra; |
|
CV_DbgAssert(pLocationExtra); |
|
if (*pLocationExtra == NULL) |
|
{ |
|
cv::AutoLock lock(cv::getInitializationMutex()); |
|
if (*pLocationExtra == NULL) |
|
{ |
|
*pLocationExtra = new Region::LocationExtraData(location); |
|
TraceStorage* s = getTraceManager().trace_storage.get(); |
|
if (s) |
|
{ |
|
TraceMessage msg; |
|
msg.formatlocation(location); |
|
s->put(msg); |
|
} |
|
} |
|
} |
|
return *pLocationExtra; |
|
} |
|
|
|
|
|
Region::Impl::Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_) : |
|
location(location_), |
|
region(region_), |
|
parentRegion(parentRegion_), |
|
threadID(ctx.threadID), |
|
global_region_id(++ctx.region_counter), |
|
beginTimestamp(beginTimestamp_), |
|
endTimestamp(0), |
|
directChildrenCount(0) |
|
#ifdef OPENCV_WITH_ITT |
|
,itt_id_registered(false) |
|
,itt_id(__itt_null) |
|
#endif |
|
{ |
|
CV_DbgAssert(ctx.currentActiveRegion == parentRegion); |
|
region.pImpl = this; |
|
|
|
registerRegion(ctx); |
|
|
|
enterRegion(ctx); |
|
} |
|
|
|
Region::Impl::~Impl() |
|
{ |
|
#ifdef OPENCV_WITH_ITT |
|
if (itt_id_registered) |
|
{ |
|
CV_LOG_ITT(" Destroy ITT region: I=" << (void*)this); |
|
__itt_id_destroy(domain, itt_id); |
|
itt_id_registered = false; |
|
} |
|
#endif |
|
region.pImpl = NULL; |
|
} |
|
|
|
void Region::Impl::enterRegion(TraceManagerThreadLocal& ctx) |
|
{ |
|
ctx.currentActiveRegion = ®ion; |
|
|
|
if (location.flags & REGION_FLAG_FUNCTION) |
|
{ |
|
if ((location.flags & REGION_FLAG_APP_CODE) == 0) |
|
{ |
|
ctx.regionDepthOpenCV++; |
|
} |
|
ctx.regionDepth++; |
|
} |
|
|
|
TraceStorage* s = ctx.getStorage(); |
|
if (s) |
|
{ |
|
TraceMessage msg; |
|
msg.formatRegionEnter(region); |
|
s->put(msg); |
|
} |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
__itt_id parentID = __itt_null; |
|
if (param_ITT_registerParentScope && parentRegion && parentRegion->pImpl && parentRegion->pImpl->itt_id_registered && (location.flags & REGION_FLAG_REGION_FORCE) == 0) |
|
parentID = parentRegion->pImpl->itt_id; |
|
__itt_task_begin(domain, itt_id, parentID, (*location.ppExtra)->ittHandle_name); |
|
} |
|
#endif |
|
} |
|
|
|
void Region::Impl::leaveRegion(TraceManagerThreadLocal& ctx) |
|
{ |
|
int64 duration = endTimestamp - beginTimestamp; CV_UNUSED(duration); |
|
RegionStatistics result; |
|
ctx.stat.grab(result); |
|
ctx.totalSkippedEvents += result.currentSkippedRegions; |
|
CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "leaveRegion(): " << (void*)this << " " << result); |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
if (result.currentSkippedRegions) |
|
{ |
|
__itt_metadata_add(domain, itt_id, __itt_string_handle_create("skipped trace entries"), __itt_metadata_u32, 1, &result.currentSkippedRegions); |
|
} |
|
#ifdef HAVE_IPP |
|
if (result.durationImplIPP) |
|
__itt_metadata_add(domain, itt_id, __itt_string_handle_create("tIPP"), __itt_metadata_u64, 1, &result.durationImplIPP); |
|
#endif |
|
#ifdef HAVE_OPENCL |
|
if (result.durationImplOpenCL) |
|
__itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenCL"), __itt_metadata_u64, 1, &result.durationImplOpenCL); |
|
#endif |
|
#ifdef HAVE_OPENVX |
|
if (result.durationImplOpenVX) |
|
__itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenVX"), __itt_metadata_u64, 1, &result.durationImplOpenVX); |
|
#endif |
|
__itt_task_end(domain); |
|
} |
|
#endif |
|
TraceStorage* s = ctx.getStorage(); |
|
if (s) |
|
{ |
|
TraceMessage msg; |
|
msg.formatRegionLeave(region, result); |
|
s->put(msg); |
|
} |
|
|
|
if (location.flags & REGION_FLAG_FUNCTION) |
|
{ |
|
if ((location.flags & REGION_FLAG_APP_CODE) == 0) |
|
{ |
|
ctx.regionDepthOpenCV--; |
|
} |
|
ctx.regionDepth--; |
|
} |
|
|
|
ctx.currentActiveRegion = parentRegion; |
|
} |
|
|
|
void Region::Impl::release() |
|
{ |
|
delete this; |
|
} |
|
|
|
void Region::Impl::registerRegion(TraceManagerThreadLocal& ctx) |
|
{ |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
if (!itt_id_registered) |
|
{ |
|
CV_LOG_ITT(" Register ITT region: I=" << (void*)this << " " << ctx.threadID << "-" << global_region_id); |
|
#if 1 // workaround for some ITT backends |
|
itt_id = __itt_id_make((void*)(intptr_t)(((int64)(ctx.threadID + 1) << 32) | global_region_id), global_region_id); |
|
#else |
|
itt_id = __itt_id_make((void*)(intptr_t)(ctx.threadID + 1), global_region_id); |
|
#endif |
|
__itt_id_create(domain, itt_id); |
|
itt_id_registered = true; |
|
} |
|
} |
|
#else |
|
CV_UNUSED(ctx); |
|
#endif |
|
} |
|
|
|
void RegionStatisticsStatus::enableSkipMode(int depth) |
|
{ |
|
CV_DbgAssert(_skipDepth < 0); |
|
CV_LOG_SKIP(NULL, "SKIP-ENABLE: depth=" << depth); |
|
_skipDepth = depth; |
|
} |
|
void RegionStatisticsStatus::checkResetSkipMode(int leaveDepth) |
|
{ |
|
if (leaveDepth <= _skipDepth) |
|
{ |
|
CV_LOG_SKIP(NULL, "SKIP-RESET: leaveDepth=" << leaveDepth << " skipDepth=" << _skipDepth); |
|
_skipDepth = -1; |
|
} |
|
} |
|
|
|
Region::Region(const LocationStaticStorage& location) : |
|
pImpl(NULL), |
|
implFlags(0) |
|
{ |
|
// Checks: |
|
// - global enable flag |
|
// - parent region is disabled |
|
// - children count threshold |
|
// - region location |
|
// - depth (opencv nested calls) |
|
if (!TraceManager::isActivated()) |
|
{ |
|
CV_LOG("Trace is disabled. Bailout"); |
|
return; |
|
} |
|
|
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region(): " << (void*)this << ": " << location.name); |
|
|
|
Region* parentRegion = ctx.stackTopRegion(); |
|
const Region::LocationStaticStorage* parentLocation = ctx.stackTopLocation(); |
|
|
|
if (location.flags & REGION_FLAG_REGION_NEXT) |
|
{ |
|
if (parentRegion && parentRegion->pImpl) |
|
{ |
|
CV_DbgAssert((parentRegion->pImpl->location.flags & REGION_FLAG_FUNCTION) == 0); |
|
parentRegion->destroy(); parentRegion->implFlags = 0; |
|
parentRegion = ctx.stackTopRegion(); |
|
parentLocation = ctx.stackTopLocation(); |
|
} |
|
} |
|
|
|
int parentChildren = 0; |
|
if (parentRegion && parentRegion->pImpl) |
|
{ |
|
if (parentLocation == NULL) |
|
{ |
|
// parallel_for_body code path |
|
parentChildren = CV_XADD(&parentRegion->pImpl->directChildrenCount, 1) + 1; |
|
} |
|
else |
|
{ |
|
parentChildren = ++parentRegion->pImpl->directChildrenCount; |
|
} |
|
} |
|
|
|
int64 beginTimestamp = getTimestamp(); |
|
|
|
int currentDepth = ctx.getCurrentDepth() + 1; |
|
switch (location.flags & REGION_FLAG_IMPL_MASK) |
|
{ |
|
#ifdef HAVE_IPP |
|
case REGION_FLAG_IMPL_IPP: |
|
if (!ctx.stat_status.ignoreDepthImplIPP) |
|
ctx.stat_status.ignoreDepthImplIPP = currentDepth; |
|
break; |
|
#endif |
|
#ifdef HAVE_OPENCL |
|
case REGION_FLAG_IMPL_OPENCL: |
|
if (!ctx.stat_status.ignoreDepthImplOpenCL) |
|
ctx.stat_status.ignoreDepthImplOpenCL = currentDepth; |
|
break; |
|
#endif |
|
#ifdef HAVE_OPENVX |
|
case REGION_FLAG_IMPL_OPENVX: |
|
if (!ctx.stat_status.ignoreDepthImplOpenVX) |
|
ctx.stat_status.ignoreDepthImplOpenVX = currentDepth; |
|
break; |
|
#endif |
|
default: |
|
break; |
|
} |
|
|
|
ctx.stackPush(this, &location, beginTimestamp); |
|
implFlags |= REGION_FLAG__NEED_STACK_POP; |
|
|
|
if ((location.flags & REGION_FLAG_REGION_FORCE) == 0) |
|
{ |
|
if (ctx.stat_status._skipDepth >= 0 && currentDepth > ctx.stat_status._skipDepth) |
|
{ |
|
CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region is disabled. Bailout"); |
|
ctx.stat.currentSkippedRegions++; |
|
return; |
|
} |
|
|
|
if (param_maxRegionChildrenOpenCV > 0 && (location.flags & REGION_FLAG_APP_CODE) == 0 && parentLocation && (parentLocation->flags & REGION_FLAG_APP_CODE) == 0) |
|
{ |
|
if (parentChildren >= param_maxRegionChildrenOpenCV) |
|
{ |
|
CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "OpenCV parent region exceeds children count. Bailout"); |
|
ctx.stat_status.enableSkipMode(currentDepth - 1); |
|
ctx.stat.currentSkippedRegions++; |
|
DEBUG_ONLY(ctx.dumpStack(std::cout, false)); |
|
return; |
|
} |
|
} |
|
if (param_maxRegionChildren > 0 && parentChildren >= param_maxRegionChildren) |
|
{ |
|
CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Parent region exceeds children count. Bailout"); |
|
ctx.stat_status.enableSkipMode(currentDepth - 1); |
|
ctx.stat.currentSkippedRegions++; |
|
DEBUG_ONLY(ctx.dumpStack(std::cout, false)); |
|
return; |
|
} |
|
} |
|
|
|
LocationExtraData::init(location); |
|
|
|
if ((*location.ppExtra)->global_location_id == 0) |
|
{ |
|
CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Region location is disabled. Bailout"); |
|
ctx.stat_status.enableSkipMode(currentDepth); |
|
ctx.stat.currentSkippedRegions++; |
|
return; |
|
} |
|
|
|
if (parentLocation && (parentLocation->flags & REGION_FLAG_SKIP_NESTED)) |
|
{ |
|
CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region disables inner regions. Bailout"); |
|
ctx.stat_status.enableSkipMode(currentDepth); |
|
ctx.stat.currentSkippedRegions++; |
|
return; |
|
} |
|
|
|
if (param_maxRegionDepthOpenCV) |
|
{ |
|
if ((location.flags & REGION_FLAG_APP_CODE) == 0) |
|
{ |
|
if (ctx.regionDepthOpenCV >= param_maxRegionDepthOpenCV) |
|
{ |
|
CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "OpenCV region depth is exceed = " << ctx.regionDepthOpenCV << ". Bailout"); |
|
if (ctx.stat.currentSkippedRegions == 0) |
|
{ |
|
DEBUG_ONLY(ctx.dumpStack(std::cout, false)); |
|
} |
|
ctx.stat_status.enableSkipMode(currentDepth); |
|
ctx.stat.currentSkippedRegions++; |
|
return; |
|
} |
|
} |
|
} |
|
|
|
new Impl(ctx, parentRegion, *this, location, beginTimestamp); |
|
CV_DbgAssert(pImpl != NULL); |
|
implFlags |= REGION_FLAG__ACTIVE; |
|
|
|
// parallel_for path |
|
if (parentRegion && parentRegion->pImpl) |
|
{ |
|
if (parentLocation == NULL) |
|
{ |
|
pImpl->directChildrenCount = parentChildren; |
|
} |
|
} |
|
} |
|
|
|
void Region::destroy() |
|
{ |
|
CV_DbgAssert(implFlags != 0); |
|
|
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region::destruct(): " << (void*)this << " pImpl=" << pImpl << " implFlags=" << implFlags << ' ' << (ctx.stackTopLocation() ? ctx.stackTopLocation()->name : "<unknown>")); |
|
|
|
CV_DbgAssert(implFlags & REGION_FLAG__NEED_STACK_POP); |
|
const int currentDepth = ctx.getCurrentDepth(); CV_UNUSED(currentDepth); |
|
|
|
CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << ctx.stat << ' ' << ctx.stat_status); |
|
|
|
const Region::LocationStaticStorage* location = ctx.stackTopLocation(); |
|
Impl::OptimizationPath myCodePath = Impl::CODE_PATH_PLAIN; |
|
if (location) |
|
{ |
|
switch (location->flags & REGION_FLAG_IMPL_MASK) |
|
{ |
|
#ifdef HAVE_IPP |
|
case REGION_FLAG_IMPL_IPP: |
|
myCodePath = Impl::CODE_PATH_IPP; |
|
break; |
|
#endif |
|
#ifdef HAVE_OPENCL |
|
case REGION_FLAG_IMPL_OPENCL: |
|
if (param_synchronizeOpenCL && cv::ocl::isOpenCLActivated()) |
|
cv::ocl::finish(); |
|
myCodePath = Impl::CODE_PATH_OPENCL; |
|
break; |
|
#endif |
|
#ifdef HAVE_OPENVX |
|
case REGION_FLAG_IMPL_OPENVX: |
|
myCodePath = Impl::CODE_PATH_OPENVX; |
|
break; |
|
#endif |
|
default: |
|
break; |
|
} |
|
} |
|
|
|
int64 endTimestamp = getTimestamp(); |
|
int64 duration = endTimestamp - ctx.stackTopBeginTimestamp(); |
|
|
|
bool active = isActive(); |
|
|
|
if (active) |
|
ctx.stat.duration = duration; |
|
else if (ctx.stack.size() == ctx.parallel_for_stack_size + 1) |
|
ctx.stat.duration += duration; |
|
|
|
switch (myCodePath) { |
|
case Impl::CODE_PATH_PLAIN: |
|
// nothing |
|
break; |
|
#ifdef HAVE_IPP |
|
case Impl::CODE_PATH_IPP: |
|
if (ctx.stat_status.ignoreDepthImplIPP == currentDepth) |
|
{ |
|
ctx.stat.durationImplIPP += duration; |
|
ctx.stat_status.ignoreDepthImplIPP = 0; |
|
} |
|
else if (active) |
|
{ |
|
ctx.stat.durationImplIPP = duration; |
|
} |
|
break; |
|
#endif |
|
#ifdef HAVE_OPENCL |
|
case Impl::CODE_PATH_OPENCL: |
|
if (ctx.stat_status.ignoreDepthImplOpenCL == currentDepth) |
|
{ |
|
ctx.stat.durationImplOpenCL += duration; |
|
ctx.stat_status.ignoreDepthImplOpenCL = 0; |
|
} |
|
else if (active) |
|
{ |
|
ctx.stat.durationImplOpenCL = duration; |
|
} |
|
break; |
|
#endif |
|
#ifdef HAVE_OPENVX |
|
case Impl::CODE_PATH_OPENVX: |
|
if (ctx.stat_status.ignoreDepthImplOpenVX == currentDepth) |
|
{ |
|
ctx.stat.durationImplOpenVX += duration; |
|
ctx.stat_status.ignoreDepthImplOpenVX = 0; |
|
} |
|
else if (active) |
|
{ |
|
ctx.stat.durationImplOpenVX = duration; |
|
} |
|
break; |
|
#endif |
|
default: |
|
break; |
|
} |
|
|
|
if (pImpl) |
|
{ |
|
CV_DbgAssert((implFlags & (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)) == (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)); |
|
CV_DbgAssert(ctx.stackTopRegion() == this); |
|
pImpl->endTimestamp = endTimestamp; |
|
pImpl->leaveRegion(ctx); |
|
pImpl->release(); |
|
pImpl = NULL; |
|
DEBUG_ONLY(implFlags &= ~REGION_FLAG__ACTIVE); |
|
} |
|
else |
|
{ |
|
CV_DbgAssert(ctx.stat_status._skipDepth <= currentDepth); |
|
} |
|
|
|
if (implFlags & REGION_FLAG__NEED_STACK_POP) |
|
{ |
|
CV_DbgAssert(ctx.stackTopRegion() == this); |
|
ctx.stackPop(); |
|
ctx.stat_status.checkResetSkipMode(currentDepth); |
|
DEBUG_ONLY(implFlags &= ~REGION_FLAG__NEED_STACK_POP); |
|
} |
|
CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << "===> " << ctx.stat << ' ' << ctx.stat_status); |
|
} |
|
|
|
|
|
TraceManagerThreadLocal::~TraceManagerThreadLocal() |
|
{ |
|
} |
|
|
|
void TraceManagerThreadLocal::dumpStack(std::ostream& out, bool onlyFunctions) const |
|
{ |
|
std::stringstream ss; |
|
std::deque<StackEntry>::const_iterator it = stack.begin(); |
|
std::deque<StackEntry>::const_iterator end = stack.end(); |
|
int depth = 0; |
|
for (; it != end; ++it) |
|
{ |
|
const Region::LocationStaticStorage* location = it->location; |
|
if (location) |
|
{ |
|
if (!onlyFunctions || (location->flags & REGION_FLAG_FUNCTION)) |
|
{ |
|
ss << _spaces(4*depth) << location->name << std::endl; |
|
depth++; |
|
} |
|
} |
|
else |
|
{ |
|
ss << _spaces(4*depth) << "<unknown>" << std::endl; |
|
depth++; |
|
} |
|
} |
|
out << ss.str(); |
|
} |
|
|
|
class AsyncTraceStorage CV_FINAL : public TraceStorage |
|
{ |
|
mutable std::ofstream out; |
|
public: |
|
const std::string name; |
|
|
|
AsyncTraceStorage(const std::string& filename) : |
|
out(filename.c_str(), std::ios::trunc), |
|
name(filename) |
|
{ |
|
out << "#description: OpenCV trace file" << std::endl; |
|
out << "#version: 1.0" << std::endl; |
|
} |
|
~AsyncTraceStorage() |
|
{ |
|
out.close(); |
|
} |
|
|
|
bool put(const TraceMessage& msg) const CV_OVERRIDE |
|
{ |
|
if (msg.hasError) |
|
return false; |
|
out << msg.buffer; |
|
//DEBUG_ONLY(std::flush(out)); // TODO configure flag |
|
return true; |
|
} |
|
}; |
|
|
|
class SyncTraceStorage CV_FINAL : public TraceStorage |
|
{ |
|
mutable std::ofstream out; |
|
mutable cv::Mutex mutex; |
|
public: |
|
const std::string name; |
|
|
|
SyncTraceStorage(const std::string& filename) : |
|
out(filename.c_str(), std::ios::trunc), |
|
name(filename) |
|
{ |
|
out << "#description: OpenCV trace file" << std::endl; |
|
out << "#version: 1.0" << std::endl; |
|
} |
|
~SyncTraceStorage() |
|
{ |
|
cv::AutoLock l(mutex); |
|
out.close(); |
|
} |
|
|
|
bool put(const TraceMessage& msg) const CV_OVERRIDE |
|
{ |
|
if (msg.hasError) |
|
return false; |
|
{ |
|
cv::AutoLock l(mutex); |
|
out << msg.buffer; |
|
std::flush(out); // TODO configure flag |
|
} |
|
return true; |
|
} |
|
}; |
|
|
|
|
|
TraceStorage* TraceManagerThreadLocal::getStorage() const |
|
{ |
|
// TODO configuration option for stdout/single trace file |
|
if (storage.empty()) |
|
{ |
|
TraceStorage* global = getTraceManager().trace_storage.get(); |
|
if (global) |
|
{ |
|
const std::string filepath = cv::format("%s-%03d.txt", param_traceLocation.c_str(), threadID).c_str(); |
|
TraceMessage msg; |
|
const char* pos = strrchr(filepath.c_str(), '/'); // extract filename |
|
#ifdef _WIN32 |
|
if (!pos) |
|
pos = strrchr(filepath.c_str(), '\\'); |
|
#endif |
|
if (!pos) |
|
pos = filepath.c_str(); |
|
else |
|
pos += 1; // fix to skip extra slash in filename beginning |
|
msg.printf("#thread file: %s\n", pos); |
|
global->put(msg); |
|
storage.reset(new AsyncTraceStorage(filepath)); |
|
} |
|
} |
|
return storage.get(); |
|
} |
|
|
|
|
|
|
|
static bool activated = false; |
|
static bool isInitialized = false; |
|
|
|
TraceManager::TraceManager() |
|
{ |
|
g_zero_timestamp = cv::getTickCount(); |
|
|
|
isInitialized = true; |
|
CV_LOG("TraceManager ctor: " << (void*)this); |
|
|
|
CV_LOG("TraceManager configure()"); |
|
activated = param_traceEnable; |
|
|
|
if (activated) |
|
trace_storage.reset(new SyncTraceStorage(std::string(param_traceLocation) + ".txt")); |
|
|
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
activated = true; // force trace pipeline activation (without OpenCV storage) |
|
__itt_region_begin(domain, __itt_null, __itt_null, __itt_string_handle_create("OpenCVTrace")); |
|
} |
|
#endif |
|
} |
|
TraceManager::~TraceManager() |
|
{ |
|
CV_LOG("TraceManager dtor: " << (void*)this); |
|
|
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
__itt_region_end(domain, __itt_null); |
|
} |
|
#endif |
|
|
|
std::vector<TraceManagerThreadLocal*> threads_ctx; |
|
tls.gather(threads_ctx); |
|
size_t totalEvents = 0, totalSkippedEvents = 0; |
|
for (size_t i = 0; i < threads_ctx.size(); i++) |
|
{ |
|
TraceManagerThreadLocal* ctx = threads_ctx[i]; |
|
if (ctx) |
|
{ |
|
totalEvents += ctx->region_counter; |
|
totalSkippedEvents += ctx->totalSkippedEvents; |
|
} |
|
} |
|
if (totalEvents || activated) |
|
{ |
|
CV_LOG_INFO(NULL, "Trace: Total events: " << totalEvents); |
|
} |
|
if (totalSkippedEvents) |
|
{ |
|
CV_LOG_WARNING(NULL, "Trace: Total skipped events: " << totalSkippedEvents); |
|
} |
|
|
|
// This is a global static object, so process starts shutdown here |
|
// Turn off trace |
|
cv::__termination = true; // also set in DllMain() notifications handler for DLL_PROCESS_DETACH |
|
activated = false; |
|
} |
|
|
|
bool TraceManager::isActivated() |
|
{ |
|
// Check if process starts shutdown, and set earlyExit to true |
|
// to prevent further instrumentation processing earlier. |
|
if (cv::__termination) |
|
{ |
|
activated = false; |
|
return false; |
|
} |
|
|
|
if (!isInitialized) |
|
{ |
|
TraceManager& m = getTraceManager(); |
|
CV_UNUSED(m); // TODO |
|
} |
|
|
|
return activated; |
|
} |
|
|
|
|
|
static TraceManager* getTraceManagerCallOnce() |
|
{ |
|
static TraceManager globalInstance; |
|
return &globalInstance; |
|
} |
|
TraceManager& getTraceManager() |
|
{ |
|
CV_SINGLETON_LAZY_INIT_REF(TraceManager, getTraceManagerCallOnce()) |
|
} |
|
|
|
void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx) |
|
{ |
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
|
|
if (ctx.dummy_stack_top.region == &rootRegion) // already attached |
|
return; |
|
|
|
CV_Assert(ctx.dummy_stack_top.region == NULL); |
|
ctx.dummy_stack_top = TraceManagerThreadLocal::StackEntry(const_cast<Region*>(&rootRegion), NULL, -1); |
|
|
|
if (&ctx == &root_ctx) |
|
{ |
|
ctx.stat.grab(ctx.parallel_for_stat); |
|
ctx.parallel_for_stat_status = ctx.stat_status; |
|
ctx.parallel_for_stack_size = ctx.stack.size(); |
|
return; |
|
} |
|
|
|
CV_Assert(ctx.stack.empty()); |
|
|
|
ctx.currentActiveRegion = const_cast<Region*>(&rootRegion); |
|
|
|
ctx.regionDepth = root_ctx.regionDepth; |
|
ctx.regionDepthOpenCV = root_ctx.regionDepthOpenCV; |
|
|
|
ctx.parallel_for_stack_size = 0; |
|
|
|
ctx.stat_status.propagateFrom(root_ctx.stat_status); |
|
} |
|
|
|
void parallelForAttachNestedRegion(const Region& rootRegion) |
|
{ |
|
CV_UNUSED(rootRegion); |
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
|
|
CV_DbgAssert(ctx.dummy_stack_top.region == &rootRegion); |
|
|
|
Region* region = ctx.getCurrentActiveRegion(); |
|
CV_LOG_PARALLEL(NULL, " PARALLEL_FOR: " << (void*)region << " ==> " << &rootRegion); |
|
if (!region) |
|
return; |
|
|
|
#ifdef OPENCV_WITH_ITT |
|
if (!rootRegion.pImpl || !rootRegion.pImpl->itt_id_registered) |
|
return; |
|
|
|
if (!region->pImpl) |
|
return; |
|
|
|
CV_LOG_PARALLEL(NULL, " PARALLEL_FOR ITT: " << (void*)rootRegion.pImpl->itt_id.d1 << ":" << rootRegion.pImpl->itt_id.d2 << ":" << (void*)rootRegion.pImpl->itt_id.d3 << " => " |
|
<< (void*)region->pImpl->itt_id.d1 << ":" << region->pImpl->itt_id.d2 << ":" << (void*)region->pImpl->itt_id.d3); |
|
__itt_relation_add(domain, region->pImpl->itt_id, __itt_relation_is_child_of, rootRegion.pImpl->itt_id); |
|
#endif |
|
} |
|
|
|
void parallelForFinalize(const Region& rootRegion) |
|
{ |
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
|
|
int64 endTimestamp = getTimestamp(); |
|
int64 duration = endTimestamp - ctx.stackTopBeginTimestamp(); |
|
CV_LOG_PARALLEL(NULL, "parallel_for duration: " << duration << " " << &rootRegion); |
|
|
|
std::vector<TraceManagerThreadLocal*> threads_ctx; |
|
getTraceManager().tls.gather(threads_ctx); |
|
RegionStatistics parallel_for_stat; |
|
int threads = 0; |
|
for (size_t i = 0; i < threads_ctx.size(); i++) |
|
{ |
|
TraceManagerThreadLocal* child_ctx = threads_ctx[i]; |
|
|
|
if (child_ctx && child_ctx->stackTopRegion() == &rootRegion) |
|
{ |
|
CV_LOG_PARALLEL(NULL, "Thread=" << child_ctx->threadID << " " << child_ctx->stat); |
|
threads++; |
|
RegionStatistics child_stat; |
|
child_ctx->stat.grab(child_stat); |
|
parallel_for_stat.append(child_stat); |
|
if (child_ctx != &ctx) |
|
{ |
|
child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry(); |
|
} |
|
else |
|
{ |
|
ctx.parallel_for_stat.grab(ctx.stat); |
|
ctx.stat_status = ctx.parallel_for_stat_status; |
|
child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry(); |
|
} |
|
} |
|
} |
|
float parallel_coeff = std::min(1.0f, duration / (float)(parallel_for_stat.duration)); |
|
CV_LOG_PARALLEL(NULL, "parallel_coeff=" << 1.0f / parallel_coeff); |
|
CV_LOG_PARALLEL(NULL, parallel_for_stat); |
|
if (parallel_coeff != 1.0f) |
|
{ |
|
parallel_for_stat.multiply(parallel_coeff); |
|
CV_LOG_PARALLEL(NULL, parallel_for_stat); |
|
} |
|
parallel_for_stat.duration = 0; |
|
ctx.stat.append(parallel_for_stat); |
|
CV_LOG_PARALLEL(NULL, ctx.stat); |
|
} |
|
|
|
struct TraceArg::ExtraData |
|
{ |
|
#ifdef OPENCV_WITH_ITT |
|
// Special fields for ITT |
|
__itt_string_handle* volatile ittHandle_name; |
|
#endif |
|
ExtraData(TraceManagerThreadLocal& ctx, const TraceArg& arg) |
|
{ |
|
CV_UNUSED(ctx); CV_UNUSED(arg); |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
// Caching is not required here, because there is builtin cache. |
|
// https://software.intel.com/en-us/node/544203: |
|
// Consecutive calls to __itt_string_handle_create with the same name return the same value. |
|
ittHandle_name = __itt_string_handle_create(arg.name); |
|
} |
|
else |
|
{ |
|
ittHandle_name = 0; |
|
} |
|
#endif |
|
} |
|
}; |
|
|
|
static void initTraceArg(TraceManagerThreadLocal& ctx, const TraceArg& arg) |
|
{ |
|
TraceArg::ExtraData** pExtra = arg.ppExtra; |
|
if (*pExtra == NULL) |
|
{ |
|
cv::AutoLock lock(cv::getInitializationMutex()); |
|
if (*pExtra == NULL) |
|
{ |
|
*pExtra = new TraceArg::ExtraData(ctx, arg); |
|
} |
|
} |
|
} |
|
void traceArg(const TraceArg& arg, const char* value) |
|
{ |
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
Region* region = ctx.getCurrentActiveRegion(); |
|
if (!region) |
|
return; |
|
CV_Assert(region->pImpl); |
|
initTraceArg(ctx, arg); |
|
if (!value) |
|
value = "<null>"; |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
__itt_metadata_str_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, value, strlen(value)); |
|
} |
|
#endif |
|
} |
|
void traceArg(const TraceArg& arg, int value) |
|
{ |
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
Region* region = ctx.getCurrentActiveRegion(); |
|
if (!region) |
|
return; |
|
CV_Assert(region->pImpl); |
|
initTraceArg(ctx, arg); |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
__itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, sizeof(int) == 4 ? __itt_metadata_s32 : __itt_metadata_s64, 1, &value); |
|
} |
|
#else |
|
CV_UNUSED(value); |
|
#endif |
|
} |
|
void traceArg(const TraceArg& arg, int64 value) |
|
{ |
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
Region* region = ctx.getCurrentActiveRegion(); |
|
if (!region) |
|
return; |
|
CV_Assert(region->pImpl); |
|
initTraceArg(ctx, arg); |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
__itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_s64, 1, &value); |
|
} |
|
#else |
|
CV_UNUSED(value); |
|
#endif |
|
} |
|
void traceArg(const TraceArg& arg, double value) |
|
{ |
|
TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); |
|
Region* region = ctx.getCurrentActiveRegion(); |
|
if (!region) |
|
return; |
|
CV_Assert(region->pImpl); |
|
initTraceArg(ctx, arg); |
|
#ifdef OPENCV_WITH_ITT |
|
if (isITTEnabled()) |
|
{ |
|
__itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_double, 1, &value); |
|
} |
|
#else |
|
CV_UNUSED(value); |
|
#endif |
|
} |
|
|
|
#else |
|
|
|
Region::Region(const LocationStaticStorage&) : pImpl(NULL), implFlags(0) {} |
|
void Region::destroy() {} |
|
|
|
void traceArg(const TraceArg&, const char*) {} |
|
void traceArg(const TraceArg&, int) {}; |
|
void traceArg(const TraceArg&, int64) {}; |
|
void traceArg(const TraceArg&, double) {}; |
|
|
|
#endif |
|
|
|
}}}} // namespace
|
|
|