parent
8e75947a7d
commit
e8d9ed8955
36 changed files with 1705 additions and 1540 deletions
@ -0,0 +1,507 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Guoping Long, longguoping@gmail.com
|
||||||
|
// Niko Li, newlife20080214@gmail.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include <iomanip> |
||||||
|
#include <fstream> |
||||||
|
#include "binarycaching.hpp" |
||||||
|
|
||||||
|
#undef __CL_ENABLE_EXCEPTIONS |
||||||
|
#include <CL/cl.hpp> |
||||||
|
|
||||||
|
namespace cv { namespace ocl { |
||||||
|
|
||||||
|
// Teardown hooks that are only declared here (their definitions are not part
// of this file). Both are called from ContextImpl::~ContextImpl().
extern void fft_teardown();
extern void clBlasTeardown();
||||||
|
|
||||||
|
struct PlatformInfoImpl |
||||||
|
{ |
||||||
|
cl_platform_id platform_id; |
||||||
|
|
||||||
|
std::vector<int> deviceIDs; |
||||||
|
|
||||||
|
PlatformInfo info; |
||||||
|
|
||||||
|
PlatformInfoImpl() |
||||||
|
: platform_id(NULL) |
||||||
|
{ |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
struct DeviceInfoImpl |
||||||
|
{ |
||||||
|
cl_platform_id platform_id; |
||||||
|
cl_device_id device_id; |
||||||
|
|
||||||
|
DeviceInfo info; |
||||||
|
|
||||||
|
DeviceInfoImpl() |
||||||
|
: platform_id(NULL), device_id(NULL) |
||||||
|
{ |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
static std::vector<PlatformInfoImpl> global_platforms; |
||||||
|
static std::vector<DeviceInfoImpl> global_devices; |
||||||
|
|
||||||
|
// Extracts the numeric "<major>.<minor>" pair from an OpenCL version string
// such as "OpenCL 1.2 <vendor specific text>".
//
// The version token is taken to start right after the first space that is
// followed by a digit. It ends at the next space or, when no vendor text
// follows, at the end of the string.
//
// @param versionStr  version string to parse
// @param major       receives the major version (0 on failure)
// @param minor       receives the minor version (0 on failure)
// @return true when a version token was found, false otherwise
static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
{
    major = 0;
    minor = 0;

    // Locate the first space that is immediately followed by a digit.
    size_t p0 = versionStr.find(' ');
    while (p0 != std::string::npos)
    {
        if (p0 + 1 >= versionStr.length())
            break;
        // The cast keeps isdigit() well defined for negative char values.
        if (isdigit(static_cast<unsigned char>(versionStr[p0 + 1])))
            break;
        p0 = versionStr.find(' ', p0 + 1);
    }
    if (p0 == std::string::npos)
        return false;

    const size_t p1 = versionStr.find('.', p0);
    if (p1 == std::string::npos)
        return false;

    // A missing trailing space means the minor number runs to the end of the string.
    size_t p2 = versionStr.find(' ', p1);
    if (p2 == std::string::npos)
        p2 = versionStr.length();

    const std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
    const std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
    major = atoi(majorStr.c_str());
    minor = atoi(minorStr.c_str());
    return true;
}
||||||
|
|
||||||
|
static int initializeOpenCLDevices() |
||||||
|
{ |
||||||
|
assert(global_devices.size() == 0); |
||||||
|
|
||||||
|
std::vector<cl::Platform> platforms; |
||||||
|
try |
||||||
|
{ |
||||||
|
openCLSafeCall(cl::Platform::get(&platforms)); |
||||||
|
} |
||||||
|
catch (cv::Exception& e) |
||||||
|
{ |
||||||
|
return 0; // OpenCL not found
|
||||||
|
} |
||||||
|
|
||||||
|
global_platforms.resize(platforms.size()); |
||||||
|
|
||||||
|
for (size_t i = 0; i < platforms.size(); ++i) |
||||||
|
{ |
||||||
|
PlatformInfoImpl& platformInfo = global_platforms[i]; |
||||||
|
platformInfo.info._id = i; |
||||||
|
|
||||||
|
cl::Platform& platform = platforms[i]; |
||||||
|
|
||||||
|
platformInfo.platform_id = platform(); |
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, &platformInfo.info.platformProfile)); |
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion)); |
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName)); |
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor)); |
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons)); |
||||||
|
|
||||||
|
parseOpenCLVersion(platformInfo.info.platformVersion, |
||||||
|
platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor); |
||||||
|
|
||||||
|
std::vector<cl::Device> devices; |
||||||
|
cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); |
||||||
|
if(status != CL_DEVICE_NOT_FOUND) |
||||||
|
openCLVerifyCall(status); |
||||||
|
|
||||||
|
if(devices.size() > 0) |
||||||
|
{ |
||||||
|
int baseIndx = global_devices.size(); |
||||||
|
global_devices.resize(baseIndx + devices.size()); |
||||||
|
platformInfo.deviceIDs.resize(devices.size()); |
||||||
|
platformInfo.info.devices.resize(devices.size()); |
||||||
|
|
||||||
|
for(size_t j = 0; j < devices.size(); ++j) |
||||||
|
{ |
||||||
|
cl::Device& device = devices[j]; |
||||||
|
|
||||||
|
DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j]; |
||||||
|
deviceInfo.info._id = baseIndx + j; |
||||||
|
deviceInfo.platform_id = platform(); |
||||||
|
deviceInfo.device_id = device(); |
||||||
|
|
||||||
|
deviceInfo.info.platform = &platformInfo.info; |
||||||
|
platformInfo.deviceIDs[j] = deviceInfo.info._id; |
||||||
|
|
||||||
|
cl_device_type type = -1; |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); |
||||||
|
deviceInfo.info.deviceType = DeviceType(type); |
||||||
|
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile)); |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); |
||||||
|
cl_uint vendorID = -1; |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); |
||||||
|
deviceInfo.info.deviceVendorId = vendorID; |
||||||
|
openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); |
||||||
|
|
||||||
|
parseOpenCLVersion(deviceInfo.info.deviceVersion, |
||||||
|
deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor); |
||||||
|
|
||||||
|
size_t maxWorkGroupSize = 0; |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize)); |
||||||
|
deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize; |
||||||
|
|
||||||
|
cl_uint maxDimensions = 0; |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions)); |
||||||
|
std::vector<size_t> maxWorkItemSizes(maxDimensions); |
||||||
|
openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, |
||||||
|
(void *)&maxWorkItemSizes[0], 0)); |
||||||
|
deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes; |
||||||
|
|
||||||
|
cl_uint maxComputeUnits = 0; |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits)); |
||||||
|
deviceInfo.info.maxComputeUnits = maxComputeUnits; |
||||||
|
|
||||||
|
cl_ulong localMemorySize = 0; |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize)); |
||||||
|
deviceInfo.info.localMemorySize = (size_t)localMemorySize; |
||||||
|
|
||||||
|
|
||||||
|
cl_bool unifiedMemory = false; |
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory)); |
||||||
|
deviceInfo.info.isUnifiedMemory = unifiedMemory != 0; |
||||||
|
|
||||||
|
//initialize extra options for compilation. Currently only fp64 is included.
|
||||||
|
//Assume 4KB is enough to store all possible extensions.
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); |
||||||
|
|
||||||
|
size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64"); |
||||||
|
if(fp64_khr != std::string::npos) |
||||||
|
{ |
||||||
|
deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT"; |
||||||
|
deviceInfo.info.haveDoubleSupport = true; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
deviceInfo.info.haveDoubleSupport = false; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
for (size_t i = 0; i < platforms.size(); ++i) |
||||||
|
{ |
||||||
|
PlatformInfoImpl& platformInfo = global_platforms[i]; |
||||||
|
for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j) |
||||||
|
{ |
||||||
|
DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]]; |
||||||
|
platformInfo.info.devices[j] = &deviceInfo.info; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return global_devices.size(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
// Builds an "unset" device description: id and vendor id are -1, limits and
// version numbers are 0, feature flags are false and no platform is attached.
DeviceInfo::DeviceInfo()
    : _id(-1),
      deviceType(DeviceType(0)),
      deviceVendorId(-1),
      maxWorkGroupSize(0),
      maxComputeUnits(0),
      localMemorySize(0),
      deviceVersionMajor(0),
      deviceVersionMinor(0),
      haveDoubleSupport(false),
      isUnifiedMemory(false),
      platform(NULL)
{
}
||||||
|
|
||||||
|
// Builds an "unset" platform description: id is -1 and the version is 0.0.
PlatformInfo::PlatformInfo()
    : _id(-1),
      platformVersionMajor(0),
      platformVersionMinor(0)
{
}
||||||
|
|
||||||
|
//////////////////////////////// OpenCL context ////////////////////////
|
||||||
|
//This is a global singleton class used to represent a OpenCL context.
|
||||||
|
class ContextImpl : public Context |
||||||
|
{ |
||||||
|
public: |
||||||
|
const cl_device_id clDeviceID; |
||||||
|
cl_context clContext; |
||||||
|
cl_command_queue clCmdQueue; |
||||||
|
const DeviceInfo& deviceInfo; |
||||||
|
|
||||||
|
protected: |
||||||
|
ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID) |
||||||
|
: clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo) |
||||||
|
{ |
||||||
|
// nothing
|
||||||
|
} |
||||||
|
~ContextImpl(); |
||||||
|
public: |
||||||
|
|
||||||
|
static ContextImpl* getContext(); |
||||||
|
static void setContext(const DeviceInfo* deviceInfo); |
||||||
|
|
||||||
|
bool supportsFeature(FEATURE_TYPE featureType) const; |
||||||
|
|
||||||
|
static void cleanupContext(void); |
||||||
|
}; |
||||||
|
|
||||||
|
static cv::Mutex currentContextMutex; |
||||||
|
static ContextImpl* currentContext = NULL; |
||||||
|
|
||||||
|
// Returns the current context, or NULL when none has been set yet.
Context* Context::getContext()
{
    Context* const active = currentContext;
    return active;
}
||||||
|
|
||||||
|
// Forwards to ContextImpl::supportsFeature(); this object is treated as a ContextImpl.
bool Context::supportsFeature(FEATURE_TYPE featureType) const
{
    const ContextImpl* const impl = static_cast<const ContextImpl*>(this);
    return impl->supportsFeature(featureType);
}
||||||
|
|
||||||
|
// Returns the DeviceInfo stored in the underlying ContextImpl.
const DeviceInfo& Context::getDeviceInfo() const
{
    const ContextImpl* const impl = static_cast<const ContextImpl*>(this);
    return impl->deviceInfo;
}
||||||
|
|
||||||
|
const void* Context::getOpenCLContextPtr() const |
||||||
|
{ |
||||||
|
return &(((ContextImpl*)this)->clContext); |
||||||
|
} |
||||||
|
|
||||||
|
const void* Context::getOpenCLCommandQueuePtr() const |
||||||
|
{ |
||||||
|
return &(((ContextImpl*)this)->clCmdQueue); |
||||||
|
} |
||||||
|
|
||||||
|
const void* Context::getOpenCLDeviceIDPtr() const |
||||||
|
{ |
||||||
|
return &(((ContextImpl*)this)->clDeviceID); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
// Reports whether the device behind this context offers the given feature.
// Raises CV_StsBadArg for a feature type that is not handled below.
bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
{
    switch (featureType)
    {
    case FEATURE_CL_DOUBLE:
        return deviceInfo.haveDoubleSupport;

    case FEATURE_CL_UNIFIED_MEM:
        return deviceInfo.isUnifiedMemory;

    case FEATURE_CL_VER_1_2:
        // True for device version 1.2 or anything newer.
        if (deviceInfo.deviceVersionMajor != 1)
            return deviceInfo.deviceVersionMajor > 1;
        return deviceInfo.deviceVersionMinor >= 2;
    }

    CV_Error(CV_StsBadArg, "Invalid feature type");
    return false;
}
||||||
|
|
||||||
|
#ifdef WIN32
// Set by DllMain() when the DLL is detached because the process is exiting;
// ContextImpl::~ContextImpl() then skips releasing the OpenCL handles.
static bool __termination = false;
#endif
||||||
|
|
||||||
|
// Runs the FFT / BLAS teardown hooks, then releases the command queue and the
// context (in that order) and clears both handles.
ContextImpl::~ContextImpl()
{
    fft_teardown();
    clBlasTeardown();

#ifdef WIN32
    // If the process is terminating (ExitProcess was called and other threads
    // were terminated), skip the release calls because they may hang.
    const bool releaseHandles = !__termination;
#else
    const bool releaseHandles = true;
#endif

    if (releaseHandles)
    {
        if (clCmdQueue)
        {
            openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here
        }
        if (clContext)
        {
            openCLSafeCall(clReleaseContext(clContext));
        }
    }

    clCmdQueue = NULL;
    clContext = NULL;
}
||||||
|
|
||||||
|
void ContextImpl::cleanupContext(void) |
||||||
|
{ |
||||||
|
cv::AutoLock lock(currentContextMutex); |
||||||
|
if (currentContext) |
||||||
|
delete currentContext; |
||||||
|
currentContext = NULL; |
||||||
|
} |
||||||
|
|
||||||
|
void ContextImpl::setContext(const DeviceInfo* deviceInfo) |
||||||
|
{ |
||||||
|
CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); |
||||||
|
|
||||||
|
DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id]; |
||||||
|
CV_Assert(deviceInfo == &infoImpl.info); |
||||||
|
|
||||||
|
cl_int status = 0; |
||||||
|
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 }; |
||||||
|
cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status); |
||||||
|
openCLVerifyCall(status); |
||||||
|
// TODO add CL_QUEUE_PROFILING_ENABLE
|
||||||
|
cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status); |
||||||
|
openCLVerifyCall(status); |
||||||
|
|
||||||
|
ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id); |
||||||
|
ctx->clCmdQueue = clCmdQueue; |
||||||
|
ctx->clContext = clContext; |
||||||
|
|
||||||
|
ContextImpl* old = NULL; |
||||||
|
{ |
||||||
|
cv::AutoLock lock(currentContextMutex); |
||||||
|
old = currentContext; |
||||||
|
currentContext = ctx; |
||||||
|
} |
||||||
|
if (old != NULL) |
||||||
|
{ |
||||||
|
delete old; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Returns the current context as a ContextImpl, or NULL when none has been set.
ContextImpl* ContextImpl::getContext()
{
    ContextImpl* const active = currentContext;
    return active;
}
||||||
|
|
||||||
|
// Replaces the contents of `platforms` with pointers to the PlatformInfo of
// every entry in global_platforms.
//
// @return the number of platforms stored in `platforms`
int getOpenCLPlatforms(PlatformsInfo& platforms)
{
    platforms.clear();

    for (std::vector<PlatformInfoImpl>::iterator it = global_platforms.begin();
         it != global_platforms.end(); ++it)
    {
        platforms.push_back(&it->info);
    }

    return static_cast<int>(platforms.size());
}
||||||
|
|
||||||
|
// Collects the devices whose type matches `deviceType`.
//
// @param devices     output list; cleared first, then filled with pointers to
//                    the matching DeviceInfo entries
// @param deviceType  one of the CVCL_DEVICE_TYPE_* values; any other value
//                    yields an empty list and a return value of 0
// @param platform    when non-NULL only the devices of this platform are
//                    considered, otherwise all entries of global_devices
// @return the number of devices stored in `devices`
int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
{
    devices.clear();

    switch(deviceType)
    {
    case CVCL_DEVICE_TYPE_DEFAULT:
    case CVCL_DEVICE_TYPE_CPU:
    case CVCL_DEVICE_TYPE_GPU:
    case CVCL_DEVICE_TYPE_ACCELERATOR:
    case CVCL_DEVICE_TYPE_ALL:
        break;
    default:
        return 0;
    }

    // A device matches when it shares at least one type bit with the request.
    // Requiring every requested bit to be present (the previous "== deviceType"
    // test) can never succeed for a multi-bit mask.
    // NOTE(review): assumes CVCL_DEVICE_TYPE_ALL is a multi-bit mask - confirm
    // against the enum declaration.
    if (platform == NULL)
    {
        for (size_t id = 0; id < global_devices.size(); ++id)
        {
            DeviceInfoImpl& deviceInfo = global_devices[id];
            if (((int)deviceInfo.info.deviceType & deviceType) != 0)
            {
                devices.push_back(&deviceInfo.info);
            }
        }
    }
    else
    {
        for (size_t id = 0; id < platform->devices.size(); ++id)
        {
            const DeviceInfo* deviceInfo = platform->devices[id];
            if (((int)deviceInfo->deviceType & deviceType) != 0)
            {
                devices.push_back(deviceInfo);
            }
        }
    }

    return (int)devices.size();
}
||||||
|
|
||||||
|
void setDevice(const DeviceInfo* info) |
||||||
|
{ |
||||||
|
ContextImpl::setContext(info); |
||||||
|
} |
||||||
|
|
||||||
|
// Asks the current context whether its device offers the given feature.
// The result of Context::getContext() is dereferenced without a NULL check.
bool supportsFeature(FEATURE_TYPE featureType)
{
    Context* const ctx = Context::getContext();
    return ctx->supportsFeature(featureType);
}
||||||
|
|
||||||
|
struct __Module |
||||||
|
{ |
||||||
|
__Module() { initializeOpenCLDevices(); } |
||||||
|
~__Module() { ContextImpl::cleanupContext(); } |
||||||
|
}; |
||||||
|
static __Module __module; |
||||||
|
|
||||||
|
|
||||||
|
}//namespace ocl
|
||||||
|
}//namespace cv
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(WIN32) && defined(CVAPI_EXPORTS) |
||||||
|
|
||||||
|
extern "C" |
||||||
|
BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved) |
||||||
|
{ |
||||||
|
if (fdwReason == DLL_PROCESS_DETACH) |
||||||
|
{ |
||||||
|
if (lpReserved != NULL) // called after ExitProcess() call
|
||||||
|
cv::ocl::__termination = true; |
||||||
|
} |
||||||
|
return TRUE; |
||||||
|
} |
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,434 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Guoping Long, longguoping@gmail.com
|
||||||
|
// Niko Li, newlife20080214@gmail.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include <iomanip> |
||||||
|
#include <fstream> |
||||||
|
#include "binarycaching.hpp" |
||||||
|
|
||||||
|
#undef __CL_ENABLE_EXCEPTIONS |
||||||
|
#include <CL/cl.hpp> |
||||||
|
|
||||||
|
//#define PRINT_KERNEL_RUN_TIME
|
||||||
|
// Number of kernel launches per measurement in the PRINT_KERNEL_RUN_TIME path.
#define RUN_TIMES 100

// Fall back to 0 when the OpenCL headers do not define this flag.
#if !defined(CL_MEM_USE_PERSISTENT_MEM_AMD)
#define CL_MEM_USE_PERSISTENT_MEM_AMD 0
#endif
||||||
|
//#define AMD_DOUBLE_DIFFER
|
||||||
|
|
||||||
|
namespace cv { namespace ocl { |
||||||
|
|
||||||
|
DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; |
||||||
|
DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; |
||||||
|
int gDevMemTypeValueMap[5] = {0, |
||||||
|
CL_MEM_ALLOC_HOST_PTR, |
||||||
|
CL_MEM_USE_HOST_PTR, |
||||||
|
CL_MEM_COPY_HOST_PTR, |
||||||
|
CL_MEM_USE_PERSISTENT_MEM_AMD}; |
||||||
|
int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; |
||||||
|
|
||||||
|
void finish() |
||||||
|
{ |
||||||
|
clFinish(getClCommandQueue(Context::getContext())); |
||||||
|
} |
||||||
|
|
||||||
|
bool isCpuDevice() |
||||||
|
{ |
||||||
|
const DeviceInfo& info = Context::getContext()->getDeviceInfo(); |
||||||
|
return (info.deviceType == CVCL_DEVICE_TYPE_CPU); |
||||||
|
} |
||||||
|
|
||||||
|
// Returns the preferred work-group size multiple of `kernel` on the current
// device. For a CPU device the result is always 1 and the kernel is not
// inspected (not even checked for NULL).
size_t queryWaveFrontSize(cl_kernel kernel)
{
    const DeviceInfo& device = Context::getContext()->getDeviceInfo();
    if (device.deviceType == CVCL_DEVICE_TYPE_CPU)
    {
        return 1;
    }

    size_t wavefront = 0;
    CV_Assert(kernel != NULL);

    const cl_device_id deviceId = getClDeviceID(Context::getContext());
    openCLSafeCall(clGetKernelWorkGroupInfo(kernel, deviceId,
                                            CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
                                            sizeof(size_t), &wavefront, NULL));
    return wavefront;
}
||||||
|
|
||||||
|
|
||||||
|
// Blocking read of `size` bytes from the start of `dst_buffer` into
// `host_buffer`, using the command queue of `ctx`.
void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size)
{
    const cl_int status = clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer,
                                              CL_TRUE /* blocking */, 0 /* offset */,
                                              size, host_buffer, 0, NULL, NULL);
    openCLVerifyCall(status);
}
||||||
|
|
||||||
|
// Creates a device buffer of `size` bytes in the context of `ctx`.
// `flag` is passed to clCreateBuffer() as the cl_mem_flags argument.
cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size)
{
    cl_int status = 0;
    const cl_mem_flags memFlags = (cl_mem_flags)flag;
    cl_mem buffer = clCreateBuffer(getClContext(ctx), memFlags, size, NULL, &status);
    openCLVerifyCall(status);
    return buffer;
}
||||||
|
|
||||||
|
// Allocates a 2D device buffer using the global default access mode
// (gDeviceMemRW) and memory type (gDeviceMemType).
void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch,
                       size_t widthInBytes, size_t height)
{
    openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height,
                        gDeviceMemRW, gDeviceMemType);
}
||||||
|
|
||||||
|
// Allocates a device buffer of widthInBytes * height bytes.
//
// @param dev_ptr   receives the created cl_mem
// @param pitch     receives the row pitch, which is always widthInBytes
// @param rw_type   index into gDevMemRWValueMap (access flags)
// @param mem_type  index into gDevMemTypeValueMap (allocation flags)
void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
                         size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
{
    cl_int status = 0;
    const size_t totalBytes = widthInBytes * height;

    *dev_ptr = clCreateBuffer(getClContext(ctx),
                              gDevMemRWValueMap[rw_type] | gDevMemTypeValueMap[mem_type],
                              totalBytes, 0, &status);
    openCLVerifyCall(status);

    *pitch = widthInBytes;
}
||||||
|
|
||||||
|
// Blocking 2D copy between host memory and a device buffer.
//
// For clMemcpyHostToDevice `dst` is the cl_mem and `src` the host pointer;
// for clMemcpyDeviceToHost it is the other way round. Any other `kind` does
// nothing. A single linear transfer of width * height bytes is used when the
// device-side pitch equals `width`, when channels == 3, or when height == 1;
// otherwise a rectangular transfer honouring both pitches is issued.
void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch,
                    const void *src, size_t spitch,
                    size_t width, size_t height, openCLMemcpyKind kind, int channels)
{
    size_t buffer_origin[3] = {0, 0, 0};
    size_t host_origin[3] = {0, 0, 0};
    size_t region[3] = {width, height, 1};

    if (kind == clMemcpyHostToDevice)
    {
        const bool linearCopy = (dpitch == width) || (channels == 3) || (height == 1);
        if (!linearCopy)
        {
            openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
                                                    buffer_origin, host_origin, region,
                                                    dpitch, 0, spitch, 0, src, 0, 0, 0));
        }
        else
        {
            openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
                                                0, width * height, src, 0, NULL, NULL));
        }
        return;
    }

    if (kind == clMemcpyDeviceToHost)
    {
        const bool linearCopy = (spitch == width) || (channels == 3) || (height == 1);
        if (!linearCopy)
        {
            openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
                                                   buffer_origin, host_origin, region,
                                                   spitch, 0, dpitch, 0, dst, 0, 0, 0));
        }
        else
        {
            openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
                                               0, width * height, dst, 0, NULL, NULL));
        }
    }
}
||||||
|
|
||||||
|
// Enqueues a rectangular device-to-device copy of width x height bytes.
// The byte offsets are split into (x, y) origins using the respective pitch.
// No event is requested and the call does not wait for completion.
void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
                        const void *src, size_t spitch,
                        size_t width, size_t height, int src_offset)
{
    size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
    size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
    size_t region[3] = {width, height, 1};

    cl_mem srcBuffer = (cl_mem)src;
    cl_mem dstBuffer = (cl_mem)dst;
    openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx), srcBuffer, dstBuffer,
                                           src_origin, dst_origin, region,
                                           spitch, 0, dpitch, 0, 0, 0, 0));
}
||||||
|
|
||||||
|
void openCLFree(void *devPtr) |
||||||
|
{ |
||||||
|
openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); |
||||||
|
} |
||||||
|
|
||||||
|
// Convenience overload: same as the four-argument version with no build options.
cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName)
{
    const char* const noBuildOptions = NULL;
    return openCLGetKernelFromSource(ctx, source, kernelName, noBuildOptions);
}
||||||
|
|
||||||
|
// Obtains the program for `source` from the ProgramCache and creates the
// kernel called `kernelName` from it.
//
// @return the newly created kernel
cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName,
                                    const char *build_options)
{
    CV_Assert(ProgramCache::getProgramCache() != NULL);

    cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options);
    CV_Assert(program != NULL);

    cl_int status = 0;
    cl_kernel kernel = clCreateKernel(program, kernelName.c_str(), &status);
    openCLVerifyCall(status);

    return kernel;
}
||||||
|
|
||||||
|
void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads) |
||||||
|
{ |
||||||
|
size_t kernelWorkGroupSize; |
||||||
|
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx), |
||||||
|
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); |
||||||
|
CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] ); |
||||||
|
CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] ); |
||||||
|
CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] ); |
||||||
|
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize ); |
||||||
|
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= ctx->getDeviceInfo().maxWorkGroupSize ); |
||||||
|
} |
||||||
|
|
||||||
|
#if defined(PRINT_KERNEL_RUN_TIME)
// Running totals updated by openCLExecuteKernel_() on every profiled launch.
static double total_execute_time = 0.0;
static double total_kernel_time = 0.0;
#endif
||||||
|
// Builds (or fetches) the kernel, binds its arguments and enqueues it on the
// command queue of `ctx`.
//
// @param kernelName     base name; "_C<channels>" and "_D<depth>" are appended
//                       for every value that is not -1
// @param globalThreads  global work size; rounded up in place to a multiple of
//                       localThreads when localThreads is not NULL
// @param localThreads   work-group size, or NULL to let OpenCL choose
// @param args           (size, pointer) pairs passed to clSetKernelArg in order
// @param build_options  compiler options, may be NULL
//
// The kernel object is released on every exit path, including when one of the
// checked OpenCL calls raises.
void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3],
                          size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
                          int depth, const char *build_options)
{
    // Construct the kernel name.
    // The rule is functionName_Cn_Dn: C stands for channels, D for data type
    // depth, n is an integer. For example split_C2_D2 is the split kernel with
    // channels = 2 and data type depth = 2.
    stringstream idxStr;
    if(channels != -1)
        idxStr << "_C" << channels;
    if(depth != -1)
        idxStr << "_D" << depth;
    kernelName += idxStr.str();

    cl_kernel kernel;
    kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);

    try
    {
        if ( localThreads != NULL)
        {
            globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
            globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
            globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);

            cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
        }
        for(size_t i = 0; i < args.size(); i ++)
            openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));

#ifndef PRINT_KERNEL_RUN_TIME
        openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
                                              localThreads, 0, NULL, NULL));
#else
        cl_event event = NULL;
        openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
                                              localThreads, 0, NULL, &event));

        cl_ulong start_time, end_time, queue_time;
        double execute_time = 0;
        double total_time   = 0;

        openCLSafeCall(clWaitForEvents(1, &event));
        openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
                                               sizeof(cl_ulong), &start_time, 0));

        openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                                               sizeof(cl_ulong), &end_time, 0));

        openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
                                               sizeof(cl_ulong), &queue_time, 0));

        execute_time = (double)(end_time - start_time) / (1000 * 1000);
        total_time = (double)(end_time - queue_time) / (1000 * 1000);

        total_execute_time += execute_time;
        total_kernel_time += total_time;
        clReleaseEvent(event);
#endif

        clFlush(getClCommandQueue(ctx));
    }
    catch (...)
    {
        // Do not leak the kernel object when one of the calls above raises.
        clReleaseKernel(kernel);
        throw;
    }

    openCLSafeCall(clReleaseKernel(kernel));
}
||||||
|
|
||||||
|
/**
 * Convenience overload that runs a kernel without extra compiler build options.
 *
 * Every argument is forwarded unchanged to the overload that additionally takes
 * `build_options`; a null pointer is passed for that parameter.
 */
void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
                         size_t globalThreads[3], size_t localThreads[3],
                         vector< pair<size_t, const void *> > &args, int channels, int depth)
{
    // No additional build options for this overload.
    const char *const noBuildOptions = NULL;

    openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
                        channels, depth, noBuildOptions);
}
||||||
|
/**
 * Runs the named kernel, optionally printing timing statistics.
 *
 * Without PRINT_KERNEL_RUN_TIME this simply forwards to openCLExecuteKernel_().
 * With PRINT_KERNEL_RUN_TIME defined it prints a header describing the kernel,
 * resets the global accumulators total_execute_time / total_kernel_time, runs the
 * kernel RUN_TIMES times through openCLExecuteKernel_() and prints the averages.
 *
 * @param ctx            context the kernel is executed in
 * @param source         kernel program source
 * @param kernelName     base kernel name (copied; suffixes are appended by the callee)
 * @param globalThreads  global work size, 3 entries
 * @param localThreads   local work size, 3 entries
 * @param args           (size, pointer) pairs passed on as kernel arguments
 * @param channels       channel count forwarded to the callee
 * @param depth          data depth forwarded to the callee; also indexes the printed type name
 * @param build_options  extra compiler options, may be NULL
 */
void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
                         size_t globalThreads[3], size_t localThreads[3],
                         vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)

{
#ifndef PRINT_KERNEL_RUN_TIME
    openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
                         build_options);
#else
    string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
    const int data_type_count = (int)(sizeof(data_type) / sizeof(data_type[0]));
    cout << endl;
    cout << "Function Name: " << kernelName;
    // Only print a type name for depths that have an entry in the table;
    // an unchecked upper bound would read past the end of data_type.
    if(depth >= 0 && depth < data_type_count)
        cout << " |data type: " << data_type[depth];
    cout << " |channels: " << channels;
    cout << " |Time Unit: " << "ms" << endl;

    // The accumulators are updated by every openCLExecuteKernel_() call below.
    total_execute_time = 0;
    total_kernel_time = 0;
    cout << "-------------------------------------" << endl;

    cout << setiosflags(ios::left) << setw(15) << "excute time";
    cout << setiosflags(ios::left) << setw(15) << "lauch time";
    cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl;
    for(int i = 0; i < RUN_TIMES; i++)
        openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
                             build_options);

    cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
    cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
#endif
}
||||||
|
|
||||||
|
/**
 * Builds/looks up a kernel, optionally launches it, and optionally times it.
 *
 * The kernel name is extended to functionName_Cn_Dn, where the "_C<channels>" part
 * is appended when channels != -1 and the "_D<depth>" part when depth != -1
 * (for example split_C2_D2).
 *
 * The kernel is only launched when globalThreads is non-NULL. When localThreads is
 * also non-NULL, each global size is rounded up to a multiple of the matching local
 * size (the caller's globalThreads array is modified in place) and the work-group
 * size is verified.
 *
 * @param finish             when true, clFinish() is called on the context's queue
 * @param measureKernelTime  when true, the launch is profiled via an event
 * @param cleanUp            when true, the kernel object is released before returning
 * @return (end - queued) profiling time divided by 1000 * 1000 when measured, 0.0 otherwise
 */
double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName,
                                  size_t globalThreads[3], size_t localThreads[3],
                                  vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
                                  bool finish, bool measureKernelTime, bool cleanUp)

{
    // Compose the specialised kernel name.
    stringstream suffix;
    if(channels != -1)
        suffix << "_C" << channels;
    if(depth != -1)
        suffix << "_D" << depth;
    kernelName += suffix.str();

    cl_kernel kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);

    double elapsed = 0.0;

    if(globalThreads != NULL)
    {
        if(localThreads != NULL)
        {
            // Round every dimension up to a whole number of work-groups.
            for(int dim = 0; dim < 3; ++dim)
                globalThreads[dim] = divUp(globalThreads[dim], localThreads[dim]) * localThreads[dim];

            cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
        }

        for(size_t argIdx = 0; argIdx < args.size(); ++argIdx)
            openCLSafeCall(clSetKernelArg(kernel, argIdx, args[argIdx].first, args[argIdx].second));

        if(measureKernelTime)
        {
            // Profiled launch: wait for completion, then read the timestamps.
            cl_event event = NULL;
            openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
                                                  localThreads, 0, NULL, &event));

            cl_ulong end_time, queue_time;

            openCLSafeCall(clWaitForEvents(1, &event));

            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                                                   sizeof(cl_ulong), &end_time, 0));

            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
                                                   sizeof(cl_ulong), &queue_time, 0));

            elapsed = (double)(end_time - queue_time) / (1000 * 1000);

            clReleaseEvent(event);
        }
        else
        {
            // Plain launch, no event requested.
            openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
                                                  localThreads, 0, NULL, NULL));
        }
    }

    if(finish)
    {
        clFinish(getClCommandQueue(ctx));
    }

    if(cleanUp)
    {
        openCLSafeCall(clReleaseKernel(kernel));
    }

    return elapsed;
}
||||||
|
|
||||||
|
//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName,
|
||||||
|
// size_t globalThreads[3], size_t localThreads[3],
|
||||||
|
// vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
|
||||||
|
// bool finish, bool measureKernelTime, bool cleanUp)
|
||||||
|
//
|
||||||
|
//{
|
||||||
|
// std::vector<std::string> fsource;
|
||||||
|
// for (int i = 0 ; i < numFiles ; i++)
|
||||||
|
// {
|
||||||
|
// std::string str;
|
||||||
|
// if (convertToString(fileName[i], str) >= 0)
|
||||||
|
// fsource.push_back(str);
|
||||||
|
// }
|
||||||
|
// const char **source = new const char *[numFiles];
|
||||||
|
// for (int i = 0 ; i < numFiles ; i++)
|
||||||
|
// source[i] = fsource[i].c_str();
|
||||||
|
// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads,
|
||||||
|
// args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
|
||||||
|
// fsource.clear();
|
||||||
|
// delete []source;
|
||||||
|
// return kernelTime;
|
||||||
|
//}
|
||||||
|
|
||||||
|
/**
 * Creates a read-only device buffer of `size` bytes and fills it from `value`
 * with a blocking write on `command_queue`.
 *
 * @param context        OpenCL context the buffer is created in
 * @param command_queue  queue used for the blocking write
 * @param value          host data to copy, at least `size` bytes
 * @param size           number of bytes to allocate and copy
 * @return the new buffer; the caller owns it. If the write fails the buffer is
 *         released before the error is reported, and NULL is returned should the
 *         error reporting macro return normally.
 */
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
                     const size_t size)
{
    cl_int status = CL_SUCCESS;
    cl_mem con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
    openCLSafeCall(status);

    status = clEnqueueWriteBuffer(command_queue, con_struct, CL_TRUE, 0, size,
                                  value, 0, NULL, NULL);
    if(status != CL_SUCCESS)
    {
        // Do not leak the freshly created buffer on the error path.
        clReleaseMemObject(con_struct);
        con_struct = NULL;
        openCLSafeCall(status);
    }

    return con_struct;
}
||||||
|
|
||||||
|
}//namespace ocl
|
||||||
|
}//namespace cv
|
@ -0,0 +1,311 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Guoping Long, longguoping@gmail.com
|
||||||
|
// Niko Li, newlife20080214@gmail.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include <iomanip> |
||||||
|
#include <fstream> |
||||||
|
#include "binarycaching.hpp" |
||||||
|
|
||||||
|
#undef __CL_ENABLE_EXCEPTIONS |
||||||
|
#include <CL/cl.hpp> |
||||||
|
|
||||||
|
namespace cv { namespace ocl { |
||||||
|
/*
|
||||||
|
* The binary caching system to eliminate redundant program source compilation. |
||||||
|
* Strictly, this is not a cache because we do not implement evictions right now. |
||||||
|
* We shall add such features to trade-off memory consumption and performance when necessary. |
||||||
|
*/ |
||||||
|
|
||||||
|
std::auto_ptr<ProgramCache> _programCache; |
||||||
|
ProgramCache* ProgramCache::getProgramCache() |
||||||
|
{ |
||||||
|
if (NULL == _programCache.get()) |
||||||
|
_programCache.reset(new ProgramCache()); |
||||||
|
return _programCache.get(); |
||||||
|
} |
||||||
|
|
||||||
|
/**
 * Starts with an empty program map and a zero entry counter.
 */
ProgramCache::ProgramCache()
{
    cacheSize = 0;
    codeCache.clear();
}
||||||
|
|
||||||
|
/**
 * Releases every cached program when the cache is destroyed.
 */
ProgramCache::~ProgramCache()
{
    this->releaseProgram();
}
||||||
|
|
||||||
|
/**
 * Looks up a cached program by its source signature.
 *
 * @param srcsign  signature string used as the map key
 * @return the cached program, or NULL when no entry exists for `srcsign`
 */
cl_program ProgramCache::progLookup(string srcsign)
{
    const map<string, cl_program>::iterator hit = codeCache.find(srcsign);
    if (hit == codeCache.end())
        return NULL;
    return hit->second;
}
||||||
|
|
||||||
|
/**
 * Stores `program` under `srcsign` unless progLookup() already yields a
 * non-NULL program for that signature.
 */
void ProgramCache::addProgram(string srcsign , cl_program program)
{
    if (progLookup(srcsign))
        return;

    typedef map<string, cl_program>::value_type CacheEntry;
    codeCache.insert(CacheEntry(srcsign, program));
}
||||||
|
|
||||||
|
void ProgramCache::releaseProgram() |
||||||
|
{ |
||||||
|
map<string, cl_program>::iterator iter; |
||||||
|
for(iter = codeCache.begin(); iter != codeCache.end(); iter++) |
||||||
|
{ |
||||||
|
openCLSafeCall(clReleaseProgram(iter->second)); |
||||||
|
} |
||||||
|
codeCache.clear(); |
||||||
|
cacheSize = 0; |
||||||
|
} |
||||||
|
|
||||||
|
static int enable_disk_cache = |
||||||
|
#ifdef _DEBUG |
||||||
|
false; |
||||||
|
#else |
||||||
|
true; |
||||||
|
#endif |
||||||
|
static int update_disk_cache = false; |
||||||
|
static String binpath = ""; |
||||||
|
|
||||||
|
void setBinaryDiskCache(int mode, String path) |
||||||
|
{ |
||||||
|
if(mode == CACHE_NONE) |
||||||
|
{ |
||||||
|
update_disk_cache = 0; |
||||||
|
enable_disk_cache = 0; |
||||||
|
return; |
||||||
|
} |
||||||
|
update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; |
||||||
|
enable_disk_cache |= |
||||||
|
#ifdef _DEBUG |
||||||
|
(mode & CACHE_DEBUG) == CACHE_DEBUG; |
||||||
|
#else |
||||||
|
(mode & CACHE_RELEASE) == CACHE_RELEASE; |
||||||
|
#endif |
||||||
|
if(enable_disk_cache && !path.empty()) |
||||||
|
{ |
||||||
|
binpath = path; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void setBinpath(const char *path) |
||||||
|
{ |
||||||
|
binpath = path; |
||||||
|
} |
||||||
|
|
||||||
|
int savetofile(const Context*, cl_program &program, const char *fileName) |
||||||
|
{ |
||||||
|
size_t binarySize; |
||||||
|
openCLSafeCall(clGetProgramInfo(program, |
||||||
|
CL_PROGRAM_BINARY_SIZES, |
||||||
|
sizeof(size_t), |
||||||
|
&binarySize, NULL)); |
||||||
|
char* binary = (char*)malloc(binarySize); |
||||||
|
if(binary == NULL) |
||||||
|
{ |
||||||
|
CV_Error(CV_StsNoMem, "Failed to allocate host memory."); |
||||||
|
} |
||||||
|
openCLSafeCall(clGetProgramInfo(program, |
||||||
|
CL_PROGRAM_BINARIES, |
||||||
|
sizeof(char *), |
||||||
|
&binary, |
||||||
|
NULL)); |
||||||
|
|
||||||
|
FILE *fp = fopen(fileName, "wb+"); |
||||||
|
if(fp != NULL) |
||||||
|
{ |
||||||
|
fwrite(binary, binarySize, 1, fp); |
||||||
|
free(binary); |
||||||
|
fclose(fp); |
||||||
|
} |
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
/**
 * Returns a built program for `source`, using the in-memory cache and, when
 * enabled, the on-disk binary cache.
 *
 * The in-memory key is built from the address of the first source string, the
 * OpenCL context and (if given) the build options. On a miss the program is either
 * compiled from source (and saved to disk when disk caching is on) or created from
 * a previously saved binary file, then built. On CL_BUILD_PROGRAM_FAILURE the
 * build log is printed to stdout before the failure is reported.
 *
 * @param ctx            context providing the OpenCL context, device and device info
 * @param source         pointer to the program source string
 * @param kernelName     used only to form the cache file name
 * @param build_options  extra compiler options, may be NULL
 * @return the cached or newly built program
 */
cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName,
                                    const char *build_options)
{
    cl_int status = 0;

    // Signature identifying this (source, context, options) combination.
    stringstream src_sign;
    src_sign << (int64)(*source) << getClContext(ctx);
    if (NULL != build_options)
        src_sign << "_" << build_options;
    const string srcsign = src_sign.str();

    cl_program program = ProgramCache::getProgramCache()->progLookup(srcsign);

    if (!program)
    {
        //config build programs
        std::string all_build_options;
        if (!ctx->getDeviceInfo().compilationExtraOptions.empty())
            all_build_options += ctx->getDeviceInfo().compilationExtraOptions;
        if (build_options != NULL)
        {
            all_build_options += " ";
            all_build_options += build_options;
        }
        string filename = binpath + kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb";

        FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL;
        if(fp == NULL || update_disk_cache)
        {
            // No usable cache file (or a refresh was requested): compile from source.
            if(fp != NULL)
                fclose(fp);

            program = clCreateProgramWithSource(
                          getClContext(ctx), 1, source, NULL, &status);
            openCLVerifyCall(status);
            cl_device_id device = getClDeviceID(ctx);
            status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL);
            if(status == CL_SUCCESS && enable_disk_cache)
                savetofile(ctx, program, filename.c_str());
        }
        else
        {
            // Load the previously saved binary.
            fseek(fp, 0, SEEK_END);
            const long fileSize = ftell(fp);
            fseek(fp, 0, SEEK_SET);
            if(fileSize <= 0)
            {
                // Empty or unreadable cache file; close it before reporting.
                fclose(fp);
                CV_Assert(fileSize > 0);
            }
            size_t binarySize = (size_t)fileSize;
            char *binary = new char[binarySize];
            const size_t itemsRead = fread(binary, binarySize, 1, fp);
            fclose(fp);
            if(itemsRead != 1)
            {
                // Release the buffer before reporting the short read.
                delete[] binary;
                CV_Assert(itemsRead == 1);
            }

            cl_device_id device = getClDeviceID(ctx);
            program = clCreateProgramWithBinary(getClContext(ctx),
                                                1,
                                                &device,
                                                (const size_t *)&binarySize,
                                                (const unsigned char **)&binary,
                                                NULL,
                                                &status);
            openCLVerifyCall(status);
            // Assign to the function-level status so that a failed build of a cached
            // binary reaches the error handling below instead of being dropped.
            status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL);
            delete[] binary;
        }

        if(status != CL_SUCCESS)
        {
            if(status == CL_BUILD_PROGRAM_FAILURE)
            {
                // First query only the size of the log.
                size_t buildLogSize = 0;
                cl_int logStatus = clGetProgramBuildInfo(program,
                                   getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, 0,
                                   NULL, &buildLogSize);
                if(logStatus != CL_SUCCESS)
                {
                    std::cout << "Failed to build the program and get the build info." << endl;
                }
                else if(buildLogSize > 0)
                {
                    // One extra zeroed byte guarantees termination when printing.
                    char *buildLog = new char[buildLogSize + 1];
                    memset(buildLog, 0, buildLogSize + 1);
                    logStatus = clGetProgramBuildInfo(program, getClDeviceID(ctx),
                                                      CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL);
                    if(logStatus == CL_SUCCESS)
                    {
                        std::cout << "\n\t\t\tBUILD LOG\n";
                        std::cout << buildLog << endl;
                    }
                    else
                    {
                        std::cout << "Failed to build the program and get the build info." << endl;
                    }
                    delete [] buildLog;
                }
            }
            openCLVerifyCall(status);
        }
        // Keep the program in the in-memory cache while there is room.
        if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE)
            this->addProgram(srcsign, program);
        else
            cout << "Warning: code cache has been full.\n";
    }
    return program;
}
||||||
|
|
||||||
|
//// Converts the contents of a file into a string
|
||||||
|
//static int convertToString(const char *filename, std::string& s)
|
||||||
|
//{
|
||||||
|
// size_t size;
|
||||||
|
// char* str;
|
||||||
|
//
|
||||||
|
// std::fstream f(filename, (std::fstream::in | std::fstream::binary));
|
||||||
|
// if(f.is_open())
|
||||||
|
// {
|
||||||
|
// size_t fileSize;
|
||||||
|
// f.seekg(0, std::fstream::end);
|
||||||
|
// size = fileSize = (size_t)f.tellg();
|
||||||
|
// f.seekg(0, std::fstream::beg);
|
||||||
|
//
|
||||||
|
// str = new char[size+1];
|
||||||
|
// if(!str)
|
||||||
|
// {
|
||||||
|
// f.close();
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// f.read(str, fileSize);
|
||||||
|
// f.close();
|
||||||
|
// str[size] = '\0';
|
||||||
|
//
|
||||||
|
// s = str;
|
||||||
|
// delete[] str;
|
||||||
|
// return 0;
|
||||||
|
// }
|
||||||
|
// printf("Error: Failed to open file %s\n", filename);
|
||||||
|
// return -1;
|
||||||
|
//}
|
||||||
|
|
||||||
|
} // namespace ocl
|
||||||
|
} // namespace cv
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue