mirror of https://github.com/opencv/opencv.git
commit ecb6c20915
279 changed files with 28573 additions and 16105 deletions
@@ -1,2 +1,6 @@
+if(BUILD_ANDROID_SERVICE)
 add_subdirectory(engine)
 #add_subdirectory(engine_test)
+endif()
+
+install(FILES "readme.txt" DESTINATION "apk/" COMPONENT main)
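The new guard means the Android service subprojects are configured only when the corresponding CMake option is enabled. A hypothetical configure invocation (the source path is a placeholder, not part of this commit) would look like:

    cmake -DBUILD_ANDROID_SERVICE=ON <path-to-opencv-source>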
@@ -1,22 +0,0 @@
***************
Package Content
***************

The package provides the new OpenCV SDK that uses OpenCV Manager for library initialization. OpenCV Manager provides the following benefits:

* Less memory usage. All apps use the same binaries from the service and do not keep native libs inside themselves;
* Hardware-specific optimizations for all supported platforms;
* Trusted OpenCV library source. All packages with OpenCV are published on the Google Play service;
* Regular updates and bug fixes.

The package consists of a Library Project for Java development with Eclipse, C++ headers and libraries for native application development, javadoc samples, and prebuilt binaries for the ARM and x86 platforms.
To try the new SDK on a consumer device with Google Play, just install the sample package and follow the application messages (Google Play service access is needed).
To start an example on a device without Google Play, first install the OpenCV Manager package and the OpenCV binary pack for your platform from the apk folder.
See docs/doc/tutorials/introduction/android_binary_package/android_binary_package.html and docs/android/refmain.html for details about the service.
On-line documentation will be available at: http://docs.opencv.org/trunk

********
Contacts
********

Please send all feedback to Alexander Smorkalov: alexander.smorkalov@itseez.com
@@ -0,0 +1,28 @@
OpenCV Manager selection
========================

Since version 1.7, several packages of OpenCV Manager are built, and every package includes the OpenCV
library for its target platform. The internal library is used in most cases; the exception is the rare
case where an arm-v7a processor without the NEON instruction set is detected, for which an additional
binary package for arm-v7a is used. The new package selection logic simplifies OpenCV installation on
end-user devices: in most cases OpenCV Manager can be installed automatically from Google Play. For
cases where Google Play is not available (e.g. an emulator or a developer board), you can install it
manually using the adb tool:

    adb install OpenCV-2.4.3-android-sdk/apk/OpenCV_2.4.3.2_Manager_2.4_<platform_name>.apk

Use the table below to determine the right OpenCV Manager package:

+----------------------------+-----------------+-----------------------------------------------------+
| Hardware Platform          | Android version | Package name                                        |
+============================+=================+=====================================================+
| Intel x86                  | >= 2.3          | OpenCV_2.4.3.2_Manager_2.4_x86.apk                  |
+----------------------------+-----------------+-----------------------------------------------------+
| MIPS                       | >= 2.3          | OpenCV_2.4.3.2_Manager_2.4_mips.apk                 |
+----------------------------+-----------------+-----------------------------------------------------+
| armeabi (arm-v5, arm-v6)   | >= 2.3          | OpenCV_2.4.3.2_Manager_2.4_armeabi.apk              |
+----------------------------+-----------------+-----------------------------------------------------+
| armeabi-v7a (arm-v7a-NEON) | >= 2.3          | OpenCV_2.4.3.2_Manager_2.4_armv7a-neon.apk          |
+----------------------------+-----------------+-----------------------------------------------------+
| armeabi-v7a (arm-v7a-NEON) | 2.2             | OpenCV_2.4.3.2_Manager_2.4_armv7a-neon-android8.apk |
+----------------------------+-----------------+-----------------------------------------------------+
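The right row of the table can be read off the device itself before installing: the ABI and, on ARM, the NEON flag are visible over adb. The commands below are standard Android tools, shown only as an illustrative check (they are not part of this SDK):

    adb shell getprop ro.product.cpu.abi
    adb shell cat /proc/cpuinfo

The first command prints armeabi, armeabi-v7a, x86 or mips; on ARM devices the Features line of /proc/cpuinfo shows whether neon is available.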
(Image file changed: 16 KiB before.)
(Three file diffs suppressed because they are too large.)
@@ -0,0 +1,10 @@
cmake_minimum_required(VERSION 2.8.3)

project(nv_perf_test)

find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIR})

add_executable(${PROJECT_NAME} main.cpp)

target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
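With an existing OpenCV build on the machine, the project above can be configured and built out-of-source in the usual CMake way; the paths below are placeholders, not part of the commit:

    mkdir build && cd build
    cmake -DOpenCV_DIR=<path-to-opencv-build> <path-to-nv_perf_test-source>
    make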
(Two image files added: 140 KiB each.)
@@ -0,0 +1,489 @@
#include <cstdio>

#define HAVE_CUDA 1

#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/video/video.hpp>
#include <opencv2/legacy/legacy.hpp>
#include <opencv2/ts/ts.hpp>
#include <opencv2/ts/ts_perf.hpp>

static void printOsInfo()
{
#if defined _WIN32
#   if defined _WIN64
    printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"); fflush(stdout);
#   else
    printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"); fflush(stdout);
#   endif
#elif defined linux
#   if defined _LP64
    printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"); fflush(stdout);
#   else
    printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"); fflush(stdout);
#   endif
#elif defined __APPLE__
#   if defined _LP64
    printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"); fflush(stdout);
#   else
    printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"); fflush(stdout);
#   endif
#endif
}

static void printCudaInfo()
{
    const int deviceCount = cv::gpu::getCudaEnabledDeviceCount();

    printf("[----------]\n"); fflush(stdout);
    printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount); fflush(stdout);
    printf("[----------]\n"); fflush(stdout);

    for (int i = 0; i < deviceCount; ++i)
    {
        cv::gpu::DeviceInfo info(i);

        printf("[----------]\n"); fflush(stdout);
        printf("[ DEVICE   ] \t# %d %s.\n", i, info.name().c_str()); fflush(stdout);
        printf("[          ] \tCompute capability: %d.%d\n", info.majorVersion(), info.minorVersion()); fflush(stdout);
        printf("[          ] \tMulti Processor Count: %d\n", info.multiProcessorCount()); fflush(stdout);
        printf("[          ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)); fflush(stdout);
        printf("[          ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)); fflush(stdout);
        if (!info.isCompatible())
            printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n");
        printf("[----------]\n"); fflush(stdout);
    }
}

int main(int argc, char* argv[])
{
    printOsInfo();
    printCudaInfo();

    perf::Regression::Init("nv_perf_test");
    perf::TestBase::Init(argc, argv);
    testing::InitGoogleTest(&argc, argv);

    return RUN_ALL_TESTS();
}

#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name

//////////////////////////////////////////////////////////
// HoughLinesP

DEF_PARAM_TEST_1(Image, std::string);

PERF_TEST_P(Image, HoughLinesP,
            testing::Values(std::string("im1_1280x800.jpg")))
{
    declare.time(30.0);

    std::string fileName = GetParam();

    const double rho = 1.0;
    const double theta = 1.0;
    const int threshold = 40;
    const int minLineLenght = 20;
    const int maxLineGap = 5;

    cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_image(image);
        cv::gpu::GpuMat d_lines;
        cv::gpu::HoughLinesBuf d_buf;

        cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);

        TEST_CYCLE()
        {
            cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
        }
    }
    else
    {
        cv::Mat mask;
        cv::Canny(image, mask, 50, 100);

        std::vector<cv::Vec4i> lines;
        cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);

        TEST_CYCLE()
        {
            cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
        }
    }

    SANITY_CHECK(0);
}

//////////////////////////////////////////////////////////
// GoodFeaturesToTrack

DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth);

PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
            testing::Combine(
            testing::Values(std::string("im1_1280x800.jpg")),
            testing::Values(CV_8U, CV_16U)
            ))
{
    declare.time(60);

    const std::string fileName = std::tr1::get<0>(GetParam());
    const int depth = std::tr1::get<1>(GetParam());

    const int maxCorners = 5000;
    const double qualityLevel = 0.05;
    const int minDistance = 5;
    const int blockSize = 3;
    const bool useHarrisDetector = true;
    const double k = 0.05;

    cv::Mat src = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
    if (src.empty())
        FAIL() << "Unable to load source image [" << fileName << "]";

    if (depth != CV_8U)
        src.convertTo(src, depth);

    cv::Mat mask(src.size(), CV_8UC1, cv::Scalar::all(1));
    mask(cv::Rect(0, 0, 100, 100)).setTo(cv::Scalar::all(0));

    if (PERF_RUN_GPU())
    {
        cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k);

        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_mask(mask);
        cv::gpu::GpuMat d_pts;

        d_detector(d_src, d_pts, d_mask);

        TEST_CYCLE()
        {
            d_detector(d_src, d_pts, d_mask);
        }
    }
    else
    {
        if (depth != CV_8U)
            FAIL() << "Unsupported depth";

        cv::Mat pts;

        cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k);

        TEST_CYCLE()
        {
            cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k);
        }
    }

    SANITY_CHECK(0);
}

//////////////////////////////////////////////////////////
// OpticalFlowPyrLKSparse

typedef std::pair<std::string, std::string> string_pair;

DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool);

PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
            testing::Combine(
            testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
            testing::Values(CV_8U, CV_16U),
            testing::Bool()
            ))
{
    declare.time(60);

    const string_pair fileNames = std::tr1::get<0>(GetParam());
    const int depth = std::tr1::get<1>(GetParam());
    const bool graySource = std::tr1::get<2>(GetParam());

    // PyrLK params
    const cv::Size winSize(15, 15);
    const int maxLevel = 5;
    const cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 0.01);

    // GoodFeaturesToTrack params
    const int maxCorners = 5000;
    const double qualityLevel = 0.05;
    const int minDistance = 5;
    const int blockSize = 3;
    const bool useHarrisDetector = true;
    const double k = 0.05;

    cv::Mat src1 = cv::imread(fileNames.first, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
    if (src1.empty())
        FAIL() << "Unable to load source image [" << fileNames.first << "]";

    cv::Mat src2 = cv::imread(fileNames.second, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
    if (src2.empty())
        FAIL() << "Unable to load source image [" << fileNames.second << "]";

    cv::Mat gray_src;
    if (graySource)
        gray_src = src1;
    else
        cv::cvtColor(src1, gray_src, cv::COLOR_BGR2GRAY);

    cv::Mat pts;
    cv::goodFeaturesToTrack(gray_src, pts, maxCorners, qualityLevel, minDistance, cv::noArray(), blockSize, useHarrisDetector, k);

    if (depth != CV_8U)
    {
        src1.convertTo(src1, depth);
        src2.convertTo(src2, depth);
    }

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src1(src1);
        cv::gpu::GpuMat d_src2(src2);
        cv::gpu::GpuMat d_pts(pts.reshape(2, 1));
        cv::gpu::GpuMat d_nextPts;
        cv::gpu::GpuMat d_status;

        cv::gpu::PyrLKOpticalFlow d_pyrLK;
        d_pyrLK.winSize = winSize;
        d_pyrLK.maxLevel = maxLevel;
        d_pyrLK.iters = criteria.maxCount;
        d_pyrLK.useInitialFlow = false;

        d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status);

        TEST_CYCLE()
        {
            d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status);
        }
    }
    else
    {
        if (depth != CV_8U)
            FAIL() << "Unsupported depth";

        cv::Mat nextPts;
        cv::Mat status;

        cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria);

        TEST_CYCLE()
        {
            cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria);
        }
    }

    SANITY_CHECK(0);
}

//////////////////////////////////////////////////////////
// OpticalFlowFarneback

DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth);

PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
            testing::Combine(
            testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
            testing::Values(CV_8U, CV_16U)
            ))
{
    declare.time(500);

    const string_pair fileNames = std::tr1::get<0>(GetParam());
    const int depth = std::tr1::get<1>(GetParam());

    const double pyrScale = 0.5;
    const int numLevels = 6;
    const int winSize = 7;
    const int numIters = 15;
    const int polyN = 7;
    const double polySigma = 1.5;
    const int flags = cv::OPTFLOW_USE_INITIAL_FLOW;

    cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
    if (src1.empty())
        FAIL() << "Unable to load source image [" << fileNames.first << "]";

    cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
    if (src2.empty())
        FAIL() << "Unable to load source image [" << fileNames.second << "]";

    if (depth != CV_8U)
    {
        src1.convertTo(src1, depth);
        src2.convertTo(src2, depth);
    }

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src1(src1);
        cv::gpu::GpuMat d_src2(src2);
        cv::gpu::GpuMat d_u(src1.size(), CV_32FC1, cv::Scalar::all(0));
        cv::gpu::GpuMat d_v(src1.size(), CV_32FC1, cv::Scalar::all(0));

        cv::gpu::FarnebackOpticalFlow d_farneback;
        d_farneback.pyrScale = pyrScale;
        d_farneback.numLevels = numLevels;
        d_farneback.winSize = winSize;
        d_farneback.numIters = numIters;
        d_farneback.polyN = polyN;
        d_farneback.polySigma = polySigma;
        d_farneback.flags = flags;

        d_farneback(d_src1, d_src2, d_u, d_v);

        TEST_CYCLE_N(10)
        {
            d_farneback(d_src1, d_src2, d_u, d_v);
        }
    }
    else
    {
        if (depth != CV_8U)
            FAIL() << "Unsupported depth";

        cv::Mat flow(src1.size(), CV_32FC2, cv::Scalar::all(0));

        cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);

        TEST_CYCLE_N(10)
        {
            cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
        }
    }

    SANITY_CHECK(0);
}

//////////////////////////////////////////////////////////
// OpticalFlowBM

void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
                       cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious,
                       cv::Mat& velx, cv::Mat& vely)
{
    cv::Size sz((curr.cols - bSize.width + shiftSize.width)/shiftSize.width, (curr.rows - bSize.height + shiftSize.height)/shiftSize.height);

    velx.create(sz, CV_32FC1);
    vely.create(sz, CV_32FC1);

    CvMat cvprev = prev;
    CvMat cvcurr = curr;

    CvMat cvvelx = velx;
    CvMat cvvely = vely;

    cvCalcOpticalFlowBM(&cvprev, &cvcurr, bSize, shiftSize, maxRange, usePrevious, &cvvelx, &cvvely);
}

DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size);

PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
            testing::Combine(
            testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
            testing::Values(cv::Size(16, 16)),
            testing::Values(cv::Size(2, 2)),
            testing::Values(cv::Size(16, 16))
            ))
{
    declare.time(1000);

    const string_pair fileNames = std::tr1::get<0>(GetParam());
    const cv::Size block_size = std::tr1::get<1>(GetParam());
    const cv::Size shift_size = std::tr1::get<2>(GetParam());
    const cv::Size max_range = std::tr1::get<3>(GetParam());

    cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
    if (src1.empty())
        FAIL() << "Unable to load source image [" << fileNames.first << "]";

    cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
    if (src2.empty())
        FAIL() << "Unable to load source image [" << fileNames.second << "]";

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src1(src1);
        cv::gpu::GpuMat d_src2(src2);
        cv::gpu::GpuMat d_velx, d_vely, buf;

        cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);

        TEST_CYCLE_N(10)
        {
            cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
        }
    }
    else
    {
        cv::Mat velx, vely;

        calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);

        TEST_CYCLE_N(10)
        {
            calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
        }
    }

    SANITY_CHECK(0);
}

PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
            testing::Combine(
            testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
            testing::Values(cv::Size(16, 16)),
            testing::Values(cv::Size(1, 1)),
            testing::Values(cv::Size(16, 16))
            ))
{
    declare.time(1000);

    const string_pair fileNames = std::tr1::get<0>(GetParam());
    const cv::Size block_size = std::tr1::get<1>(GetParam());
    const cv::Size shift_size = std::tr1::get<2>(GetParam());
    const cv::Size max_range = std::tr1::get<3>(GetParam());

    cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
    if (src1.empty())
        FAIL() << "Unable to load source image [" << fileNames.first << "]";

    cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
    if (src2.empty())
        FAIL() << "Unable to load source image [" << fileNames.second << "]";

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src1(src1);
        cv::gpu::GpuMat d_src2(src2);
        cv::gpu::GpuMat d_velx, d_vely;

        cv::gpu::FastOpticalFlowBM fastBM;

        fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width);

        TEST_CYCLE_N(10)
        {
            fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width);
        }
    }
    else
    {
        cv::Mat velx, vely;

        calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);

        TEST_CYCLE_N(10)
        {
            calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
        }
    }

    SANITY_CHECK(0);
}
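Because the binary initializes Google Test and the OpenCV perf framework, individual benchmarks can be listed and filtered with the standard gtest options. The invocations below are only a usage sketch, assuming the executable name nv_perf_test from the CMakeLists.txt above and the two test images in the working directory:

    ./nv_perf_test --gtest_list_tests
    ./nv_perf_test --gtest_filter=*HoughLinesP*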
@@ -0,0 +1,361 @@
/* (Standard OpenCV BSD license header.) */
#ifndef __OPENCV_GPU_REDUCE_DETAIL_HPP__
#define __OPENCV_GPU_REDUCE_DETAIL_HPP__

#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace reduce_detail
    {
        template <typename T> struct GetType;
        template <typename T> struct GetType<T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<volatile T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<T&>
        {
            typedef T type;
        };

        template <unsigned int I, unsigned int N>
        struct For
        {
            template <class PointerTuple, class ValTuple>
            static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
            {
                thrust::get<I>(smem)[tid] = thrust::get<I>(val);

                For<I + 1, N>::loadToSmem(smem, val, tid);
            }
            template <class PointerTuple, class ValTuple>
            static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
            {
                thrust::get<I>(val) = thrust::get<I>(smem)[tid];

                For<I + 1, N>::loadFromSmem(smem, val, tid);
            }

            template <class PointerTuple, class ValTuple, class OpTuple>
            static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
            {
                typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
                thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);

                For<I + 1, N>::merge(smem, val, tid, delta, op);
            }
            template <class ValTuple, class OpTuple>
            static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
            {
                typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
                thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);

                For<I + 1, N>::mergeShfl(val, delta, width, op);
            }
        };
        template <unsigned int N>
        struct For<N, N>
        {
            template <class PointerTuple, class ValTuple>
            static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
            {
            }
            template <class PointerTuple, class ValTuple>
            static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
            {
            }

            template <class PointerTuple, class ValTuple, class OpTuple>
            static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
            {
            }
            template <class ValTuple, class OpTuple>
            static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
            {
            }
        };

        template <typename T>
        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
        {
            smem[tid] = val;
        }
        template <typename T>
        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
        {
            val = smem[tid];
        }
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
        __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                                   const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                   unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
        }
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                                     const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                     unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
        }

        template <typename T, class Op>
        __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
        {
            T reg = smem[tid + delta];
            smem[tid] = val = op(val, reg);
        }
        template <typename T, class Op>
        __device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
        {
            T reg = shfl_down(val, delta, width);
            val = op(val, reg);
        }
        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
        __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
                                              const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                              unsigned int tid,
                                              unsigned int delta,
                                              const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
        {
            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
        }
        template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
        __device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
                                                  unsigned int delta,
                                                  unsigned int width,
                                                  const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
        {
            For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
        }

        template <unsigned int N> struct Generic
        {
            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                loadToSmem(smem, val, tid);
                if (N >= 32)
                    __syncthreads();

                if (N >= 2048)
                {
                    if (tid < 1024)
                        merge(smem, val, tid, 1024, op);

                    __syncthreads();
                }
                if (N >= 1024)
                {
                    if (tid < 512)
                        merge(smem, val, tid, 512, op);

                    __syncthreads();
                }
                if (N >= 512)
                {
                    if (tid < 256)
                        merge(smem, val, tid, 256, op);

                    __syncthreads();
                }
                if (N >= 256)
                {
                    if (tid < 128)
                        merge(smem, val, tid, 128, op);

                    __syncthreads();
                }
                if (N >= 128)
                {
                    if (tid < 64)
                        merge(smem, val, tid, 64, op);

                    __syncthreads();
                }
                if (N >= 64)
                {
                    if (tid < 32)
                        merge(smem, val, tid, 32, op);
                }

                if (tid < 16)
                {
                    merge(smem, val, tid, 16, op);
                    merge(smem, val, tid, 8, op);
                    merge(smem, val, tid, 4, op);
                    merge(smem, val, tid, 2, op);
                    merge(smem, val, tid, 1, op);
                }
            }
        };

        template <unsigned int I, typename Pointer, typename Reference, class Op>
        struct Unroll
        {
            static __device__ void loopShfl(Reference val, Op op, unsigned int N)
            {
                mergeShfl(val, I, N, op);
                Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
            }
            static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                merge(smem, val, tid, I, op);
                Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
            }
        };
        template <typename Pointer, typename Reference, class Op>
        struct Unroll<0, Pointer, Reference, Op>
        {
            static __device__ void loopShfl(Reference, Op, unsigned int)
            {
            }
            static __device__ void loop(Pointer, Reference, unsigned int, Op)
            {
            }
        };

        template <unsigned int N> struct WarpOptimized
        {
            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
            #if __CUDA_ARCH__ >= 300
                (void) smem;
                (void) tid;

                Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
            #else
                loadToSmem(smem, val, tid);

                if (tid < N / 2)
                    Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
            #endif
            }
        };

        template <unsigned int N> struct GenericOptimized32
        {
            enum { M = N / 32 };

            template <typename Pointer, typename Reference, class Op>
            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
            {
                const unsigned int laneId = Warp::laneId();

            #if __CUDA_ARCH__ >= 300
                Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);

                if (laneId == 0)
                    loadToSmem(smem, val, tid / 32);
            #else
                loadToSmem(smem, val, tid);

                if (laneId < 16)
                    Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);

                __syncthreads();

                if (laneId == 0)
                    loadToSmem(smem, val, tid / 32);
            #endif

                __syncthreads();

                loadFromSmem(smem, val, tid);

                if (tid < 32)
                {
                #if __CUDA_ARCH__ >= 300
                    Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
                #else
                    Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
                #endif
                }
            }
        };

        template <bool val, class T1, class T2> struct StaticIf;
        template <class T1, class T2> struct StaticIf<true, T1, T2>
        {
            typedef T1 type;
        };
        template <class T1, class T2> struct StaticIf<false, T1, T2>
        {
            typedef T2 type;
        };

        template <unsigned int N> struct IsPowerOf2
        {
            enum { value = ((N != 0) && !(N & (N - 1))) };
        };

        template <unsigned int N> struct Dispatcher
        {
            typedef typename StaticIf<
                (N <= 32) && IsPowerOf2<N>::value,
                WarpOptimized<N>,
                typename StaticIf<
                    (N <= 1024) && IsPowerOf2<N>::value,
                    GenericOptimized32<N>,
                    Generic<N>
                >::type
            >::type reductor;
        };
    }
}}}

#endif // __OPENCV_GPU_REDUCE_DETAIL_HPP__
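The header above supplies the building blocks for block-wide reductions in the GPU module: values are staged in volatile shared memory, folded pairwise at halving strides with __syncthreads() between steps while more than one warp is active, and finished warp-synchronously (or with shfl_down() on compute capability 3.0+). Below is a minimal standalone CUDA sketch of the same pattern, independent of the OpenCV headers; the kernel and buffer names are invented for illustration and it keeps the same warp-synchronous assumptions as this 2012-era code.

#include <cstdio>
#include <vector>
#include <cuda_runtime.h>

#define BLOCK_SIZE 256  // power of two, as the Generic<N> reductor above requires

// Per-block sum: shared-memory tree reduction with a warp-synchronous tail,
// mirroring Generic<N>::reduce (loadToSmem, merge at halving strides, tid < 16 tail).
__global__ void blockSumKernel(const float* in, float* out, int n)
{
    __shared__ volatile float smem[BLOCK_SIZE];

    const int tid = threadIdx.x;
    const int i   = blockIdx.x * blockDim.x + tid;

    smem[tid] = (i < n) ? in[i] : 0.0f;   // loadToSmem step
    __syncthreads();

    // merge() at strides 128, 64, 32: several warps still participate, so synchronize
    for (int delta = BLOCK_SIZE / 2; delta >= 32; delta /= 2)
    {
        if (tid < delta)
            smem[tid] += smem[tid + delta];
        __syncthreads();
    }

    // warp-synchronous tail (strides 16..1); volatile shared memory keeps the
    // loads/stores ordered, exactly as the tid < 16 block in Generic<N> does
    if (tid < 16)
    {
        smem[tid] += smem[tid + 16];
        smem[tid] += smem[tid + 8];
        smem[tid] += smem[tid + 4];
        smem[tid] += smem[tid + 2];
        smem[tid] += smem[tid + 1];
    }

    if (tid == 0)
        out[blockIdx.x] = smem[0];        // one partial sum per block
}

int main()
{
    const int n = 1 << 20;
    const int blocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;

    std::vector<float> h_in(n, 1.0f);     // all ones: every block should sum to BLOCK_SIZE
    float *d_in = 0, *d_out = 0;
    cudaMalloc(&d_in, n * sizeof(float));
    cudaMalloc(&d_out, blocks * sizeof(float));
    cudaMemcpy(d_in, &h_in[0], n * sizeof(float), cudaMemcpyHostToDevice);

    blockSumKernel<<<blocks, BLOCK_SIZE>>>(d_in, d_out, n);

    std::vector<float> h_out(blocks);
    cudaMemcpy(&h_out[0], d_out, blocks * sizeof(float), cudaMemcpyDeviceToHost);
    printf("first block sum: %.0f (expected %d)\n", h_out[0], BLOCK_SIZE);

    cudaFree(d_in);
    cudaFree(d_out);
    return 0;
}

The For<I, N>/thrust::tuple machinery generalizes exactly this loop to several values and operators reduced in lock-step, and the __CUDA_ARCH__ >= 300 branches replace the shared-memory tail with register shuffles.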
@@ -0,0 +1,498 @@
/* (Standard OpenCV BSD license header, identical to the one above.) */
#ifndef __OPENCV_GPU_PRED_VAL_REDUCE_DETAIL_HPP__
#define __OPENCV_GPU_PRED_VAL_REDUCE_DETAIL_HPP__

#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace reduce_key_val_detail
    {
        template <typename T> struct GetType;
        template <typename T> struct GetType<T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<volatile T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<T&>
        {
            typedef T type;
        };

        template <unsigned int I, unsigned int N>
        struct For
        {
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(smem)[tid] = thrust::get<I>(data);

                For<I + 1, N>::loadToSmem(smem, data, tid);
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(data) = thrust::get<I>(smem)[tid];

                For<I + 1, N>::loadFromSmem(smem, data, tid);
            }

            template <class ReferenceTuple>
            static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
            {
                thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);

                For<I + 1, N>::copyShfl(val, delta, width);
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
            {
                thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];

                For<I + 1, N>::copy(svals, val, tid, delta);
            }

            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
            {
                typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);

                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(key) = reg;
                    thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
                }

                For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
            }
            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
                                         const ValPointerTuple& svals, const ValReferenceTuple& val,
                                         const CmpTuple& cmp,
                                         unsigned int tid, unsigned int delta)
            {
                typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];

                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
                    thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
                }

                For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
            }
        };
        template <unsigned int N>
        struct For<N, N>
        {
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
            {
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
            {
            }

            template <class ReferenceTuple>
            static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
            {
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
            {
            }

            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
            {
            }
            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
                                         const ValPointerTuple&, const ValReferenceTuple&,
                                         const CmpTuple&,
                                         unsigned int, unsigned int)
            {
            }
        };

        //////////////////////////////////////////////////////
        // loadToSmem

        template <typename T>
        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
        {
            smem[tid] = data;
        }
        template <typename T>
        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
        {
            data = smem[tid];
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                   const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                   unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                     const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                     unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
        }

        //////////////////////////////////////////////////////
        // copyVals

        template <typename V>
        __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
        {
            val = shfl_down(val, delta, width);
        }
        template <typename V>
        __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
        {
            svals[tid] = val = svals[tid + delta];
        }
        template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                     unsigned int delta,
                                                     int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
        }

        //////////////////////////////////////////////////////
        // merge

        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);

            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];

            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        template <typename K,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const Cmp& cmp,
                                                  unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);

            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        template <typename K,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];

            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                                  unsigned int delta, int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
        }
        template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
                  typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                              const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                              unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
        }

        //////////////////////////////////////////////////////
        // Generic

        template <unsigned int N> struct Generic
        {
            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                loadToSmem(skeys, key, tid);
                loadValsToSmem(svals, val, tid);
                if (N >= 32)
                    __syncthreads();

                if (N >= 2048)
                {
                    if (tid < 1024)
                        merge(skeys, key, svals, val, cmp, tid, 1024);

                    __syncthreads();
                }
                if (N >= 1024)
                {
                    if (tid < 512)
                        merge(skeys, key, svals, val, cmp, tid, 512);

                    __syncthreads();
                }
                if (N >= 512)
                {
                    if (tid < 256)
                        merge(skeys, key, svals, val, cmp, tid, 256);

                    __syncthreads();
                }
                if (N >= 256)
                {
                    if (tid < 128)
                        merge(skeys, key, svals, val, cmp, tid, 128);

                    __syncthreads();
                }
                if (N >= 128)
                {
                    if (tid < 64)
                        merge(skeys, key, svals, val, cmp, tid, 64);

                    __syncthreads();
                }
                if (N >= 64)
                {
                    if (tid < 32)
                        merge(skeys, key, svals, val, cmp, tid, 32);
                }

                if (tid < 16)
                {
                    merge(skeys, key, svals, val, cmp, tid, 16);
                    merge(skeys, key, svals, val, cmp, tid, 8);
                    merge(skeys, key, svals, val, cmp, tid, 4);
                    merge(skeys, key, svals, val, cmp, tid, 2);
                    merge(skeys, key, svals, val, cmp, tid, 1);
                }
            }
        };

        template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
        struct Unroll
        {
            static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
            {
                mergeShfl(key, val, cmp, I, N);
                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
            }
            static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                merge(skeys, key, svals, val, cmp, tid, I);
                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
            }
        };
        template <class KP, class KR, class VP, class VR, class Cmp>
        struct Unroll<0, KP, KR, VP, VR, Cmp>
        {
            static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
            {
            }
            static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
            {
            }
        };

        template <unsigned int N> struct WarpOptimized
        {
            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
            #if 0 // __CUDA_ARCH__ >= 300
                (void) skeys;
                (void) svals;
                (void) tid;

                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
            #else
                loadToSmem(skeys, key, tid);
                loadToSmem(svals, val, tid);

                if (tid < N / 2)
                    Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
            #endif
            }
        };

        template <unsigned int N> struct GenericOptimized32
        {
            enum { M = N / 32 };

            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                const unsigned int laneId = Warp::laneId();

            #if 0 // __CUDA_ARCH__ >= 300
                Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);

                if (laneId == 0)
                {
                    loadToSmem(skeys, key, tid / 32);
                    loadToSmem(svals, val, tid / 32);
                }
            #else
                loadToSmem(skeys, key, tid);
                loadToSmem(svals, val, tid);

                if (laneId < 16)
                    Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);

                __syncthreads();

                if (laneId == 0)
                {
                    loadToSmem(skeys, key, tid / 32);
                    loadToSmem(svals, val, tid / 32);
                }
            #endif

                __syncthreads();

                loadFromSmem(skeys, key, tid);

                if (tid < 32)
                {
                #if 0 // __CUDA_ARCH__ >= 300
                    loadFromSmem(svals, val, tid);

                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
                #else
                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
                #endif
                }
            }
        };

        template <bool val, class T1, class T2> struct StaticIf;
||||||
|
template <class T1, class T2> struct StaticIf<true, T1, T2> |
||||||
|
{ |
||||||
|
typedef T1 type; |
||||||
|
}; |
||||||
|
template <class T1, class T2> struct StaticIf<false, T1, T2> |
||||||
|
{ |
||||||
|
typedef T2 type; |
||||||
|
}; |
||||||
|
|
||||||
|
template <unsigned int N> struct IsPowerOf2 |
||||||
|
{ |
||||||
|
enum { value = ((N != 0) && !(N & (N - 1))) }; |
||||||
|
}; |
||||||
|
|
||||||
|
template <unsigned int N> struct Dispatcher |
||||||
|
{ |
||||||
|
typedef typename StaticIf< |
||||||
|
(N <= 32) && IsPowerOf2<N>::value, |
||||||
|
WarpOptimized<N>, |
||||||
|
typename StaticIf< |
||||||
|
(N <= 1024) && IsPowerOf2<N>::value, |
||||||
|
GenericOptimized32<N>, |
||||||
|
Generic<N> |
||||||
|
>::type |
||||||
|
>::type reductor; |
||||||
|
}; |
||||||
|
} |
||||||
|
}}} |
||||||
|
|
||||||
|
#endif // __OPENCV_GPU_PRED_VAL_REDUCE_DETAIL_HPP__
|
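For orientation, here is a minimal sketch of how the Dispatcher-selected reductor above is normally reached through the public reduceKeyVal() wrapper declared later in this patch (opencv2/gpu/device/reduce.hpp). The kernel, its 256-thread block size, the buffer names and the LessOp comparator are assumptions made for illustration, not part of this commit.

// Hypothetical usage sketch: per-block argmin via the key/value reduction.
// The block size must match the template parameter N (256 here).
#include <cfloat>
#include "opencv2/gpu/device/reduce.hpp"   // include path assumed; declares reduceKeyVal()

using namespace cv::gpu::device;

struct LessOp
{
    __device__ __forceinline__ bool operator()(float a, float b) const { return a < b; }
};

__global__ void blockArgMin(const float* data, int n, float* minVal, int* minIdx)
{
    __shared__ float skeys[256];   // keys: the values being minimized
    __shared__ int   svals[256];   // values: the indices that travel with the keys

    const int tid = threadIdx.x;
    const int i   = blockIdx.x * blockDim.x + tid;

    float key = (i < n) ? data[i] : FLT_MAX;
    int   val = i;

    // For N = 256 the Dispatcher above resolves to GenericOptimized32<256>.
    reduceKeyVal<256>(skeys, key, svals, val, tid, LessOp());

    if (tid == 0)
    {
        minVal[blockIdx.x] = key;
        minIdx[blockIdx.x] = val;
    }
}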
@ -1,841 +0,0 @@ |
|||||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
||||||
//
|
|
||||||
// By downloading, copying, installing or using the software you agree to this license.
|
|
||||||
// If you do not agree to this license, do not download, install,
|
|
||||||
// copy or use the software.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// License Agreement
|
|
||||||
// For Open Source Computer Vision Library
|
|
||||||
//
|
|
||||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
|
||||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
|
||||||
// Third party copyrights are property of their respective owners.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
// are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// * Redistribution's of source code must retain the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// * The name of the copyright holders may not be used to endorse or promote products
|
|
||||||
// derived from this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// This software is provided by the copyright holders and contributors "as is" and
|
|
||||||
// any express or implied warranties, including, but not limited to, the implied
|
|
||||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
||||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
||||||
// indirect, incidental, special, exemplary, or consequential damages
|
|
||||||
// (including, but not limited to, procurement of substitute goods or services;
|
|
||||||
// loss of use, data, or profits; or business interruption) however caused
|
|
||||||
// and on any theory of liability, whether in contract, strict liability,
|
|
||||||
// or tort (including negligence or otherwise) arising in any way out of
|
|
||||||
// the use of this software, even if advised of the possibility of such damage.
|
|
||||||
//
|
|
||||||
//M*/
|
|
||||||
|
|
||||||
#ifndef __OPENCV_GPU_REDUCTION_DETAIL_HPP__ |
|
||||||
#define __OPENCV_GPU_REDUCTION_DETAIL_HPP__ |
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device |
|
||||||
{ |
|
||||||
namespace utility_detail |
|
||||||
{ |
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Reductor
|
|
||||||
|
|
||||||
template <int n> struct WarpReductor |
|
||||||
{ |
|
||||||
template <typename T, typename Op> static __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op) |
|
||||||
{ |
|
||||||
if (tid < n) |
|
||||||
data[tid] = partial_reduction; |
|
||||||
if (n > 32) __syncthreads(); |
|
||||||
|
|
||||||
if (n > 32) |
|
||||||
{ |
|
||||||
if (tid < n - 32) |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 32]); |
|
||||||
if (tid < 16) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]); |
|
||||||
} |
|
||||||
} |
|
||||||
else if (n > 16) |
|
||||||
{ |
|
||||||
if (tid < n - 16) |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]); |
|
||||||
if (tid < 8) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]); |
|
||||||
} |
|
||||||
} |
|
||||||
else if (n > 8) |
|
||||||
{ |
|
||||||
if (tid < n - 8) |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]); |
|
||||||
if (tid < 4) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]); |
|
||||||
} |
|
||||||
} |
|
||||||
else if (n > 4) |
|
||||||
{ |
|
||||||
if (tid < n - 4) |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]); |
|
||||||
if (tid < 2) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]); |
|
||||||
} |
|
||||||
} |
|
||||||
else if (n > 2) |
|
||||||
{ |
|
||||||
if (tid < n - 2) |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]); |
|
||||||
if (tid < 2) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct WarpReductor<64> |
|
||||||
{ |
|
||||||
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction; |
|
||||||
__syncthreads(); |
|
||||||
|
|
||||||
if (tid < 32) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 32]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]); |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct WarpReductor<32> |
|
||||||
{ |
|
||||||
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction; |
|
||||||
|
|
||||||
if (tid < 16) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]); |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct WarpReductor<16> |
|
||||||
{ |
|
||||||
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction; |
|
||||||
|
|
||||||
if (tid < 8) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]); |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct WarpReductor<8> |
|
||||||
{ |
|
||||||
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction; |
|
||||||
|
|
||||||
if (tid < 4) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]); |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
template <bool warp> struct ReductionDispatcher; |
|
||||||
template <> struct ReductionDispatcher<true> |
|
||||||
{ |
|
||||||
template <int n, typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op) |
|
||||||
{ |
|
||||||
WarpReductor<n>::reduce(data, partial_reduction, tid, op); |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct ReductionDispatcher<false> |
|
||||||
{ |
|
||||||
template <int n, typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op) |
|
||||||
{ |
|
||||||
if (tid < n) |
|
||||||
data[tid] = partial_reduction; |
|
||||||
__syncthreads(); |
|
||||||
|
|
||||||
|
|
||||||
if (n == 512) { if (tid < 256) { data[tid] = partial_reduction = op(partial_reduction, data[tid + 256]); } __syncthreads(); } |
|
||||||
if (n >= 256) { if (tid < 128) { data[tid] = partial_reduction = op(partial_reduction, data[tid + 128]); } __syncthreads(); } |
|
||||||
if (n >= 128) { if (tid < 64) { data[tid] = partial_reduction = op(partial_reduction, data[tid + 64]); } __syncthreads(); } |
|
||||||
|
|
||||||
if (tid < 32) |
|
||||||
{ |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 32]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]); |
|
||||||
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]); |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
// PredValWarpReductor
|
|
||||||
|
|
||||||
template <int n> struct PredValWarpReductor; |
|
||||||
template <> struct PredValWarpReductor<64> |
|
||||||
{ |
|
||||||
template <typename T, typename V, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 32) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal = sval[tid]; |
|
||||||
|
|
||||||
T reg = sdata[tid + 32]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 32]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 16]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 16]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 8]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 8]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 4]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct PredValWarpReductor<32> |
|
||||||
{ |
|
||||||
template <typename T, typename V, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 16) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal = sval[tid]; |
|
||||||
|
|
||||||
T reg = sdata[tid + 16]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 16]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 8]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 8]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 4]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
template <> struct PredValWarpReductor<16> |
|
||||||
{ |
|
||||||
template <typename T, typename V, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 8) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal = sval[tid]; |
|
||||||
|
|
||||||
                    T reg = sdata[tid + 8];
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 8]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 4]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct PredValWarpReductor<8> |
|
||||||
{ |
|
||||||
template <typename T, typename V, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 4) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal = sval[tid]; |
|
||||||
|
|
||||||
                    T reg = sdata[tid + 4];
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
template <bool warp> struct PredValReductionDispatcher; |
|
||||||
template <> struct PredValReductionDispatcher<true> |
|
||||||
{ |
|
||||||
template <int n, typename T, typename V, typename Pred> static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
PredValWarpReductor<n>::reduce(myData, myVal, sdata, sval, tid, pred); |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct PredValReductionDispatcher<false> |
|
||||||
{ |
|
||||||
template <int n, typename T, typename V, typename Pred> static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal = sval[tid]; |
|
||||||
|
|
||||||
if (n >= 512 && tid < 256) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 256]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 256]; |
|
||||||
} |
|
||||||
__syncthreads(); |
|
||||||
} |
|
||||||
if (n >= 256 && tid < 128) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 128]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 128]; |
|
||||||
} |
|
||||||
__syncthreads(); |
|
||||||
} |
|
||||||
if (n >= 128 && tid < 64) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 64]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 64]; |
|
||||||
} |
|
||||||
__syncthreads(); |
|
||||||
} |
|
||||||
|
|
||||||
if (tid < 32) |
|
||||||
{ |
|
||||||
if (n >= 64) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 32]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 32]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 32) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 16]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 16]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 16) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 8]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 8]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 8) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 4]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 4]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 4) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 2]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 2]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 2) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 1]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval[tid] = myVal = sval[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
// PredVal2WarpReductor
|
|
||||||
|
|
||||||
template <int n> struct PredVal2WarpReductor; |
|
||||||
template <> struct PredVal2WarpReductor<64> |
|
||||||
{ |
|
||||||
template <typename T, typename V1, typename V2, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 32) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal1 = sval1[tid]; |
|
||||||
myVal2 = sval2[tid]; |
|
||||||
|
|
||||||
T reg = sdata[tid + 32]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 32]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 32]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 16]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 16]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 16]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 8]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 8]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 8]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 4]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 4]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 2]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 1]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct PredVal2WarpReductor<32> |
|
||||||
{ |
|
||||||
template <typename T, typename V1, typename V2, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 16) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal1 = sval1[tid]; |
|
||||||
myVal2 = sval2[tid]; |
|
||||||
|
|
||||||
T reg = sdata[tid + 16]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 16]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 16]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 8]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 8]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 8]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 4]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 4]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 2]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 1]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
template <> struct PredVal2WarpReductor<16> |
|
||||||
{ |
|
||||||
template <typename T, typename V1, typename V2, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 8) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal1 = sval1[tid]; |
|
||||||
myVal2 = sval2[tid]; |
|
||||||
|
|
||||||
                    T reg = sdata[tid + 8];
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 8]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 8]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 4]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 4]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 2]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 1]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct PredVal2WarpReductor<8> |
|
||||||
{ |
|
||||||
template <typename T, typename V1, typename V2, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
if (tid < 4) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal1 = sval1[tid]; |
|
||||||
myVal2 = sval2[tid]; |
|
||||||
|
|
||||||
                    T reg = sdata[tid + 4];
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 4]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 4]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 2]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 2]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 2]; |
|
||||||
} |
|
||||||
|
|
||||||
reg = sdata[tid + 1]; |
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 1]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
template <bool warp> struct PredVal2ReductionDispatcher; |
|
||||||
template <> struct PredVal2ReductionDispatcher<true> |
|
||||||
{ |
|
||||||
template <int n, typename T, typename V1, typename V2, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
PredVal2WarpReductor<n>::reduce(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred); |
|
||||||
} |
|
||||||
}; |
|
||||||
template <> struct PredVal2ReductionDispatcher<false> |
|
||||||
{ |
|
||||||
template <int n, typename T, typename V1, typename V2, typename Pred> |
|
||||||
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred) |
|
||||||
{ |
|
||||||
myData = sdata[tid]; |
|
||||||
myVal1 = sval1[tid]; |
|
||||||
myVal2 = sval2[tid]; |
|
||||||
|
|
||||||
if (n >= 512 && tid < 256) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 256]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 256]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 256]; |
|
||||||
} |
|
||||||
__syncthreads(); |
|
||||||
} |
|
||||||
if (n >= 256 && tid < 128) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 128]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 128]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 128]; |
|
||||||
} |
|
||||||
__syncthreads(); |
|
||||||
} |
|
||||||
if (n >= 128 && tid < 64) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 64]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 64]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 64]; |
|
||||||
} |
|
||||||
__syncthreads(); |
|
||||||
} |
|
||||||
|
|
||||||
if (tid < 32) |
|
||||||
{ |
|
||||||
if (n >= 64) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 32]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 32]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 32]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 32) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 16]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 16]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 16]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 16) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 8]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 8]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 8]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 8) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 4]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 4]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 4]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 4) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 2]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 2]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 2]; |
|
||||||
} |
|
||||||
} |
|
||||||
if (n >= 2) |
|
||||||
{ |
|
||||||
T reg = sdata[tid + 1]; |
|
||||||
|
|
||||||
if (pred(reg, myData)) |
|
||||||
{ |
|
||||||
sdata[tid] = myData = reg; |
|
||||||
sval1[tid] = myVal1 = sval1[tid + 1]; |
|
||||||
sval2[tid] = myVal2 = sval2[tid + 1]; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
} // namespace utility_detail
|
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_REDUCTION_DETAIL_HPP__
|
|
@ -0,0 +1,197 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef __OPENCV_GPU_REDUCE_HPP__ |
||||||
|
#define __OPENCV_GPU_REDUCE_HPP__ |
||||||
|
|
||||||
|
#include <thrust/tuple.h> |
||||||
|
#include "detail/reduce.hpp" |
||||||
|
#include "detail/reduce_key_val.hpp" |
||||||
|
|
||||||
|
namespace cv { namespace gpu { namespace device |
||||||
|
{ |
||||||
|
template <int N, typename T, class Op> |
||||||
|
__device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op) |
||||||
|
{ |
||||||
|
reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op); |
||||||
|
} |
||||||
|
template <int N, |
||||||
|
typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9, |
||||||
|
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9, |
||||||
|
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9> |
||||||
|
__device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, |
||||||
|
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val, |
||||||
|
unsigned int tid, |
||||||
|
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op) |
||||||
|
{ |
||||||
|
reduce_detail::Dispatcher<N>::reductor::template reduce< |
||||||
|
const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&, |
||||||
|
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&, |
||||||
|
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op); |
||||||
|
} |
||||||
|
|
||||||
|
template <unsigned int N, typename K, typename V, class Cmp> |
||||||
|
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp) |
||||||
|
{ |
||||||
|
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp); |
||||||
|
} |
||||||
|
template <unsigned int N, |
||||||
|
typename K, |
||||||
|
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9, |
||||||
|
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9, |
||||||
|
class Cmp> |
||||||
|
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, |
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals, |
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val, |
||||||
|
unsigned int tid, const Cmp& cmp) |
||||||
|
{ |
||||||
|
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, |
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&, |
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&, |
||||||
|
const Cmp&>(skeys, key, svals, val, tid, cmp); |
||||||
|
} |
||||||
|
template <unsigned int N, |
||||||
|
typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9, |
||||||
|
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9, |
||||||
|
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9, |
||||||
|
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9, |
||||||
|
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9> |
||||||
|
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys, |
||||||
|
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key, |
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals, |
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val, |
||||||
|
unsigned int tid, |
||||||
|
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp) |
||||||
|
{ |
||||||
|
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce< |
||||||
|
const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&, |
||||||
|
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&, |
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&, |
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&, |
||||||
|
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& |
||||||
|
>(skeys, key, svals, val, tid, cmp); |
||||||
|
} |
||||||
|
|
||||||
|
// smem_tuple
|
||||||
|
|
||||||
|
template <typename T0> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*> |
||||||
|
smem_tuple(T0* t0) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*> |
||||||
|
smem_tuple(T0* t0, T1* t1) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8); |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> |
||||||
|
__device__ __forceinline__ |
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*> |
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9) |
||||||
|
{ |
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9); |
||||||
|
} |
||||||
|
}}} |
||||||
|
|
||||||
|
#endif // __OPENCV_GPU_REDUCE_HPP__
|
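As a usage note, the sketch below shows how reduce() and smem_tuple() are typically combined inside a kernel to fold several values per thread in one pass. The kernel, its 256-thread block size and the PlusOp functor are assumptions for illustration; OpenCV's own kernels would normally pick an operator from functional.hpp instead.

// Hypothetical usage sketch: per-block sum and sum of squares in one reduction.
#include <thrust/tuple.h>
#include "opencv2/gpu/device/reduce.hpp"   // include path assumed; declares reduce() and smem_tuple()

using namespace cv::gpu::device;

struct PlusOp
{
    __device__ __forceinline__ float operator()(float a, float b) const { return a + b; }
};

__global__ void sumAndSquares(const float* data, int n, float* outSum, float* outSqSum)
{
    __shared__ float ssum[256];     // shared buffer for the running sums
    __shared__ float ssqsum[256];   // shared buffer for the running sums of squares

    const int tid = threadIdx.x;
    const int i   = blockIdx.x * blockDim.x + tid;

    float sum   = (i < n) ? data[i] : 0.0f;
    float sqsum = sum * sum;

    // smem_tuple() wraps both shared buffers as volatile pointers; the tuple
    // overload of reduce() applies one operation per element of the value tuple.
    reduce<256>(smem_tuple(ssum, ssqsum), thrust::tie(sum, sqsum), tid,
                thrust::make_tuple(PlusOp(), PlusOp()));

    if (tid == 0)
    {
        outSum[blockIdx.x]   = sum;
        outSqSum[blockIdx.x] = sqsum;
    }
}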
@ -0,0 +1,145 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef __OPENCV_GPU_WARP_SHUFFLE_HPP__ |
||||||
|
#define __OPENCV_GPU_WARP_SHUFFLE_HPP__ |
||||||
|
|
||||||
|
namespace cv { namespace gpu { namespace device |
||||||
|
{ |
||||||
|
template <typename T> |
||||||
|
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
return __shfl(val, srcLane, width); |
||||||
|
#else |
||||||
|
return T(); |
||||||
|
#endif |
||||||
|
} |
||||||
|
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
return (unsigned int) __shfl((int) val, srcLane, width); |
||||||
|
#else |
||||||
|
return 0; |
||||||
|
#endif |
||||||
|
} |
||||||
|
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
int lo = __double2loint(val); |
||||||
|
int hi = __double2hiint(val); |
||||||
|
|
||||||
|
lo = __shfl(lo, srcLane, width); |
||||||
|
hi = __shfl(hi, srcLane, width); |
||||||
|
|
||||||
|
return __hiloint2double(hi, lo); |
||||||
|
#else |
||||||
|
return 0.0; |
||||||
|
#endif |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T> |
||||||
|
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
return __shfl_down(val, delta, width); |
||||||
|
#else |
||||||
|
return T(); |
||||||
|
#endif |
||||||
|
} |
||||||
|
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
return (unsigned int) __shfl_down((int) val, delta, width); |
||||||
|
#else |
||||||
|
return 0; |
||||||
|
#endif |
||||||
|
} |
||||||
|
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
int lo = __double2loint(val); |
||||||
|
int hi = __double2hiint(val); |
||||||
|
|
||||||
|
lo = __shfl_down(lo, delta, width); |
||||||
|
hi = __shfl_down(hi, delta, width); |
||||||
|
|
||||||
|
return __hiloint2double(hi, lo); |
||||||
|
#else |
||||||
|
return 0.0; |
||||||
|
#endif |
||||||
|
} |
||||||
|
|
||||||
|
template <typename T> |
||||||
|
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
return __shfl_up(val, delta, width); |
||||||
|
#else |
||||||
|
return T(); |
||||||
|
#endif |
||||||
|
} |
||||||
|
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
return (unsigned int) __shfl_up((int) val, delta, width); |
||||||
|
#else |
||||||
|
return 0; |
||||||
|
#endif |
||||||
|
} |
||||||
|
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize) |
||||||
|
{ |
||||||
|
#if __CUDA_ARCH__ >= 300 |
||||||
|
int lo = __double2loint(val); |
||||||
|
int hi = __double2hiint(val); |
||||||
|
|
||||||
|
lo = __shfl_up(lo, delta, width); |
||||||
|
hi = __shfl_up(hi, delta, width); |
||||||
|
|
||||||
|
return __hiloint2double(hi, lo); |
||||||
|
#else |
||||||
|
return 0.0; |
||||||
|
#endif |
||||||
|
} |
||||||
|
}}} |
||||||
|
|
||||||
|
#endif // __OPENCV_GPU_WARP_SHUFFLE_HPP__
|
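A short sketch of how these wrappers are meant to be composed; the warpSum() helper is a hypothetical example, not part of this commit. It assumes the code is compiled for compute capability 3.0 or higher, since below that the wrappers above deliberately return a default value.

// Hypothetical usage sketch: warp-wide sum built on shfl_down(); the result
// ends up in lane 0 of each warp.
#include "opencv2/gpu/device/warp_shuffle.hpp"   // include path assumed

using namespace cv::gpu::device;

__device__ __forceinline__ float warpSum(float val)
{
    #pragma unroll
    for (int delta = 16; delta > 0; delta /= 2)
        val += shfl_down(val, delta);

    return val;
}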
@ -0,0 +1,26 @@ |
|||||||
|
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR arm)

set(CMAKE_C_COMPILER arm-linux-gnueabi-gcc-4.5)
set(CMAKE_CXX_COMPILER arm-linux-gnueabi-g++-4.5)

# suppress compiler warnings
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-psabi")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-psabi")

# the cross-compilation root can live in any other place
set(__arm_linux_eabi_root /usr/arm-linux-gnueabi)

set(CMAKE_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH} ${__arm_linux_eabi_root})

if(EXISTS ${CUDA_TOOLKIT_ROOT_DIR})
  set(CMAKE_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH} ${CUDA_TOOLKIT_ROOT_DIR})
endif()

set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)

set(CARMA 1)
add_definitions(-DCARMA)
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,279 @@ |
|||||||
|
#include "perf_precomp.hpp" |
||||||
|
|
||||||
|
#define GPU_PERF_TEST_P(fixture, name, params) \ |
||||||
|
class fixture##_##name : public fixture {\
|
||||||
|
public:\
|
||||||
|
fixture##_##name() {}\
|
||||||
|
protected:\
|
||||||
|
virtual void __cpu();\
|
||||||
|
virtual void __gpu();\
|
||||||
|
virtual void PerfTestBody();\
|
||||||
|
};\
|
||||||
|
TEST_P(fixture##_##name, name /*perf*/){ RunPerfTestBody(); }\
|
||||||
|
INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
|
||||||
|
void fixture##_##name::PerfTestBody() { if (PERF_RUN_GPU()) __gpu(); else __cpu(); } |
||||||
|
|
||||||
|
#define RUN_CPU(fixture, name)\ |
||||||
|
void fixture##_##name::__cpu() |
||||||
|
|
||||||
|
#define RUN_GPU(fixture, name)\ |
||||||
|
void fixture##_##name::__gpu() |
||||||
|
|
||||||
|
#define NO_CPU(fixture, name)\ |
||||||
|
void fixture##_##name::__cpu() { FAIL() << "No such CPU implementation analogy";} |
||||||
|
|
||||||
|
namespace { |
||||||
|
struct DetectionLess |
||||||
|
{ |
||||||
|
bool operator()(const cv::gpu::SCascade::Detection& a, |
||||||
|
const cv::gpu::SCascade::Detection& b) const |
||||||
|
{ |
||||||
|
if (a.x != b.x) return a.x < b.x; |
||||||
|
else if (a.y != b.y) return a.y < b.y; |
||||||
|
else if (a.w != b.w) return a.w < b.w; |
||||||
|
else return a.h < b.h; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
cv::Mat sortDetections(cv::gpu::GpuMat& objects) |
||||||
|
{ |
||||||
|
cv::Mat detections(objects); |
||||||
|
|
||||||
|
typedef cv::gpu::SCascade::Detection Detection; |
||||||
|
Detection* begin = (Detection*)(detections.ptr<char>(0)); |
||||||
|
Detection* end = (Detection*)(detections.ptr<char>(0) + detections.cols); |
||||||
|
std::sort(begin, end, DetectionLess()); |
||||||
|
|
||||||
|
return detections; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
typedef std::tr1::tuple<std::string, std::string> fixture_t; |
||||||
|
typedef perf::TestBaseWithParam<fixture_t> SCascadeTest; |
||||||
|
|
||||||
|
GPU_PERF_TEST_P(SCascadeTest, detect, |
||||||
|
testing::Combine( |
||||||
|
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), |
||||||
|
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")))) |
||||||
|
|
||||||
|
RUN_GPU(SCascadeTest, detect) |
||||||
|
{ |
||||||
|
cv::Mat cpu = readImage (GET_PARAM(1)); |
||||||
|
ASSERT_FALSE(cpu.empty()); |
||||||
|
cv::gpu::GpuMat colored(cpu); |
||||||
|
|
||||||
|
cv::gpu::SCascade cascade; |
||||||
|
|
||||||
|
cv::FileStorage fs(perf::TestBase::getDataPath(GET_PARAM(0)), cv::FileStorage::READ); |
||||||
|
ASSERT_TRUE(fs.isOpened()); |
||||||
|
|
||||||
|
ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode())); |
||||||
|
|
||||||
|
cv::gpu::GpuMat objectBoxes(1, 10000 * sizeof(cv::gpu::SCascade::Detection), CV_8UC1), rois(colored.size(), CV_8UC1); |
||||||
|
rois.setTo(1); |
||||||
|
|
||||||
|
cascade.detect(colored, rois, objectBoxes); |
||||||
|
|
||||||
|
TEST_CYCLE() |
||||||
|
{ |
||||||
|
cascade.detect(colored, rois, objectBoxes); |
||||||
|
} |
||||||
|
|
||||||
|
SANITY_CHECK(sortDetections(objectBoxes)); |
||||||
|
} |
||||||
|
|
||||||
|
NO_CPU(SCascadeTest, detect) |
||||||
|
|
||||||
|
static cv::Rect getFromTable(int idx)
{
    static const cv::Rect rois[] =
    {
        cv::Rect( 65 * 4, 20 * 4, 35 * 4, 80 * 4),
        cv::Rect( 95 * 4, 35 * 4, 45 * 4, 40 * 4),
        cv::Rect( 45 * 4, 35 * 4, 45 * 4, 40 * 4),
        cv::Rect( 25 * 4, 27 * 4, 50 * 4, 45 * 4),
        cv::Rect(100 * 4, 50 * 4, 45 * 4, 40 * 4),

        cv::Rect( 60 * 4, 30 * 4, 45 * 4, 40 * 4),
        cv::Rect( 40 * 4, 55 * 4, 50 * 4, 40 * 4),
        cv::Rect( 48 * 4, 37 * 4, 72 * 4, 80 * 4),
        cv::Rect( 48 * 4, 32 * 4, 85 * 4, 58 * 4),
        cv::Rect( 48 * 4, 0 * 4, 32 * 4, 27 * 4)
    };

    return rois[idx];
}

typedef std::tr1::tuple<std::string, std::string, int> roi_fixture_t;
typedef perf::TestBaseWithParam<roi_fixture_t> SCascadeTestRoi;

GPU_PERF_TEST_P(SCascadeTestRoi, detectInRoi,
    testing::Combine(
        testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
        testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
        testing::Range(0, 5)))

RUN_GPU(SCascadeTestRoi, detectInRoi)
{
    cv::Mat cpu = readImage(GET_PARAM(1));
    ASSERT_FALSE(cpu.empty());
    cv::gpu::GpuMat colored(cpu);

    cv::gpu::SCascade cascade;

    cv::FileStorage fs(perf::TestBase::getDataPath(GET_PARAM(0)), cv::FileStorage::READ);
    ASSERT_TRUE(fs.isOpened());

    ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));

    cv::gpu::GpuMat objectBoxes(1, 16384 * 20, CV_8UC1), rois(colored.size(), CV_8UC1);
    rois.setTo(0);

    int nroi = GET_PARAM(2);
    cv::RNG rng;
    for (int i = 0; i < nroi; ++i)
    {
        cv::Rect r = getFromTable(rng(10));
        cv::gpu::GpuMat sub(rois, r);
        sub.setTo(1);
    }

    cascade.detect(colored, rois, objectBoxes);

    TEST_CYCLE()
    {
        cascade.detect(colored, rois, objectBoxes);
    }

    SANITY_CHECK(sortDetections(objectBoxes));
}

NO_CPU(SCascadeTestRoi, detectInRoi)

GPU_PERF_TEST_P(SCascadeTestRoi, detectEachRoi,
    testing::Combine(
        testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
        testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
        testing::Range(0, 10)))

RUN_GPU(SCascadeTestRoi, detectEachRoi)
{
    cv::Mat cpu = readImage(GET_PARAM(1));
    ASSERT_FALSE(cpu.empty());
    cv::gpu::GpuMat colored(cpu);

    cv::gpu::SCascade cascade;

    cv::FileStorage fs(perf::TestBase::getDataPath(GET_PARAM(0)), cv::FileStorage::READ);
    ASSERT_TRUE(fs.isOpened());

    ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));

    cv::gpu::GpuMat objectBoxes(1, 16384 * 20, CV_8UC1), rois(colored.size(), CV_8UC1);
    rois.setTo(0);

    int idx = GET_PARAM(2);
    cv::Rect r = getFromTable(idx);
    cv::gpu::GpuMat sub(rois, r);
    sub.setTo(1);

    cascade.detect(colored, rois, objectBoxes);

    TEST_CYCLE()
    {
        cascade.detect(colored, rois, objectBoxes);
    }

    SANITY_CHECK(sortDetections(objectBoxes));
}

NO_CPU(SCascadeTestRoi, detectEachRoi)

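// The two ROI variants above measure detection cost as a function of the masked
// area: detectInRoi enables between 0 and 4 rectangles picked from the table via
// cv::RNG (default-seeded, so the selection is repeatable from run to run), while
// detectEachRoi benchmarks each table entry individually.
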
GPU_PERF_TEST_P(SCascadeTest, detectOnIntegral,
    testing::Combine(
        testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
        testing::Values(std::string("cv/cascadeandhog/integrals.xml"))))

static std::string itoa(long i)
{
    static char s[65];
    sprintf(s, "%ld", i);
    return std::string(s);
}

RUN_GPU(SCascadeTest, detectOnIntegral)
{
    cv::FileStorage fsi(perf::TestBase::getDataPath(GET_PARAM(1)), cv::FileStorage::READ);
    ASSERT_TRUE(fsi.isOpened());

    cv::gpu::GpuMat hogluv(121 * 10, 161, CV_32SC1);
    for (int i = 0; i < 10; ++i)
    {
        cv::Mat channel;
        fsi[std::string("channel") + itoa(i)] >> channel;
        cv::gpu::GpuMat gchannel(hogluv, cv::Rect(0, 121 * i, 161, 121));
        gchannel.upload(channel);
    }

    cv::gpu::SCascade cascade;

    cv::FileStorage fs(perf::TestBase::getDataPath(GET_PARAM(0)), cv::FileStorage::READ);
    ASSERT_TRUE(fs.isOpened());

    ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));

    cv::gpu::GpuMat objectBoxes(1, 10000 * sizeof(cv::gpu::SCascade::Detection), CV_8UC1), rois(cv::Size(640, 480), CV_8UC1);
    rois.setTo(1);

    cascade.detect(hogluv, rois, objectBoxes);

    TEST_CYCLE()
    {
        cascade.detect(hogluv, rois, objectBoxes);
    }

    SANITY_CHECK(sortDetections(objectBoxes));
}

NO_CPU(SCascadeTest, detectOnIntegral)

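// detectOnIntegral feeds the cascade ten pre-computed 121x161 integral channels
// (stacked vertically into the 1210x161 hogluv matrix, loaded from integrals.xml)
// instead of a color image, so it appears to time only the detection stage
// without channel extraction. Note also that the local itoa() above sprintf()s
// into a static buffer; that is fine for this single-threaded test body but is
// not reentrant.
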
GPU_PERF_TEST_P(SCascadeTest, detectStream,
    testing::Combine(
        testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
        testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png"))))

RUN_GPU(SCascadeTest, detectStream)
{
    cv::Mat cpu = readImage(GET_PARAM(1));
    ASSERT_FALSE(cpu.empty());
    cv::gpu::GpuMat colored(cpu);

    cv::gpu::SCascade cascade;

    cv::FileStorage fs(perf::TestBase::getDataPath(GET_PARAM(0)), cv::FileStorage::READ);
    ASSERT_TRUE(fs.isOpened());

    ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));

    cv::gpu::GpuMat objectBoxes(1, 10000 * sizeof(cv::gpu::SCascade::Detection), CV_8UC1), rois(colored.size(), CV_8UC1);
    rois.setTo(1);

    cv::gpu::Stream s;

    cascade.detect(colored, rois, objectBoxes, s);

    TEST_CYCLE()
    {
        cascade.detect(colored, rois, objectBoxes, s);
    }

#ifdef HAVE_CUDA
    cudaDeviceSynchronize();
#endif

    SANITY_CHECK(sortDetections(objectBoxes));
}

NO_CPU(SCascadeTest, detectStream)
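// detectStream issues the same detection on an explicit cv::gpu::Stream, so the
// calls are asynchronous with respect to the host; the cudaDeviceSynchronize()
// guard (compiled only when HAVE_CUDA is defined) ensures all queued work has
// finished before the results are downloaded for the sanity check.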
@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float, uchar>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
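// Each of the following hunks adds another such translation unit, differing only
// in the instantiated source/destination type pair: one explicit instantiation of
// the column-filter kernel per file, presumably to keep per-file CUDA compilation
// time and memory in check. For these instantiations to link, column_filter.h
// (not shown in this diff) is assumed to declare something along the lines of:
//
//     namespace filter
//     {
//         template <typename T, typename D>
//         void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel,
//                           int ksize, int anchor, int brd_type, int cc,
//                           cudaStream_t stream);
//     }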
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float3, uchar3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float, unsigned short>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float3, ushort3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float4, ushort4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float3, int3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float4, int4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float, int>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float, float>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float, short>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
@ -0,0 +1,53 @@
#if !defined CUDA_DISABLER

#include "column_filter.h"

namespace filter
{
    template void linearColumn<float4, short4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
}

#endif /* CUDA_DISABLER */
Some files were not shown because too many files have changed in this diff.