From b83d4add2ea096b3481a838f9e26a038d10a93d5 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Wed, 26 Sep 2012 17:15:17 +0400 Subject: [PATCH] memory optimization --- modules/gpu/src/cuda/isf-sc.cu | 56 +++- modules/gpu/src/icf.hpp | 126 ++++---- modules/gpu/src/softcascade.cpp | 531 +++++++++++++++----------------- 3 files changed, 370 insertions(+), 343 deletions(-) diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index e4831e2e62..714bdfa44a 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -41,9 +41,9 @@ //M*/ #include -// #include +#include // #include -// #include +#include // #include // //#define LOG_CUDA_CASCADE @@ -93,6 +93,58 @@ namespace icf { cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); } + + texture tnode; + __global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages, + const Node* nodes, + PtrStepSz objects) + { + const int y = blockIdx.y * blockDim.y + threadIdx.y; + const int x = blockIdx.x * blockDim.x + threadIdx.x; + Level level = levels[blockIdx.z]; + if(x >= level.workRect.x || y >= level.workRect.y) return; + + Octave octave = octaves[level.octave]; + int st = octave.index * octave.stages; + const int stEnd = st + 1000;//octave.stages; + + float confidence = 0.f; + +#pragma unroll 8 + for(; st < stEnd; ++st) + { + const int nId = st * 3; + const Node node = nodes[nId]; + + const float stage = stages[st]; + confidence += node.rect.x * stage; + } + + uchar4 val; + val.x = (int)confidence; + if (x == y) objects(0, threadIdx.x) = val; + + } + + void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, + const PtrStepSzb& nodes, const PtrStepSzb& features, + PtrStepSz objects) + { + int fw = 160; + int fh = 120; + dim3 block(32, 8); + dim3 grid(fw / 32, fh / 8, 47); + const Level* l = (const Level*)levels.ptr(); + const Octave* oct = ((const Octave*)octaves.ptr()); + const float* st = (const float*)stages.ptr(); + const Node* nd = (const Node*)nodes.ptr(); + // cudaSafeCall( cudaBindTexture(0, tnode, nodes.data, rgb.cols / size) ); + + test_kernel<<>>(l, oct, st, nd, objects); + + cudaSafeCall( cudaGetLastError()); + cudaSafeCall( cudaDeviceSynchronize()); + } } }}} diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp index cf13480071..51ea2c068a 100644 --- a/modules/gpu/src/icf.hpp +++ b/modules/gpu/src/icf.hpp @@ -1,4 +1,4 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// +//M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -38,12 +38,12 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // -//M*/ +//M -// #include +#include -// #ifndef __OPENCV_ICF_HPP__ -// #define __OPENCV_ICF_HPP__ +#ifndef __OPENCV_ICF_HPP__ +#define __OPENCV_ICF_HPP__ // #if defined __CUDACC__ // # define __device __device__ __forceinline__ @@ -52,49 +52,62 @@ // #endif -// namespace cv { namespace gpu { namespace icf { - -// using cv::gpu::PtrStepSzb; -// using cv::gpu::PtrStepSzf; - -// typedef unsigned char uchar; - -// struct __align__(16) Octave -// { -// ushort index; -// ushort stages; -// ushort shrinkage; -// ushort2 size; -// float scale; - -// Octave(const ushort i, const ushort s, const ushort sh, const ushort2 sz, const float sc) -// : index(i), stages(s), shrinkage(sh), size(sz), scale(sc) {} -// }; - -// struct __align__(8) Level //is actually 24 bytes -// { -// int octave; - -// // float origScale; //not actually used -// float relScale; -// float shrScale; // used for marking detection -// float scaling[2]; // calculated according to Dollal paper - -// // for 640x480 we can not get overflow -// uchar2 workRect; -// uchar2 objSize; - -// Level(int idx, const Octave& oct, const float scale, const int w, const int h) -// : octave(idx), relScale(scale / oct.scale), shrScale (relScale / (float)oct.shrinkage) -// { -// workRect.x = round(w / (float)oct.shrinkage); -// workRect.y = round(h / (float)oct.shrinkage); - -// objSize.x = round(oct.size.x * relScale); -// objSize.y = round(oct.size.y * relScale); -// } -// }; - +namespace cv { namespace gpu { namespace device { +namespace icf { + +struct __align__(16) Octave +{ + ushort index; + ushort stages; + ushort shrinkage; + ushort2 size; + float scale; + + Octave(const ushort i, const ushort s, const ushort sh, const ushort2 sz, const float sc) + : index(i), stages(s), shrinkage(sh), size(sz), scale(sc) {} +}; + +struct __align__(8) Level //is actually 24 bytes +{ + int octave; + + float relScale; + float shrScale; // used for marking detection + float scaling[2]; // calculated according to Dollal paper + + // for 640x480 we can not get overflow + uchar2 workRect; + uchar2 objSize; + + Level(int idx, const Octave& oct, const float scale, const int w, const int h) + : octave(idx), relScale(scale / oct.scale), shrScale (relScale / (float)oct.shrinkage) + { + workRect.x = round(w / (float)oct.shrinkage); + workRect.y = round(h / (float)oct.shrinkage); + + objSize.x = round(oct.size.x * relScale); + objSize.y = round(oct.size.y * relScale); + } +}; + +struct __align__(8) Node +{ + // int feature; + uchar4 rect; + float threshold; + + Node(const uchar4 c, const int t) : rect(c), threshold(t) {} +}; + +struct __align__(8) Feature +{ + int channel; + uchar4 rect; + + Feature(const int c, const uchar4 r) : channel(c), rect(r) {} +}; +} +}}} // struct Cascade // { // Cascade() {} @@ -146,21 +159,6 @@ // static const float magnitudeScaling = 1.f ;// / sqrt(2); // }; -// struct __align__(8) Node -// { -// int feature; -// float threshold; - -// Node(const int f, const float t) : feature(f), threshold(t) {} -// }; - -// struct __align__(8) Feature -// { -// int channel; -// uchar4 rect; - -// Feature(const int c, const uchar4 r) : channel(c), rect(r) {} -// }; // }}} -// #endif \ No newline at end of file +#endif \ No newline at end of file diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp index f336fd2d9b..8d75176ab3 100644 --- a/modules/gpu/src/softcascade.cpp +++ b/modules/gpu/src/softcascade.cpp @@ -53,12 +53,15 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat #else -// #include +#include namespace cv { namespace gpu { namespace device { namespace icf { void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, const int fw, const int fh, const int bins); + void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, + const PtrStepSzb& nodes, const PtrStepSzb& features, + PtrStepSz objects); } }}} @@ -82,19 +85,20 @@ struct cv::gpu::SoftCascade::Filds integralBuffer.create(shrunk.rows + 1 * HOG_LUV_BINS, shrunk.cols + 1, CV_32SC1); hogluv.create((FRAME_HEIGHT / 4 + 1) * HOG_LUV_BINS, FRAME_WIDTH / 4 + 1, CV_32SC1); } -// // scales range -// float minScale; -// float maxScale; -// int origObjWidth; -// int origObjHeight; + // scales range + float minScale; + float maxScale; -// GpuMat octaves; -// GpuMat stages; -// GpuMat nodes; -// GpuMat leaves; -// GpuMat features; -// GpuMat levels; + int origObjWidth; + int origObjHeight; + + GpuMat octaves; + GpuMat stages; + GpuMat nodes; + GpuMat leaves; + GpuMat features; + GpuMat levels; // preallocated buffer 640x480x10 for hogluv + 640x480 got gray GpuMat plane; @@ -114,312 +118,285 @@ struct cv::gpu::SoftCascade::Filds // 161x121x10 GpuMat hogluv; -// // will be removed in final version - - -// // temp matrix for sobel and cartToPolar -// GpuMat dfdx, dfdy, angle, mag, nmag, nangle; - -// std::vector scales; - -// icf::Cascade cascade; -// icf::ChannelStorage storage; + std::vector scales; -// enum { BOOST = 0 }; + enum { BOOST = 0 }; enum { FRAME_WIDTH = 640, FRAME_HEIGHT = 480, -// TOTAL_SCALES = 55, + TOTAL_SCALES = 55, // CLASSIFIERS = 5, -// ORIG_OBJECT_WIDTH = 64, -// ORIG_OBJECT_HEIGHT = 128, + ORIG_OBJECT_WIDTH = 64, + ORIG_OBJECT_HEIGHT = 128, HOG_BINS = 6, LUV_BINS = 3, HOG_LUV_BINS = 10 }; -// bool fill(const FileNode &root, const float mins, const float maxs); -// void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const -// { -// cascade.detect(hogluv, objects, stream); -// } + bool fill(const FileNode &root, const float mins, const float maxs); + void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const + { + device::icf::detect(levels, octaves, stages, nodes, features, objects); + } -// private: -// void calcLevels(const std::vector& octs, -// int frameW, int frameH, int nscales); +private: + void calcLevels(const std::vector& octs, + int frameW, int frameH, int nscales); -// typedef std::vector::const_iterator octIt_t; -// int fitOctave(const std::vector& octs, const float& logFactor) const -// { -// float minAbsLog = FLT_MAX; -// int res = 0; -// for (int oct = 0; oct < (int)octs.size(); ++oct) -// { -// const icf::Octave& octave =octs[oct]; -// float logOctave = ::log(octave.scale); -// float logAbsScale = ::fabs(logFactor - logOctave); - -// if(logAbsScale < minAbsLog) -// { -// res = oct; -// minAbsLog = logAbsScale; -// } -// } -// return res; -// } + typedef std::vector::const_iterator octIt_t; + int fitOctave(const std::vector& octs, const float& logFactor) const + { + float minAbsLog = FLT_MAX; + int res = 0; + for (int oct = 0; oct < (int)octs.size(); ++oct) + { + const device::icf::Octave& octave =octs[oct]; + float logOctave = ::log(octave.scale); + float logAbsScale = ::fabs(logFactor - logOctave); + + if(logAbsScale < minAbsLog) + { + res = oct; + minAbsLog = logAbsScale; + } + } + return res; + } }; -// inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs) -// { -// minScale = mins; -// maxScale = maxs; - -// // cascade properties -// static const char *const SC_STAGE_TYPE = "stageType"; -// static const char *const SC_BOOST = "BOOST"; - -// static const char *const SC_FEATURE_TYPE = "featureType"; -// static const char *const SC_ICF = "ICF"; - -// static const char *const SC_ORIG_W = "width"; -// static const char *const SC_ORIG_H = "height"; - -// static const char *const SC_OCTAVES = "octaves"; -// static const char *const SC_STAGES = "stages"; -// static const char *const SC_FEATURES = "features"; +inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs) +{ + using namespace device::icf; + minScale = mins; + maxScale = maxs; -// static const char *const SC_WEEK = "weakClassifiers"; -// static const char *const SC_INTERNAL = "internalNodes"; -// static const char *const SC_LEAF = "leafValues"; + // cascade properties + static const char *const SC_STAGE_TYPE = "stageType"; + static const char *const SC_BOOST = "BOOST"; -// static const char *const SC_OCT_SCALE = "scale"; -// static const char *const SC_OCT_STAGES = "stageNum"; -// static const char *const SC_OCT_SHRINKAGE = "shrinkingFactor"; + static const char *const SC_FEATURE_TYPE = "featureType"; + static const char *const SC_ICF = "ICF"; -// static const char *const SC_STAGE_THRESHOLD = "stageThreshold"; + static const char *const SC_ORIG_W = "width"; + static const char *const SC_ORIG_H = "height"; -// static const char * const SC_F_CHANNEL = "channel"; -// static const char * const SC_F_RECT = "rect"; + static const char *const SC_OCTAVES = "octaves"; + static const char *const SC_STAGES = "stages"; + static const char *const SC_FEATURES = "features"; -// // only Ada Boost supported -// std::string stageTypeStr = (string)root[SC_STAGE_TYPE]; -// CV_Assert(stageTypeStr == SC_BOOST); + static const char *const SC_WEEK = "weakClassifiers"; + static const char *const SC_INTERNAL = "internalNodes"; + static const char *const SC_LEAF = "leafValues"; -// // only HOG-like integral channel features cupported -// string featureTypeStr = (string)root[SC_FEATURE_TYPE]; -// CV_Assert(featureTypeStr == SC_ICF); + static const char *const SC_OCT_SCALE = "scale"; + static const char *const SC_OCT_STAGES = "stageNum"; + static const char *const SC_OCT_SHRINKAGE = "shrinkingFactor"; -// origObjWidth = (int)root[SC_ORIG_W]; -// CV_Assert(origObjWidth == ORIG_OBJECT_WIDTH); + static const char *const SC_STAGE_THRESHOLD = "stageThreshold"; -// origObjHeight = (int)root[SC_ORIG_H]; -// CV_Assert(origObjHeight == ORIG_OBJECT_HEIGHT); + static const char * const SC_F_CHANNEL = "channel"; + static const char * const SC_F_RECT = "rect"; -// FileNode fn = root[SC_OCTAVES]; -// if (fn.empty()) return false; + // only Ada Boost supported + std::string stageTypeStr = (string)root[SC_STAGE_TYPE]; + CV_Assert(stageTypeStr == SC_BOOST); -// std::vector voctaves; -// std::vector vstages; -// std::vector vnodes; -// std::vector vleaves; -// std::vector vfeatures; -// scales.clear(); + // only HOG-like integral channel features cupported + string featureTypeStr = (string)root[SC_FEATURE_TYPE]; + CV_Assert(featureTypeStr == SC_ICF); -// // std::vector levels; + origObjWidth = (int)root[SC_ORIG_W]; + CV_Assert(origObjWidth == ORIG_OBJECT_WIDTH); -// FileNodeIterator it = fn.begin(), it_end = fn.end(); -// int feature_offset = 0; -// ushort octIndex = 0; -// ushort shrinkage = 1; + origObjHeight = (int)root[SC_ORIG_H]; + CV_Assert(origObjHeight == ORIG_OBJECT_HEIGHT); -// for (; it != it_end; ++it) -// { -// FileNode fns = *it; -// float scale = (float)fns[SC_OCT_SCALE]; -// scales.push_back(scale); -// ushort nstages = saturate_cast((int)fns[SC_OCT_STAGES]); -// ushort2 size; -// size.x = cvRound(ORIG_OBJECT_WIDTH * scale); -// size.y = cvRound(ORIG_OBJECT_HEIGHT * scale); -// shrinkage = saturate_cast((int)fns[SC_OCT_SHRINKAGE]); - -// icf::Octave octave(octIndex, nstages, shrinkage, size, scale); -// CV_Assert(octave.stages > 0); -// voctaves.push_back(octave); - -// FileNode ffs = fns[SC_FEATURES]; -// if (ffs.empty()) return false; - -// fns = fns[SC_STAGES]; -// if (fn.empty()) return false; - -// // for each stage (~ decision tree with H = 2) -// FileNodeIterator st = fns.begin(), st_end = fns.end(); -// for (; st != st_end; ++st ) -// { -// fns = *st; -// vstages.push_back((float)fns[SC_STAGE_THRESHOLD]); - -// fns = fns[SC_WEEK]; -// FileNodeIterator ftr = fns.begin(), ft_end = fns.end(); -// for (; ftr != ft_end; ++ftr) -// { -// fns = (*ftr)[SC_INTERNAL]; -// FileNodeIterator inIt = fns.begin(), inIt_end = fns.end(); -// for (; inIt != inIt_end;) -// { -// int feature = (int)(*(inIt +=2)++) + feature_offset; -// float th = (float)(*(inIt++)); -// vnodes.push_back(icf::Node(feature, th)); -// } - -// fns = (*ftr)[SC_LEAF]; -// inIt = fns.begin(), inIt_end = fns.end(); -// for (; inIt != inIt_end; ++inIt) -// vleaves.push_back((float)(*inIt)); -// } -// } - -// st = ffs.begin(), st_end = ffs.end(); -// for (; st != st_end; ++st ) -// { -// cv::FileNode rn = (*st)[SC_F_RECT]; -// cv::FileNodeIterator r_it = rn.begin(); -// uchar4 rect; -// rect.x = saturate_cast((int)*(r_it++)); -// rect.y = saturate_cast((int)*(r_it++)); -// rect.z = saturate_cast((int)*(r_it++)); -// rect.w = saturate_cast((int)*(r_it++)); -// vfeatures.push_back(icf::Feature((int)(*st)[SC_F_CHANNEL], rect)); -// } - -// feature_offset += octave.stages * 3; -// ++octIndex; -// } + FileNode fn = root[SC_OCTAVES]; + if (fn.empty()) return false; -// // upload in gpu memory -// octaves.upload(cv::Mat(1, voctaves.size() * sizeof(icf::Octave), CV_8UC1, (uchar*)&(voctaves[0]) )); -// CV_Assert(!octaves.empty()); + std::vector voctaves; + std::vector vstages; + std::vector vnodes; + std::vector vleaves; + std::vector vfeatures; + scales.clear(); -// stages.upload(cv::Mat(vstages).reshape(1,1)); -// CV_Assert(!stages.empty()); + FileNodeIterator it = fn.begin(), it_end = fn.end(); + int feature_offset = 0; + ushort octIndex = 0; + ushort shrinkage = 1; -// nodes.upload(cv::Mat(1, vnodes.size() * sizeof(icf::Node), CV_8UC1, (uchar*)&(vnodes[0]) )); -// CV_Assert(!nodes.empty()); + for (; it != it_end; ++it) + { + FileNode fns = *it; + float scale = (float)fns[SC_OCT_SCALE]; + scales.push_back(scale); + ushort nstages = saturate_cast((int)fns[SC_OCT_STAGES]); + ushort2 size; + size.x = cvRound(ORIG_OBJECT_WIDTH * scale); + size.y = cvRound(ORIG_OBJECT_HEIGHT * scale); + shrinkage = saturate_cast((int)fns[SC_OCT_SHRINKAGE]); + + Octave octave(octIndex, nstages, shrinkage, size, scale); + CV_Assert(octave.stages > 0); + voctaves.push_back(octave); + + FileNode ffs = fns[SC_FEATURES]; + if (ffs.empty()) return false; + + fns = fns[SC_STAGES]; + if (fn.empty()) return false; + + // for each stage (~ decision tree with H = 2) + FileNodeIterator st = fns.begin(), st_end = fns.end(); + for (; st != st_end; ++st ) + { + fns = *st; + vstages.push_back((float)fns[SC_STAGE_THRESHOLD]); + + fns = fns[SC_WEEK]; + FileNodeIterator ftr = fns.begin(), ft_end = fns.end(); + for (; ftr != ft_end; ++ftr) + { + fns = (*ftr)[SC_INTERNAL]; + FileNodeIterator inIt = fns.begin(), inIt_end = fns.end(); + for (; inIt != inIt_end;) + { + int feature = (int)(*(inIt +=2)++) + feature_offset; + float th = (float)(*(inIt++)); + uchar4 rect; + vnodes.push_back(Node(rect, th)); + } + + fns = (*ftr)[SC_LEAF]; + inIt = fns.begin(), inIt_end = fns.end(); + for (; inIt != inIt_end; ++inIt) + vleaves.push_back((float)(*inIt)); + } + } + + st = ffs.begin(), st_end = ffs.end(); + for (; st != st_end; ++st ) + { + cv::FileNode rn = (*st)[SC_F_RECT]; + cv::FileNodeIterator r_it = rn.begin(); + uchar4 rect; + rect.x = saturate_cast((int)*(r_it++)); + rect.y = saturate_cast((int)*(r_it++)); + rect.z = saturate_cast((int)*(r_it++)); + rect.w = saturate_cast((int)*(r_it++)); + vfeatures.push_back(Feature((int)(*st)[SC_F_CHANNEL], rect)); + } + + feature_offset += octave.stages * 3; + ++octIndex; + } -// leaves.upload(cv::Mat(vleaves).reshape(1,1)); -// CV_Assert(!leaves.empty()); + // upload in gpu memory + octaves.upload(cv::Mat(1, voctaves.size() * sizeof(Octave), CV_8UC1, (uchar*)&(voctaves[0]) )); + CV_Assert(!octaves.empty()); -// features.upload(cv::Mat(1, vfeatures.size() * sizeof(icf::Feature), CV_8UC1, (uchar*)&(vfeatures[0]) )); -// CV_Assert(!features.empty()); + stages.upload(cv::Mat(vstages).reshape(1,1)); + CV_Assert(!stages.empty()); -// // compute levels -// calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES); -// CV_Assert(!levels.empty()); + nodes.upload(cv::Mat(1, vnodes.size() * sizeof(Node), CV_8UC1, (uchar*)&(vnodes[0]) )); + CV_Assert(!nodes.empty()); -// //init Cascade -// cascade = icf::Cascade(octaves, stages, nodes, leaves, features, levels); + leaves.upload(cv::Mat(vleaves).reshape(1,1)); + CV_Assert(!leaves.empty()); -// // allocate buffers -// dmem.create(FRAME_HEIGHT * (HOG_LUV_BINS + 1), FRAME_WIDTH, CV_8UC1); -// shrunk.create(FRAME_HEIGHT / shrinkage * HOG_LUV_BINS, FRAME_WIDTH / shrinkage, CV_8UC1); -// // hogluv.create( (FRAME_HEIGHT / shrinkage + 1) * HOG_LUV_BINS, (FRAME_WIDTH / shrinkage + 1), CV_16UC1); -// hogluv.create( (FRAME_HEIGHT / shrinkage + 1) * HOG_LUV_BINS, (FRAME_WIDTH / shrinkage + 1), CV_32SC1); -// luv.create(FRAME_HEIGHT, FRAME_WIDTH, CV_8UC3); -// integralBuffer.create(shrunk.rows + 1 * HOG_LUV_BINS, shrunk.cols + 1, CV_32SC1); + features.upload(cv::Mat(1, vfeatures.size() * sizeof(Feature), CV_8UC1, (uchar*)&(vfeatures[0]) )); + CV_Assert(!features.empty()); -// dfdx.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1); -// dfdy.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1); -// angle.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1); -// mag.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1); + // compute levels + calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES); + CV_Assert(!levels.empty()); -// nmag.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1); -// nangle.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1); + return true; +} -// storage = icf::ChannelStorage(dmem, shrunk, hogluv, shrinkage); -// return true; -// } +namespace { + struct CascadeIntrinsics + { + static const float lambda = 1.099f, a = 0.89f; + + static float getFor(int channel, float scaling) + { + CV_Assert(channel < 10); + + if (fabs(scaling - 1.f) < FLT_EPSILON) + return 1.f; + + // according to R. Benenson, M. Mathias, R. Timofte and L. Van Gool's and Dallal's papers + static const float A[2][2] = + { //channel <= 6, otherwise + { 0.89f, 1.f}, // down + { 1.00f, 1.f} // up + }; + + static const float B[2][2] = + { //channel <= 6, otherwise + { 1.099f / log(2), 2.f}, // down + { 0.f, 2.f} // up + }; + + float a = A[(int)(scaling >= 1)][(int)(channel > 6)]; + float b = B[(int)(scaling >= 1)][(int)(channel > 6)]; + + // printf("!!! scaling: %f %f %f -> %f\n", scaling, a, b, a * pow(scaling, b)); + return a * pow(scaling, b); + } + }; +} -// namespace { -// struct CascadeIntrinsics -// { -// static const float lambda = 1.099f, a = 0.89f; - -// static float getFor(int channel, float scaling) -// { -// CV_Assert(channel < 10); - -// if (fabs(scaling - 1.f) < FLT_EPSILON) -// return 1.f; - -// // according to R. Benenson, M. Mathias, R. Timofte and L. Van Gool's and Dallal's papers -// static const float A[2][2] = -// { //channel <= 6, otherwise -// { 0.89f, 1.f}, // down -// { 1.00f, 1.f} // up -// }; - -// static const float B[2][2] = -// { //channel <= 6, otherwise -// { 1.099f / log(2), 2.f}, // down -// { 0.f, 2.f} // up -// }; - -// float a = A[(int)(scaling >= 1)][(int)(channel > 6)]; -// float b = B[(int)(scaling >= 1)][(int)(channel > 6)]; - -// // printf("!!! scaling: %f %f %f -> %f\n", scaling, a, b, a * pow(scaling, b)); -// return a * pow(scaling, b); -// } -// }; -// } +inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector& octs, + int frameW, int frameH, int nscales) +{ + CV_Assert(nscales > 1); + using device::icf::Level; -// inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector& octs, -// int frameW, int frameH, int nscales) -// { -// CV_Assert(nscales > 1); + std::vector vlevels; + float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales -1); -// std::vector vlevels; -// float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales -1); + float scale = minScale; + for (int sc = 0; sc < nscales; ++sc) + { + int width = ::std::max(0.0f, frameW - (origObjWidth * scale)); + int height = ::std::max(0.0f, frameH - (origObjHeight * scale)); + + float logScale = ::log(scale); + int fit = fitOctave(octs, logScale); + + Level level(fit, octs[fit], scale, width, height); + level.scaling[0] = CascadeIntrinsics::getFor(0, level.relScale); + level.scaling[1] = CascadeIntrinsics::getFor(9, level.relScale); + + if (!width || !height) + break; + else + vlevels.push_back(level); + + if (::fabs(scale - maxScale) < FLT_EPSILON) break; + scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor)); + + // printf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale, + // level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, + //level.objSize.y); + + std::cout << "level " << sc + << " octeve " + << vlevels[sc].octave + << " relScale " + << vlevels[sc].relScale + << " " << vlevels[sc].shrScale + << " [" << (int)vlevels[sc].objSize.x + << " " << (int)vlevels[sc].objSize.y << "] [" + << (int)vlevels[sc].workRect.x << " " << (int)vlevels[sc].workRect.y << "]" << std::endl; + } -// float scale = minScale; -// for (int sc = 0; sc < nscales; ++sc) -// { -// int width = ::std::max(0.0f, frameW - (origObjWidth * scale)); -// int height = ::std::max(0.0f, frameH - (origObjHeight * scale)); - -// float logScale = ::log(scale); -// int fit = fitOctave(octs, logScale); - -// icf::Level level(fit, octs[fit], scale, width, height); -// level.scaling[0] = CascadeIntrinsics::getFor(0, level.relScale); -// level.scaling[1] = CascadeIntrinsics::getFor(9, level.relScale); - -// if (!width || !height) -// break; -// else -// vlevels.push_back(level); - -// if (::fabs(scale - maxScale) < FLT_EPSILON) break; -// scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor)); - -// // printf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale, -// // level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, -//level.objSize.y); - -// // std::cout << "level " << sc -// // << " octeve " -// // << vlevels[sc].octave -// // << " relScale " -// // << vlevels[sc].relScale -// // << " " << vlevels[sc].shrScale -// // << " [" << (int)vlevels[sc].objSize.x -// // << " " << (int)vlevels[sc].objSize.y << "] [" -// // << (int)vlevels[sc].workRect.x << " " << (int)vlevels[sc].workRect.y << "]" << std::endl; -// } -// levels.upload(cv::Mat(1, vlevels.size() * sizeof(icf::Level), CV_8UC1, (uchar*)&(vlevels[0]) )); -// } + levels.upload(cv::Mat(1, vlevels.size() * sizeof(Level), CV_8UC1, (uchar*)&(vlevels[0]) )); +} cv::gpu::SoftCascade::SoftCascade() : filds(0) {} @@ -444,7 +421,7 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c filds = new Filds; Filds& flds = *filds; - // if (!flds.fill(fs.getFirstTopLevelNode(), minScale, maxScale)) return false; + if (!flds.fill(fs.getFirstTopLevelNode(), minScale, maxScale)) return false; return true; } @@ -538,7 +515,7 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& cudaStream_t stream = StreamAccessor::getStream(s); // detection -// flds.detect(objects, stream); + flds.detect(objects, stream); // // flds.storage.frame(colored, stream); }