empty cascade

pull/158/head
marina.kolpakova 12 years ago
parent 4881205bae
commit 1917366528

Changed files:
  1. modules/gpu/src/cuda/isf-sc.cu (344 changed lines)
  2. modules/gpu/src/icf.hpp (248 changed lines)
  3. modules/gpu/src/softcascade.cpp (836 changed lines)
@@ -40,221 +40,221 @@
//
//M*/

#include <icf.hpp>
#include <opencv2/gpu/device/saturate_cast.hpp>

#include <stdio.h>
#include <float.h>

//#define LOG_CUDA_CASCADE

#if defined LOG_CUDA_CASCADE
# define dprintf(format, ...) \
            do { printf(format, __VA_ARGS__); } while (0)
#else
# define dprintf(format, ...)
#endif
namespace cv { namespace gpu { namespace device {
namespace icf {

    enum {
        HOG_BINS = 6,
        HOG_LUV_BINS = 10,
        WIDTH = 640,
        HEIGHT = 480,
        GREY_OFFSET = HEIGHT * HOG_LUV_BINS
    };

    // scatters the gradient magnitude of each pixel into the HOG bin
    // selected by its quantized gradient angle
    __global__ void magToHist(const uchar* __restrict__ mag,
                              const float* __restrict__ angle, const int angPitch,
                              uchar* __restrict__ hog, const int hogPitch)
    {
        const int y = blockIdx.y * blockDim.y + threadIdx.y;
        const int x = blockIdx.x * blockDim.x + threadIdx.x;

        const int bin = (int)(angle[y * angPitch + x]);
        const uchar val = mag[y * angPitch + x];

        hog[((HEIGHT * bin) + y) * hogPitch + x] = val;
    }

    void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle)
    {
        const uchar* mag = (const uchar*)hogluv.ptr(HEIGHT * HOG_BINS);
        uchar* hog = (uchar*)hogluv.ptr();
        const float* angle = (const float*)nangle.ptr();

        dim3 block(32, 8);
        dim3 grid(WIDTH / 32, HEIGHT / 8);

        magToHist<<<grid, block>>>(mag, angle, nangle.step / sizeof(float), hog, hogluv.step);
        cudaSafeCall( cudaGetLastError() );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
}
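// Layout note with a small sketch (hedged: the layout is inferred from this
// file together with softcascade.cpp, and these names are illustrative, not
// part of the original sources). The frame buffer packs all channels
// vertically, one 480-row block per channel, so magToHist's write index
// ((HEIGHT * bin) + y) * hogPitch + x addresses row y of bin "bin".
//
//     enum ChannelBlock
//     {
//         CH_HOG_FIRST = 0,  // six HOG orientation bins occupy blocks 0..5
//         CH_MAGNITUDE = 6,  // gradient magnitude (read by fillBins)
//         CH_LUV_FIRST = 7,  // L, u, v occupy blocks 7..9
//         CH_GRAY      = 10  // grayscale plane used to compute the gradients
//     };
//
//     // row offset of channel block c inside the packed 640 x (480 * 11) buffer
//     inline int channelRowOffset(int c) { return c * 480; }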
    // one thread per detection window position, one grid layer (z) per scale level
    __global__ void detect(const cv::gpu::icf::Cascade cascade, const int* __restrict__ hogluv, const int pitch,
                           PtrStepSz<uchar4> objects)
    {
        cascade.detectAt(hogluv, pitch, objects);
    }
}
float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect,
                                     const int channel, const float threshold) const
{
    dprintf("feature %d box %d %d %d %d\n", channel, scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w);
    dprintf("rescale: %f [%f %f]\n", level.relScale, level.scaling[0], level.scaling[1]);

    float relScale = level.relScale;
    float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);

    // rescale
    scaledRect.x = __float2int_rn(relScale * scaledRect.x);
    scaledRect.y = __float2int_rn(relScale * scaledRect.y);
    scaledRect.z = __float2int_rn(relScale * scaledRect.z);
    scaledRect.w = __float2int_rn(relScale * scaledRect.w);

    float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);

    float approx = 1.f;
    if (fabs(farea - 0.f) > FLT_EPSILON && fabs(sarea - 0.f) > FLT_EPSILON)
    {
        const float expected_new_area = farea * relScale * relScale;
        approx = expected_new_area / sarea;
    }

    dprintf("new rect: %d box %d %d %d %d rel areas %f %f\n", channel,
            scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);

    // compensate for area rounding
    float rootThreshold = threshold / approx;
    // printf(" approx %f\n", rootThreshold);
    rootThreshold *= level.scaling[(int)(channel > 6)];

    dprintf("approximation %f %f -> %f %f\n", approx, threshold, rootThreshold, level.scaling[(int)(channel > 6)]);

    return rootThreshold;
}
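// A host-side sketch of the same compensation (assumed names, same math):
// after scaling and rounding a feature rectangle, the node threshold is
// divided by the ratio of the expected area to the area actually obtained.

#include <cmath>
#include <cfloat>

static float compensatedThreshold(float threshold, float relScale, int w, int h)
{
    const float farea = (float)(w * h);         // original feature area
    const int sw = (int)(relScale * w + 0.5f);  // rounded scaled sides
    const int sh = (int)(relScale * h + 0.5f);
    const float sarea = (float)(sw * sh);

    float approx = 1.f;
    if (::fabs(farea) > FLT_EPSILON && ::fabs(sarea) > FLT_EPSILON)
        approx = (farea * relScale * relScale) / sarea; // expected / actual

    return threshold / approx;
}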
typedef unsigned char uchar;

float __device get(const int* __restrict__ hogluv, const int pitch,
                   const int x, const int y, int channel, uchar4 area)
{
    dprintf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w);
    dprintf("get for channel %d\n", channel);
    dprintf("extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",
            x + area.x, y + area.y, x + area.z, y + area.y, x + area.z, y + area.w,
            x + area.x, y + area.w);
    dprintf("at point %d %d with offset %d\n", x, y, 0);

    // each 121-row block of the integral buffer holds one channel (480 / 4 + 1 = 121 rows)
    const int* curr = hogluv + ((channel * 121) + y) * pitch;

    // four corner reads of the integral image
    int a = curr[area.y * pitch + x + area.x];
    int b = curr[area.y * pitch + x + area.z];
    int c = curr[area.w * pitch + x + area.z];
    int d = curr[area.w * pitch + x + area.x];

    dprintf(" retrieved integral values: %d %d %d %d\n", a, b, c, d);

    return (a - b + c - d);
}
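// For reference, the standard integral-image identity the lookup above relies
// on (a sketch, not part of the original sources): the sum over a rectangle
// is recovered from four corner reads of the integral image.

static int rectSum(const int* ii, int pitch, int x0, int y0, int x1, int y1)
{
    const int tl = ii[y0 * pitch + x0]; // a: top-left
    const int tr = ii[y0 * pitch + x1]; // b: top-right
    const int br = ii[y1 * pitch + x1]; // c: bottom-right
    const int bl = ii[y1 * pitch + x0]; // d: bottom-left
    return tl - tr + br - bl;           // a - b + c - d, as in get()
}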
void __device icf::Cascade::detectAt(const int* __restrict__ hogluv, const int pitch,
                                     PtrStepSz<uchar4>& objects) const
{
    const icf::Level* lls = (const icf::Level*)levels.ptr();

    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    // if (x > 0 || y > 0) return;

    Level level = lls[blockIdx.z];
    if (x >= level.workRect.x || y >= level.workRect.y) return;

    dprintf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
            level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, level.objSize.y);

    const Octave octave = ((const Octave*)octaves.ptr())[level.octave];
    // printf("Octave: %d %d %d (%d %d) %f\n", octave.index, octave.stages,
    //        octave.shrinkage, octave.size.x, octave.size.y, octave.scale);

    const int stBegin = octave.index * octave.stages, stEnd = stBegin + octave.stages;

    float detectionScore = 0.f;

    int st = stBegin;
    for (; st < stEnd; ++st)
    {
        const float stage = stages(0, st);
        dprintf("Stage: %f\n", stage);
        {
            const int nId = st * 3;

            // work with the root node
            const Node node = ((const Node*)nodes.ptr())[nId];
            dprintf("Node: %d %f\n", node.feature, node.threshold);

            const Feature feature = ((const Feature*)features.ptr())[node.feature];

            uchar4 scaledRect = feature.rect;
            float threshold = rescale(level, scaledRect, feature.channel, node.threshold);
            float sum = get(hogluv, pitch, x, y, feature.channel, scaledRect);
            dprintf("root feature %d %f\n", feature.channel, sum);

            int next = 1 + (int)(sum >= threshold);
            dprintf("go: %d (%f >= %f)\n\n", next, sum, threshold);

            // leaves
            const Node leaf = ((const Node*)nodes.ptr())[nId + next];
            const Feature fLeaf = ((const Feature*)features.ptr())[leaf.feature];

            scaledRect = fLeaf.rect;
            threshold = rescale(level, scaledRect, fLeaf.channel, leaf.threshold);
            sum = get(hogluv, pitch, x, y, fLeaf.channel, scaledRect);

            const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
            float impact = leaves(0, (st * 4) + lShift);
            detectionScore += impact;

            dprintf("decided: %d (%f >= %f) %d %f\n\n", next, sum, threshold, lShift, impact);
            dprintf("extracted stage:\n");
            dprintf("ct %f\n", stage);
            dprintf("computed score %f\n\n", detectionScore);
            dprintf("\n\n");
        }

        // reject the window as soon as the running score falls below the stage threshold
        if (detectionScore <= stage || st - stBegin == 100) break;
    }

    dprintf("x %d y %d: %d\n", x, y, st - stBegin);

    if (st == stEnd)
    {
        uchar4 a;
        a.x = level.workRect.x;
        a.y = level.workRect.y;
        objects(0, threadIdx.x) = a;
    }
}
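// The stage loop above, restated as a host-side sketch (an assumed
// simplification, not the original API): each stage adds the weight of the
// leaf picked by a depth-2 tree, and the window is rejected as soon as the
// running score drops to or below the per-stage threshold.

static float evaluateWindow(int nstages, const float* stageThresholds,
                            float (*treeImpact)(int stage), bool* accepted)
{
    float score = 0.f;
    int st = 0;
    for (; st < nstages; ++st)
    {
        score += treeImpact(st);                 // weight of the selected leaf
        if (score <= stageThresholds[st]) break; // soft-cascade early rejection
    }
    *accepted = (st == nstages);
    return score;
}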
void icf::Cascade::detect(const cv::gpu::PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, cudaStream_t stream) const
{
    dim3 block(32, 8, 1);
    dim3 grid(ChannelStorage::FRAME_WIDTH / 32, ChannelStorage::FRAME_HEIGHT / 8, 47);
    device::detect<<<grid, block, 0, stream>>>(*this, hogluv, hogluv.step / sizeof(int), objects);
    cudaSafeCall( cudaGetLastError() );
    if (!stream)
        cudaSafeCall( cudaDeviceSynchronize() );
}
}}

@@ -40,127 +40,127 @@
//
//M*/

#include <opencv2/gpu/device/common.hpp>

#ifndef __OPENCV_ICF_HPP__
#define __OPENCV_ICF_HPP__

#if defined __CUDACC__
# define __device __device__ __forceinline__
#else
# define __device
#endif
namespace cv { namespace gpu { namespace icf {

    using cv::gpu::PtrStepSzb;
    using cv::gpu::PtrStepSzf;

    typedef unsigned char uchar;

    struct __align__(16) Octave
    {
        ushort index;
        ushort stages;
        ushort shrinkage;
        ushort2 size;
        float scale;

        Octave(const ushort i, const ushort s, const ushort sh, const ushort2 sz, const float sc)
        : index(i), stages(s), shrinkage(sh), size(sz), scale(sc) {}
    };
    struct __align__(8) Level // actually 24 bytes
    {
        int octave;

        // float origScale; // not actually used
        float relScale;
        float shrScale;   // used for marking detection
        float scaling[2]; // calculated according to Dollár's paper

        // for 640x480 these cannot overflow
        uchar2 workRect;
        uchar2 objSize;

        Level(int idx, const Octave& oct, const float scale, const int w, const int h)
        : octave(idx), relScale(scale / oct.scale), shrScale(relScale / (float)oct.shrinkage)
        {
            workRect.x = round(w / (float)oct.shrinkage);
            workRect.y = round(h / (float)oct.shrinkage);

            objSize.x = round(oct.size.x * relScale);
            objSize.y = round(oct.size.y * relScale);
        }
    };
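    // Worked example (a sketch using calcLevels' inputs from softcascade.cpp):
    // octave scale 1.0, shrinkage 4, level scale 0.5 on a 640x480 frame gives
    //   relScale = 0.5 / 1.0 = 0.5,  shrScale = 0.5 / 4 = 0.125
    //   w = 640 - 64 * 0.5 = 608,  h = 480 - 128 * 0.5 = 416
    //   workRect = (round(608 / 4), round(416 / 4)) = (152, 104)
    //   objSize  = (round(64 * 0.5), round(128 * 0.5)) = (32, 64)
    // Every value stays below 256, which is why uchar2 is safe for 640x480.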
    struct Cascade
    {
        Cascade() {}
        Cascade(const cv::gpu::PtrStepSzb& octs, const cv::gpu::PtrStepSzf& sts, const cv::gpu::PtrStepSzb& nds,
                const cv::gpu::PtrStepSzf& lvs, const cv::gpu::PtrStepSzb& fts, const cv::gpu::PtrStepSzb& lls)
        : octaves(octs), stages(sts), nodes(nds), leaves(lvs), features(fts), levels(lls) {}

        void detect(const cv::gpu::PtrStepSzi& hogluv, cv::gpu::PtrStepSz<uchar4> objects, cudaStream_t stream) const;
        void __device detectAt(const int* __restrict__ hogluv, const int pitch, PtrStepSz<uchar4>& objects) const;
        float __device rescale(const icf::Level& level, uchar4& scaledRect,
                               const int channel, const float threshold) const;

        PtrStepSzb octaves;
        PtrStepSzf stages;
        PtrStepSzb nodes;
        PtrStepSzf leaves;
        PtrStepSzb features;
        PtrStepSzb levels;
    };
    struct ChannelStorage
    {
        ChannelStorage() {}
        ChannelStorage(const cv::gpu::PtrStepSzb& buff, const cv::gpu::PtrStepSzb& shr,
                       const cv::gpu::PtrStepSzb& itg, const int s)
        : dmem(buff), shrunk(shr), hogluv(itg), shrinkage(s) {}

        void frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStream_t stream) {}

        PtrStepSzb dmem;
        PtrStepSzb shrunk;
        PtrStepSzb hogluv;

        enum
        {
            FRAME_WIDTH        = 640,
            FRAME_HEIGHT       = 480,
            TOTAL_SCALES       = 55,
            CLASSIFIERS        = 5,
            ORIG_OBJECT_WIDTH  = 64,
            ORIG_OBJECT_HEIGHT = 128,
            HOG_BINS           = 6,
            HOG_LUV_BINS       = 10
        };

        int shrinkage;
        static const float magnitudeScaling = 1.f; // / sqrt(2);
    };
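    // Size sanity check (a sketch, assuming shrinkage 4): the shrunk channels
    // are 640/4 x 480/4 = 160x120 per bin, and each integral image adds one
    // row and one column, giving the 161x121 blocks addressed as
    // "channel * 121" with pitch 161 in isf-sc.cu.
    //
    //     enum
    //     {
    //         SHRUNK_W   = 640 / 4,      // 160
    //         SHRUNK_H   = 480 / 4,      // 120
    //         INTEGRAL_W = SHRUNK_W + 1, // 161
    //         INTEGRAL_H = SHRUNK_H + 1  // 121
    //     };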
    struct __align__(8) Node
    {
        int feature;
        float threshold;

        Node(const int f, const float t) : feature(f), threshold(t) {}
    };

    struct __align__(8) Feature
    {
        int channel;
        uchar4 rect;

        Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
    };
}}}

#endif

@@ -41,361 +41,365 @@
//M*/

#include <precomp.hpp>
#include <opencv2/highgui/highgui.hpp>

#if !defined (HAVE_CUDA)

cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); }
cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); }
cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); }
bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; }
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu(); }

#else

#include <icf.hpp>

namespace cv { namespace gpu { namespace device {
namespace icf {
    void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle);
}
}}}
struct cv::gpu::SoftCascade::Filds
{
    // scales range
    float minScale;
    float maxScale;

    int origObjWidth;
    int origObjHeight;

    GpuMat octaves;
    GpuMat stages;
    GpuMat nodes;
    GpuMat leaves;
    GpuMat features;
    GpuMat levels;

    // preallocated buffer 640x480x10 + 640x480
    GpuMat dmem;
    // 160x120x10
    GpuMat shrunk;
    // 161x121x10
    GpuMat hogluv;

    // will be removed in the final version
    // temporary mat for cvtColor
    GpuMat luv;
    // temporary mat for integral
    GpuMat integralBuffer;
    // temp matrices for sobel and cartToPolar
    GpuMat dfdx, dfdy, angle, mag, nmag, nangle;

    std::vector<float> scales;

    icf::Cascade cascade;
    icf::ChannelStorage storage;

    enum { BOOST = 0 };
    enum
    {
        FRAME_WIDTH        = 640,
        FRAME_HEIGHT       = 480,
        TOTAL_SCALES       = 55,
        CLASSIFIERS        = 5,
        ORIG_OBJECT_WIDTH  = 64,
        ORIG_OBJECT_HEIGHT = 128,
        HOG_BINS           = 6,
        HOG_LUV_BINS       = 10
    };

    bool fill(const FileNode &root, const float mins, const float maxs);

    void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
    {
        cascade.detect(hogluv, objects, stream);
    }

private:
    void calcLevels(const std::vector<icf::Octave>& octs,
                    int frameW, int frameH, int nscales);

    typedef std::vector<icf::Octave>::const_iterator octIt_t;

    // pick the octave whose log-scale is closest to the requested one
    int fitOctave(const std::vector<icf::Octave>& octs, const float& logFactor) const
    {
        float minAbsLog = FLT_MAX;
        int res = 0;
        for (int oct = 0; oct < (int)octs.size(); ++oct)
        {
            const icf::Octave& octave = octs[oct];
            float logOctave = ::log(octave.scale);
            float logAbsScale = ::fabs(logFactor - logOctave);

            if (logAbsScale < minAbsLog)
            {
                res = oct;
                minAbsLog = logAbsScale;
            }
        }
        return res;
    }
};
inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
{
    minScale = mins;
    maxScale = maxs;

    // cascade properties
    static const char *const SC_STAGE_TYPE      = "stageType";
    static const char *const SC_BOOST           = "BOOST";
    static const char *const SC_FEATURE_TYPE    = "featureType";
    static const char *const SC_ICF             = "ICF";
    static const char *const SC_ORIG_W          = "width";
    static const char *const SC_ORIG_H          = "height";
    static const char *const SC_OCTAVES         = "octaves";
    static const char *const SC_STAGES          = "stages";
    static const char *const SC_FEATURES        = "features";
    static const char *const SC_WEEK            = "weakClassifiers";
    static const char *const SC_INTERNAL        = "internalNodes";
    static const char *const SC_LEAF            = "leafValues";
    static const char *const SC_OCT_SCALE       = "scale";
    static const char *const SC_OCT_STAGES      = "stageNum";
    static const char *const SC_OCT_SHRINKAGE   = "shrinkingFactor";
    static const char *const SC_STAGE_THRESHOLD = "stageThreshold";
    static const char *const SC_F_CHANNEL       = "channel";
    static const char *const SC_F_RECT          = "rect";

    // only AdaBoost is supported
    std::string stageTypeStr = (string)root[SC_STAGE_TYPE];
    CV_Assert(stageTypeStr == SC_BOOST);

    // only HOG-like integral channel features are supported
    string featureTypeStr = (string)root[SC_FEATURE_TYPE];
    CV_Assert(featureTypeStr == SC_ICF);

    origObjWidth = (int)root[SC_ORIG_W];
    CV_Assert(origObjWidth == ORIG_OBJECT_WIDTH);

    origObjHeight = (int)root[SC_ORIG_H];
    CV_Assert(origObjHeight == ORIG_OBJECT_HEIGHT);

    FileNode fn = root[SC_OCTAVES];
    if (fn.empty()) return false;

    std::vector<icf::Octave>  voctaves;
    std::vector<float>        vstages;
    std::vector<icf::Node>    vnodes;
    std::vector<float>        vleaves;
    std::vector<icf::Feature> vfeatures;
    scales.clear();

    // std::vector<Level> levels;

    FileNodeIterator it = fn.begin(), it_end = fn.end();
    int feature_offset = 0;
    ushort octIndex = 0;
    ushort shrinkage = 1;

    for (; it != it_end; ++it)
    {
        FileNode fns = *it;
        float scale = (float)fns[SC_OCT_SCALE];
        scales.push_back(scale);
        ushort nstages = saturate_cast<ushort>((int)fns[SC_OCT_STAGES]);
        ushort2 size;
        size.x = cvRound(ORIG_OBJECT_WIDTH * scale);
        size.y = cvRound(ORIG_OBJECT_HEIGHT * scale);
        shrinkage = saturate_cast<ushort>((int)fns[SC_OCT_SHRINKAGE]);

        icf::Octave octave(octIndex, nstages, shrinkage, size, scale);
        CV_Assert(octave.stages > 0);
        voctaves.push_back(octave);

        FileNode ffs = fns[SC_FEATURES];
        if (ffs.empty()) return false;

        fns = fns[SC_STAGES];
        if (fn.empty()) return false;

        // for each stage (~ a decision tree with depth 2)
        FileNodeIterator st = fns.begin(), st_end = fns.end();
        for (; st != st_end; ++st)
        {
            fns = *st;
            vstages.push_back((float)fns[SC_STAGE_THRESHOLD]);

            fns = fns[SC_WEEK];
            FileNodeIterator ftr = fns.begin(), ft_end = fns.end();
            for (; ftr != ft_end; ++ftr)
            {
                fns = (*ftr)[SC_INTERNAL];
                FileNodeIterator inIt = fns.begin(), inIt_end = fns.end();
                for (; inIt != inIt_end;)
                {
                    // skip the two child indices, read the feature index, then the threshold
                    int feature = (int)(*(inIt += 2)++) + feature_offset;
                    float th = (float)(*(inIt++));
                    vnodes.push_back(icf::Node(feature, th));
                }

                fns = (*ftr)[SC_LEAF];
                inIt = fns.begin(), inIt_end = fns.end();
                for (; inIt != inIt_end; ++inIt)
                    vleaves.push_back((float)(*inIt));
            }
        }

        st = ffs.begin(), st_end = ffs.end();
        for (; st != st_end; ++st)
        {
            cv::FileNode rn = (*st)[SC_F_RECT];
            cv::FileNodeIterator r_it = rn.begin();
            uchar4 rect;
            rect.x = saturate_cast<uchar>((int)*(r_it++));
            rect.y = saturate_cast<uchar>((int)*(r_it++));
            rect.z = saturate_cast<uchar>((int)*(r_it++));
            rect.w = saturate_cast<uchar>((int)*(r_it++));
            vfeatures.push_back(icf::Feature((int)(*st)[SC_F_CHANNEL], rect));
        }

        feature_offset += octave.stages * 3;
        ++octIndex;
    }

    // upload to GPU memory
    octaves.upload(cv::Mat(1, voctaves.size() * sizeof(icf::Octave), CV_8UC1, (uchar*)&(voctaves[0])));
    CV_Assert(!octaves.empty());

    stages.upload(cv::Mat(vstages).reshape(1, 1));
    CV_Assert(!stages.empty());

    nodes.upload(cv::Mat(1, vnodes.size() * sizeof(icf::Node), CV_8UC1, (uchar*)&(vnodes[0])));
    CV_Assert(!nodes.empty());

    leaves.upload(cv::Mat(vleaves).reshape(1, 1));
    CV_Assert(!leaves.empty());

    features.upload(cv::Mat(1, vfeatures.size() * sizeof(icf::Feature), CV_8UC1, (uchar*)&(vfeatures[0])));
    CV_Assert(!features.empty());

    // compute levels
    calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES);
    CV_Assert(!levels.empty());

    // init Cascade
    cascade = icf::Cascade(octaves, stages, nodes, leaves, features, levels);

    // allocate buffers
    dmem.create(FRAME_HEIGHT * (HOG_LUV_BINS + 1), FRAME_WIDTH, CV_8UC1);
    shrunk.create(FRAME_HEIGHT / shrinkage * HOG_LUV_BINS, FRAME_WIDTH / shrinkage, CV_8UC1);
    // hogluv.create((FRAME_HEIGHT / shrinkage + 1) * HOG_LUV_BINS, (FRAME_WIDTH / shrinkage + 1), CV_16UC1);
    hogluv.create((FRAME_HEIGHT / shrinkage + 1) * HOG_LUV_BINS, (FRAME_WIDTH / shrinkage + 1), CV_32SC1);
    luv.create(FRAME_HEIGHT, FRAME_WIDTH, CV_8UC3);

    integralBuffer.create(shrunk.rows + 1 * HOG_LUV_BINS, shrunk.cols + 1, CV_32SC1);

    dfdx.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1);
    dfdy.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1);
    angle.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1);
    mag.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1);
    nmag.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1);
    nangle.create(FRAME_HEIGHT, FRAME_WIDTH, CV_32FC1);

    storage = icf::ChannelStorage(dmem, shrunk, hogluv, shrinkage);
    return true;
}
namespace {

struct CascadeIntrinsics
{
    static const float lambda = 1.099f, a = 0.89f;

    static float getFor(int channel, float scaling)
    {
        CV_Assert(channel < 10);

        if (fabs(scaling - 1.f) < FLT_EPSILON)
            return 1.f;

        // according to R. Benenson, M. Mathias, R. Timofte and L. Van Gool's and Dollár's papers
        static const float A[2][2] =
        {   //channel <= 6, otherwise
            { 0.89f, 1.f}, // down
            { 1.00f, 1.f}  // up
        };

        static const float B[2][2] =
        {   //channel <= 6, otherwise
            { 1.099f / log(2), 2.f}, // down
            { 0.f,             2.f}  // up
        };

        float a = A[(int)(scaling >= 1)][(int)(channel > 6)];
        float b = B[(int)(scaling >= 1)][(int)(channel > 6)];

        // printf("!!! scaling: %f %f %f -> %f\n", scaling, a, b, a * pow(scaling, b));
        return a * pow(scaling, b);
    }
};
}
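// Worked example (a sketch of the formula above): for a HOG-like channel
// (channel <= 6) and downscaling s = 0.5,
//   a = 0.89,  b = 1.099 / log(2) ~ 1.586
//   r = 0.89 * pow(0.5, 1.586) ~ 0.30
// so a node threshold is corrected by roughly 0.3 rather than the naive 0.25
// area ratio; for upscaling and for the LUV channels the correction is the
// plain quadratic a = 1, b = 2.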
inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<icf::Octave>& octs,
                                                    int frameW, int frameH, int nscales)
{
    CV_Assert(nscales > 1);

    std::vector<icf::Level> vlevels;
    float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales - 1);

    float scale = minScale;
    for (int sc = 0; sc < nscales; ++sc)
    {
        int width  = ::std::max(0.0f, frameW - (origObjWidth  * scale));
        int height = ::std::max(0.0f, frameH - (origObjHeight * scale));

        float logScale = ::log(scale);
        int fit = fitOctave(octs, logScale);

        icf::Level level(fit, octs[fit], scale, width, height);
        level.scaling[0] = CascadeIntrinsics::getFor(0, level.relScale);
        level.scaling[1] = CascadeIntrinsics::getFor(9, level.relScale);

        if (!width || !height)
            break;
        else
            vlevels.push_back(level);

        if (::fabs(scale - maxScale) < FLT_EPSILON) break;
        scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));

        // printf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
        //        level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, level.objSize.y);

        // std::cout << "level " << sc
        //           << " octave "
        //           << vlevels[sc].octave
        //           << " relScale "
        //           << vlevels[sc].relScale
        //           << " " << vlevels[sc].shrScale
        //           << " [" << (int)vlevels[sc].objSize.x
        //           << " "  << (int)vlevels[sc].objSize.y << "] ["
        //           << (int)vlevels[sc].workRect.x << " " << (int)vlevels[sc].workRect.y << "]" << std::endl;
    }

    levels.upload(cv::Mat(1, vlevels.size() * sizeof(icf::Level), CV_8UC1, (uchar*)&(vlevels[0])));
}
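// The scale progression above, in isolation (a sketch, not the original API):
// nscales levels are spaced uniformly in log-scale between minScale and maxScale.

#include <cmath>
#include <vector>

static std::vector<float> logSpacedScales(float minScale, float maxScale, int nscales)
{
    std::vector<float> scales;
    const float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales - 1);
    for (int sc = 0; sc < nscales; ++sc)
        scales.push_back(::expf(::log(minScale) + sc * logFactor));
    return scales;
}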
cv::gpu::SoftCascade::SoftCascade() : filds(0) {}
@@ -419,97 +423,89 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, const float maxScale)
    if (!fs.isOpened()) return false;

    filds = new Filds;
    Filds& flds = *filds;
    if (!flds.fill(fs.getFirstTopLevelNode(), minScale, maxScale)) return false;

    return true;
}
namespace {
    // itoa is not part of the C standard; emulate it with sprintf
    char *itoa(long i, char* s, int /*dummy_radix*/)
    {
        sprintf(s, "%ld", i);
        return s;
    }
}

#define USE_REFERENCE_VALUES
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
                                            GpuMat& objects, const int /*rejectfactor*/, Stream s)
{
    // only color images are supported
    CV_Assert(colored.type() == CV_8UC3);

    // only this window size is allowed
    CV_Assert(colored.cols == 640 && colored.rows == 480);

    Filds& flds = *filds;

#if defined USE_REFERENCE_VALUES
    // load precomputed reference channels instead of computing them on the GPU
    cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
    cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
    char buff[33];

    for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
    {
        cv::Mat channel;
        imgs[std::string("channel") + itoa(i, buff, 10)] >> channel;
        GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
        gchannel.upload(channel);
    }
#else
    GpuMat& dmem = flds.dmem;
    cudaMemset(dmem.data, 0, dmem.step * dmem.rows);
    GpuMat& shrunk = flds.shrunk;
    int w = shrunk.cols;
    int h = colored.rows / flds.storage.shrinkage;

    std::vector<GpuMat> splited;
    for(int i = 0; i < 3; ++i)
    {
        splited.push_back(GpuMat(dmem, cv::Rect(0, colored.rows * (7 + i), colored.cols, colored.rows)));
    }

    GpuMat gray(dmem, cv::Rect(0, colored.rows * 10, colored.cols, colored.rows));
    cv::gpu::cvtColor(colored, gray, CV_RGB2GRAY);

    // create hog
    cv::gpu::Sobel(gray, flds.dfdx, CV_32F, 1, 0, 3, 0.25);
    cv::gpu::Sobel(gray, flds.dfdy, CV_32F, 0, 1, 3, 0.25);

    cv::gpu::cartToPolar(flds.dfdx, flds.dfdy, flds.mag, flds.angle, true);

    cv::gpu::multiply(flds.mag, cv::Scalar::all(1.0 / ::log(2)), flds.nmag);
    cv::gpu::multiply(flds.angle, cv::Scalar::all(1.0 / 60.0), flds.nangle);

    GpuMat magChannel(dmem, cv::Rect(0, colored.rows * 6, colored.cols, colored.rows));
    flds.nmag.convertTo(magChannel, CV_8UC1);
    device::icf::fillBins(dmem, flds.nangle);

    // create luv
    cv::gpu::cvtColor(colored, flds.luv, CV_BGR2Luv);
    cv::gpu::split(flds.luv, splited);

    GpuMat plane(dmem, cv::Rect(0, 0, colored.cols, colored.rows * Filds::HOG_LUV_BINS));
    cv::gpu::resize(plane, flds.shrunk, cv::Size(), 0.25, 0.25, CV_INTER_AREA);

    // for debug purposes
    // cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);

    // one integral image per shrunk channel
    for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
    {
        GpuMat channel(shrunk, cv::Rect(0, h * i, w, h));
        GpuMat sum(flds.hogluv, cv::Rect(0, (h + 1) * i, w + 1, h + 1));
        cv::gpu::integralBuffered(channel, sum, flds.integralBuffer);
    }
#endif

    cudaStream_t stream = StreamAccessor::getStream(s);
    // detection
    flds.detect(objects, stream);

    // flds.storage.frame(colored, stream);
}
#endif
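// A minimal usage sketch, assuming the public API exercised in this file
// (the class header itself is not shown in this diff, so names and defaults
// are taken from the definitions above):
//
//     cv::gpu::SoftCascade cascade;
//     if (cascade.load("soft_cascade.xml", 0.4f, 5.0f))
//     {
//         cv::gpu::GpuMat frame, rois, objects;
//         cascade.detectMultiScale(frame, rois, objects, 1, cv::gpu::Stream::Null());
//     }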