memory optimization

b83d4add · marina.kolpakova · 4d9c7c10 · b83d4add · b83d4add · b83d4add
Commit b83d4add authored Sep 26, 2012 by marina.kolpakova
Expand all Show whitespace changes
Inline Side-by-side

Showing with 117 additions and 67 deletions

isf-sc.cu modules/gpu/src/cuda/isf-sc.cu +54 -2

icf.hpp modules/gpu/src/icf.hpp +63 -65

softcascade.cpp modules/gpu/src/softcascade.cpp +0 -0

No files found.
--- a/modules/gpu/src/cuda/isf-sc.cu
+++ b/modules/gpu/src/cuda/isf-sc.cu
@@ -41,9 +41,9 @@
 //M*/
 #include <opencv2/gpu/device/common.hpp>
-// #include <icf.hpp>
+#include <icf.hpp>
 // #include <opencv2/gpu/device/saturate_cast.hpp>
-// #include <stdio.h>
+#include <stdio.h>
 // #include <float.h>
 // //#define LOG_CUDA_CASCADE
@@ -93,6 +93,58 @@ namespace icf {
        cudaSafeCall( cudaGetLastError() );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
+    texture<float2,  cudaTextureType1D, cudaReadModeElementType> tnode;
+    // Memory-access test kernel: each thread inside the level's work rectangle walks a run of
+    // stages, reads one node and one stage weight per iteration, and accumulates a score.
+    //
+    // Launch layout: 2D blocks over (x, y); blockIdx.z selects the entry of `levels`.
+    //   levels  - per-level descriptors, indexed by blockIdx.z
+    //   octaves - per-octave descriptors, indexed by Level::octave
+    //   stages  - per-stage float weights, indexed by stage number
+    //   nodes   - node array, read at index (stage * 3)
+    //   objects - output; only threads with x == y write, at objects(0, threadIdx.x)
+    //
+    // NOTE(review): the loop runs a fixed 1000 iterations instead of octave.stages (see the
+    // trailing comment on stEnd) -- confirm `stages` and `nodes` are large enough for that.
+    // NOTE(review): several threads/blocks store to the same objects(0, threadIdx.x) slot;
+    // presumably acceptable for a benchmark kernel -- confirm.
+    __global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages,
+        const Node* nodes,
+        PtrStepSz<uchar4> objects)
+    {
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+        Level level = levels[blockIdx.z];
+
+        // Threads outside this level's work rectangle have nothing to do.
+        if(x >= level.workRect.x || y >= level.workRect.y) return;
+
+        Octave octave = octaves[level.octave];
+
+        int st = octave.index * octave.stages;
+        const int stEnd = st + 1000;//octave.stages;
+
+        float confidence = 0.f;
+
+#pragma unroll 8
+        for(; st < stEnd; ++st)
+        {
+            const int nId = st * 3;
+            const Node node = nodes[nId];
+
+            const float stage = stages[st];
+            confidence += node.rect.x * stage;
+        }
+
+        // Zero the unused components so no indeterminate bytes are written to the output;
+        // previously only .x was assigned before the whole uchar4 was stored.
+        uchar4 val;
+        val.x = (int)confidence;
+        val.y = 0;
+        val.z = 0;
+        val.w = 0;
+
+        if (x == y) objects(0, threadIdx.x) = val;
+    }
+    // Host-side launcher for test_kernel.
+    //
+    // Reinterprets the raw device buffers as typed arrays (Level, Octave, float, Node) and
+    // launches test_kernel over a fixed 160x120 frame with 32x8 thread blocks and 47 slices
+    // in grid.z. `features` is accepted but not used here. Launch and execution errors are
+    // reported through cudaSafeCall; the call blocks until the kernel has finished.
+    void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
+        const PtrStepSzb& nodes, const PtrStepSzb& features,
+        PtrStepSz<uchar4> objects)
+    {
+        // Fixed launch geometry.
+        const int frameWidth  = 160;
+        const int frameHeight = 120;
+
+        const dim3 threads(32, 8);
+        const dim3 blocks(frameWidth / 32, frameHeight / 8, 47);
+
+        // cudaSafeCall( cudaBindTexture(0, tnode, nodes.data, rgb.cols / size) );
+
+        test_kernel<<<blocks, threads>>>(
+            (const Level*)levels.ptr(),
+            ((const Octave*)octaves.ptr()),
+            (const float*)stages.ptr(),
+            (const Node*)nodes.ptr(),
+            objects);
+
+        // First check catches launch-configuration errors, second catches faults raised
+        // while the kernel was running.
+        cudaSafeCall( cudaGetLastError());
+        cudaSafeCall( cudaDeviceSynchronize());
+    }
 }
 }}}

--- a/modules/gpu/src/icf.hpp
+++ b/modules/gpu/src/icf.hpp
-/*M///////////////////////////////////////////////////////////////////////////////////////
+//M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
@@ -38,12 +38,12 @@
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
-//M*/
+//M
-// #include <opencv2/gpu/device/common.hpp>
+#include <opencv2/gpu/device/common.hpp>
-// #ifndef __OPENCV_ICF_HPP__
+#ifndef __OPENCV_ICF_HPP__
-// #define __OPENCV_ICF_HPP__
+#define __OPENCV_ICF_HPP__
 // #if defined __CUDACC__
 // # define __device __device__ __forceinline__
@@ -52,49 +52,62 @@
 // #endif
-// namespace cv { namespace gpu { namespace icf {
+namespace cv { namespace gpu { namespace device {
+namespace icf {
-// using cv::gpu::PtrStepSzb;
-// using cv::gpu::PtrStepSzf;
+struct __align__(16) Octave
+{
-// typedef unsigned char uchar;
+    ushort index;
+    ushort stages;
-// struct __align__(16) Octave
+    ushort shrinkage;
-// {
+    ushort2 size;
-//     ushort index;
+    float scale;
-//     ushort stages;
-//     ushort shrinkage;
+    Octave(const ushort i, const ushort s, const ushort sh, const ushort2 sz, const float sc)
-//     ushort2 size;
+    : index(i), stages(s), shrinkage(sh), size(sz), scale(sc) {}
-//     float scale;
+};
-//     Octave(const ushort i, const ushort s, const ushort sh, const ushort2 sz, const float sc)
+struct __align__(8) Level //is actually 24 bytes
-//     : index(i), stages(s), shrinkage(sh), size(sz), scale(sc) {}
+{
-// };
+    int octave;
-// struct __align__(8) Level //is actually 24 bytes
+    float relScale;
-// {
+    float shrScale;   // used for marking detection
-//     int octave;
+    float scaling[2]; // calculated according to Dollar paper
-//     // float origScale; //not actually used
+    // for 640x480 we can not get overflow
-//     float relScale;
+    uchar2 workRect;
-//     float shrScale;   // used for marking detection
+    uchar2 objSize;
-//     float scaling[2]; // calculated according to Dollal paper
+    Level(int idx, const Octave& oct, const float scale, const int w, const int h)
-//     // for 640x480 we can not get overflow
+    :  octave(idx), relScale(scale / oct.scale), shrScale (relScale / (float)oct.shrinkage)
-//     uchar2 workRect;
+    {
-//     uchar2 objSize;
+        workRect.x = round(w / (float)oct.shrinkage);
+        workRect.y = round(h / (float)oct.shrinkage);
-//     Level(int idx, const Octave& oct, const float scale, const int w, const int h)
-//     :  octave(idx), relScale(scale / oct.scale), shrScale (relScale / (float)oct.shrinkage)
+        objSize.x  = round(oct.size.x * relScale);
-//     {
+        objSize.y  = round(oct.size.y * relScale);
-//         workRect.x = round(w / (float)oct.shrinkage);
+    }
-//         workRect.y = round(h / (float)oct.shrinkage);
+};
-//         objSize.x  = round(oct.size.x * relScale);
+struct __align__(8) Node
-//         objSize.y  = round(oct.size.y * relScale);
+{
-//     }
+    // int feature;
-// };
+    uchar4 rect;
+    float threshold;
+    Node(const uchar4 c, const int t) : rect(c), threshold(t) {}
+};
+// 8-byte-aligned feature record: an integer channel plus a packed 4-byte rectangle.
+// NOTE(review): the meaning of the four rect components is not visible here -- confirm.
+struct __align__(8) Feature
+{
+    int channel;   // stored from constructor argument c
+    uchar4 rect;   // stored from constructor argument r
+
+    // Stores both fields verbatim; no validation is performed.
+    Feature(const int c, const uchar4 r)
+    : channel(c),
+      rect(r)
+    {
+    }
+};
+}
+}}}
 // struct Cascade
 // {
 //     Cascade() {}
@@ -146,21 +159,6 @@
 //     static const float magnitudeScaling = 1.f ;// / sqrt(2);
 // };
-// struct __align__(8) Node
-// {
-//     int feature;
-//     float threshold;
-//     Node(const int f, const float t) : feature(f), threshold(t) {}
-// };
-// struct __align__(8) Feature
-// {
-//     int channel;
-//     uchar4 rect;
-//     Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
-// };
 // }}}
-// #endif
+#endif
\ No newline at end of file
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp