Commit b83d4add authored by marina.kolpakova's avatar marina.kolpakova

memory optimization

parent 4d9c7c10
...@@ -41,9 +41,9 @@ ...@@ -41,9 +41,9 @@
//M*/ //M*/
#include <opencv2/gpu/device/common.hpp> #include <opencv2/gpu/device/common.hpp>
// #include <icf.hpp> #include <icf.hpp>
// #include <opencv2/gpu/device/saturate_cast.hpp> // #include <opencv2/gpu/device/saturate_cast.hpp>
// #include <stdio.h> #include <stdio.h>
// #include <float.h> // #include <float.h>
// //#define LOG_CUDA_CASCADE // //#define LOG_CUDA_CASCADE
...@@ -93,6 +93,58 @@ namespace icf { ...@@ -93,6 +93,58 @@ namespace icf {
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
// 1D texture reference over float2 elements, read without type conversion
// (cudaReadModeElementType). Nothing in the visible code binds or samples it:
// the only mention is a commented-out cudaBindTexture call in detect().
// NOTE(review): presumably intended for fetching Node data through the texture
// path -- confirm before relying on it.
texture<float2, cudaTextureType1D, cudaReadModeElementType> tnode;
// Memory-access test kernel.
//
// Launch layout: 2D blocks; (x, y) is the global thread coordinate and
// blockIdx.z selects the entry of `levels` handled by the block.
//
// levels  - device array of Level, indexed by blockIdx.z
// octaves - device array of Octave, indexed by Level::octave
// stages  - device array of per-stage float values
// nodes   - device array of Node; entry st * 3 is read for stage st
// objects - output image; element (0, threadIdx.x) is written by threads with x == y
//
// Threads outside the level's work rectangle exit immediately. The remaining
// threads accumulate node.rect.x * stage over a fixed run of stages and store
// the truncated sum so that the loads are not optimized away.
__global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes,
PtrStepSz<uchar4> objects)
{
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    const int x = blockIdx.x * blockDim.x + threadIdx.x;

    const Level level = levels[blockIdx.z];

    // Skip threads that fall outside this level's work rectangle.
    if (x >= level.workRect.x || y >= level.workRect.y) return;

    const Octave octave = octaves[level.octave];

    int st = octave.index * octave.stages;
    // NOTE(review): the trip count is hard-coded to 1000 instead of octave.stages
    // (the original left octave.stages in a trailing comment). The caller must
    // guarantee that `stages` and `nodes` are large enough for st + 1000 entries
    // -- confirm against the buffers passed to detect().
    const int stEnd = st + 1000;//octave.stages;

    float confidence = 0.f;

#pragma unroll 8
    for (; st < stEnd; ++st)
    {
        const int nId = st * 3;          // node entry read for this stage
        const Node node = nodes[nId];
        const float stage = stages[st];
        confidence += node.rect.x * stage;
    }

    // Fix: only .x used to be assigned, so .y/.z/.w were stored with
    // indeterminate values. Zero them to make the output deterministic.
    uchar4 val;
    val.x = (int)confidence;
    val.y = 0;
    val.z = 0;
    val.w = 0;

    // The destination depends only on threadIdx.x, so several threads may store
    // to the same element; the write exists to keep the loop above alive.
    if (x == y) objects(0, threadIdx.x) = val;
}
// Host-side launcher for test_kernel.
//
// Reinterprets the raw device buffers as typed arrays and launches the kernel
// with a fixed configuration: 32x8 threads per block, a grid sized for a
// 160x120 frame, and 47 slices along z. The launch is followed by an error
// check and a device-wide synchronization.
//
// levels, octaves, nodes - byte buffers viewed as Level / Octave / Node arrays
// stages                 - float buffer passed through unchanged
// features               - accepted but not used by this implementation
// objects                - output image handed to the kernel
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzb& features,
PtrStepSz<uchar4> objects)
{
    // Hard-coded frame extent used to size the grid.
    const int frameWidth  = 160;
    const int frameHeight = 120;

    const dim3 threadsPerBlock(32, 8);
    const dim3 blocksPerGrid(frameWidth / 32, frameHeight / 8, 47);

    // Typed views over the raw device buffers.
    const Level*  levelData  = reinterpret_cast<const Level*>(levels.ptr());
    const Octave* octaveData = reinterpret_cast<const Octave*>(octaves.ptr());
    const float*  stageData  = reinterpret_cast<const float*>(stages.ptr());
    const Node*   nodeData   = reinterpret_cast<const Node*>(nodes.ptr());

    // Texture path is currently disabled:
    // cudaSafeCall( cudaBindTexture(0, tnode, nodes.data, rgb.cols / size) );

    test_kernel<<<blocksPerGrid, threadsPerBlock>>>(levelData, octaveData, stageData, nodeData, objects);

    // Surface launch-configuration errors first, then any fault raised while the kernel ran.
    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}
} }
}}} }}}
......
/*M/////////////////////////////////////////////////////////////////////////////////////// //M///////////////////////////////////////////////////////////////////////////////////////
// //
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
// //
...@@ -38,12 +38,12 @@ ...@@ -38,12 +38,12 @@
// or tort (including negligence or otherwise) arising in any way out of // or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M
// #include <opencv2/gpu/device/common.hpp> #include <opencv2/gpu/device/common.hpp>
// #ifndef __OPENCV_ICF_HPP__ #ifndef __OPENCV_ICF_HPP__
// #define __OPENCV_ICF_HPP__ #define __OPENCV_ICF_HPP__
// #if defined __CUDACC__ // #if defined __CUDACC__
// # define __device __device__ __forceinline__ // # define __device __device__ __forceinline__
...@@ -52,49 +52,62 @@ ...@@ -52,49 +52,62 @@
// #endif // #endif
// namespace cv { namespace gpu { namespace icf { namespace cv { namespace gpu { namespace device {
namespace icf {
// using cv::gpu::PtrStepSzb;
// using cv::gpu::PtrStepSzf; struct __align__(16) Octave
{
// typedef unsigned char uchar; ushort index;
ushort stages;
// struct __align__(16) Octave ushort shrinkage;
// { ushort2 size;
// ushort index; float scale;
// ushort stages;
// ushort shrinkage; Octave(const ushort i, const ushort s, const ushort sh, const ushort2 sz, const float sc)
// ushort2 size; : index(i), stages(s), shrinkage(sh), size(sz), scale(sc) {}
// float scale; };
// Octave(const ushort i, const ushort s, const ushort sh, const ushort2 sz, const float sc) struct __align__(8) Level //is actually 24 bytes
// : index(i), stages(s), shrinkage(sh), size(sz), scale(sc) {} {
// }; int octave;
// struct __align__(8) Level //is actually 24 bytes float relScale;
// { float shrScale; // used for marking detection
// int octave; float scaling[2]; // calculated according to Dollal paper
// // float origScale; //not actually used // for 640x480 we can not get overflow
// float relScale; uchar2 workRect;
// float shrScale; // used for marking detection uchar2 objSize;
// float scaling[2]; // calculated according to Dollal paper
Level(int idx, const Octave& oct, const float scale, const int w, const int h)
// // for 640x480 we can not get overflow : octave(idx), relScale(scale / oct.scale), shrScale (relScale / (float)oct.shrinkage)
// uchar2 workRect; {
// uchar2 objSize; workRect.x = round(w / (float)oct.shrinkage);
workRect.y = round(h / (float)oct.shrinkage);
// Level(int idx, const Octave& oct, const float scale, const int w, const int h)
// : octave(idx), relScale(scale / oct.scale), shrScale (relScale / (float)oct.shrinkage) objSize.x = round(oct.size.x * relScale);
// { objSize.y = round(oct.size.y * relScale);
// workRect.x = round(w / (float)oct.shrinkage); }
// workRect.y = round(h / (float)oct.shrinkage); };
// objSize.x = round(oct.size.x * relScale); struct __align__(8) Node
// objSize.y = round(oct.size.y * relScale); {
// } // int feature;
// }; uchar4 rect;
float threshold;
Node(const uchar4 c, const int t) : rect(c), threshold(t) {}
};
struct __align__(8) Feature
{
int channel;
uchar4 rect;
Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
};
}
}}}
// struct Cascade // struct Cascade
// { // {
// Cascade() {} // Cascade() {}
...@@ -146,21 +159,6 @@ ...@@ -146,21 +159,6 @@
// static const float magnitudeScaling = 1.f ;// / sqrt(2); // static const float magnitudeScaling = 1.f ;// / sqrt(2);
// }; // };
// struct __align__(8) Node
// {
// int feature;
// float threshold;
// Node(const int f, const float t) : feature(f), threshold(t) {}
// };
// struct __align__(8) Feature
// {
// int channel;
// uchar4 rect;
// Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
// };
// }}} // }}}
// #endif #endif
\ No newline at end of file \ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment