Commit d547d9c9 authored by Marina Kolpakova's avatar Marina Kolpakova

fixed alignment related bugs in gpu/nvidia

parent 6de2b1a0
/*M/////////////////////////////////////////////////////////////////////////////////////// /*M///////////////////////////////////////////////////////////////////////////////////////
// //
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
// //
// By downloading, copying, installing or using the software you agree to this license. // By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install, // If you do not agree to this license, do not download, install,
// copy or use the software. // copy or use the software.
...@@ -64,18 +64,18 @@ ...@@ -64,18 +64,18 @@
/// \brief Model and solver parameters /// \brief Model and solver parameters
struct NCVBroxOpticalFlowDescriptor struct NCVBroxOpticalFlowDescriptor
{ {
/// flow smoothness /// flow smoothness
Ncv32f alpha; Ncv32f alpha;
/// gradient constancy importance /// gradient constancy importance
Ncv32f gamma; Ncv32f gamma;
/// pyramid scale factor /// pyramid scale factor
Ncv32f scale_factor; Ncv32f scale_factor;
/// number of lagged non-linearity iterations (inner loop) /// number of lagged non-linearity iterations (inner loop)
Ncv32u number_of_inner_iterations; Ncv32u number_of_inner_iterations;
/// number of warping iterations (number of pyramid levels) /// number of warping iterations (number of pyramid levels)
Ncv32u number_of_outer_iterations; Ncv32u number_of_outer_iterations;
/// number of linear system solver iterations /// number of linear system solver iterations
Ncv32u number_of_solver_iterations; Ncv32u number_of_solver_iterations;
}; };
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
...@@ -93,11 +93,11 @@ struct NCVBroxOpticalFlowDescriptor ...@@ -93,11 +93,11 @@ struct NCVBroxOpticalFlowDescriptor
NCV_EXPORTS NCV_EXPORTS
NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
INCVMemAllocator &gpu_mem_allocator, INCVMemAllocator &gpu_mem_allocator,
const NCVMatrix<Ncv32f> &frame0, const NCVMatrix<Ncv32f> &frame0,
const NCVMatrix<Ncv32f> &frame1, const NCVMatrix<Ncv32f> &frame1,
NCVMatrix<Ncv32f> &u, NCVMatrix<Ncv32f> &u,
NCVMatrix<Ncv32f> &v, NCVMatrix<Ncv32f> &v,
cudaStream_t stream); cudaStream_t stream);
#endif #endif
...@@ -59,6 +59,7 @@ ...@@ -59,6 +59,7 @@
#define _ncvhaarobjectdetection_hpp_ #define _ncvhaarobjectdetection_hpp_
#include <string> #include <string>
#include <vector_types.h>
#include "NCV.hpp" #include "NCV.hpp"
...@@ -68,41 +69,43 @@ ...@@ -68,41 +69,43 @@
// //
//============================================================================== //==============================================================================
struct HaarFeature64 struct HaarFeature64
{ {
uint2 _ui2; union
{
uint2 _ui2;
struct {NcvRect8u__ _rect; Ncv32f _f;};
};
#define HaarFeature64_CreateCheck_MaxRectField 0xFF #define HaarFeature64_CreateCheck_MaxRectField 0xFF
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/) __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
{ {
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES); ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX; _rect = NcvRect8u(rectX,rectY,rectWidth,rectHeight);
((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__host__ NCVStatus setWeight(Ncv32f weight) __host__ NCVStatus setWeight(Ncv32f weight)
{ {
((Ncv32f*)&(this->_ui2.y))[0] = weight; _f = weight;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight) __device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
{ {
NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x); *rectX = _rect.x;
*rectX = tmpRect.x; *rectY = _rect.y;
*rectY = tmpRect.y; *rectWidth = _rect.width;
*rectWidth = tmpRect.width; *rectHeight = _rect.height;
*rectHeight = tmpRect.height;
} }
__device__ __host__ Ncv32f getWeight(void) __device__ __host__ Ncv32f getWeight(void)
{ {
return *(Ncv32f*)(&this->_ui2.y); return _f;
} }
}; };
...@@ -170,24 +173,28 @@ public: ...@@ -170,24 +173,28 @@ public:
struct HaarClassifierNodeDescriptor32 struct HaarClassifierNodeDescriptor32
{
union
{ {
uint1 _ui1; uint1 _ui1;
Ncv32f _f;
};
__host__ NCVStatus create(Ncv32f leafValue) __host__ NCVStatus create(Ncv32f leafValue)
{ {
*(Ncv32f *)&this->_ui1 = leafValue; _f = leafValue;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__host__ NCVStatus create(Ncv32u offsetHaarClassifierNode) __host__ NCVStatus create(Ncv32u offsetHaarClassifierNode)
{ {
this->_ui1.x = offsetHaarClassifierNode; _ui1.x = offsetHaarClassifierNode;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__host__ Ncv32f getLeafValueHost(void) __host__ Ncv32f getLeafValueHost(void)
{ {
return *(Ncv32f *)&this->_ui1.x; return _f;
} }
#ifdef __CUDACC__ #ifdef __CUDACC__
...@@ -199,57 +206,67 @@ struct HaarClassifierNodeDescriptor32 ...@@ -199,57 +206,67 @@ struct HaarClassifierNodeDescriptor32
__device__ __host__ Ncv32u getNextNodeOffset(void) __device__ __host__ Ncv32u getNextNodeOffset(void)
{ {
return this->_ui1.x; return _ui1.x;
} }
}; };
struct HaarClassifierNode128 struct HaarClassifierNode128
{
union
{ {
uint4 _ui4; uint4 _ui4;
struct
{
HaarFeatureDescriptor32 _f;
Ncv32f _t;
HaarClassifierNodeDescriptor32 _nl;
HaarClassifierNodeDescriptor32 _nr;
};
};
__host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f) __host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f)
{ {
this->_ui4.x = *(Ncv32u *)&f; _f = f;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__host__ NCVStatus setThreshold(Ncv32f t) __host__ NCVStatus setThreshold(Ncv32f t)
{ {
this->_ui4.y = *(Ncv32u *)&t; _t = t;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl) __host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
{ {
this->_ui4.z = *(Ncv32u *)&nl; _nl = nl;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr) __host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
{ {
this->_ui4.w = *(Ncv32u *)&nr; _nr = nr;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
__host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void) __host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void)
{ {
return *(HaarFeatureDescriptor32 *)&this->_ui4.x; return _f;
} }
__host__ __device__ Ncv32f getThreshold(void) __host__ __device__ Ncv32f getThreshold(void)
{ {
return *(Ncv32f*)&this->_ui4.y; return _t;
} }
__host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void) __host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
{ {
return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z; return _nl;
} }
__host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void) __host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void)
{ {
return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w; return _nr;
} }
}; };
...@@ -260,11 +277,15 @@ struct HaarStage64 ...@@ -260,11 +277,15 @@ struct HaarStage64
#define HaarStage64_Interpret_MaskRootNodeOffset 0xFFFF0000 #define HaarStage64_Interpret_MaskRootNodeOffset 0xFFFF0000
#define HaarStage64_Interpret_ShiftRootNodeOffset 16 #define HaarStage64_Interpret_ShiftRootNodeOffset 16
union
{
uint2 _ui2; uint2 _ui2;
struct {Ncv32f _t; Ncv32u _root;};
};
__host__ NCVStatus setStageThreshold(Ncv32f t) __host__ NCVStatus setStageThreshold(Ncv32f t)
{ {
this->_ui2.x = *(Ncv32u *)&t; _t = t;
return NCV_SUCCESS; return NCV_SUCCESS;
} }
...@@ -290,7 +311,7 @@ struct HaarStage64 ...@@ -290,7 +311,7 @@ struct HaarStage64
__host__ __device__ Ncv32f getStageThreshold(void) __host__ __device__ Ncv32f getStageThreshold(void)
{ {
return *(Ncv32f*)&this->_ui2.x; return _t;
} }
__host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void) __host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)
...@@ -304,14 +325,12 @@ struct HaarStage64 ...@@ -304,14 +325,12 @@ struct HaarStage64
} }
}; };
NCV_CT_ASSERT(sizeof(HaarFeature64) == 8); NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4); NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4); NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16); NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
NCV_CT_ASSERT(sizeof(HaarStage64) == 8); NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
//============================================================================== //==============================================================================
// //
// Classifier cascade descriptor // Classifier cascade descriptor
......
...@@ -134,15 +134,24 @@ typedef unsigned char Ncv8u; ...@@ -134,15 +134,24 @@ typedef unsigned char Ncv8u;
typedef float Ncv32f; typedef float Ncv32f;
typedef double Ncv64f; typedef double Ncv64f;
struct NcvRect8u__
struct NcvRect8u
{ {
Ncv8u x; Ncv8u x;
Ncv8u y; Ncv8u y;
Ncv8u width; Ncv8u width;
Ncv8u height; Ncv8u height;
__host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {}; };
__host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
struct NcvRect8u : NcvRect8u__
{
__host__ __device__ NcvRect8u() {}
__host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height)
{
x = x;
y = y;
width = width;
height = height;
}
}; };
......
...@@ -33,9 +33,9 @@ private: ...@@ -33,9 +33,9 @@ private:
TestRectStdDev& operator=(const TestRectStdDev&); TestRectStdDev& operator=(const TestRectStdDev&);
NCVTestSourceProvider<Ncv8u> &src; NCVTestSourceProvider<Ncv8u> &src;
NcvRect32u rect;
Ncv32u width; Ncv32u width;
Ncv32u height; Ncv32u height;
NcvRect32u rect;
Ncv32f scaleFactor; Ncv32f scaleFactor;
NcvBool bTextureCache; NcvBool bTextureCache;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment