Commit 464a010f authored by Roman Donchenko

Merge remote-tracking branch 'origin/2.4' into merge-2.4

Conflicts:
	modules/core/include/opencv2/core/operations.hpp
	modules/core/include/opencv2/core/version.hpp
	modules/core/src/gpumat.cpp
	modules/cudaimgproc/src/color.cpp
	modules/features2d/src/orb.cpp
	modules/imgproc/src/samplers.cpp
	modules/ocl/include/opencv2/ocl/matrix_operations.hpp
	modules/ocl/include/opencv2/ocl/ocl.hpp
	samples/ocl/facedetect.cpp
parents 8db53c17 8f10ca51
@@ -454,7 +454,7 @@ typedef unsigned int cl_GLenum;
 /* Define alignment keys */
 #if defined( __GNUC__ )
     #define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
-#elif defined( _WIN32) && (_MSC_VER)
+#elif defined( _WIN32) && defined(_MSC_VER)
     /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
     /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
     /* #include <crtdefs.h> */
......
@@ -70,7 +70,7 @@ bool CvCascadeImageReader::NegReader::nextImg()
         _offset.x = std::min( (int)round % winSize.width, src.cols - winSize.width );
         _offset.y = std::min( (int)round / winSize.width, src.rows - winSize.height );
         if( !src.empty() && src.type() == CV_8UC1
-            && offset.x >= 0 && offset.y >= 0 )
+            && _offset.x >= 0 && _offset.y >= 0 )
             break;
     }
......
@@ -105,7 +105,7 @@ Building OpenCV
 Enable hardware optimizations
 -----------------------------
-Depending on target platfrom architecture different instruction sets can be used. By default
+Depending on target platform architecture different instruction sets can be used. By default
 compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
 to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
 NEON SIMD extensions.
......
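The cross-compilation tutorial hunk above passes these options on the cmake command line together with the ARM toolchain file. As a rough illustrative sketch only (the toolchain file path and build-directory layout are assumptions based on a typical OpenCV source tree, not part of this commit), enabling both instruction-set options might look like:

    cd ~/opencv/platforms/linux
    mkdir -p build_hardfp
    cd build_hardfp
    # enable VFPv3 and NEON code generation for the ARM cross build
    cmake -DUSE_VFPV3=ON -DUSE_NEON=ON \
          -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake ../../..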
@@ -2110,6 +2110,8 @@ void cv::cuda::cvtColor(InputArray src, OutputArray dst, int code, int dcn, Stream& stream)
 void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Stream& stream)
 {
+    CV_Assert( !_src.empty() );
     switch (code)
     {
     case cv::COLOR_BayerBG2GRAY: case cv::COLOR_BayerGB2GRAY: case cv::COLOR_BayerRG2GRAY: case cv::COLOR_BayerGR2GRAY:
......
@@ -2357,6 +2357,7 @@ struct Demosaicing : testing::TestWithParam<cv::cuda::DeviceInfo>
 CUDA_TEST_P(Demosaicing, BayerBG2BGR)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 1));
@@ -2370,6 +2371,7 @@ CUDA_TEST_P(Demosaicing, BayerBG2BGR)
 CUDA_TEST_P(Demosaicing, BayerGB2BGR)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 1));
@@ -2383,6 +2385,7 @@ CUDA_TEST_P(Demosaicing, BayerGB2BGR)
 CUDA_TEST_P(Demosaicing, BayerRG2BGR)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 0));
@@ -2396,6 +2399,7 @@ CUDA_TEST_P(Demosaicing, BayerRG2BGR)
 CUDA_TEST_P(Demosaicing, BayerGR2BGR)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 0));
@@ -2409,6 +2413,7 @@ CUDA_TEST_P(Demosaicing, BayerGR2BGR)
 CUDA_TEST_P(Demosaicing, BayerBG2BGR_MHT)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 1));
@@ -2422,6 +2427,7 @@ CUDA_TEST_P(Demosaicing, BayerBG2BGR_MHT)
 CUDA_TEST_P(Demosaicing, BayerGB2BGR_MHT)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 1));
@@ -2435,6 +2441,7 @@ CUDA_TEST_P(Demosaicing, BayerGB2BGR_MHT)
 CUDA_TEST_P(Demosaicing, BayerRG2BGR_MHT)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 0));
@@ -2448,6 +2455,7 @@ CUDA_TEST_P(Demosaicing, BayerRG2BGR_MHT)
 CUDA_TEST_P(Demosaicing, BayerGR2BGR_MHT)
 {
     cv::Mat img = readImage("stereobm/aloe-L.png");
+    ASSERT_FALSE(img.empty()) << "Can't load input image";
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 0));
......
@@ -141,7 +141,7 @@ static void computeOrbDescriptor(const KeyPoint& kpt,
     float x, y;
     int ix, iy;
 #if 1
     #define GET_VALUE(idx) \
            (x = pattern[idx].x*a - pattern[idx].y*b, \
             y = pattern[idx].x*b + pattern[idx].y*a, \
             ix = cvRound(x), \
......
@@ -1552,9 +1552,9 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_data )
             // image origin is not necessarily at (0,0)
             int x0 = (widget->allocation.width - image_widget->scaled_image->cols)/2;
             int y0 = (widget->allocation.height - image_widget->scaled_image->rows)/2;
-            pt.x = cvRound( ((pt32f.x-x0)*image_widget->original_image->cols)/
+            pt.x = cvFloor( ((pt32f.x-x0)*image_widget->original_image->cols)/
                     image_widget->scaled_image->cols );
-            pt.y = cvRound( ((pt32f.y-y0)*image_widget->original_image->rows)/
+            pt.y = cvFloor( ((pt32f.y-y0)*image_widget->original_image->rows)/
                     image_widget->scaled_image->rows );
         }
         else{
......
@@ -64,7 +64,7 @@ adjustRect( const uchar* src, size_t src_step, int pix_size,
             rect.x = win_size.width;
     }
-    if( ip.x + win_size.width < src_size.width )
+    if( ip.x < src_size.width - win_size.width )
         rect.width = win_size.width;
     else
     {
@@ -85,7 +85,7 @@ adjustRect( const uchar* src, size_t src_step, int pix_size,
     else
         rect.y = -ip.y;
-    if( ip.y + win_size.height < src_size.height )
+    if( ip.y < src_size.height - win_size.height )
         rect.height = win_size.height;
     else
     {
@@ -155,8 +155,8 @@ void getRectSubPix_Cn_(const _Tp* src, size_t src_step, Size src_size,
     src_step /= sizeof(src[0]);
     dst_step /= sizeof(dst[0]);
-    if( 0 <= ip.x && ip.x + win_size.width < src_size.width &&
-        0 <= ip.y && ip.y + win_size.height < src_size.height )
+    if( 0 <= ip.x && ip.x < src_size.width - win_size.width &&
+        0 <= ip.y && ip.y < src_size.height - win_size.height)
     {
         // extracted rectangle is totally inside the image
         src += ip.y * src_step + ip.x*cn;
......
@@ -144,14 +144,6 @@ OpenCV C++ 1-D or 2-D dense array class ::
             //! returns true if oclMatrix data is NULL
             bool empty() const;
-            //! returns pointer to y-th row
-            uchar* ptr(int y = 0);
-            const uchar *ptr(int y = 0) const;
-            //! template version of the above method
-            template<typename _Tp> _Tp *ptr(int y = 0);
-            template<typename _Tp> const _Tp *ptr(int y = 0) const;
             //! matrix transposition
             oclMat t() const;
......
@@ -378,14 +378,6 @@ namespace cv
         //! returns true if oclMatrix data is NULL
         bool empty() const;
-        //! returns pointer to y-th row
-        uchar* ptr(int y = 0);
-        const uchar *ptr(int y = 0) const;
-        //! template version of the above method
-        template<typename _Tp> _Tp *ptr(int y = 0);
-        template<typename _Tp> const _Tp *ptr(int y = 0) const;
        //! matrix transposition
        oclMat t() const;
......
@@ -456,36 +456,6 @@ namespace cv
            return data == 0;
        }
-        inline uchar *oclMat::ptr(int y)
-        {
-            CV_DbgAssert( (unsigned)y < (unsigned)rows );
-            CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
-            return data + step * y;
-        }
-        inline const uchar *oclMat::ptr(int y) const
-        {
-            CV_DbgAssert( (unsigned)y < (unsigned)rows );
-            CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
-            return data + step * y;
-        }
-        template<typename _Tp> inline _Tp *oclMat::ptr(int y)
-        {
-            CV_DbgAssert( (unsigned)y < (unsigned)rows );
-            CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
-            return (_Tp *)(data + step * y);
-        }
-        template<typename _Tp> inline const _Tp *oclMat::ptr(int y) const
-        {
-            CV_DbgAssert( (unsigned)y < (unsigned)rows );
-            CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
-            return (const _Tp *)(data + step * y);
-        }
        inline oclMat oclMat::t() const
        {
            oclMat tmp;
......
@@ -73,10 +73,10 @@ PERF_TEST_P(MomentsFixture, Moments,
     Mat src(srcSize, type), dst(7, 1, CV_64F);
     randu(src, 0, 255);
-    oclMat src_d(src);
     cv::Moments mom;
     if (RUN_OCL_IMPL)
     {
+        oclMat src_d(src);
         OCL_TEST_CYCLE() mom = cv::ocl::ocl_moments(src_d, binaryImage);
     }
     else if (RUN_PLAIN_IMPL)
......
@@ -676,7 +676,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
     ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
     ensureSizeIsEnough(1, nQuery, CV_32F, distance);
-    matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
+    matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
     return;
 }
......
@@ -290,8 +290,9 @@ void openCLFree(void *devPtr)
     }
 #else
     // TODO FIXIT Attach clReleaseMemObject call to event completion callback
-    Context* ctx = Context::getContext();
-    clFinish(getClCommandQueue(ctx));
+    // TODO 2013/12/04 Disable workaround
+    // Context* ctx = Context::getContext();
+    // clFinish(getClCommandQueue(ctx));
 #endif
     openCLSafeCall(clReleaseMemObject(data.mainBuffer));
 }
......
@@ -62,13 +62,13 @@ typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode
 GpuHidHaarTreeNode;
-typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier
-{
-    int count __attribute__((aligned (4)));
-    GpuHidHaarTreeNode* node __attribute__((aligned (8)));
-    float* alpha __attribute__((aligned (8)));
-}
-GpuHidHaarClassifier;
+//typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier
+//{
+//    int count __attribute__((aligned (4)));
+//    GpuHidHaarTreeNode* node __attribute__((aligned (8)));
+//    float* alpha __attribute__((aligned (8)));
+//}
+//GpuHidHaarClassifier;
 typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier
@@ -84,22 +84,22 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier
 GpuHidHaarStageClassifier;
-typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
-{
-    int count __attribute__((aligned (4)));
-    int is_stump_based __attribute__((aligned (4)));
-    int has_tilted_features __attribute__((aligned (4)));
-    int is_tree __attribute__((aligned (4)));
-    int pq0 __attribute__((aligned (4)));
-    int pq1 __attribute__((aligned (4)));
-    int pq2 __attribute__((aligned (4)));
-    int pq3 __attribute__((aligned (4)));
-    int p0 __attribute__((aligned (4)));
-    int p1 __attribute__((aligned (4)));
-    int p2 __attribute__((aligned (4)));
-    int p3 __attribute__((aligned (4)));
-    float inv_window_area __attribute__((aligned (4)));
-} GpuHidHaarClassifierCascade;
+//typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
+//{
+//    int count __attribute__((aligned (4)));
+//    int is_stump_based __attribute__((aligned (4)));
+//    int has_tilted_features __attribute__((aligned (4)));
+//    int is_tree __attribute__((aligned (4)));
+//    int pq0 __attribute__((aligned (4)));
+//    int pq1 __attribute__((aligned (4)));
+//    int pq2 __attribute__((aligned (4)));
+//    int pq3 __attribute__((aligned (4)));
+//    int p0 __attribute__((aligned (4)));
+//    int p1 __attribute__((aligned (4)));
+//    int p2 __attribute__((aligned (4)));
+//    int p3 __attribute__((aligned (4)));
+//    float inv_window_area __attribute__((aligned (4)));
+//} GpuHidHaarClassifierCascade;
 #ifdef PACKED_CLASSIFIER
@@ -196,10 +196,12 @@ __kernel void gpuRunHaarClassifierCascadePacked(
     for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
     {// iterate until candidate is exist
         float stage_sum = 0.0f;
-        int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
-        float stagethreshold = as_float(stageinfo.y);
+        __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
+            ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
+        int stagecount = stageinfo->count;
+        float stagethreshold = stageinfo->threshold;
         int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x);
-        for(int nodeloop = 0; nodeloop < stageinfo.x; nodecounter++,nodeloop++ )
+        for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
         {
 // simple macro to extract shorts from int
 #define M0(_t) ((_t)&0xFFFF)
@@ -358,11 +360,14 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
     for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ )
     {
         float stage_sum = 0.f;
-        int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
-        float stagethreshold = as_float(stageinfo.y);
-        for(int nodeloop = 0; nodeloop < stageinfo.x; )
+        __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
+            ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
+        int stagecount = stageinfo->count;
+        float stagethreshold = stageinfo->threshold;
+        for(int nodeloop = 0; nodeloop < stagecount; )
         {
-            __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
+            __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
+                (((__global uchar*)nodeptr) + nodecounter * sizeof(GpuHidHaarTreeNode));
             int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
             int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
@@ -418,7 +423,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
 #endif
         }
-        result = (stage_sum >= stagethreshold);
+        result = (stage_sum >= stagethreshold) ? 1 : 0;
     }
     if(factor < 2)
     {
@@ -447,14 +452,17 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
         lclcount[0]=0;
         barrier(CLK_LOCAL_MEM_FENCE);
-        int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
-        float stagethreshold = as_float(stageinfo.y);
+        //int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
+        __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
+            ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
+        int stagecount = stageinfo->count;
+        float stagethreshold = stageinfo->threshold;
         int perfscale = queuecount > 4 ? 3 : 2;
         int queuecount_loop = (queuecount + (1<<perfscale)-1) >> perfscale;
         int lcl_compute_win = lcl_sz >> perfscale;
         int lcl_compute_win_id = (lcl_id >>(6-perfscale));
-        int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale);
+        int lcl_loops = (stagecount + lcl_compute_win -1) >> (6-perfscale);
         int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale));
         for(int queueloop=0; queueloop<queuecount_loop; queueloop++)
         {
@@ -469,10 +477,10 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
                 float part_sum = 0.f;
                 const int stump_factor = STUMP_BASED ? 1 : 2;
                 int root_offset = 0;
-                for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
+                for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stagecount;)
                 {
-                    __global GpuHidHaarTreeNode* currentnodeptr =
-                        nodeptr + (nodecounter + tempnodecounter) * stump_factor + root_offset;
+                    __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
+                        (((__global uchar*)nodeptr) + sizeof(GpuHidHaarTreeNode) * ((nodecounter + tempnodecounter) * stump_factor + root_offset));
                     int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
                     int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
@@ -549,7 +557,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
         queuecount = lclcount[0];
         barrier(CLK_LOCAL_MEM_FENCE);
-        nodecounter += stageinfo.x;
+        nodecounter += stagecount;
     }//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
     if(lcl_id<queuecount)
......
@@ -59,13 +59,13 @@ typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode
     int right __attribute__((aligned(4)));
 }
 GpuHidHaarTreeNode;
-typedef struct __attribute__((aligned(32))) GpuHidHaarClassifier
-{
-    int count __attribute__((aligned(4)));
-    GpuHidHaarTreeNode *node __attribute__((aligned(8)));
-    float *alpha __attribute__((aligned(8)));
-}
-GpuHidHaarClassifier;
+//typedef struct __attribute__((aligned(32))) GpuHidHaarClassifier
+//{
+//    int count __attribute__((aligned(4)));
+//    GpuHidHaarTreeNode *node __attribute__((aligned(8)));
+//    float *alpha __attribute__((aligned(8)));
+//}
+//GpuHidHaarClassifier;
 typedef struct __attribute__((aligned(64))) GpuHidHaarStageClassifier
 {
     int count __attribute__((aligned(4)));
@@ -77,27 +77,27 @@ typedef struct __attribute__((aligned(64))) GpuHidHaarStageClassifier
     int reserved3 __attribute__((aligned(8)));
 }
 GpuHidHaarStageClassifier;
-typedef struct __attribute__((aligned(64))) GpuHidHaarClassifierCascade
-{
-    int count __attribute__((aligned(4)));
-    int is_stump_based __attribute__((aligned(4)));
-    int has_tilted_features __attribute__((aligned(4)));
-    int is_tree __attribute__((aligned(4)));
-    int pq0 __attribute__((aligned(4)));
-    int pq1 __attribute__((aligned(4)));
-    int pq2 __attribute__((aligned(4)));
-    int pq3 __attribute__((aligned(4)));
-    int p0 __attribute__((aligned(4)));
-    int p1 __attribute__((aligned(4)));
-    int p2 __attribute__((aligned(4)));
-    int p3 __attribute__((aligned(4)));
-    float inv_window_area __attribute__((aligned(4)));
-} GpuHidHaarClassifierCascade;
+//typedef struct __attribute__((aligned(64))) GpuHidHaarClassifierCascade
+//{
+//    int count __attribute__((aligned(4)));
+//    int is_stump_based __attribute__((aligned(4)));
+//    int has_tilted_features __attribute__((aligned(4)));
+//    int is_tree __attribute__((aligned(4)));
+//    int pq0 __attribute__((aligned(4)));
+//    int pq1 __attribute__((aligned(4)));
+//    int pq2 __attribute__((aligned(4)));
+//    int pq3 __attribute__((aligned(4)));
+//    int p0 __attribute__((aligned(4)));
+//    int p1 __attribute__((aligned(4)));
+//    int p2 __attribute__((aligned(4)));
+//    int p3 __attribute__((aligned(4)));
+//    float inv_window_area __attribute__((aligned(4)));
+//} GpuHidHaarClassifierCascade;
 __kernel void gpuRunHaarClassifierCascade_scaled2(
-    global GpuHidHaarStageClassifier *stagecascadeptr,
+    global GpuHidHaarStageClassifier *stagecascadeptr_,
     global int4 *info,
-    global GpuHidHaarTreeNode *nodeptr,
+    global GpuHidHaarTreeNode *nodeptr_,
     global const int *restrict sum,
     global const float *restrict sqsum,
     global int4 *candidate,
@@ -132,8 +132,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
     int max_idx = rows * cols - 1;
     for (int scalei = 0; scalei < loopcount; scalei++)
     {
-        int4 scaleinfo1;
-        scaleinfo1 = info[scalei];
+        int4 scaleinfo1 = info[scalei];
         int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
         int totalgrp = scaleinfo1.y & 0xffff;
         float factor = as_float(scaleinfo1.w);
@@ -174,15 +173,18 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
                 for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
                 {
                     float stage_sum = 0.f;
-                    int stagecount = stagecascadeptr[stageloop].count;
+                    __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
+                        (((__global uchar*)stagecascadeptr_)+stageloop*sizeof(GpuHidHaarStageClassifier));
+                    int stagecount = stageinfo->count;
                     for (int nodeloop = 0; nodeloop < stagecount;)
                     {
-                        __global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
+                        __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
+                            (((__global uchar*)nodeptr_) + nodecounter * sizeof(GpuHidHaarTreeNode));
                         int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
                         int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
                         int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
                         float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
-                        float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0]));
+                        float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
                         float nodethreshold = w.w * variance_norm_factor;
                         info1.x += p_offset;
@@ -204,7 +206,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
                             sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
                             + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
-                        bool passThres = classsum >= nodethreshold;
+                        bool passThres = (classsum >= nodethreshold) ? 1 : 0;
 #if STUMP_BASED
                         stage_sum += passThres ? alpha3.y : alpha3.x;
@@ -234,7 +236,8 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
                         }
 #endif
                     }
-                    result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold);
+                    result = (stage_sum >= stageinfo->threshold) ? 1 : 0;
                 }
                 barrier(CLK_LOCAL_MEM_FENCE);
@@ -281,11 +284,14 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
             }
         }
     }
-__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, int nodenum)
+__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, const int nodenum)
 {
-    int counter = get_global_id(0);
+    const int counter = get_global_id(0);
     int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
-    GpuHidHaarTreeNode t1 = *(orinode + counter);
+    GpuHidHaarTreeNode t1 = *(__global GpuHidHaarTreeNode*)
+        (((__global uchar*)orinode) + counter * sizeof(GpuHidHaarTreeNode));
+    __global GpuHidHaarTreeNode* pNew = (__global GpuHidHaarTreeNode*)
+        (((__global uchar*)newnode) + (counter + nodenum) * sizeof(GpuHidHaarTreeNode));
 #pragma unroll
     for (i = 0; i < 3; i++)
@@ -297,22 +303,21 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
     }
     t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
-    counter += nodenum;
 #pragma unroll
     for (i = 0; i < 3; i++)
     {
-        newnode[counter].p[i][0] = tr_x[i];
-        newnode[counter].p[i][1] = tr_y[i];
-        newnode[counter].p[i][2] = tr_x[i] + tr_w[i];
-        newnode[counter].p[i][3] = tr_y[i] + tr_h[i];
-        newnode[counter].weight[i] = t1.weight[i] * weight_scale;
+        pNew->p[i][0] = tr_x[i];
+        pNew->p[i][1] = tr_y[i];
+        pNew->p[i][2] = tr_x[i] + tr_w[i];
+        pNew->p[i][3] = tr_y[i] + tr_h[i];
+        pNew->weight[i] = t1.weight[i] * weight_scale;
     }
-    newnode[counter].left = t1.left;
-    newnode[counter].right = t1.right;
-    newnode[counter].threshold = t1.threshold;
-    newnode[counter].alpha[0] = t1.alpha[0];
-    newnode[counter].alpha[1] = t1.alpha[1];
-    newnode[counter].alpha[2] = t1.alpha[2];
+    pNew->left = t1.left;
+    pNew->right = t1.right;
+    pNew->threshold = t1.threshold;
+    pNew->alpha[0] = t1.alpha[0];
+    pNew->alpha[1] = t1.alpha[1];
+    pNew->alpha[2] = t1.alpha[2];
 }
@@ -74,11 +74,11 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
         VT vthresh = (VT)(thresh);
 #ifdef THRESH_BINARY
-        VT vecValue = sdata > vthresh ? max_val : (VT)(0);
+        VT vecValue = sdata > vthresh ? (VT)max_val : (VT)(0);
 #elif defined THRESH_BINARY_INV
-        VT vecValue = sdata > vthresh ? (VT)(0) : max_val;
+        VT vecValue = sdata > vthresh ? (VT)(0) : (VT)max_val;
 #elif defined THRESH_TRUNC
-        VT vecValue = sdata > vthresh ? thresh : sdata;
+        VT vecValue = sdata > vthresh ? (VT)thresh : sdata;
 #elif defined THRESH_TOZERO
         VT vecValue = sdata > vthresh ? sdata : (VT)(0);
 #elif defined THRESH_TOZERO_INV
......
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
     package="org.opencv.engine"
-    android:versionCode="214@ANDROID_PLATFORM_VERSION_CODE@"
-    android:versionName="2.14" >
+    android:versionCode="216@ANDROID_PLATFORM_VERSION_CODE@"
+    android:versionName="2.16" >
     <uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" />
     <uses-feature android:name="android.hardware.touchscreen" android:required="false"/>
......
@@ -170,7 +170,7 @@ inline string JoinPlatform(int platform)
     return result;
 }
-inline int SplitPlatfrom(const vector<string>& features)
+inline int SplitPlatform(const vector<string>& features)
 {
     int result = 0;
@@ -419,7 +419,7 @@ InstallPath(install_path)
         return;
     }
-    Platform = SplitPlatfrom(features);
+    Platform = SplitPlatform(features);
     if (PLATFORM_UNKNOWN != Platform)
     {
         switch (Platform)
......
@@ -170,7 +170,7 @@ TEST(CpuID, CheckVFPv3)
     EXPECT_TRUE(cpu_id & FEATURES_HAS_VFPv3);
 }
-TEST(PlatfromDetector, CheckTegra)
+TEST(PlatformDetector, CheckTegra)
 {
     EXPECT_NE(PLATFORM_UNKNOWN, DetectKnownPlatforms());
 }
......
@@ -90,28 +90,28 @@ public class ManagerActivity extends Activity
         mInstalledPackageView.setAdapter(mInstalledPacksAdapter);
         TextView HardwarePlatformView = (TextView)findViewById(R.id.HardwareValue);
-        int Platfrom = HardwareDetector.DetectKnownPlatforms();
+        int Platform = HardwareDetector.DetectKnownPlatforms();
         int CpuId = HardwareDetector.GetCpuID();
-        if (HardwareDetector.PLATFORM_UNKNOWN != Platfrom)
+        if (HardwareDetector.PLATFORM_UNKNOWN != Platform)
         {
-            if (HardwareDetector.PLATFORM_TEGRA == Platfrom)
+            if (HardwareDetector.PLATFORM_TEGRA == Platform)
             {
                 HardwarePlatformView.setText("Tegra");
             }
-            else if (HardwareDetector.PLATFORM_TEGRA2 == Platfrom)
+            else if (HardwareDetector.PLATFORM_TEGRA2 == Platform)
             {
                 HardwarePlatformView.setText("Tegra 2");
             }
-            else if (HardwareDetector.PLATFORM_TEGRA3 == Platfrom)
+            else if (HardwareDetector.PLATFORM_TEGRA3 == Platform)
             {
                 HardwarePlatformView.setText("Tegra 3");
             }
-            else if (HardwareDetector.PLATFORM_TEGRA4i == Platfrom)
+            else if (HardwareDetector.PLATFORM_TEGRA4i == Platform)
             {
                 HardwarePlatformView.setText("Tegra 4i");
             }
-            else if (HardwareDetector.PLATFORM_TEGRA4 == Platfrom)
+            else if (HardwareDetector.PLATFORM_TEGRA4 == Platform)
             {
                 HardwarePlatformView.setText("Tegra 4");
             }
......
@@ -14,20 +14,20 @@ manually using adb tool:
 .. code-block:: sh
-    adb install OpenCV-2.4.7-android-sdk/apk/OpenCV_2.4.7_Manager_2.14_<platform>.apk
+    adb install OpenCV-2.4.7.1-android-sdk/apk/OpenCV_2.4.7.1_Manager_2.15_<platform>.apk
 Use the table below to determine proper OpenCV Manager package for your device:
-+------------------------------+--------------+----------------------------------------------------+
-| Hardware Platform            | Android ver. | Package name                                       |
-+==============================+==============+====================================================+
-| armeabi-v7a (ARMv7-A + NEON) | >= 2.3       | OpenCV_2.4.7_Manager_2.14_armv7a-neon.apk          |
-+------------------------------+--------------+----------------------------------------------------+
-| armeabi-v7a (ARMv7-A + NEON) | = 2.2        | OpenCV_2.4.7_Manager_2.14_armv7a-neon-android8.apk |
-+------------------------------+--------------+----------------------------------------------------+
-| armeabi (ARMv5, ARMv6)       | >= 2.3       | OpenCV_2.4.7_Manager_2.14_armeabi.apk              |
-+------------------------------+--------------+----------------------------------------------------+
-| Intel x86                    | >= 2.3       | OpenCV_2.4.7_Manager_2.14_x86.apk                  |
-+------------------------------+--------------+----------------------------------------------------+
-| MIPS                         | >= 2.3       | OpenCV_2.4.7_Manager_2.14_mips.apk                 |
-+------------------------------+--------------+----------------------------------------------------+
++------------------------------+--------------+------------------------------------------------------+
+| Hardware Platform            | Android ver. | Package name                                         |
++==============================+==============+======================================================+
+| armeabi-v7a (ARMv7-A + NEON) | >= 2.3       | OpenCV_2.4.7.1_Manager_2.15_armv7a-neon.apk          |
++------------------------------+--------------+------------------------------------------------------+
+| armeabi-v7a (ARMv7-A + NEON) | = 2.2        | OpenCV_2.4.7.1_Manager_2.15_armv7a-neon-android8.apk |
++------------------------------+--------------+------------------------------------------------------+
+| armeabi (ARMv5, ARMv6)       | >= 2.3       | OpenCV_2.4.7.1_Manager_2.15_armeabi.apk              |
++------------------------------+--------------+------------------------------------------------------+
+| Intel x86                    | >= 2.3       | OpenCV_2.4.7.1_Manager_2.15_x86.apk                  |
++------------------------------+--------------+------------------------------------------------------+
+| MIPS                         | >= 2.3       | OpenCV_2.4.7.1_Manager_2.15_mips.apk                 |
++------------------------------+--------------+------------------------------------------------------+
@@ -29,6 +29,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
     ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/cudafilters/include")
   endif()
+  if(HAVE_opencv_ocl)
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/ocl/include")
+  endif()
   if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function")
   endif()
@@ -56,6 +60,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
       target_link_libraries(${the_target} opencv_cudaarithm opencv_cudafilters)
     endif()
+    if(HAVE_opencv_ocl)
+      target_link_libraries(${the_target} opencv_ocl)
+    endif()
     set_target_properties(${the_target} PROPERTIES
       OUTPUT_NAME "cpp-${sample_kind}-${name}"
       PROJECT_LABEL "(${sample_KIND}) ${name}")
......
#include "opencv2/opencv_modules.hpp"
#include "opencv2/highgui/highgui.hpp" #include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp" #include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/features2d/features2d.hpp" #include "opencv2/features2d/features2d.hpp"
#include "opencv2/nonfree/nonfree.hpp" #include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/ml/ml.hpp" #include "opencv2/ml/ml.hpp"
#ifdef HAVE_OPENCV_OCL
#define _OCL_SVM_ 1 //select whether using ocl::svm method or not, default is using
#include "opencv2/ocl/ocl.hpp"
#endif
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
...@@ -2373,9 +2378,15 @@ static void setSVMTrainAutoParams( CvParamGrid& c_grid, CvParamGrid& gamma_grid, ...@@ -2373,9 +2378,15 @@ static void setSVMTrainAutoParams( CvParamGrid& c_grid, CvParamGrid& gamma_grid,
degree_grid.step = 0; degree_grid.step = 0;
} }
#if defined HAVE_OPENCV_OCL && _OCL_SVM_
static void trainSVMClassifier( cv::ocl::CvSVM_OCL& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath )
#else
static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData, static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector, Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath ) const string& resPath )
#endif
{ {
/* first check if a previously trained svm for the current class has been saved to file */ /* first check if a previously trained svm for the current class has been saved to file */
string svmFilename = resPath + svmsDir + "/" + objClassName + ".xml.gz"; string svmFilename = resPath + svmsDir + "/" + objClassName + ".xml.gz";
@@ -2448,9 +2459,15 @@ static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsEx
         }
     }
+#if defined HAVE_OPENCV_OCL && _OCL_SVM_
+static void computeConfidences( cv::ocl::CvSVM_OCL& svm, const string& objClassName, VocData& vocData,
+                                Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
+                                const string& resPath )
+#else
 static void computeConfidences( CvSVM& svm, const string& objClassName, VocData& vocData,
                                 Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
                                 const string& resPath )
+#endif
 {
     cout << "*** CALCULATING CONFIDENCES FOR CLASS " << objClassName << " ***" << endl;
     cout << "CALCULATING BOW VECTORS FOR TEST SET OF " << objClassName << "..." << endl;
@@ -2589,7 +2606,11 @@ int main(int argc, char** argv)
     for( size_t classIdx = 0; classIdx < objClasses.size(); ++classIdx )
     {
         // Train a classifier on train dataset
+#if defined HAVE_OPENCV_OCL && _OCL_SVM_
+        cv::ocl::CvSVM_OCL svm;
+#else
         CvSVM svm;
+#endif
         trainSVMClassifier( svm, svmTrainParamsExt, objClasses[classIdx], vocData,
                             bowExtractor, featureDetector, resPath );
......
#include "opencv2/opencv_modules.hpp"
#include "opencv2/core/core.hpp" #include "opencv2/core/core.hpp"
#include "opencv2/ml/ml.hpp" #include "opencv2/ml/ml.hpp"
#include "opencv2/highgui/highgui.hpp" #include "opencv2/highgui/highgui.hpp"
#ifdef HAVE_OPENCV_OCL
#define _OCL_KNN_ 1 // select whether using ocl::KNN method or not, default is using
#define _OCL_SVM_ 1 // select whether using ocl::svm method or not, default is using
#include "opencv2/ocl/ocl.hpp"
#endif
#include <stdio.h> #include <stdio.h>
...@@ -133,7 +139,14 @@ static void find_decision_boundary_KNN( int K ) ...@@ -133,7 +139,14 @@ static void find_decision_boundary_KNN( int K )
prepare_train_data( trainSamples, trainClasses ); prepare_train_data( trainSamples, trainClasses );
// learn classifier // learn classifier
#if defined HAVE_OPENCV_OCL && _OCL_KNN_
cv::ocl::KNearestNeighbour knnClassifier;
Mat temp, result;
knnClassifier.train(trainSamples, trainClasses, temp, false, K);
cv::ocl::oclMat testSample_ocl, reslut_ocl;
#else
CvKNearest knnClassifier( trainSamples, trainClasses, Mat(), false, K ); CvKNearest knnClassifier( trainSamples, trainClasses, Mat(), false, K );
#endif
Mat testSample( 1, 2, CV_32FC1 ); Mat testSample( 1, 2, CV_32FC1 );
for( int y = 0; y < img.rows; y += testStep ) for( int y = 0; y < img.rows; y += testStep )
...@@ -142,9 +155,19 @@ static void find_decision_boundary_KNN( int K ) ...@@ -142,9 +155,19 @@ static void find_decision_boundary_KNN( int K )
{ {
testSample.at<float>(0) = (float)x; testSample.at<float>(0) = (float)x;
testSample.at<float>(1) = (float)y; testSample.at<float>(1) = (float)y;
#if defined HAVE_OPENCV_OCL && _OCL_KNN_
testSample_ocl.upload(testSample);
knnClassifier.find_nearest(testSample_ocl, K, reslut_ocl);
reslut_ocl.download(result);
int response = saturate_cast<int>(result.at<float>(0));
circle(imgDst, Point(x, y), 1, classColors[response]);
#else
int response = (int)knnClassifier.find_nearest( testSample, K ); int response = (int)knnClassifier.find_nearest( testSample, K );
circle( imgDst, Point(x,y), 1, classColors[response] ); circle( imgDst, Point(x,y), 1, classColors[response] );
#endif
} }
} }
} }
@@ -159,7 +182,11 @@ static void find_decision_boundary_SVM( CvSVMParams params )
     prepare_train_data( trainSamples, trainClasses );
     // learn classifier
+#if defined HAVE_OPENCV_OCL && _OCL_SVM_
+    cv::ocl::CvSVM_OCL svmClassifier(trainSamples, trainClasses, Mat(), Mat(), params);
+#else
     CvSVM svmClassifier( trainSamples, trainClasses, Mat(), Mat(), params );
+#endif
     Mat testSample( 1, 2, CV_32FC1 );
     for( int y = 0; y < img.rows; y += testStep )
@@ -178,7 +205,7 @@ static void find_decision_boundary_SVM( CvSVMParams params )
     for( int i = 0; i < svmClassifier.get_support_vector_count(); i++ )
     {
         const float* supportVector = svmClassifier.get_support_vector(i);
-        circle( imgDst, Point(supportVector[0],supportVector[1]), 5, Scalar(255,255,255), -1 );
+        circle( imgDst, Point(saturate_cast<int>(supportVector[0]),saturate_cast<int>(supportVector[1])), 5, CV_RGB(255,255,255), -1 );
     }
 }
......
@@ -8,11 +8,16 @@
 #include <iostream>
 #include <stdio.h>
+#if defined(_MSC_VER) && (_MSC_VER >= 1700)
+# include <thread>
+#endif
 using namespace std;
 using namespace cv;
 #define LOOP_NUM 1
+///////////////////////////single-threading faces detecting///////////////////////////////
 const static Scalar colors[] = { CV_RGB(0,0,255),
                                  CV_RGB(0,128,255),
                                  CV_RGB(0,255,255),
@@ -26,7 +31,7 @@ const static Scalar colors[] = { CV_RGB(0,0,255),
 int64 work_begin = 0;
 int64 work_end = 0;
-string outputName;
+string inputName, outputName, cascadeName;
 static void workBegin()
 {
@@ -61,41 +66,17 @@ static void Draw(Mat& img, vector<Rect>& faces, double scale);
 // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
 double checkRectSimilarity(Size sz, vector<Rect>& cpu_rst, vector<Rect>& gpu_rst);
-int main( int argc, const char** argv )
+static int facedetect_one_thread(bool useCPU, double scale )
 {
-    const char* keys =
-        "{ h help | false | print help message }"
-        "{ i input | | specify input image }"
-        "{ t template | haarcascade_frontalface_alt.xml |"
-        " specify template file path }"
-        "{ c scale | 1.0 | scale image }"
-        "{ s use_cpu | false | use cpu or gpu to process the image }"
-        "{ o output | facedetect_output.jpg |"
-        " specify output image save path(only works when input is images) }";
-    CommandLineParser cmd(argc, argv, keys);
-    if (cmd.get<bool>("help"))
-    {
-        cout << "Usage : facedetect [options]" << endl;
-        cout << "Available options:" << endl;
-        cmd.printMessage();
-        return EXIT_SUCCESS;
-    }
     CvCapture* capture = 0;
     Mat frame, frameCopy0, frameCopy, image;
-    bool useCPU = cmd.get<bool>("s");
-    string inputName = cmd.get<string>("i");
-    outputName = cmd.get<string>("o");
-    string cascadeName = cmd.get<string>("t");
-    double scale = cmd.get<double>("c");
     ocl::OclCascadeClassifier cascade;
     CascadeClassifier cpu_cascade;
     if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
     {
-        cout << "ERROR: Could not load classifier cascade" << endl;
+        cout << "ERROR: Could not load classifier cascade: " << cascadeName << endl;
         return EXIT_FAILURE;
     }
@@ -186,7 +167,112 @@ int main( int argc, const char** argv )
     }
     cvDestroyWindow("result");
+    std::cout<< "single-threaded sample has finished" <<std::endl;
+    return 0;
+}
+///////////////////////////////////////detectfaces with multithreading////////////////////////////////////////////
+#if defined(_MSC_VER) && (_MSC_VER >= 1700)
+#define MAX_THREADS 10
+static void detectFaces(std::string fileName)
+{
+    ocl::OclCascadeClassifier cascade;
+    if(!cascade.load(cascadeName))
+    {
+        std::cout << "ERROR: Could not load classifier cascade: " << cascadeName << std::endl;
+        return;
+    }
+    Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR);
+    if (img.empty())
+    {
+        std::cout << "cann't open file " + fileName <<std::endl;
+        return;
+    }
+    ocl::oclMat d_img;
+    d_img.upload(img);
+    std::vector<Rect> oclfaces;
+    cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30), Size(0, 0));
+    for(unsigned int i = 0; i<oclfaces.size(); i++)
+        rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3);
+    std::string::size_type pos = outputName.rfind('.');
+    std::string outputNameTid = outputName + '-' + std::to_string(_threadid);
+    if(pos == std::string::npos)
+    {
+        std::cout << "Invalid output file name: " << outputName << std::endl;
+    }
+    else
+    {
+        outputNameTid = outputName.substr(0, pos) + "_" + std::to_string(_threadid) + outputName.substr(pos);
+        imwrite(outputNameTid, img);
+    }
+    imshow(outputNameTid, img);
+    waitKey(0);
+}
+static void facedetect_multithreading(int nthreads)
+{
+    int thread_number = MAX_THREADS < nthreads ? MAX_THREADS : nthreads;
+    std::vector<std::thread> threads;
+    for(int i = 0; i<thread_number; i++)
+        threads.push_back(std::thread(detectFaces, inputName));
+    for(int i = 0; i<thread_number; i++)
+        threads[i].join();
+}
+#endif
+int main( int argc, const char** argv )
+{
+    const char* keys =
+        "{ h help | false | print help message }"
+        "{ i input | | specify input image }"
+        "{ t template | haarcascade_frontalface_alt.xml |"
+        " specify template file path }"
+        "{ c scale | 1.0 | scale image }"
+        "{ s use_cpu | false | use cpu or gpu to process the image }"
+        "{ o output | facedetect_output.jpg |"
+        " specify output image save path(only works when input is images) }"
+        "{ n thread_num | 1 | set number of threads >= 1 }";
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help"))
+    {
+        cout << "Usage : facedetect [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+    bool useCPU = cmd.get<bool>("s");
+    inputName = cmd.get<string>("i");
+    outputName = cmd.get<string>("o");
+    cascadeName = cmd.get<string>("t");
+    double scale = cmd.get<double>("c");
+    int n = cmd.get<int>("n");
+    if(n > 1)
+    {
+#if defined(_MSC_VER) && (_MSC_VER >= 1700)
+        std::cout<<"multi-threaded sample is running" <<std::endl;
+        facedetect_multithreading(n);
+        std::cout<<"multi-threaded sample has finished" <<std::endl;
         return 0;
+#else
+        std::cout << "std::thread is not supported, running a single-threaded version" << std::endl;
+#endif
+    }
+    if (n<0)
+        std::cout<<"incorrect number of threads:" << n << ", running a single-threaded version" <<std::endl;
+    else
+        std::cout<<"single-threaded sample is running" <<std::endl;
+    return facedetect_one_thread(useCPU, scale);
 }
 void detect( Mat& img, vector<Rect>& faces,
......