Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
3b1fc16f
Commit
3b1fc16f
authored
Feb 24, 2013
by
Andrey Kamaev
Committed by
OpenCV Buildbot
Feb 24, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #536 from bitwangyaoyao:2.4_fixHaar
parents
e6dd4e84
9bbf1700
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
211 additions
and
1454 deletions
+211
-1454
haar.cpp
modules/ocl/src/haar.cpp
+4
-1252
haarobjectdetect.cl
modules/ocl/src/kernels/haarobjectdetect.cl
+207
-202
No files found.
modules/ocl/src/haar.cpp
View file @
3b1fc16f
...
...
@@ -47,15 +47,10 @@
//
//M*/
/* Haar features calculation */
//#define EMU
#include "precomp.hpp"
#include <stdio.h>
#include <string>
#ifdef EMU
#include "runCL.h"
#endif
using
namespace
cv
;
using
namespace
cv
::
ocl
;
using
namespace
std
;
...
...
@@ -926,7 +921,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
if
(
gimg
.
cols
<
minSize
.
width
||
gimg
.
rows
<
minSize
.
height
)
CV_Error
(
CV_StsError
,
"Image too small"
);
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
&&
gimg
.
clCxt
->
impl
->
devName
.
find
(
"Intel(R) HD Graphics"
)
==
string
::
npos
)
if
(
(
flags
&
CV_HAAR_SCALE_IMAGE
)
)
{
CvSize
winSize0
=
cascade
->
orig_window_size
;
//float scalefactor = 1.1f;
...
...
@@ -1114,30 +1109,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
p
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int4
)
,
(
void
*
)
&
pq
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
correction
));
/*
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp));
// openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
// openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect
,
"gpuRunHaarClassifierCascade"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
//t = (double)cvGetTickCount() - t;
//printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
...
...
@@ -1258,13 +1230,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
scaleinfo
[
i
].
factor
=
factor
;
int
startnodenum
=
nodenum
*
i
;
float
factor2
=
(
float
)
factor
;
/*
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
*/
vector
<
pair
<
size_t
,
const
void
*>
>
args1
;
args1
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
nodebuffer
));
...
...
@@ -1298,22 +1263,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
correctionbuffer
=
openCLCreateBuffer
(
gsum
.
clCxt
,
CL_MEM_READ_ONLY
,
sizeof
(
cl_float
)
*
loopcount
);
openCLSafeCall
(
clEnqueueWriteBuffer
(
gsum
.
clCxt
->
impl
->
clCmdQueue
,
correctionbuffer
,
1
,
0
,
sizeof
(
cl_float
)
*
loopcount
,
correction
,
0
,
NULL
,
NULL
));
//int argcount = 0;
/*openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&newnodebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&step));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&pbuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
stagebuffer
));
...
...
@@ -1335,8 +1284,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
openCLExecuteKernel
(
gsum
.
clCxt
,
&
haarobjectdetect_scaled2
,
"gpuRunHaarClassifierCascade_scaled2"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
//openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
//openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
candidate
=
(
int
*
)
clEnqueueMapBuffer
(
gsum
.
clCxt
->
impl
->
clCmdQueue
,
candidatebuffer
,
1
,
CL_MAP_READ
,
0
,
4
*
sizeof
(
int
),
0
,
0
,
0
,
&
status
);
...
...
@@ -1407,204 +1354,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
}
// static CvHaarClassifierCascade * gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
// {
// int i;
// CvHaarClassifierCascade *cascade = gpuCreateHaarClassifierCascade(n);
// cascade->orig_window_size = orig_window_size;
// for( i = 0; i < n; i++ )
// {
// int j, count, l;
// float threshold = 0;
// const char *stage = input_cascade[i];
// int dl = 0;
// /* tree links */
// int parent = -1;
// int next = -1;
// sscanf( stage, "%d%n", &count, &dl );
// stage += dl;
// assert( count > 0 );
// cascade->stage_classifier[i].count = count;
// cascade->stage_classifier[i].classifier =
// (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0]));
// for( j = 0; j < count; j++ )
// {
// CvHaarClassifier *classifier = cascade->stage_classifier[i].classifier + j;
// int k, rects = 0;
// char str[100];
// sscanf( stage, "%d%n", &classifier->count, &dl );
// stage += dl;
// classifier->haar_feature = (CvHaarFeature *) cvAlloc(
// classifier->count * ( sizeof( *classifier->haar_feature ) +
// sizeof( *classifier->threshold ) +
// sizeof( *classifier->left ) +
// sizeof( *classifier->right ) ) +
// (classifier->count + 1) * sizeof( *classifier->alpha ) );
// classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
// classifier->left = (int *) (classifier->threshold + classifier->count);
// classifier->right = (int *) (classifier->left + classifier->count);
// classifier->alpha = (float *) (classifier->right + classifier->count);
// for( l = 0; l < classifier->count; l++ )
// {
// sscanf( stage, "%d%n", &rects, &dl );
// stage += dl;
// assert( rects >= 2 && rects <= CV_HAAR_FEATURE_MAX );
// for( k = 0; k < rects; k++ )
// {
// CvRect r;
// int band = 0;
// sscanf( stage, "%d%d%d%d%d%f%n",
// &r.x, &r.y, &r.width, &r.height, &band,
// &(classifier->haar_feature[l].rect[k].weight), &dl );
// stage += dl;
// classifier->haar_feature[l].rect[k].r = r;
// }
// sscanf( stage, "%s%n", str, &dl );
// stage += dl;
// classifier->haar_feature[l].tilted = strncmp( str, "tilted", 6 ) == 0;
// for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ )
// {
// memset( classifier->haar_feature[l].rect + k, 0,
// sizeof(classifier->haar_feature[l].rect[k]) );
// }
// sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]),
// &(classifier->left[l]),
// &(classifier->right[l]), &dl );
// stage += dl;
// }
// for( l = 0; l <= classifier->count; l++ )
// {
// sscanf( stage, "%f%n", &(classifier->alpha[l]), &dl );
// stage += dl;
// }
// }
// sscanf( stage, "%f%n", &threshold, &dl );
// stage += dl;
// cascade->stage_classifier[i].threshold = threshold;
// /* load tree links */
// if( sscanf( stage, "%d%d%n", &parent, &next, &dl ) != 2 )
// {
// parent = i - 1;
// next = -1;
// }
// stage += dl;
// cascade->stage_classifier[i].parent = parent;
// cascade->stage_classifier[i].next = next;
// cascade->stage_classifier[i].child = -1;
// if( parent != -1 && cascade->stage_classifier[parent].child == -1 )
// {
// cascade->stage_classifier[parent].child = i;
// }
// }
// return cascade;
// }
#ifndef _MAX_PATH
#define _MAX_PATH 1024
#endif
// static CvHaarClassifierCascade * gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size )
// {
// const char **input_cascade = 0;
// CvHaarClassifierCascade *cascade = 0;
// int i, n;
// const char *slash;
// char name[_MAX_PATH];
// int size = 0;
// char *ptr = 0;
// if( !directory )
// CV_Error( CV_StsNullPtr, "Null path is passed" );
// n = (int)strlen(directory) - 1;
// slash = directory[n] == '\\' || directory[n] == '/' ? "" : "/";
// /* try to read the classifier from directory */
// for( n = 0; ; n++ )
// {
// sprintf( name, "%s%s%d/AdaBoostCARTHaarClassifier.txt", directory, slash, n );
// FILE *f = fopen( name, "rb" );
// if( !f )
// break;
// fseek( f, 0, SEEK_END );
// size += ftell( f ) + 1;
// fclose(f);
// }
// if( n == 0 && slash[0] )
// return (CvHaarClassifierCascade *)cvLoad( directory );
// if( n == 0 )
// CV_Error( CV_StsBadArg, "Invalid path" );
// size += (n + 1) * sizeof(char *);
// input_cascade = (const char **)cvAlloc( size );
// ptr = (char *)(input_cascade + n + 1);
// for( i = 0; i < n; i++ )
// {
// sprintf( name, "%s/%d/AdaBoostCARTHaarClassifier.txt", directory, i );
// FILE *f = fopen( name, "rb" );
// if( !f )
// CV_Error( CV_StsError, "" );
// fseek( f, 0, SEEK_END );
// size = ftell( f );
// fseek( f, 0, SEEK_SET );
// CV_Assert((size_t)size == fread( ptr, 1, size, f ));
// fclose(f);
// input_cascade[i] = ptr;
// ptr += size;
// *ptr++ = '\0';
// }
// input_cascade[n] = 0;
// cascade = gpuLoadCascadeCART( input_cascade, n, orig_window_size );
// if( input_cascade )
// cvFree( &input_cascade );
// return cascade;
// }
// static void gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade )
// {
// if( _cascade && *_cascade )
// {
// int i, j;
// CvHaarClassifierCascade *cascade = *_cascade;
// for( i = 0; i < cascade->count; i++ )
// {
// for( j = 0; j < cascade->stage_classifier[i].count; j++ )
// cvFree( &cascade->stage_classifier[i].classifier[j].haar_feature );
// cvFree( &cascade->stage_classifier[i].classifier );
// }
// gpuReleaseHidHaarClassifierCascade( (GpuHidHaarClassifierCascade **)&cascade->hid_cascade );
// cvFree( _cascade );
// }
// }
/****************************************************************************************\
* Persistence functions *
...
...
@@ -1627,937 +1380,11 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
#define ICV_HAAR_PARENT_NAME "parent"
#define ICV_HAAR_NEXT_NAME "next"
// static int gpuIsHaarClassifier( const void *struct_ptr )
// {
// return CV_IS_HAAR_CLASSIFIER( struct_ptr );
// }
// static void * gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
// {
// CvHaarClassifierCascade *cascade = NULL;
// char buf[256];
// CvFileNode *seq_fn = NULL; /* sequence */
// CvFileNode *fn = NULL;
// CvFileNode *stages_fn = NULL;
// CvSeqReader stages_reader;
// int n;
// int i, j, k, l;
// int parent, next;
// stages_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_STAGES_NAME );
// if( !stages_fn || !CV_NODE_IS_SEQ( stages_fn->tag) )
// CV_Error( CV_StsError, "Invalid stages node" );
// n = stages_fn->data.seq->total;
// cascade = gpuCreateHaarClassifierCascade(n);
// /* read size */
// seq_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_SIZE_NAME );
// if( !seq_fn || !CV_NODE_IS_SEQ( seq_fn->tag ) || seq_fn->data.seq->total != 2 )
// CV_Error( CV_StsError, "size node is not a valid sequence." );
// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 0 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 )
// CV_Error( CV_StsError, "Invalid size node: width must be positive integer" );
// cascade->orig_window_size.width = fn->data.i;
// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 1 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 )
// CV_Error( CV_StsError, "Invalid size node: height must be positive integer" );
// cascade->orig_window_size.height = fn->data.i;
// cvStartReadSeq( stages_fn->data.seq, &stages_reader );
// for( i = 0; i < n; ++i )
// {
// CvFileNode *stage_fn;
// CvFileNode *trees_fn;
// CvSeqReader trees_reader;
// stage_fn = (CvFileNode *) stages_reader.ptr;
// if( !CV_NODE_IS_MAP( stage_fn->tag ) )
// {
// sprintf( buf, "Invalid stage %d", i );
// CV_Error( CV_StsError, buf );
// }
// trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME );
// if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag )
// || trees_fn->data.seq->total <= 0 )
// {
// sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// cascade->stage_classifier[i].classifier =
// (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total
// * sizeof( cascade->stage_classifier[i].classifier[0] ) );
// for( j = 0; j < trees_fn->data.seq->total; ++j )
// {
// cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
// }
// cascade->stage_classifier[i].count = trees_fn->data.seq->total;
// cvStartReadSeq( trees_fn->data.seq, &trees_reader );
// for( j = 0; j < trees_fn->data.seq->total; ++j )
// {
// CvFileNode *tree_fn;
// CvSeqReader tree_reader;
// CvHaarClassifier *classifier;
// int last_idx;
// classifier = &cascade->stage_classifier[i].classifier[j];
// tree_fn = (CvFileNode *) trees_reader.ptr;
// if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 )
// {
// sprintf( buf, "Tree node is not a valid sequence."
// " (stage %d, tree %d)", i, j );
// CV_Error( CV_StsError, buf );
// }
// classifier->count = tree_fn->data.seq->total;
// classifier->haar_feature = (CvHaarFeature *) cvAlloc(
// classifier->count * ( sizeof( *classifier->haar_feature ) +
// sizeof( *classifier->threshold ) +
// sizeof( *classifier->left ) +
// sizeof( *classifier->right ) ) +
// (classifier->count + 1) * sizeof( *classifier->alpha ) );
// classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
// classifier->left = (int *) (classifier->threshold + classifier->count);
// classifier->right = (int *) (classifier->left + classifier->count);
// classifier->alpha = (float *) (classifier->right + classifier->count);
// cvStartReadSeq( tree_fn->data.seq, &tree_reader );
// for( k = 0, last_idx = 0; k < tree_fn->data.seq->total; ++k )
// {
// CvFileNode *node_fn;
// CvFileNode *feature_fn;
// CvFileNode *rects_fn;
// CvSeqReader rects_reader;
// node_fn = (CvFileNode *) tree_reader.ptr;
// if( !CV_NODE_IS_MAP( node_fn->tag ) )
// {
// sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)",
// k, i, j );
// CV_Error( CV_StsError, buf );
// }
// feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME );
// if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) )
// {
// sprintf( buf, "Feature node is not a valid map. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME );
// if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag )
// || rects_fn->data.seq->total < 1
// || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX )
// {
// sprintf( buf, "Rects node is not a valid sequence. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// cvStartReadSeq( rects_fn->data.seq, &rects_reader );
// for( l = 0; l < rects_fn->data.seq->total; ++l )
// {
// CvFileNode *rect_fn;
// CvRect r;
// rect_fn = (CvFileNode *) rects_reader.ptr;
// if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 )
// {
// sprintf( buf, "Rect %d is not a valid sequence. "
// "(stage %d, tree %d, node %d)", l, i, j, k );
// CV_Error( CV_StsError, buf );
// }
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 0 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
// {
// sprintf( buf, "x coordinate must be non-negative integer. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.x = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 1 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
// {
// sprintf( buf, "y coordinate must be non-negative integer. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.y = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
// || r.x + fn->data.i > cascade->orig_window_size.width )
// {
// sprintf( buf, "width must be positive integer and "
// "(x + width) must not exceed window width. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.width = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
// || r.y + fn->data.i > cascade->orig_window_size.height )
// {
// sprintf( buf, "height must be positive integer and "
// "(y + height) must not exceed window height. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.height = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 4 );
// if( !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "weight must be real number. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// classifier->haar_feature[k].rect[l].weight = (float) fn->data.f;
// classifier->haar_feature[k].rect[l].r = r;
// CV_NEXT_SEQ_ELEM( sizeof( *rect_fn ), rects_reader );
// } /* for each rect */
// for( l = rects_fn->data.seq->total; l < CV_HAAR_FEATURE_MAX; ++l )
// {
// classifier->haar_feature[k].rect[l].weight = 0;
// classifier->haar_feature[k].rect[l].r = cvRect( 0, 0, 0, 0 );
// }
// fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_TILTED_NAME);
// if( !fn || !CV_NODE_IS_INT( fn->tag ) )
// {
// sprintf( buf, "tilted must be 0 or 1. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->haar_feature[k].tilted = ( fn->data.i != 0 );
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_THRESHOLD_NAME);
// if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "threshold must be real number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->threshold[k] = (float) fn->data.f;
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_NODE_NAME);
// if( fn )
// {
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
// || fn->data.i >= tree_fn->data.seq->total )
// {
// sprintf( buf, "left node must be valid node number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* left node */
// classifier->left[k] = fn->data.i;
// }
// else
// {
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_VAL_NAME );
// if( !fn )
// {
// sprintf( buf, "left node or left value must be specified. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// if( !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "left value must be real number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* left value */
// if( last_idx >= classifier->count + 1 )
// {
// sprintf( buf, "Tree structure is broken: too many values. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->left[k] = -last_idx;
// classifier->alpha[last_idx++] = (float) fn->data.f;
// }
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_NODE_NAME);
// if( fn )
// {
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
// || fn->data.i >= tree_fn->data.seq->total )
// {
// sprintf( buf, "right node must be valid node number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* right node */
// classifier->right[k] = fn->data.i;
// }
// else
// {
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_VAL_NAME );
// if( !fn )
// {
// sprintf( buf, "right node or right value must be specified. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// if( !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "right value must be real number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* right value */
// if( last_idx >= classifier->count + 1 )
// {
// sprintf( buf, "Tree structure is broken: too many values. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->right[k] = -last_idx;
// classifier->alpha[last_idx++] = (float) fn->data.f;
// }
// CV_NEXT_SEQ_ELEM( sizeof( *node_fn ), tree_reader );
// } /* for each node */
// if( last_idx != classifier->count + 1 )
// {
// sprintf( buf, "Tree structure is broken: too few values. "
// "(stage %d, tree %d)", i, j );
// CV_Error( CV_StsError, buf );
// }
// CV_NEXT_SEQ_ELEM( sizeof( *tree_fn ), trees_reader );
// } /* for each tree */
// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_STAGE_THRESHOLD_NAME);
// if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "stage threshold must be real number. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// cascade->stage_classifier[i].threshold = (float) fn->data.f;
// parent = i - 1;
// next = -1;
// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME );
// if( !fn || !CV_NODE_IS_INT( fn->tag )
// || fn->data.i < -1 || fn->data.i >= cascade->count )
// {
// sprintf( buf, "parent must be integer number. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// parent = fn->data.i;
// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME );
// if( !fn || !CV_NODE_IS_INT( fn->tag )
// || fn->data.i < -1 || fn->data.i >= cascade->count )
// {
// sprintf( buf, "next must be integer number. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// next = fn->data.i;
// cascade->stage_classifier[i].parent = parent;
// cascade->stage_classifier[i].next = next;
// cascade->stage_classifier[i].child = -1;
// if( parent != -1 && cascade->stage_classifier[parent].child == -1 )
// {
// cascade->stage_classifier[parent].child = i;
// }
// CV_NEXT_SEQ_ELEM( sizeof( *stage_fn ), stages_reader );
// } /* for each stage */
// return cascade;
// }
// static void gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr,
// CvAttrList attributes )
// {
// int i, j, k, l;
// char buf[256];
// const CvHaarClassifierCascade *cascade = (const CvHaarClassifierCascade *) struct_ptr;
// /* TODO: parameters check */
// cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HAAR, attributes );
// cvStartWriteStruct( fs, ICV_HAAR_SIZE_NAME, CV_NODE_SEQ | CV_NODE_FLOW );
// cvWriteInt( fs, NULL, cascade->orig_window_size.width );
// cvWriteInt( fs, NULL, cascade->orig_window_size.height );
// cvEndWriteStruct( fs ); /* size */
// cvStartWriteStruct( fs, ICV_HAAR_STAGES_NAME, CV_NODE_SEQ );
// for( i = 0; i < cascade->count; ++i )
// {
// cvStartWriteStruct( fs, NULL, CV_NODE_MAP );
// sprintf( buf, "stage %d", i );
// cvWriteComment( fs, buf, 1 );
// cvStartWriteStruct( fs, ICV_HAAR_TREES_NAME, CV_NODE_SEQ );
// for( j = 0; j < cascade->stage_classifier[i].count; ++j )
// {
// CvHaarClassifier *tree = &cascade->stage_classifier[i].classifier[j];
// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ );
// sprintf( buf, "tree %d", j );
// cvWriteComment( fs, buf, 1 );
// for( k = 0; k < tree->count; ++k )
// {
// CvHaarFeature *feature = &tree->haar_feature[k];
// cvStartWriteStruct( fs, NULL, CV_NODE_MAP );
// if( k )
// {
// sprintf( buf, "node %d", k );
// }
// else
// {
// sprintf( buf, "root node" );
// }
// cvWriteComment( fs, buf, 1 );
// cvStartWriteStruct( fs, ICV_HAAR_FEATURE_NAME, CV_NODE_MAP );
// cvStartWriteStruct( fs, ICV_HAAR_RECTS_NAME, CV_NODE_SEQ );
// for( l = 0; l < CV_HAAR_FEATURE_MAX && feature->rect[l].r.width != 0; ++l )
// {
// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ | CV_NODE_FLOW );
// cvWriteInt( fs, NULL, feature->rect[l].r.x );
// cvWriteInt( fs, NULL, feature->rect[l].r.y );
// cvWriteInt( fs, NULL, feature->rect[l].r.width );
// cvWriteInt( fs, NULL, feature->rect[l].r.height );
// cvWriteReal( fs, NULL, feature->rect[l].weight );
// cvEndWriteStruct( fs ); /* rect */
// }
// cvEndWriteStruct( fs ); /* rects */
// cvWriteInt( fs, ICV_HAAR_TILTED_NAME, feature->tilted );
// cvEndWriteStruct( fs ); /* feature */
// cvWriteReal( fs, ICV_HAAR_THRESHOLD_NAME, tree->threshold[k]);
// if( tree->left[k] > 0 )
// {
// cvWriteInt( fs, ICV_HAAR_LEFT_NODE_NAME, tree->left[k] );
// }
// else
// {
// cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME,
// tree->alpha[-tree->left[k]] );
// }
// if( tree->right[k] > 0 )
// {
// cvWriteInt( fs, ICV_HAAR_RIGHT_NODE_NAME, tree->right[k] );
// }
// else
// {
// cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME,
// tree->alpha[-tree->right[k]] );
// }
// cvEndWriteStruct( fs ); /* split */
// }
// cvEndWriteStruct( fs ); /* tree */
// }
// cvEndWriteStruct( fs ); /* trees */
// cvWriteReal( fs, ICV_HAAR_STAGE_THRESHOLD_NAME, cascade->stage_classifier[i].threshold);
// cvWriteInt( fs, ICV_HAAR_PARENT_NAME, cascade->stage_classifier[i].parent );
// cvWriteInt( fs, ICV_HAAR_NEXT_NAME, cascade->stage_classifier[i].next );
// cvEndWriteStruct( fs ); /* stage */
// } /* for each stage */
// cvEndWriteStruct( fs ); /* stages */
// cvEndWriteStruct( fs ); /* root */
// }
// static void * gpuCloneHaarClassifier( const void *struct_ptr )
// {
// CvHaarClassifierCascade *cascade = NULL;
// int i, j, k, n;
// const CvHaarClassifierCascade *cascade_src =
// (const CvHaarClassifierCascade *) struct_ptr;
// n = cascade_src->count;
// cascade = gpuCreateHaarClassifierCascade(n);
// cascade->orig_window_size = cascade_src->orig_window_size;
// for( i = 0; i < n; ++i )
// {
// cascade->stage_classifier[i].parent = cascade_src->stage_classifier[i].parent;
// cascade->stage_classifier[i].next = cascade_src->stage_classifier[i].next;
// cascade->stage_classifier[i].child = cascade_src->stage_classifier[i].child;
// cascade->stage_classifier[i].threshold = cascade_src->stage_classifier[i].threshold;
// cascade->stage_classifier[i].count = 0;
// cascade->stage_classifier[i].classifier =
// (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count
// * sizeof( cascade->stage_classifier[i].classifier[0] ) );
// cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count;
// for( j = 0; j < cascade->stage_classifier[i].count; ++j )
// cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
// for( j = 0; j < cascade->stage_classifier[i].count; ++j )
// {
// const CvHaarClassifier *classifier_src =
// &cascade_src->stage_classifier[i].classifier[j];
// CvHaarClassifier *classifier =
// &cascade->stage_classifier[i].classifier[j];
// classifier->count = classifier_src->count;
// classifier->haar_feature = (CvHaarFeature *) cvAlloc(
// classifier->count * ( sizeof( *classifier->haar_feature ) +
// sizeof( *classifier->threshold ) +
// sizeof( *classifier->left ) +
// sizeof( *classifier->right ) ) +
// (classifier->count + 1) * sizeof( *classifier->alpha ) );
// classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
// classifier->left = (int *) (classifier->threshold + classifier->count);
// classifier->right = (int *) (classifier->left + classifier->count);
// classifier->alpha = (float *) (classifier->right + classifier->count);
// for( k = 0; k < classifier->count; ++k )
// {
// classifier->haar_feature[k] = classifier_src->haar_feature[k];
// classifier->threshold[k] = classifier_src->threshold[k];
// classifier->left[k] = classifier_src->left[k];
// classifier->right[k] = classifier_src->right[k];
// classifier->alpha[k] = classifier_src->alpha[k];
// }
// classifier->alpha[classifier->count] =
// classifier_src->alpha[classifier->count];
// }
// }
// return cascade;
// }
#if 0
CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier,
(CvReleaseFunc)gpuReleaseHaarClassifierCascade,
gpuReadHaarClassifier, gpuWriteHaarClassifier,
gpuCloneHaarClassifier );
namespace cv
{
HaarClassifierCascade::HaarClassifierCascade() {}
HaarClassifierCascade::HaarClassifierCascade(const String &filename)
{
load(filename);
}
bool HaarClassifierCascade::load(const String &filename)
{
cascade = Ptr<CvHaarClassifierCascade>((CvHaarClassifierCascade *)cvLoad(filename.c_str(), 0, 0, 0));
return (CvHaarClassifierCascade *)cascade != 0;
}
void HaarClassifierCascade::detectMultiScale( const Mat &image,
Vector<Rect> &objects, double scaleFactor,
int minNeighbors, int flags,
Size minSize )
{
MemStorage storage(cvCreateMemStorage(0));
CvMat _image = image;
CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor,
minNeighbors, flags, minSize );
Seq<Rect>(_objects).copyTo(objects);
}
int HaarClassifierCascade::runAt(Point pt, int startStage, int) const
{
return gpuRunHaarClassifierCascade(cascade, pt, startStage);
}
void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum,
const Mat &tilted, double scale )
{
CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted;
gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale );
}
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////reserved functios//////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*#if CV_SSE2
# if CV_SSE4 || defined __SSE4__
# include <smmintrin.h>
# else
# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
# endif
#if defined CV_ICC
# define CV_HAAR_USE_SSE 1
#endif
#endif*/
/*
CV_IMPL void
gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
const CvArr* _sum,
const CvArr* _sqsum,
const CvArr* _tilted_sum,
double scale )
{
CvMat sum_stub, *sum = (CvMat*)_sum;
CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
GpuHidHaarClassifierCascade* cascade;
int coi0 = 0, coi1 = 0;
int i;
int datasize;
int totalclassifier;
CvRect equRect;
double weight_scale;
int rows,cols;
if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" );
if( scale <= 0 )
CV_Error( CV_StsOutOfRange, "Scale must be positive" );
sum = cvGetMat( sum, &sum_stub, &coi0 );
sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
if( coi0 || coi1 )
CV_Error( CV_BadCOI, "COI is not supported" );
if( !CV_ARE_SIZES_EQ( sum, sqsum ))
CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
CV_MAT_TYPE(sum->type) != CV_32SC1 )
CV_Error( CV_StsUnsupportedFormat,
"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
if( !_cascade->hid_cascade )
gpuCreateHidHaarClassifierCascade(_cascade,&datasize,&totalclassifier);
cascade =(GpuHidHaarClassifierCascade *)_cascade->hid_cascade;
if( cascade->has_tilted_features )
{
tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
CV_Error( CV_StsUnsupportedFormat,
"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
if( sum->step != tilted->step )
CV_Error( CV_StsUnmatchedSizes,
"Sum and tilted_sum must have the same stride (step, widthStep)" );
if( !CV_ARE_SIZES_EQ( sum, tilted ))
CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
//cascade->tilted = *tilted;
}
_cascade->scale = scale;
_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
//cascade->sum = *sum;
//cascade->sqsum = *sqsum;
equRect.x = equRect.y = cvRound(scale);
equRect.width = cvRound((_cascade->orig_window_size.width-2)*scale);
equRect.height = cvRound((_cascade->orig_window_size.height-2)*scale);
weight_scale = 1./(equRect.width*equRect.height);
cascade->inv_window_area = weight_scale;
cascade->p0 = sum_elem_ptr(*sum, equRect.y, equRect.x);
cascade->p1 = sum_elem_ptr(*sum, equRect.y, equRect.x + equRect.width );
cascade->p2 = sum_elem_ptr(*sum, equRect.y + equRect.height, equRect.x );
cascade->p3 = sum_elem_ptr(*sum, equRect.y + equRect.height,
equRect.x + equRect.width );
*/
/* rows=sum->rows;
cols=sum->cols;
cascade->p0 = equRect.y*cols + equRect.x;
cascade->p1 = equRect.y*cols + equRect.x + equRect.width;
cascade->p2 = (equRect.y + equRect.height) * cols + equRect.x;
cascade->p3 = (equRect.y + equRect.height) * cols + equRect.x + equRect.width ;
*/
/*
cascade->pq0 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x);
cascade->pq1 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x + equRect.width );
cascade->pq2 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, equRect.x );
cascade->pq3 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height,
equRect.x + equRect.width );
*/
/* init pointers in haar features according to real window size and
given image pointers */
/* for( i = 0; i < _cascade->count; i++ )
{
int j, k, l;
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
for( l = 0; l < cascade->stage_classifier[i].classifier[j].count; l++ )
{
CvHaarFeature* feature =
&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
*/
/* GpuHidHaarClassifier* classifier =
cascade->stage_classifier[i].classifier + j; */
//GpuHidHaarFeature* hidfeature =
// &cascade->stage_classifier[i].classifier[j].node[l].feature;
/* double sum0 = 0, area0 = 0;
CvRect r[3];
int base_w = -1, base_h = -1;
int new_base_w = 0, new_base_h = 0;
int kx, ky;
int flagx = 0, flagy = 0;
int x0 = 0, y0 = 0;
int nr;
*/
/* align blocks */
/* for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
{
//if( !hidfeature->rect[k].p0 )
// break;
r[k] = feature->rect[k].r;
base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
}
nr = k;
base_w += 1;
base_h += 1;
kx = r[0].width / base_w;
ky = r[0].height / base_h;
if( kx <= 0 )
{
flagx = 1;
new_base_w = cvRound( r[0].width * scale ) / kx;
x0 = cvRound( r[0].x * scale );
}
if( ky <= 0 )
{
flagy = 1;
new_base_h = cvRound( r[0].height * scale ) / ky;
y0 = cvRound( r[0].y * scale );
}
for( k = 0; k < nr; k++ )
{
CvRect tr;
double correction_ratio;
if( flagx )
{
tr.x = (r[k].x - r[0].x) * new_base_w / base_w + x0;
tr.width = r[k].width * new_base_w / base_w;
}
else
static
int
gpuRunHaarClassifierCascade
(
/*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */
)
{
tr.x = cvRound( r[k].x * scale );
tr.width = cvRound( r[k].width * scale );
}
if( flagy )
{
tr.y = (r[k].y - r[0].y) * new_base_h / base_h + y0;
tr.height = r[k].height * new_base_h / base_h;
}
else
{
tr.y = cvRound( r[k].y * scale );
tr.height = cvRound( r[k].height * scale );
}
#if CV_ADJUST_WEIGHTS
{
// RAINER START
const float orig_feature_size = (float)(feature->rect[k].r.width)*feature->rect[k].r.height;
const float orig_norm_size = (float)(_cascade->orig_window_size.width)*(_cascade->orig_window_size.height);
const float feature_size = float(tr.width*tr.height);
//const float normSize = float(equRect.width*equRect.height);
float target_ratio = orig_feature_size / orig_norm_size;
//float isRatio = featureSize / normSize;
//correctionRatio = targetRatio / isRatio / normSize;
correction_ratio = target_ratio / feature_size;
// RAINER END
}
#else
correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5);
#endif
if( !feature->tilted )
{
hidfeature->rect[k].p0 = tr.y * rows + tr.x;
hidfeature->rect[k].p1 = tr.y * rows + tr.x + tr.width;
hidfeature->rect[k].p2 = (tr.y + tr.height) * rows + tr.x;
hidfeature->rect[k].p3 = (tr.y + tr.height) * rows + tr.x + tr.width;
}
else
{
hidfeature->rect[k].p2 = (tr.y + tr.width) * rows + tr.x + tr.width;
hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * rows + tr.x + tr.width - tr.height;
hidfeature->rect[k].p0 = tr.y*rows + tr.x;
hidfeature->rect[k].p1 = (tr.y + tr.height) * rows + tr.x - tr.height;
}
//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
if( k == 0 )
area0 = tr.width * tr.height;
else
;// sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
}
//hidfeature->rect[0].weight = (float)(-sum0/area0);*/
// } /* l */
// } /* j */
// }
//}
/*
CV_INLINE
double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
double variance_norm_factor,
size_t p_offset )
{
int idx = 0;
do
{
GpuHidHaarTreeNode* node = classifier->node + idx;
double t = node->threshold * variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
idx = sum < t ? node->left : node->right;
}
while( idx > 0 );
return classifier->alpha[-idx];
return 0.;
}
*/
static
int
gpuRunHaarClassifierCascade
(
/*const CvHaarClassifierCascade *_cascade,
CvPoint pt, int start_stage */
)
{
/*
int result = -1;
int p_offset, pq_offset;
int i, j;
double mean, variance_norm_factor;
GpuHidHaarClassifierCascade* cascade;
if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid cascade pointer" );
cascade = (GpuHidHaarClassifierCascade*) _cascade->hid_cascade;
if( !cascade )
CV_Error( CV_StsNullPtr, "Hidden cascade has not been created.\n"
"Use gpuSetImagesForHaarClassifierCascade" );
if( pt.x < 0 || pt.y < 0 ||
pt.x + _cascade->real_window_size.width >= cascade->sum.width-2 ||
pt.y + _cascade->real_window_size.height >= cascade->sum.height-2 )
return -1;
p_offset = pt.y * (cascade->sum.step/sizeof(sumtype)) + pt.x;
pq_offset = pt.y * (cascade->sqsum.step/sizeof(sqsumtype)) + pt.x;
mean = calc_sum(*cascade,p_offset)*cascade->inv_window_area;
variance_norm_factor = cascade->pq0[pq_offset] - cascade->pq1[pq_offset] -
cascade->pq2[pq_offset] + cascade->pq3[pq_offset];
variance_norm_factor = variance_norm_factor*cascade->inv_window_area - mean*mean;
if( variance_norm_factor >= 0. )
variance_norm_factor = sqrt(variance_norm_factor);
else
variance_norm_factor = 1.;
if( cascade->is_stump_based )
{
for( i = start_stage; i < cascade->count; i++ )
{
double stage_sum = 0;
if( cascade->stage_classifier[i].two_rects )
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
GpuHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
else
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
GpuHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
if( stage_sum < cascade->stage_classifier[i].threshold )
return -i;
}
}
*/
return
1
;
}
namespace
cv
{
namespace
ocl
...
...
@@ -2671,78 +1498,3 @@ struct gpuHaarDetectObjects_ScaleCascade_Invoker
}
}
/*
typedef struct _ALIGNED_ON(128) GpuHidHaarFeature
{
struct _ALIGNED_ON(32)
{
int p0 _ALIGNED_ON(4);
int p1 _ALIGNED_ON(4);
int p2 _ALIGNED_ON(4);
int p3 _ALIGNED_ON(4);
float weight _ALIGNED_ON(4);
}
rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32);
}
GpuHidHaarFeature;
typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
{
int left _ALIGNED_ON(4);
int right _ALIGNED_ON(4);
float threshold _ALIGNED_ON(4);
int p0[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
int p1[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
int p2[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
int p3[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
float weight[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
float alpha[2] _ALIGNED_ON(8);
// GpuHidHaarFeature feature __attribute__((aligned (128)));
}
GpuHidHaarTreeNode;
typedef struct _ALIGNED_ON(32) GpuHidHaarClassifier
{
int count _ALIGNED_ON(4);
//CvHaarFeature* orig_feature;
GpuHidHaarTreeNode* node _ALIGNED_ON(8);
float* alpha _ALIGNED_ON(8);
}
GpuHidHaarClassifier;
typedef struct _ALIGNED_ON(64) __attribute__((aligned (64))) GpuHidHaarStageClassifier
{
int count _ALIGNED_ON(4);
float threshold _ALIGNED_ON(4);
int two_rects _ALIGNED_ON(4);
GpuHidHaarClassifier* classifier _ALIGNED_ON(8);
struct GpuHidHaarStageClassifier* next _ALIGNED_ON(8);
struct GpuHidHaarStageClassifier* child _ALIGNED_ON(8);
struct GpuHidHaarStageClassifier* parent _ALIGNED_ON(8);
}
GpuHidHaarStageClassifier;
typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
{
int count _ALIGNED_ON(4);
int is_stump_based _ALIGNED_ON(4);
int has_tilted_features _ALIGNED_ON(4);
int is_tree _ALIGNED_ON(4);
int pq0 _ALIGNED_ON(4);
int pq1 _ALIGNED_ON(4);
int pq2 _ALIGNED_ON(4);
int pq3 _ALIGNED_ON(4);
int p0 _ALIGNED_ON(4);
int p1 _ALIGNED_ON(4);
int p2 _ALIGNED_ON(4);
int p3 _ALIGNED_ON(4);
float inv_window_area _ALIGNED_ON(4);
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
}GpuHidHaarClassifierCascade;
*/
/* End of file. */
modules/ocl/src/kernels/haarobjectdetect.cl
View file @
3b1fc16f
...
...
@@ -9,6 +9,7 @@
//
Niko
Li,
newlife20080214@gmail.com
//
Wang
Weiyan,
wangweiyanster@gmail.com
//
Jia
Haipeng,
jiahaipeng95@gmail.com
//
Nathan,
liujun@multicorewareinc.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
...
...
@@ -47,14 +48,14 @@ typedef float sqsumtype;
typedef
struct
__attribute__
((
aligned
(
128
)))
GpuHidHaarFeature
{
struct
__attribute__
((
aligned
(
32
)))
{
int
p0
__attribute__
((
aligned
(
4
)))
;
int
p1
__attribute__
((
aligned
(
4
)))
;
int
p2
__attribute__
((
aligned
(
4
)))
;
int
p3
__attribute__
((
aligned
(
4
)))
;
float
weight
__attribute__
((
aligned
(
4
)))
;
}
rect[CV_HAAR_FEATURE_MAX]
__attribute__
((
aligned
(
32
)))
;
{
int
p0
__attribute__
((
aligned
(
4
)))
;
int
p1
__attribute__
((
aligned
(
4
)))
;
int
p2
__attribute__
((
aligned
(
4
)))
;
int
p3
__attribute__
((
aligned
(
4
)))
;
float
weight
__attribute__
((
aligned
(
4
)))
;
}
rect[CV_HAAR_FEATURE_MAX]
__attribute__
((
aligned
(
32
)))
;
}
GpuHidHaarFeature
;
...
...
@@ -108,31 +109,31 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
int
p2
__attribute__
((
aligned
(
4
)))
;
int
p3
__attribute__
((
aligned
(
4
)))
;
float
inv_window_area
__attribute__
((
aligned
(
4
)))
;
}GpuHidHaarClassifierCascade
;
}
GpuHidHaarClassifierCascade
;
__kernel
void
__attribute__
((
reqd_work_group_size
(
8
,
8
,
1
)))
gpuRunHaarClassifierCascade
(
//constant
GpuHidHaarClassifierCascade
*
cascade,
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
global
int4
*
info,
global
GpuHidHaarTreeNode
*
nodeptr,
global
const
int
*
restrict
sum1,
global
const
float
*
restrict
sqsum1,
global
int4
*
candidate,
const
int
pixelstep,
const
int
loopcount,
const
int
start_stage,
const
int
split_stage,
const
int
end_stage,
const
int
startnode,
const
int
splitnode,
const
int4
p,
const
int4
pq,
const
float
correction
//const
int
width,
//const
int
height,
//const
int
grpnumperline,
//const
int
totalgrp
)
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
global
int4
*
info,
global
GpuHidHaarTreeNode
*
nodeptr,
global
const
int
*
restrict
sum1,
global
const
float
*
restrict
sqsum1,
global
int4
*
candidate,
const
int
pixelstep,
const
int
loopcount,
const
int
start_stage,
const
int
split_stage,
const
int
end_stage,
const
int
startnode,
const
int
splitnode,
const
int4
p,
const
int4
pq,
const
float
correction
//const
int
width,
//const
int
height,
//const
int
grpnumperline,
//const
int
totalgrp
)
{
int
grpszx
=
get_local_size
(
0
)
;
int
grpszy
=
get_local_size
(
1
)
;
...
...
@@ -184,7 +185,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
__global
const
int
*
sum
=
sum1
+
imgoff
;
__global
const
float
*
sqsum
=
sqsum1
+
imgoff
;
for
(
int
grploop=grpidx
;
grploop<totalgrp;
grploop+=grpnumx)
for
(
int
grploop=grpidx
;
grploop<totalgrp;
grploop+=grpnumx)
{
int
grpidy
=
grploop
/
grpnumperline
;
int
grpidx
=
grploop
-
mul24
(
grpidy,
grpnumperline
)
;
...
...
@@ -195,7 +196,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int
grpoffx
=
x-lclidx
;
int
grpoffy
=
y-lclidy
;
for
(
int
i=0
;
i<read_loop;
i++)
for
(
int
i=0
;
i<read_loop;
i++)
{
int
pos_id
=
mad24
(
i,lcl_sz,lcl_id
)
;
pos_id
=
pos_id
<
total_read
?
pos_id
:
0
;
...
...
@@ -234,15 +235,15 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
cascadeinfo1.x
+=lcl_off
;
cascadeinfo1.z
+=lcl_off
;
mean
=
(
lcldata[mad24
(
cascadeinfo1.y,readwidth,cascadeinfo1.x
)
]
-
lcldata[mad24
(
cascadeinfo1.y,readwidth,cascadeinfo1.z
)
]
-
lcldata[mad24
(
cascadeinfo1.w,readwidth,cascadeinfo1.x
)
]
+
lcldata[mad24
(
cascadeinfo1.w,readwidth,cascadeinfo1.z
)
]
)
*correction
;
lcldata[mad24
(
cascadeinfo1.w,readwidth,cascadeinfo1.x
)
]
+
lcldata[mad24
(
cascadeinfo1.w,readwidth,cascadeinfo1.z
)
]
)
*correction
;
int
p_offset
=
mad24
(
y,
pixelstep,
x
)
;
cascadeinfo2.x
+=p_offset
;
cascadeinfo2.z
+=p_offset
;
variance_norm_factor
=sqsum[mad24
(
cascadeinfo2.y,
pixelstep,
cascadeinfo2.x
)
]
-
sqsum[mad24
(
cascadeinfo2.y,
pixelstep,
cascadeinfo2.z
)
]
-
sqsum[mad24
(
cascadeinfo2.w,
pixelstep,
cascadeinfo2.x
)
]
+
sqsum[mad24
(
cascadeinfo2.w,
pixelstep,
cascadeinfo2.z
)
]
;
sqsum[mad24
(
cascadeinfo2.w,
pixelstep,
cascadeinfo2.x
)
]
+
sqsum[mad24
(
cascadeinfo2.w,
pixelstep,
cascadeinfo2.z
)
]
;
variance_norm_factor
=
variance_norm_factor
*
correction
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=0.f
?
sqrt
(
variance_norm_factor
)
:
1.f
;
...
...
@@ -270,19 +271,19 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
info2.z
+=lcl_off
;
float
classsum
=
(
lcldata[mad24
(
info1.y,readwidth,info1.x
)
]
-
lcldata[mad24
(
info1.y,readwidth,info1.z
)
]
-
lcldata[mad24
(
info1.w,readwidth,info1.x
)
]
+
lcldata[mad24
(
info1.w,readwidth,info1.z
)
]
)
*
w.x
;
lcldata[mad24
(
info1.w,readwidth,info1.x
)
]
+
lcldata[mad24
(
info1.w,readwidth,info1.z
)
]
)
*
w.x
;
classsum
+=
(
lcldata[mad24
(
info2.y,readwidth,info2.x
)
]
-
lcldata[mad24
(
info2.y,readwidth,info2.z
)
]
-
lcldata[mad24
(
info2.w,readwidth,info2.x
)
]
+
lcldata[mad24
(
info2.w,readwidth,info2.z
)
]
)
*
w.y
;
lcldata[mad24
(
info2.w,readwidth,info2.x
)
]
+
lcldata[mad24
(
info2.w,readwidth,info2.z
)
]
)
*
w.y
;
//if
((
info3.z
-
info3.x
)
&&
(
!stageinfo.z
))
//{
info3.x
+=lcl_off
;
info3.z
+=lcl_off
;
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
info3.x
+=lcl_off
;
info3.z
+=lcl_off
;
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
//}
stage_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
nodecounter++
;
...
...
@@ -299,12 +300,13 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
queuecount
=
lclcount[0]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
nodecounter
=
splitnode
;
for
(
int
stageloop
=
split_stage
; stageloop< end_stage && queuecount>0;stageloop++)
for
(
int
stageloop
=
split_stage
; stageloop< end_stage && queuecount>0;
stageloop++)
{
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//if
(
lcl_id
==
0
)
lclcount[0]=0
;
lclcount[0]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int2
stageinfo
=
*
(
global
int2*
)(
stagecascadeptr+stageloop
)
;
...
...
@@ -316,70 +318,73 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int
lcl_compute_win_id
=
(
lcl_id
>>
(
6-perfscale
))
;
int
lcl_loops
=
(
stageinfo.x
+
lcl_compute_win
-1
)
>>
(
6-perfscale
)
;
int
lcl_compute_id
=
lcl_id
-
(
lcl_compute_win_id
<<
(
6-perfscale
))
;
for
(
int
queueloop=0
;
queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/;
queueloop++)
for
(
int
queueloop=0
;
queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/;
queueloop++)
{
float
stage_sum
=
0.f
;
int
temp_coord
=
lcloutindex[lcl_compute_win_id<<1]
;
float
variance_norm_factor
=
as_float
(
lcloutindex[
(
lcl_compute_win_id<<1
)
+1]
)
;
int
queue_pixel
=
mad24
(((
temp_coord
&
(
int
)
0xffff0000
)
>>16
)
,
readwidth,temp_coord
&
0xffff
)
;
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lcl_compute_win_id
<
queuecount
)
{
int
tempnodecounter
=
lcl_compute_id
;
float
part_sum
=
0.f
;
for
(
int
lcl_loop=0
;lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;lcl_loop++)
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lcl_compute_win_id
<
queuecount
)
{
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
nodeptr
+
nodecounter
+
tempnodecounter
)
;
int4
info1
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float2
alpha2
=
*
(
__global
float2*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
int
tempnodecounter
=
lcl_compute_id
;
float
part_sum
=
0.f
;
for
(
int
lcl_loop=0
; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
{
__global
GpuHidHaarTreeNode*
currentnodeptr
=
(
nodeptr
+
nodecounter
+
tempnodecounter
)
;
info1.x
+=queue_pixel
;
info1.z
+=queue_pixel
;
info2.x
+=queue_pixel
;
info2.z
+=queue_pixel
;
int4
info1
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[0][0]
))
;
int4
info2
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[1][0]
))
;
int4
info3
=
*
(
__global
int4*
)(
&
(
currentnodeptr->p[2][0]
))
;
float4
w
=
*
(
__global
float4*
)(
&
(
currentnodeptr->weight[0]
))
;
float2
alpha2
=
*
(
__global
float2*
)(
&
(
currentnodeptr->alpha[0]
))
;
float
nodethreshold
=
w.w
*
variance_norm_factor
;
float
classsum
=
(
lcldata[mad24
(
info1.y,readwidth,info1.x
)
]
-
lcldata[mad24
(
info1.y,readwidth,info1.z
)
]
-
lcldata[mad24
(
info1.w,readwidth,info1.x
)
]
+
lcldata[mad24
(
info1.w,readwidth,info1.z
)
]
)
*
w.x
;
info1.x
+=queue_pixel
;
info1.z
+=queue_pixel
;
info2.x
+=queue_pixel
;
info2.z
+=queue_pixel
;
float
classsum
=
(
lcldata[mad24
(
info1.y,readwidth,info1.x
)
]
-
lcldata[mad24
(
info1.y,readwidth,info1.z
)
]
-
lcldata[mad24
(
info1.w,readwidth,info1.x
)
]
+
lcldata[mad24
(
info1.w,readwidth,info1.z
)
]
)
*
w.x
;
classsum
+=
(
lcldata[mad24
(
info2.y,readwidth,info2.x
)
]
-
lcldata[mad24
(
info2.y,readwidth,info2.z
)
]
-
lcldata[mad24
(
info2.w,readwidth,info2.x
)
]
+
lcldata[mad24
(
info2.w,readwidth,info2.z
)
]
)
*
w.y
;
//if
((
info3.z
-
info3.x
)
&&
(
!stageinfo.z
))
//{
classsum
+=
(
lcldata[mad24
(
info2.y,readwidth,info2.x
)
]
-
lcldata[mad24
(
info2.y,readwidth,info2.z
)
]
-
lcldata[mad24
(
info2.w,readwidth,info2.x
)
]
+
lcldata[mad24
(
info2.w,readwidth,info2.z
)
]
)
*
w.y
;
//if
((
info3.z
-
info3.x
)
&&
(
!stageinfo.z
))
//{
info3.x
+=queue_pixel
;
info3.z
+=queue_pixel
;
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
//}
part_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
tempnodecounter
+=lcl_compute_win
;
}//end
for
(
int
lcl_loop=0
;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lcl_compute_win_id
<
queuecount
)
{
for
(
int
i=0
;i<lcl_compute_win && (lcl_compute_id==0);i++)
{
stage_sum
+=
partialsum[lcl_id+i]
;
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
//}
part_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
tempnodecounter
+=lcl_compute_win
;
}//end
for
(
int
lcl_loop=0
;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum
;
}
if
(
stage_sum
>=
stagethreshold
&&
(
lcl_compute_id==0
))
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lcl_compute_win_id
<
queuecount
)
{
int
queueindex
=
atomic_inc
(
lclcount
)
;
lcloutindex[queueindex<<1]
=
temp_coord
;
lcloutindex[
(
queueindex<<1
)
+1]
=
as_int
(
variance_norm_factor
)
;
for
(
int
i=0
; i<lcl_compute_win && (lcl_compute_id==0); i++)
{
stage_sum
+=
partialsum[lcl_id+i]
;
}
if
(
stage_sum
>=
stagethreshold
&&
(
lcl_compute_id==0
))
{
int
queueindex
=
atomic_inc
(
lclcount
)
;
lcloutindex[queueindex<<1]
=
temp_coord
;
lcloutindex[
(
queueindex<<1
)
+1]
=
as_int
(
variance_norm_factor
)
;
}
lcl_compute_win_id
+=
(
1<<perfscale
)
;
}
lcl_compute_win_id
+=
(
1<<perfscale
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}//end
for
(
int
queueloop=0
;queueloop<queuecount_loop;queueloop++)
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
queuecount
=
lclcount[0]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
nodecounter
+=
stageinfo.x
;
}//end
for
(
int
stageloop
=
splitstage
; stageloop< endstage && queuecount>0;stageloop++)
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -420,138 +425,138 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
/*
if
(
stagecascade->two_rects
)
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,p_offset,0
)
*
t1.weight[0]
;
/*
if
(
stagecascade->two_rects
)
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,p_offset,0
)
*
t1.weight[0]
;
classsum
+=
calc_sum1
(
t1,
p_offset,1
)
*
t1.weight[1]
;
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]:t1.alpha[0]
;
classsum
+=
calc_sum1
(
t1,
p_offset,1
)
*
t1.weight[1]
;
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]:t1.alpha[0]
;
counter++
;
}
}
else
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t
=
node[counter].threshold*variance_norm_factor
;
classsum
=
calc_sum1
(
node[counter],p_offset,0
)
*
node[counter].weight[0]
;
classsum
+=
calc_sum1
(
node[counter],p_offset,1
)
*
node[counter].weight[1]
;
counter++
;
}
}
else
{
#
pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t
=
node[counter].threshold*variance_norm_factor
;
classsum
=
calc_sum1
(
node[counter],p_offset,0
)
*
node[counter].weight[0]
;
classsum
+=
calc_sum1
(
node[counter],p_offset,1
)
*
node[counter].weight[1]
;
if
(
node[counter].p0[2]
)
classsum
+=
calc_sum1
(
node[counter],p_offset,2
)
*
node[counter].weight[2]
;
if
(
node[counter].p0[2]
)
classsum
+=
calc_sum1
(
node[counter],p_offset,2
)
*
node[counter].weight[2]
;
stage_sum
+=
classsum
>=
t
?
node[counter].alpha[1]:node[counter].alpha[0]
;// modify
stage_sum
+=
classsum
>=
t
?
node[counter].alpha[1]:node[counter].alpha[0]
;// modify
counter++
;
}
}
*/
/*
counter++
;
}
}
*/
/*
__kernel
void
gpuRunHaarClassifierCascade_ScaleWindow
(
constant
GpuHidHaarClassifierCascade
*
_cascade,
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
//global
GpuHidHaarClassifier
*
classifierptr,
global
GpuHidHaarTreeNode
*
nodeptr,
global
int
*
sum,
global
float
*
sqsum,
global
int
*
_candidate,
int
pixel_step,
int
cols,
int
rows,
int
start_stage,
int
end_stage,
//int
counts,
int
nodenum,
int
ystep,
int
detect_width,
//int
detect_height,
int
loopcount,
int
outputstep
)
//float
scalefactor
)
constant
GpuHidHaarClassifierCascade
*
_cascade,
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
//global
GpuHidHaarClassifier
*
classifierptr,
global
GpuHidHaarTreeNode
*
nodeptr,
global
int
*
sum,
global
float
*
sqsum,
global
int
*
_candidate,
int
pixel_step,
int
cols,
int
rows,
int
start_stage,
int
end_stage,
//int
counts,
int
nodenum,
int
ystep,
int
detect_width,
//int
detect_height,
int
loopcount,
int
outputstep
)
//float
scalefactor
)
{
unsigned
int
x1
=
get_global_id
(
0
)
;
unsigned
int
y1
=
get_global_id
(
1
)
;
int
p_offset
;
int
m,
n
;
int
result
;
int
counter
;
float
mean,
variance_norm_factor
;
for
(
int
i=0
;i<loopcount;i++)
{
constant
GpuHidHaarClassifierCascade
*
cascade
=
_cascade
+
i
;
global
int
*
candidate
=
_candidate
+
i*outputstep
;
int
window_width
=
cascade->p1
-
cascade->p0
;
int
window_height
=
window_width
;
result
=
1
;
counter
=
0
;
unsigned
int
x
=
mul24
(
x1,ystep
)
;
unsigned
int
y
=
mul24
(
y1,ystep
)
;
if
((
x
<
cols
-
window_width
-
1
)
&&
(
y
<
rows
-
window_height
-1
))
{
global
GpuHidHaarStageClassifier
*stagecascade
=
stagecascadeptr
+cascade->count*i+
start_stage
;
//global
GpuHidHaarClassifier
*classifier
=
classifierptr
;
global
GpuHidHaarTreeNode
*node
=
nodeptr
+
nodenum*i
;
unsigned
int
x1
=
get_global_id
(
0
)
;
unsigned
int
y1
=
get_global_id
(
1
)
;
int
p_offset
;
int
m,
n
;
int
result
;
int
counter
;
float
mean,
variance_norm_factor
;
for
(
int
i=0
;i<loopcount;i++)
{
constant
GpuHidHaarClassifierCascade
*
cascade
=
_cascade
+
i
;
global
int
*
candidate
=
_candidate
+
i*outputstep
;
int
window_width
=
cascade->p1
-
cascade->p0
;
int
window_height
=
window_width
;
result
=
1
;
counter
=
0
;
unsigned
int
x
=
mul24
(
x1,ystep
)
;
unsigned
int
y
=
mul24
(
y1,ystep
)
;
if
((
x
<
cols
-
window_width
-
1
)
&&
(
y
<
rows
-
window_height
-1
))
{
global
GpuHidHaarStageClassifier
*stagecascade
=
stagecascadeptr
+cascade->count*i+
start_stage
;
//global
GpuHidHaarClassifier
*classifier
=
classifierptr
;
global
GpuHidHaarTreeNode
*node
=
nodeptr
+
nodenum*i
;
p_offset
=
mad24
(
y,
pixel_step,
x
)
;// modify
p_offset
=
mad24
(
y,
pixel_step,
x
)
;// modify
mean
=
(
*
(
sum
+
p_offset
+
(
int
)
cascade->p0
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p1
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p2
)
+
*
(
sum
+
p_offset
+
(
int
)
cascade->p3
))
*cascade->inv_window_area
;
mean
=
(
*
(
sum
+
p_offset
+
(
int
)
cascade->p0
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p1
)
-
*
(
sum
+
p_offset
+
(
int
)
cascade->p2
)
+
*
(
sum
+
p_offset
+
(
int
)
cascade->p3
))
*cascade->inv_window_area
;
variance_norm_factor
=
*
(
sqsum
+
p_offset
+
cascade->p0
)
-
*
(
sqsum
+
cascade->p1
+
p_offset
)
-
*
(
sqsum
+
p_offset
+
cascade->p2
)
+
*
(
sqsum
+
cascade->p3
+
p_offset
)
;
variance_norm_factor
=
variance_norm_factor
*
cascade->inv_window_area
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=0.f
?
sqrt
(
variance_norm_factor
)
:
1
;//modify
variance_norm_factor
=
*
(
sqsum
+
p_offset
+
cascade->p0
)
-
*
(
sqsum
+
cascade->p1
+
p_offset
)
-
*
(
sqsum
+
p_offset
+
cascade->p2
)
+
*
(
sqsum
+
cascade->p3
+
p_offset
)
;
variance_norm_factor
=
variance_norm_factor
*
cascade->inv_window_area
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=0.f
?
sqrt
(
variance_norm_factor
)
:
1
;//modify
//
if
(
cascade->is_stump_based
)
//{
for
(
m
=
start_stage
; m < end_stage; m++ )
{
float
stage_sum
=
0.f
;
float
t
,
classsum
;
GpuHidHaarTreeNode
t1
;
//
if
(
cascade->is_stump_based
)
//{
for
(
m
=
start_stage
; m < end_stage; m++ )
{
float
stage_sum
=
0.f
;
float
t
,
classsum
;
GpuHidHaarTreeNode
t1
;
//#pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,
p_offset
,
0
)
*
t1.weight[0]
+
calc_sum1
(
t1,
p_offset
,
1
)
*
t1.weight[1]
;
//#pragma
unroll
for
(
n
=
0
; n < stagecascade->count; n++ )
{
t1
=
*
(
node
+
counter
)
;
t
=
t1.threshold
*
variance_norm_factor
;
classsum
=
calc_sum1
(
t1,
p_offset
,
0
)
*
t1.weight[0]
+
calc_sum1
(
t1,
p_offset
,
1
)
*
t1.weight[1]
;
if
((
t1.p0[2]
)
&&
(
!stagecascade->two_rects
))
classsum
+=
calc_sum1
(
t1,
p_offset,
2
)
*
t1.weight[2]
;
if
((
t1.p0[2]
)
&&
(
!stagecascade->two_rects
))
classsum
+=
calc_sum1
(
t1,
p_offset,
2
)
*
t1.weight[2]
;
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]
:
t1.alpha[0]
;// modify
counter++
;
}
stage_sum
+=
classsum
>=
t
?
t1.alpha[1]
:
t1.alpha[0]
;// modify
counter++
;
}
if
(
stage_sum
<
stagecascade->threshold
)
{
result
=
0
;
break
;
}
if
(
stage_sum
<
stagecascade->threshold
)
{
result
=
0
;
break
;
}
stagecascade++
;
stagecascade++
;
}
if
(
result
)
{
candidate[4
*
(
y1
*
detect_width
+
x1
)
]
=
x
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
1]
=
y
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+2]
=
window_width
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
3]
=
window_height
;
}
//}
}
}
}
if
(
result
)
{
candidate[4
*
(
y1
*
detect_width
+
x1
)
]
=
x
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
1]
=
y
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+2]
=
window_width
;
candidate[4
*
(
y1
*
detect_width
+
x1
)
+
3]
=
window_height
;
}
//}
}
}
}
*/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment