Commit 3b1fc16f authored by Andrey Kamaev's avatar Andrey Kamaev Committed by OpenCV Buildbot

Merge pull request #536 from bitwangyaoyao:2.4_fixHaar

parents e6dd4e84 9bbf1700
......@@ -47,15 +47,10 @@
//
//M*/
/* Haar features calculation */
//#define EMU
#include "precomp.hpp"
#include <stdio.h>
#include <string>
#ifdef EMU
#include "runCL.h"
#endif
using namespace cv;
using namespace cv::ocl;
using namespace std;
......@@ -926,7 +921,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
CV_Error(CV_StsError, "Image too small");
if( (flags & CV_HAAR_SCALE_IMAGE) && gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") == string::npos )
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
CvSize winSize0 = cascade->orig_window_size;
//float scalefactor = 1.1f;
......@@ -1114,30 +1109,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
/*
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp));
// openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
// openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
//t = (double)cvGetTickCount() - t;
//printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
......@@ -1258,13 +1230,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
scaleinfo[i].factor = factor;
int startnodenum = nodenum * i;
float factor2 = (float)factor;
/*
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
*/
vector<pair<size_t, const void *> > args1;
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
......@@ -1298,22 +1263,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
//int argcount = 0;
/*openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&newnodebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&step));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&pbuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/
vector<pair<size_t, const void *> > args;
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
......@@ -1335,8 +1284,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
//openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
//openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
......@@ -1407,204 +1354,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
}
// static CvHaarClassifierCascade * gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
// {
// int i;
// CvHaarClassifierCascade *cascade = gpuCreateHaarClassifierCascade(n);
// cascade->orig_window_size = orig_window_size;
// for( i = 0; i < n; i++ )
// {
// int j, count, l;
// float threshold = 0;
// const char *stage = input_cascade[i];
// int dl = 0;
// /* tree links */
// int parent = -1;
// int next = -1;
// sscanf( stage, "%d%n", &count, &dl );
// stage += dl;
// assert( count > 0 );
// cascade->stage_classifier[i].count = count;
// cascade->stage_classifier[i].classifier =
// (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0]));
// for( j = 0; j < count; j++ )
// {
// CvHaarClassifier *classifier = cascade->stage_classifier[i].classifier + j;
// int k, rects = 0;
// char str[100];
// sscanf( stage, "%d%n", &classifier->count, &dl );
// stage += dl;
// classifier->haar_feature = (CvHaarFeature *) cvAlloc(
// classifier->count * ( sizeof( *classifier->haar_feature ) +
// sizeof( *classifier->threshold ) +
// sizeof( *classifier->left ) +
// sizeof( *classifier->right ) ) +
// (classifier->count + 1) * sizeof( *classifier->alpha ) );
// classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
// classifier->left = (int *) (classifier->threshold + classifier->count);
// classifier->right = (int *) (classifier->left + classifier->count);
// classifier->alpha = (float *) (classifier->right + classifier->count);
// for( l = 0; l < classifier->count; l++ )
// {
// sscanf( stage, "%d%n", &rects, &dl );
// stage += dl;
// assert( rects >= 2 && rects <= CV_HAAR_FEATURE_MAX );
// for( k = 0; k < rects; k++ )
// {
// CvRect r;
// int band = 0;
// sscanf( stage, "%d%d%d%d%d%f%n",
// &r.x, &r.y, &r.width, &r.height, &band,
// &(classifier->haar_feature[l].rect[k].weight), &dl );
// stage += dl;
// classifier->haar_feature[l].rect[k].r = r;
// }
// sscanf( stage, "%s%n", str, &dl );
// stage += dl;
// classifier->haar_feature[l].tilted = strncmp( str, "tilted", 6 ) == 0;
// for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ )
// {
// memset( classifier->haar_feature[l].rect + k, 0,
// sizeof(classifier->haar_feature[l].rect[k]) );
// }
// sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]),
// &(classifier->left[l]),
// &(classifier->right[l]), &dl );
// stage += dl;
// }
// for( l = 0; l <= classifier->count; l++ )
// {
// sscanf( stage, "%f%n", &(classifier->alpha[l]), &dl );
// stage += dl;
// }
// }
// sscanf( stage, "%f%n", &threshold, &dl );
// stage += dl;
// cascade->stage_classifier[i].threshold = threshold;
// /* load tree links */
// if( sscanf( stage, "%d%d%n", &parent, &next, &dl ) != 2 )
// {
// parent = i - 1;
// next = -1;
// }
// stage += dl;
// cascade->stage_classifier[i].parent = parent;
// cascade->stage_classifier[i].next = next;
// cascade->stage_classifier[i].child = -1;
// if( parent != -1 && cascade->stage_classifier[parent].child == -1 )
// {
// cascade->stage_classifier[parent].child = i;
// }
// }
// return cascade;
// }
#ifndef _MAX_PATH
#define _MAX_PATH 1024
#endif
// static CvHaarClassifierCascade * gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size )
// {
// const char **input_cascade = 0;
// CvHaarClassifierCascade *cascade = 0;
// int i, n;
// const char *slash;
// char name[_MAX_PATH];
// int size = 0;
// char *ptr = 0;
// if( !directory )
// CV_Error( CV_StsNullPtr, "Null path is passed" );
// n = (int)strlen(directory) - 1;
// slash = directory[n] == '\\' || directory[n] == '/' ? "" : "/";
// /* try to read the classifier from directory */
// for( n = 0; ; n++ )
// {
// sprintf( name, "%s%s%d/AdaBoostCARTHaarClassifier.txt", directory, slash, n );
// FILE *f = fopen( name, "rb" );
// if( !f )
// break;
// fseek( f, 0, SEEK_END );
// size += ftell( f ) + 1;
// fclose(f);
// }
// if( n == 0 && slash[0] )
// return (CvHaarClassifierCascade *)cvLoad( directory );
// if( n == 0 )
// CV_Error( CV_StsBadArg, "Invalid path" );
// size += (n + 1) * sizeof(char *);
// input_cascade = (const char **)cvAlloc( size );
// ptr = (char *)(input_cascade + n + 1);
// for( i = 0; i < n; i++ )
// {
// sprintf( name, "%s/%d/AdaBoostCARTHaarClassifier.txt", directory, i );
// FILE *f = fopen( name, "rb" );
// if( !f )
// CV_Error( CV_StsError, "" );
// fseek( f, 0, SEEK_END );
// size = ftell( f );
// fseek( f, 0, SEEK_SET );
// CV_Assert((size_t)size == fread( ptr, 1, size, f ));
// fclose(f);
// input_cascade[i] = ptr;
// ptr += size;
// *ptr++ = '\0';
// }
// input_cascade[n] = 0;
// cascade = gpuLoadCascadeCART( input_cascade, n, orig_window_size );
// if( input_cascade )
// cvFree( &input_cascade );
// return cascade;
// }
// static void gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade )
// {
// if( _cascade && *_cascade )
// {
// int i, j;
// CvHaarClassifierCascade *cascade = *_cascade;
// for( i = 0; i < cascade->count; i++ )
// {
// for( j = 0; j < cascade->stage_classifier[i].count; j++ )
// cvFree( &cascade->stage_classifier[i].classifier[j].haar_feature );
// cvFree( &cascade->stage_classifier[i].classifier );
// }
// gpuReleaseHidHaarClassifierCascade( (GpuHidHaarClassifierCascade **)&cascade->hid_cascade );
// cvFree( _cascade );
// }
// }
/****************************************************************************************\
* Persistence functions *
......@@ -1627,937 +1380,11 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
#define ICV_HAAR_PARENT_NAME "parent"
#define ICV_HAAR_NEXT_NAME "next"
// static int gpuIsHaarClassifier( const void *struct_ptr )
// {
// return CV_IS_HAAR_CLASSIFIER( struct_ptr );
// }
// static void * gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
// {
// CvHaarClassifierCascade *cascade = NULL;
// char buf[256];
// CvFileNode *seq_fn = NULL; /* sequence */
// CvFileNode *fn = NULL;
// CvFileNode *stages_fn = NULL;
// CvSeqReader stages_reader;
// int n;
// int i, j, k, l;
// int parent, next;
// stages_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_STAGES_NAME );
// if( !stages_fn || !CV_NODE_IS_SEQ( stages_fn->tag) )
// CV_Error( CV_StsError, "Invalid stages node" );
// n = stages_fn->data.seq->total;
// cascade = gpuCreateHaarClassifierCascade(n);
// /* read size */
// seq_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_SIZE_NAME );
// if( !seq_fn || !CV_NODE_IS_SEQ( seq_fn->tag ) || seq_fn->data.seq->total != 2 )
// CV_Error( CV_StsError, "size node is not a valid sequence." );
// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 0 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 )
// CV_Error( CV_StsError, "Invalid size node: width must be positive integer" );
// cascade->orig_window_size.width = fn->data.i;
// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 1 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 )
// CV_Error( CV_StsError, "Invalid size node: height must be positive integer" );
// cascade->orig_window_size.height = fn->data.i;
// cvStartReadSeq( stages_fn->data.seq, &stages_reader );
// for( i = 0; i < n; ++i )
// {
// CvFileNode *stage_fn;
// CvFileNode *trees_fn;
// CvSeqReader trees_reader;
// stage_fn = (CvFileNode *) stages_reader.ptr;
// if( !CV_NODE_IS_MAP( stage_fn->tag ) )
// {
// sprintf( buf, "Invalid stage %d", i );
// CV_Error( CV_StsError, buf );
// }
// trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME );
// if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag )
// || trees_fn->data.seq->total <= 0 )
// {
// sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// cascade->stage_classifier[i].classifier =
// (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total
// * sizeof( cascade->stage_classifier[i].classifier[0] ) );
// for( j = 0; j < trees_fn->data.seq->total; ++j )
// {
// cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
// }
// cascade->stage_classifier[i].count = trees_fn->data.seq->total;
// cvStartReadSeq( trees_fn->data.seq, &trees_reader );
// for( j = 0; j < trees_fn->data.seq->total; ++j )
// {
// CvFileNode *tree_fn;
// CvSeqReader tree_reader;
// CvHaarClassifier *classifier;
// int last_idx;
// classifier = &cascade->stage_classifier[i].classifier[j];
// tree_fn = (CvFileNode *) trees_reader.ptr;
// if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 )
// {
// sprintf( buf, "Tree node is not a valid sequence."
// " (stage %d, tree %d)", i, j );
// CV_Error( CV_StsError, buf );
// }
// classifier->count = tree_fn->data.seq->total;
// classifier->haar_feature = (CvHaarFeature *) cvAlloc(
// classifier->count * ( sizeof( *classifier->haar_feature ) +
// sizeof( *classifier->threshold ) +
// sizeof( *classifier->left ) +
// sizeof( *classifier->right ) ) +
// (classifier->count + 1) * sizeof( *classifier->alpha ) );
// classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
// classifier->left = (int *) (classifier->threshold + classifier->count);
// classifier->right = (int *) (classifier->left + classifier->count);
// classifier->alpha = (float *) (classifier->right + classifier->count);
// cvStartReadSeq( tree_fn->data.seq, &tree_reader );
// for( k = 0, last_idx = 0; k < tree_fn->data.seq->total; ++k )
// {
// CvFileNode *node_fn;
// CvFileNode *feature_fn;
// CvFileNode *rects_fn;
// CvSeqReader rects_reader;
// node_fn = (CvFileNode *) tree_reader.ptr;
// if( !CV_NODE_IS_MAP( node_fn->tag ) )
// {
// sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)",
// k, i, j );
// CV_Error( CV_StsError, buf );
// }
// feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME );
// if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) )
// {
// sprintf( buf, "Feature node is not a valid map. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME );
// if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag )
// || rects_fn->data.seq->total < 1
// || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX )
// {
// sprintf( buf, "Rects node is not a valid sequence. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// cvStartReadSeq( rects_fn->data.seq, &rects_reader );
// for( l = 0; l < rects_fn->data.seq->total; ++l )
// {
// CvFileNode *rect_fn;
// CvRect r;
// rect_fn = (CvFileNode *) rects_reader.ptr;
// if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 )
// {
// sprintf( buf, "Rect %d is not a valid sequence. "
// "(stage %d, tree %d, node %d)", l, i, j, k );
// CV_Error( CV_StsError, buf );
// }
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 0 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
// {
// sprintf( buf, "x coordinate must be non-negative integer. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.x = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 1 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
// {
// sprintf( buf, "y coordinate must be non-negative integer. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.y = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
// || r.x + fn->data.i > cascade->orig_window_size.width )
// {
// sprintf( buf, "width must be positive integer and "
// "(x + width) must not exceed window width. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.width = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 );
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
// || r.y + fn->data.i > cascade->orig_window_size.height )
// {
// sprintf( buf, "height must be positive integer and "
// "(y + height) must not exceed window height. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// r.height = fn->data.i;
// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 4 );
// if( !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "weight must be real number. "
// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
// CV_Error( CV_StsError, buf );
// }
// classifier->haar_feature[k].rect[l].weight = (float) fn->data.f;
// classifier->haar_feature[k].rect[l].r = r;
// CV_NEXT_SEQ_ELEM( sizeof( *rect_fn ), rects_reader );
// } /* for each rect */
// for( l = rects_fn->data.seq->total; l < CV_HAAR_FEATURE_MAX; ++l )
// {
// classifier->haar_feature[k].rect[l].weight = 0;
// classifier->haar_feature[k].rect[l].r = cvRect( 0, 0, 0, 0 );
// }
// fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_TILTED_NAME);
// if( !fn || !CV_NODE_IS_INT( fn->tag ) )
// {
// sprintf( buf, "tilted must be 0 or 1. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->haar_feature[k].tilted = ( fn->data.i != 0 );
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_THRESHOLD_NAME);
// if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "threshold must be real number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->threshold[k] = (float) fn->data.f;
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_NODE_NAME);
// if( fn )
// {
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
// || fn->data.i >= tree_fn->data.seq->total )
// {
// sprintf( buf, "left node must be valid node number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* left node */
// classifier->left[k] = fn->data.i;
// }
// else
// {
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_VAL_NAME );
// if( !fn )
// {
// sprintf( buf, "left node or left value must be specified. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// if( !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "left value must be real number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* left value */
// if( last_idx >= classifier->count + 1 )
// {
// sprintf( buf, "Tree structure is broken: too many values. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->left[k] = -last_idx;
// classifier->alpha[last_idx++] = (float) fn->data.f;
// }
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_NODE_NAME);
// if( fn )
// {
// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
// || fn->data.i >= tree_fn->data.seq->total )
// {
// sprintf( buf, "right node must be valid node number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* right node */
// classifier->right[k] = fn->data.i;
// }
// else
// {
// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_VAL_NAME );
// if( !fn )
// {
// sprintf( buf, "right node or right value must be specified. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// if( !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "right value must be real number. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// /* right value */
// if( last_idx >= classifier->count + 1 )
// {
// sprintf( buf, "Tree structure is broken: too many values. "
// "(stage %d, tree %d, node %d)", i, j, k );
// CV_Error( CV_StsError, buf );
// }
// classifier->right[k] = -last_idx;
// classifier->alpha[last_idx++] = (float) fn->data.f;
// }
// CV_NEXT_SEQ_ELEM( sizeof( *node_fn ), tree_reader );
// } /* for each node */
// if( last_idx != classifier->count + 1 )
// {
// sprintf( buf, "Tree structure is broken: too few values. "
// "(stage %d, tree %d)", i, j );
// CV_Error( CV_StsError, buf );
// }
// CV_NEXT_SEQ_ELEM( sizeof( *tree_fn ), trees_reader );
// } /* for each tree */
// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_STAGE_THRESHOLD_NAME);
// if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
// {
// sprintf( buf, "stage threshold must be real number. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// cascade->stage_classifier[i].threshold = (float) fn->data.f;
// parent = i - 1;
// next = -1;
// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME );
// if( !fn || !CV_NODE_IS_INT( fn->tag )
// || fn->data.i < -1 || fn->data.i >= cascade->count )
// {
// sprintf( buf, "parent must be integer number. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// parent = fn->data.i;
// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME );
// if( !fn || !CV_NODE_IS_INT( fn->tag )
// || fn->data.i < -1 || fn->data.i >= cascade->count )
// {
// sprintf( buf, "next must be integer number. (stage %d)", i );
// CV_Error( CV_StsError, buf );
// }
// next = fn->data.i;
// cascade->stage_classifier[i].parent = parent;
// cascade->stage_classifier[i].next = next;
// cascade->stage_classifier[i].child = -1;
// if( parent != -1 && cascade->stage_classifier[parent].child == -1 )
// {
// cascade->stage_classifier[parent].child = i;
// }
// CV_NEXT_SEQ_ELEM( sizeof( *stage_fn ), stages_reader );
// } /* for each stage */
// return cascade;
// }
// static void gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr,
// CvAttrList attributes )
// {
// int i, j, k, l;
// char buf[256];
// const CvHaarClassifierCascade *cascade = (const CvHaarClassifierCascade *) struct_ptr;
// /* TODO: parameters check */
// cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HAAR, attributes );
// cvStartWriteStruct( fs, ICV_HAAR_SIZE_NAME, CV_NODE_SEQ | CV_NODE_FLOW );
// cvWriteInt( fs, NULL, cascade->orig_window_size.width );
// cvWriteInt( fs, NULL, cascade->orig_window_size.height );
// cvEndWriteStruct( fs ); /* size */
// cvStartWriteStruct( fs, ICV_HAAR_STAGES_NAME, CV_NODE_SEQ );
// for( i = 0; i < cascade->count; ++i )
// {
// cvStartWriteStruct( fs, NULL, CV_NODE_MAP );
// sprintf( buf, "stage %d", i );
// cvWriteComment( fs, buf, 1 );
// cvStartWriteStruct( fs, ICV_HAAR_TREES_NAME, CV_NODE_SEQ );
// for( j = 0; j < cascade->stage_classifier[i].count; ++j )
// {
// CvHaarClassifier *tree = &cascade->stage_classifier[i].classifier[j];
// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ );
// sprintf( buf, "tree %d", j );
// cvWriteComment( fs, buf, 1 );
// for( k = 0; k < tree->count; ++k )
// {
// CvHaarFeature *feature = &tree->haar_feature[k];
// cvStartWriteStruct( fs, NULL, CV_NODE_MAP );
// if( k )
// {
// sprintf( buf, "node %d", k );
// }
// else
// {
// sprintf( buf, "root node" );
// }
// cvWriteComment( fs, buf, 1 );
// cvStartWriteStruct( fs, ICV_HAAR_FEATURE_NAME, CV_NODE_MAP );
// cvStartWriteStruct( fs, ICV_HAAR_RECTS_NAME, CV_NODE_SEQ );
// for( l = 0; l < CV_HAAR_FEATURE_MAX && feature->rect[l].r.width != 0; ++l )
// {
// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ | CV_NODE_FLOW );
// cvWriteInt( fs, NULL, feature->rect[l].r.x );
// cvWriteInt( fs, NULL, feature->rect[l].r.y );
// cvWriteInt( fs, NULL, feature->rect[l].r.width );
// cvWriteInt( fs, NULL, feature->rect[l].r.height );
// cvWriteReal( fs, NULL, feature->rect[l].weight );
// cvEndWriteStruct( fs ); /* rect */
// }
// cvEndWriteStruct( fs ); /* rects */
// cvWriteInt( fs, ICV_HAAR_TILTED_NAME, feature->tilted );
// cvEndWriteStruct( fs ); /* feature */
// cvWriteReal( fs, ICV_HAAR_THRESHOLD_NAME, tree->threshold[k]);
// if( tree->left[k] > 0 )
// {
// cvWriteInt( fs, ICV_HAAR_LEFT_NODE_NAME, tree->left[k] );
// }
// else
// {
// cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME,
// tree->alpha[-tree->left[k]] );
// }
// if( tree->right[k] > 0 )
// {
// cvWriteInt( fs, ICV_HAAR_RIGHT_NODE_NAME, tree->right[k] );
// }
// else
// {
// cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME,
// tree->alpha[-tree->right[k]] );
// }
// cvEndWriteStruct( fs ); /* split */
// }
// cvEndWriteStruct( fs ); /* tree */
// }
// cvEndWriteStruct( fs ); /* trees */
// cvWriteReal( fs, ICV_HAAR_STAGE_THRESHOLD_NAME, cascade->stage_classifier[i].threshold);
// cvWriteInt( fs, ICV_HAAR_PARENT_NAME, cascade->stage_classifier[i].parent );
// cvWriteInt( fs, ICV_HAAR_NEXT_NAME, cascade->stage_classifier[i].next );
// cvEndWriteStruct( fs ); /* stage */
// } /* for each stage */
// cvEndWriteStruct( fs ); /* stages */
// cvEndWriteStruct( fs ); /* root */
// }
// static void * gpuCloneHaarClassifier( const void *struct_ptr )
// {
// CvHaarClassifierCascade *cascade = NULL;
// int i, j, k, n;
// const CvHaarClassifierCascade *cascade_src =
// (const CvHaarClassifierCascade *) struct_ptr;
// n = cascade_src->count;
// cascade = gpuCreateHaarClassifierCascade(n);
// cascade->orig_window_size = cascade_src->orig_window_size;
// for( i = 0; i < n; ++i )
// {
// cascade->stage_classifier[i].parent = cascade_src->stage_classifier[i].parent;
// cascade->stage_classifier[i].next = cascade_src->stage_classifier[i].next;
// cascade->stage_classifier[i].child = cascade_src->stage_classifier[i].child;
// cascade->stage_classifier[i].threshold = cascade_src->stage_classifier[i].threshold;
// cascade->stage_classifier[i].count = 0;
// cascade->stage_classifier[i].classifier =
// (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count
// * sizeof( cascade->stage_classifier[i].classifier[0] ) );
// cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count;
// for( j = 0; j < cascade->stage_classifier[i].count; ++j )
// cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
// for( j = 0; j < cascade->stage_classifier[i].count; ++j )
// {
// const CvHaarClassifier *classifier_src =
// &cascade_src->stage_classifier[i].classifier[j];
// CvHaarClassifier *classifier =
// &cascade->stage_classifier[i].classifier[j];
// classifier->count = classifier_src->count;
// classifier->haar_feature = (CvHaarFeature *) cvAlloc(
// classifier->count * ( sizeof( *classifier->haar_feature ) +
// sizeof( *classifier->threshold ) +
// sizeof( *classifier->left ) +
// sizeof( *classifier->right ) ) +
// (classifier->count + 1) * sizeof( *classifier->alpha ) );
// classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
// classifier->left = (int *) (classifier->threshold + classifier->count);
// classifier->right = (int *) (classifier->left + classifier->count);
// classifier->alpha = (float *) (classifier->right + classifier->count);
// for( k = 0; k < classifier->count; ++k )
// {
// classifier->haar_feature[k] = classifier_src->haar_feature[k];
// classifier->threshold[k] = classifier_src->threshold[k];
// classifier->left[k] = classifier_src->left[k];
// classifier->right[k] = classifier_src->right[k];
// classifier->alpha[k] = classifier_src->alpha[k];
// }
// classifier->alpha[classifier->count] =
// classifier_src->alpha[classifier->count];
// }
// }
// return cascade;
// }
#if 0
CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier,
(CvReleaseFunc)gpuReleaseHaarClassifierCascade,
gpuReadHaarClassifier, gpuWriteHaarClassifier,
gpuCloneHaarClassifier );
namespace cv
{
HaarClassifierCascade::HaarClassifierCascade() {}
HaarClassifierCascade::HaarClassifierCascade(const String &filename)
{
load(filename);
}
bool HaarClassifierCascade::load(const String &filename)
{
cascade = Ptr<CvHaarClassifierCascade>((CvHaarClassifierCascade *)cvLoad(filename.c_str(), 0, 0, 0));
return (CvHaarClassifierCascade *)cascade != 0;
}
void HaarClassifierCascade::detectMultiScale( const Mat &image,
Vector<Rect> &objects, double scaleFactor,
int minNeighbors, int flags,
Size minSize )
{
MemStorage storage(cvCreateMemStorage(0));
CvMat _image = image;
CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor,
minNeighbors, flags, minSize );
Seq<Rect>(_objects).copyTo(objects);
}
int HaarClassifierCascade::runAt(Point pt, int startStage, int) const
{
return gpuRunHaarClassifierCascade(cascade, pt, startStage);
}
void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum,
const Mat &tilted, double scale )
{
CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted;
gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale );
}
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////reserved functios//////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*#if CV_SSE2
# if CV_SSE4 || defined __SSE4__
# include <smmintrin.h>
# else
# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
# endif
#if defined CV_ICC
# define CV_HAAR_USE_SSE 1
#endif
#endif*/
/*
CV_IMPL void
gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
const CvArr* _sum,
const CvArr* _sqsum,
const CvArr* _tilted_sum,
double scale )
{
CvMat sum_stub, *sum = (CvMat*)_sum;
CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
GpuHidHaarClassifierCascade* cascade;
int coi0 = 0, coi1 = 0;
int i;
int datasize;
int totalclassifier;
CvRect equRect;
double weight_scale;
int rows,cols;
if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" );
if( scale <= 0 )
CV_Error( CV_StsOutOfRange, "Scale must be positive" );
sum = cvGetMat( sum, &sum_stub, &coi0 );
sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
if( coi0 || coi1 )
CV_Error( CV_BadCOI, "COI is not supported" );
if( !CV_ARE_SIZES_EQ( sum, sqsum ))
CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
CV_MAT_TYPE(sum->type) != CV_32SC1 )
CV_Error( CV_StsUnsupportedFormat,
"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
if( !_cascade->hid_cascade )
gpuCreateHidHaarClassifierCascade(_cascade,&datasize,&totalclassifier);
cascade =(GpuHidHaarClassifierCascade *)_cascade->hid_cascade;
if( cascade->has_tilted_features )
{
tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
CV_Error( CV_StsUnsupportedFormat,
"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
if( sum->step != tilted->step )
CV_Error( CV_StsUnmatchedSizes,
"Sum and tilted_sum must have the same stride (step, widthStep)" );
if( !CV_ARE_SIZES_EQ( sum, tilted ))
CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
//cascade->tilted = *tilted;
}
_cascade->scale = scale;
_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
//cascade->sum = *sum;
//cascade->sqsum = *sqsum;
equRect.x = equRect.y = cvRound(scale);
equRect.width = cvRound((_cascade->orig_window_size.width-2)*scale);
equRect.height = cvRound((_cascade->orig_window_size.height-2)*scale);
weight_scale = 1./(equRect.width*equRect.height);
cascade->inv_window_area = weight_scale;
cascade->p0 = sum_elem_ptr(*sum, equRect.y, equRect.x);
cascade->p1 = sum_elem_ptr(*sum, equRect.y, equRect.x + equRect.width );
cascade->p2 = sum_elem_ptr(*sum, equRect.y + equRect.height, equRect.x );
cascade->p3 = sum_elem_ptr(*sum, equRect.y + equRect.height,
equRect.x + equRect.width );
*/
/* rows=sum->rows;
cols=sum->cols;
cascade->p0 = equRect.y*cols + equRect.x;
cascade->p1 = equRect.y*cols + equRect.x + equRect.width;
cascade->p2 = (equRect.y + equRect.height) * cols + equRect.x;
cascade->p3 = (equRect.y + equRect.height) * cols + equRect.x + equRect.width ;
*/
/*
cascade->pq0 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x);
cascade->pq1 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x + equRect.width );
cascade->pq2 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, equRect.x );
cascade->pq3 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height,
equRect.x + equRect.width );
*/
/* init pointers in haar features according to real window size and
given image pointers */
/* for( i = 0; i < _cascade->count; i++ )
{
int j, k, l;
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
for( l = 0; l < cascade->stage_classifier[i].classifier[j].count; l++ )
{
CvHaarFeature* feature =
&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
*/ /* GpuHidHaarClassifier* classifier =
cascade->stage_classifier[i].classifier + j; */
//GpuHidHaarFeature* hidfeature =
// &cascade->stage_classifier[i].classifier[j].node[l].feature;
/* double sum0 = 0, area0 = 0;
CvRect r[3];
int base_w = -1, base_h = -1;
int new_base_w = 0, new_base_h = 0;
int kx, ky;
int flagx = 0, flagy = 0;
int x0 = 0, y0 = 0;
int nr;
*/
/* align blocks */
/* for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
{
//if( !hidfeature->rect[k].p0 )
// break;
r[k] = feature->rect[k].r;
base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
}
nr = k;
base_w += 1;
base_h += 1;
kx = r[0].width / base_w;
ky = r[0].height / base_h;
if( kx <= 0 )
{
flagx = 1;
new_base_w = cvRound( r[0].width * scale ) / kx;
x0 = cvRound( r[0].x * scale );
}
if( ky <= 0 )
{
flagy = 1;
new_base_h = cvRound( r[0].height * scale ) / ky;
y0 = cvRound( r[0].y * scale );
}
for( k = 0; k < nr; k++ )
{
CvRect tr;
double correction_ratio;
if( flagx )
{
tr.x = (r[k].x - r[0].x) * new_base_w / base_w + x0;
tr.width = r[k].width * new_base_w / base_w;
}
else
static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */)
{
tr.x = cvRound( r[k].x * scale );
tr.width = cvRound( r[k].width * scale );
}
if( flagy )
{
tr.y = (r[k].y - r[0].y) * new_base_h / base_h + y0;
tr.height = r[k].height * new_base_h / base_h;
}
else
{
tr.y = cvRound( r[k].y * scale );
tr.height = cvRound( r[k].height * scale );
}
#if CV_ADJUST_WEIGHTS
{
// RAINER START
const float orig_feature_size = (float)(feature->rect[k].r.width)*feature->rect[k].r.height;
const float orig_norm_size = (float)(_cascade->orig_window_size.width)*(_cascade->orig_window_size.height);
const float feature_size = float(tr.width*tr.height);
//const float normSize = float(equRect.width*equRect.height);
float target_ratio = orig_feature_size / orig_norm_size;
//float isRatio = featureSize / normSize;
//correctionRatio = targetRatio / isRatio / normSize;
correction_ratio = target_ratio / feature_size;
// RAINER END
}
#else
correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5);
#endif
if( !feature->tilted )
{
hidfeature->rect[k].p0 = tr.y * rows + tr.x;
hidfeature->rect[k].p1 = tr.y * rows + tr.x + tr.width;
hidfeature->rect[k].p2 = (tr.y + tr.height) * rows + tr.x;
hidfeature->rect[k].p3 = (tr.y + tr.height) * rows + tr.x + tr.width;
}
else
{
hidfeature->rect[k].p2 = (tr.y + tr.width) * rows + tr.x + tr.width;
hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * rows + tr.x + tr.width - tr.height;
hidfeature->rect[k].p0 = tr.y*rows + tr.x;
hidfeature->rect[k].p1 = (tr.y + tr.height) * rows + tr.x - tr.height;
}
//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
if( k == 0 )
area0 = tr.width * tr.height;
else
;// sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
}
//hidfeature->rect[0].weight = (float)(-sum0/area0);*/
// } /* l */
// } /* j */
// }
//}
/*
CV_INLINE
double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
double variance_norm_factor,
size_t p_offset )
{
int idx = 0;
do
{
GpuHidHaarTreeNode* node = classifier->node + idx;
double t = node->threshold * variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
idx = sum < t ? node->left : node->right;
}
while( idx > 0 );
return classifier->alpha[-idx];
return 0.;
}
*/
static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade,
CvPoint pt, int start_stage */)
{
/*
int result = -1;
int p_offset, pq_offset;
int i, j;
double mean, variance_norm_factor;
GpuHidHaarClassifierCascade* cascade;
if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid cascade pointer" );
cascade = (GpuHidHaarClassifierCascade*) _cascade->hid_cascade;
if( !cascade )
CV_Error( CV_StsNullPtr, "Hidden cascade has not been created.\n"
"Use gpuSetImagesForHaarClassifierCascade" );
if( pt.x < 0 || pt.y < 0 ||
pt.x + _cascade->real_window_size.width >= cascade->sum.width-2 ||
pt.y + _cascade->real_window_size.height >= cascade->sum.height-2 )
return -1;
p_offset = pt.y * (cascade->sum.step/sizeof(sumtype)) + pt.x;
pq_offset = pt.y * (cascade->sqsum.step/sizeof(sqsumtype)) + pt.x;
mean = calc_sum(*cascade,p_offset)*cascade->inv_window_area;
variance_norm_factor = cascade->pq0[pq_offset] - cascade->pq1[pq_offset] -
cascade->pq2[pq_offset] + cascade->pq3[pq_offset];
variance_norm_factor = variance_norm_factor*cascade->inv_window_area - mean*mean;
if( variance_norm_factor >= 0. )
variance_norm_factor = sqrt(variance_norm_factor);
else
variance_norm_factor = 1.;
if( cascade->is_stump_based )
{
for( i = start_stage; i < cascade->count; i++ )
{
double stage_sum = 0;
if( cascade->stage_classifier[i].two_rects )
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
GpuHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
else
{
for( j = 0; j < cascade->stage_classifier[i].count; j++ )
{
GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
GpuHidHaarTreeNode* node = classifier->node;
double t = node->threshold*variance_norm_factor;
double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
if( node->feature.rect[2].p0 )
sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
stage_sum += classifier->alpha[sum >= t];
}
}
if( stage_sum < cascade->stage_classifier[i].threshold )
return -i;
}
}
*/
return 1;
}
namespace cv
{
namespace ocl
......@@ -2671,78 +1498,3 @@ struct gpuHaarDetectObjects_ScaleCascade_Invoker
}
}
/*
typedef struct _ALIGNED_ON(128) GpuHidHaarFeature
{
struct _ALIGNED_ON(32)
{
int p0 _ALIGNED_ON(4);
int p1 _ALIGNED_ON(4);
int p2 _ALIGNED_ON(4);
int p3 _ALIGNED_ON(4);
float weight _ALIGNED_ON(4);
}
rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32);
}
GpuHidHaarFeature;
typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
{
int left _ALIGNED_ON(4);
int right _ALIGNED_ON(4);
float threshold _ALIGNED_ON(4);
int p0[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
int p1[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
int p2[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
int p3[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
float weight[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16);
float alpha[2] _ALIGNED_ON(8);
// GpuHidHaarFeature feature __attribute__((aligned (128)));
}
GpuHidHaarTreeNode;
typedef struct _ALIGNED_ON(32) GpuHidHaarClassifier
{
int count _ALIGNED_ON(4);
//CvHaarFeature* orig_feature;
GpuHidHaarTreeNode* node _ALIGNED_ON(8);
float* alpha _ALIGNED_ON(8);
}
GpuHidHaarClassifier;
typedef struct _ALIGNED_ON(64) __attribute__((aligned (64))) GpuHidHaarStageClassifier
{
int count _ALIGNED_ON(4);
float threshold _ALIGNED_ON(4);
int two_rects _ALIGNED_ON(4);
GpuHidHaarClassifier* classifier _ALIGNED_ON(8);
struct GpuHidHaarStageClassifier* next _ALIGNED_ON(8);
struct GpuHidHaarStageClassifier* child _ALIGNED_ON(8);
struct GpuHidHaarStageClassifier* parent _ALIGNED_ON(8);
}
GpuHidHaarStageClassifier;
typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
{
int count _ALIGNED_ON(4);
int is_stump_based _ALIGNED_ON(4);
int has_tilted_features _ALIGNED_ON(4);
int is_tree _ALIGNED_ON(4);
int pq0 _ALIGNED_ON(4);
int pq1 _ALIGNED_ON(4);
int pq2 _ALIGNED_ON(4);
int pq3 _ALIGNED_ON(4);
int p0 _ALIGNED_ON(4);
int p1 _ALIGNED_ON(4);
int p2 _ALIGNED_ON(4);
int p3 _ALIGNED_ON(4);
float inv_window_area _ALIGNED_ON(4);
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
}GpuHidHaarClassifierCascade;
*/
/* End of file. */
......@@ -9,6 +9,7 @@
// Niko Li, newlife20080214@gmail.com
// Wang Weiyan, wangweiyanster@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Nathan, liujun@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
......@@ -47,14 +48,14 @@ typedef float sqsumtype;
typedef struct __attribute__((aligned (128))) GpuHidHaarFeature
{
struct __attribute__((aligned (32)))
{
int p0 __attribute__((aligned (4)));
int p1 __attribute__((aligned (4)));
int p2 __attribute__((aligned (4)));
int p3 __attribute__((aligned (4)));
float weight __attribute__((aligned (4)));
}
rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
{
int p0 __attribute__((aligned (4)));
int p1 __attribute__((aligned (4)));
int p2 __attribute__((aligned (4)));
int p3 __attribute__((aligned (4)));
float weight __attribute__((aligned (4)));
}
rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
}
GpuHidHaarFeature;
......@@ -108,31 +109,31 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
int p2 __attribute__((aligned (4)));
int p3 __attribute__((aligned (4)));
float inv_window_area __attribute__((aligned (4)));
}GpuHidHaarClassifierCascade;
} GpuHidHaarClassifierCascade;
__kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(//constant GpuHidHaarClassifierCascade * cascade,
global GpuHidHaarStageClassifier * stagecascadeptr,
global int4 * info,
global GpuHidHaarTreeNode * nodeptr,
global const int * restrict sum1,
global const float * restrict sqsum1,
global int4 * candidate,
const int pixelstep,
const int loopcount,
const int start_stage,
const int split_stage,
const int end_stage,
const int startnode,
const int splitnode,
const int4 p,
const int4 pq,
const float correction
//const int width,
//const int height,
//const int grpnumperline,
//const int totalgrp
)
global GpuHidHaarStageClassifier * stagecascadeptr,
global int4 * info,
global GpuHidHaarTreeNode * nodeptr,
global const int * restrict sum1,
global const float * restrict sqsum1,
global int4 * candidate,
const int pixelstep,
const int loopcount,
const int start_stage,
const int split_stage,
const int end_stage,
const int startnode,
const int splitnode,
const int4 p,
const int4 pq,
const float correction
//const int width,
//const int height,
//const int grpnumperline,
//const int totalgrp
)
{
int grpszx = get_local_size(0);
int grpszy = get_local_size(1);
......@@ -184,7 +185,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
__global const int * sum = sum1 + imgoff;
__global const float * sqsum = sqsum1 + imgoff;
for(int grploop=grpidx;grploop<totalgrp;grploop+=grpnumx)
for(int grploop=grpidx; grploop<totalgrp; grploop+=grpnumx)
{
int grpidy = grploop / grpnumperline;
int grpidx = grploop - mul24(grpidy, grpnumperline);
......@@ -195,7 +196,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int grpoffx = x-lclidx;
int grpoffy = y-lclidy;
for(int i=0;i<read_loop;i++)
for(int i=0; i<read_loop; i++)
{
int pos_id = mad24(i,lcl_sz,lcl_id);
pos_id = pos_id < total_read ? pos_id : 0;
......@@ -234,15 +235,15 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
cascadeinfo1.x +=lcl_off;
cascadeinfo1.z +=lcl_off;
mean = (lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.x)] - lcldata[mad24(cascadeinfo1.y,readwidth,cascadeinfo1.z)] -
lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.x)] + lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.z)])
*correction;
lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.x)] + lcldata[mad24(cascadeinfo1.w,readwidth,cascadeinfo1.z)])
*correction;
int p_offset = mad24(y, pixelstep, x);
cascadeinfo2.x +=p_offset;
cascadeinfo2.z +=p_offset;
variance_norm_factor =sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.x)] - sqsum[mad24(cascadeinfo2.y, pixelstep, cascadeinfo2.z)] -
sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.x)] + sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.z)];
sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.x)] + sqsum[mad24(cascadeinfo2.w, pixelstep, cascadeinfo2.z)];
variance_norm_factor = variance_norm_factor * correction - mean * mean;
variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1.f;
......@@ -270,19 +271,19 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
info2.z +=lcl_off;
float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
//if((info3.z - info3.x) && (!stageinfo.z))
//{
info3.x +=lcl_off;
info3.z +=lcl_off;
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
info3.x +=lcl_off;
info3.z +=lcl_off;
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
//}
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
nodecounter++;
......@@ -299,12 +300,13 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
}
barrier(CLK_LOCAL_MEM_FENCE);
int queuecount = lclcount[0];
barrier(CLK_LOCAL_MEM_FENCE);
nodecounter = splitnode;
for(int stageloop = split_stage; stageloop< end_stage && queuecount>0;stageloop++)
for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++)
{
//barrier(CLK_LOCAL_MEM_FENCE);
//barrier(CLK_LOCAL_MEM_FENCE);
//if(lcl_id == 0)
lclcount[0]=0;
lclcount[0]=0;
barrier(CLK_LOCAL_MEM_FENCE);
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
......@@ -316,70 +318,73 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int lcl_compute_win_id = (lcl_id >>(6-perfscale));
int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale);
int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale));
for(int queueloop=0;queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/;queueloop++)
for(int queueloop=0; queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/; queueloop++)
{
float stage_sum = 0.f;
int temp_coord = lcloutindex[lcl_compute_win_id<<1];
float variance_norm_factor = as_float(lcloutindex[(lcl_compute_win_id<<1)+1]);
int queue_pixel = mad24(((temp_coord & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff);
//barrier(CLK_LOCAL_MEM_FENCE);
if(lcl_compute_win_id < queuecount) {
int tempnodecounter = lcl_compute_id;
float part_sum = 0.f;
for(int lcl_loop=0;lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;lcl_loop++)
//barrier(CLK_LOCAL_MEM_FENCE);
if(lcl_compute_win_id < queuecount)
{
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
int tempnodecounter = lcl_compute_id;
float part_sum = 0.f;
for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
{
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
info1.x +=queue_pixel;
info1.z +=queue_pixel;
info2.x +=queue_pixel;
info2.z +=queue_pixel;
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
info1.x +=queue_pixel;
info1.z +=queue_pixel;
info2.x +=queue_pixel;
info2.z +=queue_pixel;
float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] -
lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x;
classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
//if((info3.z - info3.x) && (!stageinfo.z))
//{
classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] -
lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y;
//if((info3.z - info3.x) && (!stageinfo.z))
//{
info3.x +=queue_pixel;
info3.z +=queue_pixel;
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
//}
part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
tempnodecounter +=lcl_compute_win;
}//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lcl_compute_win_id < queuecount) {
for(int i=0;i<lcl_compute_win && (lcl_compute_id==0);i++)
{
stage_sum += partialsum[lcl_id+i];
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
//}
part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
tempnodecounter +=lcl_compute_win;
}//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum;
}
if(stage_sum >= stagethreshold && (lcl_compute_id==0))
barrier(CLK_LOCAL_MEM_FENCE);
if(lcl_compute_win_id < queuecount)
{
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex<<1] = temp_coord;
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++)
{
stage_sum += partialsum[lcl_id+i];
}
if(stage_sum >= stagethreshold && (lcl_compute_id==0))
{
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex<<1] = temp_coord;
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor);
}
lcl_compute_win_id +=(1<<perfscale);
}
lcl_compute_win_id +=(1<<perfscale);
}
barrier(CLK_LOCAL_MEM_FENCE);
}//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++)
barrier(CLK_LOCAL_MEM_FENCE);
//barrier(CLK_LOCAL_MEM_FENCE);
queuecount = lclcount[0];
barrier(CLK_LOCAL_MEM_FENCE);
nodecounter += stageinfo.x;
}//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
//barrier(CLK_LOCAL_MEM_FENCE);
......@@ -420,138 +425,138 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
/*
if(stagecascade->two_rects)
{
#pragma unroll
for( n = 0; n < stagecascade->count; n++ )
{
t1 = *(node + counter);
t = t1.threshold * variance_norm_factor;
classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
/*
if(stagecascade->two_rects)
{
#pragma unroll
for( n = 0; n < stagecascade->count; n++ )
{
t1 = *(node + counter);
t = t1.threshold * variance_norm_factor;
classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
classsum += calc_sum1(t1, p_offset,1) * t1.weight[1];
stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
classsum += calc_sum1(t1, p_offset,1) * t1.weight[1];
stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
counter++;
}
}
else
{
#pragma unroll
for( n = 0; n < stagecascade->count; n++ )
{
t = node[counter].threshold*variance_norm_factor;
classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
counter++;
}
}
else
{
#pragma unroll
for( n = 0; n < stagecascade->count; n++ )
{
t = node[counter].threshold*variance_norm_factor;
classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
if( node[counter].p0[2] )
classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
if( node[counter].p0[2] )
classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
counter++;
}
}
*/
/*
counter++;
}
}
*/
/*
__kernel void gpuRunHaarClassifierCascade_ScaleWindow(
constant GpuHidHaarClassifierCascade * _cascade,
global GpuHidHaarStageClassifier * stagecascadeptr,
//global GpuHidHaarClassifier * classifierptr,
global GpuHidHaarTreeNode * nodeptr,
global int * sum,
global float * sqsum,
global int * _candidate,
int pixel_step,
int cols,
int rows,
int start_stage,
int end_stage,
//int counts,
int nodenum,
int ystep,
int detect_width,
//int detect_height,
int loopcount,
int outputstep)
//float scalefactor)
constant GpuHidHaarClassifierCascade * _cascade,
global GpuHidHaarStageClassifier * stagecascadeptr,
//global GpuHidHaarClassifier * classifierptr,
global GpuHidHaarTreeNode * nodeptr,
global int * sum,
global float * sqsum,
global int * _candidate,
int pixel_step,
int cols,
int rows,
int start_stage,
int end_stage,
//int counts,
int nodenum,
int ystep,
int detect_width,
//int detect_height,
int loopcount,
int outputstep)
//float scalefactor)
{
unsigned int x1 = get_global_id(0);
unsigned int y1 = get_global_id(1);
int p_offset;
int m, n;
int result;
int counter;
float mean, variance_norm_factor;
for(int i=0;i<loopcount;i++)
{
constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
global int * candidate = _candidate + i*outputstep;
int window_width = cascade->p1 - cascade->p0;
int window_height = window_width;
result = 1;
counter = 0;
unsigned int x = mul24(x1,ystep);
unsigned int y = mul24(y1,ystep);
if((x < cols - window_width - 1) && (y < rows - window_height -1))
{
global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
//global GpuHidHaarClassifier *classifier = classifierptr;
global GpuHidHaarTreeNode *node = nodeptr + nodenum*i;
unsigned int x1 = get_global_id(0);
unsigned int y1 = get_global_id(1);
int p_offset;
int m, n;
int result;
int counter;
float mean, variance_norm_factor;
for(int i=0;i<loopcount;i++)
{
constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
global int * candidate = _candidate + i*outputstep;
int window_width = cascade->p1 - cascade->p0;
int window_height = window_width;
result = 1;
counter = 0;
unsigned int x = mul24(x1,ystep);
unsigned int y = mul24(y1,ystep);
if((x < cols - window_width - 1) && (y < rows - window_height -1))
{
global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
//global GpuHidHaarClassifier *classifier = classifierptr;
global GpuHidHaarTreeNode *node = nodeptr + nodenum*i;
p_offset = mad24(y, pixel_step, x);// modify
p_offset = mad24(y, pixel_step, x);// modify
mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
*(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
*cascade->inv_window_area;
mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
*(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
*cascade->inv_window_area;
variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
*(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
*(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
// if( cascade->is_stump_based )
//{
for( m = start_stage; m < end_stage; m++ )
{
float stage_sum = 0.f;
float t, classsum;
GpuHidHaarTreeNode t1;
// if( cascade->is_stump_based )
//{
for( m = start_stage; m < end_stage; m++ )
{
float stage_sum = 0.f;
float t, classsum;
GpuHidHaarTreeNode t1;
//#pragma unroll
for( n = 0; n < stagecascade->count; n++ )
{
t1 = *(node + counter);
t = t1.threshold * variance_norm_factor;
classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
//#pragma unroll
for( n = 0; n < stagecascade->count; n++ )
{
t1 = *(node + counter);
t = t1.threshold * variance_norm_factor;
classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
if((t1.p0[2]) && (!stagecascade->two_rects))
classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
if((t1.p0[2]) && (!stagecascade->two_rects))
classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
counter++;
}
stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
counter++;
}
if (stage_sum < stagecascade->threshold)
{
result = 0;
break;
}
if (stage_sum < stagecascade->threshold)
{
result = 0;
break;
}
stagecascade++;
stagecascade++;
}
if(result)
{
candidate[4 * (y1 * detect_width + x1)] = x;
candidate[4 * (y1 * detect_width + x1) + 1] = y;
candidate[4 * (y1 * detect_width + x1)+2] = window_width;
candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
}
//}
}
}
}
if(result)
{
candidate[4 * (y1 * detect_width + x1)] = x;
candidate[4 * (y1 * detect_width + x1) + 1] = y;
candidate[4 * (y1 * detect_width + x1)+2] = window_width;
candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
}
//}
}
}
}
*/
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment