Commit fbfccffb authored by Valentina Kustikova's avatar Valentina Kustikova

Integration object detection using Latent SVM. Sample was added.

parent a22f74c3
define_opencv_module(objdetect opencv_core opencv_imgproc)
define_opencv_module(objdetect opencv_core opencv_imgproc opencv_highgui)
......@@ -139,6 +139,129 @@ CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascad
CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
CvPoint pt, int start_stage CV_DEFAULT(0));
/****************************************************************************************\
* Latent SVM Object Detection functions *
\****************************************************************************************/
// DataType: STRUCT position
// Describes the position of a filter in the feature pyramid
// l - index of the level in the feature pyramid
// (x, y) - coordinates of the filter inside level l
// All three members are unsigned, so negative offsets cannot be represented.
typedef struct
{
unsigned int x;
unsigned int y;
unsigned int l;
} position;
// DataType: STRUCT filterObject
// Description of the filter, which corresponds to the part of the object
// V - ideal (penalty = 0) position of the partial filter
// relative to the root filter position (V_i in the paper)
// fineFunction - vector describing the penalty function (d_i in the paper)
// pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
// FILTER DESCRIPTION
// Rectangular map (sizeX x sizeY),
// every cell stores a feature vector (dimension = p)
// H - matrix of feature vectors (sizeX * sizeY * p floats);
// to set and get feature vectors (i,j)
// use the formula H[(j * sizeX + i) * p + k], where
// k - component of the feature vector in cell (i, j)
// END OF FILTER DESCRIPTION
// xp - auxiliary parameter for internal use:
// size of a row in the feature vectors
// (yp = (int) (p / xp); p = xp * yp)
typedef struct{
position V;
float fineFunction[4];
unsigned int sizeX;
unsigned int sizeY;
unsigned int p;
unsigned int xp;
float *H;
} filterObject;
// data type: STRUCT CvLatentSvmDetector
// structure contains the internal representation of a trained Latent SVM detector
// (filled in by cvLoadLatentSvmDetector, released by cvReleaseLatentSvmDetector)
// num_filters - total number of filters (root plus part) in model
// num_components - number of components in model
// num_part_filters - array containing number of part filters for each component
// filters - root and part filters for all model components
// (array of num_filters pointers, each filter is released separately)
// b - biases for all model components
// score_threshold - confidence level threshold
typedef struct CvLatentSvmDetector
{
int num_filters;
int num_components;
int* num_part_filters;
filterObject** filters;
float* b;
float score_threshold;
}
CvLatentSvmDetector;
// data type: STRUCT CvObjectDetection
// structure contains the bounding box and confidence level for a detected object;
// elements of this type are stored in the sequence returned by cvLatentSvmDetectObjects
// rect - bounding box for a detected object
// score - confidence level
typedef struct CvObjectDetection
{
CvRect rect;
float score;
} CvObjectDetection;
//////////////// Object Detection using Latent SVM //////////////
/*
// load trained detector from a file
//
// API
// CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
// INPUT
// filename - path to the file containing the parameters of
// trained Latent SVM detector
// OUTPUT
// trained Latent SVM detector in internal representation
*/
CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);
/*
// release memory allocated for CvLatentSvmDetector structure
//
// API
// void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// INPUT
// detector - CvLatentSvmDetector structure to be released
// OUTPUT
*/
CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
/*
// find rectangular regions in the given image that are likely
// to contain objects and corresponding confidence levels
//
// API
// CvSeq* cvLatentSvmDetectObjects(IplImage* image,
// CvLatentSvmDetector* detector,
// CvMemStorage* storage,
// float overlap_threshold = 0.5f);
// INPUT
// image - image to detect objects in
// detector - Latent SVM detector in internal representation
// storage - memory storage to store the resultant sequence
// of the object candidate rectangles
// overlap_threshold - threshold for the non-maximum suppression algorithm,
// default = 0.5f [a reference to the original paper will be added here]
// OUTPUT
// sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
*/
CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
CvLatentSvmDetector* detector,
CvMemStorage* storage,
float overlap_threshold CV_DEFAULT(0.5f));
#ifdef __cplusplus
}
......
#ifndef DIST_TRANSFORM
#define DIST_TRANSFORM
#include "precomp.hpp"
#include "_types.h"
#include "_error.h"
/*
// Computation of the intersection point of two functions
// (parabolas in the variable y)
// a(y - q1) + b(q1 - y)(q1 - y) + f[q1]
// a(y - q2) + b(q2 - y)(q2 - y) + f[q2]
//
// API
// int GetPointOfIntersection(const F_type *f,
const F_type a, const F_type b,
int q1, int q2, F_type *point);
// INPUT
// f - function on the regular grid
// a - coefficient of the function
// b - coefficient of the function
// q1 - parameter of the function
// q2 - parameter of the function
// OUTPUT
// point - point of intersection
// RESULT
// Error status
*/
int GetPointOfIntersection(const float *f,
const float a, const float b,
int q1, int q2, float *point);
/*
// Solution of the one-dimensional generalized distance transform problem
// on the regular grid at all points
// min (a(y' - y) + b(y' - y)(y' - y) + f(y')) (on y')
//
// API
// int DistanceTransformOneDimensionalProblem(const F_type *f, const int n,
const F_type a, const F_type b,
F_type *distanceTransform,
int *points);
// INPUT
// f - function on the regular grid
// n - grid dimension
// a - coefficient of optimizable function
// b - coefficient of optimizable function
// OUTPUT
// distanceTransform - values of generalized distance transform
// points - arguments that corresponds to the optimal value of function
// RESULT
// Error status
*/
int DistanceTransformOneDimensionalProblem(const float *f, const int n,
const float a, const float b,
float *distanceTransform,
int *points);
/*
// Computation next cycle element
//
// API
// int GetNextCycleElement(int k, int n, int q);
// INPUT
// k - index of the previous cycle element
// n - number of matrix rows
// q - parameter that equal (number_of_rows * number_of_columns - 1)
// OUTPUT
// None
// RESULT
// Next cycle element
*/
int GetNextCycleElement(int k, int n, int q);
/*
// Transposition of cycle elements
//
// API
// void TransposeCycleElements(F_type *a, int *cycle, int cycle_len);
// INPUT
// a - initial matrix
// cycle - cycle
// cycle_len - cycle length
// OUTPUT
// a - matrix with transposed elements
// RESULT
// None
*/
void TransposeCycleElements(float *a, int *cycle, int cycle_len);
/*
// Getting transposed matrix
//
// API
// void Transpose(F_type *a, int n, int m);
// INPUT
// a - initial matrix
// n - number of rows
// m - number of columns
// OUTPUT
// a - transposed matrix
// RESULT
// None
*/
void Transpose(float *a, int n, int m);
/*
// Solution of the two-dimensional generalized distance transform problem
// on the regular grid at all points
// min{d2(y' - y) + d4(y' - y)(y' - y) +
min(d1(x' - x) + d3(x' - x)(x' - x) + f(x',y'))} (on x', y')
//
// API
// int DistanceTransformTwoDimensionalProblem(const F_type *f,
const int n, const int m,
const F_type coeff[4],
F_type *distanceTransform,
int *pointsX, int *pointsY);
// INPUT
// f - function on the regular grid
// n - number of rows
// m - number of columns
// coeff - coefficients of optimizable function
coeff[0] = d1, coeff[1] = d2,
coeff[2] = d3, coeff[3] = d4
// OUTPUT
// distanceTransform - values of generalized distance transform
// pointsX - arguments x' that correspond to the optimal value
// pointsY - arguments y' that correspond to the optimal value
// RESULT
// Error status
*/
int DistanceTransformTwoDimensionalProblem(const float *f,
const int n, const int m,
const float coeff[4],
float *distanceTransform,
int *pointsX, int *pointsY);
#endif
\ No newline at end of file
#ifndef SVM_ERROR
#define SVM_ERROR
// Status codes returned by the Latent SVM routines.
// Success values are not uniform across modules (0, 1 and 2 below),
// so compare against the module-specific *_OK constant rather than against zero.
// All failure codes are negative and unique.

// Latent SVM core / memory management routines: success
#define LATENT_SVM_OK 0
// Generalized distance transform: success and failure codes
#define DISTANCE_TRANSFORM_OK 1
#define DISTANCE_TRANSFORM_GET_INTERSECTION_ERROR -1
#define DISTANCE_TRANSFORM_ERROR -2
#define DISTANCE_TRANSFORM_EQUAL_POINTS -3
// Latent SVM pipeline failure codes
#define LATENT_SVM_GET_FEATURE_PYRAMID_FAILED -4
#define LATENT_SVM_SEARCH_OBJECT_FAILED -5
#define LATENT_SVM_FAILED_SUPERPOSITION -6
#define FILTER_OUT_OF_BOUNDARIES -7
// Fourier transform routines: success and failure codes
#define FFT_OK 2
#define FFT_ERROR -8
#endif
\ No newline at end of file
#ifndef _FFT_H
#define _FFT_H
#include "precomp.hpp"
#include "_types.h"
#include "_error.h"
#include <math.h>
/*
// 1-dimensional FFT
//
// API
// int fft(float *x_in, float *x_out, int n, int shift);
// INPUT
// x_in - input signal
// n - number of elements for searching Fourier image
// shift - shift between input elements
// OUTPUT
// x_out - output signal (contains 2n elements in order
Re(x_in[0]), Im(x_in[0]), Re(x_in[1]), Im(x_in[1]) and etc.)
// RESULT
// Error status
*/
int fft(float *x_in, float *x_out, int n, int shift);
/*
// Inverse 1-dimensional FFT
//
// API
// int fftInverse(float *x_in, float *x_out, int n, int shift);
// INPUT
// x_in - Fourier image of 1d input signal(contains 2n elements
in order Re(x_in[0]), Im(x_in[0]),
Re(x_in[1]), Im(x_in[1]) and etc.)
// n - number of elements for searching counter FFT image
// shift - shift between input elements
// OUTPUT
// x_out - reconstructed signal (n complex elements stored as Re, Im pairs)
// RESULT
// Error status
*/
int fftInverse(float *x_in, float *x_out, int n, int shift);
/*
// 2-dimensional FFT
//
// API
// int fft2d(float *x_in, float *x_out, int numRows, int numColls);
// INPUT
// x_in - input signal (matrix, launched by rows)
// numRows - number of rows
// numColls - number of columns
// OUTPUT
// x_out - output signal (contains (2 * numRows * numColls) elements
in order Re(x_in[0][0]), Im(x_in[0][0]),
Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// RESULT
// Error status
*/
int fft2d(float *x_in, float *x_out, int numRows, int numColls);
/*
// Inverse 2-dimensional FFT
//
// API
// int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls);
// INPUT
// x_in - Fourier image of matrix (contains (2 * numRows * numColls)
elements in order Re(x_in[0][0]), Im(x_in[0][0]),
Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// numRows - number of rows
// numColls - number of columns
// OUTPUT
// x_out - initial signal (matrix, launched by rows)
// RESULT
// Error status
*/
int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls);
#endif
\ No newline at end of file
This diff is collapsed.
#ifndef LSVM_PARSER
#define LSVM_PARSER
#include "precomp.hpp"
#include "_types.h"
#define MODEL 1
#define P 2
#define COMP 3
#define SCORE 4
#define RFILTER 100
#define PFILTERs 101
#define PFILTER 200
#define SIZEX 150
#define SIZEY 151
#define WEIGHTS 152
#define TAGV 300
#define Vx 350
#define Vy 351
#define TAGD 400
#define Dx 451
#define Dy 452
#define Dxx 453
#define Dyy 454
#define BTAG 500
#define STEP_END 1000
#define EMODEL (STEP_END + MODEL)
#define EP (STEP_END + P)
#define ECOMP (STEP_END + COMP)
#define ESCORE (STEP_END + SCORE)
#define ERFILTER (STEP_END + RFILTER)
#define EPFILTERs (STEP_END + PFILTERs)
#define EPFILTER (STEP_END + PFILTER)
#define ESIZEX (STEP_END + SIZEX)
#define ESIZEY (STEP_END + SIZEY)
#define EWEIGHTS (STEP_END + WEIGHTS)
#define ETAGV (STEP_END + TAGV)
#define EVx (STEP_END + Vx)
#define EVy (STEP_END + Vy)
#define ETAGD (STEP_END + TAGD)
#define EDx (STEP_END + Dx)
#define EDy (STEP_END + Dy)
#define EDxx (STEP_END + Dxx)
#define EDyy (STEP_END + Dyy)
#define EBTAG (STEP_END + BTAG)
//extern "C" {
void LSVMparser(const char * filename, filterObject *** model, int *last, int *max, int **comp, float **b, int *count, float * score);
// Loads a trained model from a file.
// The output values are the ones cvLoadLatentSvmDetector stores in the
// CvLatentSvmDetector structure.
// NOTE(review): the meaning of the returned int is not visible in this header — confirm
// against the implementation before relying on it.
#ifdef __cplusplus
extern "C"
#endif
int loadModel(
// INPUT
const char *modelPath,// - path to the model file
// OUTPUT
filterObject ***filters,// - array of pointers to the loaded filters
int *kFilters, //- total number of filters
int *kComponents, //- number of model components
int **kPartFilters, //- array with the number of part filters of each component
float **b, //- array of biases of the components
float *scoreThreshold); //- confidence (score) threshold
//};
#endif
\ No newline at end of file
This diff is collapsed.
#ifndef RESIZEIMG
#define RESIZEIMG
#include "precomp.hpp"
#include "_types.h"
IplImage * resize_opencv (IplImage * img, float scale);
IplImage * resize_article_dp1(IplImage * img, float scale, const int k);
IplImage * resize_article_dp(IplImage * img, float scale, const int k);
#endif
\ No newline at end of file
#ifndef _ROUTINE_H
#define _ROUTINE_H
#include "precomp.hpp"
#include "_types.h"
#include "_error.h"
//////////////////////////////////////////////////////////////
// Memory management routines
// All parameter names correspond to the previous data structure descriptions
// All "alloc" functions return allocated memory for 1 object
// with all fields including arrays
// Error status is return value
//////////////////////////////////////////////////////////////
int allocFilterObject(filterObject **obj, const int sizeX, const int sizeY,
const int p, const int xp);
int freeFilterObject (filterObject **obj);
int allocFeatureMapObject(featureMap **obj, const int sizeX, const int sizeY,
const int p, const int xp);
int freeFeatureMapObject (featureMap **obj);
#ifdef __cplusplus
extern "C"
#endif
int allocFeaturePyramidObject(featurePyramid **obj,
const int lambda, const int countLevel);
#ifdef __cplusplus
extern "C"
#endif
int freeFeaturePyramidObject (featurePyramid **obj);
int allocFFTImage(fftImage **image, int p, int dimX, int dimY);
int freeFFTImage(fftImage **image);
#endif
\ No newline at end of file
#ifndef SVM_TYPE
#define SVM_TYPE
//#include "opencv2/core/core.hpp"
//#include "opencv2/highgui/highgui.hpp"
#include "precomp.hpp"
//#define FFT_CONV
// The constant pi (double precision literal)
#define PI 3.1415926535897932384626433832795
// Small tolerance value
// (presumably used for floating point comparisons — usage is not visible here)
#define EPS 0.000001
// Largest and smallest values used as float sentinels.
// F_MAX equals the usual FLT_MAX; note that F_MIN is NOT exactly -F_MAX
// (the last digit differs).
#define F_MAX 3.402823466e+38
#define F_MIN -3.402823465e+38
// The number of elements in bin
// The number of sectors in gradient histogram building
#define CNTPARTION 9
// The number of levels in image resize procedure
// We need Lambda levels to resize image twice
#define LAMBDA 10
// Block size. Used in feature pyramid building procedure
#define SIDE_LENGTH 8
//////////////////////////////////////////////////////////////
// main data structures //
//////////////////////////////////////////////////////////////
// DataType: STRUCT featureMap
// FEATURE MAP DESCRIPTION
// Rectangular map (sizeX x sizeY),
// every cell stores a feature vector (dimension = p)
// Map - matrix of feature vectors (sizeX * sizeY * p floats);
// to set and get feature vectors (i,j)
// use the formula Map[(j * sizeX + i) * p + k], where
// k - component of the feature vector in cell (i, j)
// END OF FEATURE MAP DESCRIPTION
// xp - auxiliary parameter for internal use:
// size of a row in the feature vectors
// (yp = (int) (p / xp); p = xp * yp)
typedef struct{
int sizeX;
int sizeY;
int p;
int xp;
float *Map;
} featureMap;
// DataType: STRUCT featurePyramid
// Set of feature maps computed at different levels (scales)
//
// countLevel - number of levels in the feature pyramid
// lambda - resize scale coefficient
// pyramid - array of countLevel pointers to the feature maps of the levels
typedef struct{
int countLevel;
int lambda;
featureMap **pyramid;
} featurePyramid;
// DataType: STRUCT filterDisposition
// The structure stores preliminary results of the optimization process
// with objective function D
//
// score - array with optimal objective values
// x - array with X coordinates of the optimization problem solutions
// y - array with Y coordinates of the optimization problem solutions
// NOTE(review): the array lengths are not stored in the structure —
// the owner has to track them separately.
typedef struct{
float *score;
int *x;
int *y;
} filterDisposition;
// DataType: STRUCT fftImage
// The structure stores an FFT image
//
// p - number of channels
// dimX, dimY - dimensions of every channel
// (NOTE(review): which one counts rows and which one columns
// is not visible here — confirm against the callers)
// channels - array of p pointers; allocFFTImage gives every channel
// 2 * dimX * dimY floats
typedef struct{
unsigned int p;
unsigned int dimX;
unsigned int dimY;
float **channels;
} fftImage;
#endif
This diff is collapsed.
This diff is collapsed.
#include "_fft.h"
/*
// Integer division with remainder
//
// API
// int getEntireRes(int number, int divisor, int *entire, int *res);
// INPUT
// number - dividend
// divisor - divisor (must not be zero, it is not checked)
// OUTPUT
// entire - quotient (number / divisor)
// res - remainder (number % divisor)
// RESULT
// Always FFT_OK
*/
int getEntireRes(int number, int divisor, int *entire, int *res)
{
    const int quotient = number / divisor;
    const int remainder = number % divisor;

    *entire = quotient;
    *res = remainder;
    return FFT_OK;
}
/*
// Factorization of n into two multipliers, n = n1 * n2
//
// API
// int getMultipliers(int n, int *n1, int *n2);
// INPUT
// n - number to split
// OUTPUT
// n1 - the largest divisor of n that lies in [2, n / 2],
// or 1 when there is no such divisor
// n2 - n / n1
// RESULT
// FFT_OK when a divisor was found,
// FFT_ERROR otherwise (n is 1, prime, or has no divisor in range);
// n1 = 1 and n2 = n are still written in that case
*/
int getMultipliers(int n, int *n1, int *n2)
{
    int candidate;

    if (n != 1)
    {
        // search downwards so that the first hit is the largest divisor
        candidate = n / 2;
        while (candidate >= 2)
        {
            if (n % candidate == 0)
            {
                *n1 = candidate;
                *n2 = n / candidate;
                return FFT_OK; // n = n1 * n2
            }
            --candidate;
        }
    }

    // n = 1 or no divisor found
    *n1 = 1;
    *n2 = n;
    return FFT_ERROR;
}
/*
// 1-dimensional discrete Fourier transform of a complex signal
//
// API
// int fft(float *x_in, float *x_out, int n, int shift);
// INPUT
// x_in - input signal; element j is read as the pair
// Re = x_in[shift * j], Im = x_in[shift * j + 1]
// n - number of elements of the signal
// shift - distance (in floats) between consecutive elements,
// 2 for a densely packed complex array
// OUTPUT
// x_out - Fourier image, stored with the same layout as the input
// (Re at x_out[shift * j], Im at x_out[shift * j + 1]);
// must not overlap x_in, because the input is read
// while the output is being written
// RESULT
// Always FFT_OK
*/
int fft(float *x_in, float *x_out, int n, int shift)
{
    int n1, n2, k1, k2, m1, m2, index, idx;
    float alpha, beta, gamma, angle, cosAngle, sinAngle;
    float tmpGamma, tmpAlpha, tmpBeta;
    float tmpRe, tmpIm, phaseRe, phaseIm;

    // Split n into n1 * n2. The status is ignored on purpose: when n is 1 or
    // prime, getMultipliers still sets n1 = 1 and n2 = n, and the loops below
    // degenerate into a plain DFT.
    // No recursive call on the factors is made: every output sample is fully
    // computed (and overwritten) by the loops below, so such calls would only
    // waste time.
    getMultipliers(n, &n1, &n2);

    alpha = (float)(2.0 * PI / ((float)n));
    beta = (float)(2.0 * PI / ((float)n1));
    gamma = (float)(2.0 * PI / ((float)n2));
    for (k1 = 0; k1 < n1; k1++)
    {
        tmpBeta = beta * k1;
        for (k2 = 0; k2 < n2; k2++)
        {
            // output sample with index n2 * k1 + k2
            idx = shift * (n2 * k1 + k2);
            x_out[idx] = 0.0;
            x_out[idx + 1] = 0.0;
            tmpGamma = gamma * k2;
            tmpAlpha = alpha * k2;
            for (m1 = 0; m1 < n1; m1++)
            {
                // inner sum over input samples n1 * m2 + m1, m2 = 0..n2-1
                tmpRe = 0.0;
                tmpIm = 0.0;
                for (m2 = 0; m2 < n2; m2++)
                {
                    angle = tmpGamma * m2;
                    index = shift * (n1 * m2 + m1);
                    cosAngle = cosf(angle);
                    sinAngle = sinf(angle);
                    tmpRe += x_in[index] * cosAngle + x_in[index + 1] * sinAngle;
                    tmpIm += x_in[index + 1] * cosAngle - x_in[index] * sinAngle;
                }
                // twiddle factor exp(-i * 2 * pi * m1 * k2 / n)
                angle = tmpAlpha * m1;
                cosAngle = cosf(angle);
                sinAngle = sinf(angle);
                phaseRe = cosAngle * tmpRe + sinAngle * tmpIm;
                phaseIm = cosAngle * tmpIm - sinAngle * tmpRe;
                // outer factor exp(-i * 2 * pi * m1 * k1 / n1)
                angle = tmpBeta * m1;
                cosAngle = cosf(angle);
                sinAngle = sinf(angle);
                x_out[idx] += (cosAngle * phaseRe + sinAngle * phaseIm);
                x_out[idx + 1] += (cosAngle * phaseIm - sinAngle * phaseRe);
            }
        }
    }
    return FFT_OK;
}
/*
// Inverse 1-dimensional discrete Fourier transform
//
// API
// int fftInverse(float *x_in, float *x_out, int n, int shift);
// INPUT
// x_in - Fourier image of a 1d signal; element j is read as the pair
// Re = x_in[shift * j], Im = x_in[shift * j + 1]
// n - number of elements of the signal
// shift - distance (in floats) between consecutive elements,
// 2 for a densely packed complex array
// OUTPUT
// x_out - reconstructed signal, stored with the same layout as the input
// and already divided by n; must not overlap x_in, because
// the input is read while the output is being written
// RESULT
// Always FFT_OK
*/
int fftInverse(float *x_in, float *x_out, int n, int shift)
{
    int n1, n2, k1, k2, m1, m2, index, idx;
    float alpha, beta, gamma, angle, cosAngle, sinAngle;
    float tmpRe, tmpIm, phaseRe, phaseIm;

    // Split n into n1 * n2. The status is ignored on purpose: when n is 1 or
    // prime, getMultipliers still sets n1 = 1 and n2 = n, and the loops below
    // degenerate into a plain inverse DFT.
    // No recursive call on the factors is made: every output sample is fully
    // computed (and overwritten) by the loops below, so such calls would only
    // waste time.
    getMultipliers(n, &n1, &n2);

    alpha = (float)(2.0f * PI / ((float)n));
    beta = (float)(2.0f * PI / ((float)n1));
    gamma = (float)(2.0f * PI / ((float)n2));
    for (m1 = 0; m1 < n1; m1++)
    {
        for (m2 = 0; m2 < n2; m2++)
        {
            // output sample with index n1 * m2 + m1
            idx = (n1 * m2 + m1) * shift;
            x_out[idx] = 0.0;
            x_out[idx + 1] = 0.0;
            for (k2 = 0; k2 < n2; k2++)
            {
                // inner sum over input samples n2 * k1 + k2, k1 = 0..n1-1
                tmpRe = 0.0;
                tmpIm = 0.0;
                for (k1 = 0; k1 < n1; k1++)
                {
                    angle = beta * k1 * m1;
                    index = shift *(n2 * k1 + k2);
                    sinAngle = sinf(angle);
                    cosAngle = cosf(angle);
                    tmpRe += x_in[index] * cosAngle - x_in[index + 1] * sinAngle;
                    tmpIm += x_in[index] * sinAngle + x_in[index + 1] * cosAngle;
                }
                // twiddle factor exp(+i * 2 * pi * m1 * k2 / n)
                angle = alpha * m1 * k2;
                sinAngle = sinf(angle);
                cosAngle = cosf(angle);
                phaseRe = cosAngle * tmpRe - sinAngle * tmpIm;
                phaseIm = cosAngle * tmpIm + sinAngle * tmpRe;
                // outer factor exp(+i * 2 * pi * k2 * m2 / n2)
                angle = gamma * k2 * m2;
                sinAngle = sinf(angle);
                cosAngle = cosf(angle);
                x_out[idx] += cosAngle * phaseRe - sinAngle * phaseIm;
                x_out[idx + 1] += cosAngle * phaseIm + sinAngle * phaseRe;
            }
            // normalization of the inverse transform
            x_out[idx] /= n;
            x_out[idx + 1] /= n;
        }
    }
    return FFT_OK;
}
/*
// 2-dimensional FFT
//
// API
// int fft2d(float *x_in, float *x_out, int numRows, int numColls);
// INPUT
// x_in - input signal (matrix, launched by rows)
// numRows - number of rows
// numColls - number of collumns
// OUTPUT
// x_out - output signal (contains (2 * numRows * numColls) elements
in order Re(x_in[0][0]), Im(x_in[0][0]),
Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// RESULT
// Error status
*/
int fft2d(float *x_in, float *x_out, int numRows, int numColls)
{
int i, size;
float *x_outTmp;
size = numRows * numColls;
x_outTmp = (float *)malloc(sizeof(float) * (2 * size));
for (i = 0; i < numRows; i++)
{
fft(x_in + i * 2 * numColls,
x_outTmp + i * 2 * numColls,
numColls, 2);
}
for (i = 0; i < numColls; i++)
{
fft(x_outTmp + 2 * i,
x_out + 2 * i,
numRows, 2 * numColls);
}
free(x_outTmp);
return FFT_OK;
}
/*
// Inverse 2-dimensional FFT
//
// API
// int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls);
// INPUT
// x_in - Fourier image of matrix (contains (2 * numRows * numColls)
elements in order Re(x_in[0][0]), Im(x_in[0][0]),
Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// numRows - number of rows
// numColls - number of collumns
// OUTPUT
// x_out - initial signal (matrix, launched by rows)
// RESULT
// Error status
*/
int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls)
{
int i, size;
float *x_outTmp;
size = numRows * numColls;
x_outTmp = (float *)malloc(sizeof(float) * (2 * size));
for (i = 0; i < numRows; i++)
{
fftInverse(x_in + i * 2 * numColls,
x_outTmp + i * 2 * numColls,
numColls, 2);
}
for (i = 0; i < numColls; i++)
{
fftInverse(x_outTmp + 2 * i,
x_out + 2 * i,
numRows, 2 * numColls);
}
free(x_outTmp);
return FFT_OK;
}
This diff is collapsed.
#include "precomp.hpp"
#include "_lsvmparser.h"
#include "_matching.h"
/*
// load trained detector from a file
//
// API
// CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
// INPUT
// filename - path to the file containing the parameters of
- trained Latent SVM detector
// OUTPUT
// trained Latent SVM detector in internal representation
*/
CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename)
{
CvLatentSvmDetector* detector = 0;
filterObject** filters = 0;
int kFilters = 0;
int kComponents = 0;
int* kPartFilters = 0;
float* b = 0;
float scoreThreshold = 0.f;
loadModel(filename, &filters, &kFilters, &kComponents, &kPartFilters, &b, &scoreThreshold);
detector = (CvLatentSvmDetector*)malloc(sizeof(CvLatentSvmDetector));
detector->filters = filters;
detector->b = b;
detector->num_components = kComponents;
detector->num_filters = kFilters;
detector->num_part_filters = kPartFilters;
detector->score_threshold = scoreThreshold;
return detector;
}
/*
// release memory allocated for CvLatentSvmDetector structure
//
// API
// void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// INPUT
// detector - address of the pointer to the CvLatentSvmDetector structure
// to be released; nothing is done when detector or *detector is 0
// OUTPUT
// *detector is set to 0
*/
void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector)
{
    CvLatentSvmDetector* d;
    int i;

    // releasing "nothing" is a no-op instead of a crash
    if (!detector || !*detector)
    {
        return;
    }
    d = *detector;

    free(d->b);
    free(d->num_part_filters);
    if (d->filters)
    {
        for (i = 0; i < d->num_filters; i++)
        {
            if (d->filters[i])
            {
                free(d->filters[i]->H);
                free(d->filters[i]);
            }
        }
        free(d->filters);
    }
    free(d);
    *detector = 0;
}
/*
// find rectangular regions in the given image that are likely
// to contain objects and corresponding confidence levels
//
// API
// CvSeq* cvLatentSvmDetectObjects(IplImage* image,
// CvLatentSvmDetector* detector,
// CvMemStorage* storage,
// float overlap_threshold = 0.5f);
// INPUT
// image - image to detect objects in; it is converted in place with
// CV_CVTIMG_SWAP_RB before the search and converted again
// with the same flag before returning
// detector - Latent SVM detector in internal representation
// storage - memory storage to store the resultant sequence
// of the object candidate rectangles
// overlap_threshold - threshold for the non-maximum suppression algorithm [here will be the reference to original paper]
// OUTPUT
// sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
*/
CvSeq* cvLatentSvmDetectObjects(IplImage* image,
CvLatentSvmDetector* detector,
CvMemStorage* storage,
float overlap_threshold)
{
featurePyramid *H = 0;
CvPoint *points = 0, *oppPoints = 0;
int kPoints = 0;
float *score = 0;
unsigned int maxXBorder = 0, maxYBorder = 0;
int numBoxesOut = 0;
CvPoint *pointsOut = 0;
CvPoint *oppPointsOut = 0;
float *scoreOut = 0;
CvSeq* result_seq = 0;
// The input image is modified in place here and converted back before returning.
// NOTE(review): image and detector are not checked for 0, and the caller's image
// is temporarily altered — presumably unsafe if the image is shared between threads.
cvConvertImage(image, image, CV_CVTIMG_SWAP_RB);
// Getting maximum filter dimensions
getMaxFilterDims((const filterObject**)(detector->filters), detector->num_components, detector->num_part_filters, &maxXBorder, &maxYBorder);
// Create feature pyramid with nullable border
H = createFeaturePyramidWithBorder(image, maxXBorder, maxYBorder);
// Search object
// NOTE(review): the value returned by the search (if any) is ignored — confirm
// that a failed search leaves points/oppPoints/score in a releasable state.
searchObjectThresholdSomeComponents(H, (const filterObject**)(detector->filters), detector->num_components,
detector->num_part_filters, detector->b, detector->score_threshold,
&points, &oppPoints, &score, &kPoints);
// Clipping boxes
clippingBoxes(image->width, image->height, points, kPoints);
clippingBoxes(image->width, image->height, oppPoints, kPoints);
// NMS procedure
nonMaximumSuppression(kPoints, points, oppPoints, score, overlap_threshold,
&numBoxesOut, &pointsOut, &oppPointsOut, &scoreOut);
// Copy the boxes that survived NMS into the sequence stored in the caller's storage
result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvObjectDetection), storage );
for (int i = 0; i < numBoxesOut; i++)
{
CvObjectDetection detection = {{0, 0, 0, 0}, 0};
detection.score = scoreOut[i];
CvRect bounding_box = {0, 0, 0, 0};
// pointsOut holds one corner of the box, oppPointsOut the opposite one
bounding_box.x = pointsOut[i].x;
bounding_box.y = pointsOut[i].y;
bounding_box.width = oppPointsOut[i].x - pointsOut[i].x;
bounding_box.height = oppPointsOut[i].y - pointsOut[i].y;
detection.rect = bounding_box;
cvSeqPush(result_seq, &detection);
}
// Second conversion with the same flag as at the top of the function
cvConvertImage(image, image, CV_CVTIMG_SWAP_RB);
freeFeaturePyramidObject(&H);
free(points);
free(oppPoints);
free(score);
// NOTE(review): pointsOut, oppPointsOut and scoreOut are never released here;
// if nonMaximumSuppression allocates them on the heap they leak on every call — confirm.
return result_seq;
}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
......@@ -54,6 +54,8 @@
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/core_c.h"
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/core/internal.hpp"
#endif
#include "_resizeimg.h"
#include <stdio.h>
#include <assert.h>
#include <math.h>
/*
// Creates a resized copy of an image
//
// INPUT
// img - source image (left untouched)
// scale - scale factor applied to both width and height;
// the new dimensions are rounded to the nearest integer
// RESULT
// Newly created image with the depth and number of channels of img,
// filled by cvResize with CV_INTER_AREA interpolation.
// The caller owns the returned image.
*/
IplImage * resize_opencv (IplImage * img, float scale){
    const int srcWidth = img->width;
    const int srcHeight = img->height;
    const int dstWidth = (int)(((float)srcWidth) * scale + 0.5);
    const int dstHeight = (int)(((float)srcHeight) * scale + 0.5);

    IplImage * resized = cvCreateImage(cvSize(dstWidth, dstHeight),
                                       img->depth, img->nChannels);
    cvResize(img, resized, CV_INTER_AREA);
    return resized;
}
//
///*
// * Fast image subsampling.
// * This is used to construct the feature pyramid.
// */
//
//// struct used for caching interpolation values
//typedef struct {
// int si, di;
// float alpha;
//}alphainfo;
//
//// copy src into dst using pre-computed interpolation values
//void alphacopy(float *src, float *dst, alphainfo *ofs, int n) {
// int i;
// for(i = 0; i < n; i++){
// dst[ofs[i].di] += ofs[i].alpha * src[ofs[i].si];
// }
//}
//
//int round(float val){
// return (int)(val + 0.5);
//}
//void bzero(float * arr, int cnt){
// int i;
// for(i = 0; i < cnt; i++){
// arr[i] = 0.0f;
// }
//}
//// resize along each column
//// result is transposed, so we can apply it twice for a complete resize
//void resize1dtran(float *src, int sheight, float *dst, int dheight,
// int width, int chan) {
// alphainfo *ofs;
// float scale = (float)dheight/(float)sheight;
// float invscale = (float)sheight/(float)dheight;
//
// // we cache the interpolation values since they can be
// // shared among different columns
// int len = (int)ceilf(dheight*invscale) + 2*dheight;
// int k = 0;
// int dy;
// float fsy1;
// float fsy2;
// int sy1;
// int sy2;
// int sy;
// int c, x;
// float *s, *d;
//
// ofs = (alphainfo *) malloc (sizeof(alphainfo) * len);
// for (dy = 0; dy < dheight; dy++) {
// fsy1 = dy * invscale;
// fsy2 = fsy1 + invscale;
// sy1 = (int)ceilf(fsy1);
// sy2 = (int)floorf(fsy2);
//
// if (sy1 - fsy1 > 1e-3) {
// assert(k < len);
// assert(sy1 - 1 >= 0);
// ofs[k].di = dy*width;
// ofs[k].si = sy1-1;
// ofs[k++].alpha = (sy1 - fsy1) * scale;
// }
//
// for (sy = sy1; sy < sy2; sy++) {
// assert(k < len);
// assert(sy < sheight);
// ofs[k].di = dy*width;
// ofs[k].si = sy;
// ofs[k++].alpha = scale;
// }
//
// if (fsy2 - sy2 > 1e-3) {
// assert(k < len);
// assert(sy2 < sheight);
// ofs[k].di = dy*width;
// ofs[k].si = sy2;
// ofs[k++].alpha = (fsy2 - sy2) * scale;
// }
// }
//
// // resize each column of each color channel
// bzero(dst, chan*width*dheight);
// for (c = 0; c < chan; c++) {
// for (x = 0; x < width; x++) {
// s = src + c*width*sheight + x*sheight;
// d = dst + c*width*dheight + x;
// alphacopy(s, d, ofs, k);
// }
// }
// free(ofs);
//}
//
//IplImage * resize_article_dp(IplImage * img, float scale, const int k){
// IplImage * imgTmp;
// float W, H;
// unsigned char *dataSrc;
// float * dataf;
// float *src, *dst, *tmp;
// int i, j, kk, channels;
// int index;
// int widthStep;
// int tW, tH;
//
// W = (float)img->width;
// H = (float)img->height;
// channels = img->nChannels;
// widthStep = img->widthStep;
//
// tW = (int)(((float)W) * scale + 0.5f);
// tH = (int)(((float)H) * scale + 0.5f);
//
// src = (float *)malloc(sizeof(float) * (int)(W * H * 3));
//
// dataSrc = (unsigned char*)(img->imageData);
// index = 0;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < W; i++)
// {
// for (j = 0; j < H; j++)
// {
// src[index++] = (float)dataSrc[j * widthStep + i * channels + kk];
// }
// }
// }
//
// imgTmp = cvCreateImage(cvSize(tW , tH), IPL_DEPTH_32F, channels);
//
// dst = (float *)malloc(sizeof(float) * (int)(tH * tW) * channels);
// tmp = (float *)malloc(sizeof(float) * (int)(tH * W) * channels);
//
// resize1dtran(src, (int)H, tmp, (int)tH, (int)W , 3);
//
// resize1dtran(tmp, (int)W, dst, (int)tW, (int)tH, 3);
//
// index = 0;
// //dataf = (float*)imgTmp->imageData;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < tW; i++)
// {
// for (j = 0; j < tH; j++)
// {
// dataf = (float*)(imgTmp->imageData + j * imgTmp->widthStep);
// dataf[ i * channels + kk] = dst[index++];
// }
// }
// }
//
// free(src);
// free(dst);
// free(tmp);
// return imgTmp;
//}
//
//IplImage * resize_article_dp1(IplImage * img, float scale, const int k){
// IplImage * imgTmp;
// float W, H;
// float * dataf;
// float *src, *dst, *tmp;
// int i, j, kk, channels;
// int index;
// int widthStep;
// int tW, tH;
//
// W = (float)img->width;
// H = (float)img->height;
// channels = img->nChannels;
// widthStep = img->widthStep;
//
// tW = (int)(((float)W) * scale + 0.5f);
// tH = (int)(((float)H) * scale + 0.5f);
//
// src = (float *)malloc(sizeof(float) * (int)(W * H) * 3);
//
// index = 0;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < W; i++)
// {
// for (j = 0; j < H; j++)
// {
// src[index++] = (float)(*( (float *)(img->imageData + j * widthStep) + i * channels + kk));
// }
// }
// }
//
// imgTmp = cvCreateImage(cvSize(tW , tH), IPL_DEPTH_32F, channels);
//
// dst = (float *)malloc(sizeof(float) * (int)(tH * tW) * channels);
// tmp = (float *)malloc(sizeof(float) * (int)(tH * W) * channels);
//
// resize1dtran(src, (int)H, tmp, (int)tH, (int)W , 3);
//
// resize1dtran(tmp, (int)W, dst, (int)tW, (int)tH, 3);
//
// index = 0;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < tW; i++)
// {
// for (j = 0; j < tH; j++)
// {
// dataf = (float *)(imgTmp->imageData + j * imgTmp->widthStep);
// dataf[ i * channels + kk] = dst[index++];
// }
// }
// }
//
// free(src);
// free(dst);
// free(tmp);
// return imgTmp;
//}
\ No newline at end of file
#include "_routine.h"
/*
// Allocates a filter object together with its weights array
//
// INPUT
// sizeX, sizeY - dimensions of the filter (in cells)
// p - dimension of the feature vector stored in every cell
// xp - auxiliary parameter stored as is
// OUTPUT
// obj - receives the new object: penalty coefficients and
// position V are zero, H holds sizeX * sizeY * p zeros;
// set to NULL when the allocation fails
// RESULT
// LATENT_SVM_OK on success, -1 when memory cannot be allocated
// (_error.h has no dedicated out-of-memory code; callers only need
// to compare the result with LATENT_SVM_OK)
*/
int allocFilterObject(filterObject **obj, const int sizeX, const int sizeY, const int p, const int xp){
    int i;
    const int count = sizeX * sizeY * p;
    filterObject *filter;

    filter = (filterObject *)malloc(sizeof(filterObject));
    if (filter == NULL){
        (*obj) = NULL;
        return -1;
    }
    filter->sizeX = sizeX;
    filter->sizeY = sizeY;
    filter->p = p;
    filter->xp = xp;
    filter->fineFunction[0] = 0.0f;
    filter->fineFunction[1] = 0.0f;
    filter->fineFunction[2] = 0.0f;
    filter->fineFunction[3] = 0.0f;
    filter->V.x = 0;
    filter->V.y = 0;
    filter->V.l = 0;

    filter->H = (float *) malloc(sizeof (float) * count);
    // malloc(0) may legitimately return NULL, so an empty filter is not an error
    if (filter->H == NULL && count > 0){
        free(filter);
        (*obj) = NULL;
        return -1;
    }
    for(i = 0; i < count; i++){
        filter->H[i] = 0.0f;
    }

    (*obj) = filter;
    return LATENT_SVM_OK;
}
/*
// Releases a filter object together with its weights array
//
// INPUT
// obj - address of the pointer to the object (must not be NULL itself);
// nothing is released when *obj is NULL
// OUTPUT
// *obj is set to NULL
// RESULT
// 0 when there was nothing to release, LATENT_SVM_OK otherwise
*/
int freeFilterObject (filterObject **obj){
    filterObject *filter = *obj;

    if (filter != NULL){
        free(filter->H);
        free(filter);
        (*obj) = NULL;
        return LATENT_SVM_OK;
    }
    return 0;
}
/*
// Allocates a feature map together with its data array
//
// INPUT
// sizeX, sizeY - dimensions of the map (in cells)
// p - dimension of the feature vector stored in every cell
// xp - auxiliary parameter stored as is
// OUTPUT
// obj - receives the new map, Map holds sizeX * sizeY * p zeros;
// set to NULL when the allocation fails
// RESULT
// LATENT_SVM_OK on success, -1 when memory cannot be allocated
// (_error.h has no dedicated out-of-memory code; callers only need
// to compare the result with LATENT_SVM_OK)
*/
int allocFeatureMapObject(featureMap **obj, const int sizeX, const int sizeY, const int p, const int xp){
    int i;
    const int count = sizeX * sizeY * p;
    featureMap *map;

    map = (featureMap *)malloc(sizeof(featureMap));
    if (map == NULL){
        (*obj) = NULL;
        return -1;
    }
    map->sizeX = sizeX;
    map->sizeY = sizeY;
    map->p = p;
    map->xp = xp;

    map->Map = (float *) malloc(sizeof (float) * count);
    // malloc(0) may legitimately return NULL, so an empty map is not an error
    if (map->Map == NULL && count > 0){
        free(map);
        (*obj) = NULL;
        return -1;
    }
    for(i = 0; i < count; i++){
        map->Map[i] = 0.0;
    }

    (*obj) = map;
    return LATENT_SVM_OK;
}
/*
// Releases a feature map together with its data array
//
// INPUT
// obj - address of the pointer to the map (must not be NULL itself);
// nothing is released when *obj is NULL
// OUTPUT
// *obj is set to NULL
// RESULT
// 0 when there was nothing to release, LATENT_SVM_OK otherwise
*/
int freeFeatureMapObject (featureMap **obj){
    featureMap *map = *obj;

    if (map != NULL){
        free(map->Map);
        free(map);
        (*obj) = NULL;
        return LATENT_SVM_OK;
    }
    return 0;
}
/*
// Allocates a feature pyramid with room for countLevel feature maps
//
// INPUT
// lambda - resize scale coefficient stored as is
// countLevel - number of levels of the pyramid
// OUTPUT
// obj - receives the new pyramid; every level pointer is NULL,
// the maps themselves have to be allocated by the caller.
// Set to NULL when the allocation fails
// RESULT
// LATENT_SVM_OK on success, -1 when memory cannot be allocated
// (_error.h has no dedicated out-of-memory code; callers only need
// to compare the result with LATENT_SVM_OK)
*/
int allocFeaturePyramidObject(featurePyramid **obj, const int lambda, const int countLevel){
    int i;
    featurePyramid *pyr;

    pyr = (featurePyramid *)malloc(sizeof(featurePyramid));
    if (pyr == NULL){
        (*obj) = NULL;
        return -1;
    }
    pyr->countLevel = countLevel;
    pyr->lambda = lambda;

    pyr->pyramid = (featureMap **)malloc(sizeof(featureMap *) * countLevel);
    // malloc(0) may legitimately return NULL, so an empty pyramid is not an error
    if (pyr->pyramid == NULL && countLevel > 0){
        free(pyr);
        (*obj) = NULL;
        return -1;
    }
    // Clear every slot: the release routine walks all countLevel entries, so a
    // pyramid that was only partly filled must not contain garbage pointers.
    for(i = 0; i < countLevel; i++){
        pyr->pyramid[i] = NULL;
    }

    (*obj) = pyr;
    return LATENT_SVM_OK;
}
/*
// Releases a feature pyramid together with the feature maps of all levels
//
// INPUT
// obj - address of the pointer to the pyramid (must not be NULL itself);
// nothing is released when *obj is NULL
// OUTPUT
// *obj is set to NULL
// RESULT
// 0 when there was nothing to release, LATENT_SVM_OK otherwise
*/
int freeFeaturePyramidObject (featurePyramid **obj){
    int level;
    featurePyramid *pyr = *obj;

    if (pyr == NULL){
        return 0;
    }
    // every level is released through the feature map routine,
    // which also resets the slot to NULL
    for (level = 0; level < pyr->countLevel; ++level){
        freeFeatureMapObject(&(pyr->pyramid[level]));
    }
    free(pyr->pyramid);
    free(pyr);
    (*obj) = NULL;
    return LATENT_SVM_OK;
}
/*
// Allocates an FFT image: p channels of 2 * dimX * dimY floats each
//
// INPUT
// p - number of channels
// dimX, dimY - dimensions of every channel
// OUTPUT
// image - receives the new image with all channel values set to zero;
// set to NULL when the allocation fails
// RESULT
// LATENT_SVM_OK on success, -1 when memory cannot be allocated
// (_error.h has no dedicated out-of-memory code; callers only need
// to compare the result with LATENT_SVM_OK)
*/
int allocFFTImage(fftImage **image, int p, int dimX, int dimY)
{
    int i, j, size;
    fftImage *img;

    img = (fftImage *)malloc(sizeof(fftImage));
    if (img == NULL)
    {
        *image = NULL;
        return -1;
    }
    img->p = p;
    img->dimX = dimX;
    img->dimY = dimY;

    img->channels = (float **)malloc(sizeof(float *) * p);
    // malloc(0) may legitimately return NULL, so "no channels" is not an error
    if (img->channels == NULL && p > 0)
    {
        free(img);
        *image = NULL;
        return -1;
    }

    // every element occupies two floats
    size = 2 * dimX * dimY;
    for (i = 0; i < p; i++)
    {
        img->channels[i] = (float *)malloc(sizeof(float) * size);
        if (img->channels[i] == NULL && size > 0)
        {
            // roll back the channels allocated so far
            for (j = 0; j < i; j++)
            {
                free(img->channels[j]);
            }
            free(img->channels);
            free(img);
            *image = NULL;
            return -1;
        }
        for (j = 0; j < size; j++)
        {
            img->channels[i][j] = 0.0;
        }
    }

    *image = img;
    return LATENT_SVM_OK;
}
/*
// Releases an FFT image: all channels, the channel table and the image itself
//
// INPUT
// image - address of the pointer to the image (must not be NULL itself);
// nothing is released when *image is NULL
// OUTPUT
// *image is set to NULL, so a later free(*image) or a second call
// of this function is harmless
// RESULT
// Always LATENT_SVM_OK
*/
int freeFFTImage(fftImage **image)
{
    unsigned int i;
    fftImage *img = *image;

    if (img == NULL) return LATENT_SVM_OK;

    if (img->channels != NULL)
    {
        for (i = 0; i < img->p; i++)
        {
            free(img->channels[i]);
            img->channels[i] = NULL;
        }
        free(img->channels);
        img->channels = NULL;
    }
    // The structure itself is released as well, like the other free* routines
    // of this module do; otherwise every image leaks sizeof(fftImage) bytes.
    free(img);
    *image = NULL;
    return LATENT_SVM_OK;
}
\ No newline at end of file
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/highgui/highgui_c.h"
#include <stdio.h>
using namespace cv;
const char* model_filename = "cat.xml";
const char* image_filename = "000028.jpg";
// Runs the Latent SVM detector on the image, prints the time spent in the
// detection call and draws a red rectangle (thickness 3) around every
// detection directly on the image.
// The memory storage used for the detections is created and released here.
void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector)
{
    CvMemStorage* storage = cvCreateMemStorage(0);

    const int64 tickBegin = cvGetTickCount();
    CvSeq* detections = cvLatentSvmDetectObjects(image, detector, storage);
    const int64 tickEnd = cvGetTickCount();
    printf("detection time = %.3f\n", (float)(tickEnd - tickBegin) / (float)(cvGetTickFrequency() * 1000000.0));

    for (int idx = 0; idx < detections->total; ++idx)
    {
        const CvObjectDetection* found = (CvObjectDetection*)cvGetSeqElem( detections, idx );
        const CvRect box = found->rect;
        const CvPoint topLeft = cvPoint(box.x, box.y);
        const CvPoint bottomRight = cvPoint(box.x + box.width, box.y + box.height);
        cvRectangle( image, topLeft, bottomRight, CV_RGB(255,0,0), 3 );
    }

    cvReleaseMemStorage( &storage );
}
// Sample entry point: loads an image and a trained model, runs the detector
// and shows the image with the detections until a key is pressed.
// Usage: <program> [image_file [model_file]]
// Without arguments the built-in defaults image_filename and model_filename
// are used, so the previous behaviour is kept.
// Returns 0 on success and -1 when the image or the model cannot be loaded.
int main(int argc, char* argv[])
{
    const char* image_path = (argc > 1) ? argv[1] : image_filename;
    const char* model_path = (argc > 2) ? argv[2] : model_filename;

    IplImage* image = cvLoadImage(image_path);
    if (!image)
    {
        fprintf(stderr, "Unable to load the image: %s\n", image_path);
        return -1;
    }

    CvLatentSvmDetector* detector = cvLoadLatentSvmDetector(model_path);
    if (!detector)
    {
        fprintf(stderr, "Unable to load the model: %s\n", model_path);
        cvReleaseImage( &image );
        return -1;
    }

    detect_and_draw_objects( image, detector );
    cvNamedWindow( "test", 0 );
    cvShowImage( "test", image );
    cvWaitKey(0);

    cvReleaseLatentSvmDetector( &detector );
    cvReleaseImage( &image );
    cvDestroyAllWindows();
    return 0;
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment