Commit 1a941861 authored by Anatoly Baksheev's avatar Anatoly Baksheev

First version of CascadeClassifier_GPU.

Only for VS2008 for now.
Added a sample for it.
New NPP_staging, for VS2008 only.
parent 31e582e3
......@@ -188,14 +188,14 @@ struct NppStSize32u
enum NppStStatus
{
//already present in NPP
/* NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)
NPP_ERROR = -1, ///< Unknown error
NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error
NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error
NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned
NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error
NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error
NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error*/
//NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)
//NPP_ERROR = -1, ///< Unknown error
//NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error
//NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error
//NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned
//NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error
//NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error
//NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error
//to be added
NPP_INVALID_ROI, ///< Invalid region of interest argument
......@@ -244,7 +244,7 @@ extern "C" {
/** \defgroup core_npp NPP Core
* Basic functions for CUDA streams management.
* WARNING: These functions couldn't be exported from NPP_staging library, so they can't be used
* WARNING: These functions couldn't be exported into DLL, so they can be used only with static version of NPP_staging
* @{
*/
......@@ -569,6 +569,13 @@ NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride,
NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);
/**
* Calculates the size of the temporary buffer for integral image creation
* \see nppiStIntegralGetSize_8u32u
*/
NppStStatus nppiStIntegralGetSize_32f32f(NppStSize32u roiSize, NppSt32u *pBufsize);
/**
* Creates an integral image representation for the input image
*
......@@ -587,6 +594,15 @@ NppStStatus nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Creates an integral image representation for the input image
* \see nppiStIntegral_8u32u_C1R
*/
NppStStatus nppiStIntegral_32f32f_C1R(NppSt32f *d_src, NppSt32u srcStep,
NppSt32f *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Creates an integral image representation for the input image. Host implementation
*
......@@ -602,6 +618,14 @@ NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
/**
* Creates an integral image representation for the input image. Host implementation
* \see nppiStIntegral_8u32u_C1R_host
*/
NppStStatus nppiStIntegral_32f32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,
NppSt32f *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
/**
* Calculates the size of the temporary buffer for squared integral image creation
*
......
......@@ -35,6 +35,13 @@ source_group("Include" FILES ${lib_hdrs})
file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
source_group("Device" FILES ${lib_device_hdrs})
if (HAVE_CUDA AND MSVC)
file(GLOB ncv_srcs "src/nvidia/*.cpp")
file(GLOB ncv_hdrs "src/nvidia/*.h*")
file(GLOB ncv_cuda "src/nvidia/*.cu")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
endif()
if (HAVE_CUDA)
get_filename_component(_path_to_findnpp "${CMAKE_CURRENT_LIST_FILE}" PATH)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp})
......@@ -68,19 +75,16 @@ if (HAVE_CUDA)
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
endif()
CUDA_COMPILE(cuda_objs ${lib_cuda})
include(FindNPP_staging.cmake)
include_directories(${NPPST_INC})
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
#CUDA_BUILD_CLEAN_TARGET()
endif()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${cuda_objs})
IF (HAVE_CUDA)
include(FindNPP_staging.cmake)
include_directories(${NPPST_INC})
target_link_libraries(${the_target} ${NPPST_LIB})
endif()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
......@@ -114,6 +118,7 @@ target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
if (HAVE_CUDA)
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
target_link_libraries(${the_target} ${NPPST_LIB})
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
endif()
......
......@@ -1380,87 +1380,39 @@ namespace cv
explicit BruteForceMatcher_GPU(L2<T> /*d*/) : BruteForceMatcher_GPU_base(L2Dist) {}
};
////////////////////////////////// CascadeClassifier //////////////////////////////////////////
////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
// The cascade classifier class for object detection.
class CV_EXPORTS CascadeClassifier
class CV_EXPORTS CascadeClassifier_GPU
{
public:
struct CV_EXPORTS DTreeNode
{
int featureIdx;
float threshold; // for ordered features only
int left;
int right;
};
struct CV_EXPORTS DTree
{
int nodeCount;
};
struct CV_EXPORTS Stage
{
int first;
int ntrees;
float threshold;
};
enum { BOOST = 0 };
enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };
CascadeClassifier();
CascadeClassifier(const string& filename);
~CascadeClassifier();
public:
CascadeClassifier_GPU();
CascadeClassifier_GPU(const string& filename);
~CascadeClassifier_GPU();
bool empty() const;
bool load(const string& filename);
bool read(const FileNode& node);
void detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor=1.1,
int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size());
bool setImage( Ptr<FeatureEvaluator>&, const Mat& );
int runAt( Ptr<FeatureEvaluator>&, Point );
bool isStumpBased;
int stageType;
int featureType;
int ncategories;
Size origWinSize;
vector<Stage> stages;
vector<DTree> classifiers;
vector<DTreeNode> nodes;
vector<float> leaves;
vector<int> subsets;
void release();
/* returns number of detected objects */
int detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size());
bool findLargestObject;
bool visualizeInPlace;
Ptr<FeatureEvaluator> feval;
Ptr<CvHaarClassifierCascade> oldCascade;
Size getClassifierSize() const;
private:
struct CascadeClassifierImpl;
CascadeClassifierImpl* impl;
};
////////////////////////////////// SURF //////////////////////////////////////////
struct CV_EXPORTS SURFParams_GPU
{
SURFParams_GPU() :
threshold(0.1f),
nOctaves(4),
nIntervals(4),
initialScale(2.f),
l1(3.f/1.5f),
l2(5.f/1.5f),
l3(3.f/1.5f),
l4(1.f/1.5f),
edgeScale(0.81f),
initialStep(1),
extended(true),
featuresRatio(0.01f)
{
}
SURFParams_GPU() : threshold(0.1f), nOctaves(4), nIntervals(4), initialScale(2.f),
l1(3.f/1.5f), l2(5.f/1.5f), l3(3.f/1.5f), l4(1.f/1.5f),
edgeScale(0.81f), initialStep(1), extended(true), featuresRatio(0.01f) {}
//! The interest operator threshold
float threshold;
......
......@@ -170,8 +170,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
if (src.type() == CV_8UC1)
{
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
}
else
{
......@@ -186,8 +185,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
pValues3[1] = nppLut3[1].ptr<Npp32s>();
pValues3[2] = nppLut3[2].ptr<Npp32s>();
}
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
pValues3, lvls.pLevels3, lvls.nValues3) );
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, pValues3, lvls.pLevels3, lvls.nValues3) );
}
}
......
This diff is collapsed.
......@@ -62,7 +62,7 @@ namespace cv
BORDER_REPLICATE_GPU,
BORDER_CONSTANT_GPU
};
// Converts CPU border extrapolation mode into GPU internal analogue.
// Returns true if the GPU analogue exists, false otherwise.
bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
......@@ -105,8 +105,28 @@ namespace cv
const textureReference* tex;
cudaSafeCall( cudaGetTextureReference(&tex, name) );
cudaSafeCall( cudaUnbindTexture(tex) );
}
}
struct KeyPoint_GPU
{
float x;
float y;
float size;
float response;
float angle;
float octave;
};
enum KeypointLayout
{
SF_X,
SF_Y,
SF_SIZE,
SF_RESPONSE,
SF_ANGLE,
SF_OCTAVE,
SF_FEATURE_STRIDE
};
}
}
......
......@@ -47,29 +47,7 @@ namespace cv
{
namespace gpu
{
namespace surf
{
struct KeyPoint_GPU
{
float x;
float y;
float size;
float response;
float angle;
float octave;
};
enum KeypointLayout
{
SF_X,
SF_Y,
SF_SIZE,
SF_RESPONSE,
SF_ANGLE,
SF_OCTAVE,
SF_FEATURE_STRIDE
};
}
}
}
......
......@@ -82,21 +82,16 @@ void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu()
namespace
{
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
NppiSize oSizeROI, int nScaleFactor);
typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst,
int nDstStep, NppiSize oSizeROI);
typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst,
int nDstStep, NppiSize oSizeROI);
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, int nDstStep, NppiSize oSizeROI);
typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
dst.create( src1.size(), src1.type() );
NppiSize sz;
......@@ -106,24 +101,16 @@ namespace
switch (src1.type())
{
case CV_8UC1:
nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, 0) );
nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
break;
case CV_8UC4:
nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz, 0) );
nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
break;
case CV_32SC1:
nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,
src2.ptr<Npp32s>(), src2.step,
dst.ptr<Npp32s>(), dst.step, sz) );
nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
break;
case CV_32FC1:
nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,
src2.ptr<Npp32f>(), src2.step,
dst.ptr<Npp32f>(), dst.step, sz) );
nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
break;
default:
CV_Assert(!"Unsupported source type");
......@@ -133,16 +120,15 @@ namespace
template<int SCN> struct NppArithmScalarFunc;
template<> struct NppArithmScalarFunc<1>
{
typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst,
int nDstStep, NppiSize oSizeROI);
typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst, int nDstStep, NppiSize oSizeROI);
};
template<> struct NppArithmScalarFunc<2>
{
typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst,
int nDstStep, NppiSize oSizeROI);
typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst, int nDstStep, NppiSize oSizeROI);
};
template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
{
static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
......@@ -254,24 +240,16 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
switch (src1.type())
{
case CV_8UC1:
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz) );
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
break;
case CV_8UC4:
nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step,
src2.ptr<Npp8u>(), src2.step,
dst.ptr<Npp8u>(), dst.step, sz) );
nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
break;
case CV_32SC1:
nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,
src2.ptr<Npp32s>(), src2.step,
dst.ptr<Npp32s>(), dst.step, sz) );
nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
break;
case CV_32FC1:
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,
src2.ptr<Npp32f>(), src2.step,
dst.ptr<Npp32f>(), dst.step, sz) );
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
break;
default:
CV_Assert(!"Unsupported source type");
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
////////////////////////////////////////////////////////////////////////////////
// The Loki Library
// Copyright (c) 2001 by Andrei Alexandrescu
// This code accompanies the book:
// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
// Patterns Applied". Copyright (c) 2001. Addison-Wesley.
// Permission to use, copy, modify, distribute and sell this software for any
// purpose is hereby granted without fee, provided that the above copyright
// notice appear in all copies and that both that copyright notice and this
// permission notice appear in supporting documentation.
// The author or Addison-Welsey Longman make no representations about the
// suitability of this software for any purpose. It is provided "as is"
// without express or implied warranty.
// http://loki-lib.sourceforge.net/index.php?n=Main.License
////////////////////////////////////////////////////////////////////////////////
#ifndef _ncvruntimetemplates_hpp_
#define _ncvruntimetemplates_hpp_

#include <stdarg.h>
#include <vector>

namespace Loki
{
    //==============================================================================
    // class NullType
    // Used as a placeholder for "no type here"
    // Useful as an end marker in typelists
    //==============================================================================
    class NullType {};

    //==============================================================================
    // class template Typelist
    // The building block of typelists of any length
    // Defines nested types:
    //     Head (first element, a non-typelist type by convention)
    //     Tail (second element, can be another typelist)
    //==============================================================================
    template <class T, class U>
    struct Typelist
    {
        typedef T Head;
        typedef U Tail;
    };

    //==============================================================================
    // class template Int2Type
    // Converts each integral constant into a unique type
    // Invocation: Int2Type<v> where v is a compile-time constant integral
    // Defines 'value', an enum that evaluates to v
    //==============================================================================
    template <int v>
    struct Int2Type
    {
        enum { value = v };
    };

    namespace TL
    {
        //==============================================================================
        // class template TypeAt
        // Finds the type at a given index in a typelist
        // Invocation (TList is a typelist and index is a compile-time integral
        // constant):
        //     TypeAt<TList, index>::Result
        // returns the type in position 'index' in TList
        // If you pass an out-of-bounds index, the result is a compile-time error
        //==============================================================================
        template <class TList, unsigned int index> struct TypeAt;

        // Index 0: the head of the list is the answer.
        template <class Head, class Tail>
        struct TypeAt<Typelist<Head, Tail>, 0>
        {
            typedef Head Result;
        };

        // Index i > 0: recurse into the tail with index i - 1.
        template <class Head, class Tail, unsigned int i>
        struct TypeAt<Typelist<Head, Tail>, i>
        {
            typedef typename TypeAt<Tail, i - 1>::Result Result;
        };
    }
}


////////////////////////////////////////////////////////////////////////////////
// Runtime boolean template instance dispatcher
// Cyril Crassin <cyril.crassin@icare3d.org>
// NVIDIA, 2010
////////////////////////////////////////////////////////////////////////////////

namespace NCVRuntimeTemplateBool
{
    // KernelCaller turns a list of runtime boolean values into compile-time
    // template arguments: it builds a typelist of Loki::Int2Type<...> types
    // (one per runtime value) and hands the finished typelist to the user's
    // functor via functor.call(TList()).
    //
    // Note that both the 'true' and 'false' branches are instantiated for
    // every argument, so all 2^NumArguments functor specializations are
    // compiled.
    //
    // The first argument passed becomes the head of the resulting typelist,
    // i.e. arguments appear in the typelist in the order they were passed.
    template<typename TList, int NumArguments, class Func>
    struct KernelCaller
    {
        // Convenience entry point used by callers: takes NumArguments ints
        // (treated as booleans) as variadic arguments after 'dummy', which
        // exists only to anchor va_start.
        static void call(Func &functor, int dummy, ...)
        {
            // Collect the variadic arguments into a vector.
            std::vector<int> templateParamList;

            va_list listPointer;
            va_start(listPointer, dummy);

            for(int i=0; i<NumArguments; i++)
            {
                int val = va_arg(listPointer, int);
                templateParamList.push_back(val);
            }
            va_end(listPointer);

            // Delegate to the recursive typelist builder.
            call(functor, templateParamList);
        }

        // Recursively consumes values from the back of the vector, selecting
        // the Int2Type<true> or Int2Type<false> branch for each one and
        // prepending it to the typelist.
        static void call( Func &functor, std::vector<int> &templateParamList)
        {
            // Take the last remaining value.
            int val = templateParamList.back();
            templateParamList.pop_back();

            // No 'typename' here: Loki::Int2Type<...> is a non-dependent
            // type, and the extra keyword was non-portable (ill-formed in
            // pre-C++11 dialects on strict compilers).
            if (val)
            {
                KernelCaller<
                    Loki::Typelist<Loki::Int2Type<true>, TList >,
                    NumArguments - 1, Func >
                    ::call(functor, templateParamList);
            }
            else
            {
                KernelCaller<
                    Loki::Typelist<Loki::Int2Type<false>, TList >,
                    NumArguments - 1, Func >
                    ::call(functor, templateParamList);
            }
        }
    };

    // Specialization for zero values left in the list: the typelist is
    // complete, so invoke the functor's kernel-call method.
    template<class TList, class Func>
    struct KernelCaller<TList, 0, Func>
    {
        static void call(Func &functor)
        {
            // TList is instantiated only to resolve the method's template
            // parameter; it carries no runtime state.
            functor.call(TList());
        }

        static void call(Func &functor, std::vector<int> & /*templateParams*/)
        {
            functor.call(TList());
        }
    };
}

#endif //_ncvruntimetemplates_hpp_
......@@ -71,6 +71,9 @@
#include "npp_staging.h"
#include "surf_key_point.h"
#include "nvidia/NCV.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"
#define CUDART_MINIMUM_REQUIRED_VERSION 3020
#define NPP_MINIMUM_REQUIRED_VERSION 3216
......
// WARNING: this sample is under construction! Use it on your own risk.
#include <opencv2/contrib/contrib.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <iostream>
#include <iomanip>
#include <stdio.h>
using namespace std;
using namespace cv;
using namespace cv::gpu;
// Prints command-line usage for this sample, including the OpenCV
// version it was built against.
void help()
{
    cout << "Usage: ./cascadeclassifier <cascade_file> <image_or_video_or_cameraid>\n"
         << "Using OpenCV version " << CV_VERSION;
    cout << endl << endl;
}
void DetectAndDraw(Mat& img, CascadeClassifier_GPU& cascade);
String cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml";
String nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml";
template<class T> void convertAndReseize(const T& src, T& gray, T& resized, double scale = 2.0)
{
if (src.channels() == 3)
cvtColor( src, gray, CV_BGR2GRAY );
else
gray = src;
Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
if (scale != 1)
resize(gray, resized, sz);
else
resized = gray;
}
// Demo entry point: runs Haar-cascade face detection on a still image, a
// video file, or a camera stream, toggling at runtime between the GPU
// (CascadeClassifier_GPU) and CPU (cv::CascadeClassifier) implementations.
//   argv[1] - path to the cascade XML file
//   argv[2] - image/video path, or a numeric camera id
// Returns 0 on normal exit, -1 on bad arguments or load failure.
int main( int argc, const char** argv )
{
    if (argc != 3)
        return help(), -1;

    if (cv::gpu::getCudaEnabledDeviceCount() == 0)
        return cerr << "No GPU found or the library is compiled without GPU support" << endl, -1;

    VideoCapture capture;

    string cascadeName = argv[1];
    string inputName = argv[2];

    // Load the same cascade into both classifiers so the two code paths
    // can be compared side by side.
    cv::gpu::CascadeClassifier_GPU cascade_gpu;
    if( !cascade_gpu.load( cascadeName ) )
        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;

    cv::CascadeClassifier cascade_cpu;
    if( !cascade_cpu.load( cascadeName ) )
        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;

    // Try the input as a still image first, then as a video file, and
    // finally as a numeric camera id.
    Mat image = imread( inputName);
    if( image.empty() )
        if (!capture.open(inputName))
        {
            int camid = 0;
            sscanf(inputName.c_str(), "%d", &camid);
            if(!capture.open(camid))
                cout << "Can't open source" << endl;
        }

    namedWindow( "result", 1 );
    Size fontSz = cv::getTextSize("T[]", FONT_HERSHEY_SIMPLEX, 1.0, 2, 0);

    Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
    vector<Rect> facesBuf_cpu;

    GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;

    /* parameters */
    bool useGPU = true;
    double scale_factor = 2;

    bool visualizeInPlace = false;
    bool findLargestObject = false;

    printf("\t<space> - toggle GPU/CPU\n");
    printf("\tL - toggle largest faces\n"); // fixed typo: was "lagest"
    printf("\tV - toggle visualisation in-place (for GPU only)\n");
    printf("\t1/q - inc/dec scale\n");

    int detections_num;
    for(;;)
    {
        if( capture.isOpened() )
        {
            capture >> frame;
            if( frame.empty())
                break;
        }

        // Work on the still image if one was loaded, otherwise on the
        // current video frame; keep a CPU and a GPU copy.
        (image.empty() ? frame : image).copyTo(frame_cpu);
        frame_gpu.upload( image.empty() ? frame : image);

        convertAndReseize(frame_gpu, gray_gpu, resized_gpu, scale_factor);
        convertAndReseize(frame_cpu, gray_cpu, resized_cpu, scale_factor);

        cv::TickMeter tm;
        tm.start();

        if (useGPU)
        {
            cascade_gpu.visualizeInPlace = visualizeInPlace;
            cascade_gpu.findLargestObject = findLargestObject;

            detections_num = cascade_gpu.detectMultiScale( resized_gpu, facesBuf_gpu );
            // Only the first detections_num entries of the buffer are valid.
            facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
        }
        else /* so use CPU */
        {
            Size minSize = cascade_gpu.getClassifierSize();
            if (findLargestObject)
            {
                // Skip the smallest scales when only the biggest face is
                // wanted, to roughly match the GPU path's behavior.
                float ratio = (float)std::min(frame.cols / minSize.width, frame.rows / minSize.height);
                ratio = std::max(ratio / 2.5f, 1.f);
                minSize = Size(cvRound(minSize.width * ratio), cvRound(minSize.height * ratio));
            }

            cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2, 4, (findLargestObject ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE, minSize);
            detections_num = (int)facesBuf_cpu.size();
        }

        tm.stop();
        printf( "detection time = %g ms\n", tm.getTimeMilli() );

        if (useGPU)
            resized_gpu.download(resized_cpu);

        // Draw the detection rectangles manually unless the GPU has
        // already drawn them in-place on the frame.
        if (!visualizeInPlace || !useGPU)
        {
            if (detections_num)
            {
                Rect* faces = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
                for(int i = 0; i < detections_num; ++i)
                    cv::rectangle(resized_cpu, faces[i], Scalar(255));
            }
        }

        // Overlay status text: FPS, scale, and any active modes.
        Point text_pos(5, 25);
        int offs = fontSz.height + 5;
        Scalar color = CV_RGB(255, 0, 0);

        cv::cvtColor(resized_cpu, frameDisp, CV_GRAY2BGR);

        char buf[4096];
        sprintf(buf, "%s, FPS = %0.3g", useGPU ? "GPU" : "CPU", 1.0/tm.getTimeSec());
        putText(frameDisp, buf, text_pos, FONT_HERSHEY_SIMPLEX, 1.0, color, 2);

        sprintf(buf, "scale = %0.3g, [%d*scale x %d*scale]", scale_factor, frame.cols, frame.rows);
        putText(frameDisp, buf, text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);

        putText(frameDisp, "Hotkeys: space, 1, Q, L, V, Esc", text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);

        if (findLargestObject)
            putText(frameDisp, "FindLargestObject", text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);

        if (visualizeInPlace && useGPU)
            putText(frameDisp, "VisualizeInPlace", text_pos+Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2);

        cv::imshow( "result", frameDisp);

        // Keyboard handling: Esc quits, other keys toggle modes/scale.
        int key = waitKey( 5 );
        if( key == 27)
            break;

        switch (key)
        {
        case (int)' ': useGPU = !useGPU; printf("Using %s\n", useGPU ? "GPU" : "CPU");break;
        case (int)'v': case (int)'V': visualizeInPlace = !visualizeInPlace; printf("VisualizeInPlace = %d\n", visualizeInPlace); break;
        case (int)'l': case (int)'L': findLargestObject = !findLargestObject; printf("FindLargestObject = %d\n", findLargestObject); break;
        case (int)'1': scale_factor*=1.05; printf("Scale factor = %g\n", scale_factor); break;
        case (int)'q': case (int)'Q':scale_factor/=1.05; printf("Scale factor = %g\n", scale_factor); break;
        }
    }

    return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment