Commit 0747f2d8 authored by Anatoly Baksheev's avatar Anatoly Baksheev

1) NPP_staging as sources. Binaries removed.

2) NVidia tests for GPU
3) FD sample that uses NVidia's interface.
parent 811f6fbe
......@@ -36,10 +36,12 @@ file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
source_group("Device" FILES ${lib_device_hdrs})
if (HAVE_CUDA AND MSVC)
file(GLOB ncv_srcs "src/nvidia/*.cpp")
file(GLOB ncv_hdrs "src/nvidia/*.h*")
file(GLOB ncv_cuda "src/nvidia/*.cu")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
file(GLOB_RECURSE ncv_hdr1 "src/nvidia/*.hpp")
file(GLOB_RECURSE ncv_hdr2 "src/nvidia/*.h")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda})
include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
endif()
if (HAVE_CUDA)
......@@ -74,17 +76,13 @@ if (HAVE_CUDA)
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
endif()
include(FindNPP_staging.cmake)
include_directories(${NPPST_INC})
endif()
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
#CUDA_BUILD_CLEAN_TARGET()
endif()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda} ${cuda_objs})
if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
......@@ -117,8 +115,7 @@ set_target_properties(${the_target} PROPERTIES
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
if (HAVE_CUDA)
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
target_link_libraries(${the_target} ${NPPST_LIB})
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
endif()
......
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(BIT_SUFF 32)
else()
set(BIT_SUFF 64)
endif()
if (APPLE)
set(PLATFORM_SUFF Darwin)
elseif (UNIX)
set(PLATFORM_SUFF Linux)
else()
set(PLATFORM_SUFF Windows)
endif()
set(LIB_FILE NPP_staging_static_${PLATFORM_SUFF}_${BIT_SUFF}_v1)
find_library(NPPST_LIB
NAMES "${LIB_FILE}" "lib${LIB_FILE}"
PATHS "${CMAKE_SOURCE_DIR}/3rdparty/NPP_staging"
DOC "NPP staging library"
)
SET(NPPST_INC "${CMAKE_SOURCE_DIR}//3rdparty/NPP_staging")
\ No newline at end of file
......@@ -83,25 +83,25 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
}
else if (src.elemSize() == 4)
{
NppStSize32u sz;
NcvSize32u sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<NppSt32u*>(src.ptr<NppSt32u>()), src.step,
dst.ptr<NppSt32u>(), dst.step, sz) );
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), src.step,
dst.ptr<Ncv32u>(), dst.step, sz) );
}
else // if (src.elemSize() == 8)
{
NppStSize32u sz;
NcvSize32u sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<NppSt64u*>(src.ptr<NppSt64u>()), src.step,
dst.ptr<NppSt64u>(), dst.step, sz) );
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), src.step,
dst.ptr<Ncv64u>(), dst.step, sz) );
}
cudaSafeCall( cudaThreadSynchronize() );
......
......@@ -126,7 +126,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
minNeighbors,
scaleStep, 1,
flags,
*gpuAllocator, *cpuAllocator, devProp.major, devProp.minor, 0);
*gpuAllocator, *cpuAllocator, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
......@@ -146,8 +146,8 @@ private:
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
// Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice);
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned);
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);
ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);
......@@ -212,7 +212,7 @@ private:
roi.height = d_src.height();
Ncv32u numDetections;
ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp.major, devProp.minor, 0);
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
......
......@@ -560,16 +560,19 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer)
sum.create(src.rows + 1, src.cols + 1, CV_32S);
NppStSize32u roiSize;
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
NppSt32u bufSize;
nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize) );
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
Ncv32u bufSize;
nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<NppSt8u*>(src.ptr<NppSt8u>()), src.step,
sum.ptr<NppSt32u>(), sum.step, roiSize, buffer.ptr<NppSt8u>(), bufSize) );
nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), src.step,
sum.ptr<Ncv32u>(), sum.step, roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
cudaSafeCall( cudaThreadSynchronize() );
}
......@@ -600,19 +603,20 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum)
{
CV_Assert(src.type() == CV_8U);
NppStSize32u roiSize;
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
NppSt32u bufSize;
nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize));
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
Ncv32u bufSize;
nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
GpuMat buf(1, bufSize, CV_8U);
sqsum.create(src.rows + 1, src.cols + 1, CV_64F);
nppSafeCall(nppiStSqrIntegral_8u64u_C1R(
const_cast<NppSt8u*>(src.ptr<NppSt8u>(0)), src.step,
sqsum.ptr<NppSt64u>(0), sqsum.step, roiSize,
buf.ptr<NppSt8u>(0), bufSize));
nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), src.step,
sqsum.ptr<Ncv64u>(0), sqsum.step, roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
cudaSafeCall( cudaThreadSynchronize() );
}
......
......@@ -75,13 +75,13 @@ struct HaarFeature64
#define HaarFeature64_CreateCheck_MaxRectField 0xFF
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight)
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
{
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
((NcvRect8u*)&(this->_ui2.x))->x = rectX;
((NcvRect8u*)&(this->_ui2.x))->y = rectY;
((NcvRect8u*)&(this->_ui2.x))->width = rectWidth;
((NcvRect8u*)&(this->_ui2.x))->height = rectHeight;
((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;
((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
return NCV_SUCCESS;
}
......@@ -306,11 +306,11 @@ struct HaarStage64
};
NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8);
NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
NPPST_CT_ASSERT(sizeof(HaarStage64) == 8);
NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
//==============================================================================
......@@ -347,7 +347,7 @@ enum
NCVPipeObjDet_VisualizeInPlace = 0x004,
};
NCV_EXPORTS
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
NcvSize32u srcRoi,
NCVVector<NcvRect32u> &d_dstRects,
......@@ -367,15 +367,14 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaDeviceProp &devProp,
cudaStream_t cuStream);
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
#define HAAR_STDDEV_BORDER 1
NCV_EXPORTS
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
NCVMatrix<Ncv32f> &d_weights,
NCVMatrixAlloc<Ncv32u> &d_pixelMask,
......@@ -391,11 +390,10 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
Ncv32f scaleArea,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaDeviceProp &devProp,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
NCVMatrix<Ncv32f> &h_weights,
NCVMatrixAlloc<Ncv32u> &h_pixelMask,
......@@ -409,7 +407,7 @@ NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
Ncv32u pixelStep,
Ncv32f scaleArea);
NCV_EXPORTS
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
......@@ -419,7 +417,7 @@ NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
Ncv8u color,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
......@@ -429,7 +427,7 @@ NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
Ncv32u color,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
......@@ -438,7 +436,7 @@ NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
Ncv32u numRects,
Ncv8u color);
NCV_EXPORTS
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
......@@ -450,7 +448,7 @@ NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
#define RECT_SIMILARITY_PROPORTION 0.2f
NCV_EXPORTS
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
Ncv32u numPixelMaskDetections,
NCVVector<NcvRect32u> &hypotheses,
......@@ -461,7 +459,7 @@ NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
Ncv32f curScale,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
Ncv32u numPixelMaskDetections,
NCVVector<NcvRect32u> &hypotheses,
......@@ -471,18 +469,18 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
Ncv32u rectHeight,
Ncv32f curScale);
NCV_EXPORTS
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
Ncv32u &numHypotheses,
Ncv32u minNeighbors,
Ncv32f intersectEps,
NCVVector<Ncv32u> *hypothesesWeights);
NCV_EXPORTS
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
Ncv32u &numNodes, Ncv32u &numFeatures);
NCV_EXPORTS
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
HaarClassifierCascadeDescriptor &haar,
NCVVector<HaarStage64> &h_HaarStages,
......@@ -490,6 +488,7 @@ NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
NCVVector<HaarFeature64> &h_HaarFeatures);
NCV_EXPORTS
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
HaarClassifierCascadeDescriptor haar,
NCVVector<HaarStage64> &h_HaarStages,
......
This diff is collapsed.
......@@ -40,15 +40,13 @@
//M*/
#include <precomp.hpp>
#if !defined (HAVE_CUDA)
#else /* !defined (HAVE_CUDA) */
#include <ios>
#include <stdarg.h>
#include "NCV.hpp"
......@@ -94,17 +92,6 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
//==============================================================================
NCVStatus GPUAlignmentValue(Ncv32u &alignment)
{
int curDev;
cudaDeviceProp curProp;
ncvAssertCUDAReturn(cudaGetDevice(&curDev), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaGetDeviceProperties(&curProp, curDev), NCV_CUDA_ERROR);
alignment = curProp.textureAlignment; //GPUAlignmentValue(curProp.major);
return NCV_SUCCESS;
}
Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
{
Ncv32u alignMask = alignment-1;
......@@ -216,7 +203,7 @@ NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
}
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment)
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr)
:
currentSize(0),
_maxSize(0),
......@@ -229,17 +216,26 @@ NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity,
allocBegin = NULL;
switch (memT)
if (reusePtr == NULL)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPageable:
allocBegin = (Ncv8u *)malloc(capacity);
break;
bReusesMemory = false;
switch (memT)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPageable:
allocBegin = (Ncv8u *)malloc(capacity);
break;
}
}
else
{
bReusesMemory = true;
allocBegin = (Ncv8u *)reusePtr;
}
if (capacity == 0)
......@@ -260,18 +256,23 @@ NCVMemStackAllocator::~NCVMemStackAllocator()
if (allocBegin != NULL)
{
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");
switch (_memType)
if (!bReusesMemory)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaFree(allocBegin), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
break;
case NCVMemoryTypeHostPageable:
free(allocBegin);
break;
switch (_memType)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaFree(allocBegin), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
break;
case NCVMemoryTypeHostPageable:
free(allocBegin);
break;
}
}
allocBegin = NULL;
}
}
......@@ -356,14 +357,14 @@ size_t NCVMemStackAllocator::maxSize(void) const
//===================================================================
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT)
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
:
currentSize(0),
_maxSize(0),
_memType(memT)
_memType(memT),
_alignment(alignment)
{
ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", );
}
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _ncvruntimetemplates_hpp_
#define _ncvruntimetemplates_hpp_
#include <stdarg.h>
#include <vector>
////////////////////////////////////////////////////////////////////////////////
// The Loki Library
// Copyright (c) 2001 by Andrei Alexandrescu
......@@ -14,13 +62,6 @@
// http://loki-lib.sourceforge.net/index.php?n=Main.License
////////////////////////////////////////////////////////////////////////////////
#ifndef _ncvruntimetemplates_hpp_
#define _ncvruntimetemplates_hpp_
#include <stdarg.h>
#include <vector>
namespace Loki
{
//==============================================================================
......
......@@ -69,9 +69,9 @@
#include "cufft.h"
#include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "npp_staging.h"
#include "nvidia/NCV.hpp"
#include "nvidia/core/NCV.hpp"
#include "nvidia/NPP_staging/npp_staging.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"
#define CUDART_MINIMUM_REQUIRED_VERSION 3020
......
......@@ -1378,7 +1378,7 @@ cmpEpsFlt_(const _Tp* src1, const _Tp* src2, size_t total, int imaxdiff, size_t
{
_Tp a = src1[i], b = src2[i];
if( a < 0 ) a ^= C; if( b < 0 ) b ^= C;
_Tp d = std::abs(a - b);
_Tp d = std::abs(double(a - b));
if( d > imaxdiff )
{
idx = i + startidx;
......
......@@ -3,8 +3,8 @@
#
# ----------------------------------------------------------------------------
add_subdirectory(c)
add_subdirectory(cpp)
#add_subdirectory(c)
#add_subdirectory(cpp)
add_subdirectory(gpu)
if(0)
......
......@@ -14,10 +14,12 @@ if (BUILD_EXAMPLES)
"${CMAKE_SOURCE_DIR}/modules/legacy/include"
"${CMAKE_SOURCE_DIR}/modules/contrib/include"
"${CMAKE_SOURCE_DIR}/modules/gpu/include"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"
)
if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core)
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
......
......@@ -3,12 +3,15 @@
# ----------------------------------------------------------------------------
project(opencv_test_gpu)
set(the_target "opencv_test_gpu")
file(GLOB test_srcs "src/*.cpp")
source_group("Src" FILES ${test_srcs})
file(GLOB test_hdrs "src/*.h*")
source_group("Src" FILES ${test_srcs})
source_group("Include" FILES ${test_hdrs})
set(the_target "opencv_test_gpu")
include_directories (
"${CMAKE_SOURCE_DIR}/include/opencv"
......@@ -26,11 +29,21 @@ include_directories (
"${CMAKE_SOURCE_DIR}/modules/ml/include"
"${CMAKE_CURRENT_SOURCE_DIR}/src"
"${CMAKE_CURRENT_BINARY_DIR}"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"
)
include_directories(../cxts)
add_executable(${the_target} ${test_srcs} ${test_hdrs})
if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
file(GLOB nvidia "src/nvidia/*.*")
SET(ncv_cpp ../../modules/gpu/src/nvidia/core/NCV.cpp)
source_group("Src\\nvidia" FILES ${nvidia})
endif()
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia} ${ncv_cpp})
# Additional target properties
set_target_properties(${the_target} PROPERTIES
......
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _ncvautotestlister_hpp_
#define _ncvautotestlister_hpp_
#include <vector>
#include "NCVTest.hpp"
class NCVAutoTestLister
{
public:
NCVAutoTestLister(std::string testSuiteName, NcvBool bStopOnFirstFail=false, NcvBool bCompactOutput=true)
:
testSuiteName(testSuiteName),
bStopOnFirstFail(bStopOnFirstFail),
bCompactOutput(bCompactOutput)
{
}
void add(INCVTest *test)
{
this->tests.push_back(test);
}
void invoke()
{
Ncv32u nPassed = 0;
Ncv32u nFailed = 0;
Ncv32u nFailedMem = 0;
if (bCompactOutput)
{
printf("Test suite '%s' with %d tests\n",
testSuiteName.c_str(),
(int)(this->tests.size()));
}
for (Ncv32u i=0; i<this->tests.size(); i++)
{
INCVTest &curTest = *tests[i];
NCVTestReport curReport;
bool res = curTest.executeTest(curReport);
if (!bCompactOutput)
{
printf("Test %3i %16s; Consumed mem GPU = %8d, CPU = %8d; %s\n",
i,
curTest.getName().c_str(),
curReport.statsNums["MemGPU"],
curReport.statsNums["MemCPU"],
curReport.statsText["rcode"].c_str());
}
if (res)
{
nPassed++;
if (bCompactOutput)
{
printf(".");
}
}
else
{
if (!curReport.statsText["rcode"].compare("FAILED"))
{
nFailed++;
if (bCompactOutput)
{
printf("x");
}
if (bStopOnFirstFail)
{
break;
}
}
else
{
nFailedMem++;
if (bCompactOutput)
{
printf("m");
}
}
}
fflush(stdout);
}
if (bCompactOutput)
{
printf("\n");
}
printf("Test suite '%s' complete: %d total, %d passed, %d memory errors, %d failed\n\n",
testSuiteName.c_str(),
(int)(this->tests.size()),
nPassed,
nFailedMem,
nFailed);
}
~NCVAutoTestLister()
{
for (Ncv32u i=0; i<this->tests.size(); i++)
{
delete tests[i];
}
}
private:
NcvBool bStopOnFirstFail;
NcvBool bCompactOutput;
std::string testSuiteName;
std::vector<INCVTest *> tests;
};
#endif // _ncvautotestlister_hpp_
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _ncvtest_hpp_
#define _ncvtest_hpp_
#pragma warning( disable : 4201 4408 4127 4100)
#include <string>
#include <vector>
#include <map>
#include <memory>
#include <algorithm>
#include <fstream>
#include <cuda_runtime.h>
#include "NPP_staging.hpp"
struct NCVTestReport
{
std::map<std::string, Ncv32u> statsNums;
std::map<std::string, std::string> statsText;
};
class INCVTest
{
public:
virtual bool executeTest(NCVTestReport &report) = 0;
virtual std::string getName() const = 0;
};
class NCVTestProvider : public INCVTest
{
public:
NCVTestProvider(std::string testName)
:
testName(testName)
{
int devId;
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&this->devProp, devId), "Error returned from cudaGetDeviceProperties", );
}
virtual bool init() = 0;
virtual bool process() = 0;
virtual bool deinit() = 0;
virtual bool toString(std::ofstream &strOut) = 0;
virtual std::string getName() const
{
return this->testName;
}
virtual ~NCVTestProvider()
{
deinitMemory();
}
virtual bool executeTest(NCVTestReport &report)
{
bool res;
report.statsText["rcode"] = "FAILED";
res = initMemory(report);
if (!res)
{
dumpToFile(report);
deinitMemory();
return false;
}
res = init();
if (!res)
{
dumpToFile(report);
deinit();
deinitMemory();
return false;
}
res = process();
if (!res)
{
dumpToFile(report);
deinit();
deinitMemory();
return false;
}
res = deinit();
if (!res)
{
dumpToFile(report);
deinitMemory();
return false;
}
deinitMemory();
report.statsText["rcode"] = "Passed";
return true;
}
protected:
cudaDeviceProp devProp;
std::auto_ptr<INCVMemAllocator> allocatorGPU;
std::auto_ptr<INCVMemAllocator> allocatorCPU;
private:
std::string testName;
bool initMemory(NCVTestReport &report)
{
this->allocatorGPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));
this->allocatorCPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));
if (!this->allocatorGPU.get()->isInitialized() ||
!this->allocatorCPU.get()->isInitialized())
{
report.statsText["rcode"] = "Memory FAILED";
return false;
}
if (!this->process())
{
report.statsText["rcode"] = "Memory FAILED";
return false;
}
Ncv32u maxGPUsize = (Ncv32u)this->allocatorGPU.get()->maxSize();
Ncv32u maxCPUsize = (Ncv32u)this->allocatorCPU.get()->maxSize();
report.statsNums["MemGPU"] = maxGPUsize;
report.statsNums["MemCPU"] = maxCPUsize;
this->allocatorGPU.reset(new NCVMemStackAllocator(NCVMemoryTypeDevice, maxGPUsize, devProp.textureAlignment));
this->allocatorCPU.reset(new NCVMemStackAllocator(NCVMemoryTypeHostPinned, maxCPUsize, devProp.textureAlignment));
if (!this->allocatorGPU.get()->isInitialized() ||
!this->allocatorCPU.get()->isInitialized())
{
report.statsText["rcode"] = "Memory FAILED";
return false;
}
return true;
}
void deinitMemory()
{
this->allocatorGPU.reset();
this->allocatorCPU.reset();
}
void dumpToFile(NCVTestReport &report)
{
bool bReasonMem = (0 == report.statsText["rcode"].compare("Memory FAILED"));
std::string fname = "TestDump_";
fname += (bReasonMem ? "m_" : "") + this->testName + ".log";
std::ofstream stream(fname.c_str(), std::ios::trunc | std::ios::out);
if (!stream.is_open()) return;
stream << "NCV Test Failure Log: " << this->testName << std::endl;
stream << "====================================================" << std::endl << std::endl;
stream << "Test initialization report: " << std::endl;
for (std::map<std::string,std::string>::iterator it=report.statsText.begin();
it != report.statsText.end(); it++)
{
stream << it->first << "=" << it->second << std::endl;
}
for (std::map<std::string,Ncv32u>::iterator it=report.statsNums.begin();
it != report.statsNums.end(); it++)
{
stream << it->first << "=" << it->second << std::endl;
}
stream << std::endl;
stream << "Test initialization parameters: " << std::endl;
bool bSerializeRes = false;
try
{
bSerializeRes = this->toString(stream);
}
catch (...)
{
}
if (!bSerializeRes)
{
stream << "Couldn't retrieve object dump" << std::endl;
}
stream.flush();
}
};
#endif // _ncvtest_hpp_
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _ncvtestsourceprovider_hpp_
#define _ncvtestsourceprovider_hpp_
#include <memory>
#include "NCV.hpp"
#include <opencv2/highgui/highgui.hpp>
template <class T>
class NCVTestSourceProvider
{
public:
NCVTestSourceProvider(Ncv32u seed, T rangeLow, T rangeHigh, Ncv32u maxWidth, Ncv32u maxHeight)
:
bInit(false)
{
ncvAssertPrintReturn(rangeLow < rangeHigh, "NCVTestSourceProvider ctor:: Invalid range", );
int devId;
cudaDeviceProp devProp;
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );
//Ncv32u maxWpitch = alignUp(maxWidth * sizeof(T), devProp.textureAlignment);
allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));
data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), maxWidth, maxHeight));
ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );
this->dataWidth = maxWidth;
this->dataHeight = maxHeight;
srand(seed);
for (Ncv32u i=0; i<maxHeight; i++)
{
for (Ncv32u j=0; j<data.get()->stride(); j++)
{
data.get()->ptr()[i * data.get()->stride() + j] =
(T)(((1.0 * rand()) / RAND_MAX) * (rangeHigh - rangeLow) + rangeLow);
}
}
this->bInit = true;
}
NCVTestSourceProvider(std::string pgmFilename)
:
bInit(false)
{
ncvAssertPrintReturn(sizeof(T) == 1, "NCVTestSourceProvider ctor:: PGM constructor complies only with 8bit types", );
cv::Mat image = cv::imread(pgmFilename);
ncvAssertPrintReturn(!image.empty(), "NCVTestSourceProvider ctor:: PGM file error", );
int devId;
cudaDeviceProp devProp;
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );
allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));
data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), image.cols, image.rows));
ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );
this->dataWidth = image.cols;
this->dataHeight = image.rows;
cv::Mat hdr(image.size(), CV_8UC1, data.get()->ptr(), data.get()->pitch());
image.copyTo(hdr);
this->bInit = true;
}
NcvBool fill(NCVMatrix<T> &dst)
{
ncvAssertReturn(this->isInit() &&
dst.memType() == allocatorCPU.get()->memType(), false);
if (dst.width() == 0 || dst.height() == 0)
{
return true;
}
for (Ncv32u i=0; i<dst.height(); i++)
{
Ncv32u srcLine = i % this->dataHeight;
Ncv32u srcFullChunks = dst.width() / this->dataWidth;
for (Ncv32u j=0; j<srcFullChunks; j++)
{
memcpy(dst.ptr() + i * dst.stride() + j * this->dataWidth,
this->data.get()->ptr() + this->data.get()->stride() * srcLine,
this->dataWidth * sizeof(T));
}
Ncv32u srcLastChunk = dst.width() % this->dataWidth;
memcpy(dst.ptr() + i * dst.stride() + srcFullChunks * this->dataWidth,
this->data.get()->ptr() + this->data.get()->stride() * srcLine,
srcLastChunk * sizeof(T));
}
return true;
}
NcvBool fill(NCVVector<T> &dst)
{
ncvAssertReturn(this->isInit() &&
dst.memType() == allocatorCPU.get()->memType(), false);
if (dst.length() == 0)
{
return true;
}
Ncv32u srcLen = this->dataWidth * this->dataHeight;
Ncv32u srcFullChunks = (Ncv32u)dst.length() / srcLen;
for (Ncv32u j=0; j<srcFullChunks; j++)
{
memcpy(dst.ptr() + j * srcLen, this->data.get()->ptr(), srcLen * sizeof(T));
}
Ncv32u srcLastChunk = dst.length() % srcLen;
memcpy(dst.ptr() + srcFullChunks * srcLen, this->data.get()->ptr(), srcLastChunk * sizeof(T));
return true;
}
~NCVTestSourceProvider()
{
data.reset();
allocatorCPU.reset();
}
private:
NcvBool isInit(void)
{
return this->bInit;
}
NcvBool bInit;
std::auto_ptr< INCVMemAllocator > allocatorCPU;
std::auto_ptr< NCVMatrixAlloc<T> > data;
Ncv32u dataWidth;
Ncv32u dataHeight;
};
#endif // _ncvtestsourceprovider_hpp_
This diff is collapsed.
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testhypothesescompact_h_
#define _testhypothesescompact_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestCompact : public NCVTestProvider
{
public:
TestCompact(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
Ncv32u length, Ncv32u badElem, Ncv32u badElemPercentage);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestCompact(const TestCompact&);
TestCompact& operator=(const TestCompact&);
NCVTestSourceProvider<Ncv32u> &src;
Ncv32u length;
Ncv32u badElem;
Ncv32u badElemPercentage;
};
#endif // _testhypothesescompact_h_
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment