Commit 42ced17c authored by Vladislav Vinogradov

fixed some warnings under win64

parent 767ac9aa
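All of the hunks below follow one pattern: on Win64, size_t and ptrdiff_t are 64-bit while int is 32-bit, so passing values such as GpuMat::step, std::vector::size(), or a pointer difference to an API that expects a 32-bit integer makes MSVC emit narrowing warnings (typically C4244/C4267). The commit silences them by making the narrowing explicit with static_cast, or by widening local variables to size_t. A minimal standalone sketch of the pattern follows; npp_like_call is a hypothetical stand-in for an NPP-style function that takes int steps, not an actual NPP entry point.

#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for an NPP-style call that takes 32-bit row steps.
void npp_like_call(const unsigned char* src, int srcStep, unsigned char* dst, int dstStep)
{
    dst[0] = src[0];
    std::printf("steps: %d %d\n", srcStep, dstStep);
}

int main()
{
    unsigned char src[64 * 48] = {0}, dst[64 * 48] = {0};
    std::size_t step = 64;                       // row pitch in bytes; 64-bit on Win64

    // npp_like_call(src, step, dst, step);      // implicit size_t -> int: warning on x64 MSVC
    npp_like_call(src, static_cast<int>(step),   // explicit, warning-free narrowing
                  dst, static_cast<int>(step));
    return 0;
}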
@@ -82,7 +82,8 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
         sz.width = src.cols;
         sz.height = src.rows;
-        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
+        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
     }
     else if (src.elemSize() == 4)
     {
@@ -92,8 +93,8 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
         sz.width = src.cols;
         sz.height = src.rows;
-        nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), src.step,
-            dst.ptr<Ncv32u>(), dst.step, sz) );
+        nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
+            dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) );
     }
     else // if (src.elemSize() == 8)
     {
@@ -103,8 +104,8 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
         sz.width = src.cols;
         sz.height = src.rows;
-        nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), src.step,
-            dst.ptr<Ncv64u>(), dst.step, sz) );
+        nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
+            dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) );
     }
     if (stream == 0)
@@ -130,14 +131,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& s)
     if (src.type() == CV_8UC1)
     {
-        nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), src.step,
-            dst.ptr<Npp8u>(), dst.step, sz,
+        nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
             (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
     }
     else
     {
-        nppSafeCall( nppiMirror_8u_C4R(src.ptr<Npp8u>(), src.step,
-            dst.ptr<Npp8u>(), dst.step, sz,
+        nppSafeCall( nppiMirror_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
             (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
     }
@@ -187,7 +188,8 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
     if (src.type() == CV_8UC1)
     {
-        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
+        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
     }
     else
     {
@@ -202,7 +204,8 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
             pValues3[1] = nppLut3[1].ptr<Npp32s>();
             pValues3[2] = nppLut3[2].ptr<Npp32s>();
         }
-        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, pValues3, lvls.pLevels3, lvls.nValues3) );
+        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) );
     }
     if (stream == 0)
@@ -226,7 +229,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& s)
     NppStreamHandler h(stream);
-    nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+    nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
     if (stream == 0)
         cudaSafeCall( cudaDeviceSynchronize() );
@@ -249,7 +252,7 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& s)
     NppStreamHandler h(stream);
-    nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+    nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
     if (stream == 0)
         cudaSafeCall( cudaDeviceSynchronize() );
@@ -274,7 +277,7 @@ namespace
     NppStreamHandler h(stream);
-    nppSafeCall( func(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+    nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
     if (stream == 0)
         cudaSafeCall( cudaDeviceSynchronize() );
...
@@ -265,7 +265,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollect
     if (masks.empty())
     {
-        Mat trainCollectionCPU(1, trainDescCollection.size(), CV_8UC(sizeof(DevMem2D)));
+        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(DevMem2D)));
         for (size_t i = 0; i < trainDescCollection.size(); ++i)
         {
@@ -280,8 +280,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollect
     {
         CV_Assert(masks.size() == trainDescCollection.size());
-        Mat trainCollectionCPU(1, trainDescCollection.size(), CV_8UC(sizeof(DevMem2D)));
-        Mat maskCollectionCPU(1, trainDescCollection.size(), CV_8UC(sizeof(PtrStep)));
+        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(DevMem2D)));
+        Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStep)));
         for (size_t i = 0; i < trainDescCollection.size(); ++i)
         {
...
@@ -87,7 +87,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
        src_seg.begin = src_beg;
        src_seg.size = src.step * src.rows;
-        NCVMatrixReuse<Ncv8u> d_src(src_seg, devProp.textureAlignment, src.cols, src.rows, src.step, true);
+        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
        CV_Assert(objects.rows == 1);
@@ -141,8 +141,8 @@ private:
        ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
        // Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
-        gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
-        cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);
+        gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
+        cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));
        ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);
@@ -189,8 +189,8 @@ private:
        }
        // Calculate memory requirements and create real allocators
-        NCVMemStackAllocator gpuCounter(devProp.textureAlignment);
-        NCVMemStackAllocator cpuCounter(devProp.textureAlignment);
+        NCVMemStackAllocator gpuCounter(static_cast<int>(devProp.textureAlignment));
+        NCVMemStackAllocator cpuCounter(static_cast<int>(devProp.textureAlignment));
        ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);
@@ -214,8 +214,8 @@ private:
        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
-        gpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);
-        cpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);
+        gpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
+        cpuAllocator = new NCVMemStackAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
        ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);
@@ -372,7 +372,7 @@ NCVStatus loadFromXML(const std::string &filename,
    for(int s = 0; s < stagesCound; ++s) // by stages
    {
        HaarStage64 curStage;
-        curStage.setStartClassifierRootNodeOffset(haarClassifierNodes.size());
+        curStage.setStartClassifierRootNodeOffset(static_cast<Ncv32u>(haarClassifierNodes.size()));
        curStage.setStageThreshold(oldCascade->stage_classifier[s].threshold);
@@ -452,7 +452,7 @@ NCVStatus loadFromXML(const std::string &filename,
                HaarFeatureDescriptor32 tmpFeatureDesc;
                ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, bIsLeftNodeLeaf, bIsRightNodeLeaf,
-                    featureId, haarFeatures.size() - featureId);
+                    featureId, static_cast<Ncv32u>(haarFeatures.size()) - featureId);
                ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
                curNode.setFeatureDesc(tmpFeatureDesc);
@@ -478,13 +478,13 @@ NCVStatus loadFromXML(const std::string &filename,
    }
    //fill in cascade stats
-    haar.NumStages = haarStages.size();
-    haar.NumClassifierRootNodes = haarClassifierNodes.size();
-    haar.NumClassifierTotalNodes = haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size();
-    haar.NumFeatures = haarFeatures.size();
+    haar.NumStages = static_cast<Ncv32u>(haarStages.size());
+    haar.NumClassifierRootNodes = static_cast<Ncv32u>(haarClassifierNodes.size());
+    haar.NumClassifierTotalNodes = static_cast<Ncv32u>(haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size());
+    haar.NumFeatures = static_cast<Ncv32u>(haarFeatures.size());
    //merge root and leaf nodes in one classifiers array
-    Ncv32u offsetRoot = haarClassifierNodes.size();
+    Ncv32u offsetRoot = static_cast<Ncv32u>(haarClassifierNodes.size());
    for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
    {
        HaarFeatureDescriptor32 featureDesc = haarClassifierNodes[i].getFeatureDesc();
...
@@ -171,10 +171,10 @@ namespace cv { namespace gpu { namespace mathfunc
    }
-    void bitwiseNotCaller(int rows, int cols, int elem_size1, int cn,
+    void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn,
                          const PtrStep src, PtrStep dst, cudaStream_t stream)
    {
-        bitwiseUnOp<UN_OP_NOT>(rows, cols * elem_size1 * cn, src, dst, stream);
+        bitwiseUnOp<UN_OP_NOT>(rows, static_cast<int>(cols * elem_size1 * cn), src, dst, stream);
    }
@@ -296,10 +296,10 @@ namespace cv { namespace gpu { namespace mathfunc
    }
-    void bitwiseOrCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
+    void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStep src1,
                         const PtrStep src2, PtrStep dst, cudaStream_t stream)
    {
-        bitwiseBinOp<BIN_OP_OR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
+        bitwiseBinOp<BIN_OP_OR>(rows, static_cast<int>(cols * elem_size1 * cn), src1, src2, dst, stream);
    }
@@ -315,10 +315,10 @@ namespace cv { namespace gpu { namespace mathfunc
    template void bitwiseMaskOrCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
-    void bitwiseAndCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
+    void bitwiseAndCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStep src1,
                          const PtrStep src2, PtrStep dst, cudaStream_t stream)
    {
-        bitwiseBinOp<BIN_OP_AND>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
+        bitwiseBinOp<BIN_OP_AND>(rows, static_cast<int>(cols * elem_size1 * cn), src1, src2, dst, stream);
    }
@@ -334,10 +334,10 @@ namespace cv { namespace gpu { namespace mathfunc
    template void bitwiseMaskAndCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
-    void bitwiseXorCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
+    void bitwiseXorCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStep src1,
                          const PtrStep src2, PtrStep dst, cudaStream_t stream)
    {
-        bitwiseBinOp<BIN_OP_XOR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
+        bitwiseBinOp<BIN_OP_XOR>(rows, static_cast<int>(cols * elem_size1 * cn), src1, src2, dst, stream);
    }
...
@@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace histograms
        histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
            DevMem2D_<uint>(src),
            buf,
-            src.rows * src.step / sizeof(uint),
+            static_cast<uint>(src.rows * src.step / sizeof(uint)),
            src.cols);
        cudaSafeCall( cudaGetLastError() );
...
@@ -161,7 +161,7 @@ namespace cv { namespace gpu { namespace imgproc
    texture<uchar4, 2> tex_meanshift;
    __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
-                                    int out_step, int cols, int rows,
+                                    size_t out_step, int cols, int rows,
                                    int sp, int sr, int maxIter, float eps)
    {
        int isr2 = sr*sr;
@@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace imgproc
        return make_short2((short)x0, (short)y0);
    }
-    extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows,
+    extern "C" __global__ void meanshift_kernel( unsigned char* out, size_t out_step, int cols, int rows,
                                                 int sp, int sr, int maxIter, float eps )
    {
        int x0 = blockIdx.x * blockDim.x + threadIdx.x;
@@ -235,8 +235,8 @@ namespace cv { namespace gpu { namespace imgproc
            do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
    }
-    extern "C" __global__ void meanshiftproc_kernel( unsigned char* outr, int outrstep,
-                                                     unsigned char* outsp, int outspstep,
+    extern "C" __global__ void meanshiftproc_kernel( unsigned char* outr, size_t outrstep,
+                                                     unsigned char* outsp, size_t outspstep,
                                                     int cols, int rows,
                                                     int sp, int sr, int maxIter, float eps )
    {
...
@@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
    ///////////////////////////////////////////////////////////////////////////
    template<typename T>
-    __global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
+    __global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -162,7 +162,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
    }
    template<typename T>
-    __global__ void set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
+    __global__ void set_to_without_mask(T * mat, int cols, int rows, size_t step, int channels)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
    }
    template<typename T>
-    __global__ void set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
+    __global__ void set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, size_t step, int channels, size_t step_mask)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;
...
@@ -276,7 +276,7 @@ namespace cv { namespace gpu { namespace split_merge {
    extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
-                                 int total_channels, int elem_size,
+                                 int total_channels, size_t elem_size,
                                 const cudaStream_t& stream)
    {
        static MergeFunction merge_func_tbl[] =
@@ -286,7 +286,7 @@ namespace cv { namespace gpu { namespace split_merge {
            mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
        };
-        int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
+        size_t merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
        MergeFunction merge_func = merge_func_tbl[merge_func_id];
        if (merge_func == 0)
@@ -485,7 +485,7 @@ namespace cv { namespace gpu { namespace split_merge {
    extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
-                                 int num_channels, int elem_size1,
+                                 int num_channels, size_t elem_size1,
                                 const cudaStream_t& stream)
    {
        static SplitFunction split_func_tbl[] =
@@ -495,7 +495,7 @@ namespace cv { namespace gpu { namespace split_merge {
            splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
        };
-        int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
+        size_t split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
        SplitFunction split_func = split_func_tbl[split_func_id];
        if (split_func == 0)
...
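In the merge/split dispatch above, the element size is now a size_t, so the whole index expression is computed in size_t; widening the index variable keeps the lookup warning-free. The scheme maps (channel count, element size in bytes) into a 5-wide table row, with the slot for a 6-byte element left empty. The sketch below is only an illustration of that lookup; the Fn table and names are made up, not the actual OpenCV function table.

#include <cstddef>
#include <cstdio>

typedef void (*Fn)();

void c2_char()   { std::puts("2 channels, 1-byte elements"); }
void c2_short()  { std::puts("2 channels, 2-byte elements"); }
void c2_int()    { std::puts("2 channels, 4-byte elements"); }
void c2_double() { std::puts("2 channels, 8-byte elements"); }

int main()
{
    // One 5-entry row per channel count; slot 3 of each row (6-byte elements) stays empty.
    static Fn table[] = { c2_char, c2_short, c2_int, 0, c2_double /* rows for 3 and 4 channels would follow */ };

    int total_channels = 2;
    std::size_t elem_size = 8;    // bytes per element, e.g. sizeof(double)

    // elem_size >> 1 maps 1, 2, 4, 8 bytes onto slots 0, 1, 2, 4.
    std::size_t id = (total_channels - 2) * 5 + (elem_size >> 1);
    if (id < sizeof(table) / sizeof(table[0]) && table[id])
        table[id]();
    return 0;
}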
@@ -98,16 +98,20 @@ namespace
        switch (src1.type())
        {
        case CV_8UC1:
-            nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
+            nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, 0) );
            break;
        case CV_8UC4:
-            nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
+            nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, 0) );
            break;
        case CV_32SC1:
-            nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
+            nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), static_cast<int>(src1.step), src2.ptr<Npp32s>(), static_cast<int>(src2.step),
+                dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) );
            break;
        case CV_32FC1:
-            nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
+            nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
+                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
            break;
        default:
            CV_Assert(!"Unsupported source type");
@@ -141,7 +145,7 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
+        nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), (Npp32f)sc[0], dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -163,7 +167,7 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
+        nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), nValue, dst.ptr<Npp32fc>(), static_cast<int>(dst.step), sz) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -238,7 +242,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream&
        NppStreamHandler h(cudaStream);
-        nppSafeCall( nppiMulC_32f_C1R(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
+        nppSafeCall( nppiMulC_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), (Npp32f)sc[0], dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
        if (cudaStream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -258,7 +262,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
        NppStreamHandler h(cudaStream);
-        nppSafeCall( nppiDivC_32f_C1R(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
+        nppSafeCall( nppiDivC_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), (Npp32f)sc[0], dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
        if (cudaStream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -287,16 +291,20 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
    switch (src1.type())
    {
    case CV_8UC1:
-        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
+        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
        break;
    case CV_8UC4:
-        nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
+        nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
        break;
    case CV_32SC1:
-        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
+        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), static_cast<int>(src1.step), src2.ptr<Npp32s>(), static_cast<int>(src2.step),
+            dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) );
        break;
    case CV_32FC1:
-        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
+        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
+            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
        break;
    default:
        CV_Assert(!"Unsupported source type");
@@ -320,7 +328,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
    NppStreamHandler h(stream);
-    nppSafeCall( nppiAbsDiffC_32f_C1R(src1.ptr<Npp32f>(), src1.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)src2[0]) );
+    nppSafeCall( nppiAbsDiffC_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, (Npp32f)src2[0]) );
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
@@ -358,9 +366,9 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
    {
        NppStreamHandler h(stream);
-        nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step,
-            src2.ptr<Npp8u>(), src2.step,
-            dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
+        nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), static_cast<int>(src1.step),
+            src2.ptr<Npp8u>(), static_cast<int>(src2.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppCmpOp[cmpop]) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -376,9 +384,9 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
    {
        NppStreamHandler h(stream);
-        nppSafeCall( nppiCompare_32f_C1R(src1.ptr<Npp32f>(), src1.step,
-            src2.ptr<Npp32f>(), src2.step,
-            dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
+        nppSafeCall( nppiCompare_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step),
+            src2.ptr<Npp32f>(), static_cast<int>(src2.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppCmpOp[cmpop]) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -396,7 +404,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
namespace cv { namespace gpu { namespace mathfunc
{
-    void bitwiseNotCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream);
+    void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream);
    template <typename T>
    void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream);
@@ -450,17 +458,17 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
namespace cv { namespace gpu { namespace mathfunc
{
-    void bitwiseOrCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
+    void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
    template <typename T>
    void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
-    void bitwiseAndCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
+    void bitwiseAndCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
    template <typename T>
    void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
-    void bitwiseXorCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
+    void bitwiseXorCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
    template <typename T>
    void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
@@ -732,8 +740,8 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
    sz.width = src.cols;
    sz.height = src.rows;
-    nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
-        dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
+    nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step),
+        dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
...
@@ -110,7 +110,7 @@ namespace
        error_entry( NPP_ODD_ROI_WARNING )
    };
-    int error_num = sizeof(npp_errors)/sizeof(npp_errors[0]);
+    const size_t error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
    struct Searcher
    {
@@ -161,7 +161,7 @@ namespace cv
{
    const string getNppErrorString( int err )
    {
-        int idx = std::find_if(npp_errors, npp_errors + error_num, Searcher(err)) - npp_errors;
+        size_t idx = std::find_if(npp_errors, npp_errors + error_num, Searcher(err)) - npp_errors;
        const string& msg = (idx != error_num) ? npp_errors[idx].str : string("Unknown error code");
        std::stringstream interpreter;
...
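In the error-string lookup above, std::find_if over a plain array returns a pointer, and subtracting npp_errors yields a ptrdiff_t, which is 64-bit on Win64; keeping the index and the element count as size_t avoids another narrowing warning. A minimal sketch of the same lookup pattern, assuming a made-up err_entry table rather than the real NPP error list:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>

struct err_entry { int code; const char* str; };

static const err_entry errors[] =
{
    { 1, "NPP_WARNING" },
    { 2, "NPP_ODD_ROI_WARNING" }
};
static const std::size_t error_num = sizeof(errors) / sizeof(errors[0]);

struct Searcher
{
    int err;
    explicit Searcher(int e) : err(e) {}
    bool operator()(const err_entry& e) const { return e.code == err; }
};

std::string error_string(int err)
{
    // Pointer subtraction gives ptrdiff_t (64-bit on Win64); keep the index in size_t.
    std::size_t idx = std::find_if(errors, errors + error_num, Searcher(err)) - errors;
    return (idx != error_num) ? errors[idx].str : "Unknown error code";
}

int main()
{
    std::cout << error_string(2) << "\n" << error_string(42) << "\n";
    return 0;
}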
@@ -253,7 +253,8 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );
+        nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -287,7 +288,8 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );
+        nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -333,7 +335,8 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );
+        nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -401,7 +404,8 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );
+        nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step),
+            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -584,7 +588,7 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
+        nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
            kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
        if (stream == 0)
@@ -666,7 +670,8 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
+        nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
+            kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -780,7 +785,8 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
+        nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
+            kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
@@ -1040,7 +1046,7 @@ namespace
        NppStreamHandler h(stream);
-        nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );
+        nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
...
@@ -78,7 +78,7 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
    NppStreamHandler h(stream);
    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
-        terminals.step, leftTransp.step, sznpp, labels.ptr<Npp8u>(), labels.step, buf.ptr<Npp8u>()) );
+        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
...
@@ -218,7 +218,7 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
    Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
    // block_hists.create(1, block_hist_size * blocks_per_img.area(), CV_32F);
-    block_hists = getBuffer(1, block_hist_size * blocks_per_img.area(), CV_32F, block_hists_buf);
+    block_hists = getBuffer(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F, block_hists_buf);
    hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols,
                       grad, qangle, (float)getWinSigma(), block_hists.ptr<float>());
@@ -234,11 +234,11 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
    computeBlockHistograms(img);
-    const int block_hist_size = getBlockHistogramSize();
+    const size_t block_hist_size = getBlockHistogramSize();
    Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
    Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
-    descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F);
+    descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
    switch (descr_format)
    {
...
This diff is collapsed.
@@ -177,7 +177,7 @@ namespace
        NppiSize sz;
        sz.width = src.cols;
        sz.height = src.rows;
-        nppSafeCall( func(src.ptr<src_t>(), src.step, dst.ptr<dst_t>(), dst.step, sz) );
+        nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
@@ -191,7 +191,7 @@ namespace
        NppiSize sz;
        sz.width = src.cols;
        sz.height = src.rows;
-        nppSafeCall( func(src.ptr<Npp32f>(), src.step, dst.ptr<dst_t>(), dst.step, sz, NPP_RND_NEAR) );
+        nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
@@ -347,7 +347,7 @@ namespace
        sz.width = src.cols;
        sz.height = src.rows;
        Scalar_<src_t> nppS = s;
-        nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz) );
+        nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
@@ -362,7 +362,7 @@ namespace
        sz.width = src.cols;
        sz.height = src.rows;
        Scalar_<src_t> nppS = s;
-        nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz) );
+        nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
@@ -398,7 +398,7 @@ namespace
        sz.width = src.cols;
        sz.height = src.rows;
        Scalar_<src_t> nppS = s;
-        nppSafeCall( func(nppS.val, src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );
+        nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
@@ -413,7 +413,7 @@ namespace
        sz.width = src.cols;
        sz.height = src.rows;
        Scalar_<src_t> nppS = s;
-        nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );
+        nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
        cudaSafeCall( cudaDeviceSynchronize() );
    }
...
@@ -116,7 +116,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
    DeviceBuffer dbuf(2);
-    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), src.step, sz, dbuf, (double*)dbuf + 1) );
+    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, dbuf, (double*)dbuf + 1) );
    cudaSafeCall( cudaDeviceSynchronize() );
@@ -177,7 +177,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
    DeviceBuffer dbuf;
-    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, sz, dbuf) );
+    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
    cudaSafeCall( cudaDeviceSynchronize() );
@@ -409,7 +409,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
    Size buf_size;
-    getBufSizeRequired(src.cols, src.rows, src.elemSize(), buf_size.width, buf_size.height);
+    getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);
    if (mask.empty())
@@ -510,7 +510,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
    int maxLoc_[2];
    Size valbuf_size, locbuf_size;
-    getBufSizeRequired(src.cols, src.rows, src.elemSize(), valbuf_size.width,
+    getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), valbuf_size.width,
                       valbuf_size.height, locbuf_size.width, locbuf_size.height);
    ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
    ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);
...
@@ -1096,7 +1096,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
    NCVVectorReuse<Ncv32u> d_vecPixelMask(d_pixelMask.getSegment(), anchorsRoi.height * d_pixelMask.stride());
    ncvAssertReturn(d_vecPixelMask.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
-    NCVVectorAlloc<Ncv32u> d_vecPixelMaskTmp(gpuAllocator, d_vecPixelMask.length());
+    NCVVectorAlloc<Ncv32u> d_vecPixelMaskTmp(gpuAllocator, static_cast<Ncv32u>(d_vecPixelMask.length()));
    ncvAssertReturn(d_vecPixelMaskTmp.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    NCVVectorAlloc<Ncv32u> hp_pool32u(cpuAllocator, 2);
@@ -1120,7 +1120,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
    NCVVector<Ncv32u> *d_ptrNowTmp = &d_vecPixelMaskTmp;
    Ncv32u szNppCompactTmpBuf;
-    nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf, devProp);
+    nppsStCompactGetSize_32u(static_cast<Ncv32u>(d_vecPixelMask.length()), &szNppCompactTmpBuf, devProp);
    if (bDoAtomicCompaction)
    {
        szNppCompactTmpBuf = 0;
@@ -1206,7 +1206,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
            gridInit, blockInit, cuStream,
            d_ptrNowData->ptr(),
            d_ptrNowTmp->ptr(),
-            d_vecPixelMask.length(), d_pixelMask.stride(),
+            static_cast<Ncv32u>(d_vecPixelMask.length()), d_pixelMask.stride(),
            anchorsRoi, pixelStep);
        ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);
@@ -1221,7 +1221,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
    else
    {
        NCVStatus nppSt;
-        nppSt = nppsStCompact_32u(d_ptrNowTmp->ptr(), d_vecPixelMask.length(),
+        nppSt = nppsStCompact_32u(d_ptrNowTmp->ptr(), static_cast<Ncv32u>(d_vecPixelMask.length()),
                                  d_ptrNowData->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
                                  d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
        ncvAssertReturn(nppSt == NPPST_SUCCESS, NCV_NPP_ERROR);
@@ -1276,7 +1276,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
    else
    {
        NCVStatus nppSt;
-        nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), d_vecPixelMask.length(),
+        nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), static_cast<Ncv32u>(d_vecPixelMask.length()),
                                  d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
                                  d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
        ncvAssertReturnNcvStat(nppSt);
@@ -1783,7 +1783,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
                detectionsOnThisScale,
                d_hypothesesIntermediate,
                dstNumRects,
-                d_hypothesesIntermediate.length(),
+                static_cast<Ncv32u>(d_hypothesesIntermediate.length()),
                haar.ClassifierSize.width,
                haar.ClassifierSize.height,
                (Ncv32f)scale,
@@ -1880,7 +1880,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
    if (dstNumRects > d_dstRects.length())
    {
        ncvRetCode = NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW;
-        dstNumRects = d_dstRects.length();
+        dstNumRects = static_cast<Ncv32u>(d_dstRects.length());
    }
    if (dstNumRects != 0)
...
...@@ -457,7 +457,7 @@ NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cuda ...@@ -457,7 +457,7 @@ NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cuda
ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR); ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR);
ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI); ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);
NCVMemStackAllocator gpuCounter(devProp.textureAlignment); NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = ncvIntegralImage_device((Ncv8u*)NULL, roiSize.width, NCVStatus ncvStat = ncvIntegralImage_device((Ncv8u*)NULL, roiSize.width,
...@@ -475,7 +475,7 @@ NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cud ...@@ -475,7 +475,7 @@ NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cud
ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR); ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR);
ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI); ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);
NCVMemStackAllocator gpuCounter(devProp.textureAlignment); NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = ncvIntegralImage_device((Ncv32f*)NULL, roiSize.width * sizeof(Ncv32f), NCVStatus ncvStat = ncvIntegralImage_device((Ncv32f*)NULL, roiSize.width * sizeof(Ncv32f),
...@@ -493,7 +493,7 @@ NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, c ...@@ -493,7 +493,7 @@ NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, c
ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR); ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR);
ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI); ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);
NCVMemStackAllocator gpuCounter(devProp.textureAlignment); NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = ncvSquaredIntegralImage_device(NULL, roiSize.width, NCVStatus ncvStat = ncvSquaredIntegralImage_device(NULL, roiSize.width,
...@@ -511,7 +511,7 @@ NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep, ...@@ -511,7 +511,7 @@ NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
NcvSize32u roiSize, Ncv8u *pBuffer, NcvSize32u roiSize, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp) Ncv32u bufSize, cudaDeviceProp &devProp)
{ {
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer); NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), pBuffer);
ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = ncvIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator); NCVStatus ncvStat = ncvIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator);
...@@ -526,7 +526,7 @@ NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep, ...@@ -526,7 +526,7 @@ NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
NcvSize32u roiSize, Ncv8u *pBuffer, NcvSize32u roiSize, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp) Ncv32u bufSize, cudaDeviceProp &devProp)
{ {
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer); NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), pBuffer);
ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = ncvIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator); NCVStatus ncvStat = ncvIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator);
...@@ -541,7 +541,7 @@ NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep, ...@@ -541,7 +541,7 @@ NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
NcvSize32u roiSize, Ncv8u *pBuffer, NcvSize32u roiSize, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp) Ncv32u bufSize, cudaDeviceProp &devProp)
{ {
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer); NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), pBuffer);
ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = ncvSquaredIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator); NCVStatus ncvStat = ncvSquaredIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator);
...@@ -1506,7 +1506,7 @@ NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDevicePr ...@@ -1506,7 +1506,7 @@ NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDevicePr
return NPPST_SUCCESS; return NPPST_SUCCESS;
} }
NCVMemStackAllocator gpuCounter(devProp.textureAlignment); NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = compactVector_32u_device(NULL, srcLen, NULL, NULL, 0xC001C0DE, NCVStatus ncvStat = compactVector_32u_device(NULL, srcLen, NULL, NULL, 0xC001C0DE,
...@@ -1535,7 +1535,7 @@ NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen, ...@@ -1535,7 +1535,7 @@ NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
Ncv32u elemRemove, Ncv8u *pBuffer, Ncv32u elemRemove, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp) Ncv32u bufSize, cudaDeviceProp &devProp)
{ {
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer); NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), pBuffer);
ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR); ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);
NCVStatus ncvStat = compactVector_32u_device(d_src, srcLen, d_dst, p_dstLen, elemRemove, NCVStatus ncvStat = compactVector_32u_device(d_src, srcLen, d_dst, p_dstLen, elemRemove,
......
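All of the NPPST hunks above follow one pattern: cudaDeviceProp::textureAlignment is a size_t, which is 64-bit under win64, while the NCVMemStackAllocator constructors appear to take the alignment as a 32-bit Ncv32u, so MSVC warns about the implicit truncation until the cast is written out. A minimal, self-contained sketch of that warning and the explicit-narrowing fix (Align32 and CountingAllocator below are stand-ins, not the real NCV declarations):

#include <cstddef>

// Stand-ins for illustration only; the real NCV types are not reproduced here.
typedef unsigned int Align32;                        // plays the role of Ncv32u

struct CountingAllocator
{
    explicit CountingAllocator(Align32 alignment) : alignment(alignment) {}
    Align32 alignment;
};

int main()
{
    std::size_t textureAlignment = 512;              // the field cudaDeviceProp exposes is size_t
    // CountingAllocator warns(textureAlignment);    // on win64: implicit size_t -> 32-bit truncation warning
    CountingAllocator ok(static_cast<Align32>(textureAlignment));  // explicit narrowing, warning silenced
    return ok.alignment == 512 ? 0 : 1;
}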
...@@ -355,7 +355,7 @@ NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size) ...@@ -355,7 +355,7 @@ NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)
seg.clear(); seg.clear();
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC); ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
size = alignUp(size, this->_alignment); size = alignUp(static_cast<Ncv32u>(size), this->_alignment);
this->currentSize += size; this->currentSize += size;
this->_maxSize = std::max(this->_maxSize, this->currentSize); this->_maxSize = std::max(this->_maxSize, this->currentSize);
...@@ -464,7 +464,7 @@ NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size) ...@@ -464,7 +464,7 @@ NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)
break; break;
} }
this->currentSize += alignUp(size, this->_alignment); this->currentSize += alignUp(static_cast<Ncv32u>(size), this->_alignment);
this->_maxSize = std::max(this->_maxSize, this->currentSize); this->_maxSize = std::max(this->_maxSize, this->currentSize);
seg.begin.memtype = this->_memType; seg.begin.memtype = this->_memType;
...@@ -480,8 +480,8 @@ NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg) ...@@ -480,8 +480,8 @@ NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)
ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC); ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC); ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);
ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC); ncvAssertReturn(currentSize >= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);
currentSize -= alignUp(seg.size, this->_alignment); currentSize -= alignUp(static_cast<Ncv32u>(seg.size), this->_alignment);
switch (this->_memType) switch (this->_memType)
{ {
......
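The allocator hunks just above make the same conversion explicit in the other direction: alloc() and dealloc() receive the request size as a size_t, but NCV's alignUp helper evidently operates on 32-bit values, so the argument is narrowed by hand before rounding. A rough sketch of that rounding step, with alignUp32 as a stand-in (the real helper's signature is not shown in this diff):

#include <cstddef>

typedef unsigned int U32;                            // stand-in for Ncv32u

// Stand-in for an alignUp-style helper: round `what` up to a multiple of
// `alignment`, assumed to be a power of two.
static U32 alignUp32(U32 what, U32 alignment)
{
    return (what + alignment - 1) & ~(alignment - 1);
}

int main()
{
    std::size_t requestedSize = 1000;                // allocation sizes arrive as size_t
    U32 rounded = alignUp32(static_cast<U32>(requestedSize), 256);  // explicit size_t -> 32-bit narrowing
    return rounded == 1024 ? 0 : 1;
}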
...@@ -92,7 +92,7 @@ namespace cv { namespace gpu { namespace device ...@@ -92,7 +92,7 @@ namespace cv { namespace gpu { namespace device
template <typename D> struct BrdColReflect101 : BrdReflect101 template <typename D> struct BrdColReflect101 : BrdReflect101
{ {
BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {} BrdColReflect101(int len, size_t step): BrdReflect101(len), step(step) {}
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
...@@ -104,7 +104,7 @@ namespace cv { namespace gpu { namespace device ...@@ -104,7 +104,7 @@ namespace cv { namespace gpu { namespace device
return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step)); return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
} }
int step; size_t step;
}; };
struct BrdReplicate struct BrdReplicate
...@@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace device ...@@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace device
template <typename D> struct BrdColReplicate : BrdReplicate template <typename D> struct BrdColReplicate : BrdReplicate
{ {
BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {} BrdColReplicate(int len, size_t step): BrdReplicate(len), step(step) {}
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
...@@ -164,7 +164,7 @@ namespace cv { namespace gpu { namespace device ...@@ -164,7 +164,7 @@ namespace cv { namespace gpu { namespace device
return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step)); return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
} }
int step; size_t step;
}; };
template <typename D> struct BrdRowConstant template <typename D> struct BrdRowConstant
...@@ -192,7 +192,7 @@ namespace cv { namespace gpu { namespace device ...@@ -192,7 +192,7 @@ namespace cv { namespace gpu { namespace device
template <typename D> struct BrdColConstant template <typename D> struct BrdColConstant
{ {
BrdColConstant(int len_, int step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {} BrdColConstant(int len_, size_t step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
{ {
...@@ -210,7 +210,7 @@ namespace cv { namespace gpu { namespace device ...@@ -210,7 +210,7 @@ namespace cv { namespace gpu { namespace device
} }
int len; int len;
int step; size_t step;
D val; D val;
}; };
......
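The border-handling hunks take the opposite approach: instead of casting at every call site, the step member of the column functors is widened from int to size_t, matching the byte stride that GpuMat::step provides. A simplified host-only sketch of the addressing these functors perform (BrdColReplicateSketch is a hypothetical stand-in; the real device code carries __device__ qualifiers and separate idx_low/idx_high helpers):

#include <algorithm>
#include <cstddef>

// Host-only sketch: `step` is a byte stride, so it is stored as size_t to
// match where the value comes from instead of narrowing it to int.
template <typename D>
struct BrdColReplicateSketch
{
    BrdColReplicateSketch(int len, std::size_t step) : last(len - 1), step(step) {}

    template <typename T>
    D at(int i, const T* data) const
    {
        int row = std::max(0, std::min(i, last));            // clamp row index (replicate border)
        return *(const D*)((const char*)data + row * step);  // byte-stride addressing
    }

    int last;
    std::size_t step;
};

int main()
{
    float column[4] = { 1.f, 2.f, 3.f, 4.f };
    BrdColReplicateSketch<float> brd(4, sizeof(float));      // one float per "row"
    return (brd.at(-3, column) == 1.f && brd.at(10, column) == 4.f) ? 0 : 1;
}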
...@@ -56,11 +56,11 @@ void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*st ...@@ -56,11 +56,11 @@ void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*st
namespace cv { namespace gpu { namespace split_merge namespace cv { namespace gpu { namespace split_merge
{ {
extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst, extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,
int total_channels, int elem_size, int total_channels, size_t elem_size,
const cudaStream_t& stream); const cudaStream_t& stream);
extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst, extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,
int num_channels, int elem_size1, int num_channels, size_t elem_size1,
const cudaStream_t& stream); const cudaStream_t& stream);
void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream) void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
......
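The split/merge declarations are widened for the same reason: the element size handed to these callers comes from GpuMat::elemSize(), which returns size_t, so keeping the parameter as int would force a narrowing conversion, and a win64 warning, on every call. A tiny sketch of the widened interface, with merge_sketch as a hypothetical stand-in for the real entry point:

#include <cstddef>

// Hypothetical stand-in for a merge-style entry point: the element size is
// taken as size_t so that elemSize()-style values pass through unchanged.
static void merge_sketch(const void* /*src*/, void* /*dst*/,
                         int total_channels, std::size_t elem_size)
{
    (void)total_channels;
    (void)elem_size;
}

int main()
{
    std::size_t elem_size = 4;            // what GpuMat::elemSize() would return for a 4-byte element
    merge_sketch(0, 0, 3, elem_size);     // no narrowing, no win64 conversion warning
    return 0;
}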
...@@ -167,7 +167,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2] ...@@ -167,7 +167,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
nr_plane_pyr[0] = rthis.nr_plane; nr_plane_pyr[0] = rthis.nr_plane;
const int n = 64; const int n = 64;
step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T); step_pyr[0] = static_cast<int>(alignSize(cols * sizeof(T), n) / sizeof(T));
for (int i = 1; i < levels; i++) for (int i = 1; i < levels; i++)
{ {
cols_pyr[i] = (cols_pyr[i-1] + 1) / 2; cols_pyr[i] = (cols_pyr[i-1] + 1) / 2;
...@@ -175,7 +175,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2] ...@@ -175,7 +175,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2; nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2;
step_pyr[i] = alignSize(cols_pyr[i] * sizeof(T), n) / sizeof(T); step_pyr[i] = static_cast<int>(alignSize(cols_pyr[i] * sizeof(T), n) / sizeof(T));
} }
Size msg_size(step_pyr[0], rows * nr_plane_pyr[0]); Size msg_size(step_pyr[0], rows * nr_plane_pyr[0]);
...@@ -197,7 +197,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2] ...@@ -197,7 +197,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
data_cost.create(data_cost_size, DataType<T>::type); data_cost.create(data_cost_size, DataType<T>::type);
data_cost_selected.create(msg_size, DataType<T>::type); data_cost_selected.create(msg_size, DataType<T>::type);
step_pyr[0] = data_cost.step / sizeof(T); step_pyr[0] = static_cast<int>(data_cost.step / sizeof(T));
Size temp_size = data_cost_size; Size temp_size = data_cost_size;
if (data_cost_size.width * data_cost_size.height < step_pyr[levels - 1] * rows_pyr[levels - 1] * rthis.ndisp) if (data_cost_size.width * data_cost_size.height < step_pyr[levels - 1] * rows_pyr[levels - 1] * rthis.ndisp)
......
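In the constant-space BP hunks, the result of cv::alignSize (which returns size_t) and of data_cost.step / sizeof(T) is stored in step_pyr, an array of ints, so the narrowing is spelled out with static_cast<int>. A small sketch of that rounding, using alignSizeSketch as a stand-in for the real helper:

#include <cstddef>

// Stand-in for cv::alignSize: round sz up to a multiple of n (n a power of two).
static std::size_t alignSizeSketch(std::size_t sz, int n)
{
    return (sz + n - 1) & -n;
}

int main()
{
    const int n = 64;                     // row pitch alignment, in bytes
    const int cols = 317;
    // The whole right-hand side is a size_t expression; step_pyr holds ints,
    // so on win64 the assignment would warn without the explicit cast.
    int step0 = static_cast<int>(alignSizeSketch(cols * sizeof(float), n) / sizeof(float));
    return step0 == 320 ? 0 : 1;
}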
...@@ -260,7 +260,7 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMa ...@@ -260,7 +260,7 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMa
keypointsGPU.release(); keypointsGPU.release();
else else
{ {
Mat keypointsCPU(SURF_GPU::SF_FEATURE_STRIDE, keypoints.size(), CV_32FC1); Mat keypointsCPU(SURF_GPU::SF_FEATURE_STRIDE, static_cast<int>(keypoints.size()), CV_32FC1);
float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::SF_X); float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::SF_X);
float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::SF_Y); float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::SF_Y);
......
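Finally, the SURF change narrows at a constructor boundary: vector::size() returns size_t, while cv::Mat takes its row and column counts as ints, so the keypoint count is cast explicitly. A last sketch of that boundary (MatDimsSketch is a hypothetical stand-in for the Mat constructor signature):

#include <cstddef>
#include <vector>

// Hypothetical stand-in for the (rows, cols, type)-style Mat constructor,
// which takes its dimensions as ints.
struct MatDimsSketch
{
    MatDimsSketch(int rows, int cols) : rows(rows), cols(cols) {}
    int rows, cols;
};

int main()
{
    std::vector<int> keypoints(1024);
    // size() is size_t; the explicit cast documents the narrowing and keeps win64 quiet.
    MatDimsSketch kp(7, static_cast<int>(keypoints.size()));
    return kp.cols == 1024 ? 0 : 1;
}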