opencv
Commit da93a1da authored Dec 13, 2012 by Vladislav Vinogradov
fixed build for CARMA platform
parent 889674ef
Showing 27 changed files with 145 additions and 192 deletions (+145 -192)
cmake/OpenCVDetectCUDA.cmake                               +11  -12
modules/core/CMakeLists.txt                                 +0   -1
modules/core/src/gpumat.cpp                                 +1  -25
modules/core/src/opengl_interop.cpp                        +29  -22
modules/gpu/app/nv_perf_test/CMakeLists.txt                 +1   -1
modules/gpu/include/opencv2/gpu/device/common.hpp           +0   -1
modules/gpu/src/cuda/canny.cu                              +19  -31
modules/gpu/src/cuda/ccomponetns.cu                         +2   -2
modules/gpu/src/cuda/column_filter.h                        +0   -0
modules/gpu/src/cuda/element_operations.cu                  +0   -0
modules/gpu/src/cuda/gftt.cu                                +2   -2
modules/gpu/src/cuda/global_motion.cu                       +6   -8
modules/gpu/src/cuda/hist.cu                                +5   -8
modules/gpu/src/cuda/hog.cu                                 +9   -7
modules/gpu/src/cuda/hough.cu                               +2   -0
modules/gpu/src/cuda/matrix_reductions.cu                   +8   -7
modules/gpu/src/cuda/optflowbm.cu                           +1   -4
modules/gpu/src/cuda/orb.cu                                 +1   -0
modules/gpu/src/cuda/pyrlk.cu                              +17  -20
modules/gpu/src/cuda/row_filter.h                           +0   -0
modules/gpu/src/cuda/split_merge.cu                         +1   -2
modules/gpu/src/cuda/tvl1flow.cu                           +12  -24
modules/gpu/src/imgproc.cpp                                 +1   -1
modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp     +13   -2
samples/gpu/driver_api_multi.cpp                            +1   -7
samples/gpu/driver_api_stereo_multi.cpp                     +1   -1
samples/gpu/softcascade.cpp                                 +2   -4
cmake/OpenCVDetectCUDA.cmake
@@ -3,12 +3,12 @@ if(${CMAKE_VERSION} VERSION_LESS "2.8.3")
   return()
 endif()

 if(WIN32 AND NOT MSVC)
   message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler suppoted on your platform).")
   return()
 endif()

 if(CMAKE_COMPILER_IS_GNUCXX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
   message(STATUS "CUDA compilation is disabled (due to Clang unsuppoted on your platform).")
   return()
 endif()
@@ -72,11 +72,11 @@ if(CUDA_FOUND)
   # Tell NVCC to add PTX intermediate code for the specified architectures
   string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
   foreach(ARCH IN LISTS ARCH_LIST)
     set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
     set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
     set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
   endforeach()

   # These vars will be processed in other scripts
   set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
@@ -84,7 +84,7 @@ if(CUDA_FOUND)
   message(STATUS "CUDA NVCC target flags: ${CUDA_NVCC_FLAGS}")

   OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF)

   if(CUDA_FAST_MATH)
     set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math)
@@ -92,7 +92,6 @@ if(CUDA_FOUND)
   mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)

   unset(CUDA_npp_LIBRARY CACHE)
   find_cuda_helper_libs(npp)

   macro(ocv_cuda_compile VAR)
@@ -106,15 +105,15 @@ if(CUDA_FOUND)
       string(REPLACE "-ggdb3" "" ${var} "${${var}}")
     endforeach()

     if(BUILD_SHARED_LIBS)
       set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -DCVAPI_EXPORTS)
     endif()

     if(UNIX OR APPLE)
       set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)
     endif()

     if(APPLE)
       set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
     endif()

     # disabled because of multiple warnings during building nvcc auto generated files
modules/core/CMakeLists.txt
@@ -10,7 +10,6 @@ if(HAVE_CUDA)
   file(GLOB lib_cuda "src/cuda/*.cu")
   ocv_cuda_compile(cuda_objs ${lib_cuda})

   set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
 else()
   set(lib_cuda "")
modules/core/src/gpumat.cpp
@@ -45,8 +45,7 @@
 #include <iostream>

 #ifdef HAVE_CUDA
     #include <cuda.h>
     #include <cuda_runtime_api.h>
     #include <cuda_runtime.h>
     #include <npp.h>

     #define CUDART_MINIMUM_REQUIRED_VERSION 4010
@@ -394,18 +393,6 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory)
 namespace
 {
     template <class T> void getCudaAttribute(T* attribute, CUdevice_attribute device_attribute, int device)
     {
         *attribute = T();
         //CUresult error = CUDA_SUCCESS;// = cuDeviceGetAttribute( attribute, device_attribute, device ); why link erros under ubuntu??
         CUresult error = cuDeviceGetAttribute( attribute, device_attribute, device );
         if (CUDA_SUCCESS == error)
             return;

         printf("Driver API error = %04d\n", error);
         cv::gpu::error("driver API error", __FILE__, __LINE__);
     }

     int convertSMVer2Cores(int major, int minor)
     {
         // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
@@ -466,17 +453,6 @@ void cv::gpu::printCudaDeviceInfo(int device)
             convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
         printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);

         // This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output
         int memoryClock, memBusWidth, L2CacheSize;
         getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
         getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
         getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);

         printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f);
         printf(" Memory Bus Width: %d-bit\n", memBusWidth);
         if (L2CacheSize)
             printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
         printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
             prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
modules/core/src/opengl_interop.cpp
@@ -44,11 +44,13 @@
 #include "opencv2/core/opengl_interop.hpp"
 #include "opencv2/core/gpumat.hpp"

-#include "gl_core_3_1.hpp"
-#ifdef HAVE_CUDA
-    #include <cuda_runtime.h>
-    #include <cuda_gl_interop.h>
+#ifdef HAVE_OPENGL
+    #include "gl_core_3_1.hpp"
+
+    #ifdef HAVE_CUDA
+        #include <cuda_runtime.h>
+        #include <cuda_gl_interop.h>
+    #endif
 #endif

 using namespace std;
@@ -61,24 +63,24 @@ namespace
     void throw_nogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
 #else
     void throw_nogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
 #endif

 #ifndef HAVE_CUDA
     void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
 #else
     void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }

     #if defined(__GNUC__)
         #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
     #else /* defined(__CUDACC__) || defined(__MSVC__) */
         #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
     #endif

     void ___cudaSafeCall(cudaError_t err, const char* file, const int line, const char* func = "")
     {
         if (cudaSuccess != err)
             cv::gpu::error(cudaGetErrorString(err), file, line, func);
     }
 #endif
 #endif
 }
@@ -139,11 +141,16 @@ namespace
 void cv::gpu::setGlDevice(int device)
 {
-#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
+#ifndef HAVE_OPENGL
     (void) device;
-    throw_nocuda();
+    throw_nogl();
 #else
-    cudaSafeCall( cudaGLSetGLDevice(device) );
+    #if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
+        (void) device;
+        throw_nocuda();
+    #else
+        cudaSafeCall( cudaGLSetGLDevice(device) );
+    #endif
 #endif
 }
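The cudaSafeCall wrapper shown above follows the usual CUDA error-checking idiom: a macro captures the call site's file, line and (under GCC) function name and forwards them to a checker. A minimal self-contained sketch of that idiom follows; the names checkCuda and cudaSafeCallSketch are hypothetical, and the real OpenCV wrapper reports through cv::gpu::error() rather than exiting.

    #include <cuda_runtime.h>
    #include <cstdio>
    #include <cstdlib>

    // Checker that receives the call site's location from the macro below.
    static void checkCuda(cudaError_t err, const char* file, int line, const char* func = "")
    {
        if (err != cudaSuccess)
        {
            std::fprintf(stderr, "%s:%d (%s): %s\n", file, line, func, cudaGetErrorString(err));
            std::exit(EXIT_FAILURE);
        }
    }

    // GCC provides __func__, so the caller's function name can be forwarded too.
    #if defined(__GNUC__)
        #define cudaSafeCallSketch(expr)  checkCuda(expr, __FILE__, __LINE__, __func__)
    #else
        #define cudaSafeCallSketch(expr)  checkCuda(expr, __FILE__, __LINE__)
    #endif

    int main()
    {
        int count = 0;
        cudaSafeCallSketch( cudaGetDeviceCount(&count) );   // same usage shape as cudaSafeCall( cudaGLSetGLDevice(device) )
        std::printf("%d CUDA device(s)\n", count);
        return 0;
    }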
modules/gpu/app/nv_perf_test/CMakeLists.txt
-cmake_minimum_required(VERSION 2.8.6)
+cmake_minimum_required(VERSION 2.8.3)

 project(nv_perf_test)
modules/gpu/include/opencv2/gpu/device/common.hpp
@@ -100,7 +100,6 @@ namespace cv { namespace gpu
     typedef unsigned char uchar;
     typedef unsigned short ushort;
     typedef signed char schar;
     typedef unsigned int uint;

     template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
     {
modules/gpu/src/cuda/canny.cu
@@ -52,7 +52,7 @@
 using namespace cv::gpu;
 using namespace cv::gpu::device;

-namespace
+namespace canny
 {
     struct L1 : binary_function<int, int, float>
     {
@@ -78,17 +78,17 @@ namespace
 namespace cv { namespace gpu { namespace device
 {
-    template <> struct TransformFunctorTraits<L1> : DefaultTransformFunctorTraits<L1>
+    template <> struct TransformFunctorTraits<canny::L1> : DefaultTransformFunctorTraits<canny::L1>
     {
         enum { smart_shift = 4 };
     };
-    template <> struct TransformFunctorTraits<L2> : DefaultTransformFunctorTraits<L2>
+    template <> struct TransformFunctorTraits<canny::L2> : DefaultTransformFunctorTraits<canny::L2>
     {
         enum { smart_shift = 4 };
     };
 }}}

-namespace
+namespace canny
 {
     texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);

     struct SrcTex
@@ -104,7 +104,7 @@ namespace
     };

     template <class Norm> __global__
-    void calcMagnitude(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
+    void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -120,10 +120,7 @@ namespace
             mag(y, x) = norm(dxVal, dyVal);
         }
     }
-}
-namespace canny
-{
     void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
     {
         const dim3 block(16, 16);
@@ -135,12 +132,12 @@ namespace canny
         if (L2Grad)
         {
             L2 norm;
-            ::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
+            calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
         }
         else
         {
             L1 norm;
-            ::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
+            calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
         }

         cudaSafeCall( cudaGetLastError() );
@@ -165,11 +162,11 @@ namespace canny
 //////////////////////////////////////////////////////////////////////////////////////////

-namespace
+namespace canny
 {
     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);

-    __global__ void calcMap(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
+    __global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
     {
         const int CANNY_SHIFT = 15;
         const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
@@ -220,10 +217,7 @@ namespace
             map(y, x) = edge_type;
         }
     }
-}
-namespace canny
-{
     void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
     {
         const dim3 block(16, 16);
@@ -231,7 +225,7 @@ namespace canny
         bindTexture(&tex_mag, mag);

-        ::calcMap<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
+        calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
@@ -240,11 +234,11 @@ namespace canny
 //////////////////////////////////////////////////////////////////////////////////////////

-namespace
+namespace canny
 {
     __device__ int counter = 0;

-    __global__ void edgesHysteresisLocal(PtrStepSzi map, ushort2* st)
+    __global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st)
     {
         __shared__ volatile int smem[18][18];
@@ -325,10 +319,7 @@ namespace
                 st[ind] = make_ushort2(x, y);
         }
     }
-}
-namespace canny
-{
     void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
     {
         void* counter_ptr;
@@ -339,7 +330,7 @@ namespace canny
         const dim3 block(16, 16);
         const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));

-        ::edgesHysteresisLocal<<<grid, block>>>(map, st1);
+        edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
@@ -348,12 +339,12 @@ namespace canny
 //////////////////////////////////////////////////////////////////////////////////////////

-namespace
+namespace canny
 {
     __constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
     __constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};

-    __global__ void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
+    __global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
     {
         const int stack_size = 512;
@@ -439,14 +430,11 @@ namespace
                 st2[ind + i] = s_st[i];
             }
         }
     }
-}
-namespace canny
-{
     void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
     {
         void* counter_ptr;
-        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, ::counter) );
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );

         int count;
         cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
@@ -458,7 +446,7 @@ namespace canny
             const dim3 block(128);
             const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);

-            ::edgesHysteresisGlobal<<<grid, block>>>(map, st1, st2, count);
+            edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
             cudaSafeCall( cudaGetLastError() );

             cudaSafeCall( cudaDeviceSynchronize() );
@@ -472,7 +460,7 @@ namespace canny
 //////////////////////////////////////////////////////////////////////////////////////////

-namespace
+namespace canny
 {
     struct GetEdges : unary_function<int, uchar>
     {
@@ -488,7 +476,7 @@ namespace
 namespace cv { namespace gpu { namespace device
 {
-    template <> struct TransformFunctorTraits<GetEdges> : DefaultTransformFunctorTraits<GetEdges>
+    template <> struct TransformFunctorTraits<canny::GetEdges> : DefaultTransformFunctorTraits<canny::GetEdges>
     {
         enum { smart_shift = 4 };
     };
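The pattern applied throughout canny.cu, and repeated in hist.cu, optflowbm.cu, pyrlk.cu and tvl1flow.cu below, is to fold the anonymous namespace into the named one and give the __global__ functions a "Kernel" suffix, so the host wrappers launch them without the leading "::" qualification. A minimal sketch of the resulting shape, with a hypothetical namespace name and a simplified kernel that is not the commit's code:

    #include <cuda_runtime.h>

    namespace canny_sketch   // hypothetical, mirrors "namespace canny"
    {
        // The __global__ function carries the "Kernel" suffix ...
        __global__ void calcMagnitudeKernel(const float* dx, const float* dy, float* mag, int n)
        {
            const int i = blockIdx.x * blockDim.x + threadIdx.x;
            if (i < n)
                mag[i] = fabsf(dx[i]) + fabsf(dy[i]);   // L1 norm, as in the L1 functor above
        }

        // ... while the host wrapper keeps the public name in the same namespace,
        // so the launch needs no "::" prefix.
        void calcMagnitude(const float* dx, const float* dy, float* mag, int n)
        {
            const dim3 block(256);
            const dim3 grid((n + block.x - 1) / block.x);

            calcMagnitudeKernel<<<grid, block>>>(dx, dy, mag, n);
            cudaGetLastError();
            cudaDeviceSynchronize();
        }
    }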
modules/gpu/src/cuda/ccomponetns.cu
@@ -497,6 +497,7 @@ namespace cv { namespace gpu { namespace device
         void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream)
         {
+            (void) flags;
             dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
             dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));
@@ -529,4 +530,4 @@ namespace cv { namespace gpu { namespace device
 }
 } } }

-#endif /* CUDA_DISABLER */
\ No newline at end of file
+#endif /* CUDA_DISABLER */
modules/gpu/src/cuda/column_filter.h
This diff is collapsed.
modules/gpu/src/cuda/element_operations.cu
This diff is collapsed.
modules/gpu/src/cuda/gftt.cu
@@ -47,6 +47,7 @@
 #if !defined CUDA_DISABLER

 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>

 #include "opencv2/gpu/device/common.hpp"
@@ -148,4 +149,4 @@ namespace cv { namespace gpu { namespace device
 }}}

-#endif /* CUDA_DISABLER */
\ No newline at end of file
+#endif /* CUDA_DISABLER */
modules/gpu/src/cuda/global_motion.cu
@@ -43,12 +43,11 @@
 #if !defined CUDA_DISABLER

-#include "thrust/device_ptr.h"
-#include "thrust/remove.h"
-#include "thrust/functional.h"
-#include "internal_shared.hpp"
+#include <thrust/device_ptr.h>
+#include <thrust/remove.h>
+#include <thrust/functional.h>

-using namespace thrust;
+#include "internal_shared.hpp"

 namespace cv { namespace gpu { namespace device { namespace globmotion {
@@ -64,7 +63,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask)
     return thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dpoints0, dpoints1)),
                              thrust::make_zip_iterator(thrust::make_tuple(dpoints0 + N, dpoints1 + N)),
                              dmask, thrust::not1(thrust::identity<uchar>()))
-           - make_zip_iterator(make_tuple(dpoints0, dpoints1));
+           - thrust::make_zip_iterator(make_tuple(dpoints0, dpoints1));
 }
@@ -117,4 +116,4 @@ void calcWobbleSuppressionMaps(
 }}}}

-#endif /* CUDA_DISABLER */
\ No newline at end of file
+#endif /* CUDA_DISABLER */
modules/gpu/src/cuda/hist.cu
@@ -51,9 +51,9 @@
 using namespace cv::gpu;
 using namespace cv::gpu::device;

-namespace
+namespace hist
 {
-    __global__ void histogram256(const uchar* src, int cols, int rows, size_t step, int* hist)
+    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist)
     {
         __shared__ int shist[256];
@@ -94,16 +94,13 @@ namespace
             if (histVal > 0)
                 ::atomicAdd(hist + tid, histVal);
         }
     }
-}
-namespace hist
-{
     void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
     {
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.rows, block.y));

-        ::histogram256<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
+        histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
         cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
@@ -113,7 +110,7 @@ namespace hist
 /////////////////////////////////////////////////////////////////////////

-namespace
+namespace hist
 {
     __constant__ int c_lut[256];
@@ -133,7 +130,7 @@ namespace
 namespace cv { namespace gpu { namespace device
 {
-    template <> struct TransformFunctorTraits<EqualizeHist> : DefaultTransformFunctorTraits<EqualizeHist>
+    template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
     {
         enum { smart_shift = 4 };
     };
modules/gpu/src/cuda/hog.cu
@@ -244,15 +244,17 @@ namespace cv { namespace gpu { namespace device
             return smem[0];
         #endif
         }
         else
         {
         #if __CUDA_ARCH__ >= 300
             if (threadIdx.x == 0)
                 smem[0] = sum;
         #endif

             __syncthreads();

             return smem[0];
         }
     }
modules/gpu/src/cuda/hough.cu
@@ -42,7 +42,9 @@
 #if !defined CUDA_DISABLER

+#include <thrust/device_ptr.h>
+#include <thrust/sort.h>

 #include "opencv2/gpu/device/common.hpp"
 #include "opencv2/gpu/device/emulation.hpp"
 #include "opencv2/gpu/device/vec_math.hpp"
modules/gpu/src/cuda/matrix_reductions.cu
@@ -55,7 +55,7 @@
 using namespace cv::gpu;
 using namespace cv::gpu::device;

-namespace
+namespace detail
 {
     template <int cn> struct Unroll;

     template <> struct Unroll<1>
@@ -218,7 +218,7 @@ namespace sum
         {
             sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);

-            device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<R>()));
+            device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));

             if (tid == 0)
             {
@@ -254,7 +254,7 @@ namespace sum
         {
             sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);

-            device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<double>()));
+            device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<double>()));

             if (tid == 0)
             {
@@ -294,7 +294,7 @@ namespace sum
                 }
             }

-            device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<R>()));
+            device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));

             GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
         }
@@ -918,13 +918,11 @@ namespace countNonZero
     __global__ void kernel(const PtrStepSz<T> src, unsigned int* count, const int twidth, const int theight)
     {
         __shared__ unsigned int scount[BLOCK_SIZE];
-        __shared__ bool is_last;

         const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
         const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;

         const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-        const int bid = blockIdx.y * gridDim.x + blockIdx.x;

         unsigned int mycount = 0;
@@ -946,6 +944,9 @@ namespace countNonZero
         if (tid == 0)
             ::atomicAdd(count, mycount);
     #else
+        __shared__ bool is_last;
+        const int bid = blockIdx.y * gridDim.x + blockIdx.x;
+
         if (tid == 0)
         {
             count[bid] = mycount;
@@ -1244,7 +1245,7 @@ namespace reduce
         for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE)
             myVal = op(myVal, saturate_cast<work_type>(srcRow[x]));

-        device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(myVal), threadIdx.x, Unroll<cn>::op(op));
+        device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));

         if (threadIdx.x == 0)
             dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols));
modules/gpu/src/cuda/optflowbm.cu
@@ -48,7 +48,7 @@
 using namespace cv::gpu;
 using namespace cv::gpu::device;

-namespace
+namespace optflowbm
 {
     texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
     texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);
@@ -145,10 +145,7 @@ namespace
             velx(i, j) = static_cast<float>(sumx) / countMin;
             vely(i, j) = static_cast<float>(sumy) / countMin;
         }
     }
-}
-namespace optflowbm
-{
     void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
               int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream)
     {
modules/gpu/src/cuda/orb.cu
@@ -47,6 +47,7 @@
 #if !defined CUDA_DISABLER

 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>

 #include "opencv2/gpu/device/common.hpp"
modules/gpu/src/cuda/pyrlk.cu
@@ -57,7 +57,7 @@
 using namespace cv::gpu;
 using namespace cv::gpu::device;

-namespace
+namespace pyrlk
 {
     __constant__ int c_winSize_x;
     __constant__ int c_winSize_y;
@@ -123,7 +123,7 @@ namespace
     }

     template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
-    __global__ void sparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
+    __global__ void sparseKernel(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
     {
     #if __CUDA_ARCH__ <= 110
         const int BLOCK_SIZE = 128;
@@ -321,9 +321,9 @@ namespace
         dim3 grid(ptcount);

         if (level == 0 && err)
-            sparse<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
+            sparseKernel<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
         else
-            sparse<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
+            sparseKernel<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);

         cudaSafeCall( cudaGetLastError() );
@@ -332,7 +332,7 @@ namespace
     }

     template <bool calcErr>
-    __global__ void dense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
+    __global__ void denseKernel(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
     {
         extern __shared__ int smem[];
@@ -476,10 +476,7 @@ namespace
             err(y, x) = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
         }
     }
-}
-namespace pyrlk
-{
     void loadConstants(int2 winSize, int iters)
     {
         cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
@@ -500,11 +497,11 @@ namespace pyrlk
         static const func_t funcs[5][5] =
         {
-            {::sparse_caller<1, 1, 1>, ::sparse_caller<1, 2, 1>, ::sparse_caller<1, 3, 1>, ::sparse_caller<1, 4, 1>, ::sparse_caller<1, 5, 1>},
-            {::sparse_caller<1, 1, 2>, ::sparse_caller<1, 2, 2>, ::sparse_caller<1, 3, 2>, ::sparse_caller<1, 4, 2>, ::sparse_caller<1, 5, 2>},
-            {::sparse_caller<1, 1, 3>, ::sparse_caller<1, 2, 3>, ::sparse_caller<1, 3, 3>, ::sparse_caller<1, 4, 3>, ::sparse_caller<1, 5, 3>},
-            {::sparse_caller<1, 1, 4>, ::sparse_caller<1, 2, 4>, ::sparse_caller<1, 3, 4>, ::sparse_caller<1, 4, 4>, ::sparse_caller<1, 5, 4>},
-            {::sparse_caller<1, 1, 5>, ::sparse_caller<1, 2, 5>, ::sparse_caller<1, 3, 5>, ::sparse_caller<1, 4, 5>, ::sparse_caller<1, 5, 5>}
+            {sparse_caller<1, 1, 1>, sparse_caller<1, 2, 1>, sparse_caller<1, 3, 1>, sparse_caller<1, 4, 1>, sparse_caller<1, 5, 1>},
+            {sparse_caller<1, 1, 2>, sparse_caller<1, 2, 2>, sparse_caller<1, 3, 2>, sparse_caller<1, 4, 2>, sparse_caller<1, 5, 2>},
+            {sparse_caller<1, 1, 3>, sparse_caller<1, 2, 3>, sparse_caller<1, 3, 3>, sparse_caller<1, 4, 3>, sparse_caller<1, 5, 3>},
+            {sparse_caller<1, 1, 4>, sparse_caller<1, 2, 4>, sparse_caller<1, 3, 4>, sparse_caller<1, 4, 4>, sparse_caller<1, 5, 4>},
+            {sparse_caller<1, 1, 5>, sparse_caller<1, 2, 5>, sparse_caller<1, 3, 5>, sparse_caller<1, 4, 5>, sparse_caller<1, 5, 5>}
         };

         bindTexture(&tex_If, I);
@@ -522,11 +519,11 @@ namespace pyrlk
         static const func_t funcs[5][5] =
         {
-            {::sparse_caller<4, 1, 1>, ::sparse_caller<4, 2, 1>, ::sparse_caller<4, 3, 1>, ::sparse_caller<4, 4, 1>, ::sparse_caller<4, 5, 1>},
-            {::sparse_caller<4, 1, 2>, ::sparse_caller<4, 2, 2>, ::sparse_caller<4, 3, 2>, ::sparse_caller<4, 4, 2>, ::sparse_caller<4, 5, 2>},
-            {::sparse_caller<4, 1, 3>, ::sparse_caller<4, 2, 3>, ::sparse_caller<4, 3, 3>, ::sparse_caller<4, 4, 3>, ::sparse_caller<4, 5, 3>},
-            {::sparse_caller<4, 1, 4>, ::sparse_caller<4, 2, 4>, ::sparse_caller<4, 3, 4>, ::sparse_caller<4, 4, 4>, ::sparse_caller<4, 5, 4>},
-            {::sparse_caller<4, 1, 5>, ::sparse_caller<4, 2, 5>, ::sparse_caller<4, 3, 5>, ::sparse_caller<4, 4, 5>, ::sparse_caller<4, 5, 5>}
+            {sparse_caller<4, 1, 1>, sparse_caller<4, 2, 1>, sparse_caller<4, 3, 1>, sparse_caller<4, 4, 1>, sparse_caller<4, 5, 1>},
+            {sparse_caller<4, 1, 2>, sparse_caller<4, 2, 2>, sparse_caller<4, 3, 2>, sparse_caller<4, 4, 2>, sparse_caller<4, 5, 2>},
+            {sparse_caller<4, 1, 3>, sparse_caller<4, 2, 3>, sparse_caller<4, 3, 3>, sparse_caller<4, 4, 3>, sparse_caller<4, 5, 3>},
+            {sparse_caller<4, 1, 4>, sparse_caller<4, 2, 4>, sparse_caller<4, 3, 4>, sparse_caller<4, 4, 4>, sparse_caller<4, 5, 4>},
+            {sparse_caller<4, 1, 5>, sparse_caller<4, 2, 5>, sparse_caller<4, 3, 5>, sparse_caller<4, 4, 5>, sparse_caller<4, 5, 5>}
         };

         bindTexture(&tex_If4, I);
@@ -551,12 +548,12 @@ namespace pyrlk
         if (err.data)
         {
-            ::dense<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
+            denseKernel<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
             cudaSafeCall( cudaGetLastError() );
         }
         else
         {
-            ::dense<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
+            denseKernel<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
             cudaSafeCall( cudaGetLastError() );
         }
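The funcs[5][5] tables above are the usual way to pick a compile-time-specialised caller at run time: every <cn, PATCH_X, PATCH_Y> instantiation is stored as a plain function pointer and indexed by the run-time patch size. A reduced sketch of the same idea, with a hypothetical 2x2 table and a printf body instead of a kernel launch:

    #include <cstdio>

    template <int PATCH_X, int PATCH_Y>
    void sparse_caller_sketch(int level)
    {
        std::printf("patch %dx%d, level %d\n", PATCH_X, PATCH_Y, level);
    }

    typedef void (*func_t)(int level);

    // funcs[patch.y - 1][patch.x - 1] selects the right instantiation at run time.
    static const func_t funcs[2][2] =
    {
        { sparse_caller_sketch<1, 1>, sparse_caller_sketch<2, 1> },
        { sparse_caller_sketch<1, 2>, sparse_caller_sketch<2, 2> }
    };

    int main()
    {
        const int patchX = 2, patchY = 1, level = 0;
        funcs[patchY - 1][patchX - 1](level);   // prints "patch 2x1, level 0"
        return 0;
    }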
modules/gpu/src/cuda/row_filter.h
This diff is collapsed.
modules/gpu/src/cuda/split_merge.cu
@@ -508,4 +508,4 @@ namespace cv { namespace gpu { namespace device
 }}} // namespace cv { namespace gpu { namespace device

-#endif /* CUDA_DISABLER */
\ No newline at end of file
+#endif /* CUDA_DISABLER */
modules/gpu/src/cuda/tvl1flow.cu
@@ -52,9 +52,9 @@ using namespace cv::gpu::device;
 ////////////////////////////////////////////////////////////
 // centeredGradient

-namespace
+namespace tvl1flow
 {
-    __global__ void centeredGradient(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
+    __global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -65,16 +65,13 @@ namespace
         dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0)));
         dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
     }
-}
-namespace tvl1flow
-{
     void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
     {
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

-        ::centeredGradient<<<grid, block>>>(src, dx, dy);
+        centeredGradientKernel<<<grid, block>>>(src, dx, dy);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
@@ -84,7 +81,7 @@ namespace tvl1flow
 ////////////////////////////////////////////////////////////
 // warpBackward

-namespace
+namespace tvl1flow
 {
     static __device__ __forceinline__ float bicubicCoeff(float x_)
     {
@@ -107,7 +104,7 @@ namespace
     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);

-    __global__ void warpBackward(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
+    __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -166,10 +163,7 @@ namespace
         const float I0Val = I0(y, x);
         rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
     }
-}
-namespace tvl1flow
-{
     void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
     {
         const dim3 block(32, 8);
@@ -179,7 +173,7 @@ namespace tvl1flow
         bindTexture(&tex_I1x, I1x);
         bindTexture(&tex_I1y, I1y);

-        ::warpBackward<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
+        warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
@@ -189,7 +183,7 @@ namespace tvl1flow
 ////////////////////////////////////////////////////////////
 // estimateU

-namespace
+namespace tvl1flow
 {
     __device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
     {
@@ -213,7 +207,7 @@ namespace
         }
     }

-    __global__ void estimateU(const PtrStepSzf I1wx, const PtrStepf I1wy,
+    __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
                               const PtrStepf grad, const PtrStepf rho_c,
                               const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
                               PtrStepf u1, PtrStepf u2, PtrStepf error,
@@ -275,10 +269,7 @@ namespace
             const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
             error(y, x) = n1 + n2;
         }
     }
-}
-namespace tvl1flow
-{
     void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
                    PtrStepSzf grad, PtrStepSzf rho_c,
                    PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
@@ -288,7 +279,7 @@ namespace tvl1flow
         const dim3 block(32, 8);
         const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));

-        ::estimateU<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
+        estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
@@ -298,9 +289,9 @@ namespace tvl1flow
 ////////////////////////////////////////////////////////////
 // estimateDualVariables

-namespace
+namespace tvl1flow
 {
-    __global__ void estimateDualVariables(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
+    __global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
}
}
namespace tvl1flow
{
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut)
{
const dim3 block(32, 8);
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
::estimateDualVariables
<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
estimateDualVariablesKernel
<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
...
...
modules/gpu/src/imgproc.cpp
@@ -551,7 +551,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
     src.locateROI(whole, offset);

     if (info.supports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
-        && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (src.step - offset.x))
+        && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
     {
         ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
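The one-line change above casts GpuMat::step (a size_t) to int before subtracting the ROI offset; without the cast, the subtraction and the <= comparison happen in unsigned arithmetic and can wrap around. A tiny worked example with toy values (not OpenCV code):

    #include <cstdio>
    #include <cstddef>

    int main()
    {
        std::size_t step = 512;   // bytes per row, an unsigned type
        int offsetX = 1024;       // hypothetical ROI offset larger than step
        int needed  = 1088;

        bool unsignedCmp = needed <= step - offsetX;                    // 512u - 1024 wraps to a huge value
        bool signedCmp   = needed <= static_cast<int>(step) - offsetX;  // what the fixed condition computes

        std::printf("unsigned: %d, signed: %d\n", unsignedCmp, signedCmp);   // prints "unsigned: 1, signed: 0"
        return 0;
    }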
modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp
@@ -210,6 +210,18 @@ bool TestHaarCascadeApplication::process()
 #if defined(__GNUC__)
     //http://www.christian-seiler.de/projekte/fpmath/

+    #ifndef _FPU_EXTENDED
+    #define _FPU_EXTENDED 0
+    #endif
+
+    #ifndef _FPU_DOUBLE
+    #define _FPU_DOUBLE 0
+    #endif
+
+    #ifndef _FPU_SINGLE
+    #define _FPU_SINGLE 0
+    #endif
+
     fpu_control_t fpu_oldcw, fpu_cw;
     _FPU_GETCW(fpu_oldcw); // store old cw
     fpu_cw = (fpu_oldcw & ~_FPU_EXTENDED & ~_FPU_DOUBLE & ~_FPU_SINGLE) | _FPU_SINGLE;
@@ -302,4 +314,4 @@ bool TestHaarCascadeApplication::deinit()
     return true;
 }

-#endif /* CUDA_DISABLER */
\ No newline at end of file
+#endif /* CUDA_DISABLER */
samples/gpu/driver_api_multi.cpp
@@ -54,14 +54,8 @@ inline void safeCall_(int code, const char* expr, const char* file, int line)
 // Each GPU is associated with its own context
 CUcontext contexts[2];

-int main(int argc, char** argv)
+int main()
 {
-    if (argc > 1)
-    {
-        cout << "CUDA driver API sample\n";
-        return -1;
-    }
-
     int num_devices = getCudaEnabledDeviceCount();
     if (num_devices < 2)
     {
samples/gpu/driver_api_stereo_multi.cpp
@@ -76,7 +76,7 @@ GpuMat d_result[2];
 // CPU result
 Mat result;

-void printHelp()
+static void printHelp()
 {
     std::cout << "Usage: driver_api_stereo_multi_gpu --left <left_image> --right <right_image>\n";
 }
samples/gpu/softcascade.cpp
@@ -76,8 +76,7 @@ int main(int argc, char** argv)
         cv::gpu::GpuMat dframe(frame), roi(frame.rows, frame.cols, CV_8UC1), trois;
         roi.setTo(cv::Scalar::all(1));
-        cascade.genRoi(roi, trois);
-        cascade.detect(dframe, trois, objects);
+        cascade.detect(dframe, roi, objects);

         cv::Mat dt(objects);

         typedef cv::gpu::SCascade::Detection Detection;
@@ -103,4 +102,4 @@ int main(int argc, char** argv)
     }

     return 0;
-}
\ No newline at end of file
+}