Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
20f636fc
Commit
20f636fc
authored
Sep 17, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed cudev compilation for old pre-Fermi archs
parent
eff21788
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
115 additions
and
32 deletions
+115
-32
histogram.hpp
...les/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
+2
-0
histogram.hpp
modules/cudev/include/opencv2/cudev/grid/histogram.hpp
+4
-0
texture.hpp
modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
+60
-32
atomic.hpp
modules/cudev/include/opencv2/cudev/util/atomic.hpp
+41
-0
saturate_cast.hpp
modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
+8
-0
No files found.
modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
View file @
20f636fc
...
...
@@ -56,6 +56,7 @@ namespace grid_histogram_detail
template
<
int
BIN_COUNT
,
int
BLOCK_SIZE
,
class
SrcPtr
,
typename
ResType
,
class
MaskPtr
>
__global__
void
histogram
(
const
SrcPtr
src
,
ResType
*
hist
,
const
MaskPtr
mask
,
const
int
rows
,
const
int
cols
)
{
#if CV_CUDEV_ARCH >= 120
__shared__
ResType
smem
[
BIN_COUNT
];
const
int
y
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
...
...
@@ -86,6 +87,7 @@ namespace grid_histogram_detail
if
(
histVal
>
0
)
atomicAdd
(
hist
+
i
,
histVal
);
}
#endif
}
template
<
int
BIN_COUNT
,
class
Policy
,
class
SrcPtr
,
typename
ResType
,
class
MaskPtr
>
...
...
modules/cudev/include/opencv2/cudev/grid/histogram.hpp
View file @
20f636fc
...
...
@@ -57,6 +57,8 @@ namespace cv { namespace cudev {
template
<
int
BIN_COUNT
,
class
Policy
,
class
SrcPtr
,
typename
ResType
,
class
MaskPtr
>
__host__
void
gridHistogram_
(
const
SrcPtr
&
src
,
GpuMat_
<
ResType
>&
dst
,
const
MaskPtr
&
mask
,
Stream
&
stream
=
Stream
::
Null
())
{
CV_Assert
(
deviceSupports
(
SHARED_ATOMICS
)
);
const
int
rows
=
getRows
(
src
);
const
int
cols
=
getCols
(
src
);
...
...
@@ -75,6 +77,8 @@ __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const Mas
template
<
int
BIN_COUNT
,
class
Policy
,
class
SrcPtr
,
typename
ResType
>
__host__
void
gridHistogram_
(
const
SrcPtr
&
src
,
GpuMat_
<
ResType
>&
dst
,
Stream
&
stream
=
Stream
::
Null
())
{
CV_Assert
(
deviceSupports
(
SHARED_ATOMICS
)
);
const
int
rows
=
getRows
(
src
);
const
int
cols
=
getCols
(
src
);
...
...
modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
View file @
20f636fc
...
...
@@ -52,6 +52,40 @@
#include "gpumat.hpp"
#include "traits.hpp"
namespace
{
template
<
typename
T
>
struct
CvCudevTextureRef
{
typedef
texture
<
T
,
cudaTextureType2D
,
cudaReadModeElementType
>
TexRef
;
static
TexRef
ref
;
__host__
static
void
bind
(
const
cv
::
cudev
::
GlobPtrSz
<
T
>&
mat
,
bool
normalizedCoords
=
false
,
cudaTextureFilterMode
filterMode
=
cudaFilterModePoint
,
cudaTextureAddressMode
addressMode
=
cudaAddressModeClamp
)
{
ref
.
normalized
=
normalizedCoords
;
ref
.
filterMode
=
filterMode
;
ref
.
addressMode
[
0
]
=
addressMode
;
ref
.
addressMode
[
1
]
=
addressMode
;
ref
.
addressMode
[
2
]
=
addressMode
;
cudaChannelFormatDesc
desc
=
cudaCreateChannelDesc
<
T
>
();
CV_CUDEV_SAFE_CALL
(
cudaBindTexture2D
(
0
,
&
ref
,
mat
.
data
,
&
desc
,
mat
.
cols
,
mat
.
rows
,
mat
.
step
)
);
}
__host__
static
void
unbind
()
{
CV_CUDEV_SAFE_CALL
(
cudaUnbindTexture
(
ref
)
);
}
};
template
<
typename
T
>
typename
CvCudevTextureRef
<
T
>::
TexRef
CvCudevTextureRef
<
T
>::
ref
;
}
namespace
cv
{
namespace
cudev
{
template
<
typename
T
>
struct
TexturePtr
...
...
@@ -63,24 +97,34 @@ template <typename T> struct TexturePtr
__device__
__forceinline__
T
operator
()(
float
y
,
float
x
)
const
{
#if CV_CUDEV_ARCH < 300
// Use the texture reference
return
tex2D
(
CvCudevTextureRef
<
T
>::
ref
,
x
,
y
);
#else
// Use the texture object
return
tex2D
<
T
>
(
texObj
,
x
,
y
);
#endif
}
};
template
<
typename
T
>
struct
Texture
:
TexturePtr
<
T
>
{
int
rows
,
cols
;
bool
cc30
;
__host__
explicit
Texture
(
const
GlobPtrSz
<
T
>&
mat
,
bool
normalizedCoords
=
false
,
cudaTextureFilterMode
filterMode
=
cudaFilterModePoint
,
cudaTextureAddressMode
addressMode
=
cudaAddressModeClamp
)
{
CV_Assert
(
deviceSupports
(
FEATURE_SET_COMPUTE_30
)
);
cc30
=
deviceSupports
(
FEATURE_SET_COMPUTE_30
);
rows
=
mat
.
rows
;
cols
=
mat
.
cols
;
if
(
cc30
)
{
// Use the texture object
cudaResourceDesc
texRes
;
std
::
memset
(
&
texRes
,
0
,
sizeof
(
texRes
));
texRes
.
resType
=
cudaResourceTypePitch2D
;
...
...
@@ -92,51 +136,35 @@ template <typename T> struct Texture : TexturePtr<T>
cudaTextureDesc
texDescr
;
std
::
memset
(
&
texDescr
,
0
,
sizeof
(
texDescr
));
texDescr
.
normalizedCoords
=
normalizedCoords
;
texDescr
.
filterMode
=
filterMode
;
texDescr
.
addressMode
[
0
]
=
addressMode
;
texDescr
.
addressMode
[
1
]
=
addressMode
;
texDescr
.
addressMode
[
2
]
=
addressMode
;
texDescr
.
filterMode
=
filterMode
;
texDescr
.
readMode
=
cudaReadModeElementType
;
texDescr
.
normalizedCoords
=
normalizedCoords
;
CV_CUDEV_SAFE_CALL
(
cudaCreateTextureObject
(
&
this
->
texObj
,
&
texRes
,
&
texDescr
,
0
)
);
}
__host__
explicit
Texture
(
const
GpuMat_
<
T
>&
mat
,
bool
normalizedCoords
=
false
,
cudaTextureFilterMode
filterMode
=
cudaFilterModePoint
,
cudaTextureAddressMode
addressMode
=
cudaAddressModeClamp
)
else
{
CV_Assert
(
deviceSupports
(
FEATURE_SET_COMPUTE_30
)
);
rows
=
mat
.
rows
;
cols
=
mat
.
cols
;
cudaResourceDesc
texRes
;
std
::
memset
(
&
texRes
,
0
,
sizeof
(
texRes
));
texRes
.
resType
=
cudaResourceTypePitch2D
;
texRes
.
res
.
pitch2D
.
devPtr
=
mat
.
data
;
texRes
.
res
.
pitch2D
.
height
=
mat
.
rows
;
texRes
.
res
.
pitch2D
.
width
=
mat
.
cols
;
texRes
.
res
.
pitch2D
.
pitchInBytes
=
mat
.
step
;
texRes
.
res
.
pitch2D
.
desc
=
cudaCreateChannelDesc
<
T
>
();
cudaTextureDesc
texDescr
;
std
::
memset
(
&
texDescr
,
0
,
sizeof
(
texDescr
));
texDescr
.
addressMode
[
0
]
=
addressMode
;
texDescr
.
addressMode
[
1
]
=
addressMode
;
texDescr
.
addressMode
[
2
]
=
addressMode
;
texDescr
.
filterMode
=
filterMode
;
texDescr
.
readMode
=
cudaReadModeElementType
;
texDescr
.
normalizedCoords
=
normalizedCoords
;
CV_CUDEV_SAFE_CALL
(
cudaCreateTextureObject
(
&
this
->
texObj
,
&
texRes
,
&
texDescr
,
0
)
);
// Use the texture reference
CvCudevTextureRef
<
T
>::
bind
(
mat
,
normalizedCoords
,
filterMode
,
addressMode
);
}
}
__host__
~
Texture
()
{
if
(
cc30
)
{
// Use the texture object
cudaDestroyTextureObject
(
this
->
texObj
);
}
else
{
// Use the texture reference
CvCudevTextureRef
<
T
>::
unbind
();
}
}
};
template
<
typename
T
>
struct
PtrTraits
<
Texture
<
T
>
>
:
PtrTraitsBase
<
Texture
<
T
>
,
TexturePtr
<
T
>
>
...
...
modules/cudev/include/opencv2/cudev/util/atomic.hpp
View file @
20f636fc
...
...
@@ -64,11 +64,23 @@ __device__ __forceinline__ uint atomicAdd(uint* address, uint val)
__device__
__forceinline__
float
atomicAdd
(
float
*
address
,
float
val
)
{
#if CV_CUDEV_ARCH >= 200
return
::
atomicAdd
(
address
,
val
);
#else
int
*
address_as_i
=
(
int
*
)
address
;
int
old
=
*
address_as_i
,
assumed
;
do
{
assumed
=
old
;
old
=
::
atomicCAS
(
address_as_i
,
assumed
,
__float_as_int
(
val
+
__int_as_float
(
assumed
)));
}
while
(
assumed
!=
old
);
return
__int_as_float
(
old
);
#endif
}
__device__
static
double
atomicAdd
(
double
*
address
,
double
val
)
{
#if CV_CUDEV_ARCH >= 130
unsigned
long
long
int
*
address_as_ull
=
(
unsigned
long
long
int
*
)
address
;
unsigned
long
long
int
old
=
*
address_as_ull
,
assumed
;
do
{
...
...
@@ -77,6 +89,11 @@ __device__ static double atomicAdd(double* address, double val)
__double_as_longlong
(
val
+
__longlong_as_double
(
assumed
)));
}
while
(
assumed
!=
old
);
return
__longlong_as_double
(
old
);
#else
(
void
)
address
;
(
void
)
val
;
return
0.0
;
#endif
}
// atomicMin
...
...
@@ -93,6 +110,7 @@ __device__ __forceinline__ uint atomicMin(uint* address, uint val)
__device__
static
float
atomicMin
(
float
*
address
,
float
val
)
{
#if CV_CUDEV_ARCH >= 120
int
*
address_as_i
=
(
int
*
)
address
;
int
old
=
*
address_as_i
,
assumed
;
do
{
...
...
@@ -101,10 +119,16 @@ __device__ static float atomicMin(float* address, float val)
__float_as_int
(
::
fminf
(
val
,
__int_as_float
(
assumed
))));
}
while
(
assumed
!=
old
);
return
__int_as_float
(
old
);
#else
(
void
)
address
;
(
void
)
val
;
return
0.0
f
;
#endif
}
__device__
static
double
atomicMin
(
double
*
address
,
double
val
)
{
#if CV_CUDEV_ARCH >= 130
unsigned
long
long
int
*
address_as_ull
=
(
unsigned
long
long
int
*
)
address
;
unsigned
long
long
int
old
=
*
address_as_ull
,
assumed
;
do
{
...
...
@@ -113,6 +137,11 @@ __device__ static double atomicMin(double* address, double val)
__double_as_longlong
(
::
fmin
(
val
,
__longlong_as_double
(
assumed
))));
}
while
(
assumed
!=
old
);
return
__longlong_as_double
(
old
);
#else
(
void
)
address
;
(
void
)
val
;
return
0.0
;
#endif
}
// atomicMax
...
...
@@ -129,6 +158,7 @@ __device__ __forceinline__ uint atomicMax(uint* address, uint val)
__device__
static
float
atomicMax
(
float
*
address
,
float
val
)
{
#if CV_CUDEV_ARCH >= 120
int
*
address_as_i
=
(
int
*
)
address
;
int
old
=
*
address_as_i
,
assumed
;
do
{
...
...
@@ -137,10 +167,16 @@ __device__ static float atomicMax(float* address, float val)
__float_as_int
(
::
fmaxf
(
val
,
__int_as_float
(
assumed
))));
}
while
(
assumed
!=
old
);
return
__int_as_float
(
old
);
#else
(
void
)
address
;
(
void
)
val
;
return
0.0
f
;
#endif
}
__device__
static
double
atomicMax
(
double
*
address
,
double
val
)
{
#if CV_CUDEV_ARCH >= 130
unsigned
long
long
int
*
address_as_ull
=
(
unsigned
long
long
int
*
)
address
;
unsigned
long
long
int
old
=
*
address_as_ull
,
assumed
;
do
{
...
...
@@ -149,6 +185,11 @@ __device__ static double atomicMax(double* address, double val)
__double_as_longlong
(
::
fmax
(
val
,
__longlong_as_double
(
assumed
))));
}
while
(
assumed
!=
old
);
return
__longlong_as_double
(
old
);
#else
(
void
)
address
;
(
void
)
val
;
return
0.0
;
#endif
}
}}
...
...
modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
View file @
20f636fc
...
...
@@ -228,7 +228,11 @@ template <> __device__ __forceinline__ int saturate_cast<int>(float v)
}
template
<>
__device__
__forceinline__
int
saturate_cast
<
int
>
(
double
v
)
{
#if CV_CUDEV_ARCH >= 130
return
__double2int_rn
(
v
);
#else
return
saturate_cast
<
int
>
((
float
)
v
);
#endif
}
template
<>
__device__
__forceinline__
uint
saturate_cast
<
uint
>
(
schar
v
)
...
...
@@ -256,7 +260,11 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(float v)
}
template
<>
__device__
__forceinline__
uint
saturate_cast
<
uint
>
(
double
v
)
{
#if CV_CUDEV_ARCH >= 130
return
__double2uint_rn
(
v
);
#else
return
saturate_cast
<
uint
>
((
float
)
v
);
#endif
}
}}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment