Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
d13a6b74
Commit
d13a6b74
authored
Dec 21, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed bug with submatrix in device::transform
parent
dab35867
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
64 additions
and
23 deletions
+64
-23
gpumat.hpp
modules/core/include/opencv2/core/gpumat.hpp
+1
-1
element_operations.cu
modules/gpu/src/cuda/element_operations.cu
+3
-3
element_operations.cpp
modules/gpu/src/element_operations.cpp
+33
-9
common.hpp
modules/gpu/src/opencv2/gpu/device/common.hpp
+5
-0
transform_detail.hpp
...es/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
+18
-6
transform.hpp
modules/gpu/src/opencv2/gpu/device/transform.hpp
+4
-4
No files found.
modules/core/include/opencv2/core/gpumat.hpp
View file @
d13a6b74
...
...
@@ -448,7 +448,7 @@ namespace cv { namespace gpu
{
int
area
=
rows
*
cols
;
if
(
!
m
.
isContinuous
()
||
m
.
type
()
!=
type
||
m
.
size
().
area
()
!=
area
)
m
.
create
(
1
,
area
,
type
);
ensureSizeIsEnough
(
1
,
area
,
type
,
m
);
m
=
m
.
reshape
(
0
,
rows
);
}
...
...
modules/gpu/src/cuda/element_operations.cu
View file @
d13a6b74
...
...
@@ -1058,12 +1058,12 @@ namespace cv { namespace gpu { namespace device
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
}
//
template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<schar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<ushort>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<short >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
//
template void absdiff_gpu<int >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
//
template void absdiff_gpu<float >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<int >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<float >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template void absdiff_gpu<double>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
template <typename T> struct AbsdiffScalar : unary_function<T, T>
...
...
modules/gpu/src/element_operations.cpp
View file @
d13a6b74
...
...
@@ -159,7 +159,13 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
if
(
mask
.
empty
()
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
))
bool
useNpp
=
mask
.
empty
()
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
)
&&
(
isAligned
(
src1
.
data
,
16
)
&&
isAligned
(
src2
.
data
,
16
)
&&
isAligned
(
dst
.
data
,
16
));
if
(
useNpp
)
{
nppArithmCaller
(
src1
,
src2
,
dst
,
nppiAdd_8u_C1RSfs
,
nppiAdd_8u_C4RSfs
,
nppiAdd_32s_C1R
,
nppiAdd_32f_C1R
,
stream
);
return
;
...
...
@@ -271,7 +277,13 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
if
(
mask
.
empty
()
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
))
bool
useNpp
=
mask
.
empty
()
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
)
&&
(
isAligned
(
src1
.
data
,
16
)
&&
isAligned
(
src2
.
data
,
16
)
&&
isAligned
(
dst
.
data
,
16
));
if
(
useNpp
)
{
nppArithmCaller
(
src2
,
src1
,
dst
,
nppiSub_8u_C1RSfs
,
nppiSub_8u_C4RSfs
,
nppiSub_32s_C1R
,
nppiSub_32f_C1R
,
stream
);
return
;
...
...
@@ -403,8 +415,13 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
dst
.
create
(
src1
.
size
(),
CV_MAKE_TYPE
(
CV_MAT_DEPTH
(
dtype
),
src1
.
channels
()));
bool
useNpp
=
scale
==
1
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
)
&&
(
isAligned
(
src1
.
data
,
16
)
&&
isAligned
(
src2
.
data
,
16
)
&&
isAligned
(
dst
.
data
,
16
));
if
(
scale
==
1
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
)
)
if
(
useNpp
)
{
nppArithmCaller
(
src2
,
src1
,
dst
,
nppiMul_8u_C1RSfs
,
nppiMul_8u_C4RSfs
,
nppiMul_32s_C1R
,
nppiMul_32f_C1R
,
stream
);
return
;
...
...
@@ -528,8 +545,13 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
dst
.
create
(
src1
.
size
(),
CV_MAKE_TYPE
(
CV_MAT_DEPTH
(
dtype
),
src1
.
channels
()));
bool
useNpp
=
scale
==
1
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
)
&&
(
isAligned
(
src1
.
data
,
16
)
&&
isAligned
(
src2
.
data
,
16
)
&&
isAligned
(
dst
.
data
,
16
));
if
(
scale
==
1
&&
dst
.
type
()
==
src1
.
type
()
&&
(
src1
.
depth
()
==
CV_8U
||
src1
.
depth
()
==
CV_32S
||
src1
.
depth
()
==
CV_32F
)
)
if
(
useNpp
)
{
nppArithmCaller
(
src2
,
src1
,
dst
,
nppiDiv_8u_C1RSfs
,
nppiDiv_8u_C4RSfs
,
nppiDiv_32s_C1R
,
nppiDiv_32f_C1R
,
stream
);
return
;
...
...
@@ -643,7 +665,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
static
const
func_t
funcs
[]
=
{
0
/*absdiff_gpu<unsigned char>*/
,
absdiff_gpu
<
signed
char
>
,
absdiff_gpu
<
unsigned
short
>
,
absdiff_gpu
<
short
>
,
0
/*absdiff_gpu<int>*/
,
0
/*absdiff_gpu<float>*/
,
absdiff_gpu
<
double
>
absdiff_gpu
<
unsigned
char
>
,
absdiff_gpu
<
signed
char
>
,
absdiff_gpu
<
unsigned
short
>
,
absdiff_gpu
<
short
>
,
absdiff_gpu
<
int
>
,
absdiff_gpu
<
float
>
,
absdiff_gpu
<
double
>
};
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
...
...
@@ -656,7 +678,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
sz
.
width
=
src1
.
cols
*
src1
.
channels
();
sz
.
height
=
src1
.
rows
;
if
(
src1
.
depth
()
==
CV_8U
&&
(
src1
.
cols
*
src1
.
channels
())
%
4
==
0
)
bool
aligned
=
isAligned
(
src1
.
data
,
16
)
&&
isAligned
(
src2
.
data
,
16
)
&&
isAligned
(
dst
.
data
,
16
);
if
(
aligned
&&
src1
.
depth
()
==
CV_8U
&&
(
src1
.
cols
*
src1
.
channels
())
%
4
==
0
)
{
NppStreamHandler
h
(
stream
);
...
...
@@ -668,7 +692,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
else
if
(
src1
.
depth
()
==
CV_8U
)
else
if
(
aligned
&&
src1
.
depth
()
==
CV_8U
)
{
NppStreamHandler
h
(
stream
);
...
...
@@ -678,7 +702,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
else
if
(
src1
.
depth
()
==
CV_32S
)
else
if
(
aligned
&&
src1
.
depth
()
==
CV_32S
)
{
NppStreamHandler
h
(
stream
);
...
...
@@ -688,7 +712,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
else
if
(
src1
.
depth
()
==
CV_32F
)
else
if
(
aligned
&&
src1
.
depth
()
==
CV_32F
)
{
NppStreamHandler
h
(
stream
);
...
...
modules/gpu/src/opencv2/gpu/device/common.hpp
View file @
d13a6b74
...
...
@@ -67,6 +67,11 @@
namespace
cv
{
namespace
gpu
{
void
error
(
const
char
*
error_string
,
const
char
*
file
,
const
int
line
,
const
char
*
func
);
template
<
typename
T
>
static
inline
bool
isAligned
(
const
T
*
ptr
,
size_t
size
)
{
return
reinterpret_cast
<
size_t
>
(
ptr
)
%
size
==
0
;
}
}}
static
inline
void
___cudaSafeCall
(
cudaError_t
err
,
const
char
*
file
,
const
int
line
,
const
char
*
func
=
""
)
...
...
modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
View file @
d13a6b74
...
...
@@ -309,7 +309,7 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
TransformDispatcher
<
false
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
DevMem2D_
<
T
>
src
,
DevMem2D_
<
D
>
dst
,
UnOp
op
,
Mask
mask
,
cudaStream_t
stream
)
{
typedef
TransformFunctorTraits
<
UnOp
>
ft
;
...
...
@@ -324,7 +324,7 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
DevMem2D_
<
T1
>
src1
,
DevMem2D_
<
T2
>
src2
,
DevMem2D_
<
D
>
dst
,
BinOp
op
,
Mask
mask
,
cudaStream_t
stream
)
{
typedef
TransformFunctorTraits
<
BinOp
>
ft
;
...
...
@@ -341,12 +341,18 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
TransformDispatcher
<
true
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
DevMem2D_
<
T
>
src
,
DevMem2D_
<
D
>
dst
,
UnOp
op
,
Mask
mask
,
cudaStream_t
stream
)
{
typedef
TransformFunctorTraits
<
UnOp
>
ft
;
StaticAssert
<
ft
::
smart_shift
!=
1
>::
check
();
if
(
!
isAligned
(
src
.
data
,
ft
::
smart_shift
*
sizeof
(
T
))
||
!
isAligned
(
dst
.
data
,
ft
::
smart_shift
*
sizeof
(
D
)))
{
TransformDispatcher
<
false
>::
call
(
src
,
dst
,
op
,
mask
,
stream
);
return
;
}
const
dim3
threads
(
ft
::
smart_block_dim_x
,
ft
::
smart_block_dim_y
,
1
);
const
dim3
grid
(
divUp
(
src
.
cols
,
threads
.
x
*
ft
::
smart_shift
),
divUp
(
src
.
rows
,
threads
.
y
),
1
);
...
...
@@ -358,12 +364,18 @@ namespace cv { namespace gpu { namespace device
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
DevMem2D_
<
T1
>
src1
,
DevMem2D_
<
T2
>
src2
,
DevMem2D_
<
D
>
dst
,
BinOp
op
,
Mask
mask
,
cudaStream_t
stream
)
{
typedef
TransformFunctorTraits
<
BinOp
>
ft
;
StaticAssert
<
ft
::
smart_shift
!=
1
>::
check
();
if
(
!
isAligned
(
src1
.
data
,
ft
::
smart_shift
*
sizeof
(
T1
))
||
!
isAligned
(
src2
.
data
,
ft
::
smart_shift
*
sizeof
(
T2
))
||
!
isAligned
(
dst
.
data
,
ft
::
smart_shift
*
sizeof
(
D
)))
{
TransformDispatcher
<
false
>::
call
(
src1
,
src2
,
dst
,
op
,
mask
,
stream
);
return
;
}
const
dim3
threads
(
ft
::
smart_block_dim_x
,
ft
::
smart_block_dim_y
,
1
);
const
dim3
grid
(
divUp
(
src1
.
cols
,
threads
.
x
*
ft
::
smart_shift
),
divUp
(
src1
.
rows
,
threads
.
y
),
1
);
...
...
@@ -376,14 +388,14 @@ namespace cv { namespace gpu { namespace device
};
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
transform_caller
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
inline
void
transform_caller
(
DevMem2D_
<
T
>
src
,
DevMem2D_
<
D
>
dst
,
UnOp
op
,
Mask
mask
,
cudaStream_t
stream
)
{
typedef
TransformFunctorTraits
<
UnOp
>
ft
;
TransformDispatcher
<
VecTraits
<
T
>::
cn
==
1
&&
VecTraits
<
D
>::
cn
==
1
&&
ft
::
smart_shift
!=
1
>::
call
(
src
,
dst
,
op
,
mask
,
stream
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
transform_caller
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
inline
void
transform_caller
(
DevMem2D_
<
T1
>
src1
,
DevMem2D_
<
T2
>
src2
,
DevMem2D_
<
D
>
dst
,
BinOp
op
,
Mask
mask
,
cudaStream_t
stream
)
{
typedef
TransformFunctorTraits
<
BinOp
>
ft
;
TransformDispatcher
<
VecTraits
<
T1
>::
cn
==
1
&&
VecTraits
<
T2
>::
cn
==
1
&&
VecTraits
<
D
>::
cn
==
1
&&
ft
::
smart_shift
!=
1
>::
call
(
src1
,
src2
,
dst
,
op
,
mask
,
stream
);
...
...
modules/gpu/src/opencv2/gpu/device/transform.hpp
View file @
d13a6b74
...
...
@@ -50,25 +50,25 @@
namespace
cv
{
namespace
gpu
{
namespace
device
{
template
<
typename
T
,
typename
D
,
typename
UnOp
>
void
transform
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
cudaStream_t
stream
=
0
)
static
inline
void
transform
(
DevMem2D_
<
T
>
src
,
DevMem2D_
<
D
>
dst
,
UnOp
op
,
cudaStream_t
stream
=
0
)
{
transform_detail
::
transform_caller
(
src
,
dst
,
op
,
WithOutMask
(),
stream
);
}
template
<
typename
T
,
typename
D
,
typename
UnOp
>
void
transform
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
PtrStepb
&
mask
,
const
UnOp
&
op
,
cudaStream_t
stream
=
0
)
static
inline
void
transform
(
DevMem2D_
<
T
>
src
,
DevMem2D_
<
D
>
dst
,
PtrStepb
mask
,
UnOp
op
,
cudaStream_t
stream
=
0
)
{
transform_detail
::
transform_caller
(
src
,
dst
,
op
,
SingleMask
(
mask
),
stream
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
void
transform
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
cudaStream_t
stream
=
0
)
static
inline
void
transform
(
DevMem2D_
<
T1
>
src1
,
DevMem2D_
<
T2
>
src2
,
DevMem2D_
<
D
>
dst
,
BinOp
op
,
cudaStream_t
stream
=
0
)
{
transform_detail
::
transform_caller
(
src1
,
src2
,
dst
,
op
,
WithOutMask
(),
stream
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
>
void
transform
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
PtrStepb
&
mask
,
const
BinOp
&
op
,
cudaStream_t
stream
=
0
)
static
inline
void
transform
(
DevMem2D_
<
T1
>
src1
,
DevMem2D_
<
T2
>
src2
,
DevMem2D_
<
D
>
dst
,
PtrStepb
mask
,
BinOp
op
,
cudaStream_t
stream
=
0
)
{
transform_detail
::
transform_caller
(
src1
,
src2
,
dst
,
op
,
SingleMask
(
mask
),
stream
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment