Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e820c5c6
Commit
e820c5c6
authored
Aug 26, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::mulSpectrums
parent
3f62e785
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
82 additions
and
177 deletions
+82
-177
arithm.cpp
modules/cudaarithm/src/arithm.cpp
+0
-89
mul_spectrums.cu
modules/cudaarithm/src/cuda/mul_spectrums.cu
+82
-88
No files found.
modules/cudaarithm/src/arithm.cpp
View file @
e820c5c6
...
...
@@ -292,95 +292,6 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
#endif
}
//////////////////////////////////////////////////////////////////////////////
// mulSpectrums
#ifdef HAVE_CUFFT
namespace
cv
{
namespace
cuda
{
namespace
device
{
void
mulSpectrums
(
const
PtrStep
<
cufftComplex
>
a
,
const
PtrStep
<
cufftComplex
>
b
,
PtrStepSz
<
cufftComplex
>
c
,
cudaStream_t
stream
);
void
mulSpectrums_CONJ
(
const
PtrStep
<
cufftComplex
>
a
,
const
PtrStep
<
cufftComplex
>
b
,
PtrStepSz
<
cufftComplex
>
c
,
cudaStream_t
stream
);
}}}
#endif
void
cv
::
cuda
::
mulSpectrums
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst
,
int
flags
,
bool
conjB
,
Stream
&
stream
)
{
#ifndef HAVE_CUFFT
(
void
)
_src1
;
(
void
)
_src2
;
(
void
)
_dst
;
(
void
)
flags
;
(
void
)
conjB
;
(
void
)
stream
;
throw_no_cuda
();
#else
(
void
)
flags
;
typedef
void
(
*
Caller
)(
const
PtrStep
<
cufftComplex
>
,
const
PtrStep
<
cufftComplex
>
,
PtrStepSz
<
cufftComplex
>
,
cudaStream_t
stream
);
static
Caller
callers
[]
=
{
device
::
mulSpectrums
,
device
::
mulSpectrums_CONJ
};
GpuMat
src1
=
_src1
.
getGpuMat
();
GpuMat
src2
=
_src2
.
getGpuMat
();
CV_Assert
(
src1
.
type
()
==
src2
.
type
()
&&
src1
.
type
()
==
CV_32FC2
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
CV_32FC2
);
GpuMat
dst
=
_dst
.
getGpuMat
();
Caller
caller
=
callers
[(
int
)
conjB
];
caller
(
src1
,
src2
,
dst
,
StreamAccessor
::
getStream
(
stream
));
#endif
}
//////////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums
#ifdef HAVE_CUFFT
namespace
cv
{
namespace
cuda
{
namespace
device
{
void
mulAndScaleSpectrums
(
const
PtrStep
<
cufftComplex
>
a
,
const
PtrStep
<
cufftComplex
>
b
,
float
scale
,
PtrStepSz
<
cufftComplex
>
c
,
cudaStream_t
stream
);
void
mulAndScaleSpectrums_CONJ
(
const
PtrStep
<
cufftComplex
>
a
,
const
PtrStep
<
cufftComplex
>
b
,
float
scale
,
PtrStepSz
<
cufftComplex
>
c
,
cudaStream_t
stream
);
}}}
#endif
void
cv
::
cuda
::
mulAndScaleSpectrums
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst
,
int
flags
,
float
scale
,
bool
conjB
,
Stream
&
stream
)
{
#ifndef HAVE_CUFFT
(
void
)
_src1
;
(
void
)
_src2
;
(
void
)
_dst
;
(
void
)
flags
;
(
void
)
scale
;
(
void
)
conjB
;
(
void
)
stream
;
throw_no_cuda
();
#else
(
void
)
flags
;
typedef
void
(
*
Caller
)(
const
PtrStep
<
cufftComplex
>
,
const
PtrStep
<
cufftComplex
>
,
float
scale
,
PtrStepSz
<
cufftComplex
>
,
cudaStream_t
stream
);
static
Caller
callers
[]
=
{
device
::
mulAndScaleSpectrums
,
device
::
mulAndScaleSpectrums_CONJ
};
GpuMat
src1
=
_src1
.
getGpuMat
();
GpuMat
src2
=
_src2
.
getGpuMat
();
CV_Assert
(
src1
.
type
()
==
src2
.
type
()
&&
src1
.
type
()
==
CV_32FC2
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
CV_32FC2
);
GpuMat
dst
=
_dst
.
getGpuMat
();
Caller
caller
=
callers
[(
int
)
conjB
];
caller
(
src1
,
src2
,
scale
,
dst
,
StreamAccessor
::
getStream
(
stream
));
#endif
}
//////////////////////////////////////////////////////////////////////////////
// dft
...
...
modules/cudaarithm/src/cuda/mul_spectrums.cu
View file @
e820c5c6
...
...
@@ -40,132 +40,126 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#i
nclude "cvconfig.h"
#i
fndef HAVE_OPENCV_CUDEV
#
ifdef HAVE_CUFFT
#
error "opencv_cudev is required"
#
include <cufft.h>
#
else
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
namespace cv { namespace cuda { namespace device
{
//////////////////////////////////////////////////////////////////////////
// mulSpectrums
using namespace cv::cudev;
__global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
//////////////////////////////////////////////////////////////////////////////
// mulSpectrums
if (x < c.cols && y < c.rows)
namespace
{
__device__ __forceinline__ float real(const float2& val)
{
c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x])
;
return val.x
;
}
}
void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream
)
__device__ __forceinline__ float imag(const float2& val
)
{
dim3 threads(256);
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
mulSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, c);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
return val.y;
}
//////////////////////////////////////////////////////////////////////////
// mulSpectrums_CONJ
__global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
__device__ __forceinline__ float2 cmul(const float2& a, const float2& b)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
return make_float2((real(a) * real(b)) - (imag(a) * imag(b)),
(real(a) * imag(b)) + (imag(a) * real(b)));
}
if (x < c.cols && y < c.rows
)
__device__ __forceinline__ float2 conj(const float2& a
)
{
c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
}
return make_float2(real(a), -imag(a));
}
void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
struct comlex_mul : binary_function<float2, float2, float2>
{
dim3 threads(256);
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
mulSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, c);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
__device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
{
return cmul(a, b);
}
};
struct comlex_mul_conj : binary_function<float2, float2, float2>
{
__device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
{
return cmul(a, conj(b));
}
};
//////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums
__global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
struct comlex_mul_scale : binary_function<float2, float2, float2>
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
float scale;
if (x < c.cols && y < c.rows)
__device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
{
cufftComplex v = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
}
return scale * cmul(a, b);
}
};
struct comlex_mul_conj_scale : binary_function<float2, float2, float2>
{
float scale;
void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
__device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
{
dim3 threads(256);
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
return scale * cmul(a, conj(b));
}
};
}
mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
cudaSafeCall( cudaGetLastError() );
void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
{
(void) flags;
if (stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
GpuMat src1 = _src1.getGpuMat();
GpuMat src2 = _src2.getGpuMat();
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
CV_Assert( src1.size() == src2.size() );
//////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums_CONJ
_dst.create(src1.size(), CV_32FC2);
GpuMat dst = _dst.getGpuMat();
__global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (conjB)
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
else
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
}
if (x < c.cols && y < c.rows)
{
cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
}
}
void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
(void) flags;
GpuMat src1 = _src1.getGpuMat();
GpuMat src2 = _src2.getGpuMat();
void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
{
dim3 threads(256);
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
CV_Assert( src1.size() == src2.size() );
mulAndScaleSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, scale, c
);
cudaSafeCall( cudaGetLastError()
);
_dst.create(src1.size(), CV_32FC2
);
GpuMat dst = _dst.getGpuMat(
);
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
if (conjB)
{
comlex_mul_conj_scale op;
op.scale = scale;
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
}
}}} // namespace cv { namespace cuda { namespace cudev
#endif // HAVE_CUFFT
else
{
comlex_mul_scale op;
op.scale = scale;
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
}
}
#endif
/* CUDA_DISABLER */
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment