opencv: Commit e820c5c6
authored Aug 26, 2013 by Vladislav Vinogradov
used new device layer for cv::gpu::mulSpectrums
parent 3f62e785
Showing 2 changed files with 84 additions and 179 deletions
modules/cudaarithm/src/arithm.cpp (+0, -89)
modules/cudaarithm/src/cuda/mul_spectrums.cu (+84, -90)
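For reference, a minimal host-side sketch of how the two functions touched by this commit are typically called; the matrix size, random fill, and scale value below are illustrative assumptions, not part of the commit.

// Hypothetical usage sketch (not part of this commit): multiply two packed
// complex spectra on the GPU with the functions touched by this change.
#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaarithm.hpp>

int main()
{
    // Two CV_32FC2 spectra of the same size (e.g. outputs of cv::cuda::dft).
    cv::Mat a(128, 128, CV_32FC2), b(128, 128, CV_32FC2);
    cv::randu(a, -1.0, 1.0);
    cv::randu(b, -1.0, 1.0);

    cv::cuda::GpuMat ga(a), gb(b), gc, gd;

    // Element-wise complex multiply; the flags argument is ignored by the
    // CUDA implementation.
    cv::cuda::mulSpectrums(ga, gb, gc, 0);

    // Conjugate-multiply and scale by 1/N, as is typical when normalizing a
    // DFT-based correlation.
    cv::cuda::mulAndScaleSpectrums(ga, gb, gd, 0, 1.0f / (128 * 128), true);

    cv::Mat c, d;
    gc.download(c);
    gd.download(d);
    return 0;
}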
modules/cudaarithm/src/arithm.cpp

@@ -292,95 +292,6 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
#endif
}
Removed:

//////////////////////////////////////////////////////////////////////////////
// mulSpectrums

#ifdef HAVE_CUFFT

namespace cv { namespace cuda { namespace device
{
    void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);

    void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
}}}

#endif

void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
{
#ifndef HAVE_CUFFT
    (void) _src1;
    (void) _src2;
    (void) _dst;
    (void) flags;
    (void) conjB;
    (void) stream;
    throw_no_cuda();
#else
    (void) flags;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);

    static Caller callers[] = { device::mulSpectrums, device::mulSpectrums_CONJ };

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    Caller caller = callers[(int)conjB];
    caller(src1, src2, dst, StreamAccessor::getStream(stream));
#endif
}

//////////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums

#ifdef HAVE_CUFFT

namespace cv { namespace cuda { namespace device
{
    void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);

    void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
}}}

#endif

void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
#ifndef HAVE_CUFFT
    (void) _src1;
    (void) _src2;
    (void) _dst;
    (void) flags;
    (void) scale;
    (void) conjB;
    (void) stream;
    throw_no_cuda();
#else
    (void) flags;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);

    static Caller callers[] = { device::mulAndScaleSpectrums, device::mulAndScaleSpectrums_CONJ };

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    Caller caller = callers[(int)conjB];
    caller(src1, src2, scale, dst, StreamAccessor::getStream(stream));
#endif
}

//////////////////////////////////////////////////////////////////////////////
// dft

...
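The host wrappers removed above are reimplemented in mul_spectrums.cu below on top of the opencv_cudev device layer. As a rough sketch of that general pattern (the functor name, element type, and blendHalves wrapper are made up for illustration, not from the commit):

// Rough sketch of the cudev device-layer pattern (illustrative only): an
// element-wise binary transform expressed as a device functor, with
// gridTransformBinary generating and launching the kernel.
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"

using namespace cv::cudev;

namespace
{
    struct blend_halves_func : binary_function<float, float, float>
    {
        __device__ __forceinline__ float operator ()(float a, float b) const
        {
            return 0.5f * a + 0.5f * b;
        }
    };
}

void blendHalves(const cv::cuda::GpuMat& src1, const cv::cuda::GpuMat& src2,
                 cv::cuda::GpuMat& dst, cv::cuda::Stream& stream)
{
    CV_Assert( src1.type() == CV_32FC1 && src2.type() == CV_32FC1 );
    CV_Assert( src1.size() == src2.size() );

    dst.create(src1.size(), CV_32FC1);

    // No explicit grid/block computation or cudaGetLastError handling:
    // the device layer takes care of the kernel launch.
    gridTransformBinary(globPtr<float>(src1), globPtr<float>(src2),
                        globPtr<float>(dst), blend_halves_func(), stream);
}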
modules/cudaarithm/src/cuda/mul_spectrums.cu

@@ -40,132 +40,126 @@
//
//M*/
Removed (previous implementation with hand-written kernels):

#if !defined CUDA_DISABLER

#include "cvconfig.h"

#ifdef HAVE_CUFFT

#include <cufft.h>

#include "opencv2/core/cuda/common.hpp"

namespace cv { namespace cuda { namespace device
{
    //////////////////////////////////////////////////////////////////////////
    // mulSpectrums

    __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
        }
    }

    void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    //////////////////////////////////////////////////////////////////////////
    // mulSpectrums_CONJ

    __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
        }
    }

    void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    //////////////////////////////////////////////////////////////////////////
    // mulAndScaleSpectrums

    __global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            cufftComplex v = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
            c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
        }
    }

    void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    //////////////////////////////////////////////////////////////////////////
    // mulAndScaleSpectrums_CONJ

    __global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
            c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
        }
    }

    void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulAndScaleSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, scale, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}}} // namespace cv { namespace cuda { namespace cudev

#endif // HAVE_CUFFT

#endif /* CUDA_DISABLER */

Added (new implementation on the cudev device layer):

#include "opencv2/opencv_modules.hpp"

#ifndef HAVE_OPENCV_CUDEV

#error "opencv_cudev is required"

#else

#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"

using namespace cv::cudev;

//////////////////////////////////////////////////////////////////////////////
// mulSpectrums

namespace
{
    __device__ __forceinline__ float real(const float2& val)
    {
        return val.x;
    }

    __device__ __forceinline__ float imag(const float2& val)
    {
        return val.y;
    }

    __device__ __forceinline__ float2 cmul(const float2& a, const float2& b)
    {
        return make_float2((real(a) * real(b)) - (imag(a) * imag(b)),
                           (real(a) * imag(b)) + (imag(a) * real(b)));
    }

    __device__ __forceinline__ float2 conj(const float2& a)
    {
        return make_float2(real(a), -imag(a));
    }

    struct comlex_mul : binary_function<float2, float2, float2>
    {
        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return cmul(a, b);
        }
    };

    struct comlex_mul_conj : binary_function<float2, float2, float2>
    {
        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return cmul(a, conj(b));
        }
    };

    struct comlex_mul_scale : binary_function<float2, float2, float2>
    {
        float scale;

        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return scale * cmul(a, b);
        }
    };

    struct comlex_mul_conj_scale : binary_function<float2, float2, float2>
    {
        float scale;

        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return scale * cmul(a, conj(b));
        }
    };
}

void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
{
    (void) flags;

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    if (conjB)
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
    else
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
}

void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
    (void) flags;

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    if (conjB)
    {
        comlex_mul_conj_scale op;
        op.scale = scale;
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
    }
    else
    {
        comlex_mul_scale op;
        op.scale = scale;
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
    }
}

#endif
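A change like this can be spot-checked against the CPU cv::mulSpectrums on random data; a rough sketch along those lines (the size and tolerance are arbitrary assumptions, separate from the module's own accuracy tests):

// Hypothetical spot check (not part of the commit): compare the CUDA result
// against the CPU cv::mulSpectrums on random CV_32FC2 data.
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaarithm.hpp>

int main()
{
    cv::Mat a(64, 64, CV_32FC2), b(64, 64, CV_32FC2);
    cv::randu(a, -10.0, 10.0);
    cv::randu(b, -10.0, 10.0);

    // CPU reference (conjugate multiply).
    cv::Mat ref;
    cv::mulSpectrums(a, b, ref, 0, true);

    // GPU result via the code path changed in this commit.
    cv::cuda::GpuMat ga(a), gb(b), gdst;
    cv::cuda::mulSpectrums(ga, gb, gdst, 0, true);

    cv::Mat res;
    gdst.download(res);

    double maxErr = cv::norm(ref, res, cv::NORM_INF);
    std::cout << "max abs diff: " << maxErr << std::endl;
    return maxErr < 1e-4 ? 0 : 1;
}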