submodule / opencv / Commits / e820c5c6

Commit e820c5c6, authored Aug 26, 2013 by Vladislav Vinogradov
used new device layer for cv::gpu::mulSpectrums
parent 3f62e785
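For context (editor's note, not part of the commit): cv::cuda::mulSpectrums and cv::cuda::mulAndScaleSpectrums compute the per-element product of two packed CV_32FC2 DFT spectra, optionally conjugating the second operand, and are the building blocks of FFT-based convolution and correlation. Below is a minimal usage sketch, assuming opencv_cudaarithm is built with CUFFT support; the correlate helper is hypothetical, not an OpenCV API.

#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>

// Hypothetical helper: FFT-based cross-correlation of two equally sized
// single-channel float images on the GPU.
cv::cuda::GpuMat correlate(const cv::cuda::GpuMat& a, const cv::cuda::GpuMat& b)
{
    CV_Assert(a.type() == CV_32FC1 && a.size() == b.size());

    cv::cuda::GpuMat specA, specB, specC, result;

    // Forward DFTs: a real CV_32FC1 input yields a packed complex CV_32FC2 spectrum.
    cv::cuda::dft(a, specA, a.size());
    cv::cuda::dft(b, specB, b.size());

    // Per-element spectrum product with the second spectrum conjugated (conjB = true),
    // scaled by 1/(width*height) so the inverse transform comes out normalized.
    const float scale = 1.0f / static_cast<float>(a.size().area());
    cv::cuda::mulAndScaleSpectrums(specA, specB, specC, 0, scale, true);

    // Inverse DFT back to a real correlation surface.
    cv::cuda::dft(specC, result, a.size(), cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
    return result;
}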
Showing 2 changed files with 84 additions and 179 deletions (+84, -179):

    modules/cudaarithm/src/arithm.cpp             +0    -89
    modules/cudaarithm/src/cuda/mul_spectrums.cu  +84   -90
modules/cudaarithm/src/arithm.cpp (view file @ e820c5c6; all changed lines in this file are deletions)

@@ -292,95 +292,6 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
#endif
}

//////////////////////////////////////////////////////////////////////////////
// mulSpectrums

#ifdef HAVE_CUFFT

namespace cv { namespace cuda { namespace device
{
    void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);

    void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
}}}

#endif

void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
{
#ifndef HAVE_CUFFT
    (void) _src1;
    (void) _src2;
    (void) _dst;
    (void) flags;
    (void) conjB;
    (void) stream;
    throw_no_cuda();
#else
    (void) flags;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);

    static Caller callers[] = { device::mulSpectrums, device::mulSpectrums_CONJ };

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    Caller caller = callers[(int) conjB];
    caller(src1, src2, dst, StreamAccessor::getStream(stream));
#endif
}

//////////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums

#ifdef HAVE_CUFFT

namespace cv { namespace cuda { namespace device
{
    void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);

    void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
}}}

#endif

void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
#ifndef HAVE_CUFFT
    (void) _src1;
    (void) _src2;
    (void) _dst;
    (void) flags;
    (void) scale;
    (void) conjB;
    (void) stream;
    throw_no_cuda();
#else
    (void) flags;

    typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);

    static Caller callers[] = { device::mulAndScaleSpectrums, device::mulAndScaleSpectrums_CONJ };

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    Caller caller = callers[(int) conjB];
    caller(src1, src2, scale, dst, StreamAccessor::getStream(stream));
#endif
}

//////////////////////////////////////////////////////////////////////////////
// dft
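Editor's note on the transition (not part of the commit): the wrappers above disappear from arithm.cpp because the same work now lives in mul_spectrums.cu, where the data is treated as plain float2 instead of cufftComplex. That is safe because cufftComplex is a typedef chain down to float2 (x = real part, y = imaginary part), so a CV_32FC2 GpuMat can be viewed either way. A small compile-time sketch of that assumption, hypothetical and requiring cufft.h:

#include <cufft.h>
#include <type_traits>

// cufftComplex -> cuComplex -> cuFloatComplex -> float2, so the two element
// types are one and the same type as far as the compiler is concerned.
static_assert(std::is_same<cufftComplex, float2>::value,
              "cufftComplex is expected to be an alias of float2");

int main() { return 0; }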
modules/cudaarithm/src/cuda/mul_spectrums.cu (view file @ e820c5c6)

@@ -40,132 +40,126 @@
//
//M*/

Old implementation (removed): hand-written __global__ kernels operating on cufftComplex.

#if !defined CUDA_DISABLER

#include "cvconfig.h"

#ifdef HAVE_CUFFT

#include <cufft.h>

#include "opencv2/core/cuda/common.hpp"

namespace cv { namespace cuda { namespace device
{
    //////////////////////////////////////////////////////////////////////////
    // mulSpectrums

    __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
        }
    }

    void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    //////////////////////////////////////////////////////////////////////////
    // mulSpectrums_CONJ

    __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
        }
    }

    void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    //////////////////////////////////////////////////////////////////////////
    // mulAndScaleSpectrums

    __global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            cufftComplex v = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
            c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
        }
    }

    void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }

    //////////////////////////////////////////////////////////////////////////
    // mulAndScaleSpectrums_CONJ

    __global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < c.cols && y < c.rows)
        {
            cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
            c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
        }
    }

    void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
    {
        dim3 threads(256);
        dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

        mulAndScaleSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, scale, c);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}}} // namespace cv { namespace cuda { namespace cudev

#endif // HAVE_CUFFT

#endif /* CUDA_DISABLER */

New implementation (added): cudev device-layer functors, plus the host wrappers moved in from arithm.cpp.

#if !defined CUDA_DISABLER

#include "opencv2/opencv_modules.hpp"

#ifndef HAVE_OPENCV_CUDEV

#error "opencv_cudev is required"

#else

#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"

using namespace cv::cudev;

//////////////////////////////////////////////////////////////////////////////
// mulSpectrums

namespace
{
    __device__ __forceinline__ float real(const float2& val)
    {
        return val.x;
    }

    __device__ __forceinline__ float imag(const float2& val)
    {
        return val.y;
    }

    __device__ __forceinline__ float2 cmul(const float2& a, const float2& b)
    {
        return make_float2((real(a) * real(b)) - (imag(a) * imag(b)),
                           (real(a) * imag(b)) + (imag(a) * real(b)));
    }

    __device__ __forceinline__ float2 conj(const float2& a)
    {
        return make_float2(real(a), -imag(a));
    }

    struct comlex_mul : binary_function<float2, float2, float2>
    {
        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return cmul(a, b);
        }
    };

    struct comlex_mul_conj : binary_function<float2, float2, float2>
    {
        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return cmul(a, conj(b));
        }
    };

    struct comlex_mul_scale : binary_function<float2, float2, float2>
    {
        float scale;

        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return scale * cmul(a, b);
        }
    };

    struct comlex_mul_conj_scale : binary_function<float2, float2, float2>
    {
        float scale;

        __device__ __forceinline__ float2 operator ()(const float2& a, const float2& b) const
        {
            return scale * cmul(a, conj(b));
        }
    };
}

void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
{
    (void) flags;

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    if (conjB)
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
    else
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
}

//////////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums

void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
    (void) flags;

    GpuMat src1 = _src1.getGpuMat();
    GpuMat src2 = _src2.getGpuMat();

    CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
    CV_Assert( src1.size() == src2.size() );

    _dst.create(src1.size(), CV_32FC2);
    GpuMat dst = _dst.getGpuMat();

    if (conjB)
    {
        comlex_mul_conj_scale op;
        op.scale = scale;
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
    }
    else
    {
        comlex_mul_scale op;
        op.scale = scale;
        gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
    }
}

#endif

#endif /* CUDA_DISABLER */
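Editor's note (not part of the commit): the cmul and conj helpers above are the ordinary complex product and conjugate written out on float2. A small host-side sanity check against std::complex, using local re-implementations of the same arithmetic rather than the __device__ helpers:

#include <cassert>
#include <cmath>
#include <complex>

struct f2 { float x, y; };

// Same arithmetic as the device-side cmul(): re = ax*bx - ay*by, im = ax*by + ay*bx.
static f2 cmul(f2 a, f2 b) { return { a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x }; }
static f2 conj(f2 a)       { return { a.x, -a.y }; }

int main()
{
    const std::complex<float> a(1.5f, -2.0f), b(0.25f, 3.0f);

    const f2 p  = cmul({a.real(), a.imag()}, {b.real(), b.imag()});        // plain product
    const f2 pc = cmul({a.real(), a.imag()}, conj({b.real(), b.imag()}));  // product with conj(b)

    assert(std::abs(p.x  - (a * b).real())            < 1e-5f);
    assert(std::abs(p.y  - (a * b).imag())            < 1e-5f);
    assert(std::abs(pc.x - (a * std::conj(b)).real()) < 1e-5f);
    assert(std::abs(pc.y - (a * std::conj(b)).imag()) < 1e-5f);
    return 0;
}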