Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
37d01e2d
Commit
37d01e2d
authored
Jul 25, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added license header, using cv::Ptr, small fixes.
parent
66ac4621
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
125 additions
and
119 deletions
+125
-119
cvdef.h
modules/core/include/opencv2/core/cvdef.h
+1
-1
dxt.cpp
modules/core/src/dxt.cpp
+97
-98
fft.cl
modules/core/src/opencl/fft.cl
+27
-20
No files found.
modules/core/include/opencv2/core/cvdef.h
View file @
37d01e2d
...
@@ -244,7 +244,7 @@ typedef signed char schar;
...
@@ -244,7 +244,7 @@ typedef signed char schar;
/* fundamental constants */
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
#define CV_PI 3.1415926535897932384626433832795
#define CV_
TWO_
PI 6.283185307179586476925286766559
#define CV_
2
PI 6.283185307179586476925286766559
#define CV_LOG2 0.69314718055994530941723212145818
#define CV_LOG2 0.69314718055994530941723212145818
/****************************************************************************************\
/****************************************************************************************\
...
...
modules/core/src/dxt.cpp
View file @
37d01e2d
...
@@ -1788,89 +1788,23 @@ namespace cv
...
@@ -1788,89 +1788,23 @@ namespace cv
enum
FftType
enum
FftType
{
{
R2R
=
0
,
R2R
=
0
,
// real to CCS in case forward transform, CCS to real otherwise
C2R
=
1
,
C2R
=
1
,
// complex to real in case inverse transform
R2C
=
2
,
R2C
=
2
,
// real to complex in case forward transform
C2C
=
3
C2C
=
3
// complex to complex
};
};
static
void
ocl_getRadixes
(
int
cols
,
std
::
vector
<
int
>&
radixes
,
std
::
vector
<
int
>&
blocks
,
int
&
min_radix
)
{
int
factors
[
34
];
int
nf
=
DFTFactorize
(
cols
,
factors
);
int
n
=
1
;
int
factor_index
=
0
;
min_radix
=
INT_MAX
;
// 2^n transforms
if
((
factors
[
factor_index
]
&
1
)
==
0
)
{
for
(
;
n
<
factors
[
factor_index
];)
{
int
radix
=
2
,
block
=
1
;
if
(
8
*
n
<=
factors
[
0
])
radix
=
8
;
else
if
(
4
*
n
<=
factors
[
0
])
{
radix
=
4
;
if
(
cols
%
12
==
0
)
block
=
3
;
else
if
(
cols
%
8
==
0
)
block
=
2
;
}
else
{
if
(
cols
%
10
==
0
)
block
=
5
;
else
if
(
cols
%
8
==
0
)
block
=
4
;
else
if
(
cols
%
6
==
0
)
block
=
3
;
else
if
(
cols
%
4
==
0
)
block
=
2
;
}
radixes
.
push_back
(
radix
);
blocks
.
push_back
(
block
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
n
*=
radix
;
}
factor_index
++
;
}
// all the other transforms
for
(
;
factor_index
<
nf
;
factor_index
++
)
{
int
radix
=
factors
[
factor_index
],
block
=
1
;
if
(
radix
==
3
)
{
if
(
cols
%
12
==
0
)
block
=
4
;
else
if
(
cols
%
9
==
0
)
block
=
3
;
else
if
(
cols
%
6
==
0
)
block
=
2
;
}
else
if
(
radix
==
5
)
{
if
(
cols
%
10
==
0
)
block
=
2
;
}
radixes
.
push_back
(
radix
);
blocks
.
push_back
(
block
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
}
}
struct
OCL_FftPlan
struct
OCL_FftPlan
{
{
private
:
UMat
twiddles
;
UMat
twiddles
;
String
buildOptions
;
String
buildOptions
;
int
thread_count
;
int
thread_count
;
bool
status
;
public
:
int
dft_size
;
int
dft_size
;
bool
status
;
OCL_FftPlan
(
int
_size
)
:
dft_size
(
_size
),
status
(
true
)
OCL_FftPlan
(
int
_size
)
:
dft_size
(
_size
),
status
(
true
)
{
{
int
min_radix
;
int
min_radix
;
...
@@ -1910,7 +1844,7 @@ struct OCL_FftPlan
...
@@ -1910,7 +1844,7 @@ struct OCL_FftPlan
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
{
{
double
theta
=
-
CV_
TWO_
PI
*
j
/
n
;
double
theta
=
-
CV_
2
PI
*
j
/
n
;
for
(
int
k
=
0
;
k
<
(
n
/
radix
);
k
++
)
for
(
int
k
=
0
;
k
<
(
n
/
radix
);
k
++
)
{
{
...
@@ -1922,7 +1856,7 @@ struct OCL_FftPlan
...
@@ -1922,7 +1856,7 @@ struct OCL_FftPlan
twiddles
=
tw
.
getUMat
(
ACCESS_READ
);
twiddles
=
tw
.
getUMat
(
ACCESS_READ
);
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
dft_size
,
min_radix
,
radix_processing
.
c_str
());
}
}
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
num_dfts
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
num_dfts
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
...
@@ -1982,6 +1916,76 @@ struct OCL_FftPlan
...
@@ -1982,6 +1916,76 @@ struct OCL_FftPlan
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
num_dfts
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
num_dfts
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
}
private
:
static
void
ocl_getRadixes
(
int
cols
,
std
::
vector
<
int
>&
radixes
,
std
::
vector
<
int
>&
blocks
,
int
&
min_radix
)
{
int
factors
[
34
];
int
nf
=
DFTFactorize
(
cols
,
factors
);
int
n
=
1
;
int
factor_index
=
0
;
min_radix
=
INT_MAX
;
// 2^n transforms
if
((
factors
[
factor_index
]
&
1
)
==
0
)
{
for
(
;
n
<
factors
[
factor_index
];)
{
int
radix
=
2
,
block
=
1
;
if
(
8
*
n
<=
factors
[
0
])
radix
=
8
;
else
if
(
4
*
n
<=
factors
[
0
])
{
radix
=
4
;
if
(
cols
%
12
==
0
)
block
=
3
;
else
if
(
cols
%
8
==
0
)
block
=
2
;
}
else
{
if
(
cols
%
10
==
0
)
block
=
5
;
else
if
(
cols
%
8
==
0
)
block
=
4
;
else
if
(
cols
%
6
==
0
)
block
=
3
;
else
if
(
cols
%
4
==
0
)
block
=
2
;
}
radixes
.
push_back
(
radix
);
blocks
.
push_back
(
block
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
n
*=
radix
;
}
factor_index
++
;
}
// all the other transforms
for
(
;
factor_index
<
nf
;
factor_index
++
)
{
int
radix
=
factors
[
factor_index
],
block
=
1
;
if
(
radix
==
3
)
{
if
(
cols
%
12
==
0
)
block
=
4
;
else
if
(
cols
%
9
==
0
)
block
=
3
;
else
if
(
cols
%
6
==
0
)
block
=
2
;
}
else
if
(
radix
==
5
)
{
if
(
cols
%
10
==
0
)
block
=
2
;
}
radixes
.
push_back
(
radix
);
blocks
.
push_back
(
block
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
}
}
};
};
class
OCL_FftPlanCache
class
OCL_FftPlanCache
...
@@ -1993,27 +1997,24 @@ public:
...
@@ -1993,27 +1997,24 @@ public:
return
planCache
;
return
planCache
;
}
}
OCL_FftPlan
*
getFftPlan
(
int
dft_size
)
Ptr
<
OCL_FftPlan
>
getFftPlan
(
int
dft_size
)
{
{
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
{
{
OCL_FftPlan
*
const
plan
=
planStorage
[
i
];
Ptr
<
OCL_FftPlan
>
plan
=
planStorage
[
i
];
if
(
plan
->
dft_size
==
dft_size
)
if
(
plan
->
dft_size
==
dft_size
)
{
{
return
plan
;
return
plan
;
}
}
}
}
OCL_FftPlan
*
newPlan
=
new
OCL_FftPlan
(
dft_size
);
Ptr
<
OCL_FftPlan
>
newPlan
=
Ptr
<
OCL_FftPlan
>
(
new
OCL_FftPlan
(
dft_size
)
);
planStorage
.
push_back
(
newPlan
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
;
return
newPlan
;
}
}
~
OCL_FftPlanCache
()
~
OCL_FftPlanCache
()
{
{
for
(
std
::
vector
<
OCL_FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
planStorage
.
clear
();
}
}
...
@@ -2023,18 +2024,18 @@ protected:
...
@@ -2023,18 +2024,18 @@ protected:
{
{
}
}
std
::
vector
<
OCL_FftPlan
*
>
planStorage
;
std
::
vector
<
Ptr
<
OCL_FftPlan
>
>
planStorage
;
};
};
static
bool
ocl_dft_
C2C_
rows
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
,
int
fftType
)
static
bool
ocl_dft_rows
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
,
int
fftType
)
{
{
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
());
Ptr
<
OCL_FftPlan
>
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
());
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
,
flags
,
fftType
,
true
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
,
flags
,
fftType
,
true
);
}
}
static
bool
ocl_dft_
C2C_
cols
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_cols
,
int
flags
,
int
fftType
)
static
bool
ocl_dft_cols
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_cols
,
int
flags
,
int
fftType
)
{
{
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
());
Ptr
<
OCL_FftPlan
>
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
());
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_cols
,
flags
,
fftType
,
false
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_cols
,
flags
,
fftType
,
false
);
}
}
...
@@ -2103,13 +2104,13 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2103,13 +2104,13 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
if
(
!
inv
)
if
(
!
inv
)
{
{
if
(
!
ocl_dft_
C2C_
rows
(
src
,
output
,
nonzero_rows
,
flags
,
fftType
))
if
(
!
ocl_dft_rows
(
src
,
output
,
nonzero_rows
,
flags
,
fftType
))
return
false
;
return
false
;
if
(
!
is1d
)
if
(
!
is1d
)
{
{
int
nonzero_cols
=
fftType
==
R2R
?
output
.
cols
/
2
+
1
:
output
.
cols
;
int
nonzero_cols
=
fftType
==
R2R
?
output
.
cols
/
2
+
1
:
output
.
cols
;
if
(
!
ocl_dft_
C2C_
cols
(
output
,
_dst
,
nonzero_cols
,
flags
,
fftType
))
if
(
!
ocl_dft_cols
(
output
,
_dst
,
nonzero_cols
,
flags
,
fftType
))
return
false
;
return
false
;
}
}
}
}
...
@@ -2118,12 +2119,12 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2118,12 +2119,12 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
if
(
fftType
==
C2C
)
if
(
fftType
==
C2C
)
{
{
// complex output
// complex output
if
(
!
ocl_dft_
C2C_
rows
(
src
,
output
,
nonzero_rows
,
flags
,
fftType
))
if
(
!
ocl_dft_rows
(
src
,
output
,
nonzero_rows
,
flags
,
fftType
))
return
false
;
return
false
;
if
(
!
is1d
)
if
(
!
is1d
)
{
{
if
(
!
ocl_dft_
C2C_
cols
(
output
,
output
,
output
.
cols
,
flags
,
fftType
))
if
(
!
ocl_dft_cols
(
output
,
output
,
output
.
cols
,
flags
,
fftType
))
return
false
;
return
false
;
}
}
}
}
...
@@ -2131,16 +2132,16 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2131,16 +2132,16 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
{
{
if
(
is1d
)
if
(
is1d
)
{
{
if
(
!
ocl_dft_
C2C_
rows
(
src
,
output
,
nonzero_rows
,
flags
,
fftType
))
if
(
!
ocl_dft_rows
(
src
,
output
,
nonzero_rows
,
flags
,
fftType
))
return
false
;
return
false
;
}
}
else
else
{
{
int
nonzero_cols
=
src
.
cols
/
2
+
1
;
int
nonzero_cols
=
src
.
cols
/
2
+
1
;
if
(
!
ocl_dft_
C2C_
cols
(
src
,
output
,
nonzero_cols
,
flags
,
fftType
))
if
(
!
ocl_dft_cols
(
src
,
output
,
nonzero_cols
,
flags
,
fftType
))
return
false
;
return
false
;
if
(
!
ocl_dft_
C2C_
rows
(
output
,
_dst
,
nonzero_rows
,
flags
,
fftType
))
if
(
!
ocl_dft_rows
(
output
,
_dst
,
nonzero_rows
,
flags
,
fftType
))
return
false
;
return
false
;
}
}
}
}
...
@@ -2286,7 +2287,7 @@ public:
...
@@ -2286,7 +2287,7 @@ public:
}
}
// no baked plan is found, so let's create a new one
// no baked plan is found, so let's create a new one
FftPlan
*
newPlan
=
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
);
Ptr
<
FftPlan
>
newPlan
=
Ptr
<
FftPlan
>
(
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
)
);
planStorage
.
push_back
(
newPlan
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
->
plHandle
;
return
newPlan
->
plHandle
;
...
@@ -2294,8 +2295,6 @@ public:
...
@@ -2294,8 +2295,6 @@ public:
~
PlanCache
()
~
PlanCache
()
{
{
for
(
std
::
vector
<
FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
planStorage
.
clear
();
}
}
...
@@ -2305,7 +2304,7 @@ protected:
...
@@ -2305,7 +2304,7 @@ protected:
{
{
}
}
std
::
vector
<
FftPlan
*
>
planStorage
;
std
::
vector
<
Ptr
<
FftPlan
>
>
planStorage
;
};
};
extern
"C"
{
extern
"C"
{
...
...
modules/core/src/opencl/fft.cl
View file @
37d01e2d
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
define
SQRT_2
0.707106781188f
#
define
SQRT_2
0.707106781188f
#
define
sin_120
0.866025403784f
#
define
sin_120
0.866025403784f
#
define
fft5_2
0.559016994374f
#
define
fft5_2
0.559016994374f
...
@@ -509,9 +516,9 @@ void fft_radix5_B2(__local float2* smem, __global const float2* twiddles, const
...
@@ -509,9 +516,9 @@ void fft_radix5_B2(__local float2* smem, __global const float2* twiddles, const
}
}
#
ifdef
DFT_SCALE
#
ifdef
DFT_SCALE
#
define
VAL
(
x,
scale
)
x*scale
#
define
SCALE_
VAL
(
x,
scale
)
x*scale
#
else
#
else
#
define
VAL
(
x,
scale
)
x
#
define
SCALE_
VAL
(
x,
scale
)
x
#
endif
#
endif
__kernel
void
fft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__kernel
void
fft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
...
@@ -558,15 +565,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -558,15 +565,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i=x
; i<cols; i+=block_size)
for
(
int
i=x
; i<cols; i+=block_size)
dst[i]
=
VAL
(
smem[i],
scale
)
;
dst[i]
=
SCALE_
VAL
(
smem[i],
scale
)
;
#
else
#
else
//
pack
row
to
CCS
//
pack
row
to
CCS
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
for
(
int
i=x
; i<dst_cols-1; i+=block_size)
for
(
int
i=x
; i<dst_cols-1; i+=block_size)
dst[i+1]
=
VAL
(
smem_1cn[i+2],
scale
)
;
dst[i+1]
=
SCALE_
VAL
(
smem_1cn[i+2],
scale
)
;
if
(
x
==
0
)
if
(
x
==
0
)
dst[0]
=
VAL
(
smem_1cn[0],
scale
)
;
dst[0]
=
SCALE_
VAL
(
smem_1cn[0],
scale
)
;
#
endif
#
endif
}
}
else
else
...
@@ -611,7 +618,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -611,7 +618,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
*
((
__global
float2*
)(
dst
+
i*block_size*dst_step
))
=
VAL
(
smem[y
+
i*block_size],
scale
)
;
*
((
__global
float2*
)(
dst
+
i*block_size*dst_step
))
=
SCALE_
VAL
(
smem[y
+
i*block_size],
scale
)
;
#
else
#
else
if
(
x
==
0
)
if
(
x
==
0
)
{
{
...
@@ -619,9 +626,9 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -619,9 +626,9 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y+1,
dst_step,
dst_offset
)
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y+1,
dst_step,
dst_offset
)
;
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
*
((
__global
float*
)
dst
)
=
VAL
(
smem_1cn[i+2],
scale
)
;
*
((
__global
float*
)
dst
)
=
SCALE_
VAL
(
smem_1cn[i+2],
scale
)
;
if
(
y
==
0
)
if
(
y
==
0
)
*
((
__global
float*
)
(
dst_ptr
+
dst_offset
))
=
VAL
(
smem_1cn[0],
scale
)
;
*
((
__global
float*
)
(
dst_ptr
+
dst_offset
))
=
SCALE_
VAL
(
smem_1cn[0],
scale
)
;
}
}
else
if
(
x
==
(
dst_cols+1
)
/2
)
else
if
(
x
==
(
dst_cols+1
)
/2
)
{
{
...
@@ -629,16 +636,16 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -629,16 +636,16 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
mad24
(
y+1,
dst_step,
dst_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
mad24
(
y+1,
dst_step,
dst_offset
))
;
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
for
(
int
i=y
; i<dst_rows-1; i+=block_size, dst+=dst_step*block_size)
*
((
__global
float*
)
dst
)
=
VAL
(
smem_1cn[i+2],
scale
)
;
*
((
__global
float*
)
dst
)
=
SCALE_
VAL
(
smem_1cn[i+2],
scale
)
;
if
(
y
==
0
)
if
(
y
==
0
)
*
((
__global
float*
)
(
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
dst_offset
)))
=
VAL
(
smem_1cn[0],
scale
)
;
*
((
__global
float*
)
(
dst_ptr
+
mad24
(
dst_cols-1,
(
int
)
sizeof
(
float
)
,
dst_offset
)))
=
SCALE_
VAL
(
smem_1cn[0],
scale
)
;
}
}
else
else
{
{
__global
uchar*
dst
=
dst_ptr
+
mad24
(
x,
(
int
)
sizeof
(
float
)
*2,
mad24
(
y,
dst_step,
dst_offset
-
(
int
)
sizeof
(
float
)))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
x,
(
int
)
sizeof
(
float
)
*2,
mad24
(
y,
dst_step,
dst_offset
-
(
int
)
sizeof
(
float
)))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i=y
; i<dst_rows; i+=block_size, dst+=block_size*dst_step)
for
(
int
i=y
; i<dst_rows; i+=block_size, dst+=block_size*dst_step)
vstore2
(
VAL
(
smem[i],
scale
)
,
0
,
(
__global
float*
)
dst
)
;
vstore2
(
SCALE_
VAL
(
smem[i],
scale
)
,
0
,
(
__global
float*
)
dst
)
;
}
}
#
endif
#
endif
}
}
...
@@ -724,15 +731,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -724,15 +731,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
{
{
dst[i*block_size].x
=
VAL
(
smem[x
+
i*block_size].x,
scale
)
;
dst[i*block_size].x
=
SCALE_
VAL
(
smem[x
+
i*block_size].x,
scale
)
;
dst[i*block_size].y
=
VAL
(
-smem[x
+
i*block_size].y,
scale
)
;
dst[i*block_size].y
=
SCALE_
VAL
(
-smem[x
+
i*block_size].y,
scale
)
;
}
}
#
else
#
else
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
))
,
dst_offset
)))
;
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
))
,
dst_offset
)))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
{
{
dst[i*block_size]
=
VAL
(
smem[x
+
i*block_size].x,
scale
)
;
dst[i*block_size]
=
SCALE_
VAL
(
smem[x
+
i*block_size].x,
scale
)
;
}
}
#
endif
#
endif
}
}
...
@@ -783,9 +790,9 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -783,9 +790,9 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
{
{
__global
float2*
re
z
=
(
__global
float2*
)(
dst
+
i*block_size*dst_step
)
;
__global
float2*
re
s
=
(
__global
float2*
)(
dst
+
i*block_size*dst_step
)
;
re
z
[0].x
=
smem[y
+
i*block_size].x
;
re
s
[0].x
=
smem[y
+
i*block_size].x
;
re
z
[0].y
=
-smem[y
+
i*block_size].y
;
re
s
[0].y
=
-smem[y
+
i*block_size].y
;
}
}
}
}
#
else
#
else
...
@@ -848,9 +855,9 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -848,9 +855,9 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
{
{
__global
float2*
re
z
=
(
__global
float2*
)(
dst
+
i*block_size*dst_step
)
;
__global
float2*
re
s
=
(
__global
float2*
)(
dst
+
i*block_size*dst_step
)
;
re
z
[0].x
=
smem[y
+
i*block_size].x
;
re
s
[0].x
=
smem[y
+
i*block_size].x
;
re
z
[0].y
=
-smem[y
+
i*block_size].y
;
re
s
[0].y
=
-smem[y
+
i*block_size].y
;
}
}
}
}
#
endif
#
endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment