opencv / Commits / 39f327ac

Commit 39f327ac authored Nov 14, 2018 by Alexander Alekhin

Merge pull request #13157 from elatkin:el/gapi_perf_sobel_3

parents 8b7f8056 a6253948

Showing 1 changed file with 0 additions and 270 deletions

modules/gapi/src/backends/fluid/gfluidimgproc_func.cpp  +0 -270  (deleted, 100644 → 0)
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018 Intel Corporation

#if !defined(GAPI_STANDALONE)

#include "gfluidimgproc_func.hpp"
#include "gfluidutils.hpp"

#include <opencv2/core/hal/intrin.hpp>

#include <cmath>
#include <cstdlib>

#ifdef __GNUC__
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Wstrict-overflow"
#endif

namespace cv {
namespace gapi {
namespace fluid {

//---------------------
//
// Fluid kernels: Sobel
//
//---------------------
// Sobel 3x3: vertical pass
template<bool noscale, typename DST>
void run_sobel3x3_vert(DST out[], int length, const float ky[], float scale, float delta,
                       const int r[], float *buf[])
{
    float ky0 = ky[0],
          ky1 = ky[1],
          ky2 = ky[2];

    int r0 = r[0],
        r1 = r[1],
        r2 = r[2];
#if CV_SIMD
    // for floating-point output,
    // manual vectoring may be not better than compiler's optimization
#define EXPLICIT_SIMD_32F 0  // 1=vectorize 32f case explicitly, 0=don't

#if EXPLICIT_SIMD_32F
    if (std::is_same<DST, float>::value && length >= v_int16::nlanes)
    {
        constexpr static int nlanes = v_float32::nlanes;

        for (int l = 0; l < length; )
        {
            for (; l <= length - nlanes; l += nlanes)
            {
                v_float32 sum = vx_load(&buf[r0][l]) * vx_setall_f32(ky0);
                    sum = v_fma(vx_load(&buf[r1][l]), vx_setall_f32(ky1), sum);
                    sum = v_fma(vx_load(&buf[r2][l]), vx_setall_f32(ky2), sum);

                if (!noscale)
                {
                    sum = v_fma(sum, vx_setall_f32(scale), vx_setall_f32(delta));
                }

                v_store(reinterpret_cast<float*>(&out[l]), sum);
            }

            if (l < length)
            {
                // tail: recalculate last pixels
                GAPI_DbgAssert(length >= nlanes);
                l = length - nlanes;
            }
        }

        return;
    }
#endif
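    // Note on the "tail" handling (clarifying comment, same pattern in each SIMD
    // branch here): when fewer than nlanes pixels remain, l is rewound to
    // length - nlanes, so the last full-width vector iteration simply recomputes
    // a few already-stored pixels instead of falling back to scalar code;
    // GAPI_DbgAssert(length >= nlanes) is what makes the rewind safe.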
    if ((std::is_same<DST, short>::value || std::is_same<DST, ushort>::value)
        && length >= v_int16::nlanes)
    {
        constexpr static int nlanes = v_int16::nlanes;

        for (int l = 0; l < length; )
        {
            for (; l <= length - nlanes; l += nlanes)
            {
                v_float32 sum0 = vx_load(&buf[r0][l]) * vx_setall_f32(ky0);
                    sum0 = v_fma(vx_load(&buf[r1][l]), vx_setall_f32(ky1), sum0);
                    sum0 = v_fma(vx_load(&buf[r2][l]), vx_setall_f32(ky2), sum0);

                v_float32 sum1 = vx_load(&buf[r0][l + nlanes/2]) * vx_setall_f32(ky0);
                    sum1 = v_fma(vx_load(&buf[r1][l + nlanes/2]), vx_setall_f32(ky1), sum1);
                    sum1 = v_fma(vx_load(&buf[r2][l + nlanes/2]), vx_setall_f32(ky2), sum1);

                if (!noscale)
                {
                    sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
                    sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
                }

                v_int32 isum0 = v_round(sum0),
                        isum1 = v_round(sum1);

                if (std::is_same<DST, short>::value)
                {
                    // signed short
                    v_int16 res = v_pack(isum0, isum1);
                    v_store(reinterpret_cast<short*>(&out[l]), res);
                }
                else
                {
                    // unsigned short
                    v_uint16 res = v_pack_u(isum0, isum1);
                    v_store(reinterpret_cast<ushort*>(&out[l]), res);
                }
            }

            if (l < length)
            {
                // tail: recalculate last pixels
                GAPI_DbgAssert(length >= nlanes);
                l = length - nlanes;
            }
        }

        return;
    }
    if (std::is_same<DST, uchar>::value && length >= v_uint8::nlanes)
    {
        constexpr static int nlanes = v_uint8::nlanes;

        for (int l = 0; l < length; )
        {
            for (; l <= length - nlanes; l += nlanes)
            {
                v_float32 sum0 = vx_load(&buf[r0][l]) * vx_setall_f32(ky0);
                    sum0 = v_fma(vx_load(&buf[r1][l]), vx_setall_f32(ky1), sum0);
                    sum0 = v_fma(vx_load(&buf[r2][l]), vx_setall_f32(ky2), sum0);

                v_float32 sum1 = vx_load(&buf[r0][l + nlanes/4]) * vx_setall_f32(ky0);
                    sum1 = v_fma(vx_load(&buf[r1][l + nlanes/4]), vx_setall_f32(ky1), sum1);
                    sum1 = v_fma(vx_load(&buf[r2][l + nlanes/4]), vx_setall_f32(ky2), sum1);

                v_float32 sum2 = vx_load(&buf[r0][l + 2*nlanes/4]) * vx_setall_f32(ky0);
                    sum2 = v_fma(vx_load(&buf[r1][l + 2*nlanes/4]), vx_setall_f32(ky1), sum2);
                    sum2 = v_fma(vx_load(&buf[r2][l + 2*nlanes/4]), vx_setall_f32(ky2), sum2);

                v_float32 sum3 = vx_load(&buf[r0][l + 3*nlanes/4]) * vx_setall_f32(ky0);
                    sum3 = v_fma(vx_load(&buf[r1][l + 3*nlanes/4]), vx_setall_f32(ky1), sum3);
                    sum3 = v_fma(vx_load(&buf[r2][l + 3*nlanes/4]), vx_setall_f32(ky2), sum3);

                if (!noscale)
                {
                    sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
                    sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
                    sum2 = v_fma(sum2, vx_setall_f32(scale), vx_setall_f32(delta));
                    sum3 = v_fma(sum3, vx_setall_f32(scale), vx_setall_f32(delta));
                }

                v_int32 isum0 = v_round(sum0),
                        isum1 = v_round(sum1),
                        isum2 = v_round(sum2),
                        isum3 = v_round(sum3);

                v_int16 ires0 = v_pack(isum0, isum1),
                        ires1 = v_pack(isum2, isum3);

                v_uint8 res = v_pack_u(ires0, ires1);
                v_store(reinterpret_cast<uchar*>(&out[l]), res);
            }

            if (l < length)
            {
                // tail: recalculate last pixels
                GAPI_DbgAssert(length >= nlanes);
                l = length - nlanes;
            }
        }

        return;
    }
#endif
    // reference code
    for (int l = 0; l < length; l++)
    {
        float sum = buf[r0][l]*ky0 + buf[r1][l]*ky1 + buf[r2][l]*ky2;

        if (!noscale)
        {
            sum = sum*scale + delta;
        }

        out[l] = saturate<DST>(sum, rintf);
    }
}
template<typename DST, typename SRC>
void run_sobel_impl(DST out[], const SRC *in[], int width, int chan,
                    const float kx[], const float ky[], int border,
                    float scale, float delta, float *buf[],
                    int y, int y0)
{
    int r[3];
    r[0] = (y - y0)     % 3;  // buf[r[0]]: previous
    r[1] = (y - y0 + 1) % 3;  //            this
    r[2] = (y - y0 + 2) % 3;  //            next row

    int length = width * chan;

    // horizontal pass

    // full horizontal pass is needed only if very 1st row in ROI;
    // for 2nd and further rows, it is enough to convolve only the
    // "next" row - as we can reuse buffers from previous calls to
    // this kernel (note that Fluid processes rows consequently)
    int k0 = (y == y0) ? 0 : 2;
    for (int k = k0; k < 3; k++)
    {
        //                 previous           , this , next pixel
        const SRC *s[3] = {in[k] - border*chan, in[k], in[k] + border*chan};

        // rely on compiler vectoring
        for (int l = 0; l < length; l++)
        {
            buf[r[k]][l] = s[0][l]*kx[0] + s[1][l]*kx[1] + s[2][l]*kx[2];
        }
    }
    // vertical pass

    if (scale == 1 && delta == 0)
    {
        constexpr static bool noscale = true;  // omit scaling
        run_sobel3x3_vert<noscale, DST>(out, length, ky, scale, delta, r, buf);
    }
    else
    {
        constexpr static bool noscale = false;  // do scaling
        run_sobel3x3_vert<noscale, DST>(out, length, ky, scale, delta, r, buf);
    }
}
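// A small worked example of the ring-buffer indexing above (illustrative only,
// assuming y0 == 0): the (y - y0 + k) % 3 scheme rotates which buffer line holds
// the horizontally-convolved "previous"/"this"/"next" rows, so after the first
// row only the new "next" line (k0 = 2) has to be recomputed:
//
//   y = 0:  r = {0, 1, 2}  - all three lines filled (k0 = 0)
//   y = 1:  r = {1, 2, 0}  - lines 1 and 2 reused from y = 0, line 0 refilled
//   y = 2:  r = {2, 0, 1}  - lines 2 and 0 reused, line 1 refilled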
#define INSTANTIATE(DST, SRC)                                                      \
template void run_sobel_impl(DST out[], const SRC *in[], int width, int chan,     \
                             const float kx[], const float ky[], int border,      \
                             float scale, float delta, float *buf[],              \
                             int y, int y0);

INSTANTIATE(uchar , uchar )
INSTANTIATE(ushort, ushort)
INSTANTIATE( short, uchar )
INSTANTIATE( short, ushort)
INSTANTIATE( short,  short)
INSTANTIATE( float, uchar )
INSTANTIATE( float, ushort)
INSTANTIATE( float,  short)
INSTANTIATE( float,  float)

#undef INSTANTIATE
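// For illustration (not part of the original file): INSTANTIATE(short, uchar)
// above expands to the explicit instantiation
//
//   template void run_sobel_impl(short out[], const uchar *in[], int width, int chan,
//                                const float kx[], const float ky[], int border,
//                                float scale, float delta, float *buf[],
//                                int y, int y0);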
} // namespace fluid
} // namespace gapi
} // namespace cv

#endif // !defined(GAPI_STANDALONE)
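Below is a minimal, self-contained sketch (not from the deleted file) of the same
separable scheme the kernel implements: a horizontal pass convolves each input row
with kx into a 3-line ring buffer, and a vertical pass combines the buffered lines
with ky, mirroring run_sobel_impl() and run_sobel3x3_vert(). The toy image, the
kernel values (the classic Sobel dx decomposition kx = {-1, 0, 1}, ky = {1, 2, 1})
and the sizes are chosen only for illustration.

#include <cstdio>

int main()
{
    const int W = 6, H = 5;

    // toy 8-bit input: a vertical step edge between columns 2 and 3
    unsigned char img[H][W] = {
        {0, 0, 0, 9, 9, 9},
        {0, 0, 0, 9, 9, 9},
        {0, 0, 0, 9, 9, 9},
        {0, 0, 0, 9, 9, 9},
        {0, 0, 0, 9, 9, 9},
    };

    const float kx[3] = {-1.f, 0.f, 1.f};  // horizontal difference
    const float ky[3] = { 1.f, 2.f, 1.f};  // vertical smoothing

    float buf[3][W] = {{0}};  // ring buffer: one horizontally-convolved line per slot
    float out[H][W] = {{0}};  // Sobel dx response (interior pixels only)

    for (int y = 1; y + 1 < H; y++)
    {
        // horizontal pass: convolve rows y-1, y, y+1 with kx into the ring buffer
        // (the real kernel recomputes only the "next" row after the first line)
        for (int k = 0; k < 3; k++)
        {
            int rk = (y - 1 + k) % 3;                // same (y - y0 + k) % 3 indexing
            const unsigned char *s = img[y - 1 + k];
            for (int x = 1; x + 1 < W; x++)
                buf[rk][x] = s[x-1]*kx[0] + s[x]*kx[1] + s[x+1]*kx[2];
        }

        // vertical pass: combine the three buffered lines with ky
        int r0 = (y - 1) % 3, r1 = y % 3, r2 = (y + 1) % 3;
        for (int x = 1; x + 1 < W; x++)
            out[y][x] = buf[r0][x]*ky[0] + buf[r1][x]*ky[1] + buf[r2][x]*ky[2];
    }

    // print the response: strongest along the edge columns
    for (int y = 1; y + 1 < H; y++)
    {
        for (int x = 0; x < W; x++)
            printf("%5.0f", out[y][x]);
        printf("\n");
    }
    return 0;
}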