Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6ad8a9c0
Commit
6ad8a9c0
authored
Aug 28, 2018
by
Vitaly Tuzov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Replaced core module calls to universal intrinsics with wide universal intrinsics
parent
aee865fe
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
37 deletions
+43
-37
arithm.cpp
modules/core/src/arithm.cpp
+43
-37
No files found.
modules/core/src/arithm.cpp
View file @
6ad8a9c0
...
...
@@ -1379,7 +1379,7 @@ struct InRange_SIMD
}
};
#if CV_SIMD
128
#if CV_SIMD
template
<>
struct
InRange_SIMD
<
uchar
>
...
...
@@ -1388,16 +1388,17 @@ struct InRange_SIMD<uchar>
uchar
*
dst
,
int
len
)
const
{
int
x
=
0
;
const
int
width
=
v_uint8
x16
::
nlanes
;
const
int
width
=
v_uint8
::
nlanes
;
for
(;
x
<=
len
-
width
;
x
+=
width
)
{
v_uint8
x16
values
=
v
_load
(
src1
+
x
);
v_uint8
x16
low
=
v
_load
(
src2
+
x
);
v_uint8
x16
high
=
v
_load
(
src3
+
x
);
v_uint8
values
=
vx
_load
(
src1
+
x
);
v_uint8
low
=
vx
_load
(
src2
+
x
);
v_uint8
high
=
vx
_load
(
src3
+
x
);
v_store
(
dst
+
x
,
(
values
>=
low
)
&
(
high
>=
values
));
}
vx_cleanup
();
return
x
;
}
};
...
...
@@ -1409,16 +1410,17 @@ struct InRange_SIMD<schar>
uchar
*
dst
,
int
len
)
const
{
int
x
=
0
;
const
int
width
=
v_int8
x16
::
nlanes
;
const
int
width
=
v_int8
::
nlanes
;
for
(;
x
<=
len
-
width
;
x
+=
width
)
{
v_int8
x16
values
=
v
_load
(
src1
+
x
);
v_int8
x16
low
=
v
_load
(
src2
+
x
);
v_int8
x16
high
=
v
_load
(
src3
+
x
);
v_int8
values
=
vx
_load
(
src1
+
x
);
v_int8
low
=
vx
_load
(
src2
+
x
);
v_int8
high
=
vx
_load
(
src3
+
x
);
v_store
((
schar
*
)(
dst
+
x
),
(
values
>=
low
)
&
(
high
>=
values
));
}
vx_cleanup
();
return
x
;
}
};
...
...
@@ -1430,20 +1432,21 @@ struct InRange_SIMD<ushort>
uchar
*
dst
,
int
len
)
const
{
int
x
=
0
;
const
int
width
=
v_uint16
x8
::
nlanes
*
2
;
const
int
width
=
v_uint16
::
nlanes
*
2
;
for
(;
x
<=
len
-
width
;
x
+=
width
)
{
v_uint16
x8
values1
=
v
_load
(
src1
+
x
);
v_uint16
x8
low1
=
v
_load
(
src2
+
x
);
v_uint16
x8
high1
=
v
_load
(
src3
+
x
);
v_uint16
values1
=
vx
_load
(
src1
+
x
);
v_uint16
low1
=
vx
_load
(
src2
+
x
);
v_uint16
high1
=
vx
_load
(
src3
+
x
);
v_uint16
x8
values2
=
v_load
(
src1
+
x
+
v_uint16x8
::
nlanes
);
v_uint16
x8
low2
=
v_load
(
src2
+
x
+
v_uint16x8
::
nlanes
);
v_uint16
x8
high2
=
v_load
(
src3
+
x
+
v_uint16x8
::
nlanes
);
v_uint16
values2
=
vx_load
(
src1
+
x
+
v_uint16
::
nlanes
);
v_uint16
low2
=
vx_load
(
src2
+
x
+
v_uint16
::
nlanes
);
v_uint16
high2
=
vx_load
(
src3
+
x
+
v_uint16
::
nlanes
);
v_store
(
dst
+
x
,
v_pack
((
values1
>=
low1
)
&
(
high1
>=
values1
),
(
values2
>=
low2
)
&
(
high2
>=
values2
)));
}
vx_cleanup
();
return
x
;
}
};
...
...
@@ -1455,20 +1458,21 @@ struct InRange_SIMD<short>
uchar
*
dst
,
int
len
)
const
{
int
x
=
0
;
const
int
width
=
(
int
)
v_int16
x8
::
nlanes
*
2
;
const
int
width
=
(
int
)
v_int16
::
nlanes
*
2
;
for
(;
x
<=
len
-
width
;
x
+=
width
)
{
v_int16
x8
values1
=
v
_load
(
src1
+
x
);
v_int16
x8
low1
=
v
_load
(
src2
+
x
);
v_int16
x8
high1
=
v
_load
(
src3
+
x
);
v_int16
values1
=
vx
_load
(
src1
+
x
);
v_int16
low1
=
vx
_load
(
src2
+
x
);
v_int16
high1
=
vx
_load
(
src3
+
x
);
v_int16
x8
values2
=
v_load
(
src1
+
x
+
v_int16x8
::
nlanes
);
v_int16
x8
low2
=
v_load
(
src2
+
x
+
v_int16x8
::
nlanes
);
v_int16
x8
high2
=
v_load
(
src3
+
x
+
v_int16x8
::
nlanes
);
v_int16
values2
=
vx_load
(
src1
+
x
+
v_int16
::
nlanes
);
v_int16
low2
=
vx_load
(
src2
+
x
+
v_int16
::
nlanes
);
v_int16
high2
=
vx_load
(
src3
+
x
+
v_int16
::
nlanes
);
v_store
((
schar
*
)(
dst
+
x
),
v_pack
((
values1
>=
low1
)
&
(
high1
>=
values1
),
(
values2
>=
low2
)
&
(
high2
>=
values2
)));
}
vx_cleanup
();
return
x
;
}
};
...
...
@@ -1480,20 +1484,21 @@ struct InRange_SIMD<int>
uchar
*
dst
,
int
len
)
const
{
int
x
=
0
;
const
int
width
=
(
int
)
v_int32
x4
::
nlanes
*
2
;
const
int
width
=
(
int
)
v_int32
::
nlanes
*
2
;
for
(;
x
<=
len
-
width
;
x
+=
width
)
{
v_int32
x4
values1
=
v
_load
(
src1
+
x
);
v_int32
x4
low1
=
v
_load
(
src2
+
x
);
v_int32
x4
high1
=
v
_load
(
src3
+
x
);
v_int32
values1
=
vx
_load
(
src1
+
x
);
v_int32
low1
=
vx
_load
(
src2
+
x
);
v_int32
high1
=
vx
_load
(
src3
+
x
);
v_int32
x4
values2
=
v_load
(
src1
+
x
+
v_int32x4
::
nlanes
);
v_int32
x4
low2
=
v_load
(
src2
+
x
+
v_int32x4
::
nlanes
);
v_int32
x4
high2
=
v_load
(
src3
+
x
+
v_int32x4
::
nlanes
);
v_int32
values2
=
vx_load
(
src1
+
x
+
v_int32
::
nlanes
);
v_int32
low2
=
vx_load
(
src2
+
x
+
v_int32
::
nlanes
);
v_int32
high2
=
vx_load
(
src3
+
x
+
v_int32
::
nlanes
);
v_pack_store
(
dst
+
x
,
v_reinterpret_as_u16
(
v_pack
((
values1
>=
low1
)
&
(
high1
>=
values1
),
(
values2
>=
low2
)
&
(
high2
>=
values2
))));
}
vx_cleanup
();
return
x
;
}
};
...
...
@@ -1505,20 +1510,21 @@ struct InRange_SIMD<float>
uchar
*
dst
,
int
len
)
const
{
int
x
=
0
;
const
int
width
=
(
int
)
v_float32
x4
::
nlanes
*
2
;
const
int
width
=
(
int
)
v_float32
::
nlanes
*
2
;
for
(;
x
<=
len
-
width
;
x
+=
width
)
{
v_float32
x4
values1
=
v
_load
(
src1
+
x
);
v_float32
x4
low1
=
v
_load
(
src2
+
x
);
v_float32
x4
high1
=
v
_load
(
src3
+
x
);
v_float32
values1
=
vx
_load
(
src1
+
x
);
v_float32
low1
=
vx
_load
(
src2
+
x
);
v_float32
high1
=
vx
_load
(
src3
+
x
);
v_float32
x4
values2
=
v_load
(
src1
+
x
+
v_float32x4
::
nlanes
);
v_float32
x4
low2
=
v_load
(
src2
+
x
+
v_float32x4
::
nlanes
);
v_float32
x4
high2
=
v_load
(
src3
+
x
+
v_float32x4
::
nlanes
);
v_float32
values2
=
vx_load
(
src1
+
x
+
v_float32
::
nlanes
);
v_float32
low2
=
vx_load
(
src2
+
x
+
v_float32
::
nlanes
);
v_float32
high2
=
vx_load
(
src3
+
x
+
v_float32
::
nlanes
);
v_pack_store
(
dst
+
x
,
v_pack
(
v_reinterpret_as_u32
((
values1
>=
low1
)
&
(
high1
>=
values1
)),
v_reinterpret_as_u32
((
values2
>=
low2
)
&
(
high2
>=
values2
))));
}
vx_cleanup
();
return
x
;
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment