Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6c862fae
Commit
6c862fae
authored
Mar 19, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #14099 from seiko2plus:vsx_improvements_3
parents
ce1432e4
4fe2d9bd
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
72 additions
and
43 deletions
+72
-43
intrin_vsx.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+72
-43
No files found.
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
View file @
6c862fae
...
...
@@ -1077,100 +1077,129 @@ inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_loa
inline
v_int32x4
v_lut
(
const
int
*
tab
,
const
v_int32x4
&
idxvec
)
{
int
CV_DECL_ALIGNED
(
32
)
idx
[
4
];
v_store_aligned
(
idx
,
idxvec
);
const
int
idx
[
4
]
=
{
vec_extract
(
idxvec
.
val
,
0
),
vec_extract
(
idxvec
.
val
,
1
),
vec_extract
(
idxvec
.
val
,
2
),
vec_extract
(
idxvec
.
val
,
3
)
};
return
v_int32x4
(
tab
[
idx
[
0
]],
tab
[
idx
[
1
]],
tab
[
idx
[
2
]],
tab
[
idx
[
3
]]);
}
inline
v_uint32x4
v_lut
(
const
unsigned
*
tab
,
const
v_int32x4
&
idxvec
)
{
int
CV_DECL_ALIGNED
(
32
)
idx
[
4
];
v_store_aligned
(
idx
,
idxvec
);
const
int
idx
[
4
]
=
{
vec_extract
(
idxvec
.
val
,
0
),
vec_extract
(
idxvec
.
val
,
1
),
vec_extract
(
idxvec
.
val
,
2
),
vec_extract
(
idxvec
.
val
,
3
)
};
return
v_uint32x4
(
tab
[
idx
[
0
]],
tab
[
idx
[
1
]],
tab
[
idx
[
2
]],
tab
[
idx
[
3
]]);
}
inline
v_float32x4
v_lut
(
const
float
*
tab
,
const
v_int32x4
&
idxvec
)
{
int
CV_DECL_ALIGNED
(
32
)
idx
[
4
];
v_store_aligned
(
idx
,
idxvec
);
const
int
idx
[
4
]
=
{
vec_extract
(
idxvec
.
val
,
0
),
vec_extract
(
idxvec
.
val
,
1
),
vec_extract
(
idxvec
.
val
,
2
),
vec_extract
(
idxvec
.
val
,
3
)
};
return
v_float32x4
(
tab
[
idx
[
0
]],
tab
[
idx
[
1
]],
tab
[
idx
[
2
]],
tab
[
idx
[
3
]]);
}
inline
v_float64x2
v_lut
(
const
double
*
tab
,
const
v_int32x4
&
idxvec
)
{
int
CV_DECL_ALIGNED
(
32
)
idx
[
4
];
v_store_aligned
(
idx
,
idxvec
);
const
int
idx
[
2
]
=
{
vec_extract
(
idxvec
.
val
,
0
),
vec_extract
(
idxvec
.
val
,
1
)
};
return
v_float64x2
(
tab
[
idx
[
0
]],
tab
[
idx
[
1
]]);
}
inline
void
v_lut_deinterleave
(
const
float
*
tab
,
const
v_int32x4
&
idxvec
,
v_float32x4
&
x
,
v_float32x4
&
y
)
{
int
CV_DECL_ALIGNED
(
32
)
idx
[
4
];
v_store_aligned
(
idx
,
idxvec
);
x
=
v_float32x4
(
tab
[
idx
[
0
]],
tab
[
idx
[
1
]],
tab
[
idx
[
2
]],
tab
[
idx
[
3
]]);
y
=
v_float32x4
(
tab
[
idx
[
0
]
+
1
],
tab
[
idx
[
1
]
+
1
],
tab
[
idx
[
2
]
+
1
],
tab
[
idx
[
3
]
+
1
]);
vec_float4
xy0
=
vec_ld_l8
(
tab
+
vec_extract
(
idxvec
.
val
,
0
));
vec_float4
xy1
=
vec_ld_l8
(
tab
+
vec_extract
(
idxvec
.
val
,
1
));
vec_float4
xy2
=
vec_ld_l8
(
tab
+
vec_extract
(
idxvec
.
val
,
2
));
vec_float4
xy3
=
vec_ld_l8
(
tab
+
vec_extract
(
idxvec
.
val
,
3
));
vec_float4
xy02
=
vec_mergeh
(
xy0
,
xy2
);
// x0, x2, y0, y2
vec_float4
xy13
=
vec_mergeh
(
xy1
,
xy3
);
// x1, x3, y1, y3
x
.
val
=
vec_mergeh
(
xy02
,
xy13
);
y
.
val
=
vec_mergel
(
xy02
,
xy13
);
}
inline
void
v_lut_deinterleave
(
const
double
*
tab
,
const
v_int32x4
&
idxvec
,
v_float64x2
&
x
,
v_float64x2
&
y
)
{
int
CV_DECL_ALIGNED
(
32
)
idx
[
4
]
;
v
_store_aligned
(
idx
,
idxvec
);
x
=
v_float64x2
(
tab
[
idx
[
0
]],
tab
[
idx
[
1
]]
);
y
=
v_float64x2
(
tab
[
idx
[
0
]
+
1
],
tab
[
idx
[
1
]
+
1
]
);
vec_double2
xy0
=
vsx_ld
(
vec_extract
(
idxvec
.
val
,
0
),
tab
)
;
v
ec_double2
xy1
=
vsx_ld
(
vec_extract
(
idxvec
.
val
,
1
),
tab
);
x
.
val
=
vec_mergeh
(
xy0
,
xy1
);
y
.
val
=
vec_mergel
(
xy0
,
xy1
);
}
inline
v_int8x16
v_interleave_pairs
(
const
v_int8x16
&
vec
)
{
vec_short8
vec0
=
vec_mergeh
((
vec_short8
)
vec
.
val
,
(
vec_short8
)
vec_mergesql
(
vec
.
val
,
vec
.
val
));
vec0
=
vec_mergeh
(
vec0
,
vec_mergesql
(
vec0
,
vec0
));
return
v_int8x16
(
vec_mergeh
((
vec_char16
)
vec0
,
(
vec_char16
)
vec_mergesql
(
vec0
,
vec0
)));
static
const
vec_uchar16
perm
=
{
0
,
2
,
1
,
3
,
4
,
6
,
5
,
7
,
8
,
10
,
9
,
11
,
12
,
14
,
13
,
15
};
return
v_int8x16
(
vec_perm
(
vec
.
val
,
vec
.
val
,
perm
));
}
inline
v_uint8x16
v_interleave_pairs
(
const
v_uint8x16
&
vec
)
{
return
v_reinterpret_as_u8
(
v_interleave_pairs
(
v_reinterpret_as_s8
(
vec
)));
}
inline
v_uint8x16
v_interleave_pairs
(
const
v_uint8x16
&
vec
)
{
return
v_reinterpret_as_u8
(
v_interleave_pairs
(
v_reinterpret_as_s8
(
vec
)));
}
inline
v_int8x16
v_interleave_quads
(
const
v_int8x16
&
vec
)
{
vec_char16
vec0
=
(
vec_char16
)
vec_mergeh
((
vec_int4
)
vec
.
val
,
(
vec_int4
)
vec_mergesql
(
vec
.
val
,
vec
.
val
))
;
return
v_int8x16
(
vec_
mergeh
(
vec0
,
vec_mergesql
(
vec0
,
vec0
)
));
static
const
vec_uchar16
perm
=
{
0
,
4
,
1
,
5
,
2
,
6
,
3
,
7
,
8
,
12
,
9
,
13
,
10
,
14
,
11
,
15
}
;
return
v_int8x16
(
vec_
perm
(
vec
.
val
,
vec
.
val
,
perm
));
}
inline
v_uint8x16
v_interleave_quads
(
const
v_uint8x16
&
vec
)
{
return
v_reinterpret_as_u8
(
v_interleave_quads
(
v_reinterpret_as_s8
(
vec
)));
}
inline
v_uint8x16
v_interleave_quads
(
const
v_uint8x16
&
vec
)
{
return
v_reinterpret_as_u8
(
v_interleave_quads
(
v_reinterpret_as_s8
(
vec
)));
}
inline
v_int16x8
v_interleave_pairs
(
const
v_int16x8
&
vec
)
{
vec_short8
vec0
=
(
vec_short8
)
vec_mergeh
((
vec_int4
)
vec
.
val
,
(
vec_int4
)
vec_mergesql
(
vec
.
val
,
vec
.
val
))
;
return
v_int16x8
(
vec_
mergeh
(
vec0
,
vec_mergesql
(
vec0
,
vec0
)
));
static
const
vec_uchar16
perm
=
{
0
,
1
,
4
,
5
,
2
,
3
,
6
,
7
,
8
,
9
,
12
,
13
,
10
,
11
,
14
,
15
}
;
return
v_int16x8
(
vec_
perm
(
vec
.
val
,
vec
.
val
,
perm
));
}
inline
v_uint16x8
v_interleave_pairs
(
const
v_uint16x8
&
vec
)
{
return
v_reinterpret_as_u16
(
v_interleave_pairs
(
v_reinterpret_as_s16
(
vec
)));
}
inline
v_uint16x8
v_interleave_pairs
(
const
v_uint16x8
&
vec
)
{
return
v_reinterpret_as_u16
(
v_interleave_pairs
(
v_reinterpret_as_s16
(
vec
)));
}
inline
v_int16x8
v_interleave_quads
(
const
v_int16x8
&
vec
)
{
return
v_int16x8
(
vec_mergeh
(
vec
.
val
,
vec_mergesql
(
vec
.
val
,
vec
.
val
)));
static
const
vec_uchar16
perm
=
{
0
,
1
,
8
,
9
,
2
,
3
,
10
,
11
,
4
,
5
,
12
,
13
,
6
,
7
,
14
,
15
};
return
v_int16x8
(
vec_perm
(
vec
.
val
,
vec
.
val
,
perm
));
}
inline
v_uint16x8
v_interleave_quads
(
const
v_uint16x8
&
vec
)
{
return
v_reinterpret_as_u16
(
v_interleave_quads
(
v_reinterpret_as_s16
(
vec
)));
}
inline
v_uint16x8
v_interleave_quads
(
const
v_uint16x8
&
vec
)
{
return
v_reinterpret_as_u16
(
v_interleave_quads
(
v_reinterpret_as_s16
(
vec
)));
}
inline
v_int32x4
v_interleave_pairs
(
const
v_int32x4
&
vec
)
{
return
v_int32x4
(
vec_mergeh
(
vec
.
val
,
vec_mergesql
(
vec
.
val
,
vec
.
val
)));
static
const
vec_uchar16
perm
=
{
0
,
1
,
2
,
3
,
8
,
9
,
10
,
11
,
4
,
5
,
6
,
7
,
12
,
13
,
14
,
15
};
return
v_int32x4
(
vec_perm
(
vec
.
val
,
vec
.
val
,
perm
));
}
inline
v_uint32x4
v_interleave_pairs
(
const
v_uint32x4
&
vec
)
{
return
v_reinterpret_as_u32
(
v_interleave_pairs
(
v_reinterpret_as_s32
(
vec
)));
}
inline
v_float32x4
v_interleave_pairs
(
const
v_float32x4
&
vec
)
{
return
v_reinterpret_as_f32
(
v_interleave_pairs
(
v_reinterpret_as_s32
(
vec
)));
}
inline
v_uint32x4
v_interleave_pairs
(
const
v_uint32x4
&
vec
)
{
return
v_reinterpret_as_u32
(
v_interleave_pairs
(
v_reinterpret_as_s32
(
vec
)));
}
inline
v_float32x4
v_interleave_pairs
(
const
v_float32x4
&
vec
)
{
return
v_reinterpret_as_f32
(
v_interleave_pairs
(
v_reinterpret_as_s32
(
vec
)));
}
inline
v_int8x16
v_pack_triplets
(
const
v_int8x16
&
vec
)
{
schar
CV_DECL_ALIGNED
(
32
)
val
[
16
];
v_store_aligned
(
val
,
vec
);
return
v_int8x16
(
val
[
0
],
val
[
1
],
val
[
2
],
val
[
4
],
val
[
5
],
val
[
6
],
val
[
8
],
val
[
9
],
val
[
10
],
val
[
12
],
val
[
13
],
val
[
14
],
val
[
15
],
val
[
15
],
val
[
15
],
val
[
15
]);
static
const
vec_uchar16
perm
=
{
0
,
1
,
2
,
4
,
5
,
6
,
8
,
9
,
10
,
12
,
13
,
14
,
15
,
15
,
15
,
15
};
return
v_int8x16
(
vec_perm
(
vec
.
val
,
vec
.
val
,
perm
));
}
inline
v_uint8x16
v_pack_triplets
(
const
v_uint8x16
&
vec
)
{
return
v_reinterpret_as_u8
(
v_pack_triplets
(
v_reinterpret_as_s8
(
vec
)));
}
inline
v_uint8x16
v_pack_triplets
(
const
v_uint8x16
&
vec
)
{
return
v_reinterpret_as_u8
(
v_pack_triplets
(
v_reinterpret_as_s8
(
vec
)));
}
inline
v_int16x8
v_pack_triplets
(
const
v_int16x8
&
vec
)
{
short
CV_DECL_ALIGNED
(
32
)
val
[
8
];
v_store_aligned
(
val
,
vec
);
return
v_int16x8
(
val
[
0
],
val
[
1
],
val
[
2
],
val
[
4
],
val
[
5
],
val
[
6
],
val
[
7
],
val
[
7
]);
static
const
vec_uchar16
perm
=
{
0
,
1
,
2
,
3
,
4
,
5
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
14
,
15
};
return
v_int16x8
(
vec_perm
(
vec
.
val
,
vec
.
val
,
perm
));
}
inline
v_uint16x8
v_pack_triplets
(
const
v_uint16x8
&
vec
)
{
return
v_reinterpret_as_u16
(
v_pack_triplets
(
v_reinterpret_as_s16
(
vec
)));
}
inline
v_uint16x8
v_pack_triplets
(
const
v_uint16x8
&
vec
)
{
return
v_reinterpret_as_u16
(
v_pack_triplets
(
v_reinterpret_as_s16
(
vec
)));
}
inline
v_int32x4
v_pack_triplets
(
const
v_int32x4
&
vec
)
{
return
vec
;
}
inline
v_uint32x4
v_pack_triplets
(
const
v_uint32x4
&
vec
)
{
return
vec
;
}
inline
v_float32x4
v_pack_triplets
(
const
v_float32x4
&
vec
)
{
return
vec
;
}
inline
v_int32x4
v_pack_triplets
(
const
v_int32x4
&
vec
)
{
return
vec
;
}
inline
v_uint32x4
v_pack_triplets
(
const
v_uint32x4
&
vec
)
{
return
vec
;
}
inline
v_float32x4
v_pack_triplets
(
const
v_float32x4
&
vec
)
{
return
vec
;
}
/////// FP16 support ////////
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment