Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ffmpeg
Commits
68590650
Commit
68590650
authored
Oct 02, 2013
by
Stefano Sabatini
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
lavu/avstring: add av_utf8_decode() function
parent
e782eea1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
180 additions
and
1 deletion
+180
-1
APIchanges
doc/APIchanges
+3
-0
Makefile
libavutil/Makefile
+1
-0
avstring.c
libavutil/avstring.c
+64
-0
avstring.h
libavutil/avstring.h
+40
-0
utf8.c
libavutil/utf8.c
+71
-0
version.h
libavutil/version.h
+1
-1
No files found.
doc/APIchanges
View file @
68590650
...
...
@@ -15,6 +15,9 @@ libavutil: 2012-10-22
API changes, most recent first:
2013-11-XX - xxxxxxx - lavu 52.54.100 - avstring.h
Add av_utf8_decode() function.
2013-11-xx - xxxxxxx - lavc 55.44.100 - avcodec.h
Add av_packet_{un,}pack_dictionary()
Add AV_PKT_METADATA_UPDATE side data type, used to transmit key/value
...
...
libavutil/Makefile
View file @
68590650
...
...
@@ -157,6 +157,7 @@ TESTPROGS = adler32 \
sha
\
sha512
\
tree
\
utf8
\
xtea
\
TESTPROGS-$(HAVE_LZO1X_999_COMPRESS)
+=
lzo
...
...
libavutil/avstring.c
View file @
68590650
...
...
@@ -307,6 +307,70 @@ int av_isxdigit(int c)
return
av_isdigit
(
c
)
||
(
c
>=
'a'
&&
c
<=
'f'
);
}
/*
 * Decode one UTF-8 sequence starting at *bufp, never reading at or past
 * buf_end.
 *
 * On return *bufp points just past the bytes that were consumed, both on
 * success and on error. *codep is written only once a complete sequence has
 * been parsed; it is written even if one of the range checks below then
 * rejects the value.
 *
 * Returns 0 on success (also when the buffer is already exhausted, in which
 * case nothing is read or written) and AVERROR(EILSEQ) for a malformed
 * sequence or a code point rejected according to flags.
 *
 * NOTE(review): overlong encodings (e.g. C0 80) are not rejected here.
 */
int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
                   unsigned int flags)
{
    const uint8_t *p = *bufp;
    uint32_t top;
    uint64_t code;
    int ret = 0;

    if (p >= buf_end)
        return 0;

    code = *p++;

    /* first sequence byte starts with 10, or is 1111-1110 or 1111-1111,
       which is not admitted */
    if ((code & 0xc0) == 0x80 || code >= 0xFE) {
        ret = AVERROR(EILSEQ);
        goto end;
    }

    /* top tracks the position of the next length-marker bit of the lead
       byte inside the growing accumulator; it is 0 for a 7-bit character */
    top = (code & 128) >> 1;

    while (code & top) {
        uint8_t byte;

        if (p >= buf_end) {
            ret = AVERROR(EILSEQ); /* incomplete sequence */
            goto end;
        }

        /* a continuation byte must be in the form 10xx-xxxx; the byte is
           consumed even when it is rejected */
        byte = *p++;
        if ((byte & 0xC0) != 0x80) {
            ret = AVERROR(EILSEQ);
            goto end;
        }
        code = (code << 6) + (byte & 0x3F); /* append the 6 payload bits */
        top <<= 5;
    }

    /* drop the length-marker bits of the lead byte; when top is 1U << 31
       the shift wraps to 0 and the mask becomes 0xFFFFFFFF */
    code &= (top << 1) - 1;

    /* unsigned constant: 1 << 31 overflows a 32-bit signed int */
    if (code >= 1U << 31) {
        ret = AVERROR(EILSEQ); /* out-of-range value */
        goto end;
    }

    *codep = code;

    if (code > 0x10FFFF &&
        !(flags & AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES))
        ret = AVERROR(EILSEQ);
    if (code < 0x20 && code != 0x9 && code != 0xA && code != 0xD &&
        (flags & AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES))
        ret = AVERROR(EILSEQ);
    if (code >= 0xD800 && code <= 0xDFFF &&
        !(flags & AV_UTF8_FLAG_ACCEPT_SURROGATES))
        ret = AVERROR(EILSEQ);
    /* both non-characters are rejected unless the flag is set; the flag
       test must be !(flags & X), not (!flags & X) */
    if ((code == 0xFFFE || code == 0xFFFF) &&
        !(flags & AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS))
        ret = AVERROR(EILSEQ);

end:
    *bufp = p;
    return ret;
}
#ifdef TEST
int
main
(
void
)
...
...
libavutil/avstring.h
View file @
68590650
...
...
@@ -22,6 +22,7 @@
#define AVUTIL_AVSTRING_H
#include <stddef.h>
#include <stdint.h>
#include "attributes.h"
/**
...
...
@@ -295,6 +296,45 @@ enum AVEscapeMode {
int
av_escape
(
char
**
dst
,
const
char
*
src
,
const
char
*
special_chars
,
enum
AVEscapeMode
mode
,
int
flags
);
#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

/**
 * Union of all the AV_UTF8_FLAG_ACCEPT_* flags. The expansion is
 * parenthesized so that it can be combined with &, ~ or == without the
 * surrounding operator binding to only part of it.
 */
#define AV_UTF8_FLAG_ACCEPT_ALL \
    (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES | AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS | AV_UTF8_FLAG_ACCEPT_SURROGATES)
/**
* Read and decode a single UTF-8 code point (character) from the
* buffer in *bufp, and update *bufp to point to the next byte to
* decode.
*
* In case of an invalid byte sequence, the pointer will be updated to
* the next byte after the invalid sequence and the function will
* return an error code.
*
* Depending on the specified flags, the function will also fail in
* case the decoded code point does not belong to a valid range.
*
* If *bufp is already at or past buf_end, nothing is read: the function
* returns 0 and leaves both *bufp and *codep untouched.
*
* @note For speed-relevant code a carefully implemented use of
* GET_UTF8() may be preferred.
*
* @param codep pointer used to return the parsed code in case of success.
* The value in *codep is set even in case the range check fails.
* @param bufp pointer to the address of the first byte of the sequence
* to decode, updated by the function to point to the
* byte next after the decoded sequence
* @param buf_end pointer to the end of the buffer, points to the next
* byte past the last in the buffer. This is used to
* avoid buffer overreads (in case of an unfinished
* UTF-8 sequence towards the end of the buffer).
* @param flags a collection of AV_UTF8_FLAG_* flags
* @return >= 0 in case a sequence was successfully read, a negative
* value in case of invalid sequence
*/
int
av_utf8_decode
(
int32_t
*
codep
,
const
uint8_t
**
bufp
,
const
uint8_t
*
buf_end
,
unsigned
int
flags
);
/**
* @}
*/
...
...
libavutil/utf8.c
0 → 100644
View file @
68590650
/*
* Copyright (c) 2013 Stefano Sabatini
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include "libavutil/avstring.h"
#include "libavutil/file.h"
/**
 * Dump the first l bytes of p to stdout as two-digit uppercase hex values,
 * then emit an empty string padded to (indent - 2 * l) columns so that the
 * next field starts at a fixed position.
 */
static void print_sequence(const char *p, int l, int indent)
{
    int pos = 0;
    int pad = indent - l * 2; /* columns left after two hex digits per byte */

    while (pos < l) {
        /* go through uint8_t so bytes >= 0x80 are not sign-extended */
        uint8_t byte = (uint8_t)p[pos];

        printf("%02X", byte);
        pos++;
    }

    printf("%*s", pad, "");
}
/**
 * Test driver for av_utf8_decode(): map the file named by argv[1] into
 * memory and decode it one sequence at a time, printing for each sequence
 * its raw bytes in hex, the decoded value (decimal and hex), the number of
 * bytes consumed, and either the character itself or a diagnostic.
 *
 * @return 0 on success, 1 if no file name was given or the file could not
 *         be mapped
 */
int main(int argc, char **argv)
{
    int ret;
    char *filename;
    uint8_t *file_buf;
    size_t file_buf_size;
    uint32_t code;
    const uint8_t *p, *endp;

    /* argv[1] is only valid when an argument was actually supplied */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <input_file>\n",
                argc > 0 ? argv[0] : "utf8");
        return 1;
    }
    filename = argv[1];

    ret = av_file_map(filename, &file_buf, &file_buf_size, 0, NULL);
    if (ret < 0)
        return 1;

    p    = file_buf;
    endp = file_buf + file_buf_size;
    while (p < endp) {
        int l, r;
        const uint8_t *p0 = p;

        /* sentinel: still UINT32_MAX after the call means the decoder
           never stored a code point */
        code = UINT32_MAX;

        /* the decoder takes an int32_t *; code stays unsigned here so it
           can be compared against the sentinel */
        r = av_utf8_decode((int32_t *)&code, &p, endp, 0);
        l = (int)(p - p0);
        print_sequence((const char *)p0, l, 20);

        if (code != UINT32_MAX) {
            printf("%-10d 0x%-10X %-5d ", (int)code, (unsigned)code, l);
            if (r >= 0) {
                /* show a newline as the two characters \n so that every
                   record stays on a single output line */
                if (*p0 == '\n')
                    printf("\\n\n");
                else
                    printf("%.*s\n", l, (const char *)p0);
            } else {
                printf("invalid code range\n");
            }
        } else {
            printf("invalid sequence\n");
        }
    }

    av_file_unmap(file_buf, file_buf_size);
    return 0;
}
libavutil/version.h
View file @
68590650
...
...
@@ -75,7 +75,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 52
#define LIBAVUTIL_VERSION_MINOR 5
3
#define LIBAVUTIL_VERSION_MINOR 5
4
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment