Commit 4ed6fafd authored by Vladimir Glavnyy's avatar Vladimir Glavnyy Committed by Wouter van Oortmerssen

Refactoring of idl_parser (#4948)

* Refactoring of numbers parser

More accurate parse of float and double.
Hexadecimal floats.
Check "out-of-range" of uint64 fields.
Check correctness of default values and metadata.

* Remove locale-independent code strtod/strtof from PR #4948.

* small optimization

* Add is_(ascii) functions

* is_ascii cleanup

* Fix format conversion

* Refine number parser

* Make code compatible with Android build

* Remove unnecessary suppression of warning C4127
parent 53ce80ce
......@@ -109,3 +109,5 @@ dart/.dart_tool/
dart/build/
dart/doc/api/
Cargo.lock
.corpus**
.seed**
......@@ -49,11 +49,26 @@ file_extension_decl = `file_extension` string\_constant `;`
file_identifier_decl = `file_identifier` string\_constant `;`
integer\_constant = `-?[0-9]+` | `true` | `false`
float\_constant = `-?[0-9]+.[0-9]+((e|E)(+|-)?[0-9]+)?`
string\_constant = `\".*?\"`
ident = `[a-zA-Z_][a-zA-Z0-9_]*`
`[:digit:]` = `[0-9]`
`[:xdigit:]` = `[0-9a-fA-F]`
dec\_integer\_constant = `[-+]?[:digit:]+`
hex\_integer\_constant = `[-+]?0[xX][:xdigit:]+`
integer\_constant = dec\_integer\_constant | hex\_integer\_constant
dec\_float\_constant = `[-+]?(([.][:digit:]+)|([:digit:]+[.][:digit:]*)|([:digit:]+))([eE][-+]?[:digit:]+)?`
hex\_float\_constant = `[-+]?0[xX](([.][:xdigit:]+)|([:xdigit:]+[.][:xdigit:]*)|([:xdigit:]+))([pP][-+]?[:digit:]+)`
special\_float\_constant = `[-+]?(nan|inf|infinity)`
float\_constant = dec\_float\_constant | hex\_float\_constant | special\_float\_constant
boolean\_constant = `(true|false)` | (integer\_constant ? `true` : `false`)
......@@ -385,6 +385,31 @@ When parsing JSON, it recognizes the following escape codes in strings:
It also generates these escape codes back again when generating JSON from a
binary representation.
When parsing numbers, the parser is more flexible than JSON.
The format of numeric literals is closer to that of C/C++.
According to the [grammar](@ref flatbuffers_grammar), it accepts the following
numerical literals:
- An integer literal can have any number of leading zero `0` digits.
Unlike C/C++, the parser ignores a leading zero, not interpreting it as the
beginning of the octal number.
The numbers `[081, -00094]` are equal to `[81, -94]` decimal integers.
- The parser accepts unsigned and signed hexadecimal integer numbers.
For example: `[0x123, +0x45, -0x67]` are equal to `[291, 69, -103]` decimals.
- The format of floating-point numbers is fully compatible with the C/C++ format.
If a modern C++ compiler is used, the parser also accepts hexadecimal and
special floating-point literals:
`[-1.0, 2., .3e0, 3.e4, 0x21.34p-5, -inf, nan]`.
The exponent suffix of a hexadecimal floating-point number is mandatory.
Extended floating-point support was tested with:
- x64 Windows: `MSVC2015` and higher.
- x64 Linux: `LLVM 6.0`, `GCC 4.9` and higher.
- For compatibility with JSON lint tools, all numeric literals of scalar
fields can be wrapped in quoted strings:
`"1", "2.0", "0x48A", "0x0C.0Ep-1", "-inf", "true"`.
## Guidelines
### Efficiency
......
......@@ -180,6 +180,17 @@
#endif // __has_include
#endif // !FLATBUFFERS_HAS_STRING_VIEW
#if !defined(FLATBUFFERS_HAS_NEW_STRTOD)
  // FLATBUFFERS_HAS_NEW_STRTOD == 1 signals that modern (C++11) strtod and
  // strtof functions are available for use, i.e. they accept:
  //   1) nan/inf strings as argument of strtod;
  //   2) hex-float as argument of strtod/strtof.
  // A definition supplied by the user (e.g. on the command line) is never
  // overridden. On toolchains that match none of the branches below the macro
  // is left undefined.
  #if defined(__clang__)
    // Clang is accepted unconditionally, whatever GCC version it reports.
    #define FLATBUFFERS_HAS_NEW_STRTOD 1
  #elif defined(_MSC_VER) && (_MSC_VER >= 1900)
    #define FLATBUFFERS_HAS_NEW_STRTOD 1
  #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 409)
    // GCC 4.9 or newer.
    #define FLATBUFFERS_HAS_NEW_STRTOD 1
  #endif
#endif  // !defined(FLATBUFFERS_HAS_NEW_STRTOD)
/// @endcond
/// @file
......
......@@ -484,7 +484,11 @@ struct IDLOptions {
// This encapsulates where the parser is in the current source file.
struct ParserState {
ParserState()
: cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1) {}
: cursor_(nullptr),
line_start_(nullptr),
line_(0),
token_(-1),
attr_is_trivial_ascii_string_(true) {}
protected:
void ResetState(const char *source) {
......@@ -508,6 +512,10 @@ struct ParserState {
int line_; // the current line being parsed
int token_;
// Flag: the text in attribute_ is a plain ASCII string without escape
// sequences, i.e. only printable ASCII (without [\t\r\n]).
// Used for number-in-string (and base64 strings in the future).
bool attr_is_trivial_ascii_string_;
std::string attribute_;
std::vector<std::string> doc_comment_;
};
......@@ -644,7 +652,8 @@ class Parser : public ParserState {
bool ParseFlexBuffer(const char *source, const char *source_filename,
flexbuffers::Builder *builder);
FLATBUFFERS_CHECKED_ERROR CheckInRange(int64_t val, int64_t min, int64_t max);
FLATBUFFERS_CHECKED_ERROR InvalidNumber(const char *number,
const std::string &msg);
StructDef *LookupStruct(const std::string &id) const;
......@@ -711,7 +720,7 @@ class Parser : public ParserState {
BaseType req, bool *destmatch);
FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field);
FLATBUFFERS_CHECKED_ERROR TokenError();
FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e);
FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now);
FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(Type &type, int64_t *result);
StructDef *LookupCreateStruct(const std::string &name,
bool create_if_new = true,
......
......@@ -37,9 +37,9 @@
// Not possible if Microsoft Compiler before 2012
// Possible if the language feature __cpp_alias_templates is defined well
// Or possible if the C++ std is C++11 or newer
#if !(defined(_MSC_VER) && _MSC_VER <= 1700 /* MSVC2012 */) \
&& ((defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
|| (defined(__cplusplus) && __cplusplus >= 201103L))
#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \
|| (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
|| (defined(__cplusplus) && __cplusplus >= 201103L)
#define FLATBUFFERS_TEMPLATES_ALIASES
#endif
......@@ -88,12 +88,33 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
#else
template <typename T> class numeric_limits :
public std::numeric_limits<T> {};
public std::numeric_limits<T> {
public:
// Android NDK fix.
static T lowest() {
return std::numeric_limits<T>::min();
}
};
// Specialization of the numeric_limits wrapper for float.
// Inherits everything from std::numeric_limits<float> and adds lowest().
// Unlike the generic wrapper in this file, whose lowest() forwards to min(),
// this version returns the negated FLT_MAX.
template <> class numeric_limits<float> :
public std::numeric_limits<float> {
public:
// Returns -FLT_MAX.
static float lowest() { return -FLT_MAX; }
};
// Specialization of the numeric_limits wrapper for double.
// Inherits everything from std::numeric_limits<double> and adds lowest().
// Unlike the generic wrapper in this file, whose lowest() forwards to min(),
// this version returns the negated DBL_MAX.
template <> class numeric_limits<double> :
public std::numeric_limits<double> {
public:
// Returns -DBL_MAX.
static double lowest() { return -DBL_MAX; }
};
// Stand-alone limits for unsigned long long. This specialization does not
// derive from std::numeric_limits; it provides min(), max() and lowest()
// itself.
template <> class numeric_limits<unsigned long long> {
 public:
  // Smallest representable value: zero.
  static unsigned long long min() { return 0ULL; }
  // Largest representable value: all bits set.
  static unsigned long long max() { return ~0ULL; }
  // For this type the lowest value is the same as min().
  static unsigned long long lowest() { return min(); }
};
template <> class numeric_limits<long long> {
......@@ -105,6 +126,9 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
return static_cast<long long>(
(1ULL << ((sizeof(long long) << 3) - 1)) - 1);
}
static long long lowest() {
return numeric_limits<long long>::min();
}
};
#endif // FLATBUFFERS_CPP98_STL
......@@ -114,6 +138,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
template <typename T, typename U> using is_same = std::is_same<T,U>;
template <typename T> using is_floating_point = std::is_floating_point<T>;
template <typename T> using is_unsigned = std::is_unsigned<T>;
template <typename T> using make_unsigned = std::make_unsigned<T>;
#else
// Map C++ TR1 templates defined by stlport.
template <typename T> using is_scalar = std::tr1::is_scalar<T>;
......@@ -121,6 +146,13 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
template <typename T> using is_floating_point =
std::tr1::is_floating_point<T>;
template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
// Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
// Minimal replacement: the primary template passes through types that are
// already unsigned and rejects everything else at compile time; the explicit
// specializations below map each standard signed integer type (and plain
// char) to its unsigned counterpart.
template<typename T> struct make_unsigned {
  static_assert(is_unsigned<T>::value, "Specialization not implemented!");
  using type = T;
};
template<> struct make_unsigned<char> { using type = unsigned char; };
template<> struct make_unsigned<signed char> { using type = unsigned char; };
template<> struct make_unsigned<short> { using type = unsigned short; };
template<> struct make_unsigned<int> { using type = unsigned int; };
template<> struct make_unsigned<long> { using type = unsigned long; };
template<> struct make_unsigned<long long> {
  using type = unsigned long long;
};
#endif // !FLATBUFFERS_CPP98_STL
#else
// MSVC 2010 doesn't support C++11 aliases.
......@@ -129,6 +161,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
template <typename T> struct is_floating_point :
public std::is_floating_point<T> {};
template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
#ifndef FLATBUFFERS_CPP98_STL
......
This diff is collapsed.
......@@ -145,7 +145,7 @@ class CppGenerator : public BaseGenerator {
std::string guard = file_name_;
// Remove any non-alpha-numeric characters that may appear in a filename.
struct IsAlnum {
bool operator()(char c) const { return !isalnum(c); }
bool operator()(char c) const { return !is_alnum(c); }
};
guard.erase(std::remove_if(guard.begin(), guard.end(), IsAlnum()),
guard.end());
......
......@@ -149,19 +149,23 @@ bool Print<const void *>(const void *val, Type type, int indent,
return true;
}
// Converts the textual default value of a field (fd.value.constant) into a
// value of type T using StringToNumber and returns it.
// The conversion result is only checked through FLATBUFFERS_ASSERT.
template<typename T> static T GetFieldDefault(const FieldDef &fd) {
  T parsed;
  auto converted = StringToNumber(fd.value.constant.c_str(), &parsed);
  // Explicitly "use" the result; presumably this avoids an unused-variable
  // warning when FLATBUFFERS_ASSERT compiles to nothing - TODO confirm.
  (void)converted;
  FLATBUFFERS_ASSERT(converted);
  return parsed;
}
// Generate text for a scalar field.
template<typename T> static bool GenField(const FieldDef &fd,
const Table *table, bool fixed,
const IDLOptions &opts,
int indent,
std::string *_text) {
return Print(fixed ?
reinterpret_cast<const Struct *>(table)->GetField<T>(fd.value.offset) :
table->GetField<T>(fd.value.offset,
IsFloat(fd.value.type.base_type) ?
static_cast<T>(strtod(fd.value.constant.c_str(), nullptr)) :
static_cast<T>(StringToInt(fd.value.constant.c_str()))),
fd.value.type, indent, nullptr, opts, _text);
// Generate text for a scalar field of type T.
// When `fixed` is true, `table` is reinterpreted as a Struct and the value is
// read directly at fd.value.offset. Otherwise it is read from the table, with
// the field's parsed default (GetFieldDefault<T>) passed as the fallback.
// The value is then handed to Print, whose result is returned.
template<typename T>
static bool GenField(const FieldDef &fd, const Table *table, bool fixed,
                     const IDLOptions &opts, int indent, std::string *_text) {
  const T field_value =
      fixed ? reinterpret_cast<const Struct *>(table)->GetField<T>(
                  fd.value.offset)
            : table->GetField<T>(fd.value.offset, GetFieldDefault<T>(fd));
  return Print(field_value, fd.value.type, indent, nullptr, opts, _text);
}
static bool GenStruct(const StructDef &struct_def, const Table *table,
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment