Refactoring of idl_parser (#4948)

* Refactoring of numbers parser More accurate parse of float and double. Hexadecimal floats. Check "out-of-range" of uint64 fields. Check correctness of default values and metadata. * Remove locale-independent code strtod/strtof from PR #4948. * small optimization * Add is_(ascii) functions * is_ascii cleanup * Fix format conversation * Refine number parser * Make code compatible with Android build * Remove unnecessary suppression of warning C4127

Refactoring of idl_parser (#4948)
* Refactoring of numbers parser More accurate parse of float and double. Hexadecimal floats. Check "out-of-range" of uint64 fields. Check correctness of default values and metadata. * Remove locale-independent code strtod/strtof from PR #4948. * small optimization * Add is_(ascii) functions * is_ascii cleanup * Fix format conversation * Refine number parser * Make code compatible with Android build * Remove unnecessary suppression of warning C4127
4ed6fafd · Vladimir Glavnyy · Wouter van Oortmerssen · 53ce80ce · 4ed6fafd · 4ed6fafd
Commit 4ed6fafd authored Oct 11, 2018 by Vladimir Glavnyy Committed by Wouter van Oortmerssen Oct 11, 2018
11 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -109,3 +109,5 @@ dart/.dart_tool/
 dart/build/
 dart/doc/api/
 Cargo.lock
+.corpus**
+.seed**
--- a/docs/source/Grammar.md
+++ b/docs/source/Grammar.md
@@ -49,11 +49,26 @@ file_extension_decl = `file_extension` string\_constant `;`

 file_identifier_decl = `file_identifier` string\_constant `;`

-integer\_constant = `-?[0-9]+` | `true` | `false`
-
-float\_constant = `-?[0-9]+.[0-9]+((e|E)(+|-)?[0-9]+)?`
-
 string\_constant = `\".*?\"`

 ident = `[a-zA-Z_][a-zA-Z0-9_]*`

+`[:digit:]` = `[0-9]`
+
+`[:xdigit:]` = `[0-9a-fA-F]`
+
+dec\_integer\_constant = `[-+]?[:digit:]+`
+
+hex\_integer\_constant = `[-+]?0[xX][:xdigit:]+`
+
+integer\_constant = dec\_integer\_constant | hex\_integer\_constant
+
+dec\_float\_constant = `[-+]?(([.][:digit:]+)|([:digit:]+[.][:digit:]*)|([:digit:]+))([eE][-+]?[:digit:]+)?`
+
+hex\_float\_constant = `[-+]?0[xX](([.][:xdigit:]+)|([:xdigit:]+[.][:xdigit:]*)|([:xdigit:]+))([pP][-+]?[:digit:]+)`
+
+special\_float\_constant = `[-+]?(nan|inf|infinity)`
+
+float\_constant = dec\_float\_constant | hex\_float\_constant | special\_float\_constant
+
+boolean\_constant = `(true|false)` | (integer\_constant ? `true` : `false`)
--- a/docs/source/Schemas.md
+++ b/docs/source/Schemas.md
@@ -385,6 +385,31 @@ When parsing JSON, it recognizes the following escape codes in strings:
 It also generates these escape codes back again when generating JSON from a
 binary representation.

+When parsing numbers, the parser is more flexible than JSON.
+The format of numeric literals is closer to that of C/C++.
+According to the [grammar](@ref flatbuffers_grammar), it accepts the following 
+numerical literals:
+
+-   An integer literal can have any number of leading zero `0` digits.
+    Unlike C/C++, the parser ignores leading zeros and does not interpret them
+    as the beginning of an octal number.
+    The numbers `[081, -00094]` are equal to `[81, -94]`  decimal integers.
+-   The parser accepts unsigned and signed hexadecimal integer numbers.
+    For example: `[0x123, +0x45, -0x67]` are equal to `[291, 69, -103]` decimals.
+-   The format of floating-point numbers is fully compatible with the C/C++
+    format. If a modern C++ compiler is used, the parser also accepts
+    hexadecimal and special floating-point literals:
+    `[-1.0, 2., .3e0, 3.e4, 0x21.34p-5, -inf, nan]`.
+    The exponent suffix of a hexadecimal floating-point number is mandatory.
+    
+    Extended floating-point support was tested with:
+    - x64 Windows: `MSVC2015` and higher.
+    - x64 Linux: `LLVM 6.0`, `GCC 4.9` and higher.
+
+-   For compatibility with JSON lint tools, all numeric literals of scalar
+    fields can be wrapped in quoted strings:
+    `"1", "2.0", "0x48A", "0x0C.0Ep-1", "-inf", "true"`.
+
 ## Guidelines

 ### Efficiency

--- a/include/flatbuffers/base.h
+++ b/include/flatbuffers/base.h
@@ -180,6 +180,17 @@
  #endif // __has_include
 #endif // !FLATBUFFERS_HAS_STRING_VIEW

+#ifndef FLATBUFFERS_HAS_NEW_STRTOD
+  // Modern (C++11) strtod and strtof functions are available for use.
+  // 1) nan/inf strings as argument of strtod;
+  // 2) hex-float  as argument of  strtod/strtof.
+  #if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
+      (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \
+      (defined(__clang__))
+    #define FLATBUFFERS_HAS_NEW_STRTOD 1
+  #endif
+#endif // !FLATBUFFERS_HAS_NEW_STRTOD
+
 /// @endcond

 /// @file

--- a/include/flatbuffers/idl.h
+++ b/include/flatbuffers/idl.h
@@ -484,7 +484,11 @@ struct IDLOptions {
 // This encapsulates where the parser is in the current source file.
 struct ParserState {
  ParserState()
-      : cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1) {}
+      : cursor_(nullptr),
+        line_start_(nullptr),
+        line_(0),
+        token_(-1),
+        attr_is_trivial_ascii_string_(true) {}

 protected:
  void ResetState(const char *source) {
@@ -508,6 +512,10 @@ struct ParserState {
  int line_;  // the current line being parsed
  int token_;

+  // Flag: text in attribute_ is true ASCII string without escape
+  // sequences. Only printable ASCII (without [\t\r\n]).
+  // Used for number-in-string (and base64 string in future).
+  bool attr_is_trivial_ascii_string_;
  std::string attribute_;
  std::vector<std::string> doc_comment_;
 };
@@ -644,7 +652,8 @@ class Parser : public ParserState {
  bool ParseFlexBuffer(const char *source, const char *source_filename,
                       flexbuffers::Builder *builder);

-  FLATBUFFERS_CHECKED_ERROR CheckInRange(int64_t val, int64_t min, int64_t max);
+  FLATBUFFERS_CHECKED_ERROR InvalidNumber(const char *number,
+                                          const std::string &msg);

  StructDef *LookupStruct(const std::string &id) const;

@@ -711,7 +720,7 @@ class Parser : public ParserState {
                                          BaseType req, bool *destmatch);
  FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field);
  FLATBUFFERS_CHECKED_ERROR TokenError();
-  FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e);
+  FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now);
  FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(Type &type, int64_t *result);
  StructDef *LookupCreateStruct(const std::string &name,
                                bool create_if_new = true,

--- a/include/flatbuffers/stl_emulation.h
+++ b/include/flatbuffers/stl_emulation.h
@@ -37,9 +37,9 @@
 // Not possible if Microsoft Compiler before 2012
 // Possible is the language feature __cpp_alias_templates is defined well
 // Or possible if the C++ std is C+11 or newer
-#if !(defined(_MSC_VER) && _MSC_VER <= 1700 /* MSVC2012 */) \
-  && ((defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
-    || (defined(__cplusplus) && __cplusplus >= 201103L))
+#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \
+    || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
+    || (defined(__cplusplus) && __cplusplus >= 201103L)
  #define FLATBUFFERS_TEMPLATES_ALIASES
 #endif

@@ -88,12 +88,33 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
  #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
 #else
  template <typename T> class numeric_limits :
-      public std::numeric_limits<T> {};
+      public std::numeric_limits<T> {
+    public:
+      // Android NDK fix.
+      static T lowest() {
+        return std::numeric_limits<T>::min();
+      }
+  };
+
+  template <> class numeric_limits<float> : 
+      public std::numeric_limits<float> {
+    public:
+      static float lowest() { return -FLT_MAX; }
+  };
+
+  template <> class numeric_limits<double> : 
+      public std::numeric_limits<double> {
+    public:
+      static double lowest() { return -DBL_MAX; }
+  };

  template <> class numeric_limits<unsigned long long> {
   public:
    static unsigned long long min() { return 0ULL; }
    static unsigned long long max() { return ~0ULL; }
+    static unsigned long long lowest() {
+      return numeric_limits<unsigned long long>::min();
+    }
  };

  template <> class numeric_limits<long long> {
@@ -105,6 +126,9 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
      return static_cast<long long>(
          (1ULL << ((sizeof(long long) << 3) - 1)) - 1);
    }
+    static long long lowest() {
+      return numeric_limits<long long>::min();
+    }
  };
 #endif  // FLATBUFFERS_CPP98_STL

@@ -114,6 +138,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
    template <typename T, typename U> using is_same = std::is_same<T,U>;
    template <typename T> using is_floating_point = std::is_floating_point<T>;
    template <typename T> using is_unsigned = std::is_unsigned<T>;
+    template <typename T> using make_unsigned = std::make_unsigned<T>;
  #else
    // Map C++ TR1 templates defined by stlport.
    template <typename T> using is_scalar = std::tr1::is_scalar<T>;
@@ -121,6 +146,13 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
    template <typename T> using is_floating_point =
        std::tr1::is_floating_point<T>;
    template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
+    // Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
+    template<typename T> struct make_unsigned {
+      static_assert(is_unsigned<T>::value, "Specialization not impelented!");
+      using type = T;
+    };
+    template<> struct make_unsigned<char> { using type = unsigned char; };
+    template<> struct make_unsigned<int>  { using type = unsigned int;  };
  #endif  // !FLATBUFFERS_CPP98_STL
 #else
  // MSVC 2010 doesn't support C++11 aliases.
@@ -129,6 +161,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
  template <typename T> struct is_floating_point :
        public std::is_floating_point<T> {};
  template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
+  template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
 #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)

 #ifndef FLATBUFFERS_CPP98_STL

--- a/include/flatbuffers/util.h
+++ b/include/flatbuffers/util.h
--- a/src/idl_gen_cpp.cpp
+++ b/src/idl_gen_cpp.cpp
@@ -145,7 +145,7 @@ class CppGenerator : public BaseGenerator {
    std::string guard = file_name_;
    // Remove any non-alpha-numeric characters that may appear in a filename.
    struct IsAlnum {
-      bool operator()(char c) const { return !isalnum(c); }
+      bool operator()(char c) const { return !is_alnum(c); }
    };
    guard.erase(std::remove_if(guard.begin(), guard.end(), IsAlnum()),
                guard.end());

--- a/src/idl_gen_text.cpp
+++ b/src/idl_gen_text.cpp
@@ -149,19 +149,23 @@ bool Print<const void *>(const void *val, Type type, int indent,
  return true;
 }

+template<typename T> static T GetFieldDefault(const FieldDef &fd) {
+  T val;
+  auto check = StringToNumber(fd.value.constant.c_str(), &val);
+  (void)check;
+  FLATBUFFERS_ASSERT(check);
+  return val;
+}
+
 // Generate text for a scalar field.
-template<typename T> static bool GenField(const FieldDef &fd,
-                                          const Table *table, bool fixed,
-                                          const IDLOptions &opts,
-                                          int indent,
-                                          std::string *_text) {
-  return Print(fixed ?
-    reinterpret_cast<const Struct *>(table)->GetField<T>(fd.value.offset) :
-    table->GetField<T>(fd.value.offset,
-    IsFloat(fd.value.type.base_type) ?
-    static_cast<T>(strtod(fd.value.constant.c_str(), nullptr)) :
-    static_cast<T>(StringToInt(fd.value.constant.c_str()))),
-    fd.value.type, indent, nullptr, opts, _text);
+template<typename T>
+static bool GenField(const FieldDef &fd, const Table *table, bool fixed,
+                     const IDLOptions &opts, int indent, std::string *_text) {
+  return Print(
+      fixed ? reinterpret_cast<const Struct *>(table)->GetField<T>(
+                  fd.value.offset)
+            : table->GetField<T>(fd.value.offset, GetFieldDefault<T>(fd)),
+      fd.value.type, indent, nullptr, opts, _text);
 }

 static bool GenStruct(const StructDef &struct_def, const Table *table,

--- a/src/idl_parser.cpp
+++ b/src/idl_parser.cpp
--- a/tests/test.cpp
+++ b/tests/test.cpp