Commit 12c4c223 authored by Vladimir Glavnyy, committed by Wouter van Oortmerssen

Output JSON strings as natural UTF-8 text without escapes (#4710)

* Added support for printing UTF-8 strings without escapes.

* EscapeString: the first invalid symbol resets the print_natural_utf8 flag to false.

* Move the test to ParseAndGenerateTextTest. Fixes.

* Removed the dependency between the `natural_utf8` and `allow_non_utf8` flags.
parent 85faa46f
...@@ -495,14 +495,14 @@ class Reference { ...@@ -495,14 +495,14 @@ class Reference {
if (type_ == TYPE_STRING) { if (type_ == TYPE_STRING) {
String str(Indirect(), byte_width_); String str(Indirect(), byte_width_);
if (strings_quoted) { if (strings_quoted) {
flatbuffers::EscapeString(str.c_str(), str.length(), &s, true); flatbuffers::EscapeString(str.c_str(), str.length(), &s, true, false);
} else { } else {
s.append(str.c_str(), str.length()); s.append(str.c_str(), str.length());
} }
} else if (IsKey()) { } else if (IsKey()) {
auto str = AsKey(); auto str = AsKey();
if (keys_quoted) { if (keys_quoted) {
flatbuffers::EscapeString(str, strlen(str), &s, true); flatbuffers::EscapeString(str, strlen(str), &s, true, false);
} else { } else {
s += str; s += str;
} }
......
...@@ -379,6 +379,7 @@ struct IDLOptions { ...@@ -379,6 +379,7 @@ struct IDLOptions {
std::string object_suffix; std::string object_suffix;
bool union_value_namespacing; bool union_value_namespacing;
bool allow_non_utf8; bool allow_non_utf8;
bool natural_utf8;
std::string include_prefix; std::string include_prefix;
bool keep_include_path; bool keep_include_path;
bool binary_schema_comments; bool binary_schema_comments;
...@@ -439,6 +440,7 @@ struct IDLOptions { ...@@ -439,6 +440,7 @@ struct IDLOptions {
object_suffix("T"), object_suffix("T"),
union_value_namespacing(true), union_value_namespacing(true),
allow_non_utf8(false), allow_non_utf8(false),
natural_utf8(false),
keep_include_path(false), keep_include_path(false),
binary_schema_comments(false), binary_schema_comments(false),
binary_schema_builtins(false), binary_schema_builtins(false),
......
...@@ -314,7 +314,7 @@ struct ToStringVisitor : public IterationVisitor { ...@@ -314,7 +314,7 @@ struct ToStringVisitor : public IterationVisitor {
void Float(float x) { s += NumToString(x); } void Float(float x) { s += NumToString(x); }
void Double(double x) { s += NumToString(x); } void Double(double x) { s += NumToString(x); }
void String(const struct String *str) { void String(const struct String *str) {
EscapeString(str->c_str(), str->size(), &s, true); EscapeString(str->c_str(), str->size(), &s, true, false);
} }
void Unknown(const uint8_t *) { s += "(?)"; } void Unknown(const uint8_t *) { s += "(?)"; }
void StartVector() { s += "[ "; } void StartVector() { s += "[ "; }
......
...@@ -381,7 +381,7 @@ inline std::string WordWrap(const std::string in, size_t max_length, ...@@ -381,7 +381,7 @@ inline std::string WordWrap(const std::string in, size_t max_length,
} }
inline bool EscapeString(const char *s, size_t length, std::string *_text, inline bool EscapeString(const char *s, size_t length, std::string *_text,
bool allow_non_utf8) { bool allow_non_utf8, bool natural_utf8) {
std::string &text = *_text; std::string &text = *_text;
text += "\""; text += "\"";
for (uoffset_t i = 0; i < length; i++) { for (uoffset_t i = 0; i < length; i++) {
...@@ -421,7 +421,10 @@ inline bool EscapeString(const char *s, size_t length, std::string *_text, ...@@ -421,7 +421,10 @@ inline bool EscapeString(const char *s, size_t length, std::string *_text,
return false; return false;
} }
} else { } else {
if (ucc <= 0xFFFF) { if (natural_utf8) {
// utf8 points to past all utf-8 bytes parsed
text.append(s + i, static_cast<size_t>(utf8 - s - i));
} else if (ucc <= 0xFFFF) {
// Parses as Unicode within JSON's \uXXXX range, so use that. // Parses as Unicode within JSON's \uXXXX range, so use that.
text += "\\u"; text += "\\u";
text += IntToStringHex(ucc, 4); text += IntToStringHex(ucc, 4);
......
...@@ -69,6 +69,8 @@ std::string FlatCompiler::GetUsageString(const char *program_name) const { ...@@ -69,6 +69,8 @@ std::string FlatCompiler::GetUsageString(const char *program_name) const {
" --allow-non-utf8 Pass non-UTF-8 input through parser and emit nonstandard\n" " --allow-non-utf8 Pass non-UTF-8 input through parser and emit nonstandard\n"
" \\x escapes in JSON. (Default is to raise parse error on\n" " \\x escapes in JSON. (Default is to raise parse error on\n"
" non-UTF-8 input.)\n" " non-UTF-8 input.)\n"
" --natural-utf8 Output strings with UTF-8 as human-readable strings.\n"
" By default, UTF-8 characters are printed as \\uXXXX escapes.\n"
" --defaults-json Output fields whose value is the default when\n" " --defaults-json Output fields whose value is the default when\n"
" writing JSON\n" " writing JSON\n"
" --unknown-json Allow fields in JSON that are not defined in the\n" " --unknown-json Allow fields in JSON that are not defined in the\n"
...@@ -182,6 +184,8 @@ int FlatCompiler::Compile(int argc, const char **argv) { ...@@ -182,6 +184,8 @@ int FlatCompiler::Compile(int argc, const char **argv) {
opts.strict_json = true; opts.strict_json = true;
} else if (arg == "--allow-non-utf8") { } else if (arg == "--allow-non-utf8") {
opts.allow_non_utf8 = true; opts.allow_non_utf8 = true;
} else if (arg == "--natural-utf8") {
opts.natural_utf8 = true;
} else if (arg == "--no-js-exports") { } else if (arg == "--no-js-exports") {
opts.skip_js_exports = true; opts.skip_js_exports = true;
} else if (arg == "--goog-js-export") { } else if (arg == "--goog-js-export") {
......
...@@ -119,7 +119,8 @@ bool Print<const void *>(const void *val, Type type, int indent, ...@@ -119,7 +119,8 @@ bool Print<const void *>(const void *val, Type type, int indent,
break; break;
case BASE_TYPE_STRING: { case BASE_TYPE_STRING: {
auto s = reinterpret_cast<const String *>(val); auto s = reinterpret_cast<const String *>(val);
if (!EscapeString(s->c_str(), s->Length(), _text, opts.allow_non_utf8)) { if (!EscapeString(s->c_str(), s->Length(), _text, opts.allow_non_utf8,
opts.natural_utf8)) {
return false; return false;
} }
break; break;
......
...@@ -91,7 +91,8 @@ std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data, ...@@ -91,7 +91,8 @@ std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data,
auto val = GetAnyFieldS(*table_field, fielddef, schema); auto val = GetAnyFieldS(*table_field, fielddef, schema);
if (fielddef.type()->base_type() == reflection::String) { if (fielddef.type()->base_type() == reflection::String) {
std::string esc; std::string esc;
flatbuffers::EscapeString(val.c_str(), val.length(), &esc, true); flatbuffers::EscapeString(val.c_str(), val.length(), &esc, true,
false);
val = esc; val = esc;
} }
s += fielddef.name()->str(); s += fielddef.name()->str();
......
/* /*
* Copyright 2014 Google Inc. All rights reserved. * Copyright 2014 Google Inc. All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -645,6 +645,22 @@ void ParseAndGenerateTextTest() { ...@@ -645,6 +645,22 @@ void ParseAndGenerateTextTest() {
// If this fails, check registry.lasterror_. // If this fails, check registry.lasterror_.
TEST_EQ(ok, true); TEST_EQ(ok, true);
TEST_EQ_STR(text.c_str(), jsonfile.c_str()); TEST_EQ_STR(text.c_str(), jsonfile.c_str());
// Generate text for UTF-8 strings without escapes.
std::string jsonfile_utf8;
TEST_EQ(flatbuffers::LoadFile((test_data_path + "unicode_test.json").c_str(),
false, &jsonfile_utf8),
true);
TEST_EQ(parser.Parse(jsonfile_utf8.c_str(), include_directories), true);
// To ensure it is correct, generate utf-8 text back from the binary.
std::string jsongen_utf8;
// request natural printing for utf-8 strings
parser.opts.natural_utf8 = true;
parser.opts.strict_json = true;
TEST_EQ(
GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen_utf8),
true);
TEST_EQ_STR(jsongen_utf8.c_str(), jsonfile_utf8.c_str());
} }
void ReflectionTest(uint8_t *flatbuf, size_t length) { void ReflectionTest(uint8_t *flatbuf, size_t length) {
......
{ {
"name": "unicode_test", "name": "unicode_test",
"testarrayoftables": [
{ "name": "Цлїςσδε" },
{ "name": "フムアムカモケモ" },
{ "name": "フムヤムカモケモ" },
{ "name": "㊀㊁㊂㊃㊄" },
{ "name": "☳☶☲" },
{ "name": "𡇙𝌆" }
],
"testarrayofstring": [ "testarrayofstring": [
"Цлїςσδε", "Цлїςσδε",
"フムアムカモケモ", "フムアムカモケモ",
...@@ -15,5 +7,25 @@ ...@@ -15,5 +7,25 @@
"㊀㊁㊂㊃㊄", "㊀㊁㊂㊃㊄",
"☳☶☲", "☳☶☲",
"𡇙𝌆" "𡇙𝌆"
],
"testarrayoftables": [
{
"name": "Цлїςσδε"
},
{
"name": "フムアムカモケモ"
},
{
"name": "フムヤムカモケモ"
},
{
"name": "㊀㊁㊂㊃㊄"
},
{
"name": "☳☶☲"
},
{
"name": "𡇙𝌆"
}
] ]
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment