Commit 75626ed7 authored by Jon Skeet's avatar Jon Skeet

Add C# codegen changes to enum value names (mostly C++)

Overview of changes:
- A new C#-specific command-line option, legacy_enum_values to revert to the old behavior
- When legacy_enum_values isn't specified, we strip the enum name as a prefix, and PascalCase the value name
- A new attribute within the C# code so that we can always tell the original in-proto name

Regenerating the C# code with legacy_enum_values leads to code which still compiles and works - but
there's more still to do.
parent 2a197b3e
...@@ -154,6 +154,7 @@ csharp_EXTRA_DIST= \ ...@@ -154,6 +154,7 @@ csharp_EXTRA_DIST= \
csharp/src/Google.Protobuf/Reflection/MethodDescriptor.cs \ csharp/src/Google.Protobuf/Reflection/MethodDescriptor.cs \
csharp/src/Google.Protobuf/Reflection/OneofAccessor.cs \ csharp/src/Google.Protobuf/Reflection/OneofAccessor.cs \
csharp/src/Google.Protobuf/Reflection/OneofDescriptor.cs \ csharp/src/Google.Protobuf/Reflection/OneofDescriptor.cs \
csharp/src/Google.Protobuf/Reflection/OriginalNameAttribute.cs \
csharp/src/Google.Protobuf/Reflection/PackageDescriptor.cs \ csharp/src/Google.Protobuf/Reflection/PackageDescriptor.cs \
csharp/src/Google.Protobuf/Reflection/PartialClasses.cs \ csharp/src/Google.Protobuf/Reflection/PartialClasses.cs \
csharp/src/Google.Protobuf/Reflection/ReflectionUtil.cs \ csharp/src/Google.Protobuf/Reflection/ReflectionUtil.cs \
......
...@@ -117,6 +117,7 @@ ...@@ -117,6 +117,7 @@
<Compile Include="Reflection\MethodDescriptor.cs" /> <Compile Include="Reflection\MethodDescriptor.cs" />
<Compile Include="Reflection\OneofAccessor.cs" /> <Compile Include="Reflection\OneofAccessor.cs" />
<Compile Include="Reflection\OneofDescriptor.cs" /> <Compile Include="Reflection\OneofDescriptor.cs" />
<Compile Include="Reflection\OriginalNameAttribute.cs" />
<Compile Include="Reflection\PackageDescriptor.cs" /> <Compile Include="Reflection\PackageDescriptor.cs" />
<Compile Include="Reflection\PartialClasses.cs" /> <Compile Include="Reflection\PartialClasses.cs" />
<Compile Include="Reflection\ReflectionUtil.cs" /> <Compile Include="Reflection\ReflectionUtil.cs" />
......
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using System;
namespace Google.Protobuf.Reflection
{
/// <summary>
/// Specifies the original name (in the .proto file) of a named element,
/// such as an enum value.
/// </summary>
[AttributeUsage(AttributeTargets.Field)]
public class OriginalNameAttribute : Attribute
{
/// <summary>
/// The name of the element in the .proto file.
/// </summary>
public string Name { get; set; }
/// <summary>
/// Constructs a new attribute instance for the given name.
/// </summary>
/// <param name="name">The name of the element in the .proto file.</param>
public OriginalNameAttribute(string name)
{
Name = ProtoPreconditions.CheckNotNull(name, nameof(name));
}
}
}
\ No newline at end of file
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include <google/protobuf/compiler/csharp/csharp_doc_comment.h> #include <google/protobuf/compiler/csharp/csharp_doc_comment.h>
#include <google/protobuf/compiler/csharp/csharp_enum.h> #include <google/protobuf/compiler/csharp/csharp_enum.h>
#include <google/protobuf/compiler/csharp/csharp_helpers.h> #include <google/protobuf/compiler/csharp/csharp_helpers.h>
#include <google/protobuf/compiler/csharp/csharp_options.h>
using google::protobuf::internal::scoped_ptr; using google::protobuf::internal::scoped_ptr;
...@@ -64,10 +65,23 @@ void EnumGenerator::Generate(io::Printer* printer) { ...@@ -64,10 +65,23 @@ void EnumGenerator::Generate(io::Printer* printer) {
"access_level", class_access_level(), "access_level", class_access_level(),
"name", descriptor_->name()); "name", descriptor_->name());
printer->Indent(); printer->Indent();
std::set<string> used_names;
for (int i = 0; i < descriptor_->value_count(); i++) { for (int i = 0; i < descriptor_->value_count(); i++) {
WriteEnumValueDocComment(printer, descriptor_->value(i)); WriteEnumValueDocComment(printer, descriptor_->value(i));
printer->Print("$name$ = $number$,\n", string original_name = descriptor_->value(i)->name();
"name", descriptor_->value(i)->name(), string name = options()->legacy_enum_values
? descriptor_->value(i)->name()
: GetEnumValueName(descriptor_->name(), descriptor_->value(i)->name());
// Make sure we don't get any duplicate names due to prefix removal.
while (!used_names.insert(name).second) {
// It's possible we'll end up giving this warning multiple times, but that's better than not at all.
GOOGLE_LOG(WARNING) << "Duplicate enum value " << name << " (originally " << original_name
<< ") in " << descriptor_->name() << "; adding underscore to distinguish";
name += "_";
}
printer->Print("[pbr::OriginalName(\"$original_name$\")] $name$ = $number$,\n",
"original_name", original_name,
"name", name,
"number", SimpleItoa(descriptor_->value(i)->number())); "number", SimpleItoa(descriptor_->value(i)->number()));
} }
printer->Outdent(); printer->Outdent();
......
...@@ -83,6 +83,9 @@ bool Generator::Generate( ...@@ -83,6 +83,9 @@ bool Generator::Generate(
cli_options.base_namespace_specified = true; cli_options.base_namespace_specified = true;
} else if (options[i].first == "internal_access") { } else if (options[i].first == "internal_access") {
cli_options.internal_access = true; cli_options.internal_access = true;
} else if (options[i].first == "legacy_enum_values") {
// TODO: Remove this before final release
cli_options.legacy_enum_values = true;
} else { } else {
*error = "Unknown generator option: " + options[i].first; *error = "Unknown generator option: " + options[i].first;
return false; return false;
......
...@@ -30,8 +30,8 @@ ...@@ -30,8 +30,8 @@
#include <memory> #include <memory>
#include <google/protobuf/compiler/ruby/ruby_generator.h>
#include <google/protobuf/compiler/command_line_interface.h> #include <google/protobuf/compiler/command_line_interface.h>
#include <google/protobuf/compiler/csharp/csharp_helpers.h>
#include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/printer.h> #include <google/protobuf/io/printer.h>
...@@ -45,7 +45,23 @@ namespace compiler { ...@@ -45,7 +45,23 @@ namespace compiler {
namespace csharp { namespace csharp {
namespace { namespace {
// TODO(jtattermusch): add some tests. TEST(CSharpEnumValue, PascalCasedPrefixStripping) {
EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR"));
EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ"));
EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO_BAR"));
EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO__BAR"));
EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "FOO_BAR_BAZ"));
EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "Foo_BarBaz"));
EXPECT_EQ("Bar", GetEnumValueName("FO_O", "FOO_BAR"));
EXPECT_EQ("Bar", GetEnumValueName("FOO", "F_O_O_BAR"));
EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR"));
EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ"));
EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO"));
EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO___"));
// Identifiers can't start with digits
EXPECT_EQ("_2Bar", GetEnumValueName("Foo", "FOO_2_BAR"));
EXPECT_EQ("_2", GetEnumValueName("Foo", "FOO___2"));
}
} // namespace } // namespace
} // namespace csharp } // namespace csharp
......
...@@ -178,6 +178,99 @@ std::string UnderscoresToPascalCase(const std::string& input) { ...@@ -178,6 +178,99 @@ std::string UnderscoresToPascalCase(const std::string& input) {
return UnderscoresToCamelCase(input, true); return UnderscoresToCamelCase(input, true);
} }
// Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
// into a PascalCase string. Precise rules implemented:
// Previous input character Current character Case
// Any Non-alphanumeric Skipped
// None - first char of input Alphanumeric Upper
// Non-letter (e.g. _ or 1) Alphanumeric Upper
// Numeric Alphanumeric Upper
// Lower letter Alphanumeric Same as current
// Upper letter Alphanumeric Lower
std::string ShoutyToPascalCase(const std::string& input) {
string result;
// Simple way of implementing "always start with upper"
char previous = '_';
for (int i = 0; i < input.size(); i++) {
char current = input[i];
if (!ascii_isalnum(current)) {
previous = current;
continue;
}
if (!ascii_isalnum(previous)) {
result += ascii_toupper(current);
} else if (ascii_isdigit(previous)) {
result += ascii_toupper(current);
} else if (ascii_islower(previous)) {
result += current;
} else {
result += ascii_tolower(current);
}
previous = current;
}
return result;
}
// Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
// (foo, foo_bar) => bar - underscore after prefix is skipped
// (FOO, foo_bar) => bar - casing is ignored
// (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
// (foobar, foo_barbaz) => baz - underscore in value is ignored
// (foo, bar) => bar - prefix isn't matched; return original value
std::string TryRemovePrefix(const std::string& prefix, const std::string& value) {
// First normalize to a lower-case no-underscores prefix to match against
std::string prefix_to_match = "";
for (size_t i = 0; i < prefix.size(); i++) {
if (prefix[i] != '_') {
prefix_to_match += ascii_tolower(prefix[i]);
}
}
// This keeps track of how much of value we've consumed
size_t prefix_index, value_index;
for (prefix_index = 0, value_index = 0;
prefix_index < prefix_to_match.size() && value_index < value.size();
value_index++) {
// Skip over underscores in the value
if (value[value_index] == '_') {
continue;
}
if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
// Failed to match the prefix - bail out early.
return value;
}
}
// If we didn't finish looking through the prefix, we can't strip it.
if (prefix_index < prefix_to_match.size()) {
return value;
}
// Step over any underscores after the prefix
while (value_index < value.size() && value[value_index] == '_') {
value_index++;
}
// If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
if (value_index == value.size()) {
return value;
}
return value.substr(value_index);
}
std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) {
std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
std::string result = ShoutyToPascalCase(stripped);
// Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
// string is a valid identifier.
if (ascii_isdigit(result[0])) {
result = "_" + result;
}
return result;
}
std::string ToCSharpName(const std::string& name, const FileDescriptor* file) { std::string ToCSharpName(const std::string& name, const FileDescriptor* file) {
std::string result = GetFileNamespace(file); std::string result = GetFileNamespace(file);
if (result != "") { if (result != "") {
......
...@@ -93,6 +93,8 @@ inline std::string UnderscoresToCamelCase(const std::string& input, bool cap_nex ...@@ -93,6 +93,8 @@ inline std::string UnderscoresToCamelCase(const std::string& input, bool cap_nex
std::string UnderscoresToPascalCase(const std::string& input); std::string UnderscoresToPascalCase(const std::string& input);
std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name);
// TODO(jtattermusch): perhaps we could move this to strutil // TODO(jtattermusch): perhaps we could move this to strutil
std::string StringToBase64(const std::string& input); std::string StringToBase64(const std::string& input);
......
...@@ -45,7 +45,8 @@ struct Options { ...@@ -45,7 +45,8 @@ struct Options {
file_extension(".cs"), file_extension(".cs"),
base_namespace(""), base_namespace(""),
base_namespace_specified(false), base_namespace_specified(false),
internal_access(false) { internal_access(false),
legacy_enum_values(false) {
} }
// Extension of the generated file. Defaults to ".cs" // Extension of the generated file. Defaults to ".cs"
string file_extension; string file_extension;
...@@ -68,6 +69,12 @@ struct Options { ...@@ -68,6 +69,12 @@ struct Options {
// Whether the generated classes should have accessibility level of "internal". // Whether the generated classes should have accessibility level of "internal".
// Defaults to false that generates "public" classes. // Defaults to false that generates "public" classes.
bool internal_access; bool internal_access;
// By default, C# codegen now uses PascalCased enum values names, after
// removing the enum type name as a prefix (if it *is* a prefix of the value).
// Setting this option reverts to the previous behavior of just copying the
// value name specified in the .proto file, allowing gradual migration.
// This option will be removed before final release.
bool legacy_enum_values;
}; };
} // namespace csharp } // namespace csharp
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment