Commit b17ec3ca authored by Feng Xiao's avatar Feng Xiao

Down-integrate from internal code base.

parent eee38b0c
...@@ -99,6 +99,12 @@ set(common_test_files ...@@ -99,6 +99,12 @@ set(common_test_files
${protobuf_source_dir}/src/google/protobuf/testing/googletest.cc ${protobuf_source_dir}/src/google/protobuf/testing/googletest.cc
) )
# Helper sources shared by the lite-runtime test executables; the list is
# passed to both the lite-test and lite-arena-test targets.
set(common_lite_test_files
${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc
${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc
${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc
)
set(tests_files set(tests_files
${protobuf_source_dir}/src/google/protobuf/any_test.cc ${protobuf_source_dir}/src/google/protobuf/any_test.cc
${protobuf_source_dir}/src/google/protobuf/arena_unittest.cc ${protobuf_source_dir}/src/google/protobuf/arena_unittest.cc
...@@ -179,10 +185,13 @@ add_executable(test_plugin ${test_plugin_files}) ...@@ -179,10 +185,13 @@ add_executable(test_plugin ${test_plugin_files})
target_link_libraries(test_plugin libprotoc libprotobuf gmock) target_link_libraries(test_plugin libprotoc libprotobuf gmock)
set(lite_test_files set(lite_test_files
${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc
${protobuf_source_dir}/src/google/protobuf/lite_unittest.cc ${protobuf_source_dir}/src/google/protobuf/lite_unittest.cc
${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc
${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc
) )
add_executable(lite-test ${lite_test_files} ${lite_test_proto_files}) add_executable(lite-test ${lite_test_files} ${common_lite_test_files} ${lite_test_proto_files})
target_link_libraries(lite-test libprotobuf-lite) target_link_libraries(lite-test libprotobuf-lite)
# Arena unit test for the lite runtime. It is built from its own test source
# plus the shared lite helpers and the generated lite test protos, and links
# gmock_main in addition to libprotobuf-lite.
set(lite_arena_test_files
${protobuf_source_dir}/src/google/protobuf/lite_arena_unittest.cc
)
add_executable(lite-arena-test ${lite_arena_test_files} ${common_lite_test_files} ${lite_test_proto_files})
target_link_libraries(lite-arena-test libprotobuf-lite gmock_main)
...@@ -142,6 +142,7 @@ ...@@ -142,6 +142,7 @@
<arg value="../src/google/protobuf/unittest_enormous_descriptor.proto" /> <arg value="../src/google/protobuf/unittest_enormous_descriptor.proto" />
<arg value="../src/google/protobuf/unittest_no_generic_services.proto" /> <arg value="../src/google/protobuf/unittest_no_generic_services.proto" />
<arg value="../src/google/protobuf/unittest_well_known_types.proto" /> <arg value="../src/google/protobuf/unittest_well_known_types.proto" />
<arg value="src/test/java/com/google/protobuf/any_test.proto" />
<arg value="src/test/java/com/google/protobuf/field_presence_test.proto" /> <arg value="src/test/java/com/google/protobuf/field_presence_test.proto" />
<arg value="src/test/java/com/google/protobuf/map_for_proto2_lite_test.proto" /> <arg value="src/test/java/com/google/protobuf/map_for_proto2_lite_test.proto" />
<arg value="src/test/java/com/google/protobuf/map_for_proto2_test.proto" /> <arg value="src/test/java/com/google/protobuf/map_for_proto2_test.proto" />
......
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.protobuf;
import any_test.AnyTestProto.TestAny;
import protobuf_unittest.UnittestProto.TestAllTypes;
import junit.framework.TestCase;
/**
 * Unit tests for the {@code Any} message: packing a message, checking the
 * packed type, unpacking it, and caching of the unpacked result.
 */
public class AnyTest extends TestCase {
  /**
   * Packs a fully populated message into an {@code Any} and verifies the type
   * checks, a successful unpack, and both failure modes of {@code unpack}
   * (wrong target type and unparsable payload).
   */
  public void testAnyGeneratedApi() throws Exception {
    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
    TestUtil.setAllFields(builder);
    TestAllTypes message = builder.build();

    TestAny container = TestAny.newBuilder()
        .setValue(Any.pack(message)).build();

    assertTrue(container.getValue().is(TestAllTypes.class));
    assertFalse(container.getValue().is(TestAny.class));

    TestAllTypes result = container.getValue().unpack(TestAllTypes.class);
    TestUtil.assertAllFieldsSet(result);

    // Unpacking to a wrong type will throw an exception.
    try {
      container.getValue().unpack(TestAny.class);
      fail("Exception is expected.");
    } catch (InvalidProtocolBufferException e) {
      // expected.
    }

    // Test that unpacking throws an exception if parsing fails.
    TestAny.Builder containerBuilder = container.toBuilder();
    containerBuilder.getValueBuilder().setValue(
        ByteString.copyFrom(new byte[]{0x11}));
    container = containerBuilder.build();
    try {
      container.getValue().unpack(TestAllTypes.class);
      fail("Exception is expected.");
    } catch (InvalidProtocolBufferException e) {
      // expected.
    }
  }

  /**
   * Verifies that unpacking the same {@code Any} twice yields the very same
   * object instance rather than two equal copies.
   */
  public void testCachedUnpackResult() throws Exception {
    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
    TestUtil.setAllFields(builder);
    TestAllTypes message = builder.build();

    TestAny container = TestAny.newBuilder()
        .setValue(Any.pack(message)).build();

    assertTrue(container.getValue().is(TestAllTypes.class));

    TestAllTypes result1 = container.getValue().unpack(TestAllTypes.class);
    TestAllTypes result2 = container.getValue().unpack(TestAllTypes.class);
    // Identity, not equality: assertSame reports both objects on failure.
    assertSame(result1, result2);
  }
}
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package any_test;
option java_package = "any_test";
option java_outer_classname = "AnyTestProto";
import "google/protobuf/any.proto";
// Container message whose only field is a google.protobuf.Any, used to
// exercise the Any API through a generated message.
message TestAny {
google.protobuf.Any value = 1;
}
...@@ -62,6 +62,14 @@ namespace google { ...@@ -62,6 +62,14 @@ namespace google {
namespace protobuf { namespace protobuf {
namespace python { namespace python {
// Store interned descriptors, so that the same C++ descriptor yields the same
// Python object. Objects are not immortal: this map does not own the
// references, and items are deleted when the last reference to the object is
// released.
// This is enough to support the "is" operator on live objects.
// All descriptors are stored here.
hash_map<const void*, PyObject*> interned_descriptors;
PyObject* PyString_FromCppString(const string& str) { PyObject* PyString_FromCppString(const string& str) {
return PyString_FromStringAndSize(str.c_str(), str.size()); return PyString_FromStringAndSize(str.c_str(), str.size());
} }
...@@ -147,6 +155,24 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) { ...@@ -147,6 +155,24 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) {
// Helper functions for descriptor objects. // Helper functions for descriptor objects.
// GetFileDescriptor(): returns the C++ FileDescriptor associated with a
// descriptor of any kind. The primary template covers descriptor types that
// expose file() themselves; the specializations handle the remaining kinds.

// Generic case: ask the descriptor for its file.
template<class DescriptorClass>
const FileDescriptor* GetFileDescriptor(const DescriptorClass* descriptor) {
  return descriptor->file();
}

// A FileDescriptor is returned unchanged.
template<>
const FileDescriptor* GetFileDescriptor<FileDescriptor>(
    const FileDescriptor* descriptor) {
  return descriptor;
}

// An enum value is resolved through its type().
template<>
const FileDescriptor* GetFileDescriptor<EnumValueDescriptor>(
    const EnumValueDescriptor* descriptor) {
  return descriptor->type()->file();
}

// A oneof is resolved through its containing_type().
template<>
const FileDescriptor* GetFileDescriptor<OneofDescriptor>(
    const OneofDescriptor* descriptor) {
  return descriptor->containing_type()->file();
}
// Converts options into a Python protobuf, and cache the result. // Converts options into a Python protobuf, and cache the result.
// //
// This is a bit tricky because options can contain extension fields defined in // This is a bit tricky because options can contain extension fields defined in
...@@ -156,8 +182,13 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) { ...@@ -156,8 +182,13 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) {
// Always returns a new reference. // Always returns a new reference.
template<class DescriptorClass> template<class DescriptorClass>
static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
// Options (and their extensions) are completely resolved in the proto file
// containing the descriptor.
PyDescriptorPool* pool = GetDescriptorPool_FromPool(
GetFileDescriptor(descriptor)->pool());
hash_map<const void*, PyObject*>* descriptor_options = hash_map<const void*, PyObject*>* descriptor_options =
GetDescriptorPool()->descriptor_options; pool->descriptor_options;
// First search in the cache. // First search in the cache.
if (descriptor_options->find(descriptor) != descriptor_options->end()) { if (descriptor_options->find(descriptor) != descriptor_options->end()) {
PyObject *value = (*descriptor_options)[descriptor]; PyObject *value = (*descriptor_options)[descriptor];
...@@ -170,7 +201,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { ...@@ -170,7 +201,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
const Message& options(descriptor->options()); const Message& options(descriptor->options());
const Descriptor *message_type = options.GetDescriptor(); const Descriptor *message_type = options.GetDescriptor();
PyObject* message_class(cdescriptor_pool::GetMessageClass( PyObject* message_class(cdescriptor_pool::GetMessageClass(
GetDescriptorPool(), message_type)); pool, message_type));
if (message_class == NULL) { if (message_class == NULL) {
PyErr_Format(PyExc_TypeError, "Could not retrieve class for Options: %s", PyErr_Format(PyExc_TypeError, "Could not retrieve class for Options: %s",
message_type->full_name().c_str()); message_type->full_name().c_str());
...@@ -192,7 +223,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { ...@@ -192,7 +223,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
options.SerializeToString(&serialized); options.SerializeToString(&serialized);
io::CodedInputStream input( io::CodedInputStream input(
reinterpret_cast<const uint8*>(serialized.c_str()), serialized.size()); reinterpret_cast<const uint8*>(serialized.c_str()), serialized.size());
input.SetExtensionRegistry(GetDescriptorPool()->pool, input.SetExtensionRegistry(pool->pool,
GetDescriptorPool()->message_factory); GetDescriptorPool()->message_factory);
bool success = cmsg->message->MergePartialFromCodedStream(&input); bool success = cmsg->message->MergePartialFromCodedStream(&input);
if (!success) { if (!success) {
...@@ -203,7 +234,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { ...@@ -203,7 +234,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
// Cache the result. // Cache the result.
Py_INCREF(value); Py_INCREF(value);
(*GetDescriptorPool()->descriptor_options)[descriptor] = value.get(); (*pool->descriptor_options)[descriptor] = value.get();
return value.release(); return value.release();
} }
...@@ -237,6 +268,9 @@ typedef struct PyBaseDescriptor { ...@@ -237,6 +268,9 @@ typedef struct PyBaseDescriptor {
// Pointer to the C++ proto2 descriptor. // Pointer to the C++ proto2 descriptor.
// Like all descriptors, it is owned by the global DescriptorPool. // Like all descriptors, it is owned by the global DescriptorPool.
const void* descriptor; const void* descriptor;
// Owned reference to the DescriptorPool, to ensure it is kept alive.
PyDescriptorPool* pool;
} PyBaseDescriptor; } PyBaseDescriptor;
...@@ -258,7 +292,9 @@ namespace descriptor { ...@@ -258,7 +292,9 @@ namespace descriptor {
// 'was_created' is an optional pointer to a bool, and is set to true if a new // 'was_created' is an optional pointer to a bool, and is set to true if a new
// object was allocated. // object was allocated.
// Always return a new reference. // Always return a new reference.
PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, template<class DescriptorClass>
PyObject* NewInternedDescriptor(PyTypeObject* type,
const DescriptorClass* descriptor,
bool* was_created) { bool* was_created) {
if (was_created) { if (was_created) {
*was_created = false; *was_created = false;
...@@ -270,8 +306,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, ...@@ -270,8 +306,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
// See if the object is in the map of interned descriptors // See if the object is in the map of interned descriptors
hash_map<const void*, PyObject*>::iterator it = hash_map<const void*, PyObject*>::iterator it =
GetDescriptorPool()->interned_descriptors->find(descriptor); interned_descriptors.find(descriptor);
if (it != GetDescriptorPool()->interned_descriptors->end()) { if (it != interned_descriptors.end()) {
GOOGLE_DCHECK(Py_TYPE(it->second) == type); GOOGLE_DCHECK(Py_TYPE(it->second) == type);
Py_INCREF(it->second); Py_INCREF(it->second);
return it->second; return it->second;
...@@ -283,10 +319,21 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, ...@@ -283,10 +319,21 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
return NULL; return NULL;
} }
py_descriptor->descriptor = descriptor; py_descriptor->descriptor = descriptor;
// and cache it. // and cache it.
GetDescriptorPool()->interned_descriptors->insert( interned_descriptors.insert(
std::make_pair(descriptor, reinterpret_cast<PyObject*>(py_descriptor))); std::make_pair(descriptor, reinterpret_cast<PyObject*>(py_descriptor)));
// Ensures that the DescriptorPool stays alive.
PyDescriptorPool* pool = GetDescriptorPool_FromPool(
GetFileDescriptor(descriptor)->pool());
if (pool == NULL) {
Py_DECREF(py_descriptor);
return NULL;
}
Py_INCREF(pool);
py_descriptor->pool = pool;
if (was_created) { if (was_created) {
*was_created = true; *was_created = true;
} }
...@@ -295,7 +342,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, ...@@ -295,7 +342,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
static void Dealloc(PyBaseDescriptor* self) { static void Dealloc(PyBaseDescriptor* self) {
// Remove from interned dictionary // Remove from interned dictionary
GetDescriptorPool()->interned_descriptors->erase(self->descriptor); interned_descriptors.erase(self->descriptor);
Py_CLEAR(self->pool);
Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self)); Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
} }
......
...@@ -54,9 +54,13 @@ namespace google { ...@@ -54,9 +54,13 @@ namespace google {
namespace protobuf { namespace protobuf {
namespace python { namespace python {
// A map to cache Python Pools per C++ pointer.
// Pointers are not owned here, and belong to the PyDescriptorPool.
static hash_map<const DescriptorPool*, PyDescriptorPool*> descriptor_pool_map;
namespace cdescriptor_pool { namespace cdescriptor_pool {
PyDescriptorPool* NewDescriptorPool() { static PyDescriptorPool* NewDescriptorPool() {
PyDescriptorPool* cdescriptor_pool = PyObject_New( PyDescriptorPool* cdescriptor_pool = PyObject_New(
PyDescriptorPool, &PyDescriptorPool_Type); PyDescriptorPool, &PyDescriptorPool_Type);
if (cdescriptor_pool == NULL) { if (cdescriptor_pool == NULL) {
...@@ -77,22 +81,27 @@ PyDescriptorPool* NewDescriptorPool() { ...@@ -77,22 +81,27 @@ PyDescriptorPool* NewDescriptorPool() {
// storage. // storage.
cdescriptor_pool->classes_by_descriptor = cdescriptor_pool->classes_by_descriptor =
new PyDescriptorPool::ClassesByMessageMap(); new PyDescriptorPool::ClassesByMessageMap();
cdescriptor_pool->interned_descriptors =
new hash_map<const void*, PyObject *>();
cdescriptor_pool->descriptor_options = cdescriptor_pool->descriptor_options =
new hash_map<const void*, PyObject *>(); new hash_map<const void*, PyObject *>();
if (!descriptor_pool_map.insert(
std::make_pair(cdescriptor_pool->pool, cdescriptor_pool)).second) {
// Should never happen -- would indicate an internal error / bug.
PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered");
return NULL;
}
return cdescriptor_pool; return cdescriptor_pool;
} }
static void Dealloc(PyDescriptorPool* self) { static void Dealloc(PyDescriptorPool* self) {
typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator; typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
descriptor_pool_map.erase(self->pool);
for (iterator it = self->classes_by_descriptor->begin(); for (iterator it = self->classes_by_descriptor->begin();
it != self->classes_by_descriptor->end(); ++it) { it != self->classes_by_descriptor->end(); ++it) {
Py_DECREF(it->second); Py_DECREF(it->second);
} }
delete self->classes_by_descriptor; delete self->classes_by_descriptor;
delete self->interned_descriptors; // its references were borrowed.
for (hash_map<const void*, PyObject*>::iterator it = for (hash_map<const void*, PyObject*>::iterator it =
self->descriptor_options->begin(); self->descriptor_options->begin();
it != self->descriptor_options->end(); ++it) { it != self->descriptor_options->end(); ++it) {
...@@ -391,22 +400,43 @@ PyTypeObject PyDescriptorPool_Type = { ...@@ -391,22 +400,43 @@ PyTypeObject PyDescriptorPool_Type = {
PyObject_Del, // tp_free PyObject_Del, // tp_free
}; };
static PyDescriptorPool* global_cdescriptor_pool = NULL; // This is the DescriptorPool which contains all the definitions from the
// generated _pb2.py modules.
static PyDescriptorPool* python_generated_pool = NULL;
bool InitDescriptorPool() { bool InitDescriptorPool() {
if (PyType_Ready(&PyDescriptorPool_Type) < 0) if (PyType_Ready(&PyDescriptorPool_Type) < 0)
return false; return false;
global_cdescriptor_pool = cdescriptor_pool::NewDescriptorPool(); python_generated_pool = cdescriptor_pool::NewDescriptorPool();
if (global_cdescriptor_pool == NULL) { if (python_generated_pool == NULL) {
return false; return false;
} }
// Register this pool to be found for C++-generated descriptors.
descriptor_pool_map.insert(
std::make_pair(DescriptorPool::generated_pool(),
python_generated_pool));
return true; return true;
} }
PyDescriptorPool* GetDescriptorPool() { PyDescriptorPool* GetDescriptorPool() {
return global_cdescriptor_pool; return python_generated_pool;
}
// Returns the Python descriptor pool registered for a C++ DescriptorPool.
//
// python_generated_pool is returned directly both for its own underlying C++
// pool and for DescriptorPool::generated_pool(); any other pool is looked up
// in descriptor_pool_map.
//
// No reference is taken on the result. Returns NULL with a Python KeyError
// set when the pool has not been registered.
PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) {
  // Fast path for standard descriptors.
  if (pool == python_generated_pool->pool ||
      pool == DescriptorPool::generated_pool()) {
    return python_generated_pool;
  }
  hash_map<const DescriptorPool*, PyDescriptorPool*>::iterator it =
      descriptor_pool_map.find(pool);
  // find() yields end() when the key is absent; end() must never be
  // dereferenced, so this is the error branch.
  if (it == descriptor_pool_map.end()) {
    PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool");
    return NULL;
  }
  return it->second;
}
} // namespace python } // namespace python
......
...@@ -72,14 +72,6 @@ typedef struct PyDescriptorPool { ...@@ -72,14 +72,6 @@ typedef struct PyDescriptorPool {
typedef hash_map<const Descriptor*, PyObject*> ClassesByMessageMap; typedef hash_map<const Descriptor*, PyObject*> ClassesByMessageMap;
ClassesByMessageMap* classes_by_descriptor; ClassesByMessageMap* classes_by_descriptor;
// Store interned descriptors, so that the same C++ descriptor yields the same
// Python object. Objects are not immortal: this map does not own the
// references, and items are deleted when the last reference to the object is
// released.
// This is enough to support the "is" operator on live objects.
// All descriptors are stored here.
hash_map<const void*, PyObject*>* interned_descriptors;
// Cache the options for any kind of descriptor. // Cache the options for any kind of descriptor.
// Descriptor pointers are owned by the DescriptorPool above. // Descriptor pointers are owned by the DescriptorPool above.
// Python objects are owned by the map. // Python objects are owned by the map.
...@@ -91,9 +83,6 @@ extern PyTypeObject PyDescriptorPool_Type; ...@@ -91,9 +83,6 @@ extern PyTypeObject PyDescriptorPool_Type;
namespace cdescriptor_pool { namespace cdescriptor_pool {
// Builds a new DescriptorPool. Normally called only once per process.
PyDescriptorPool* NewDescriptorPool();
// Looks up a message by name. // Looks up a message by name.
// Returns a message Descriptor, or NULL if not found. // Returns a message Descriptor, or NULL if not found.
const Descriptor* FindMessageTypeByName(PyDescriptorPool* self, const Descriptor* FindMessageTypeByName(PyDescriptorPool* self,
...@@ -150,6 +139,10 @@ PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg); ...@@ -150,6 +139,10 @@ PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg);
// Returns a *borrowed* reference. // Returns a *borrowed* reference.
PyDescriptorPool* GetDescriptorPool(); PyDescriptorPool* GetDescriptorPool();
// Retrieve the python descriptor pool owning a C++ descriptor pool.
// Returns a *borrowed* reference.
PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool);
// Initialize objects used by this module. // Initialize objects used by this module.
bool InitDescriptorPool(); bool InitDescriptorPool();
......
...@@ -191,6 +191,7 @@ libprotobuf_lite_la_SOURCES = \ ...@@ -191,6 +191,7 @@ libprotobuf_lite_la_SOURCES = \
google/protobuf/stubs/stringpiece.h \ google/protobuf/stubs/stringpiece.h \
google/protobuf/stubs/stringprintf.cc \ google/protobuf/stubs/stringprintf.cc \
google/protobuf/stubs/stringprintf.h \ google/protobuf/stubs/stringprintf.h \
google/protobuf/stubs/structurally_valid.cc \
google/protobuf/stubs/strutil.cc \ google/protobuf/stubs/strutil.cc \
google/protobuf/stubs/strutil.h \ google/protobuf/stubs/strutil.h \
google/protobuf/stubs/time.cc \ google/protobuf/stubs/time.cc \
...@@ -232,7 +233,6 @@ libprotobuf_la_SOURCES = \ ...@@ -232,7 +233,6 @@ libprotobuf_la_SOURCES = \
google/protobuf/service.cc \ google/protobuf/service.cc \
google/protobuf/source_context.pb.cc \ google/protobuf/source_context.pb.cc \
google/protobuf/struct.pb.cc \ google/protobuf/struct.pb.cc \
google/protobuf/stubs/structurally_valid.cc \
google/protobuf/stubs/substitute.cc \ google/protobuf/stubs/substitute.cc \
google/protobuf/stubs/substitute.h \ google/protobuf/stubs/substitute.h \
google/protobuf/text_format.cc \ google/protobuf/text_format.cc \
...@@ -659,7 +659,8 @@ COMMON_TEST_SOURCES = \ ...@@ -659,7 +659,8 @@ COMMON_TEST_SOURCES = \
google/protobuf/testing/file.h google/protobuf/testing/file.h
check_PROGRAMS = protoc protobuf-test protobuf-lazy-descriptor-test \ check_PROGRAMS = protoc protobuf-test protobuf-lazy-descriptor-test \
protobuf-lite-test test_plugin $(GZCHECKPROGRAMS) protobuf-lite-test test_plugin protobuf-lite-arena-test \
$(GZCHECKPROGRAMS)
protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \ protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \
../gmock/gtest/lib/libgtest.la \ ../gmock/gtest/lib/libgtest.la \
../gmock/lib/libgmock.la \ ../gmock/lib/libgmock.la \
...@@ -756,21 +757,40 @@ protobuf_lazy_descriptor_test_SOURCES = \ ...@@ -756,21 +757,40 @@ protobuf_lazy_descriptor_test_SOURCES = \
$(COMMON_TEST_SOURCES) $(COMMON_TEST_SOURCES)
nodist_protobuf_lazy_descriptor_test_SOURCES = $(protoc_outputs) nodist_protobuf_lazy_descriptor_test_SOURCES = $(protoc_outputs)
# Build lite_unittest separately, since it doesn't use gtest. COMMON_LITE_TEST_SOURCES = \
protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la
protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
protobuf_lite_test_SOURCES = \
google/protobuf/arena_test_util.cc \ google/protobuf/arena_test_util.cc \
google/protobuf/arena_test_util.h \ google/protobuf/arena_test_util.h \
google/protobuf/lite_unittest.cc \
google/protobuf/map_lite_test_util.cc \ google/protobuf/map_lite_test_util.cc \
google/protobuf/map_lite_test_util.h \ google/protobuf/map_lite_test_util.h \
google/protobuf/test_util_lite.cc \ google/protobuf/test_util_lite.cc \
google/protobuf/test_util_lite.h google/protobuf/test_util_lite.h
# TODO(teboring) add the file back and make the test build.
# google/protobuf/map_lite_test.cc # Build lite_unittest separately, since it doesn't use gtest. It can't
# depend on gtest because our internal version of gtest depend on proto
# full runtime and we want to make sure this test builds without full
# runtime.
protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la
protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
protobuf_lite_test_SOURCES = \
google/protobuf/lite_unittest.cc \
$(COMMON_LITE_TEST_SOURCES)
nodist_protobuf_lite_test_SOURCES = $(protoc_lite_outputs) nodist_protobuf_lite_test_SOURCES = $(protoc_lite_outputs)
# lite_arena_unittest depends on gtest because teboring@ found that, when the
# test is built internally without gtest, our memory sanitizer does not detect
# memory leaks (the reason is unknown).
protobuf_lite_arena_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la \
../gmock/gtest/lib/libgtest.la \
../gmock/lib/libgmock.la \
../gmock/lib/libgmock_main.la
protobuf_lite_arena_test_CPPFLAGS = -I$(srcdir)/../gmock/include \
-I$(srcdir)/../gmock/gtest/include
protobuf_lite_arena_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
protobuf_lite_arena_test_SOURCES = \
google/protobuf/lite_arena_unittest.cc \
$(COMMON_LITE_TEST_SOURCES)
nodist_protobuf_lite_arena_test_SOURCES = $(protoc_lite_outputs)
# Test plugin binary. # Test plugin binary.
test_plugin_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \ test_plugin_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \
../gmock/gtest/lib/libgtest.la ../gmock/gtest/lib/libgtest.la
...@@ -790,4 +810,5 @@ zcgunzip_SOURCES = google/protobuf/testing/zcgunzip.cc ...@@ -790,4 +810,5 @@ zcgunzip_SOURCES = google/protobuf/testing/zcgunzip.cc
endif endif
TESTS = protobuf-test protobuf-lazy-descriptor-test protobuf-lite-test \ TESTS = protobuf-test protobuf-lazy-descriptor-test protobuf-lite-test \
google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS) google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS) \
protobuf-lite-arena-test
...@@ -600,6 +600,94 @@ bool IsAnyMessage(const Descriptor* descriptor) { ...@@ -600,6 +600,94 @@ bool IsAnyMessage(const Descriptor* descriptor) {
descriptor->file()->name() == kAnyProtoFile; descriptor->file()->name() == kAnyProtoFile;
} }
// Level of UTF-8 validation that generated code applies to a string field.
enum Utf8CheckMode {
STRICT = 0, // Parsing will fail if non UTF-8 data is in string fields.
VERIFY = 1, // Only log an error but parsing will succeed.
NONE = 2, // No UTF-8 check.
};
// Determines which level of UTF-8 enforcement applies to `field`, based on
// the file that declares it:
//   - proto3 syntax                         -> STRICT
//   - otherwise, not optimized for LITE_RUNTIME -> VERIFY
//   - otherwise (LITE_RUNTIME)              -> NONE
static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field) {
  if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
    return STRICT;
  }
  const bool is_lite_runtime =
      field->file()->options().optimize_for() == FileOptions::LITE_RUNTIME;
  return is_lite_runtime ? NONE : VERIFY;
}
static void GenerateUtf8CheckCode(const FieldDescriptor* field,
bool for_parse,
const map<string, string>& variables,
const char* parameters,
const char* strict_function,
const char* verify_function,
io::Printer* printer) {
switch (GetUtf8CheckMode(field)) {
case STRICT: {
if (for_parse) {
printer->Print("DO_(");
}
printer->Print(
"::google::protobuf::internal::WireFormatLite::$function$(\n",
"function", strict_function);
printer->Indent();
printer->Print(variables, parameters);
if (for_parse) {
printer->Print("::google::protobuf::internal::WireFormatLite::PARSE,\n");
} else {
printer->Print("::google::protobuf::internal::WireFormatLite::SERIALIZE,\n");
}
printer->Print("\"$full_name$\")", "full_name", field->full_name());
if (for_parse) {
printer->Print(")");
}
printer->Print(";\n");
printer->Outdent();
break;
}
case VERIFY: {
printer->Print(
"::google::protobuf::internal::WireFormat::$function$(\n",
"function", verify_function);
printer->Indent();
printer->Print(variables, parameters);
if (for_parse) {
printer->Print("::google::protobuf::internal::WireFormat::PARSE,\n");
} else {
printer->Print("::google::protobuf::internal::WireFormat::SERIALIZE,\n");
}
printer->Print("\"$full_name$\");\n", "full_name", field->full_name());
printer->Outdent();
break;
}
case NONE:
break;
}
}
// Prints the UTF-8 check for a string field by forwarding to
// GenerateUtf8CheckCode with the string flavour of the check functions.
void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
                                    bool for_parse,
                                    const map<string, string>& variables,
                                    const char* parameters,
                                    io::Printer* printer) {
  // Function names used in STRICT and VERIFY mode respectively.
  const char* const strict_function = "VerifyUtf8String";
  const char* const verify_function = "VerifyUTF8StringNamedField";
  GenerateUtf8CheckCode(field, for_parse, variables, parameters,
                        strict_function, verify_function, printer);
}
// Prints the UTF-8 check for a field by forwarding to GenerateUtf8CheckCode
// with the Cord flavour of the check functions.
void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
                                  bool for_parse,
                                  const map<string, string>& variables,
                                  const char* parameters,
                                  io::Printer* printer) {
  // Function names used in STRICT and VERIFY mode respectively.
  const char* const strict_function = "VerifyUtf8Cord";
  const char* const verify_function = "VerifyUTF8CordNamedField";
  GenerateUtf8CheckCode(field, for_parse, variables, parameters,
                        strict_function, verify_function, printer);
}
} // namespace cpp } // namespace cpp
} // namespace compiler } // namespace compiler
} // namespace protobuf } // namespace protobuf
......
...@@ -202,11 +202,6 @@ inline bool HasGenericServices(const FileDescriptor* file) { ...@@ -202,11 +202,6 @@ inline bool HasGenericServices(const FileDescriptor* file) {
file->options().cc_generic_services(); file->options().cc_generic_services();
} }
// Should string fields in this file verify that their contents are UTF-8?
inline bool HasUtf8Verification(const FileDescriptor* file) {
return file->options().optimize_for() != FileOptions::LITE_RUNTIME;
}
// Should we generate a separate, super-optimized code path for serializing to // Should we generate a separate, super-optimized code path for serializing to
// flat arrays? We don't do this in Lite mode because we'd rather reduce code // flat arrays? We don't do this in Lite mode because we'd rather reduce code
// size. // size.
...@@ -270,6 +265,20 @@ inline bool SupportsArenas(const FieldDescriptor* field) { ...@@ -270,6 +265,20 @@ inline bool SupportsArenas(const FieldDescriptor* field) {
bool IsAnyMessage(const FileDescriptor* descriptor); bool IsAnyMessage(const FileDescriptor* descriptor);
bool IsAnyMessage(const Descriptor* descriptor); bool IsAnyMessage(const Descriptor* descriptor);
// Writes to |printer| the generated C++ that UTF-8-checks a string field.
//   field:      the field being checked; its full name is embedded in the
//               generated call so the runtime can report it.
//   for_parse:  true when the check is emitted into parsing code, false when
//               it is emitted into serialization code.
//   variables:  substitutions applied when |parameters| is printed.
//   parameters: Printer template for the leading arguments of the generated
//               call (the data pointer and length, each ending in ",\n").
// NOTE(review): whether a check is emitted at all, and how strict it is, is
// decided by the shared implementation -- confirm there.
void GenerateUtf8CheckCodeForString(
const FieldDescriptor* field,
bool for_parse,
const map<string, string>& variables,
const char* parameters,
io::Printer* printer);
// Writes to |printer| the generated C++ that UTF-8-checks a Cord field.
//   field:      the field being checked; its full name is embedded in the
//               generated call so the runtime can report it.
//   for_parse:  true when the check is emitted into parsing code, false when
//               it is emitted into serialization code.
//   variables:  substitutions applied when |parameters| is printed.
//   parameters: Printer template for the leading arguments of the generated
//               call.
// NOTE(review): whether a check is emitted at all, and how strict it is, is
// decided by the shared implementation -- confirm there.
void GenerateUtf8CheckCodeForCord(
const FieldDescriptor* field,
bool for_parse,
const map<string, string>& variables,
const char* parameters,
io::Printer* printer);
} // namespace cpp } // namespace cpp
} // namespace compiler } // namespace compiler
} // namespace protobuf } // namespace protobuf
......
...@@ -234,6 +234,20 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { ...@@ -234,6 +234,20 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"}\n"); "}\n");
} }
const FieldDescriptor* key_field =
descriptor_->message_type()->FindFieldByName("key");
if (key_field->type() == FieldDescriptor::TYPE_STRING) {
GenerateUtf8CheckCodeForString(
key_field, true, variables_,
"entry->key().data(), entry->key().length(),\n", printer);
}
if (value_field->type() == FieldDescriptor::TYPE_STRING) {
GenerateUtf8CheckCodeForString(
value_field, true, variables_,
"entry->mutable_value()->data(),\n"
"entry->mutable_value()->length(),\n", printer);
}
// If entry is allocated by arena, its destructor should be avoided. // If entry is allocated by arena, its destructor should be avoided.
if (SupportsArenas(descriptor_)) { if (SupportsArenas(descriptor_)) {
printer->Print(variables_, printer->Print(variables_,
...@@ -261,7 +275,30 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const { ...@@ -261,7 +275,30 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
printer->Print(variables_, printer->Print(variables_,
" entry.reset($name$_.New$wrapper$(it->first, it->second));\n" " entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
" ::google::protobuf::internal::WireFormatLite::Write$stream_writer$(\n" " ::google::protobuf::internal::WireFormatLite::Write$stream_writer$(\n"
" $number$, *entry, output);\n" " $number$, *entry, output);\n");
printer->Indent();
printer->Indent();
const FieldDescriptor* key_field =
descriptor_->message_type()->FindFieldByName("key");
const FieldDescriptor* value_field =
descriptor_->message_type()->FindFieldByName("value");
if (key_field->type() == FieldDescriptor::TYPE_STRING) {
GenerateUtf8CheckCodeForString(
key_field, false, variables_,
"it->first.data(), it->first.length(),\n", printer);
}
if (value_field->type() == FieldDescriptor::TYPE_STRING) {
GenerateUtf8CheckCodeForString(
value_field, false, variables_,
"it->second.data(), it->second.length(),\n", printer);
}
printer->Outdent();
printer->Outdent();
printer->Print(
" }\n"); " }\n");
// If entry is allocated by arena, its destructor should be avoided. // If entry is allocated by arena, its destructor should be avoided.
...@@ -296,7 +333,29 @@ GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const { ...@@ -296,7 +333,29 @@ GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
" entry.reset($name$_.New$wrapper$(it->first, it->second));\n" " entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
" target = ::google::protobuf::internal::WireFormatLite::\n" " target = ::google::protobuf::internal::WireFormatLite::\n"
" Write$declared_type$NoVirtualToArray(\n" " Write$declared_type$NoVirtualToArray(\n"
" $number$, *entry, target);\n" " $number$, *entry, target);\n");
printer->Indent();
printer->Indent();
const FieldDescriptor* key_field =
descriptor_->message_type()->FindFieldByName("key");
const FieldDescriptor* value_field =
descriptor_->message_type()->FindFieldByName("value");
if (key_field->type() == FieldDescriptor::TYPE_STRING) {
GenerateUtf8CheckCodeForString(
key_field, false, variables_,
"it->first.data(), it->first.length(),\n", printer);
}
if (value_field->type() == FieldDescriptor::TYPE_STRING) {
GenerateUtf8CheckCodeForString(
value_field, false, variables_,
"it->second.data(), it->second.length(),\n", printer);
}
printer->Outdent();
printer->Outdent();
printer->Print(
" }\n"); " }\n");
// If entry is allocated by arena, its destructor should be avoided. // If entry is allocated by arena, its destructor should be avoided.
......
...@@ -367,25 +367,19 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { ...@@ -367,25 +367,19 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n" "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->mutable_$name$()));\n"); " input, this->mutable_$name$()));\n");
if (HasUtf8Verification(descriptor_->file()) && if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
descriptor_->type() == FieldDescriptor::TYPE_STRING) { GenerateUtf8CheckCodeForString(
printer->Print(variables_, descriptor_, true, variables_,
"::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" "this->$name$().data(), this->$name$().length(),\n", printer);
" this->$name$().data(), this->$name$().length(),\n"
" ::google::protobuf::internal::WireFormat::PARSE,\n"
" \"$full_name$\");\n");
} }
} }
void StringFieldGenerator:: void StringFieldGenerator::
GenerateSerializeWithCachedSizes(io::Printer* printer) const { GenerateSerializeWithCachedSizes(io::Printer* printer) const {
if (HasUtf8Verification(descriptor_->file()) && if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
descriptor_->type() == FieldDescriptor::TYPE_STRING) { GenerateUtf8CheckCodeForString(
printer->Print(variables_, descriptor_, false, variables_,
"::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" "this->$name$().data(), this->$name$().length(),\n", printer);
" this->$name$().data(), this->$name$().length(),\n"
" ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
" \"$full_name$\");\n");
} }
printer->Print(variables_, printer->Print(variables_,
"::google::protobuf::internal::WireFormatLite::Write$declared_type$MaybeAliased(\n" "::google::protobuf::internal::WireFormatLite::Write$declared_type$MaybeAliased(\n"
...@@ -394,13 +388,10 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const { ...@@ -394,13 +388,10 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
void StringFieldGenerator:: void StringFieldGenerator::
GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const { GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
if (HasUtf8Verification(descriptor_->file()) && if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
descriptor_->type() == FieldDescriptor::TYPE_STRING) { GenerateUtf8CheckCodeForString(
printer->Print(variables_, descriptor_, false, variables_,
"::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" "this->$name$().data(), this->$name$().length(),\n", printer);
" this->$name$().data(), this->$name$().length(),\n"
" ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
" \"$full_name$\");\n");
} }
printer->Print(variables_, printer->Print(variables_,
"target =\n" "target =\n"
...@@ -665,13 +656,10 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { ...@@ -665,13 +656,10 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n" "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->mutable_$name$()));\n"); " input, this->mutable_$name$()));\n");
if (HasUtf8Verification(descriptor_->file()) && if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
descriptor_->type() == FieldDescriptor::TYPE_STRING) { GenerateUtf8CheckCodeForString(
printer->Print(variables_, descriptor_, true, variables_,
"::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" "this->$name$().data(), this->$name$().length(),\n", printer);
" this->$name$().data(), this->$name$().length(),\n"
" ::google::protobuf::internal::WireFormat::PARSE,\n"
" \"$full_name$\");\n");
} }
} }
...@@ -817,14 +805,12 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { ...@@ -817,14 +805,12 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
printer->Print(variables_, printer->Print(variables_,
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n" "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->add_$name$()));\n"); " input, this->add_$name$()));\n");
if (HasUtf8Verification(descriptor_->file()) && if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
descriptor_->type() == FieldDescriptor::TYPE_STRING) { GenerateUtf8CheckCodeForString(
printer->Print(variables_, descriptor_, true, variables_,
"::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" "this->$name$(this->$name$_size() - 1).data(),\n"
" this->$name$(this->$name$_size() - 1).data(),\n" "this->$name$(this->$name$_size() - 1).length(),\n",
" this->$name$(this->$name$_size() - 1).length(),\n" printer);
" ::google::protobuf::internal::WireFormat::PARSE,\n"
" \"$full_name$\");\n");
} }
} }
...@@ -832,14 +818,13 @@ void RepeatedStringFieldGenerator:: ...@@ -832,14 +818,13 @@ void RepeatedStringFieldGenerator::
GenerateSerializeWithCachedSizes(io::Printer* printer) const { GenerateSerializeWithCachedSizes(io::Printer* printer) const {
printer->Print(variables_, printer->Print(variables_,
"for (int i = 0; i < this->$name$_size(); i++) {\n"); "for (int i = 0; i < this->$name$_size(); i++) {\n");
if (HasUtf8Verification(descriptor_->file()) && printer->Indent();
descriptor_->type() == FieldDescriptor::TYPE_STRING) { if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
printer->Print(variables_, GenerateUtf8CheckCodeForString(
"::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" descriptor_, false, variables_,
" this->$name$(i).data(), this->$name$(i).length(),\n" "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
" ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
" \"$full_name$\");\n");
} }
printer->Outdent();
printer->Print(variables_, printer->Print(variables_,
" ::google::protobuf::internal::WireFormatLite::Write$declared_type$(\n" " ::google::protobuf::internal::WireFormatLite::Write$declared_type$(\n"
" $number$, this->$name$(i), output);\n" " $number$, this->$name$(i), output);\n"
...@@ -850,14 +835,13 @@ void RepeatedStringFieldGenerator:: ...@@ -850,14 +835,13 @@ void RepeatedStringFieldGenerator::
GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const { GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
printer->Print(variables_, printer->Print(variables_,
"for (int i = 0; i < this->$name$_size(); i++) {\n"); "for (int i = 0; i < this->$name$_size(); i++) {\n");
if (HasUtf8Verification(descriptor_->file()) && printer->Indent();
descriptor_->type() == FieldDescriptor::TYPE_STRING) { if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
printer->Print(variables_, GenerateUtf8CheckCodeForString(
" ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" descriptor_, false, variables_,
" this->$name$(i).data(), this->$name$(i).length(),\n" "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
" ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
" \"$full_name$\");\n");
} }
printer->Outdent();
printer->Print(variables_, printer->Print(variables_,
" target = ::google::protobuf::internal::WireFormatLite::\n" " target = ::google::protobuf::internal::WireFormatLite::\n"
" Write$declared_type$ToArray($number$, this->$name$(i), target);\n" " Write$declared_type$ToArray($number$, this->$name$(i), target);\n"
......
...@@ -939,6 +939,42 @@ void Parser::GenerateMapEntry(const MapField& map_field, ...@@ -939,6 +939,42 @@ void Parser::GenerateMapEntry(const MapField& map_field,
} else { } else {
value_field->set_type_name(map_field.value_type_name); value_field->set_type_name(map_field.value_type_name);
} }
// Propagate the "enforce_utf8" option to key and value fields if they
// are strings. This helps simplify the implementation of code generators
// and also reflection-based parsing code.
//
// The following definition:
// message Foo {
// map<string, string> value = 1 [enforce_utf8 = false];
// }
// will be interpreted as:
// message Foo {
// message ValueEntry {
// option map_entry = true;
// string key = 1 [enforce_utf8 = false];
// string value = 2 [enforce_utf8 = false];
// }
// repeated ValueEntry value = 1 [enforce_utf8 = false];
// }
//
// TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
// from protocol compiler.
for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
const UninterpretedOption& option =
field->options().uninterpreted_option(i);
if (option.name_size() == 1 &&
option.name(0).name_part() == "enforce_utf8" &&
!option.name(0).is_extension()) {
if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
key_field->mutable_options()->add_uninterpreted_option()
->CopyFrom(option);
}
if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
value_field->mutable_options()->add_uninterpreted_option()
->CopyFrom(option);
}
}
}
} }
bool Parser::ParseFieldOptions(FieldDescriptorProto* field, bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
......
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <google/protobuf/arena_test_util.h>
#include <google/protobuf/map_lite_test_util.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
namespace google {
namespace protobuf {
namespace {
// Round-trips a TestArenaMapLite through its serialized form, with both the
// source and the destination message created on the same arena, and checks
// that every map field survives the trip.
TEST(LiteArenaTest, MapNoHeapAllocation) {
  // Allocate a large initial block to avoid mallocs during hooked test.
  std::vector<char> arena_block(128 * 1024);
  google::protobuf::ArenaOptions options;
  options.initial_block = &arena_block[0];
  options.initial_block_size = arena_block.size();
  google::protobuf::Arena arena(options);
  // Reserve the output buffer up front so serialization does not have to grow
  // it inside the block below.
  string data;
  data.reserve(128 * 1024);

  {
    // TODO(teboring): Enable no heap check when ArenaStringPtr is used in
    // Map.
    // google::protobuf::internal::NoHeapChecker no_heap;

    protobuf_unittest::TestArenaMapLite* from =
        google::protobuf::Arena::CreateMessage<protobuf_unittest::TestArenaMapLite>(&arena);
    google::protobuf::MapLiteTestUtil::SetArenaMapFields(from);
    // Fail right here if serialization itself reports an error, instead of
    // letting it surface later as a confusing field mismatch.
    ASSERT_TRUE(from->SerializeToString(&data));

    protobuf_unittest::TestArenaMapLite* to =
        google::protobuf::Arena::CreateMessage<protobuf_unittest::TestArenaMapLite>(&arena);
    // Likewise, a rejected parse must stop the test before the field checks.
    ASSERT_TRUE(to->ParseFromString(data));
    google::protobuf::MapLiteTestUtil::ExpectArenaMapFieldsSet(*to);
  }
}
// Parses a hand-built length-delimited payload into an arena-allocated
// ForeignMessageArenaLite. The test makes no assertions of its own.
// NOTE(review): presumably it relies on a leak checker to flag memory that is
// not released with the arena, and on field 1 not accepting a
// length-delimited value so the payload lands in unknown fields -- confirm.
TEST(LiteArenaTest, UnknownFieldMemLeak) {
  google::protobuf::Arena arena;
  protobuf_unittest::ForeignMessageArenaLite* message =
      google::protobuf::Arena::CreateMessage<protobuf_unittest::ForeignMessageArenaLite>(
          &arena);

  // Tag byte 012 encodes field number 1 with wire type 2 (length-delimited).
  // Assuming `string` is std::string, construction from this literal stops at
  // the embedded NUL, so only the tag byte is kept; the length byte at index 1
  // is written explicitly after the padding loop.
  string data = "\012\000";

  // Pad until the string outgrows its initial capacity. The capacity is kept
  // as size_t so the comparison is unsigned on both sides.
  const size_t original_capacity = data.capacity();
  while (data.capacity() <= original_capacity) {
    data.append("a");
  }

  // Everything after the tag byte and the length byte is payload.
  data[1] = static_cast<char>(data.size() - 2);
  message->ParseFromString(data);
}
} // namespace
} // namespace protobuf
} // namespace google
...@@ -461,6 +461,10 @@ bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number, ...@@ -461,6 +461,10 @@ bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
} }
} }
static bool StrictUtf8Check(const FieldDescriptor* field) {
return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
}
bool WireFormat::ParseAndMergeField( bool WireFormat::ParseAndMergeField(
uint32 tag, uint32 tag,
const FieldDescriptor* field, // May be NULL for unknown const FieldDescriptor* field, // May be NULL for unknown
...@@ -633,10 +637,19 @@ bool WireFormat::ParseAndMergeField( ...@@ -633,10 +637,19 @@ bool WireFormat::ParseAndMergeField(
// Handle strings separately so that we can optimize the ctype=CORD case. // Handle strings separately so that we can optimize the ctype=CORD case.
case FieldDescriptor::TYPE_STRING: { case FieldDescriptor::TYPE_STRING: {
bool strict_utf8_check = StrictUtf8Check(field);
string value; string value;
if (!WireFormatLite::ReadString(input, &value)) return false; if (!WireFormatLite::ReadString(input, &value)) return false;
if (strict_utf8_check) {
if (!WireFormatLite::VerifyUtf8String(
value.data(), value.length(), WireFormatLite::PARSE,
field->full_name().c_str())) {
return false;
}
} else {
VerifyUTF8StringNamedField(value.data(), value.length(), PARSE, VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
field->name().c_str()); field->full_name().c_str());
}
if (field->is_repeated()) { if (field->is_repeated()) {
message_reflection->AddString(message, field, value); message_reflection->AddString(message, field, value);
} else { } else {
...@@ -894,13 +907,20 @@ void WireFormat::SerializeFieldWithCachedSizes( ...@@ -894,13 +907,20 @@ void WireFormat::SerializeFieldWithCachedSizes(
// Handle strings separately so that we can get string references // Handle strings separately so that we can get string references
// instead of copying. // instead of copying.
case FieldDescriptor::TYPE_STRING: { case FieldDescriptor::TYPE_STRING: {
bool strict_utf8_check = StrictUtf8Check(field);
string scratch; string scratch;
const string& value = field->is_repeated() ? const string& value = field->is_repeated() ?
message_reflection->GetRepeatedStringReference( message_reflection->GetRepeatedStringReference(
message, field, j, &scratch) : message, field, j, &scratch) :
message_reflection->GetStringReference(message, field, &scratch); message_reflection->GetStringReference(message, field, &scratch);
if (strict_utf8_check) {
WireFormatLite::VerifyUtf8String(value.data(), value.length(),
WireFormatLite::SERIALIZE,
field->full_name().c_str());
} else {
VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE, VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
field->name().c_str()); field->full_name().c_str());
}
WireFormatLite::WriteString(field->number(), value, output); WireFormatLite::WriteString(field->number(), value, output);
break; break;
} }
...@@ -1108,34 +1128,6 @@ int WireFormat::MessageSetItemByteSize( ...@@ -1108,34 +1128,6 @@ int WireFormat::MessageSetItemByteSize(
return our_size; return our_size;
} }
void WireFormat::VerifyUTF8StringFallback(const char* data,
int size,
Operation op,
const char* field_name) {
if (!IsStructurallyValidUTF8(data, size)) {
const char* operation_str = NULL;
switch (op) {
case PARSE:
operation_str = "parsing";
break;
case SERIALIZE:
operation_str = "serializing";
break;
// no default case: have the compiler warn if a case is not covered.
}
string quoted_field_name = "";
if (field_name != NULL) {
quoted_field_name = StringPrintf(" '%s'", field_name);
}
// no space below to avoid double space when the field name is missing.
GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
<< "UTF-8 data when " << operation_str << " a protocol "
<< "buffer. Use the 'bytes' type if you intend to send raw "
<< "bytes. ";
}
}
} // namespace internal } // namespace internal
} // namespace protobuf } // namespace protobuf
} // namespace google } // namespace google
...@@ -231,8 +231,8 @@ class LIBPROTOBUF_EXPORT WireFormat { ...@@ -231,8 +231,8 @@ class LIBPROTOBUF_EXPORT WireFormat {
const Message& message); const Message& message);
enum Operation { enum Operation {
PARSE, PARSE = 0,
SERIALIZE, SERIALIZE = 1,
}; };
// Verifies that a string field is valid UTF8, logging an error if not. // Verifies that a string field is valid UTF8, logging an error if not.
...@@ -247,13 +247,6 @@ class LIBPROTOBUF_EXPORT WireFormat { ...@@ -247,13 +247,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
const char* field_name); const char* field_name);
private: private:
// Verifies that a string field is valid UTF8, logging an error if not.
static void VerifyUTF8StringFallback(
const char* data,
int size,
Operation op,
const char* field_name);
// Skip a MessageSet field. // Skip a MessageSet field.
static bool SkipMessageSetField(io::CodedInputStream* input, static bool SkipMessageSetField(io::CodedInputStream* input,
uint32 field_number, uint32 field_number,
...@@ -265,8 +258,6 @@ class LIBPROTOBUF_EXPORT WireFormat { ...@@ -265,8 +258,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
Message* message, Message* message,
io::CodedInputStream* input); io::CodedInputStream* input);
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat); GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
}; };
...@@ -321,7 +312,8 @@ inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) { ...@@ -321,7 +312,8 @@ inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
inline void WireFormat::VerifyUTF8String(const char* data, int size, inline void WireFormat::VerifyUTF8String(const char* data, int size,
WireFormat::Operation op) { WireFormat::Operation op) {
#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
WireFormat::VerifyUTF8StringFallback(data, size, op, NULL); WireFormatLite::VerifyUtf8String(
data, size, static_cast<WireFormatLite::Operation>(op), NULL);
#else #else
// Avoid the compiler warning about unused variables. // Avoid the compiler warning about unused variables.
(void)data; (void)size; (void)op; (void)data; (void)size; (void)op;
...@@ -332,7 +324,8 @@ inline void WireFormat::VerifyUTF8StringNamedField( ...@@ -332,7 +324,8 @@ inline void WireFormat::VerifyUTF8StringNamedField(
const char* data, int size, WireFormat::Operation op, const char* data, int size, WireFormat::Operation op,
const char* field_name) { const char* field_name) {
#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
WireFormat::VerifyUTF8StringFallback(data, size, op, field_name); WireFormatLite::VerifyUtf8String(
data, size, static_cast<WireFormatLite::Operation>(op), field_name);
#endif #endif
} }
......
...@@ -39,10 +39,12 @@ ...@@ -39,10 +39,12 @@
#include <vector> #include <vector>
#include <google/protobuf/stubs/logging.h> #include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/common.h> #include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/stringprintf.h>
#include <google/protobuf/io/coded_stream_inl.h> #include <google/protobuf/io/coded_stream_inl.h>
#include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl_lite.h> #include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace google { namespace google {
namespace protobuf { namespace protobuf {
namespace internal { namespace internal {
...@@ -505,6 +507,35 @@ bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string** p) { ...@@ -505,6 +507,35 @@ bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string** p) {
return ReadBytesToString(input, *p); return ReadBytesToString(input, *p);
} }
bool WireFormatLite::VerifyUtf8String(const char* data,
int size,
Operation op,
const char* field_name) {
if (!IsStructurallyValidUTF8(data, size)) {
const char* operation_str = NULL;
switch (op) {
case PARSE:
operation_str = "parsing";
break;
case SERIALIZE:
operation_str = "serializing";
break;
// no default case: have the compiler warn if a case is not covered.
}
string quoted_field_name = "";
if (field_name != NULL) {
quoted_field_name = StringPrintf(" '%s'", field_name);
}
// no space below to avoid double space when the field name is missing.
GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
<< "UTF-8 data when " << operation_str << " a protocol "
<< "buffer. Use the 'bytes' type if you intend to send raw "
<< "bytes. ";
return false;
}
return true;
}
} // namespace internal } // namespace internal
} // namespace protobuf } // namespace protobuf
} // namespace google } // namespace google
...@@ -316,6 +316,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite { ...@@ -316,6 +316,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite {
static bool ReadBytes(input, string** p); static bool ReadBytes(input, string** p);
enum Operation {
PARSE = 0,
SERIALIZE = 1,
};
// Returns true if the data is valid UTF-8.
static bool VerifyUtf8String(const char* data, int size,
Operation op,
const char* field_name);
static inline bool ReadGroup (field_number, input, MessageLite* value); static inline bool ReadGroup (field_number, input, MessageLite* value);
static inline bool ReadMessage(input, MessageLite* value); static inline bool ReadMessage(input, MessageLite* value);
......
...@@ -55,8 +55,10 @@ LITE_PROTOS=$(get_proto_files $MAKEFILE protoc_lite_outputs) ...@@ -55,8 +55,10 @@ LITE_PROTOS=$(get_proto_files $MAKEFILE protoc_lite_outputs)
PROTOS=$(get_proto_files $MAKEFILE protoc_outputs) PROTOS=$(get_proto_files $MAKEFILE protoc_outputs)
WKT_PROTOS=$(get_variable_value $MAKEFILE nobase_dist_proto_DATA) WKT_PROTOS=$(get_variable_value $MAKEFILE nobase_dist_proto_DATA)
COMMON_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_TEST_SOURCES) COMMON_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_TEST_SOURCES)
COMMON_LITE_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_LITE_TEST_SOURCES)
TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_test_SOURCES) TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_test_SOURCES)
LITE_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_test_SOURCES) LITE_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_test_SOURCES)
LITE_ARENA_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_arena_test_SOURCES)
TEST_PLUGIN_SOURCES=$(get_source_files $MAKEFILE test_plugin_SOURCES) TEST_PLUGIN_SOURCES=$(get_source_files $MAKEFILE test_plugin_SOURCES)
################################################################################ ################################################################################
...@@ -112,8 +114,10 @@ set_cmake_value $CMAKE_DIR/libprotoc.cmake libprotoc_files $CMAKE_PREFIX $LIBPRO ...@@ -112,8 +114,10 @@ set_cmake_value $CMAKE_DIR/libprotoc.cmake libprotoc_files $CMAKE_PREFIX $LIBPRO
set_cmake_value $CMAKE_DIR/tests.cmake lite_test_protos "" $LITE_PROTOS set_cmake_value $CMAKE_DIR/tests.cmake lite_test_protos "" $LITE_PROTOS
set_cmake_value $CMAKE_DIR/tests.cmake tests_protos "" $PROTOS set_cmake_value $CMAKE_DIR/tests.cmake tests_protos "" $PROTOS
set_cmake_value $CMAKE_DIR/tests.cmake common_test_files $CMAKE_PREFIX $COMMON_TEST_SOURCES set_cmake_value $CMAKE_DIR/tests.cmake common_test_files $CMAKE_PREFIX $COMMON_TEST_SOURCES
set_cmake_value $CMAKE_DIR/tests.cmake common_lite_test_files $CMAKE_PREFIX $COMMON_LITE_TEST_SOURCES
set_cmake_value $CMAKE_DIR/tests.cmake tests_files $CMAKE_PREFIX $TEST_SOURCES set_cmake_value $CMAKE_DIR/tests.cmake tests_files $CMAKE_PREFIX $TEST_SOURCES
set_cmake_value $CMAKE_DIR/tests.cmake lite_test_files $CMAKE_PREFIX $LITE_TEST_SOURCES set_cmake_value $CMAKE_DIR/tests.cmake lite_test_files $CMAKE_PREFIX $LITE_TEST_SOURCES
set_cmake_value $CMAKE_DIR/tests.cmake lite_arena_test_files $CMAKE_PREFIX $LITE_ARENA_TEST_SOURCES
# Generate extract_includes.bat # Generate extract_includes.bat
echo "mkdir include" > $EXTRACT_INCLUDES_BAT echo "mkdir include" > $EXTRACT_INCLUDES_BAT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment