Unverified commit ab09b2a2, authored by Jie Luo and committed by GitHub

Disable surrogate check for ucs2 (#5039)

* Only run the _SURROGATE_PATTERN check on Python 2 UCS-4 builds
Skip some tests on UCS-2 builds, detected via sys.maxunicode
parent fe2eef4b
...@@ -83,7 +83,9 @@ __author__ = 'kenton@google.com (Kenton Varda)' ...@@ -83,7 +83,9 @@ __author__ = 'kenton@google.com (Kenton Varda)'
import struct import struct
import six import six
import sys
_UCS2_MAXUNICODE = 65535
if six.PY3: if six.PY3:
long = int long = int
else: else:
...@@ -550,7 +552,8 @@ def StringDecoder(field_number, is_repeated, is_packed, key, new_default, ...@@ -550,7 +552,8 @@ def StringDecoder(field_number, is_repeated, is_packed, key, new_default,
e.reason = '%s in field: %s' % (e, key.full_name) e.reason = '%s in field: %s' % (e, key.full_name)
raise raise
if is_strict_utf8 and six.PY2: if is_strict_utf8 and six.PY2 and sys.maxunicode > _UCS2_MAXUNICODE:
# Only do the check for python2 ucs4 when is_strict_utf8 enabled
if _SURROGATE_PATTERN.search(value): if _SURROGATE_PATTERN.search(value):
reason = ('String field %s contains invalid UTF-8 data when parsing' reason = ('String field %s contains invalid UTF-8 data when parsing'
'a protocol buffer: surrogates not allowed. Use' 'a protocol buffer: surrogates not allowed. Use'
......
...@@ -81,6 +81,7 @@ from google.protobuf.internal import testing_refleaks ...@@ -81,6 +81,7 @@ from google.protobuf.internal import testing_refleaks
from google.protobuf import message from google.protobuf import message
from google.protobuf.internal import _parameterized from google.protobuf.internal import _parameterized
UCS2_MAXUNICODE = 65535
if six.PY3: if six.PY3:
long = int long = int
...@@ -2210,6 +2211,8 @@ class Proto3Test(BaseTestCase): ...@@ -2210,6 +2211,8 @@ class Proto3Test(BaseTestCase):
msg.map_string_foreign_message['foo'].c = 5 msg.map_string_foreign_message['foo'].c = 5
self.assertEqual(0, len(msg.FindInitializationErrors())) self.assertEqual(0, len(msg.FindInitializationErrors()))
@unittest.skipIf(sys.maxunicode == UCS2_MAXUNICODE,
'Skip for ucs2')
def testStrictUtf8Check(self): def testStrictUtf8Check(self):
# Test u'\ud801' is rejected at parser in both python2 and python3. # Test u'\ud801' is rejected at parser in both python2 and python3.
serialized = (b'r\x03\xed\xa0\x81') serialized = (b'r\x03\xed\xa0\x81')
...@@ -2259,7 +2262,8 @@ class Proto3Test(BaseTestCase): ...@@ -2259,7 +2262,8 @@ class Proto3Test(BaseTestCase):
unittest_proto3_arena_pb2.TestAllTypes( unittest_proto3_arena_pb2.TestAllTypes(
optional_string=u'\ud801\ud801') optional_string=u'\ud801\ud801')
@unittest.skipIf(six.PY3, 'Surrogates are rejected at setters in Python3') @unittest.skipIf(six.PY3 or sys.maxunicode == UCS2_MAXUNICODE,
'Surrogates are rejected at setters in Python3')
def testSurrogatesInPython2(self): def testSurrogatesInPython2(self):
# Test optional_string=u'\ud801\udc01'. # Test optional_string=u'\ud801\udc01'.
# surrogate pair is acceptable in python2. # surrogate pair is acceptable in python2.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment