diff options
-rw-r--r-- | TODO | 1 | ||||
-rw-r--r-- | pypy/interpreter/unicodehelper.py | 6 | ||||
-rw-r--r-- | pypy/objspace/std/objspace.py | 2 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_index.py | 5 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_lengthhint.py | 6 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_liststrategies.py | 2 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_obj.py | 2 | ||||
-rw-r--r-- | pypy/objspace/std/unicodeobject.py | 4 |
8 files changed, 20 insertions, 8 deletions
@@ -8,3 +8,4 @@ * better flag handling in split/splitlines maybe? * find all the fast-paths that we want to do with utf8 (we only do utf-8 now, not UTF8 or utf8) for decode/encode +* encode_error_handler has XXX diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py index c41ed1feec..c3ca8171d7 100644 --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -20,11 +20,13 @@ def decode_error_handler(space): @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. - def raise_unicode_exception_encode(errors, encoding, msg, w_u, + def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, startingpos, endingpos): + # XXX fix once we stop using runicode.py + flag = _get_flag(u.decode('utf8')) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - w_u, + space.newutf8(u, u_len, flag), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py index 0b67c7861f..714a014ca7 100644 --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,7 +164,7 @@ class StdObjSpace(ObjSpace): if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - return self.newutf8(x.encode('utf8'), len(x)) + return self.newutf8(x.encode('utf8'), len(x), rutf8.FLAG_REGULAR) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py index 37d4f830d6..e97febad4b 100644 --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -1,5 +1,7 @@ from py.test import raises +from rpython.rlib import rutf8 + class AppTest_IndexProtocol: def setup_class(self): w_oldstyle = self.space.appexec([], """(): @@ -263,7 +265,8 @@ class AppTest_StringTestCase(SeqTestCase, StringTestCase): class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase): def setup_method(self, method): SeqTestCase.setup_method(self, method) - self.w_seq = self.space.wrap(u"this is a test") + self.w_seq = self.space.newutf8("this is a test", len("this is a test"), + rutf8.FLAG_ASCII) self.w_const = self.space.appexec([], """(): return unicode""") diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py index 7e005af2d6..d7d7a9a1e4 100644 --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.module._collections.interp_deque import W_Deque from pypy.module.itertools.interp_itertools import W_Repeat @@ -71,7 +74,8 @@ class TestLengthHint: self._test_length_hint(self.space.wrap('P' * self.SIZE)) def test_unicode(self): - self._test_length_hint(self.space.wrap(u'Y' * self.SIZE)) + self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE, + rutf8.FLAG_ASCII)) def test_tuple(self): self._test_length_hint(self.space.wrap(tuple(self.ITEMS))) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py index ccae5975b6..3b5e297e2d 100644 --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -22,7 +22,7 @@ class TestW_ListStrategies(TestW_ListObject): BytesListStrategy) #assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, # UnicodeListStrategy) - assert isinstance(W_ListObject(space, [w(u'a'), wb('b')]).strategy, + assert isinstance(W_ListObject(space, [space.newutf8('a', 1, 0), wb('b')]).strategy, ObjectListStrategy) # mixed unicode and bytes def test_empty_to_any(self): diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py index 1d394e1cd7..080e942790 100644 --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -17,7 +17,7 @@ class AppTestObject: cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info')) def w_unwrap_wrap_unicode(space, w_obj): - return space.newutf8(space.utf8_w(w_obj), w_obj._length) + return space.newutf8(space.utf8_w(w_obj), w_obj._length, w_obj._get_flag()) cls.w_unwrap_wrap_unicode = space.wrap(gateway.interp2app(w_unwrap_wrap_unicode)) def w_unwrap_wrap_str(space, w_obj): return space.wrap(space.str_w(w_obj)) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py index f050138cbb..4f326fc066 100644 --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1834,7 +1834,7 @@ def unicode_to_decimal_w(space, w_unistr): if not isinstance(w_unistr, W_UnicodeObject): raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr) unistr = w_unistr._utf8 - result = ['\0'] * len(unistr) + result = ['\0'] * w_unistr._length digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] i = 0 @@ -1843,6 +1843,8 @@ def unicode_to_decimal_w(space, w_unistr): uchr = rutf8.codepoint_at_pos(unistr, i) if rutf8.isspace(unistr, i): result[res_pos] = ' ' + res_pos += 1 + i = rutf8.next_codepoint_pos(unistr, i) continue try: result[res_pos] = digits[unicodedb.decimal(uchr)] |