about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- TODO 1
-rw-r--r-- pypy/interpreter/unicodehelper.py 6
-rw-r--r-- pypy/objspace/std/objspace.py 2
-rw-r--r-- pypy/objspace/std/test/test_index.py 5
-rw-r--r-- pypy/objspace/std/test/test_lengthhint.py 6
-rw-r--r-- pypy/objspace/std/test/test_liststrategies.py 2
-rw-r--r-- pypy/objspace/std/test/test_obj.py 2
-rw-r--r-- pypy/objspace/std/unicodeobject.py 4
8 files changed, 20 insertions, 8 deletions
diff --git a/TODO b/TODO
index 85d32f43b5..f5cccd7865 100644
--- a/TODO
+++ b/TODO
@@ -8,3 +8,4 @@
* better flag handling in split/splitlines maybe?
* find all the fast-paths that we want to do with utf8 (we only do
utf-8 now, not UTF8 or utf8) for decode/encode
+* encode_error_handler has XXX
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
index c41ed1feec..c3ca8171d7 100644
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -20,11 +20,13 @@ def decode_error_handler(space):
@specialize.memo()
def encode_error_handler(space):
# Fast version of the "strict" errors handler.
- def raise_unicode_exception_encode(errors, encoding, msg, w_u,
+ def raise_unicode_exception_encode(errors, encoding, msg, u, u_len,
startingpos, endingpos):
+ # XXX fix once we stop using runicode.py
+ flag = _get_flag(u.decode('utf8'))
raise OperationError(space.w_UnicodeEncodeError,
space.newtuple([space.newtext(encoding),
- w_u,
+ space.newutf8(u, u_len, flag),
space.newint(startingpos),
space.newint(endingpos),
space.newtext(msg)]))
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
index 0b67c7861f..714a014ca7 100644
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -164,7 +164,7 @@ class StdObjSpace(ObjSpace):
if isinstance(x, str):
return self.newtext(x)
if isinstance(x, unicode):
- return self.newutf8(x.encode('utf8'), len(x))
+ return self.newutf8(x.encode('utf8'), len(x), rutf8.FLAG_REGULAR)
if isinstance(x, float):
return W_FloatObject(x)
if isinstance(x, W_Root):
diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py
index 37d4f830d6..e97febad4b 100644
--- a/pypy/objspace/std/test/test_index.py
+++ b/pypy/objspace/std/test/test_index.py
@@ -1,5 +1,7 @@
from py.test import raises
+from rpython.rlib import rutf8
+
class AppTest_IndexProtocol:
def setup_class(self):
w_oldstyle = self.space.appexec([], """():
@@ -263,7 +265,8 @@ class AppTest_StringTestCase(SeqTestCase, StringTestCase):
class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase):
def setup_method(self, method):
SeqTestCase.setup_method(self, method)
- self.w_seq = self.space.wrap(u"this is a test")
+ self.w_seq = self.space.newutf8("this is a test", len("this is a test"),
+ rutf8.FLAG_ASCII)
self.w_const = self.space.appexec([], """(): return unicode""")
diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py
index 7e005af2d6..d7d7a9a1e4 100644
--- a/pypy/objspace/std/test/test_lengthhint.py
+++ b/pypy/objspace/std/test/test_lengthhint.py
@@ -1,3 +1,6 @@
+
+from rpython.rlib import rutf8
+
from pypy.module._collections.interp_deque import W_Deque
from pypy.module.itertools.interp_itertools import W_Repeat
@@ -71,7 +74,8 @@ class TestLengthHint:
self._test_length_hint(self.space.wrap('P' * self.SIZE))
def test_unicode(self):
- self._test_length_hint(self.space.wrap(u'Y' * self.SIZE))
+ self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE,
+ rutf8.FLAG_ASCII))
def test_tuple(self):
self._test_length_hint(self.space.wrap(tuple(self.ITEMS)))
diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py
index ccae5975b6..3b5e297e2d 100644
--- a/pypy/objspace/std/test/test_liststrategies.py
+++ b/pypy/objspace/std/test/test_liststrategies.py
@@ -22,7 +22,7 @@ class TestW_ListStrategies(TestW_ListObject):
BytesListStrategy)
#assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy,
# UnicodeListStrategy)
- assert isinstance(W_ListObject(space, [w(u'a'), wb('b')]).strategy,
+ assert isinstance(W_ListObject(space, [space.newutf8('a', 1, 0), wb('b')]).strategy,
ObjectListStrategy) # mixed unicode and bytes
def test_empty_to_any(self):
diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py
index 1d394e1cd7..080e942790 100644
--- a/pypy/objspace/std/test/test_obj.py
+++ b/pypy/objspace/std/test/test_obj.py
@@ -17,7 +17,7 @@ class AppTestObject:
cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info'))
def w_unwrap_wrap_unicode(space, w_obj):
- return space.newutf8(space.utf8_w(w_obj), w_obj._length)
+ return space.newutf8(space.utf8_w(w_obj), w_obj._length, w_obj._get_flag())
cls.w_unwrap_wrap_unicode = space.wrap(gateway.interp2app(w_unwrap_wrap_unicode))
def w_unwrap_wrap_str(space, w_obj):
return space.wrap(space.str_w(w_obj))
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index f050138cbb..4f326fc066 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1834,7 +1834,7 @@ def unicode_to_decimal_w(space, w_unistr):
if not isinstance(w_unistr, W_UnicodeObject):
raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
unistr = w_unistr._utf8
- result = ['\0'] * len(unistr)
+ result = ['\0'] * w_unistr._length
digits = ['0', '1', '2', '3', '4',
'5', '6', '7', '8', '9']
i = 0
@@ -1843,6 +1843,8 @@ def unicode_to_decimal_w(space, w_unistr):
uchr = rutf8.codepoint_at_pos(unistr, i)
if rutf8.isspace(unistr, i):
result[res_pos] = ' '
+ res_pos += 1
+ i = rutf8.next_codepoint_pos(unistr, i)
continue
try:
result[res_pos] = digits[unicodedb.decimal(uchr)]