diff options
author | CF Bolz-Tereick <cfbolz@gmx.de> | 2024-03-02 14:17:46 +0100 |
---|---|---|
committer | Michał Górny <mgorny@gentoo.org> | 2024-04-04 17:26:30 +0200 |
commit | 813a9a7ff6dec3b75f4e3d2c01d852b336cde09d (patch) | |
tree | cf465845e5cd4fd939392d615c1e85f1ae252822 | |
parent | implement dict.__ror__ too (diff) | |
download | pypy-gentoo-3.10-7.3.15_p1.tar.gz pypy-gentoo-3.10-7.3.15_p1.tar.bz2 pypy-gentoo-3.10-7.3.15_p1.zip |
Merge branch 'py3.9' into py3.10 [gentoo-3.10-7.3.15_p1]
(expandtabs fix, possibly more)
-rw-r--r-- | pypy/module/cpyext/test/test_unicodeobject.py | 69 | ||||
-rw-r--r-- | pypy/objspace/std/stringmethods.py | 19 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_bytesobject.py | 2 | ||||
-rw-r--r-- | pypy/objspace/std/test/test_unicodeobject.py | 6 | ||||
-rw-r--r-- | pypy/objspace/std/unicodeobject.py | 47 | ||||
-rw-r--r-- | pypy/objspace/test/test_descroperation.py | 1 | ||||
-rw-r--r-- | rpython/translator/backendopt/all.py | 1 | ||||
-rw-r--r-- | rpython/translator/backendopt/storesink.py | 53 | ||||
-rw-r--r-- | rpython/translator/backendopt/test/test_all.py | 25 |
9 files changed, 159 insertions, 64 deletions
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py index 410f30635c..d33d5ebcde 100644 --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -1019,58 +1019,55 @@ class TestUnicode(BaseApiTest): 0x2009, 0x200a, #0x200b is in Other_Default_Ignorable_Code_Point in 4.1.0 0x2028, 0x2029, 0x202f, 0x205f, 0x3000]: - assert Py_UNICODE_ISSPACE(space, unichr(char)) - assert not Py_UNICODE_ISSPACE(space, u'a') - - assert Py_UNICODE_ISALPHA(space, u'a') - assert not Py_UNICODE_ISALPHA(space, u'0') - assert Py_UNICODE_ISALNUM(space, u'a') - assert Py_UNICODE_ISALNUM(space, u'0') - assert not Py_UNICODE_ISALNUM(space, u'+') - - assert Py_UNICODE_ISDECIMAL(space, u'\u0660') - assert not Py_UNICODE_ISDECIMAL(space, u'a') - assert Py_UNICODE_ISDIGIT(space, u'9') - assert not Py_UNICODE_ISDIGIT(space, u'@') - assert Py_UNICODE_ISNUMERIC(space, u'9') - assert not Py_UNICODE_ISNUMERIC(space, u'@') + assert Py_UNICODE_ISSPACE(space, char) + assert not Py_UNICODE_ISSPACE(space, ord('a')) + + assert Py_UNICODE_ISALPHA(space, ord('a')) + assert not Py_UNICODE_ISALPHA(space, ord('0')) + assert Py_UNICODE_ISALNUM(space, ord('a')) + assert Py_UNICODE_ISALNUM(space, ord('0')) + assert not Py_UNICODE_ISALNUM(space, ord('+')) + + assert Py_UNICODE_ISDECIMAL(space, ord(u'\u0660')) + assert not Py_UNICODE_ISDECIMAL(space, ord('a')) + assert Py_UNICODE_ISDIGIT(space, ord('9')) + assert not Py_UNICODE_ISDIGIT(space, ord('@')) + assert Py_UNICODE_ISNUMERIC(space, ord('9')) + assert not Py_UNICODE_ISNUMERIC(space, ord('@')) for char in [0x0a, 0x0d, 0x1c, 0x1d, 0x1e, 0x85, 0x2028, 0x2029]: - assert Py_UNICODE_ISLINEBREAK(space, unichr(char)) + assert Py_UNICODE_ISLINEBREAK(space, char) - assert Py_UNICODE_ISLOWER(space, u'\xdf') # sharp s - assert Py_UNICODE_ISUPPER(space, u'\xde') # capital thorn - assert Py_UNICODE_ISLOWER(space, u'a') - assert not Py_UNICODE_ISUPPER(space, u'a') - assert not 
Py_UNICODE_ISTITLE(space, u'\xce') + assert Py_UNICODE_ISLOWER(space, ord('\xdf')) # sharp s + assert Py_UNICODE_ISUPPER(space, ord('\xde')) # capital thorn + assert Py_UNICODE_ISLOWER(space, ord('a')) + assert not Py_UNICODE_ISUPPER(space, ord('a')) + assert not Py_UNICODE_ISTITLE(space, ord('\xce')) assert Py_UNICODE_ISTITLE(space, - u'\N{LATIN CAPITAL LETTER L WITH SMALL LETTER J}') + ord(u'\N{LATIN CAPITAL LETTER L WITH SMALL LETTER J}')) def test_TOLOWER(self, space): - assert Py_UNICODE_TOLOWER(space, u'�') == u'�' - assert Py_UNICODE_TOLOWER(space, u'�') == u'�' + assert Py_UNICODE_TOLOWER(space, ord(u'�') == ord(u'�')) + assert Py_UNICODE_TOLOWER(space, ord(u'�') == ord(u'�')) def test_TOUPPER(self, space): - assert Py_UNICODE_TOUPPER(space, u'�') == u'�' - assert Py_UNICODE_TOUPPER(space, u'�') == u'�' + assert Py_UNICODE_TOUPPER(space, ord(u'�') == ord(u'�')) + assert Py_UNICODE_TOUPPER(space, ord(u'�') == ord(u'�')) def test_TOTITLE(self, space): - assert Py_UNICODE_TOTITLE(space, u'/') == u'/' - assert Py_UNICODE_TOTITLE(space, u'�') == u'�' - assert Py_UNICODE_TOTITLE(space, u'�') == u'�' + assert Py_UNICODE_TOTITLE(space, ord('/') == ord('/')) def test_TODECIMAL(self, space): - assert Py_UNICODE_TODECIMAL(space, u'6') == 6 - assert Py_UNICODE_TODECIMAL(space, u'A') == -1 + assert Py_UNICODE_TODECIMAL(space, ord('6')) == 6 + assert Py_UNICODE_TODECIMAL(space, ord('A')) == -1 def test_TODIGIT(self, space): - assert Py_UNICODE_TODIGIT(space, u'6') == 6 - assert Py_UNICODE_TODIGIT(space, u'A') == -1 + assert Py_UNICODE_TODIGIT(space, ord('6')) == 6 + assert Py_UNICODE_TODIGIT(space, ord('A')) == -1 def test_TONUMERIC(self, space): - assert Py_UNICODE_TONUMERIC(space, u'6') == 6.0 - assert Py_UNICODE_TONUMERIC(space, u'A') == -1.0 - assert Py_UNICODE_TONUMERIC(space, u'\N{VULGAR FRACTION ONE HALF}') == .5 + assert Py_UNICODE_TONUMERIC(space, ord('6')) == 6.0 + assert Py_UNICODE_TONUMERIC(space, ord('A')) == -1.0 def test_transform_decimal(self, space): def 
transform_decimal(s): diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py index cafafd4d9e..8c474bbd12 100644 --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -225,14 +225,19 @@ class StringMethods(object): ovfcheck(len(splitted) * tabsize) except OverflowError: raise oefmt(space.w_OverflowError, "new string is too long") - expanded = oldtoken = splitted.pop(0) - - for token in splitted: - expanded += self._multi_chr(self._chr(' ')) * self._tabindent(oldtoken, - tabsize) + token + newlen = self._len() - len(splitted) + 1 + builder = self._builder(len(value)) + oldtoken = splitted[0] + builder.append(oldtoken) + + for index in range(1, len(splitted)): + token = splitted[index] + dist = self._tabindent(oldtoken, tabsize) + builder.append_multiple_char(' ', dist) + builder.append(token) + newlen += dist oldtoken = token - - return self._new(expanded) + return self._new(builder.build()) def _tabindent(self, token, tabsize): """calculates distance behind the token to the next tabstop""" diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py index 3d2d1a6dc6..a39432263e 100644 --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -542,8 +542,6 @@ class AppTestBytesObject: def test_expandtabs_overflows_gracefully(self): import sys - if sys.maxsize > (1 << 32): - skip("Wrong platform") raises((MemoryError, OverflowError), b't\tt\t'.expandtabs, sys.maxsize) def test_expandtabs_0(self): diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py index b33daa03ba..1fe0a792c1 100644 --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -710,14 +710,16 @@ class AppTestUnicodeString: def test_expandtabs_overflows_gracefully(self): import sys - if sys.maxsize > (1 << 32): - skip("Wrong platform") raises((OverflowError, 
MemoryError), 't\tt\t'.expandtabs, sys.maxsize) def test_expandtabs_0(self): assert u'x\ty'.expandtabs(0) == u'xy' assert u'x\ty'.expandtabs(-42) == u'xy' + def test_expandtabs_bug(self): + assert u"a\u266f\ttest".expandtabs() == u'a\u266f test' + assert u"a\u266f\ttest".expandtabs(0) == u'a\u266ftest' + def test_translate(self): import sys assert 'bbbc' == 'abababc'.translate({ord('a'):None}) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py index b11fa1863a..9a641b952f 100644 --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -605,24 +605,57 @@ class W_UnicodeObject(W_Root): value = self._utf8 if not value: return self._empty() + if tabsize == 0: + res, replacements = replace_count(value, '\t', '') + if not replacements and type(self) is W_UnicodeObject: + return self + newlength = self._length - replacements + assert res is not None + return W_UnicodeObject(res, newlength) splitted = value.split('\t') try: - if tabsize > 0: - ovfcheck(len(splitted) * tabsize) + ovfcheck(len(splitted) * tabsize) except OverflowError: raise oefmt(space.w_OverflowError, "new string is too long") - expanded = oldtoken = splitted.pop(0) - newlen = self._len() - len(splitted) + newlen = self._len() - len(splitted) + 1 + builder = StringBuilder(len(value)) + oldtoken = splitted[0] + builder.append(oldtoken) - for token in splitted: + for index in range(1, len(splitted)): + token = splitted[index] dist = self._tabindent(oldtoken, tabsize) - expanded += ' ' * dist + token + builder.append_multiple_char(' ', dist) + builder.append(token) newlen += dist oldtoken = token - return W_UnicodeObject(expanded, newlen) + return W_UnicodeObject(builder.build(), newlen) + + def _tabindent(self, token, tabsize): + if tabsize <= 0: + return 0 + distance = tabsize + if token: + distance = 0 + offset = len(token) + + while 1: + if token[offset-1] == "\n" or token[offset-1] == "\r": + break + distance += 1 + offset = 
rutf8.prev_codepoint_pos(token, offset) + if offset == 0: + break + + # the same like distance = len(token) - (offset + 1) + distance = (tabsize - distance) % tabsize + if distance == 0: + distance = tabsize + + return distance def _join_utf8_len_w(self, space, w_element, i): try: diff --git a/pypy/objspace/test/test_descroperation.py b/pypy/objspace/test/test_descroperation.py index ba4d7d3f6e..67f885ab0d 100644 --- a/pypy/objspace/test/test_descroperation.py +++ b/pypy/objspace/test/test_descroperation.py @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +from pytest import raises class Test_DescrOperation: diff --git a/rpython/translator/backendopt/all.py b/rpython/translator/backendopt/all.py index af0a36a171..9b96c18c67 100644 --- a/rpython/translator/backendopt/all.py +++ b/rpython/translator/backendopt/all.py @@ -90,6 +90,7 @@ def backend_optimizations(translator, graphs=None, secondary=False, constfold(config, graphs) if config.storesink: + remove_obvious_noops() for graph in graphs: storesink_graph(graph) diff --git a/rpython/translator/backendopt/storesink.py b/rpython/translator/backendopt/storesink.py index ab1c04565a..be59264741 100644 --- a/rpython/translator/backendopt/storesink.py +++ b/rpython/translator/backendopt/storesink.py @@ -1,6 +1,7 @@ from rpython.rtyper.lltypesystem.lloperation import llop -from rpython.flowspace.model import mkentrymap, Variable +from rpython.rtyper.lltypesystem import lltype +from rpython.flowspace.model import mkentrymap, Variable, Constant from rpython.translator.backendopt import removenoops from rpython.translator import simplify @@ -75,25 +76,57 @@ def _storesink_block(block, cache, inputlink): for k in cache.keys(): if k[0].concretetype == concretetype and k[1] == fieldname: del cache[k] + replacements = {} + def replace(op, res): + op.opname = 'same_as' + op.args = [res] + replacements[op.result] = res + + def get_rep(arg): + return replacements.get(arg, arg) added_some_same_as = False for op in block.operations: if 
op.opname == 'getfield': - tup = (op.args[0], op.args[1].value) - res = cache.get(tup, None) - if res is not None: - op.opname = 'same_as' - op.args = [res] - added_some_same_as = True + arg0 = get_rep(op.args[0]) + field = op.args[1].value + if ( + isinstance(arg0, Constant) and + arg0.concretetype.TO._immutable_field(field) and + not isinstance(arg0.value._obj, int) # tagged int + ): + # reading an immutable field from a constant + llres = getattr(arg0.value, field) + concretetype = getattr(arg0.concretetype.TO, field) + res = Constant(llres, concretetype) + replace(op, res) + else: + tup = (arg0, op.args[1].value) + res = cache.get(tup, None) + if res is not None: + replace(op, res) + else: + cache[tup] = op.result + elif op.opname == 'cast_pointer': + arg0 = get_rep(op.args[0]) + if isinstance(arg0, Constant): + llres = lltype.cast_pointer(op.result.concretetype, arg0.value) + res = Constant(llres, op.result.concretetype) + replace(op, res) else: - cache[tup] = op.result + tup = (arg0, op.result.concretetype) + res = cache.get(tup, None) + if res is not None: + replace(op, res) + else: + cache[tup] = op.result elif op.opname in ('setarrayitem', 'setinteriorfield', "malloc", "malloc_varsize"): pass elif op.opname == 'setfield': - target = op.args[0] + target = get_rep(op.args[0]) field = op.args[1].value clear_cache_for(cache, target.concretetype, field) cache[target, field] = op.args[2] elif has_side_effects(op): cache.clear() - return added_some_same_as + return bool(replacements) diff --git a/rpython/translator/backendopt/test/test_all.py b/rpython/translator/backendopt/test/test_all.py index 0081fce223..346be88c90 100644 --- a/rpython/translator/backendopt/test/test_all.py +++ b/rpython/translator/backendopt/test/test_all.py @@ -305,3 +305,28 @@ class TestLLType(object): t = self.translateopt(f, [], replace_we_are_jitted=True) graph = graphof(t, f) assert graph.startblock.exits[0].args[0].value == 2 + + def test_getfield_vtable(self): + class Base(object): + 
pass + class A(Base): + def f(self): + return 1 + class B(Base): + def f(self): + return 2 + def g(i): + if i > 0: + return A() + return B() + def f(i): + if i > 0: + a = g(5) + return a.f() + else: + a = g(-5) + return a.f() + t = self.translateopt(f, [int]) + graph = graphof(t, f) + s = summary(graph) + assert 'getfield' not in s |