aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRonan Lamy <ronan.lamy@gmail.com>2020-11-25 20:25:40 +0000
committerRonan Lamy <ronan.lamy@gmail.com>2020-11-25 20:25:40 +0000
commit3c2bacc346bf0557e146c909b1723568ac2f02d2 (patch)
treef83daf6cb1c7449b78984e9a9c2ee358b67fa4ac
parenthg merge default (diff)
parentfix for a case where the difference shows up with recent unicode databases only (diff)
downloadpypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.tar.gz
pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.tar.bz2
pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.zip
hg merge default
-rw-r--r--.gitlab-ci.yml7
-rw-r--r--.gitlab-ci/Dockerfile28
-rw-r--r--.gitlab-ci/build.sh10
-rw-r--r--.hgtags6
-rw-r--r--lib-python/2.7/test/test_xml_etree.py20
-rw-r--r--lib-python/2.7/xml/etree/ElementTree.py2
-rw-r--r--pypy/doc/contributing.rst31
-rw-r--r--pypy/doc/how-to-release.rst8
-rw-r--r--pypy/doc/project-ideas.rst34
-rw-r--r--pypy/doc/release-v7.3.3.rst27
-rw-r--r--pypy/doc/whatsnew-head.rst11
-rwxr-xr-xpypy/tool/release/repackage.sh8
-rw-r--r--pypy/tool/test/test_tab.py2
-rw-r--r--rpython/rlib/rsre/rpy/_sre.py3
-rw-r--r--rpython/rlib/rsre/rpy/sre_constants.py40
-rw-r--r--rpython/rlib/rsre/rsre_char.py76
-rw-r--r--rpython/rlib/rsre/rsre_constants.py66
-rw-r--r--rpython/rlib/rsre/rsre_core.py251
-rw-r--r--rpython/rlib/rsre/rsre_utf8.py9
-rw-r--r--rpython/rlib/rsre/test/support.py4
-rw-r--r--rpython/rlib/rsre/test/test_char.py12
-rw-r--r--rpython/rlib/rsre/test/test_match.py10
-rw-r--r--rpython/rlib/rwinreg.py40
23 files changed, 492 insertions, 213 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b5964fff08..7ac5eb3c13 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,4 +1,5 @@
check_rpython_annotations:
- image: octobus/ci-for-pypy
- script:
- - (cd pypy/goal; python2 ../../rpython/bin/rpython --batch -O2 --annotate targetpypystandalone)
+ image: registry.heptapod.net/pypy/pypy/ci:v1
+ script: |
+ cd pypy/goal
+ python2 ../../rpython/bin/rpython --batch -O2 --annotate targetpypystandalone
diff --git a/.gitlab-ci/Dockerfile b/.gitlab-ci/Dockerfile
new file mode 100644
index 0000000000..a2cfc4cc22
--- /dev/null
+++ b/.gitlab-ci/Dockerfile
@@ -0,0 +1,28 @@
+FROM debian:buster
+
+RUN apt-get update && \
+ apt-get install -y \
+ build-essential \
+ gcc \
+ libbz2-dev \
+ libexpat1-dev \
+ libffi-dev \
+ libgc-dev \
+ libgdbm-dev \
+ liblzma-dev \
+ libncurses5-dev \
+ libncursesw5-dev \
+ libsqlite3-dev \
+ libssl-dev \
+ make \
+ pkg-config \
+ pypy-dev \
+ python \
+ python-cffi \
+ python-dev \
+ python-pip \
+ python-virtualenv \
+ tk-dev \
+ zlib1g-dev \
+ && \
+ apt-get -y clean
diff --git a/.gitlab-ci/build.sh b/.gitlab-ci/build.sh
new file mode 100644
index 0000000000..68859cbc7a
--- /dev/null
+++ b/.gitlab-ci/build.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -e
+
+GITLAB="registry.heptapod.net"
+TAG="${GITLAB}/pypy/pypy/ci:v1"
+
+docker build --tag "${TAG}" --file "Dockerfile" .
+
+echo -e "run: \n docker login ${GITLAB} # first time\n docker push ${TAG}"
\ No newline at end of file
diff --git a/.hgtags b/.hgtags
index a7138fad73..b3b27f5cfa 100644
--- a/.hgtags
+++ b/.hgtags
@@ -95,3 +95,9 @@ d38cd66c14b86430155e2a122e59648e93011cc0 release-pypy3.6-v7.3.2
a29ef73f9b32953753d0dd6d2a56255fa2892e24 release-pypy2.7-v7.3.3rc1
fab92f174c7754272e9bef31d2a6d66d8d45188b release-pypy3.6-v7.3.3rc1
6cf8fa20a7f6fc61dc07402e1e859cf31bf977ad release-pypy3.7-v7.3.3rc1
+a29ef73f9b32953753d0dd6d2a56255fa2892e24 release-pypy2.7-v7.3.3rc2
+db1e853f94de42ad711bd930222bd2434e0f900d release-pypy3.6-v7.3.3rc2
+7e6e2bb30ac5fbdbd443619cae28c51d5c162a02 release-pypy3.7-v7.3.3rc2
+a29ef73f9b32953753d0dd6d2a56255fa2892e24 release-pypy2.7-v7.3.3
+db1e853f94de42ad711bd930222bd2434e0f900d release-pypy3.6-v7.3.3
+7e6e2bb30ac5fbdbd443619cae28c51d5c162a02 release-pypy3.7-v7.3.3
diff --git a/lib-python/2.7/test/test_xml_etree.py b/lib-python/2.7/test/test_xml_etree.py
index f6d5d17fc3..f1f1c21cd1 100644
--- a/lib-python/2.7/test/test_xml_etree.py
+++ b/lib-python/2.7/test/test_xml_etree.py
@@ -87,6 +87,19 @@ ENTITY_XML = """\
<document>&entity;</document>
"""
+# backport from https://github.com/python/cpython/pull/22987
+ATTLIST_XML = """\
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE Foo [
+<!ELEMENT foo (bar*)>
+<!ELEMENT bar (#PCDATA)*>
+<!ATTLIST bar xml:lang CDATA "eng">
+<!ENTITY qux "quux">
+]>
+<foo>
+<bar>&qux;</bar>
+</foo>
+"""
def checkwarnings(*filters):
def decorator(test):
@@ -1002,6 +1015,13 @@ class ElementTreeTest(unittest.TestCase):
method='html')
self.assertEqual(serialized, expected)
+ # backported from https://github.com/python/cpython/pull/22987
+ def test_attlist_default(self):
+ # Test default attribute values; See BPO 42151.
+ root = ET.fromstring(ATTLIST_XML)
+ self.assertEqual(root[0].attrib,
+ {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
+
#
# xinclude tests (samples from appendix C of the xinclude specification)
diff --git a/lib-python/2.7/xml/etree/ElementTree.py b/lib-python/2.7/xml/etree/ElementTree.py
index dca69106d1..4cae355398 100644
--- a/lib-python/2.7/xml/etree/ElementTree.py
+++ b/lib-python/2.7/xml/etree/ElementTree.py
@@ -1226,7 +1226,6 @@ class _IterParseIterator(object):
if event == "start":
try:
parser.ordered_attributes = 1
- parser.specified_attributes = 1
def handler(tag, attrib_in, event=event, append=append,
start=self._parser._start_list):
append((event, start(tag, attrib_in)))
@@ -1505,7 +1504,6 @@ class XMLParser(object):
# use new-style attribute handling, if supported
try:
self._parser.ordered_attributes = 1
- self._parser.specified_attributes = 1
parser.StartElementHandler = self._start_list
except AttributeError:
pass
diff --git a/pypy/doc/contributing.rst b/pypy/doc/contributing.rst
index f5477e5805..959cecb065 100644
--- a/pypy/doc/contributing.rst
+++ b/pypy/doc/contributing.rst
@@ -40,14 +40,16 @@ details of which can be found in our :ref:`contact <contact>` section. The folks
there are very friendly, and can point you in the right direction.
We give out commit rights usually fairly liberally, so if you want to do something
-with PyPy, you can become a committer. We also run frequent coding sprints which
-are separately announced and often happen around Python conferences such as
-EuroPython or PyCon. Upcoming events are usually announced on `the blog`_.
+with PyPy, you can become a "developer" by logging into https://foss.heptapod.net
+and clicking the "Request Access" link on the `PyPy group page`_. We also run
+coding sprints, which are announced separately, usually on
+`the blog`_.
Further Reading: :ref:`Contact <contact>`
.. _the blog: https://morepypy.blogspot.com
.. _pypy-dev mailing list: https://mail.python.org/mailman/listinfo/pypy-dev
+.. _`PyPy group page`: https://foss.heptapod.net/pypy
Your first contribution
@@ -96,31 +98,19 @@ Thanks to `Octobus <https://octobus.net/>`_ and `Clever Cloud
</a>
</h1>
-If you are new with Mercurial and Heptapod, you can read this `short tutorial`_
-
-.. _`short tutorial`: https://heptapod.net/pages/quick-start-guide.html
-
-However, we recommend at this time you **not** use topic branches. We prefer
-the usual mercurial named branch model, as pointed out in the :ref:`FAQ
-<github>` about why we didn't move to git.
-
Get Access
----------
-The important take-away from that tutorial for experienced developers is that
-since the free hosting on foss.heptapod.net does not allow personal forks, you
+As stated above, you need to request access to the repo.
+Since the free hosting on foss.heptapod.net does not allow personal forks, you
need permissions to push your changes directly to our repo. Once you sign in to
https://foss.heptapod.net using either a new login or your GitHub or Atlassian
logins, you can get developer status for pushing directly to
the project (just ask by clicking the link at foss.heptapod.net/pypy just under
the logo, and you'll get it, basically). Once you have it you can rewrite your
file ``.hg/hgrc`` to contain ``default = ssh://hg@foss.heptapod.net/pypy/pypy``.
-Your changes will then be pushed directly to the official repo, but (if you
-follow these rules) they are still on a branch, and we can still review the
-branches you want to merge. With developer status, you can push topic
-branches. If you wish to push long-lived branches, you will need to ask for
-higher permissions.
-
+Your changes will then be pushed directly to a branch on the official repo, and
+we will review the branches you want to merge.
Clone
-----
@@ -138,8 +128,7 @@ Clone
then edit ``.hg/hgrc`` as above and do ``hg pull && hg up``.
* Now you have a complete copy of the PyPy repo. Make a long-lived branch
- with a command like ``hg branch name_of_your_branch``, or make a short-
- lived branch for a simple fix with a command like ``hg topic issueXXXX``.
+ with a command like ``hg branch name_of_your_branch``.
Edit
----
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
index 43cc823fcb..81d6fe0ee2 100644
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -96,6 +96,10 @@ Other steps
create a fresh whatsnew_head.rst after the release
and add the new file to pypy/doc/index-of-whatsnew.rst
+ * rename pypy/doc/whatsnew-pypy3-HEAD.rst to whatsnew-pypy3-VERSION.rst
+ create a fresh whatsnew-pypy3-HEAD.rst after the release
+ and add the new file to pypy/doc/index-of-whatsnew.rst
+
* write release announcement pypy/doc/release-VERSION.rst
The release announcement should contain a direct link to the download page
@@ -125,7 +129,8 @@ Other steps
Also repackage and upload source "-src.tar.bz2"
- * Upload binaries to https://buildbot.pypy.org/mirror
+ * Upload binaries to https://buildbot.pypy.org/mirror. It takes an hour for
+ https://downloads.python.org/pypy/ to sync
* Send out a mailing list message asking for last-minute comments and testing
@@ -142,5 +147,4 @@ Other steps
* add a tag on the codespeed web site that corresponds to pypy release
* revise versioning at https://readthedocs.org/projects/pypy
- * tag the final release(s) with appropriate tags
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
index 9eeabedd49..70026f9597 100644
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -150,26 +150,18 @@ knowledge of the internals. Head over to `vmprof-python`_, `vmprof-server`_ and
.. _vmprof-server: https://github.com/vmprof/vmprof-server
.. _vmprof-integration: https://github.com/vmprof/vmprof-integration
-Optimized Unicode Representation
---------------------------------
-
-CPython 3.3 will use an optimized unicode representation (see :pep:`0393`) which switches between
-different ways to represent a unicode string, depending on whether the string
-fits into ASCII, has only two-byte characters or needs four-byte characters.
-
-The actual details would be rather different in PyPy, but we would like to have
-the same optimization implemented.
-
-Or maybe not. We can also play around with the idea of using a single
-representation: as a byte string in utf-8. (This idea needs some extra logic
-for efficient indexing, like a cache.) Work has begun on the ``unicode-utf``
-and ``unicode-utf8-py3`` branches. More is needed, for instance there are
-SIMD optimizations that are not yet used.
-
Convert RPython to Python3
--------------------------
-The world is moving on, we should too.
+The world is moving on, we should too. Work in this direction has begun on the
+``rpython3`` branch, mainly to enable building documentation with Python3. Some
+things that are known to need careful refactoring:
+- a single character in python3 is an int, not a byte
+- we use ``str``/``unicode`` to distinguish between different modes of
+ operation for windows in ``make_win32_traits``.
+
+There are probably more. The branch currently does not pass rpython tests, so
+work is needed to back out some of the changes and redo them properly.
Improve performance
-------------------
@@ -273,7 +265,10 @@ and it is hard to imagine NumPy abandoning the C-API. Here are a few ideas:
Support more platforms
----------------------
-We have a plan for a `Windows 64`_ port.
+We have a plan for a `Windows 64`_ port. There is progress on the ``win64``
+branch. Help is needed to continue the work. Stage I is complete: we now have
+a 64-bit PyPy2.7 on windows. But it is missing cpyext and other tidbits to
+enable releasing it.
.. _`Windows 64`: windows.html#what-is-missing-for-a-full-64-bit-translation
@@ -322,3 +317,6 @@ good work that needs to be finished:
TODO: see the end of the blog post
+Work has begun on HPy_ to enable a faster C-API.
+
+.. _HPy: https://hpy.readthedocs.io/en/latest/
diff --git a/pypy/doc/release-v7.3.3.rst b/pypy/doc/release-v7.3.3.rst
index 2a072b1525..ea6c0cf537 100644
--- a/pypy/doc/release-v7.3.3.rst
+++ b/pypy/doc/release-v7.3.3.rst
@@ -2,12 +2,6 @@
PyPy v7.3.3: release of 2.7, 3.6, and 3.7 beta
==============================================
-.. note::
- This is a pre-release announcement. When the release actually happens, it
- will be announced on the `morepypy blog`_
-
-.. _`morepypy blog`: https://morepypy.blogspot.com
-
-The PyPy team is proud to release the version 7.3.3 of PyPy, which includes
three different interpreters:
@@ -39,9 +33,9 @@ releases, but read on to find out what is new.
interpreters like GraalPython_ (written on top of the Java virtual machine),
RustPython_, and PyPy. Thanks to Oracle for sponsoring work on HPy.
-Several issues were exposed in the 7.3.2 release. Many of them came from the
-great work ongoing to ship PyPy packages in `conda-forge`_. A big shout out
-to them for taking this on.
+Several issues exposed in the 7.3.2 release were fixed. Many of them came from the
+great work ongoing to ship PyPy-compatible binary packages in `conda-forge`_.
+A big shout out to them for taking this on.
Development of PyPy has moved to https://foss.heptapod.net/pypy/pypy.
This was covered more extensively in this `blog post`_. We have seen an
@@ -52,7 +46,7 @@ The `CFFI`_ backend has been updated to version 1.14.3. We recommend using CFFI
rather than c-extensions to interact with C, and using cppyy_ for performant
wrapping of C++ code for Python.
-A new contributor took us up on the challenge to get `windows 64-bit`` support.
+A new contributor took us up on the challenge to get `windows 64-bit`_ support.
The work is proceeding on the ``win64`` branch, more help in coding or
sponsorship is welcome. In anticipation of merging this large change, we fixed
many test failures on windows.
@@ -161,17 +155,22 @@ Python 3.6+
- bpo-17288_: Prevent jump from a yield statement
- bpo-11471_: avoid generating a ``JUMP_FORWARD`` instruction at the end of an
``if``-block if there is no ``else``-clause
-- Fix ``os.listdir('')`` and ``os.stat('')`` on windows (issue 3331)
+- Fix ``os.listdir('')`` and ``os.stat('')`` on windows (issue 3331_)
- Fix many unicode encoding/decoding errors on windows
-- Fix pickling of time subclasses (issue 3324, bpo 41966)
-- Add support for ``sqlite3_load_extension`` (issue 3334)
+- Fix pickling of time subclasses (issue 3324_, bpo-41966_)
+- Add support for ``sqlite3_load_extension`` (issue 3334_)
- Change default file encoding from mbcs to utf-8 on windows
- Change default file encoding from ascii to utf-8 on linux
- Add ``resource.prlimit()``
+- Accept PathLike in ``nt._getfullpathname`` (issue 3343_)
+- Fix some problems with ``winreg``
+
Python 3.6 C-API
~~~~~~~~~~~~~~~~
+- Export ``PyStructSequence_NewType`` (issue 3346_)
+
.. _3312: https://foss.heptapod.net/pypy/pypy/-/issues/3312
.. _3315: https://foss.heptapod.net/pypy/pypy/-/issues/3315
.. _3321: https://foss.heptapod.net/pypy/pypy/-/issues/3321
@@ -180,6 +179,8 @@ Python 3.6 C-API
.. _3324: https://foss.heptapod.net/pypy/pypy/-/issues/3324
.. _3334: https://foss.heptapod.net/pypy/pypy/-/issues/3334
.. _3307: https://foss.heptapod.net/pypy/pypy/-/issues/3307
+.. _3343: https://foss.heptapod.net/pypy/pypy/-/issues/3343
+.. _3346: https://foss.heptapod.net/pypy/pypy/-/issues/3346
.. _`merge request 723`: https://foss.heptapod.net/pypy/pypy/-/merge_request/723
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
index 5f21ccff61..6fe082cc9a 100644
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,14 @@ What's new in PyPy2.7 7.3.3+
.. this is a revision shortly after release-pypy-7.3.3
.. startrev: de512cf13506
+.. branch: new-ci-image
+
+CI: Add a Dockerfile for CI to prevent hitting pull limits on docker hub
+
+.. branch: issue-3333
+
+Fix xml.etree.ElementTree assigning default attribute values: issue 3333
+
+.. branch: rpython-rsre-for-37
+
+Support for the new format of regular expressions in Python 3.7
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
index b6b4f801c2..29636ab220 100755
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -1,12 +1,12 @@
#! /bin/bash
# Edit these appropriately before running this script
-pmaj=2 # python main version: 2 or 3
-pmin=7 # python minor version
+pmaj=3 # python main version: 2 or 3
+pmin=6 # python minor version
maj=7
min=3
-rev=2
-# rc=rc3 # set to blank for actual release
+rev=3
+#rc=rc2 # set to blank for actual release
function maybe_exit {
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
diff --git a/pypy/tool/test/test_tab.py b/pypy/tool/test/test_tab.py
index 0aea831a39..ce96b61cf0 100644
--- a/pypy/tool/test/test_tab.py
+++ b/pypy/tool/test/test_tab.py
@@ -44,6 +44,8 @@ def test_no_pypy_import_in_rpython():
if os.path.isfile(path):
if not path.lower().endswith('.py'):
return
+ if path.lower().endswith('rsre_constants.py'):
+ return # exception in this file
with file(path) as f:
for line in f:
if "import" not in line:
diff --git a/rpython/rlib/rsre/rpy/_sre.py b/rpython/rlib/rsre/rpy/_sre.py
index 617345483a..70d7737297 100644
--- a/rpython/rlib/rsre/rpy/_sre.py
+++ b/rpython/rlib/rsre/rpy/_sre.py
@@ -22,6 +22,9 @@ def get_code(regexp, flags=0, allargs=False):
"""NOT_RPYTHON: you can't compile new regexps in an RPython program,
you can only use precompiled ones"""
from . import sre_compile
+ if rsre_constants.V37:
+ import pytest
+ pytest.skip("This test cannot run in a 3.7 branch of pypy")
try:
sre_compile.compile(regexp, flags)
except GotIt as e:
diff --git a/rpython/rlib/rsre/rpy/sre_constants.py b/rpython/rlib/rsre/rpy/sre_constants.py
index 89cbdb0d5f..4b9deac743 100644
--- a/rpython/rlib/rsre/rpy/sre_constants.py
+++ b/rpython/rlib/rsre/rpy/sre_constants.py
@@ -94,35 +94,17 @@ CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
-OPCODES = [
-
- # failure=0 success=1 (just because it looks better that way :-)
- FAILURE, SUCCESS,
-
- ANY, ANY_ALL,
- ASSERT, ASSERT_NOT,
- AT,
- BRANCH,
- CALL,
- CATEGORY,
- CHARSET, BIGCHARSET,
- GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
- IN, IN_IGNORE,
- INFO,
- JUMP,
- LITERAL, LITERAL_IGNORE,
- MARK,
- MAX_UNTIL,
- MIN_UNTIL,
- NOT_LITERAL, NOT_LITERAL_IGNORE,
- NEGATE,
- RANGE,
- REPEAT,
- REPEAT_ONE,
- SUBPATTERN,
- MIN_REPEAT_ONE,
- RANGE_IGNORE,
-]
+def _rpython_opcodes():
+ from rpython.rlib.rsre import rsre_constants as consts
+ mapping = {}
+ for name, value in consts.__dict__.items():
+ if name.startswith('OPCODE') and isinstance(value, int) and value < 70:
+ name = name[6:].lstrip('012346789_').lower()
+ mapping[value] = name
+ # check that there are no holes
+ assert sorted(mapping.keys()) == range(len(mapping))
+ return [name for value, name in sorted(mapping.items())]
+OPCODES = _rpython_opcodes()
ATCODES = [
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
diff --git a/rpython/rlib/rsre/rsre_char.py b/rpython/rlib/rsre/rsre_char.py
index 1680d2973d..49787946ae 100644
--- a/rpython/rlib/rsre/rsre_char.py
+++ b/rpython/rlib/rsre/rsre_char.py
@@ -25,6 +25,7 @@ def set_unicode_db(newunicodedb):
for i in range(128):
assert newunicodedb.tolower(i) == getlower_ascii(i)
assert newunicodedb.toupper(i) == getupper_ascii(i)
+ assert newunicodedb.toupper_full(i) == [getupper_ascii(i)]
global unicodedb
unicodedb = newunicodedb
@@ -45,10 +46,10 @@ else:
# codesize. But sre_compile will compile some stuff differently depending on the
# codesize (e.g., charsets).
from rpython.rlib.runicode import MAXUNICODE
-if MAXUNICODE == 65535:
+if MAXUNICODE == 65535 and not consts.V37:
CODESIZE = 2
else:
- CODESIZE = 4
+ CODESIZE = 4 # always 4 from py3.7
copyright = "_sre.py 2.4 Copyright 2005 by Nik Haldimann"
@@ -57,16 +58,22 @@ BIG_ENDIAN = sys.byteorder == "big"
def getlower_ascii(char_ord):
return char_ord + int_between(ord('A'), char_ord, ord('Z') + 1) * (ord('a') - ord('A'))
+def getlower_locale(char_ord):
+ if char_ord < 256: # cheating! Well, CPython does too.
+ char_ord = tolower(char_ord)
+ return char_ord
+
+def getlower_unicode(char_ord):
+ if char_ord < 128: # shortcut for ascii
+ return getlower_ascii(char_ord)
+ assert unicodedb is not None
+ return unicodedb.tolower(char_ord)
+
def getlower(char_ord, flags):
if flags & consts.SRE_FLAG_LOCALE:
- if char_ord < 256: # cheating! Well, CPython does too.
- char_ord = tolower(char_ord)
- return char_ord
+ char_ord = getlower_locale(char_ord)
elif flags & consts.SRE_FLAG_UNICODE:
- if char_ord < 128: # shortcut for ascii
- return getlower_ascii(char_ord)
- assert unicodedb is not None
- char_ord = unicodedb.tolower(char_ord)
+ char_ord = getlower_unicode(char_ord)
else:
char_ord = getlower_ascii(char_ord)
return char_ord
@@ -74,20 +81,43 @@ def getlower(char_ord, flags):
def getupper_ascii(char_ord):
return char_ord - int_between(ord('a'), char_ord, ord('z') + 1) * (ord('a') - ord('A'))
+def getupper_locale(char_ord):
+ if char_ord < 256: # cheating! Well, CPython does too.
+ char_ord = toupper(char_ord)
+ return char_ord
+
+def getupper_unicode(char_ord):
+ if char_ord < 128: # shortcut for ascii
+ return getupper_ascii(char_ord)
+ # Note: this is like CPython's sre_upper_unicode(), including for a few
+ # arguments like 0xfb05, whose uppercase is *several letters* in unicode.
+ # We return the first of these letters. That's rather random but no
+ # caller expects a sane result in this case, I think: iscased_unicode()
+ # is fine as long as it returns anything != char_ord in this case.
+ assert unicodedb is not None
+ return unicodedb.toupper_full(char_ord)[0]
+
def getupper(char_ord, flags):
if flags & consts.SRE_FLAG_LOCALE:
- if char_ord < 256: # cheating! Well, CPython does too.
- char_ord = toupper(char_ord)
- return char_ord
+ char_ord = getupper_locale(char_ord)
elif flags & consts.SRE_FLAG_UNICODE:
- if char_ord < 128: # shortcut for ascii
- return getupper_ascii(char_ord)
- assert unicodedb is not None
- char_ord = unicodedb.toupper(char_ord)
+ char_ord = getupper_unicode(char_ord)
else:
char_ord = getupper_ascii(char_ord)
return char_ord
+def iscased_ascii(char_ord): # used by py3.7
+ upper = int_between(ord('A'), char_ord, ord('Z')+1)
+ lower = int_between(ord('a'), char_ord, ord('z')+1)
+ return upper | lower
+
+def iscased_unicode(char_ord): # used by py3.7
+ # NOTE: this is not unicodedb.iscased(). As per CPython 3.7, it is
+ # something different which---as far as I can tell---doesn't really
+ # have a meaning on its own, but well.
+ return (char_ord != getlower_unicode(char_ord) or
+ char_ord != getupper_unicode(char_ord))
+
#### Category helpers
is_a_word = [(chr(i).isalnum() or chr(i) == '_') for i in range(256)]
@@ -223,12 +253,22 @@ def set_range(ctx, pattern, index, char_code):
def set_range_ignore(ctx, pattern, index, char_code):
# <RANGE_IGNORE> <lower> <upper>
# the char_code is already lower cased
+ assert not consts.V37
lower = pattern.pattern[index + 1]
upper = pattern.pattern[index + 2]
match1 = int_between(lower, char_code, upper + 1)
match2 = int_between(lower, getupper(char_code, pattern.flags), upper + 1)
return match1 | match2, index + 3
+def set_range_uni_ignore(ctx, pattern, index, char_code):
+ # <RANGE_UNI_IGNORE> <lower> <upper>
+ # the char_code is already lower cased
+ lower = pattern.pattern[index + 1]
+ upper = pattern.pattern[index + 2]
+ match1 = int_between(lower, char_code, upper + 1)
+ match2 = int_between(lower, getupper_unicode(char_code), upper + 1)
+ return match1 | match2, index + 3
+
def set_bigcharset(ctx, pattern, index, char_code):
# <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
count = pattern.pattern[index+1]
@@ -300,7 +340,9 @@ set_dispatch_table = {
consts.OPCODE_BIGCHARSET: set_bigcharset,
consts.OPCODE_LITERAL: set_literal,
consts.OPCODE_RANGE: set_range,
- consts.OPCODE_RANGE_IGNORE: set_range_ignore,
+ consts.OPCODE27_RANGE_IGNORE: set_range_ignore,
+ consts.OPCODE37_RANGE_UNI_IGNORE: set_range_uni_ignore,
consts.OPCODE_UNICODE_GENERAL_CATEGORY: set_unicode_general_category,
}
+set_dispatch_table.pop(None, None) # remove the OPCODE27_* or OPCODE37_*
set_dispatch_unroll = unrolling_iterable(sorted(set_dispatch_table.items()))
diff --git a/rpython/rlib/rsre/rsre_constants.py b/rpython/rlib/rsre/rsre_constants.py
index 9af708532a..9994db7b05 100644
--- a/rpython/rlib/rsre/rsre_constants.py
+++ b/rpython/rlib/rsre/rsre_constants.py
@@ -1,3 +1,15 @@
+# Horrible import-time hack.
+# Blame CPython for renumbering these OPCODE_* at some point.
+from rpython.rlib.objectmodel import specialize
+try:
+ import pypy.module.sys.version
+ V37 = pypy.module.sys.version.CPYTHON_VERSION >= (3, 7)
+except ImportError:
+ raise ImportError("Cannot import pypy.module.sys.version. You can safely "
+ "remove this 'raise' line if you are not interested in "
+ "PyPy but only RPython.")
+ V37 = False
+
OPCODE_FAILURE = 0
OPCODE_SUCCESS = 1
OPCODE_ANY = 2
@@ -6,35 +18,49 @@ OPCODE_ASSERT = 4
OPCODE_ASSERT_NOT = 5
OPCODE_AT = 6
OPCODE_BRANCH = 7
-#OPCODE_CALL = 8
+OPCODE_CALL = 8 # not used
OPCODE_CATEGORY = 9
OPCODE_CHARSET = 10
OPCODE_BIGCHARSET = 11
OPCODE_GROUPREF = 12
OPCODE_GROUPREF_EXISTS = 13
-OPCODE_GROUPREF_IGNORE = 14
-OPCODE_IN = 15
-OPCODE_IN_IGNORE = 16
-OPCODE_INFO = 17
-OPCODE_JUMP = 18
-OPCODE_LITERAL = 19
-OPCODE_LITERAL_IGNORE = 20
-OPCODE_MARK = 21
-OPCODE_MAX_UNTIL = 22
-OPCODE_MIN_UNTIL = 23
-OPCODE_NOT_LITERAL = 24
-OPCODE_NOT_LITERAL_IGNORE = 25
-OPCODE_NEGATE = 26
-OPCODE_RANGE = 27
-OPCODE_REPEAT = 28
-OPCODE_REPEAT_ONE = 29
-#OPCODE_SUBPATTERN = 30
-OPCODE_MIN_REPEAT_ONE = 31
-OPCODE_RANGE_IGNORE = 32
+OPCODE_GROUPREF_IGNORE = 28 if V37 else 14
+OPCODE_IN = 14 if V37 else 15
+OPCODE_IN_IGNORE = 29 if V37 else 16
+OPCODE_INFO = 15 if V37 else 17
+OPCODE_JUMP = 16 if V37 else 18
+OPCODE_LITERAL = 17 if V37 else 19
+OPCODE_LITERAL_IGNORE = 30 if V37 else 20
+OPCODE_MARK = 18 if V37 else 21
+OPCODE_MAX_UNTIL = 19 if V37 else 22
+OPCODE_MIN_UNTIL = 20 if V37 else 23
+OPCODE_NOT_LITERAL = 21 if V37 else 24
+OPCODE_NOT_LITERAL_IGNORE = 31 if V37 else 25
+OPCODE_NEGATE = 22 if V37 else 26
+OPCODE_RANGE = 23 if V37 else 27
+OPCODE_REPEAT = 24 if V37 else 28
+OPCODE_REPEAT_ONE = 25 if V37 else 29
+OPCODE_SUBPATTERN = 26 if V37 else 30 # not used
+OPCODE_MIN_REPEAT_ONE = 27 if V37 else 31
+OPCODE27_RANGE_IGNORE = None if V37 else 32
+
+OPCODE37_GROUPREF_LOC_IGNORE = 32 if V37 else None
+OPCODE37_IN_LOC_IGNORE = 33 if V37 else None
+OPCODE37_LITERAL_LOC_IGNORE = 34 if V37 else None
+OPCODE37_NOT_LITERAL_LOC_IGNORE = 35 if V37 else None
+OPCODE37_GROUPREF_UNI_IGNORE = 36 if V37 else None
+OPCODE37_IN_UNI_IGNORE = 37 if V37 else None
+OPCODE37_LITERAL_UNI_IGNORE = 38 if V37 else None
+OPCODE37_NOT_LITERAL_UNI_IGNORE = 39 if V37 else None
+OPCODE37_RANGE_UNI_IGNORE = 40 if V37 else None
# not used by Python itself
OPCODE_UNICODE_GENERAL_CATEGORY = 70
+@specialize.argtype(1)
+def eq(op, const):
+ return const is not None and op == const
+
AT_BEGINNING = 0
AT_BEGINNING_LINE = 1
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
index 489636b783..3ce901c46a 100644
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -55,7 +55,8 @@ class CompiledPattern(object):
def __init__(self, pattern, flags):
self.pattern = pattern
- self.flags = flags
+ if not consts.V37: # 'flags' is ignored in >=3.7 mode
+ self.flags = flags
# check we don't get the old value of MAXREPEAT
# during the untranslated tests.
# On python3, MAXCODE can appear in patterns. It will be 65535
@@ -63,6 +64,29 @@ class CompiledPattern(object):
if not we_are_translated() and rsre_char.CODESIZE != 2:
assert 65535 not in pattern
+ def lowa(self, char_ord):
+ """Pre-3.7: uses getlower(flags).
+ Post-3.7: this is always getlower_ascii().
+ """
+ if not consts.V37:
+ return rsre_char.getlower(char_ord, self.flags)
+ else:
+ return rsre_char.getlower_ascii(char_ord)
+
+ def char_loc_ignore(self, index, char_ord):
+ assert consts.V37
+ pattern = self.pat(index)
+ return (char_ord == pattern or
+ rsre_char.getlower_locale(char_ord) == pattern or
+ rsre_char.getupper_locale(char_ord) == pattern)
+
+ def charset_loc_ignore(self, ctx, ppos, char_ord):
+ lo = rsre_char.getlower_locale(char_ord)
+ if rsre_char.check_charset(ctx, self, ppos, lo):
+ return True
+ up = rsre_char.getupper_locale(char_ord)
+ return up != lo and rsre_char.check_charset(ctx, self, ppos, up)
+
def pat(self, index):
jit.promote(self)
check_nonneg(index)
@@ -74,6 +98,10 @@ class CompiledPattern(object):
assert result >= 0
return result
+MODE_ANY = '\x00' # an empty match is fine
+MODE_NONEMPTY = '\x01' # must have a non-empty match
+MODE_FULL = '\x02' # must match the whole string
+
class AbstractMatchContext(object):
"""Abstract base class"""
_immutable_fields_ = ['end']
@@ -81,7 +109,7 @@ class AbstractMatchContext(object):
match_end = 0
match_marks = None
match_marks_flat = None
- fullmatch_only = False
+ match_mode = MODE_ANY
def __init__(self, match_start, end):
# 'match_start' and 'end' must be known to be non-negative
@@ -91,25 +119,30 @@ class AbstractMatchContext(object):
self.match_start = match_start
self.end = end
- def reset(self, start):
+ def reset(self, start, must_advance=False):
self.match_start = start
self.match_marks = None
self.match_marks_flat = None
+ #
+ assert MODE_ANY == chr(False)
+ assert MODE_NONEMPTY == chr(True)
+ self.match_mode = chr(must_advance)
+
+ @not_rpython
+ def _fullmatch_only(self, x=None):
+ raise Exception("'ctx.fullmatch_only' was replaced with"
+ " 'ctx.match_mode'")
+ fullmatch_only = property(_fullmatch_only, _fullmatch_only)
@not_rpython
def str(self, index):
"""Must be overridden in a concrete subclass.
- The tag ^^^ here is used to generate a translation-time crash
+ The @not_rpython is used to generate a translation-time crash
if there is a call to str() that is indirect. All calls must
be direct for performance reasons; you need to specialize the
caller with @specializectx."""
raise NotImplementedError
- @not_rpython
- def lowstr(self, index, flags):
- """Similar to str()."""
- raise NotImplementedError
-
# The following methods are provided to be overriden in
# Utf8MatchContext. The non-utf8 implementation is provided
# by the FixedMatchContext abstract subclass, in order to use
@@ -236,10 +269,6 @@ class BufMatchContext(FixedMatchContext):
check_nonneg(index)
return ord(self._buffer.getitem(index))
- def lowstr(self, index, flags):
- c = self.str(index)
- return rsre_char.getlower(c, flags)
-
def fresh_copy(self, start):
return BufMatchContext(self._buffer, start,
self.end)
@@ -261,10 +290,6 @@ class StrMatchContext(FixedMatchContext):
check_nonneg(index)
return ord(self._string[index])
- def lowstr(self, index, flags):
- c = self.str(index)
- return rsre_char.getlower(c, flags)
-
def fresh_copy(self, start):
return StrMatchContext(self._string, start,
self.end)
@@ -289,10 +314,6 @@ class UnicodeMatchContext(FixedMatchContext):
check_nonneg(index)
return ord(self._unicodestr[index])
- def lowstr(self, index, flags):
- c = self.str(index)
- return rsre_char.getlower(c, flags)
-
def fresh_copy(self, start):
return UnicodeMatchContext(self._unicodestr, start,
self.end)
@@ -599,9 +620,13 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
return
elif op == consts.OPCODE_SUCCESS:
- if ctx.fullmatch_only:
+ mode = ctx.match_mode
+ if mode == MODE_FULL:
if ptr != ctx.end:
return # not a full match
+ elif mode == MODE_NONEMPTY:
+ if ptr == ctx.match_start:
+ return # empty match
ctx.match_end = ptr
ctx.match_marks = marks
return MATCHED_OK
@@ -633,10 +658,10 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
ptr1 = ctx.prev_n(ptr, pattern.pat(ppos+1), ctx.ZERO)
except EndOfString:
return
- saved = ctx.fullmatch_only
- ctx.fullmatch_only = False
+ saved = ctx.match_mode
+ ctx.match_mode = MODE_ANY
stop = sre_match(ctx, pattern, ppos + 2, ptr1, marks) is None
- ctx.fullmatch_only = saved
+ ctx.match_mode = saved
if stop:
return
marks = ctx.match_marks
@@ -651,10 +676,10 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
except EndOfString:
pass
else:
- saved = ctx.fullmatch_only
- ctx.fullmatch_only = False
+ saved = ctx.match_mode
+ ctx.match_mode = MODE_ANY
stop = sre_match(ctx, pattern, ppos + 2, ptr1, marks) is not None
- ctx.fullmatch_only = saved
+ ctx.match_mode = saved
if stop:
return
ppos += pattern.pat(ppos)
@@ -699,7 +724,29 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
startptr, length_bytes = get_group_ref(ctx, marks, pattern.pat(ppos))
if length_bytes < 0:
return # group was not previously defined
- ptr = match_repeated_ignore(ctx, ptr, startptr, length_bytes, pattern.flags)
+ ptr = match_repeated_ignore(ctx, ptr, startptr, length_bytes, pattern)
+ if ptr < ctx.ZERO:
+ return # no match
+ ppos += 1
+
+ elif consts.eq(op, consts.OPCODE37_GROUPREF_UNI_IGNORE):
+ # unicode version of OPCODE_GROUPREF_IGNORE
+ # <GROUPREF> <groupnum>
+ startptr, length_bytes = get_group_ref(ctx, marks, pattern.pat(ppos))
+ if length_bytes < 0:
+ return # group was not previously defined
+ ptr = match_repeated_uni_ignore(ctx, ptr, startptr, length_bytes)
+ if ptr < ctx.ZERO:
+ return # no match
+ ppos += 1
+
+ elif consts.eq(op, consts.OPCODE37_GROUPREF_LOC_IGNORE):
+ # locale version of OPCODE_GROUPREF_IGNORE
+ # <GROUPREF> <groupnum>
+ startptr, length_bytes = get_group_ref(ctx, marks, pattern.pat(ppos))
+ if length_bytes < 0:
+ return # group was not previously defined
+ ptr = match_repeated_loc_ignore(ctx, ptr, startptr, length_bytes)
if ptr < ctx.ZERO:
return # no match
ppos += 1
@@ -726,7 +773,25 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
# match set member (or non_member), ignoring case
# <IN> <skip> <set>
if ptr >= ctx.end or not rsre_char.check_charset(ctx, pattern, ppos+1,
- ctx.lowstr(ptr, pattern.flags)):
+ pattern.lowa(ctx.str(ptr))):
+ return
+ ppos += pattern.pat(ppos)
+ ptr = ctx.next(ptr)
+
+ elif consts.eq(op, consts.OPCODE37_IN_UNI_IGNORE):
+ # match set member (or non_member), ignoring case, unicode mode
+ # <IN> <skip> <set>
+ if ptr >= ctx.end or not rsre_char.check_charset(ctx, pattern, ppos+1,
+ rsre_char.getlower_unicode(ctx.str(ptr))):
+ return
+ ppos += pattern.pat(ppos)
+ ptr = ctx.next(ptr)
+
+ elif consts.eq(op, consts.OPCODE37_IN_LOC_IGNORE):
+ # match set member (or non_member), ignoring case, locale mode
+ # <IN> <skip> <set>
+ if ptr >= ctx.end or not pattern.charset_loc_ignore(ctx, ppos+1,
+ ctx.str(ptr)):
return
ppos += pattern.pat(ppos)
ptr = ctx.next(ptr)
@@ -752,7 +817,23 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
elif op == consts.OPCODE_LITERAL_IGNORE:
# match literal string, ignoring case
# <LITERAL_IGNORE> <code>
- if ptr >= ctx.end or ctx.lowstr(ptr, pattern.flags) != pattern.pat(ppos):
+ if ptr >= ctx.end or pattern.lowa(ctx.str(ptr)) != pattern.pat(ppos):
+ return
+ ppos += 1
+ ptr = ctx.next(ptr)
+
+ elif consts.eq(op, consts.OPCODE37_LITERAL_UNI_IGNORE):
+ # match literal string, ignoring case, unicode mode
+ # <LITERAL_IGNORE> <code>
+ if ptr >= ctx.end or rsre_char.getlower_unicode(ctx.str(ptr)) != pattern.pat(ppos):
+ return
+ ppos += 1
+ ptr = ctx.next(ptr)
+
+ elif consts.eq(op, consts.OPCODE37_LITERAL_LOC_IGNORE):
+ # match literal string, ignoring case, locale mode
+ # <LITERAL_IGNORE> <code>
+ if ptr >= ctx.end or not pattern.char_loc_ignore(ppos, ctx.str(ptr)):
return
ppos += 1
ptr = ctx.next(ptr)
@@ -775,7 +856,23 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
elif op == consts.OPCODE_NOT_LITERAL_IGNORE:
# match if it's not a literal string, ignoring case
# <NOT_LITERAL> <code>
- if ptr >= ctx.end or ctx.lowstr(ptr, pattern.flags) == pattern.pat(ppos):
+ if ptr >= ctx.end or pattern.lowa(ctx.str(ptr)) == pattern.pat(ppos):
+ return
+ ppos += 1
+ ptr = ctx.next(ptr)
+
+ elif consts.eq(op, consts.OPCODE37_NOT_LITERAL_UNI_IGNORE):
+ # match if it's not a literal string, ignoring case, unicode mode
+ # <NOT_LITERAL> <code>
+ if ptr >= ctx.end or rsre_char.getlower_unicode(ctx.str(ptr)) == pattern.pat(ppos):
+ return
+ ppos += 1
+ ptr = ctx.next(ptr)
+
+ elif consts.eq(op, consts.OPCODE37_NOT_LITERAL_LOC_IGNORE):
+ # match if it's not a literal string, ignoring case, locale mode
+ # <NOT_LITERAL> <code>
+ if ptr >= ctx.end or pattern.char_loc_ignore(ppos, ctx.str(ptr)):
return
ppos += 1
ptr = ctx.next(ptr)
@@ -883,12 +980,36 @@ def match_repeated(ctx, ptr, oldptr, length_bytes):
return True
@specializectx
-def match_repeated_ignore(ctx, ptr, oldptr, length_bytes, flags):
+def match_repeated_ignore(ctx, ptr, oldptr, length_bytes, pattern):
+ oldend = ctx.go_forward_by_bytes(oldptr, length_bytes)
+ while oldptr < oldend:
+ if ptr >= ctx.end:
+ return -1
+ if pattern.lowa(ctx.str(ptr)) != pattern.lowa(ctx.str(oldptr)):
+ return -1
+ ptr = ctx.next(ptr)
+ oldptr = ctx.next(oldptr)
+ return ptr
+
+@specializectx
+def match_repeated_uni_ignore(ctx, ptr, oldptr, length_bytes):
+ oldend = ctx.go_forward_by_bytes(oldptr, length_bytes)
+ while oldptr < oldend:
+ if ptr >= ctx.end:
+ return -1
+ if rsre_char.getlower_unicode(ctx.str(ptr)) != rsre_char.getlower_unicode(ctx.str(oldptr)):
+ return -1
+ ptr = ctx.next(ptr)
+ oldptr = ctx.next(oldptr)
+ return ptr
+
+@specializectx
+def match_repeated_loc_ignore(ctx, ptr, oldptr, length_bytes):
oldend = ctx.go_forward_by_bytes(oldptr, length_bytes)
while oldptr < oldend:
if ptr >= ctx.end:
return -1
- if ctx.lowstr(ptr, flags) != ctx.lowstr(oldptr, flags):
+ if rsre_char.getlower_locale(ctx.str(ptr)) != rsre_char.getlower_locale(ctx.str(oldptr)):
return -1
ptr = ctx.next(ptr)
oldptr = ctx.next(oldptr)
@@ -955,54 +1076,63 @@ def match_IN(ctx, pattern, ptr, ppos):
return rsre_char.check_charset(ctx, pattern, ppos+2, ctx.str(ptr))
@specializectx
def match_IN_IGNORE(ctx, pattern, ptr, ppos):
- return rsre_char.check_charset(ctx, pattern, ppos+2, ctx.lowstr(ptr, pattern.flags))
+ return rsre_char.check_charset(ctx, pattern, ppos+2, pattern.lowa(ctx.str(ptr)))
+@specializectx
+def match_IN_UNI_IGNORE(ctx, pattern, ptr, ppos):
+ return rsre_char.check_charset(ctx, pattern, ppos+2, rsre_char.getlower_unicode(ctx.str(ptr)))
+@specializectx
+def match_IN_LOC_IGNORE(ctx, pattern, ptr, ppos):
+ return pattern.charset_loc_ignore(ctx, ppos+2, ctx.str(ptr))
@specializectx
def match_LITERAL(ctx, pattern, ptr, ppos):
return ctx.str(ptr) == pattern.pat(ppos+1)
@specializectx
def match_LITERAL_IGNORE(ctx, pattern, ptr, ppos):
- return ctx.lowstr(ptr, pattern.flags) == pattern.pat(ppos+1)
+ return pattern.lowa(ctx.str(ptr)) == pattern.pat(ppos+1)
+@specializectx
+def match_LITERAL_UNI_IGNORE(ctx, pattern, ptr, ppos):
+ return rsre_char.getlower_unicode(ctx.str(ptr)) == pattern.pat(ppos+1)
+@specializectx
+def match_LITERAL_LOC_IGNORE(ctx, pattern, ptr, ppos):
+ return pattern.char_loc_ignore(ppos+1, ctx.str(ptr))
@specializectx
def match_NOT_LITERAL(ctx, pattern, ptr, ppos):
return ctx.str(ptr) != pattern.pat(ppos+1)
@specializectx
def match_NOT_LITERAL_IGNORE(ctx, pattern, ptr, ppos):
- return ctx.lowstr(ptr, pattern.flags) != pattern.pat(ppos+1)
+ return pattern.lowa(ctx.str(ptr)) != pattern.pat(ppos+1)
+@specializectx
+def match_NOT_LITERAL_UNI_IGNORE(ctx, pattern, ptr, ppos):
+ return rsre_char.getlower_unicode(ctx.str(ptr)) != pattern.pat(ppos+1)
+@specializectx
+def match_NOT_LITERAL_LOC_IGNORE(ctx, pattern, ptr, ppos):
+ return not pattern.char_loc_ignore(ppos+1, ctx.str(ptr))
def _make_fre(checkerfn):
if checkerfn == match_ANY_ALL:
def fre(ctx, pattern, ptr, end, ppos):
return end
- elif checkerfn == match_IN:
- install_jitdriver_spec('MatchIn',
+ elif checkerfn in (match_IN, match_IN_IGNORE, match_IN_UNI_IGNORE):
+ # produces three jitdrivers:
+ # MatchIn
+ # MatchInIgnore
+ # MatchInUniIgnore
+ name = checkerfn.__name__.title().replace('_', '')
+ method_name = "jitdriver_" + name
+ install_jitdriver_spec(name,
greens=['ppos', 'pattern'],
reds=['ptr', 'end', 'ctx'],
debugprint=(1, 0))
@specializectx
def fre(ctx, pattern, ptr, end, ppos):
while True:
- ctx.jitdriver_MatchIn.jit_merge_point(ctx=ctx, ptr=ptr,
+ getattr(ctx, method_name).jit_merge_point(ctx=ctx, ptr=ptr,
end=end, ppos=ppos,
pattern=pattern)
if ptr < end and checkerfn(ctx, pattern, ptr, ppos):
ptr = ctx.next(ptr)
else:
return ptr
- elif checkerfn == match_IN_IGNORE:
- install_jitdriver_spec('MatchInIgnore',
- greens=['ppos', 'pattern'],
- reds=['ptr', 'end', 'ctx'],
- debugprint=(1, 0))
- @specializectx
- def fre(ctx, pattern, ptr, end, ppos):
- while True:
- ctx.jitdriver_MatchInIgnore.jit_merge_point(ctx=ctx, ptr=ptr,
- end=end, ppos=ppos,
- pattern=pattern)
- if ptr < end and checkerfn(ctx, pattern, ptr, ppos):
- ptr = ctx.next(ptr)
- else:
- return ptr
else:
# in the other cases, the fre() function is not JITted at all
# and is present as a residual call.
@@ -1019,11 +1149,19 @@ unroll_char_checker = [
(consts.OPCODE_ANY_ALL, match_ANY_ALL),
(consts.OPCODE_IN, match_IN),
(consts.OPCODE_IN_IGNORE, match_IN_IGNORE),
+ (consts.OPCODE37_IN_UNI_IGNORE, match_IN_UNI_IGNORE),
+ (consts.OPCODE37_IN_LOC_IGNORE, match_IN_LOC_IGNORE),
(consts.OPCODE_LITERAL, match_LITERAL),
(consts.OPCODE_LITERAL_IGNORE, match_LITERAL_IGNORE),
+ (consts.OPCODE37_LITERAL_UNI_IGNORE, match_LITERAL_UNI_IGNORE),
+ (consts.OPCODE37_LITERAL_LOC_IGNORE, match_LITERAL_LOC_IGNORE),
(consts.OPCODE_NOT_LITERAL, match_NOT_LITERAL),
(consts.OPCODE_NOT_LITERAL_IGNORE, match_NOT_LITERAL_IGNORE),
+ (consts.OPCODE37_NOT_LITERAL_UNI_IGNORE, match_NOT_LITERAL_UNI_IGNORE),
+ (consts.OPCODE37_NOT_LITERAL_LOC_IGNORE, match_NOT_LITERAL_LOC_IGNORE),
]
+unroll_char_checker = [(_op, _fn) for (_op, _fn) in unroll_char_checker
+ if _op is not None] # possibly removes the OPCODE37_*
unroll_fre_checker = [(_op, _make_fre(_fn))
for (_op, _fn) in unroll_char_checker]
@@ -1119,7 +1257,8 @@ def match(pattern, string, start=0, end=sys.maxint, fullmatch=False):
assert isinstance(pattern, CompiledPattern)
start, end = _adjust(start, end, len(string))
ctx = StrMatchContext(string, start, end)
- ctx.fullmatch_only = fullmatch
+ if fullmatch:
+ ctx.match_mode = MODE_FULL
if match_context(ctx, pattern):
return ctx
else:
diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py
index 834748ebaa..7617acc5fd 100644
--- a/rpython/rlib/rsre/rsre_utf8.py
+++ b/rpython/rlib/rsre/rsre_utf8.py
@@ -20,10 +20,6 @@ class Utf8MatchContext(AbstractMatchContext):
check_nonneg(index)
return rutf8.codepoint_at_pos(self._utf8, index)
- def lowstr(self, index, flags):
- c = self.str(index)
- return rsre_char.getlower(c, flags)
-
def get_single_byte(self, base_position, index):
return self._utf8[base_position + index]
@@ -97,10 +93,11 @@ def utf8match(pattern, utf8string, bytestart=0, byteend=sys.maxint,
fullmatch=False):
# bytestart and byteend must be valid byte positions inside the
# utf8string.
- from rpython.rlib.rsre.rsre_core import match_context
+ from rpython.rlib.rsre.rsre_core import match_context, MODE_FULL
ctx = make_utf8_ctx(utf8string, bytestart, byteend)
- ctx.fullmatch_only = fullmatch
+ if fullmatch:
+ ctx.match_mode = MODE_FULL
if match_context(ctx, pattern):
return ctx
else:
diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py
index 9e8fafc7c1..4e12277bcb 100644
--- a/rpython/rlib/rsre/test/support.py
+++ b/rpython/rlib/rsre/test/support.py
@@ -1,6 +1,7 @@
import sys, random
from rpython.rlib import debug
from rpython.rlib.rsre.rsre_core import _adjust, match_context, search_context
+from rpython.rlib.rsre.rsre_core import MODE_FULL
from rpython.rlib.rsre.rsre_core import StrMatchContext, EndOfString
@@ -112,7 +113,8 @@ def match(pattern, string, start=0, end=sys.maxint, fullmatch=False):
start = Position(start)
end = Position(end)
ctx = MatchContextForTests(string, start, end)
- ctx.fullmatch_only = fullmatch
+ if fullmatch:
+ ctx.match_mode = MODE_FULL
if match_context(ctx, pattern):
return ctx
else:
diff --git a/rpython/rlib/rsre/test/test_char.py b/rpython/rlib/rsre/test/test_char.py
index bd3a6f2936..6e7d6f3e33 100644
--- a/rpython/rlib/rsre/test/test_char.py
+++ b/rpython/rlib/rsre/test/test_char.py
@@ -204,3 +204,15 @@ def test_general_category():
assert check_charset(pat, 0, 99) # Lcheck_charset(pat, 0, 453) # Lt
assert not check_charset(pat, 0, 688) # Lm
assert not check_charset(pat, 0, 5870) # Nl
+
+def test_iscased():
+ assert rsre_char.iscased_ascii(65)
+ assert rsre_char.iscased_ascii(100)
+ assert rsre_char.iscased_ascii(64) is False
+ assert rsre_char.iscased_ascii(126) is False
+ assert rsre_char.iscased_ascii(1260) is False
+ assert rsre_char.iscased_ascii(12600) is False
+ for i in range(65536):
+ assert rsre_char.iscased_unicode(i) == (
+ rsre_char.getlower_unicode(i) != i or
+ rsre_char.getupper_unicode(i) != i)
diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py
index c832244b11..758c015f7a 100644
--- a/rpython/rlib/rsre/test/test_match.py
+++ b/rpython/rlib/rsre/test/test_match.py
@@ -1,5 +1,5 @@
import re, random, py
-from rpython.rlib.rsre import rsre_char
+from rpython.rlib.rsre import rsre_char, rsre_constants
from rpython.rlib.rsre.rpy import get_code, VERSION
from rpython.rlib.rsre.test.support import match, fullmatch, Position as P
@@ -306,6 +306,10 @@ class TestMatch:
rsre_char.set_unicode_db(unicodedb)
#
r = get_code(u"[\U00010428-\U0001044f]", re.I)
- assert r.pattern.count(27) == 1 # OPCODE_RANGE
- r.pattern[r.pattern.index(27)] = 32 # => OPCODE_RANGE_IGNORE
+ assert r.pattern.count(rsre_constants.OPCODE_RANGE) == 1
+ if rsre_constants.V37:
+ repl = rsre_constants.OPCODE37_RANGE_UNI_IGNORE
+ else:
+ repl = rsre_constants.OPCODE27_RANGE_IGNORE
+ r.pattern[r.pattern.index(rsre_constants.OPCODE_RANGE)] = repl
assert match(r, u"\U00010428")
diff --git a/rpython/rlib/rwinreg.py b/rpython/rlib/rwinreg.py
index 7f45088ba3..4628804faf 100644
--- a/rpython/rlib/rwinreg.py
+++ b/rpython/rlib/rwinreg.py
@@ -49,71 +49,75 @@ PHKEY = rffi.CArrayPtr(HKEY)
REGSAM = rwin32.DWORD
def get_traits(suffix):
+ if suffix == 'A':
+ strp = rffi.CCHARP
+ else:
+ strp = rffi.CWCHARP
RegSetValue = external(
'RegSetValue' + suffix,
- [HKEY, rffi.CCHARP, rwin32.DWORD, rffi.CCHARP, rwin32.DWORD],
+ [HKEY, strp, rwin32.DWORD, strp, rwin32.DWORD],
rffi.LONG)
RegSetValueEx = external(
'RegSetValueEx' + suffix,
- [HKEY, rffi.CCHARP, rwin32.DWORD,
- rwin32.DWORD, rffi.CCHARP, rwin32.DWORD],
+ [HKEY, strp, rwin32.DWORD,
+ rwin32.DWORD, strp, rwin32.DWORD],
rffi.LONG)
RegQueryValue = external(
'RegQueryValue' + suffix,
- [HKEY, rffi.CCHARP, rffi.CCHARP, rwin32.PLONG],
+ [HKEY, strp, strp, rwin32.PLONG],
rffi.LONG)
RegQueryValueEx = external(
'RegQueryValueEx' + suffix,
- [HKEY, rffi.CCHARP, rwin32.LPDWORD, rwin32.LPDWORD,
- rffi.CCHARP, rwin32.LPDWORD],
+ [HKEY, strp, rwin32.LPDWORD, rwin32.LPDWORD,
+ strp, rwin32.LPDWORD],
rffi.LONG)
RegCreateKey = external(
'RegCreateKey' + suffix,
- [HKEY, rffi.CCHARP, PHKEY],
+ [HKEY, strp, PHKEY],
rffi.LONG)
RegCreateKeyEx = external(
'RegCreateKeyEx' + suffix,
- [HKEY, rffi.CCHARP, rwin32.DWORD, rffi.CCHARP, rwin32.DWORD,
+ [HKEY, strp, rwin32.DWORD, strp, rwin32.DWORD,
REGSAM, rffi.VOIDP, PHKEY, rwin32.LPDWORD],
rffi.LONG)
RegDeleteValue = external(
'RegDeleteValue' + suffix,
- [HKEY, rffi.CCHARP],
+ [HKEY, strp],
rffi.LONG)
RegDeleteKey = external(
'RegDeleteKey' + suffix,
- [HKEY, rffi.CCHARP],
+ [HKEY, strp],
rffi.LONG)
RegOpenKeyEx = external(
'RegOpenKeyEx' + suffix,
- [HKEY, rffi.CCHARP, rwin32.DWORD, REGSAM, PHKEY],
+ [HKEY, strp, rwin32.DWORD, REGSAM, PHKEY],
rffi.LONG)
RegEnumValue = external(
'RegEnumValue' + suffix,
- [HKEY, rwin32.DWORD, rffi.CCHARP,
+ [HKEY, rwin32.DWORD, strp,
rwin32.LPDWORD, rwin32.LPDWORD, rwin32.LPDWORD,
rffi.CCHARP, rwin32.LPDWORD],
rffi.LONG)
RegEnumKeyEx = external(
'RegEnumKeyEx' + suffix,
- [HKEY, rwin32.DWORD, rffi.CCHARP,
+ [HKEY, rwin32.DWORD, strp,
rwin32.LPDWORD, rwin32.LPDWORD,
- rffi.CCHARP, rwin32.LPDWORD, rwin32.PFILETIME],
+ strp, rwin32.LPDWORD, rwin32.PFILETIME],
rffi.LONG)
RegQueryInfoKey = external(
'RegQueryInfoKey' + suffix,
- [HKEY, rffi.CCHARP, rwin32.LPDWORD, rwin32.LPDWORD,
+ [HKEY, strp, rwin32.LPDWORD, rwin32.LPDWORD,
rwin32.LPDWORD, rwin32.LPDWORD, rwin32.LPDWORD,
rwin32.LPDWORD, rwin32.LPDWORD, rwin32.LPDWORD,
rwin32.LPDWORD, rwin32.PFILETIME],
@@ -121,17 +125,17 @@ def get_traits(suffix):
RegLoadKey = external(
'RegLoadKey' + suffix,
- [HKEY, rffi.CCHARP, rffi.CCHARP],
+ [HKEY, strp, strp],
rffi.LONG)
RegSaveKey = external(
'RegSaveKey' + suffix,
- [HKEY, rffi.CCHARP, rffi.VOIDP],
+ [HKEY, strp, rffi.VOIDP],
rffi.LONG)
RegConnectRegistry = external(
'RegConnectRegistry' + suffix,
- [rffi.CCHARP, HKEY, PHKEY],
+ [strp, HKEY, PHKEY],
rffi.LONG)
return (RegSetValue, RegSetValueEx, RegQueryValue, RegQueryValueEx,