hg merge default

author: Ronan Lamy <ronan.lamy@gmail.com> 2020-11-25 20:25:40 +0000
committer: Ronan Lamy <ronan.lamy@gmail.com> 2020-11-25 20:25:40 +0000
commit: 3c2bacc346bf0557e146c909b1723568ac2f02d2 (patch)
tree: f83daf6cb1c7449b78984e9a9c2ee358b67fa4ac
parent: hg merge default (diff)
parent: fix for a case where the difference shows up with recent unicode databases only (diff)
download: pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.tar.gz
pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.tar.bz2
pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.zip
23 files changed, 492 insertions, 213 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b5964fff08..7ac5eb3c13 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,4 +1,5 @@
 check_rpython_annotations:
-    image: octobus/ci-for-pypy
-    script:
-      - (cd pypy/goal; python2 ../../rpython/bin/rpython --batch -O2 --annotate targetpypystandalone)
+    image: registry.heptapod.net/pypy/pypy/ci:v1
+    script: |
+      cd pypy/goal
+      python2 ../../rpython/bin/rpython --batch -O2 --annotate targetpypystandalone
diff --git a/.gitlab-ci/Dockerfile b/.gitlab-ci/Dockerfile
new file mode 100644
index 0000000000..a2cfc4cc22
--- /dev/null
+++ b/.gitlab-ci/Dockerfile
@@ -0,0 +1,28 @@
+FROM debian:buster
+
+RUN apt-get update && \
+    apt-get install -y \
+        build-essential \
+        gcc \
+        libbz2-dev \
+        libexpat1-dev \
+        libffi-dev \
+        libgc-dev \
+        libgdbm-dev \
+        liblzma-dev \
+        libncurses5-dev \
+        libncursesw5-dev \
+        libsqlite3-dev \
+        libssl-dev \
+        make \
+        pkg-config \
+        pypy-dev \
+        python \
+        python-cffi \
+        python-dev \
+        python-pip \
+        python-virtualenv \
+        tk-dev \
+        zlib1g-dev \
+    && \
+    apt-get -y clean
diff --git a/.gitlab-ci/build.sh b/.gitlab-ci/build.sh
new file mode 100644
index 0000000000..68859cbc7a
--- /dev/null
+++ b/.gitlab-ci/build.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -e
+
+GITLAB="registry.heptapod.net"
+TAG="${GITLAB}/pypy/pypy/ci:v1"
+
+docker build --tag "${TAG}" --file "Dockerfile" .
+
+echo -e "run: \n  docker login ${GITLAB}  # first time\n  docker push ${TAG}"
+\ No newline at end of file
diff --git a/.hgtags b/.hgtags
index a7138fad73..b3b27f5cfa 100644
--- a/.hgtags
+++ b/.hgtags
@@ -95,3 +95,9 @@ d38cd66c14b86430155e2a122e59648e93011cc0 release-pypy3.6-v7.3.2
 a29ef73f9b32953753d0dd6d2a56255fa2892e24 release-pypy2.7-v7.3.3rc1
 fab92f174c7754272e9bef31d2a6d66d8d45188b release-pypy3.6-v7.3.3rc1
 6cf8fa20a7f6fc61dc07402e1e859cf31bf977ad release-pypy3.7-v7.3.3rc1
+a29ef73f9b32953753d0dd6d2a56255fa2892e24 release-pypy2.7-v7.3.3rc2
+db1e853f94de42ad711bd930222bd2434e0f900d release-pypy3.6-v7.3.3rc2
+7e6e2bb30ac5fbdbd443619cae28c51d5c162a02 release-pypy3.7-v7.3.3rc2
+a29ef73f9b32953753d0dd6d2a56255fa2892e24 release-pypy2.7-v7.3.3
+db1e853f94de42ad711bd930222bd2434e0f900d release-pypy3.6-v7.3.3
+7e6e2bb30ac5fbdbd443619cae28c51d5c162a02 release-pypy3.7-v7.3.3
diff --git a/lib-python/2.7/test/test_xml_etree.py b/lib-python/2.7/test/test_xml_etree.py
index f6d5d17fc3..f1f1c21cd1 100644
--- a/lib-python/2.7/test/test_xml_etree.py
+++ b/lib-python/2.7/test/test_xml_etree.py
@@ -87,6 +87,19 @@ ENTITY_XML = """\
 <document>&entity;</document>
 """
 
+# backport from https://github.com/python/cpython/pull/22987
+ATTLIST_XML = """\
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE Foo [
+<!ELEMENT foo (bar*)>
+<!ELEMENT bar (#PCDATA)*>
+<!ATTLIST bar xml:lang CDATA "eng">
+<!ENTITY qux "quux">
+]>
+<foo>
+<bar>&qux;</bar>
+</foo>
+"""
 
 def checkwarnings(*filters):
     def decorator(test):
@@ -1002,6 +1015,13 @@ class ElementTreeTest(unittest.TestCase):
                                        method='html')
                 self.assertEqual(serialized, expected)
 
+    # backported from https://github.com/python/cpython/pull/22987
+    def test_attlist_default(self):
+        # Test default attribute values; See BPO 42151.
+        root = ET.fromstring(ATTLIST_XML)
+        self.assertEqual(root[0].attrib,
+                         {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
+
 
 #
 # xinclude tests (samples from appendix C of the xinclude specification)
diff --git a/lib-python/2.7/xml/etree/ElementTree.py b/lib-python/2.7/xml/etree/ElementTree.py
index dca69106d1..4cae355398 100644
--- a/lib-python/2.7/xml/etree/ElementTree.py
+++ b/lib-python/2.7/xml/etree/ElementTree.py
@@ -1226,7 +1226,6 @@ class _IterParseIterator(object):
             if event == "start":
                 try:
                     parser.ordered_attributes = 1
-                    parser.specified_attributes = 1
                     def handler(tag, attrib_in, event=event, append=append,
                                 start=self._parser._start_list):
                         append((event, start(tag, attrib_in)))
@@ -1505,7 +1504,6 @@ class XMLParser(object):
         # use new-style attribute handling, if supported
         try:
             self._parser.ordered_attributes = 1
-            self._parser.specified_attributes = 1
             parser.StartElementHandler = self._start_list
         except AttributeError:
             pass
diff --git a/pypy/doc/contributing.rst b/pypy/doc/contributing.rst
index f5477e5805..959cecb065 100644
--- a/pypy/doc/contributing.rst
+++ b/pypy/doc/contributing.rst
@@ -40,14 +40,16 @@ details of which can be found in our :ref:`contact <contact>` section. The folks
 there are very friendly, and can point you in the right direction.
 
 We give out commit rights usually fairly liberally, so if you want to do something
-with PyPy, you can become a committer. We also run frequent coding sprints which
-are separately announced and often happen around Python conferences such as
-EuroPython or PyCon. Upcoming events are usually announced on `the blog`_.
+with PyPy, you can become a "developer" by logging into https://foss.heptapod.net
+and clicking the "Request Access" link on the `PyPy group page`_. We also run
+coding sprints, which are announced separately, usually on
+`the blog`_.
 
 Further Reading: :ref:`Contact <contact>`
 
 .. _the blog: https://morepypy.blogspot.com
 .. _pypy-dev mailing list: https://mail.python.org/mailman/listinfo/pypy-dev
+.. _`PyPy group page`: https://foss.heptapod.net/pypy
 
 
 Your first contribution
@@ -96,31 +98,19 @@ Thanks to `Octobus <https://octobus.net/>`_ and `Clever Cloud
      </a>
    </h1>
 
-If you are new with Mercurial and Heptapod, you can read this `short tutorial`_
-
-.. _`short tutorial`: https://heptapod.net/pages/quick-start-guide.html
-
-However, we recommend at this time you **not** use topic branches. We prefer
-the usual mercurial named branch model, as pointed out in the :ref:`FAQ
-<github>` about why we didn't move to git.
-
 Get Access
 ----------
 
-The important take-away from that tutorial for experienced developers is that
-since the free hosting on foss.heptapod.net does not allow personal forks, you
+As stated above, you need to request access to the repo.
+Since the free hosting on foss.heptapod.net does not allow personal forks, you
 need permissions to push your changes directly to our repo. Once you sign in to
 https://foss.heptapod.net using either a new login or your GitHub or Atlassian
 logins, you can get developer status for pushing directly to
 the project (just ask by clicking the link at foss.heptapod.net/pypy just under
 the logo, and you'll get it, basically).  Once you have it you can rewrite your
 file ``.hg/hgrc`` to contain ``default = ssh://hg@foss.heptapod.net/pypy/pypy``.
-Your changes will then be pushed directly to the official repo, but (if you
-follow these rules) they are still on a branch, and we can still review the
-branches you want to merge.  With developer status, you can push topic
-branches. If you wish to push long-lived branches, you will need to ask for
-higher permissions.
-
+Your changes will then be pushed directly to a branch on the official repo, and
+we will review the branches you want to merge.
 
 Clone
 -----
@@ -138,8 +128,7 @@ Clone
   then edit ``.hg/hgrc`` as above and do ``hg pull && hg up``.
 
 * Now you have a complete copy of the PyPy repo.  Make a long-lived branch
-  with a command like ``hg branch name_of_your_branch``, or make a short-
-  lived branch for a simple fix with a command like ``hg topic issueXXXX``.
+  with a command like ``hg branch name_of_your_branch``.
 
 Edit
 ----
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
index 43cc823fcb..81d6fe0ee2 100644
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -96,6 +96,10 @@ Other steps
     create a fresh whatsnew_head.rst after the release
     and add the new file to  pypy/doc/index-of-whatsnew.rst
 
+  * rename pypy/doc/whatsnew-pypy3-HEAD.rst to whatsnew-pypy3-VERSION.rst
+    create a fresh whatsnew-pypy3-HEAD.rst after the release
+    and add the new file to  pypy/doc/index-of-whatsnew.rst
+
   * write release announcement pypy/doc/release-VERSION.rst
     The release announcement should contain a direct link to the download page
 
@@ -125,7 +129,8 @@ Other steps
 
     Also repackage and upload source "-src.tar.bz2"
 
-  * Upload binaries to https://buildbot.pypy.org/mirror
+  * Upload binaries to https://buildbot.pypy.org/mirror. It takes an hour for
+    https://downloads.python.org/pypy/ to sync
 
 * Send out a mailing list message asking for last-minute comments and testing
 
@@ -142,5 +147,4 @@ Other steps
 
   * add a tag on the codespeed web site that corresponds to pypy release
   * revise versioning at https://readthedocs.org/projects/pypy
-  * tag the final release(s) with appropriate tags
 
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
index 9eeabedd49..70026f9597 100644
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -150,26 +150,18 @@ knowledge of the internals. Head over to `vmprof-python`_, `vmprof-server`_ and
 .. _vmprof-server: https://github.com/vmprof/vmprof-server
 .. _vmprof-integration: https://github.com/vmprof/vmprof-integration
 
-Optimized Unicode Representation
---------------------------------
-
-CPython 3.3 will use an optimized unicode representation (see :pep:`0393`) which switches between
-different ways to represent a unicode string, depending on whether the string
-fits into ASCII, has only two-byte characters or needs four-byte characters.
-
-The actual details would be rather different in PyPy, but we would like to have
-the same optimization implemented.
-
-Or maybe not.  We can also play around with the idea of using a single
-representation: as a byte string in utf-8.  (This idea needs some extra logic
-for efficient indexing, like a cache.) Work has begun on the ``unicode-utf``
-and ``unicode-utf8-py3`` branches. More is needed, for instance there are
-SIMD optimizations that are not yet used.
-
 Convert RPython to Python3
 --------------------------
 
-The world is moving on, we should too.
+The world is moving on, we should too. Work in this direction has begun on the
+``rpython3`` branch, mainly to enable building documentation with Python3. Some
+things that are known to need careful refactoring:
+- a single character in python3 is an int, not a byte
+- we use ``str``/``unicode`` to distinguish between different modes of
+  operation for windows in ``make_win32_traits``.
+
+There are probably more. The branch currently does not pass rpython tests so
+work is needed to back out some of the changes and redo them properly.
 
 Improve performance
 -------------------
@@ -273,7 +265,10 @@ and it is hard to imagine NumPy abandoning the C-API. Here are a few ideas:
 Support more platforms
 ----------------------
 
-We have a plan for a `Windows 64`_ port.
+We have a plan for a `Windows 64`_ port. There is progress on the ``win64``
+branch. Help is needed to continue the work. Stage I is complete: we now have
+a 64-bit PyPy2.7 on windows. But it is missing cpyext and other tidbits to
+enable releasing it.
 
 .. _`Windows 64`: windows.html#what-is-missing-for-a-full-64-bit-translation
 
@@ -322,3 +317,6 @@ good work that needs to be finished:
 
     TODO: see the end of the blog post
 
+Work has begun on HPy_ to enable a faster C-API.
+
+.. _HPy: https://hpy.readthedocs.io/en/latest/
diff --git a/pypy/doc/release-v7.3.3.rst b/pypy/doc/release-v7.3.3.rst
index 2a072b1525..ea6c0cf537 100644
--- a/pypy/doc/release-v7.3.3.rst
+++ b/pypy/doc/release-v7.3.3.rst
@@ -2,12 +2,6 @@
 PyPy v7.3.3: release of 2.7, 3.6, and 3.7 beta
 ==============================================
 
-.. note::
-  This is a pre-release announcement. When the release actually happens, it
-  will be announced on the `morepypy blog`_
-
-.. _`morepypy blog`: https://morepypy.blogspot.com
-
 -The PyPy team is proud to release the version 7.3.3 of PyPy, which includes
 three different interpreters:
 
@@ -39,9 +33,9 @@ releases, but read on to find out what is new.
   interpreters like GraalPython_ (written on top of the Java virtual machine),
   RustPython_, and PyPy. Thanks to Oracle for sponsoring work on HPy.
 
-Several issues were exposed in the 7.3.2 release. Many of them came from the
-great work ongoing to ship PyPy packages in `conda-forge`_.  A big shout out
-to them for taking this on.
+Several issues exposed in the 7.3.2 release were fixed. Many of them came from the
+great work ongoing to ship PyPy-compatible binary packages in `conda-forge`_.
+A big shout out to them for taking this on.
 
 Development of PyPy has moved to https://foss.heptapod.net/pypy/pypy.
 This was covered more extensively in this `blog post`_. We have seen an
@@ -52,7 +46,7 @@ The `CFFI`_ backend has been updated to version 1.14.3. We recommend using CFFI
 rather than c-extensions to interact with C, and using cppyy_ for performant
 wrapping of C++ code for Python.
 
-A new contributor took us up on the challenge to get `windows 64-bit`` support.
+A new contributor took us up on the challenge to get `windows 64-bit`_ support.
 The work is proceeding on the ``win64`` branch, more help in coding or
 sponsorship is welcome. In anticipation of merging this large change, we fixed
 many test failures on windows.
@@ -161,17 +155,22 @@ Python 3.6+
 - bpo-17288_: Prevent jump from a yield statement
 - bpo-11471_: avoid generating a ``JUMP_FORWARD`` instruction at the end of an
   ``if``-block if there is no ``else``-clause
-- Fix ``os.listdir('')`` and ``os.stat('')`` on windows (issue 3331)
+- Fix ``os.listdir('')`` and ``os.stat('')`` on windows (issue 3331_)
 - Fix many unicode encoding/decoding errors on windows
-- Fix pickling of time subclasses (issue 3324, bpo 41966)
-- Add support for ``sqlite3_load_extension`` (issue 3334)
+- Fix pickling of time subclasses (issue 3324_, bpo-41966_)
+- Add support for ``sqlite3_load_extension`` (issue 3334_)
 - Change default file encoding from mbcs to utf-8 on windows
 - Change default file encoding from ascii to utf-8 on linux
 - Add ``resource.prlimit()``
+- Accept PathLike in ``nt._getfullpathname`` (issue 3343_)
+- Fix some problems with ``winreg``
+
 
 Python 3.6 C-API
 ~~~~~~~~~~~~~~~~
 
+- Export ``PyStructSequence_NewType`` (issue 3346_)
+
 .. _3312: https://foss.heptapod.net/pypy/pypy/-/issues/3312
 .. _3315: https://foss.heptapod.net/pypy/pypy/-/issues/3315
 .. _3321: https://foss.heptapod.net/pypy/pypy/-/issues/3321
@@ -180,6 +179,8 @@ Python 3.6 C-API
 .. _3324: https://foss.heptapod.net/pypy/pypy/-/issues/3324
 .. _3334: https://foss.heptapod.net/pypy/pypy/-/issues/3334
 .. _3307: https://foss.heptapod.net/pypy/pypy/-/issues/3307
+.. _3343: https://foss.heptapod.net/pypy/pypy/-/issues/3343
+.. _3346: https://foss.heptapod.net/pypy/pypy/-/issues/3346
 
 .. _`merge request 723`: https://foss.heptapod.net/pypy/pypy/-/merge_request/723
 
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
index 5f21ccff61..6fe082cc9a 100644
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,14 @@ What's new in PyPy2.7 7.3.3+
 .. this is a revision shortly after release-pypy-7.3.3
 .. startrev: de512cf13506
 
+.. branch: new-ci-image
+
+CI: Add a Dockerfile for CI to prevent hitting pull limits on docker hub
+
+.. branch: issue-3333
+
+Fix xml.etree.ElementTree assigning default attribute values: issue 3333
+
+.. branch: rpython-rsre-for-37
+
+Support for the new format of regular expressions in Python 3.7
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
index b6b4f801c2..29636ab220 100755
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -1,12 +1,12 @@
 #! /bin/bash
 
 # Edit these appropriately before running this script
-pmaj=2  # python main version: 2 or 3
-pmin=7  # python minor version
+pmaj=3  # python main version: 2 or 3
+pmin=6  # python minor version
 maj=7
 min=3
-rev=2
-# rc=rc3  # set to blank for actual release
+rev=3
+#rc=rc2  # set to blank for actual release
 
 function maybe_exit {
     if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
diff --git a/pypy/tool/test/test_tab.py b/pypy/tool/test/test_tab.py
index 0aea831a39..ce96b61cf0 100644
--- a/pypy/tool/test/test_tab.py
+++ b/pypy/tool/test/test_tab.py
@@ -44,6 +44,8 @@ def test_no_pypy_import_in_rpython():
         if os.path.isfile(path):
             if not path.lower().endswith('.py'):
                 return
+            if path.lower().endswith('rsre_constants.py'):
+                return   # exception in this file
             with file(path) as f:
                 for line in f:
                     if "import" not in line:
diff --git a/rpython/rlib/rsre/rpy/_sre.py b/rpython/rlib/rsre/rpy/_sre.py
index 617345483a..70d7737297 100644
--- a/rpython/rlib/rsre/rpy/_sre.py
+++ b/rpython/rlib/rsre/rpy/_sre.py
@@ -22,6 +22,9 @@ def get_code(regexp, flags=0, allargs=False):
     """NOT_RPYTHON: you can't compile new regexps in an RPython program,
     you can only use precompiled ones"""
     from . import sre_compile
+    if rsre_constants.V37:
+        import pytest
+        pytest.skip("This test cannot run in a 3.7 branch of pypy")
     try:
         sre_compile.compile(regexp, flags)
     except GotIt as e:
diff --git a/rpython/rlib/rsre/rpy/sre_constants.py b/rpython/rlib/rsre/rpy/sre_constants.py
index 89cbdb0d5f..4b9deac743 100644
--- a/rpython/rlib/rsre/rpy/sre_constants.py
+++ b/rpython/rlib/rsre/rpy/sre_constants.py
@@ -94,35 +94,17 @@ CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
 CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
 CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
 
-OPCODES = [
-
-    # failure=0 success=1 (just because it looks better that way :-)
-    FAILURE, SUCCESS,
-
-    ANY, ANY_ALL,
-    ASSERT, ASSERT_NOT,
-    AT,
-    BRANCH,
-    CALL,
-    CATEGORY,
-    CHARSET, BIGCHARSET,
-    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
-    IN, IN_IGNORE,
-    INFO,
-    JUMP,
-    LITERAL, LITERAL_IGNORE,
-    MARK,
-    MAX_UNTIL,
-    MIN_UNTIL,
-    NOT_LITERAL, NOT_LITERAL_IGNORE,
-    NEGATE,
-    RANGE,
-    REPEAT,
-    REPEAT_ONE,
-    SUBPATTERN,
-    MIN_REPEAT_ONE,
-    RANGE_IGNORE,
-]
+def _rpython_opcodes():
+    from rpython.rlib.rsre import rsre_constants as consts
+    mapping = {}
+    for name, value in consts.__dict__.items():
+        if name.startswith('OPCODE') and isinstance(value, int) and value < 70:
+            name = name[6:].lstrip('012346789_').lower()
+            mapping[value] = name
+    # check that there are no holes
+    assert sorted(mapping.keys()) == range(len(mapping))
+    return [name for value, name in sorted(mapping.items())]
+OPCODES = _rpython_opcodes()
 
 ATCODES = [
     AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
diff --git a/rpython/rlib/rsre/rsre_char.py b/rpython/rlib/rsre/rsre_char.py
index 1680d2973d..49787946ae 100644
--- a/rpython/rlib/rsre/rsre_char.py
+++ b/rpython/rlib/rsre/rsre_char.py
@@ -25,6 +25,7 @@ def set_unicode_db(newunicodedb):
         for i in range(128):
             assert newunicodedb.tolower(i) == getlower_ascii(i)
             assert newunicodedb.toupper(i) == getupper_ascii(i)
+            assert newunicodedb.toupper_full(i) == [getupper_ascii(i)]
     global unicodedb
     unicodedb = newunicodedb
 
@@ -45,10 +46,10 @@ else:
 # codesize. But sre_compile will compile some stuff differently depending on the
 # codesize (e.g., charsets).
 from rpython.rlib.runicode import MAXUNICODE
-if MAXUNICODE == 65535:
+if MAXUNICODE == 65535 and not consts.V37:
     CODESIZE = 2
 else:
-    CODESIZE = 4
+    CODESIZE = 4        # always 4 from py3.7
 
 copyright = "_sre.py 2.4 Copyright 2005 by Nik Haldimann"
 
@@ -57,16 +58,22 @@ BIG_ENDIAN = sys.byteorder == "big"
 def getlower_ascii(char_ord):
     return char_ord + int_between(ord('A'), char_ord, ord('Z') + 1) * (ord('a') - ord('A'))
 
+def getlower_locale(char_ord):
+    if char_ord < 256:      # cheating!  Well, CPython does too.
+        char_ord = tolower(char_ord)
+    return char_ord
+
+def getlower_unicode(char_ord):
+    if char_ord < 128: # shortcut for ascii
+        return getlower_ascii(char_ord)
+    assert unicodedb is not None
+    return unicodedb.tolower(char_ord)
+
 def getlower(char_ord, flags):
     if flags & consts.SRE_FLAG_LOCALE:
-        if char_ord < 256:      # cheating!  Well, CPython does too.
-            char_ord = tolower(char_ord)
-        return char_ord
+        char_ord = getlower_locale(char_ord)
     elif flags & consts.SRE_FLAG_UNICODE:
-        if char_ord < 128: # shortcut for ascii
-            return getlower_ascii(char_ord)
-        assert unicodedb is not None
-        char_ord = unicodedb.tolower(char_ord)
+        char_ord = getlower_unicode(char_ord)
     else:
         char_ord = getlower_ascii(char_ord)
     return char_ord
@@ -74,20 +81,43 @@ def getlower(char_ord, flags):
 def getupper_ascii(char_ord):
     return char_ord - int_between(ord('a'), char_ord, ord('z') + 1) * (ord('a') - ord('A'))
 
+def getupper_locale(char_ord):
+    if char_ord < 256:      # cheating!  Well, CPython does too.
+        char_ord = toupper(char_ord)
+    return char_ord
+
+def getupper_unicode(char_ord):
+    if char_ord < 128: # shortcut for ascii
+        return getupper_ascii(char_ord)
+    # Note: this is like CPython's sre_upper_unicode(), including for a few
+    # arguments like 0xfb05, whose uppercase is *several letters* in unicode.
+    # We return the first of these letters.  That's rather random but no
+    # caller expects a sane result in this case, I think: iscased_unicode()
+    # is fine as long as it returns anything != char_ord in this case.
+    assert unicodedb is not None
+    return unicodedb.toupper_full(char_ord)[0]
+
 def getupper(char_ord, flags):
     if flags & consts.SRE_FLAG_LOCALE:
-        if char_ord < 256:      # cheating!  Well, CPython does too.
-            char_ord = toupper(char_ord)
-        return char_ord
+        char_ord = getupper_locale(char_ord)
     elif flags & consts.SRE_FLAG_UNICODE:
-        if char_ord < 128: # shortcut for ascii
-            return getupper_ascii(char_ord)
-        assert unicodedb is not None
-        char_ord = unicodedb.toupper(char_ord)
+        char_ord = getupper_unicode(char_ord)
     else:
         char_ord = getupper_ascii(char_ord)
     return char_ord
 
+def iscased_ascii(char_ord):   # used by py3.7
+    upper = int_between(ord('A'), char_ord, ord('Z')+1)
+    lower = int_between(ord('a'), char_ord, ord('z')+1)
+    return upper | lower
+
+def iscased_unicode(char_ord):   # used by py3.7
+    # NOTE: this is not unicodedb.iscased().  As per CPython 3.7, it is
+    # something different which---as far as I can tell---doesn't really
+    # have a meaning on its own, but well.
+    return (char_ord != getlower_unicode(char_ord) or
+            char_ord != getupper_unicode(char_ord))
+
 #### Category helpers
 
 is_a_word = [(chr(i).isalnum() or chr(i) == '_') for i in range(256)]
@@ -223,12 +253,22 @@ def set_range(ctx, pattern, index, char_code):
 def set_range_ignore(ctx, pattern, index, char_code):
     # <RANGE_IGNORE> <lower> <upper>
     # the char_code is already lower cased
+    assert not consts.V37
     lower = pattern.pattern[index + 1]
     upper = pattern.pattern[index + 2]
     match1 = int_between(lower, char_code, upper + 1)
     match2 = int_between(lower, getupper(char_code, pattern.flags), upper + 1)
     return match1 | match2, index + 3
 
+def set_range_uni_ignore(ctx, pattern, index, char_code):
+    # <RANGE_UNI_IGNORE> <lower> <upper>
+    # the char_code is already lower cased
+    lower = pattern.pattern[index + 1]
+    upper = pattern.pattern[index + 2]
+    match1 = int_between(lower, char_code, upper + 1)
+    match2 = int_between(lower, getupper_unicode(char_code), upper + 1)
+    return match1 | match2, index + 3
+
 def set_bigcharset(ctx, pattern, index, char_code):
     # <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
     count = pattern.pattern[index+1]
@@ -300,7 +340,9 @@ set_dispatch_table = {
     consts.OPCODE_BIGCHARSET: set_bigcharset,
     consts.OPCODE_LITERAL: set_literal,
     consts.OPCODE_RANGE: set_range,
-    consts.OPCODE_RANGE_IGNORE: set_range_ignore,
+    consts.OPCODE27_RANGE_IGNORE: set_range_ignore,
+    consts.OPCODE37_RANGE_UNI_IGNORE: set_range_uni_ignore,
     consts.OPCODE_UNICODE_GENERAL_CATEGORY: set_unicode_general_category,
 }
+set_dispatch_table.pop(None, None)   # remove the OPCODE27_* or OPCODE37_*
 set_dispatch_unroll = unrolling_iterable(sorted(set_dispatch_table.items()))
diff --git a/rpython/rlib/rsre/rsre_constants.py b/rpython/rlib/rsre/rsre_constants.py
index 9af708532a..9994db7b05 100644
--- a/rpython/rlib/rsre/rsre_constants.py
+++ b/rpython/rlib/rsre/rsre_constants.py
@@ -1,3 +1,15 @@
+# Horrible import-time hack.
+# Blame CPython for renumbering these OPCODE_* at some point.
+from rpython.rlib.objectmodel import specialize
+try:
+    import pypy.module.sys.version
+    V37 = pypy.module.sys.version.CPYTHON_VERSION >= (3, 7)
+except ImportError:
+    raise ImportError("Cannot import pypy.module.sys.version. You can safely "
+                      "remove this 'raise' line if you are not interested in "
+                      "PyPy but only RPython.")
+    V37 = False
+
 OPCODE_FAILURE            = 0
 OPCODE_SUCCESS            = 1
 OPCODE_ANY                = 2
@@ -6,35 +18,49 @@ OPCODE_ASSERT             = 4
 OPCODE_ASSERT_NOT         = 5
 OPCODE_AT                 = 6
 OPCODE_BRANCH             = 7
-#OPCODE_CALL              = 8
+OPCODE_CALL               = 8                    # not used
 OPCODE_CATEGORY           = 9
 OPCODE_CHARSET            = 10
 OPCODE_BIGCHARSET         = 11
 OPCODE_GROUPREF           = 12
 OPCODE_GROUPREF_EXISTS    = 13
-OPCODE_GROUPREF_IGNORE    = 14
-OPCODE_IN                 = 15
-OPCODE_IN_IGNORE          = 16
-OPCODE_INFO               = 17
-OPCODE_JUMP               = 18
-OPCODE_LITERAL            = 19
-OPCODE_LITERAL_IGNORE     = 20
-OPCODE_MARK               = 21
-OPCODE_MAX_UNTIL          = 22
-OPCODE_MIN_UNTIL          = 23
-OPCODE_NOT_LITERAL        = 24
-OPCODE_NOT_LITERAL_IGNORE = 25
-OPCODE_NEGATE             = 26
-OPCODE_RANGE              = 27
-OPCODE_REPEAT             = 28
-OPCODE_REPEAT_ONE         = 29
-#OPCODE_SUBPATTERN        = 30
-OPCODE_MIN_REPEAT_ONE     = 31
-OPCODE_RANGE_IGNORE       = 32
+OPCODE_GROUPREF_IGNORE    = 28 if V37 else 14
+OPCODE_IN                 = 14 if V37 else 15
+OPCODE_IN_IGNORE          = 29 if V37 else 16
+OPCODE_INFO               = 15 if V37 else 17
+OPCODE_JUMP               = 16 if V37 else 18
+OPCODE_LITERAL            = 17 if V37 else 19
+OPCODE_LITERAL_IGNORE     = 30 if V37 else 20
+OPCODE_MARK               = 18 if V37 else 21
+OPCODE_MAX_UNTIL          = 19 if V37 else 22
+OPCODE_MIN_UNTIL          = 20 if V37 else 23
+OPCODE_NOT_LITERAL        = 21 if V37 else 24
+OPCODE_NOT_LITERAL_IGNORE = 31 if V37 else 25
+OPCODE_NEGATE             = 22 if V37 else 26
+OPCODE_RANGE              = 23 if V37 else 27
+OPCODE_REPEAT             = 24 if V37 else 28
+OPCODE_REPEAT_ONE         = 25 if V37 else 29
+OPCODE_SUBPATTERN         = 26 if V37 else 30    # not used
+OPCODE_MIN_REPEAT_ONE     = 27 if V37 else 31
+OPCODE27_RANGE_IGNORE     = None if V37 else 32
+
+OPCODE37_GROUPREF_LOC_IGNORE      = 32 if V37 else None
+OPCODE37_IN_LOC_IGNORE            = 33 if V37 else None
+OPCODE37_LITERAL_LOC_IGNORE       = 34 if V37 else None
+OPCODE37_NOT_LITERAL_LOC_IGNORE   = 35 if V37 else None
+OPCODE37_GROUPREF_UNI_IGNORE      = 36 if V37 else None
+OPCODE37_IN_UNI_IGNORE            = 37 if V37 else None
+OPCODE37_LITERAL_UNI_IGNORE       = 38 if V37 else None
+OPCODE37_NOT_LITERAL_UNI_IGNORE   = 39 if V37 else None
+OPCODE37_RANGE_UNI_IGNORE         = 40 if V37 else None
 
 # not used by Python itself
 OPCODE_UNICODE_GENERAL_CATEGORY = 70
 
+@specialize.argtype(1)
+def eq(op, const):
+    return const is not None and op == const
+
 
 AT_BEGINNING = 0
 AT_BEGINNING_LINE = 1
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
index 489636b783..3ce901c46a 100644
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -55,7 +55,8 @@ class CompiledPattern(object):
 
     def __init__(self, pattern, flags):
         self.pattern = pattern
-        self.flags = flags
+        if not consts.V37:      # 'flags' is ignored in >=3.7 mode
+            self.flags = flags
         # check we don't get the old value of MAXREPEAT
         # during the untranslated tests. 
         # On python3, MAXCODE can appear in patterns. It will be 65535
@@ -63,6 +64,29 @@ class CompiledPattern(object):
         if not we_are_translated() and rsre_char.CODESIZE != 2:
             assert 65535 not in pattern
 
+    def lowa(self, char_ord):
+        """Pre-3.7: uses getlower(flags).
+           Post-3.7: this is always getlower_ascii().
+        """
+        if not consts.V37:
+            return rsre_char.getlower(char_ord, self.flags)
+        else:
+            return rsre_char.getlower_ascii(char_ord)
+
+    def char_loc_ignore(self, index, char_ord):
+        assert consts.V37
+        pattern = self.pat(index)
+        return (char_ord == pattern or
+                rsre_char.getlower_locale(char_ord) == pattern or
+                rsre_char.getupper_locale(char_ord) == pattern)
+
+    def charset_loc_ignore(self, ctx, ppos, char_ord):
+        lo = rsre_char.getlower_locale(char_ord)
+        if rsre_char.check_charset(ctx, self, ppos, lo):
+            return True
+        up = rsre_char.getupper_locale(char_ord)
+        return up != lo and rsre_char.check_charset(ctx, self, ppos, up)
+
     def pat(self, index):
         jit.promote(self)
         check_nonneg(index)
@@ -74,6 +98,10 @@ class CompiledPattern(object):
         assert result >= 0
         return result
 
+MODE_ANY = '\x00'         # an empty match is fine
+MODE_NONEMPTY = '\x01'    # must have a non-empty match
+MODE_FULL = '\x02'        # must match the whole string
+
 class AbstractMatchContext(object):
     """Abstract base class"""
     _immutable_fields_ = ['end']
@@ -81,7 +109,7 @@ class AbstractMatchContext(object):
     match_end = 0
     match_marks = None
     match_marks_flat = None
-    fullmatch_only = False
+    match_mode = MODE_ANY
 
     def __init__(self, match_start, end):
         # 'match_start' and 'end' must be known to be non-negative
@@ -91,25 +119,30 @@ class AbstractMatchContext(object):
         self.match_start = match_start
         self.end = end
 
-    def reset(self, start):
+    def reset(self, start, must_advance=False):
         self.match_start = start
         self.match_marks = None
         self.match_marks_flat = None
+        #
+        assert MODE_ANY == chr(False)
+        assert MODE_NONEMPTY == chr(True)
+        self.match_mode = chr(must_advance)
+
+    @not_rpython
+    def _fullmatch_only(self, x=None):
+        raise Exception("'ctx.fullmatch_only' was replaced with"
+                        " 'ctx.match_mode'")
+    fullmatch_only = property(_fullmatch_only, _fullmatch_only)
 
     @not_rpython
     def str(self, index):
         """Must be overridden in a concrete subclass.
-        The tag ^^^ here is used to generate a translation-time crash
+        The @not_rpython is used to generate a translation-time crash
         if there is a call to str() that is indirect.  All calls must
         be direct for performance reasons; you need to specialize the
         caller with @specializectx."""
         raise NotImplementedError
 
-    @not_rpython
-    def lowstr(self, index, flags):
-        """Similar to str()."""
-        raise NotImplementedError
-
     # The following methods are provided to be overriden in
     # Utf8MatchContext.  The non-utf8 implementation is provided
     # by the FixedMatchContext abstract subclass, in order to use
@@ -236,10 +269,6 @@ class BufMatchContext(FixedMatchContext):
         check_nonneg(index)
         return ord(self._buffer.getitem(index))
 
-    def lowstr(self, index, flags):
-        c = self.str(index)
-        return rsre_char.getlower(c, flags)
-
     def fresh_copy(self, start):
         return BufMatchContext(self._buffer, start,
                                self.end)
@@ -261,10 +290,6 @@ class StrMatchContext(FixedMatchContext):
         check_nonneg(index)
         return ord(self._string[index])
 
-    def lowstr(self, index, flags):
-        c = self.str(index)
-        return rsre_char.getlower(c, flags)
-
     def fresh_copy(self, start):
         return StrMatchContext(self._string, start,
                                self.end)
@@ -289,10 +314,6 @@ class UnicodeMatchContext(FixedMatchContext):
         check_nonneg(index)
         return ord(self._unicodestr[index])
 
-    def lowstr(self, index, flags):
-        c = self.str(index)
-        return rsre_char.getlower(c, flags)
-
     def fresh_copy(self, start):
         return UnicodeMatchContext(self._unicodestr, start,
                                    self.end)
@@ -599,9 +620,13 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
             return
 
         elif op == consts.OPCODE_SUCCESS:
-            if ctx.fullmatch_only:
+            mode = ctx.match_mode
+            if mode == MODE_FULL:
                 if ptr != ctx.end:
                     return     # not a full match
+            elif mode == MODE_NONEMPTY:
+                if ptr == ctx.match_start:
+                    return     # empty match
             ctx.match_end = ptr
             ctx.match_marks = marks
             return MATCHED_OK
@@ -633,10 +658,10 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
                 ptr1 = ctx.prev_n(ptr, pattern.pat(ppos+1), ctx.ZERO)
             except EndOfString:
                 return
-            saved = ctx.fullmatch_only
-            ctx.fullmatch_only = False
+            saved = ctx.match_mode
+            ctx.match_mode = MODE_ANY
             stop = sre_match(ctx, pattern, ppos + 2, ptr1, marks) is None
-            ctx.fullmatch_only = saved
+            ctx.match_mode = saved
             if stop:
                 return
             marks = ctx.match_marks
@@ -651,10 +676,10 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
             except EndOfString:
                 pass
             else:
-                saved = ctx.fullmatch_only
-                ctx.fullmatch_only = False
+                saved = ctx.match_mode
+                ctx.match_mode = MODE_ANY
                 stop = sre_match(ctx, pattern, ppos + 2, ptr1, marks) is not None
-                ctx.fullmatch_only = saved
+                ctx.match_mode = saved
                 if stop:
                     return
             ppos += pattern.pat(ppos)
@@ -699,7 +724,29 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
             startptr, length_bytes = get_group_ref(ctx, marks, pattern.pat(ppos))
             if length_bytes < 0:
                 return     # group was not previously defined
-            ptr = match_repeated_ignore(ctx, ptr, startptr, length_bytes, pattern.flags)
+            ptr = match_repeated_ignore(ctx, ptr, startptr, length_bytes, pattern)
+            if ptr < ctx.ZERO:
+                return     # no match
+            ppos += 1
+
+        elif consts.eq(op, consts.OPCODE37_GROUPREF_UNI_IGNORE):
+            # unicode version of OPCODE_GROUPREF_IGNORE
+            # <GROUPREF> <groupnum>
+            startptr, length_bytes = get_group_ref(ctx, marks, pattern.pat(ppos))
+            if length_bytes < 0:
+                return     # group was not previously defined
+            ptr = match_repeated_uni_ignore(ctx, ptr, startptr, length_bytes)
+            if ptr < ctx.ZERO:
+                return     # no match
+            ppos += 1
+
+        elif consts.eq(op, consts.OPCODE37_GROUPREF_LOC_IGNORE):
+            # locale version of OPCODE_GROUPREF_IGNORE
+            # <GROUPREF> <groupnum>
+            startptr, length_bytes = get_group_ref(ctx, marks, pattern.pat(ppos))
+            if length_bytes < 0:
+                return     # group was not previously defined
+            ptr = match_repeated_loc_ignore(ctx, ptr, startptr, length_bytes)
             if ptr < ctx.ZERO:
                 return     # no match
             ppos += 1
@@ -726,7 +773,25 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
             # match set member (or non_member), ignoring case
             # <IN> <skip> <set>
             if ptr >= ctx.end or not rsre_char.check_charset(ctx, pattern, ppos+1,
-                                                             ctx.lowstr(ptr, pattern.flags)):
+                                                             pattern.lowa(ctx.str(ptr))):
+                return
+            ppos += pattern.pat(ppos)
+            ptr = ctx.next(ptr)
+
+        elif consts.eq(op, consts.OPCODE37_IN_UNI_IGNORE):
+            # match set member (or non_member), ignoring case, unicode mode
+            # <IN> <skip> <set>
+            if ptr >= ctx.end or not rsre_char.check_charset(ctx, pattern, ppos+1,
+                                                             rsre_char.getlower_unicode(ctx.str(ptr))):
+                return
+            ppos += pattern.pat(ppos)
+            ptr = ctx.next(ptr)
+
+        elif consts.eq(op, consts.OPCODE37_IN_LOC_IGNORE):
+            # match set member (or non_member), ignoring case, locale mode
+            # <IN> <skip> <set>
+            if ptr >= ctx.end or not pattern.charset_loc_ignore(ctx, ppos+1,
+                                                                ctx.str(ptr)):
                 return
             ppos += pattern.pat(ppos)
             ptr = ctx.next(ptr)
@@ -752,7 +817,23 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
         elif op == consts.OPCODE_LITERAL_IGNORE:
             # match literal string, ignoring case
             # <LITERAL_IGNORE> <code>
-            if ptr >= ctx.end or ctx.lowstr(ptr, pattern.flags) != pattern.pat(ppos):
+            if ptr >= ctx.end or pattern.lowa(ctx.str(ptr)) != pattern.pat(ppos):
+                return
+            ppos += 1
+            ptr = ctx.next(ptr)
+
+        elif consts.eq(op, consts.OPCODE37_LITERAL_UNI_IGNORE):
+            # match literal string, ignoring case, unicode mode
+            # <LITERAL_IGNORE> <code>
+            if ptr >= ctx.end or rsre_char.getlower_unicode(ctx.str(ptr)) != pattern.pat(ppos):
+                return
+            ppos += 1
+            ptr = ctx.next(ptr)
+
+        elif consts.eq(op, consts.OPCODE37_LITERAL_LOC_IGNORE):
+            # match literal string, ignoring case, locale mode
+            # <LITERAL_IGNORE> <code>
+            if ptr >= ctx.end or not pattern.char_loc_ignore(ppos, ctx.str(ptr)):
                 return
             ppos += 1
             ptr = ctx.next(ptr)
@@ -775,7 +856,23 @@ def sre_match(ctx, pattern, ppos, ptr, marks):
         elif op == consts.OPCODE_NOT_LITERAL_IGNORE:
             # match if it's not a literal string, ignoring case
             # <NOT_LITERAL> <code>
-            if ptr >= ctx.end or ctx.lowstr(ptr, pattern.flags) == pattern.pat(ppos):
+            if ptr >= ctx.end or pattern.lowa(ctx.str(ptr)) == pattern.pat(ppos):
+                return
+            ppos += 1
+            ptr = ctx.next(ptr)
+
+        elif consts.eq(op, consts.OPCODE37_NOT_LITERAL_UNI_IGNORE):
+            # match if it's not a literal string, ignoring case, unicode mode
+            # <NOT_LITERAL> <code>
+            if ptr >= ctx.end or rsre_char.getlower_unicode(ctx.str(ptr)) == pattern.pat(ppos):
+                return
+            ppos += 1
+            ptr = ctx.next(ptr)
+
+        elif consts.eq(op, consts.OPCODE37_NOT_LITERAL_LOC_IGNORE):
+            # match if it's not a literal string, ignoring case, locale mode
+            # <NOT_LITERAL> <code>
+            if ptr >= ctx.end or pattern.char_loc_ignore(ppos, ctx.str(ptr)):
                 return
             ppos += 1
             ptr = ctx.next(ptr)
@@ -883,12 +980,36 @@ def match_repeated(ctx, ptr, oldptr, length_bytes):
     return True
 
 @specializectx
-def match_repeated_ignore(ctx, ptr, oldptr, length_bytes, flags):
+def match_repeated_ignore(ctx, ptr, oldptr, length_bytes, pattern):
+    oldend = ctx.go_forward_by_bytes(oldptr, length_bytes)
+    while oldptr < oldend:
+        if ptr >= ctx.end:
+            return -1
+        if pattern.lowa(ctx.str(ptr)) != pattern.lowa(ctx.str(oldptr)):
+            return -1
+        ptr = ctx.next(ptr)
+        oldptr = ctx.next(oldptr)
+    return ptr
+
+@specializectx
+def match_repeated_uni_ignore(ctx, ptr, oldptr, length_bytes):
+    oldend = ctx.go_forward_by_bytes(oldptr, length_bytes)
+    while oldptr < oldend:
+        if ptr >= ctx.end:
+            return -1
+        if rsre_char.getlower_unicode(ctx.str(ptr)) != rsre_char.getlower_unicode(ctx.str(oldptr)):
+            return -1
+        ptr = ctx.next(ptr)
+        oldptr = ctx.next(oldptr)
+    return ptr
+
+@specializectx
+def match_repeated_loc_ignore(ctx, ptr, oldptr, length_bytes):
     oldend = ctx.go_forward_by_bytes(oldptr, length_bytes)
     while oldptr < oldend:
         if ptr >= ctx.end:
             return -1
-        if ctx.lowstr(ptr, flags) != ctx.lowstr(oldptr, flags):
+        if rsre_char.getlower_locale(ctx.str(ptr)) != rsre_char.getlower_locale(ctx.str(oldptr)):
             return -1
         ptr = ctx.next(ptr)
         oldptr = ctx.next(oldptr)
@@ -955,54 +1076,63 @@ def match_IN(ctx, pattern, ptr, ppos):
     return rsre_char.check_charset(ctx, pattern, ppos+2, ctx.str(ptr))
 @specializectx
 def match_IN_IGNORE(ctx, pattern, ptr, ppos):
-    return rsre_char.check_charset(ctx, pattern, ppos+2, ctx.lowstr(ptr, pattern.flags))
+    return rsre_char.check_charset(ctx, pattern, ppos+2, pattern.lowa(ctx.str(ptr)))
+@specializectx
+def match_IN_UNI_IGNORE(ctx, pattern, ptr, ppos):
+    return rsre_char.check_charset(ctx, pattern, ppos+2, rsre_char.getlower_unicode(ctx.str(ptr)))
+@specializectx
+def match_IN_LOC_IGNORE(ctx, pattern, ptr, ppos):
+    return pattern.charset_loc_ignore(ctx, ppos+2, ctx.str(ptr))
 @specializectx
 def match_LITERAL(ctx, pattern, ptr, ppos):
     return ctx.str(ptr) == pattern.pat(ppos+1)
 @specializectx
 def match_LITERAL_IGNORE(ctx, pattern, ptr, ppos):
-    return ctx.lowstr(ptr, pattern.flags) == pattern.pat(ppos+1)
+    return pattern.lowa(ctx.str(ptr)) == pattern.pat(ppos+1)
+@specializectx
+def match_LITERAL_UNI_IGNORE(ctx, pattern, ptr, ppos):
+    return rsre_char.getlower_unicode(ctx.str(ptr)) == pattern.pat(ppos+1)
+@specializectx
+def match_LITERAL_LOC_IGNORE(ctx, pattern, ptr, ppos):
+    return pattern.char_loc_ignore(ppos+1, ctx.str(ptr))
 @specializectx
 def match_NOT_LITERAL(ctx, pattern, ptr, ppos):
     return ctx.str(ptr) != pattern.pat(ppos+1)
 @specializectx
 def match_NOT_LITERAL_IGNORE(ctx, pattern, ptr, ppos):
-    return ctx.lowstr(ptr, pattern.flags) != pattern.pat(ppos+1)
+    return pattern.lowa(ctx.str(ptr)) != pattern.pat(ppos+1)
+@specializectx
+def match_NOT_LITERAL_UNI_IGNORE(ctx, pattern, ptr, ppos):
+    return rsre_char.getlower_unicode(ctx.str(ptr)) != pattern.pat(ppos+1)
+@specializectx
+def match_NOT_LITERAL_LOC_IGNORE(ctx, pattern, ptr, ppos):
+    return not pattern.char_loc_ignore(ppos+1, ctx.str(ptr))
 
 def _make_fre(checkerfn):
     if checkerfn == match_ANY_ALL:
         def fre(ctx, pattern, ptr, end, ppos):
             return end
-    elif checkerfn == match_IN:
-        install_jitdriver_spec('MatchIn',
+    elif checkerfn in (match_IN, match_IN_IGNORE, match_IN_UNI_IGNORE):
+        # produces three jitdrivers:
+        #     MatchIn
+        #     MatchInIgnore
+        #     MatchInUniIgnore
+        name = checkerfn.__name__.title().replace('_', '')
+        method_name = "jitdriver_" + name
+        install_jitdriver_spec(name,
                                greens=['ppos', 'pattern'],
                                reds=['ptr', 'end', 'ctx'],
                                debugprint=(1, 0))
         @specializectx
         def fre(ctx, pattern, ptr, end, ppos):
             while True:
-                ctx.jitdriver_MatchIn.jit_merge_point(ctx=ctx, ptr=ptr,
+                getattr(ctx, method_name).jit_merge_point(ctx=ctx, ptr=ptr,
                                                       end=end, ppos=ppos,
                                                       pattern=pattern)
                 if ptr < end and checkerfn(ctx, pattern, ptr, ppos):
                     ptr = ctx.next(ptr)
                 else:
                     return ptr
-    elif checkerfn == match_IN_IGNORE:
-        install_jitdriver_spec('MatchInIgnore',
-                               greens=['ppos', 'pattern'],
-                               reds=['ptr', 'end', 'ctx'],
-                               debugprint=(1, 0))
-        @specializectx
-        def fre(ctx, pattern, ptr, end, ppos):
-            while True:
-                ctx.jitdriver_MatchInIgnore.jit_merge_point(ctx=ctx, ptr=ptr,
-                                                            end=end, ppos=ppos,
-                                                            pattern=pattern)
-                if ptr < end and checkerfn(ctx, pattern, ptr, ppos):
-                    ptr = ctx.next(ptr)
-                else:
-                    return ptr
     else:
         # in the other cases, the fre() function is not JITted at all
         # and is present as a residual call.
@@ -1019,11 +1149,19 @@ unroll_char_checker = [
     (consts.OPCODE_ANY_ALL,            match_ANY_ALL),
     (consts.OPCODE_IN,                 match_IN),
     (consts.OPCODE_IN_IGNORE,          match_IN_IGNORE),
+    (consts.OPCODE37_IN_UNI_IGNORE,           match_IN_UNI_IGNORE),
+    (consts.OPCODE37_IN_LOC_IGNORE,           match_IN_LOC_IGNORE),
     (consts.OPCODE_LITERAL,            match_LITERAL),
     (consts.OPCODE_LITERAL_IGNORE,     match_LITERAL_IGNORE),
+    (consts.OPCODE37_LITERAL_UNI_IGNORE,      match_LITERAL_UNI_IGNORE),
+    (consts.OPCODE37_LITERAL_LOC_IGNORE,      match_LITERAL_LOC_IGNORE),
     (consts.OPCODE_NOT_LITERAL,        match_NOT_LITERAL),
     (consts.OPCODE_NOT_LITERAL_IGNORE, match_NOT_LITERAL_IGNORE),
+    (consts.OPCODE37_NOT_LITERAL_UNI_IGNORE,  match_NOT_LITERAL_UNI_IGNORE),
+    (consts.OPCODE37_NOT_LITERAL_LOC_IGNORE,  match_NOT_LITERAL_LOC_IGNORE),
     ]
+unroll_char_checker = [(_op, _fn) for (_op, _fn) in unroll_char_checker
+                       if _op is not None]   # possibly removes the OPCODE37_*
 unroll_fre_checker = [(_op, _make_fre(_fn))
                       for (_op, _fn) in unroll_char_checker]
 
@@ -1119,7 +1257,8 @@ def match(pattern, string, start=0, end=sys.maxint, fullmatch=False):
     assert isinstance(pattern, CompiledPattern)
     start, end = _adjust(start, end, len(string))
     ctx = StrMatchContext(string, start, end)
-    ctx.fullmatch_only = fullmatch
+    if fullmatch:
+        ctx.match_mode = MODE_FULL
     if match_context(ctx, pattern):
         return ctx
     else:
diff --git a/rpython/rlib/rsre/rsre_utf8.py b/rpython/rlib/rsre/rsre_utf8.py
index 834748ebaa..7617acc5fd 100644
--- a/rpython/rlib/rsre/rsre_utf8.py
+++ b/rpython/rlib/rsre/rsre_utf8.py
@@ -20,10 +20,6 @@ class Utf8MatchContext(AbstractMatchContext):
         check_nonneg(index)
         return rutf8.codepoint_at_pos(self._utf8, index)
 
-    def lowstr(self, index, flags):
-        c = self.str(index)
-        return rsre_char.getlower(c, flags)
-
     def get_single_byte(self, base_position, index):
         return self._utf8[base_position + index]
 
@@ -97,10 +93,11 @@ def utf8match(pattern, utf8string, bytestart=0, byteend=sys.maxint,
               fullmatch=False):
     # bytestart and byteend must be valid byte positions inside the
     # utf8string.
-    from rpython.rlib.rsre.rsre_core import match_context
+    from rpython.rlib.rsre.rsre_core import match_context, MODE_FULL
 
     ctx = make_utf8_ctx(utf8string, bytestart, byteend)
-    ctx.fullmatch_only = fullmatch
+    if fullmatch:
+        ctx.match_mode = MODE_FULL
     if match_context(ctx, pattern):
         return ctx
     else:
diff --git a/rpython/rlib/rsre/test/support.py b/rpython/rlib/rsre/test/support.py
index 9e8fafc7c1..4e12277bcb 100644
--- a/rpython/rlib/rsre/test/support.py
+++ b/rpython/rlib/rsre/test/support.py
@@ -1,6 +1,7 @@
 import sys, random
 from rpython.rlib import debug
 from rpython.rlib.rsre.rsre_core import _adjust, match_context, search_context
+from rpython.rlib.rsre.rsre_core import MODE_FULL
 from rpython.rlib.rsre.rsre_core import StrMatchContext, EndOfString
 
 
@@ -112,7 +113,8 @@ def match(pattern, string, start=0, end=sys.maxint, fullmatch=False):
     start = Position(start)
     end = Position(end)
     ctx = MatchContextForTests(string, start, end)
-    ctx.fullmatch_only = fullmatch
+    if fullmatch:
+        ctx.match_mode = MODE_FULL
     if match_context(ctx, pattern):
         return ctx
     else:
diff --git a/rpython/rlib/rsre/test/test_char.py b/rpython/rlib/rsre/test/test_char.py
index bd3a6f2936..6e7d6f3e33 100644
--- a/rpython/rlib/rsre/test/test_char.py
+++ b/rpython/rlib/rsre/test/test_char.py
@@ -204,3 +204,15 @@ def test_general_category():
     assert check_charset(pat, 0, 99)    # Lcheck_charset(pat, 0, 453)   # Lt
     assert not check_charset(pat, 0, 688)    # Lm
     assert not check_charset(pat, 0, 5870)   # Nl
+
+def test_iscased():
+    assert rsre_char.iscased_ascii(65)
+    assert rsre_char.iscased_ascii(100)
+    assert rsre_char.iscased_ascii(64) is False
+    assert rsre_char.iscased_ascii(126) is False
+    assert rsre_char.iscased_ascii(1260) is False
+    assert rsre_char.iscased_ascii(12600) is False
+    for i in range(65536):
+        assert rsre_char.iscased_unicode(i) == (
+            rsre_char.getlower_unicode(i) != i or
+            rsre_char.getupper_unicode(i) != i)
diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py
index c832244b11..758c015f7a 100644
--- a/rpython/rlib/rsre/test/test_match.py
+++ b/rpython/rlib/rsre/test/test_match.py
@@ -1,5 +1,5 @@
 import re, random, py
-from rpython.rlib.rsre import rsre_char
+from rpython.rlib.rsre import rsre_char, rsre_constants
 from rpython.rlib.rsre.rpy import get_code, VERSION
 from rpython.rlib.rsre.test.support import match, fullmatch, Position as P
 
@@ -306,6 +306,10 @@ class TestMatch:
         rsre_char.set_unicode_db(unicodedb)
         #
         r = get_code(u"[\U00010428-\U0001044f]", re.I)
-        assert r.pattern.count(27) == 1       # OPCODE_RANGE
-        r.pattern[r.pattern.index(27)] = 32   # => OPCODE_RANGE_IGNORE
+        assert r.pattern.count(rsre_constants.OPCODE_RANGE) == 1
+        if rsre_constants.V37:
+            repl = rsre_constants.OPCODE37_RANGE_UNI_IGNORE
+        else:
+            repl = rsre_constants.OPCODE27_RANGE_IGNORE
+        r.pattern[r.pattern.index(rsre_constants.OPCODE_RANGE)] = repl
         assert match(r, u"\U00010428")
diff --git a/rpython/rlib/rwinreg.py b/rpython/rlib/rwinreg.py
index 7f45088ba3..4628804faf 100644
--- a/rpython/rlib/rwinreg.py
+++ b/rpython/rlib/rwinreg.py
@@ -49,71 +49,75 @@ PHKEY = rffi.CArrayPtr(HKEY)
 REGSAM = rwin32.DWORD
 
 def get_traits(suffix):
+    if suffix == 'A':
+        strp = rffi.CCHARP
+    else:
+        strp = rffi.CWCHARP
     RegSetValue = external(
         'RegSetValue' + suffix,
-        [HKEY, rffi.CCHARP, rwin32.DWORD, rffi.CCHARP, rwin32.DWORD],
+        [HKEY, strp, rwin32.DWORD, strp, rwin32.DWORD],
         rffi.LONG)
 
     RegSetValueEx = external(
         'RegSetValueEx' + suffix,
-        [HKEY, rffi.CCHARP, rwin32.DWORD,
-         rwin32.DWORD, rffi.CCHARP, rwin32.DWORD],
+        [HKEY, strp, rwin32.DWORD,
+         rwin32.DWORD, strp, rwin32.DWORD],
         rffi.LONG)
 
     RegQueryValue = external(
         'RegQueryValue' + suffix,
-        [HKEY, rffi.CCHARP, rffi.CCHARP, rwin32.PLONG],
+        [HKEY, strp, strp, rwin32.PLONG],
         rffi.LONG)
 
     RegQueryValueEx = external(
         'RegQueryValueEx' + suffix,
-        [HKEY, rffi.CCHARP, rwin32.LPDWORD, rwin32.LPDWORD,
-         rffi.CCHARP, rwin32.LPDWORD],
+        [HKEY, strp, rwin32.LPDWORD, rwin32.LPDWORD,
+         strp, rwin32.LPDWORD],
         rffi.LONG)
 
     RegCreateKey = external(
         'RegCreateKey' + suffix,
-        [HKEY, rffi.CCHARP, PHKEY],
+        [HKEY, strp, PHKEY],
         rffi.LONG)
 
     RegCreateKeyEx = external(
         'RegCreateKeyEx' + suffix,
-        [HKEY, rffi.CCHARP, rwin32.DWORD, rffi.CCHARP, rwin32.DWORD,
+        [HKEY, strp, rwin32.DWORD, strp, rwin32.DWORD,
          REGSAM, rffi.VOIDP, PHKEY, rwin32.LPDWORD],
         rffi.LONG)
 
     RegDeleteValue = external(
         'RegDeleteValue' + suffix,
-        [HKEY, rffi.CCHARP],
+        [HKEY, strp],
         rffi.LONG)
 
     RegDeleteKey = external(
         'RegDeleteKey' + suffix,
-        [HKEY, rffi.CCHARP],
+        [HKEY, strp],
         rffi.LONG)
 
     RegOpenKeyEx = external(
         'RegOpenKeyEx' + suffix,
-        [HKEY, rffi.CCHARP, rwin32.DWORD, REGSAM, PHKEY],
+        [HKEY, strp, rwin32.DWORD, REGSAM, PHKEY],
         rffi.LONG)
 
     RegEnumValue = external(
         'RegEnumValue' + suffix,
-        [HKEY, rwin32.DWORD, rffi.CCHARP,
+        [HKEY, rwin32.DWORD, strp,
          rwin32.LPDWORD, rwin32.LPDWORD, rwin32.LPDWORD,
          rffi.CCHARP, rwin32.LPDWORD],
         rffi.LONG)
 
     RegEnumKeyEx = external(
         'RegEnumKeyEx' + suffix,
-        [HKEY, rwin32.DWORD, rffi.CCHARP,
+        [HKEY, rwin32.DWORD, strp,
          rwin32.LPDWORD, rwin32.LPDWORD,
-         rffi.CCHARP, rwin32.LPDWORD, rwin32.PFILETIME],
+         strp, rwin32.LPDWORD, rwin32.PFILETIME],
         rffi.LONG)
 
     RegQueryInfoKey = external(
         'RegQueryInfoKey' + suffix,
-        [HKEY, rffi.CCHARP, rwin32.LPDWORD, rwin32.LPDWORD,
+        [HKEY, strp, rwin32.LPDWORD, rwin32.LPDWORD,
          rwin32.LPDWORD, rwin32.LPDWORD, rwin32.LPDWORD,
          rwin32.LPDWORD, rwin32.LPDWORD, rwin32.LPDWORD,
          rwin32.LPDWORD, rwin32.PFILETIME],
@@ -121,17 +125,17 @@ def get_traits(suffix):
 
     RegLoadKey = external(
         'RegLoadKey' + suffix,
-        [HKEY, rffi.CCHARP, rffi.CCHARP],
+        [HKEY, strp, strp],
         rffi.LONG)
 
     RegSaveKey = external(
         'RegSaveKey' + suffix,
-        [HKEY, rffi.CCHARP, rffi.VOIDP],
+        [HKEY, strp, rffi.VOIDP],
         rffi.LONG)
 
     RegConnectRegistry = external(
         'RegConnectRegistry' + suffix,
-        [rffi.CCHARP, HKEY, PHKEY],
+        [strp, HKEY, PHKEY],
         rffi.LONG)
 
     return (RegSetValue, RegSetValueEx, RegQueryValue, RegQueryValueEx,
author	Ronan Lamy <ronan.lamy@gmail.com>	2020-11-25 20:25:40 +0000
committer	Ronan Lamy <ronan.lamy@gmail.com>	2020-11-25 20:25:40 +0000
commit	3c2bacc346bf0557e146c909b1723568ac2f02d2 (patch)
tree	f83daf6cb1c7449b78984e9a9c2ee358b67fa4ac
parent	hg merge default (diff)
parent	fix for a case where the difference shows up with recent unicode databases only (diff)
download	pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.tar.gz pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.tar.bz2 pypy-3c2bacc346bf0557e146c909b1723568ac2f02d2.zip