diff options
author | Michał Górny <mgorny@gentoo.org> | 2018-01-04 21:03:07 +0100 |
---|---|---|
committer | Michał Górny <mgorny@gentoo.org> | 2018-01-04 21:34:53 +0100 |
commit | 9f660cebb2308bb021a128fdd699ea70ec383c6a (patch) | |
tree | dae66cca3c1a836c6245843c0a53583c831f18c4 /dev-python/html5lib/files | |
parent | dev-python/html2text: Clean old up (diff) | |
download | gentoo-9f660cebb2308bb021a128fdd699ea70ec383c6a.tar.gz gentoo-9f660cebb2308bb021a128fdd699ea70ec383c6a.tar.bz2 gentoo-9f660cebb2308bb021a128fdd699ea70ec383c6a.zip |
dev-python/html5lib: Clean old up
Diffstat (limited to 'dev-python/html5lib/files')
-rw-r--r-- | dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch | 117 | ||||
-rw-r--r-- | dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch | 50 |
2 files changed, 0 insertions, 167 deletions
diff --git a/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch b/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch deleted file mode 100644 index fecfab9a4fb4..000000000000 --- a/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 46046c0f7125911ff8205f09a7574573bb953105 Mon Sep 17 00:00:00 2001 -From: Geoffrey Sneddon <geoffers@gmail.com> -Date: Mon, 23 Nov 2015 15:17:07 +0000 -Subject: [PATCH 1/3] Make lxml tree-builder coerce comments to work with lxml - 3.5. - ---- - html5lib/ihatexml.py | 2 ++ - html5lib/treebuilders/etree_lxml.py | 2 +- - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py -index 0fc7930..b5b2e98 100644 ---- a/html5lib/ihatexml.py -+++ b/html5lib/ihatexml.py -@@ -225,6 +225,8 @@ def coerceComment(self, data): - while "--" in data: - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) - data = data.replace("--", "- -") -+ if data.endswith("-"): -+ data += " " - return data - - def coerceCharacters(self, data): -diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py -index 35d08ef..17007e3 100644 ---- a/html5lib/treebuilders/etree_lxml.py -+++ b/html5lib/treebuilders/etree_lxml.py -@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder): - - def __init__(self, namespaceHTMLElements, fullTree=False): - builder = etree_builders.getETreeModule(etree, fullTree=fullTree) -- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() -+ infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) - self.namespaceHTMLElements = namespaceHTMLElements - - class Attributes(dict): - -From 1c22e1ce93dd4acc81a66cfa03cf9720fbd741c7 Mon Sep 17 00:00:00 2001 -From: Geoffrey Sneddon <geoffers@gmail.com> -Date: Mon, 23 Nov 2015 15:35:21 +0000 -Subject: [PATCH 2/3] fixup! Make lxml tree-builder coerce comments to work - with lxml 3.5. - ---- - html5lib/ihatexml.py | 1 + - html5lib/treebuilders/etree_lxml.py | 7 ++++--- - 2 files changed, 5 insertions(+), 3 deletions(-) - -diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py -index b5b2e98..5a81a12 100644 ---- a/html5lib/ihatexml.py -+++ b/html5lib/ihatexml.py -@@ -226,6 +226,7 @@ def coerceComment(self, data): - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) - data = data.replace("--", "- -") - if data.endswith("-"): -+ warnings.warn("Comments cannot contain end in a dash", DataLossWarning) - data += " " - return data - -diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py -index 17007e3..c6c981f 100644 ---- a/html5lib/treebuilders/etree_lxml.py -+++ b/html5lib/treebuilders/etree_lxml.py -@@ -54,7 +54,7 @@ def _getChildNodes(self): - def testSerializer(element): - rv = [] - finalText = None -- infosetFilter = ihatexml.InfosetFilter() -+ infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) - - def serializeElement(element, indent=0): - if not hasattr(element, "tag"): -@@ -257,7 +257,7 @@ def _getData(self): - data = property(_getData, _setData) - - self.elementClass = Element -- self.commentClass = builder.Comment -+ self.commentClass = Comment - # self.fragmentClass = builder.DocumentFragment - _base.TreeBuilder.__init__(self, namespaceHTMLElements) - -@@ -344,7 +344,8 @@ def insertRoot(self, token): - - # Append the initial comments: - for comment_token in self.initial_comments: -- root.addprevious(etree.Comment(comment_token["data"])) -+ comment = self.commentClass(comment_token["data"]) -+ root.addprevious(comment._element) - - # Create the root document and add the ElementTree to it - self.document = self.documentClass() - -From 235a6d7ac7e0a3e2b431766e051094c2d3110ba3 Mon Sep 17 00:00:00 2001 -From: Geoffrey Sneddon <geoffers@gmail.com> -Date: Mon, 23 Nov 2015 15:42:12 +0000 -Subject: [PATCH 3/3] fixup! Make lxml tree-builder coerce comments to work - with lxml 3.5. - ---- - html5lib/ihatexml.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py -index 5a81a12..5da5d93 100644 ---- a/html5lib/ihatexml.py -+++ b/html5lib/ihatexml.py -@@ -226,7 +226,7 @@ def coerceComment(self, data): - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) - data = data.replace("--", "- -") - if data.endswith("-"): -- warnings.warn("Comments cannot contain end in a dash", DataLossWarning) -+ warnings.warn("Comments cannot end in a dash", DataLossWarning) - data += " " - return data - diff --git a/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch b/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch deleted file mode 100644 index 2fbef2ad0b9e..000000000000 --- a/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch +++ /dev/null @@ -1,50 +0,0 @@ ---- a/html5lib/sanitizer.py -+++ b/html5lib/sanitizer.py -@@ -203,7 +203,7 @@ - for attr in self.attr_val_is_uri: - if attr not in attrs: - continue -- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', -+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', - unescape(attrs[attr])).lower() - # remove replacement characters from unescaped characters - val_unescaped = val_unescaped.replace("\ufffd", "") -@@ -228,7 +228,7 @@ - ' ', - unescape(attrs[attr])) - if (token["name"] in self.svg_allow_local_href and -- 'xlink:href' in attrs and re.search('^\s*[^#\s].*', -+ 'xlink:href' in attrs and re.search(r'^\s*[^#\s].*', - attrs['xlink:href'])): - del attrs['xlink:href'] - if 'style' in attrs: -@@ -257,16 +257,16 @@ - - def sanitize_css(self, style): - # disallow urls -- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) -+ style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) - - # gauntlet -- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): -+ if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): - return '' -- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): -+ if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): - return '' - - clean = [] -- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): -+ for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): - if not value: - continue - if prop.lower() in self.allowed_css_properties: -@@ -275,7 +275,7 @@ - 'padding']: - for keyword in value.split(): - if keyword not in self.acceptable_css_keywords and \ -- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): -+ not re.match(r"^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): - break - else: - clean.append(prop + ': ' + value + ';') |