From 96ceaf41f107a90c91d08eae6b5895c166bb0970 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christopher=20Ram=C3=ADrez?=
Date: Mon, 13 Feb 2017 11:22:22 -0600
Subject: [PATCH] Fix #34 Automatically unescape URIs whose scheme is "secretary".

---
Reviewer note: _unescape_links imports unquote from urllib.parse and falls
back to urllib, because urllib.unquote only exists on Python 2. The blob id
after ".." on the index line was not recomputed for this amendment.

 secretary.py | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/secretary.py b/secretary.py
index befd9cb..723b81b 100644
--- a/secretary.py
+++ b/secretary.py
@@ -397,8 +397,9 @@ class Renderer(object):
 
     def _unescape_entities(self, xml_text):
         """
-        Unescape '&', '<', '"' and '>' within jinja instructions.
-        The regexs rules used here are compiled in _compile_escape_expressions.
+        Unescape links and '&', '<', '"' and '>' within jinja
+        instructions. The regex rules used here are compiled in
+        _compile_escape_expressions.
         """
         for regexp, replacement in self.escape_map.items():
             while True:
@@ -406,6 +407,34 @@ class Renderer(object):
                 if not substitutions:
                     break
 
+        return self._unescape_links(xml_text)
+
+    def _unescape_links(self, xml_text):
+        """Fix LibreOffice auto escaping of xlink:href attribute values.
+
+        Only 'secretary' scheme URLs are affected: the 'secretary:' prefix
+        is removed and the rest of the value is percent-decoded.
+        """
+        # urllib.unquote exists only on Python 2; Python 3 provides it as
+        # urllib.parse.unquote.
+        try:
+            from urllib.parse import unquote
+        except ImportError:
+            from urllib import unquote
+
+        robj = re.compile(r'(?is)(xlink:href=\")secretary:(.*?)(\")')
+
+        def replacement(match):
+            return ''.join([match.group(1), unquote(match.group(2)),
+                            match.group(3)])
+
+        # Repeat until nothing matches: a decoded value can itself start
+        # with another 'secretary:' prefix.
+        while True:
+            xml_text, rep = robj.subn(replacement, xml_text)
+            if not rep:
+                break
+
         return xml_text
 
     @staticmethod