From d84266182af3fadcd2747609bd9c0ffe532215b7 Mon Sep 17 00:00:00 2001 From: Dieter Buys Date: Mon, 27 Apr 2015 16:29:26 -0700 Subject: [PATCH 1/3] Fix for mishandled " and & characters --- secretary.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/secretary.py b/secretary.py index 49740dc..750aea1 100644 --- a/secretary.py +++ b/secretary.py @@ -306,20 +306,26 @@ class Renderer(object): parent.removeChild(discard) - def _unescape_entities(self, xml_text): + @staticmethod + def _unescape_entities(xml_text): # unescape XML entities gt and lt unescape_rules = { r'(?is)({[{|%].*)(>)(.*[%|}]})': r'\1>\3', r'(?is)({[{|%].*)(<)(.*[%|}]})': r'\1<\3', - r'(?is)({[{|%].*)(<.?text:s.?>)(.*[%|}]})': r'\1 \3', + r'(?is)({[{|%].*)(&)(.*[%|}]})': r'\1&\3', + r'(?is)({[{|%].*)(")(.*[%|}]})': r'\1"\3', + r'(?is)({[{|%].*)(<.?text:s.?>)(.*[%|}]})': r'\1 \3' } for p, r in unescape_rules.items(): - xml_text = re.sub(p, r, xml_text) + count = None + while count != 0: + xml_text, count = re.subn(p, r, xml_text) return xml_text - def _encode_escape_chars(self, xml_text): + @staticmethod + def _encode_escape_chars(xml_text): # Replace line feed and/or tabs within text span entities. 
find_pattern = r'(?is)([^>]*?([\n|\t])[^<]*?)' for m in re.findall(find_pattern, xml_text): @@ -441,10 +447,10 @@ class Renderer(object): try: self.template_images = dict() self._prepare_template_tags(xml_document) - template_string = self._unescape_entities(xml_document.toxml()) + template_string = Renderer._unescape_entities(xml_document.toxml()) jinja_template = self.environment.from_string(template_string) result = jinja_template.render(**kwargs) - result = self._encode_escape_chars(result) + result = Renderer._encode_escape_chars(result) final_xml = parseString(result.encode('ascii', 'xmlcharrefreplace')) if self.template_images: From 1cffe9f224fb7b490551e548db536e39178c8719 Mon Sep 17 00:00:00 2001 From: Dieter Buys Date: Mon, 27 Apr 2015 17:45:18 -0700 Subject: [PATCH 2/3] Text style tags are now properly stripped from template elements --- secretary.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/secretary.py b/secretary.py index 750aea1..8041a5e 100644 --- a/secretary.py +++ b/secretary.py @@ -308,13 +308,17 @@ class Renderer(object): @staticmethod def _unescape_entities(xml_text): - # unescape XML entities gt and lt + """ + Strips tags of the form from inside Jinja elements + and unescapes HTML codes for >, <, & and " + """ unescape_rules = { + r'(?is)({[{|%].*)(<.?text:s.*?>)(.*[%|}]})': r'\1\3', + r'(?is)({[{|%].*)(>)(.*[%|}]})': r'\1>\3', r'(?is)({[{|%].*)(<)(.*[%|}]})': r'\1<\3', r'(?is)({[{|%].*)(&)(.*[%|}]})': r'\1&\3', - r'(?is)({[{|%].*)(")(.*[%|}]})': r'\1"\3', - r'(?is)({[{|%].*)(<.?text:s.?>)(.*[%|}]})': r'\1 \3' + r'(?is)({[{|%].*)(")(.*[%|}]})': r'\1"\3' } for p, r in unescape_rules.items(): @@ -449,6 +453,7 @@ class Renderer(object): self._prepare_template_tags(xml_document) template_string = Renderer._unescape_entities(xml_document.toxml()) jinja_template = self.environment.from_string(template_string) + result = jinja_template.render(**kwargs) result = Renderer._encode_escape_chars(result) @@ -460,6 
+465,8 @@ class Renderer(object): except: self.log.error('Error rendering template:\n%s', xml_document.toprettyxml(), exc_info=True) + + self.log.error('Unescaped template was:\n{}'.format(template_string)) raise finally: self.log.debug('Rendering xml object finished') From d6abc37ccb7de78cd7c02a9fd1fac26c0631d219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Ram=C3=ADrez?= Date: Wed, 29 Apr 2015 11:54:35 -0600 Subject: [PATCH 3/3] Make adjustments in PR #15. For now leaving #16 open. This fixes #14, and closes #15. --- secretary.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/secretary.py b/secretary.py index 8041a5e..272c43c 100644 --- a/secretary.py +++ b/secretary.py @@ -26,6 +26,7 @@ from os import path from mimetypes import guess_type, guess_extension from uuid import uuid4 from xml.dom.minidom import parseString +from xml.parsers.expat import ExpatError from jinja2 import Environment, Undefined # Test python versions and normalize calls to basestring, unicode, etc. 
@@ -313,18 +314,16 @@ class Renderer(object): and unescapes HTML codes for >, <, & and " """ unescape_rules = { - r'(?is)({[{|%].*)(<.?text:s.*?>)(.*[%|}]})': r'\1\3', - r'(?is)({[{|%].*)(>)(.*[%|}]})': r'\1>\3', r'(?is)({[{|%].*)(<)(.*[%|}]})': r'\1<\3', r'(?is)({[{|%].*)(&)(.*[%|}]})': r'\1&\3', r'(?is)({[{|%].*)(")(.*[%|}]})': r'\1"\3' } - for p, r in unescape_rules.items(): - count = None - while count != 0: - xml_text, count = re.subn(p, r, xml_text) + for regexp, replacement in unescape_rules.items(): + subs_made = True + while subs_made: + xml_text, subs_made = re.subn(regexp, replacement, xml_text) return xml_text @@ -451,17 +450,21 @@ class Renderer(object): try: self.template_images = dict() self._prepare_template_tags(xml_document) - template_string = Renderer._unescape_entities(xml_document.toxml()) + template_string = self._unescape_entities(xml_document.toxml()) jinja_template = self.environment.from_string(template_string) result = jinja_template.render(**kwargs) - result = Renderer._encode_escape_chars(result) + result = self._encode_escape_chars(result) final_xml = parseString(result.encode('ascii', 'xmlcharrefreplace')) if self.template_images: self.replace_images(final_xml) return final_xml + except ExpatError as e: + near = result.split('\n')[e.lineno -1][e.offset-50:e.offset+50] + raise ExpatError('ExpatError at line %d, column %d\nNear of: "[...]%s[...]"' % \ + (e.lineno, e.offset, near)) except: self.log.error('Error rendering template:\n%s', xml_document.toprettyxml(), exc_info=True)