Introducing a new and faster technique to escape variable values.

This commit is contained in:
Christopher Ramírez 2017-09-01 15:08:24 -06:00
parent f74046fb09
commit 76cb15e2ba
2 changed files with 35 additions and 44 deletions

View file

@ -29,12 +29,13 @@ import re
import sys import sys
import logging import logging
import zipfile import zipfile
import jinja2
from os import path from os import path
from mimetypes import guess_type, guess_extension from mimetypes import guess_type, guess_extension
from uuid import uuid4 from uuid import uuid4
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError, ErrorString from xml.parsers.expat import ExpatError, ErrorString
from jinja2 import Environment, Undefined from jinja2 import Environment, Undefined, Markup
try: try:
if sys.version_info.major == 3: if sys.version_info.major == 3:
@ -135,7 +136,8 @@ class Renderer(object):
self.environment = environment self.environment = environment
else: else:
self.environment = Environment(undefined=UndefinedSilently, self.environment = Environment(undefined=UndefinedSilently,
autoescape=True) autoescape=True,
finalize=self.finalize_value)
# Register filters # Register filters
self.environment.filters['pad'] = pad_string self.environment.filters['pad'] = pad_string
self.environment.filters['markdown'] = self.markdown_filter self.environment.filters['markdown'] = self.markdown_filter
@ -146,6 +148,13 @@ class Renderer(object):
self._compile_tags_expressions() self._compile_tags_expressions()
@jinja2.evalcontextfilter
def finalize_value(self, value, *args):
"""Escapes variables values."""
if isinstance(value, Markup):
return value
return Markup(self.get_escaped_var_value(value))
def media_loader(self, callback): def media_loader(self, callback):
"""This sets the the media loader. A user defined function which """This sets the the media loader. A user defined function which
@ -401,7 +410,6 @@ class Renderer(object):
# Finally, remove the placeholder # Finally, remove the placeholder
placeholder_parent.removeChild(placeholder) placeholder_parent.removeChild(placeholder)
def _unescape_entities(self, xml_text): def _unescape_entities(self, xml_text):
""" """
Unescape links and '&', '<', '"' and '>' within jinja Unescape links and '&', '<', '"' and '>' within jinja
@ -434,17 +442,16 @@ class Renderer(object):
return xml_text return xml_text
@staticmethod @staticmethod
def _encode_escape_chars(xml_text): def get_escaped_var_value(value):
""" """
Replace line feed and/or tabs within text:span entities. Encodes XML reserved chars in value (eg. &, <, >) and also replaces
the control chars \n and \t with their ODF counterparts.
""" """
find_pattern = r'(?is)<text:([\S]+?).*?>([^>]*?([\n\t])[^<]*?)</text:\1>' value = Markup.escape(value)
for m in re.findall(find_pattern, xml_text): return (
replacement = m[1].replace('\n', '<text:line-break/>') value.replace('\n', Markup('<text:line-break/>'))
replacement = replacement.replace('\t', '<text:tab/>') .replace('\t', Markup('<text:tab/>'))
xml_text = xml_text.replace(m[1], replacement) )
return xml_text
def add_media_to_archive(self, media, mime, name=''): def add_media_to_archive(self, media, mime, name=''):
@ -568,7 +575,6 @@ class Renderer(object):
) )
result = jinja_template.render(**kwargs) result = jinja_template.render(**kwargs)
result = self._encode_escape_chars(result)
final_xml = parseString(result.encode('ascii', 'xmlcharrefreplace')) final_xml = parseString(result.encode('ascii', 'xmlcharrefreplace'))
if self.template_images: if self.template_images:

View file

@ -49,21 +49,6 @@ class RenderTestCase(TestCase):
for test, expect in test_samples.items(): for test, expect in test_samples.items():
assert self.engine._unescape_entities(test) == expect assert self.engine._unescape_entities(test) == expect
def test__encode_escape_chars(self):
test_samples = {
'<text:a>\n</text:a>': '<text:a><text:line-break/></text:a>',
'<text:h>\n</text:h>': '<text:h><text:line-break/></text:h>',
'<text:p>\n</text:p>': '<text:p><text:line-break/></text:p>',
'<text:p>Hello\n</text:p>': '<text:p>Hello<text:line-break/></text:p>',
'<text:p>Hello\nWorld\n!</text:p>': '<text:p>Hello<text:line-break/>World<text:line-break/>!</text:p>',
'<text:ruby-base>\n</text:ruby-base>': '<text:ruby-base><text:line-break/></text:ruby-base>',
'<text:meta>\u0009</text:meta>': '<text:meta><text:tab/></text:meta>',
'<text:meta-field>\n</text:meta-field>': '<text:meta-field><text:line-break/></text:meta-field>',
}
for test, expect in test_samples.items():
assert self.engine._encode_escape_chars(test) == expect
def _test_is_jinja_tag(self): def _test_is_jinja_tag(self):
assert self._is_jinja_tag('{{ foo }}')==True assert self._is_jinja_tag('{{ foo }}')==True
assert self._is_jinja_tag('{ foo }')==False assert self._is_jinja_tag('{ foo }')==False
@ -80,22 +65,22 @@ class RenderTestCase(TestCase):
assert self.engine.create_text_span_node(self.document, 'text').toxml() == '<text:span>text</text:span>' assert self.engine.create_text_span_node(self.document, 'text').toxml() == '<text:span>text</text:span>'
class EncodeLFAndFWithinTextNamespace(TestCase): class EscapingVariablesValues(TestCase):
"""Test encoding of line feed and tab chars within text: namespace""" """
Test encoding of line feed and tab chars in variable values
"""
def test_encode_linefeed_char(self): def test_encode_linefeed_char(self):
xml = '<text:span>This\nLF</text:span>' xml = 'This\nLF'
espected = '<text:span>This<text:line-break/>LF</text:span>' expected = 'This<text:line-break/>LF'
assert (Renderer._encode_escape_chars(xml) == espected) assert (Renderer.get_escaped_var_value(xml) == expected)
def test_encode_tab_char(self): def test_encode_linefeed_char(self):
xml = '<text:span>This\tTab</text:span>' xml = 'This\tTab char'
espected = '<text:span>This<text:tab/>Tab</text:span>' expected = 'This<text:tab/>Tab char'
assert (Renderer._encode_escape_chars(xml) == espected) assert (Renderer.get_escaped_var_value(xml) == expected)
def test_escape_elem_with_attributes(self): def test_escape_xml_reserved_chars(self):
"""A bug in _encode_escape_chars was preventing it from escaping ''' Should also escape minor and mayor signs '''
LF and tabs inside text elements with tag attributes. See: xml = '1 is > than 0 & -1 is <'
https://github.com/christopher-ramirez/secretary/issues/39""" expected = '1 is &gt; than 0 &amp; -1 is &lt;'
xml = '<text:span attr="value">This\nLF</text:span>' assert (Renderer.get_escaped_var_value(xml) == expected)
espected = '<text:span attr="value">This<text:line-break/>LF</text:span>'
assert (Renderer._encode_escape_chars(xml) == espected)