Introducing a new and faster technique to escape variable values.

This commit is contained in:
Christopher Ramírez 2017-09-01 15:08:24 -06:00
parent f74046fb09
commit 76cb15e2ba
2 changed files with 35 additions and 44 deletions

View file

@ -29,12 +29,13 @@ import re
import sys
import logging
import zipfile
import jinja2
from os import path
from mimetypes import guess_type, guess_extension
from uuid import uuid4
from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError, ErrorString
from jinja2 import Environment, Undefined
from jinja2 import Environment, Undefined, Markup
try:
if sys.version_info.major == 3:
@ -135,7 +136,8 @@ class Renderer(object):
self.environment = environment
else:
self.environment = Environment(undefined=UndefinedSilently,
autoescape=True)
autoescape=True,
finalize=self.finalize_value)
# Register filters
self.environment.filters['pad'] = pad_string
self.environment.filters['markdown'] = self.markdown_filter
@ -146,6 +148,13 @@ class Renderer(object):
self._compile_tags_expressions()
# NOTE(review): `jinja2.evalcontextfilter` and `jinja2.Markup` are, as far as
# I know, no longer exported by Jinja 3.1+ -- confirm the pinned Jinja version.
@jinja2.evalcontextfilter
def finalize_value(self, value, *args):
    """Return ``value`` in a form that is safe to emit.

    Passed as the ``finalize`` callable of the default ``Environment``.

    A value that is already a ``Markup`` instance is handed back untouched.
    Anything else is run through ``get_escaped_var_value`` and the result is
    wrapped in ``Markup``. Extra positional arguments are accepted and
    ignored.
    """
    already_markup = isinstance(value, Markup)
    return value if already_markup else Markup(self.get_escaped_var_value(value))
def media_loader(self, callback):
"""This sets the the media loader. A user defined function which
@ -401,7 +410,6 @@ class Renderer(object):
# Finally, remove the placeholder
placeholder_parent.removeChild(placeholder)
def _unescape_entities(self, xml_text):
"""
Unescape links and '&', '<', '"' and '>' within jinja
@ -434,17 +442,16 @@ class Renderer(object):
return xml_text
@staticmethod
def _encode_escape_chars(xml_text):
def get_escaped_var_value(value):
"""
Replace line feed and/or tabs within text:span entities.
Encode XML reserved characters in value (e.g. &, <, >) and replace
line feed and tab characters with their ODF counterparts
(<text:line-break/> and <text:tab/>).
"""
find_pattern = r'(?is)<text:([\S]+?).*?>([^>]*?([\n\t])[^<]*?)</text:\1>'
for m in re.findall(find_pattern, xml_text):
replacement = m[1].replace('\n', '<text:line-break/>')
replacement = replacement.replace('\t', '<text:tab/>')
xml_text = xml_text.replace(m[1], replacement)
return xml_text
# Escape first; the ODF tags are inserted afterwards as Markup, presumably
# so that they are not escaped themselves.
value = Markup.escape(value)
return (
value.replace('\n', Markup('<text:line-break/>'))
.replace('\t', Markup('<text:tab/>'))
)
def add_media_to_archive(self, media, mime, name=''):
@ -568,7 +575,6 @@ class Renderer(object):
)
result = jinja_template.render(**kwargs)
result = self._encode_escape_chars(result)
final_xml = parseString(result.encode('ascii', 'xmlcharrefreplace'))
if self.template_images:

View file

@ -49,21 +49,6 @@ class RenderTestCase(TestCase):
for test, expect in test_samples.items():
assert self.engine._unescape_entities(test) == expect
def test__encode_escape_chars(self):
test_samples = {
'<text:a>\n</text:a>': '<text:a><text:line-break/></text:a>',
'<text:h>\n</text:h>': '<text:h><text:line-break/></text:h>',
'<text:p>\n</text:p>': '<text:p><text:line-break/></text:p>',
'<text:p>Hello\n</text:p>': '<text:p>Hello<text:line-break/></text:p>',
'<text:p>Hello\nWorld\n!</text:p>': '<text:p>Hello<text:line-break/>World<text:line-break/>!</text:p>',
'<text:ruby-base>\n</text:ruby-base>': '<text:ruby-base><text:line-break/></text:ruby-base>',
'<text:meta>\u0009</text:meta>': '<text:meta><text:tab/></text:meta>',
'<text:meta-field>\n</text:meta-field>': '<text:meta-field><text:line-break/></text:meta-field>',
}
for test, expect in test_samples.items():
assert self.engine._encode_escape_chars(test) == expect
def _test_is_jinja_tag(self):
assert self._is_jinja_tag('{{ foo }}')==True
assert self._is_jinja_tag('{ foo }')==False
@ -80,22 +65,22 @@ class RenderTestCase(TestCase):
assert self.engine.create_text_span_node(self.document, 'text').toxml() == '<text:span>text</text:span>'
class EncodeLFAndFWithinTextNamespace(TestCase):
"""Test encoding of line feed and tab chars within text: namespace"""
class EscapingVariablesValues(TestCase):
"""
Test encoding of line feed and tab characters in variable values
"""
def test_encode_linefeed_char(self):
# Line feed inside a text:span element, checked against _encode_escape_chars.
xml = '<text:span>This\nLF</text:span>'
espected = '<text:span>This<text:line-break/>LF</text:span>'
assert (Renderer._encode_escape_chars(xml) == espected)
# Line feed in a bare value, checked against get_escaped_var_value.
xml = 'This\nLF'
expected = 'This<text:line-break/>LF'
assert (Renderer.get_escaped_var_value(xml) == expected)
def test_encode_tab_char(self):
xml = '<text:span>This\tTab</text:span>'
espected = '<text:span>This<text:tab/>Tab</text:span>'
assert (Renderer._encode_escape_chars(xml) == espected)
def test_encode_tab_char(self):
    """A tab character in a variable value becomes ``<text:tab/>``.

    Named for the tab case: ``test_encode_linefeed_char`` is already defined
    earlier in this class, and reusing that name would rebind it so that
    only one of the two tests is ever run.
    """
    xml = 'This\tTab char'
    expected = 'This<text:tab/>Tab char'
    assert (Renderer.get_escaped_var_value(xml) == expected)
def test_escape_elem_with_attributes(self):
"""A bug in _encode_escape_chars was preventing it from escaping
LF and tabs inside text elements with tag attributes. See:
https://github.com/christopher-ramirez/secretary/issues/39"""
xml = '<text:span attr="value">This\nLF</text:span>'
espected = '<text:span attr="value">This<text:line-break/>LF</text:span>'
assert (Renderer._encode_escape_chars(xml) == espected)
def test_escape_xml_reserved_chars(self):
    """The reserved characters ``>``, ``&`` and ``<`` are escaped as XML entities."""
    escaped = Renderer.get_escaped_var_value('1 is > than 0 & -1 is <')
    assert escaped == '1 is &gt; than 0 &amp; -1 is &lt;'