Change entities unescape strategy.

This commit is contained in:
Christopher Ramírez 2014-06-16 12:10:17 -06:00
parent 9b10c9083a
commit 8c04982b37

View file

@ -38,7 +38,6 @@ import re
import sys import sys
import zipfile import zipfile
import io import io
from HTMLParser import HTMLParser
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
from jinja2 import Environment, Undefined from jinja2 import Environment, Undefined
@ -181,27 +180,35 @@ class Render(object):
Unpack and render the internal template and Unpack and render the internal template and
returns the rendered ODF document. returns the rendered ODF document.
""" """
def unescape_gt_lt(text):
    """Unescape ``&gt;`` and ``&lt;`` inside template tags only.

    Only text enclosed in ``{{ ... }}`` or ``{% ... %}`` is touched;
    entities anywhere else in ``text`` are left escaped.

    Args:
        text: The string to process.

    Returns:
        ``text`` with every ``&gt;`` / ``&lt;`` found inside a template
        tag replaced by ``>`` / ``<``.
    """
    replacements = {'gt': '>', 'lt': '<'}

    def unescape_entity(entity):
        # Entity names are matched case-insensitively, so normalise the key.
        return replacements[entity.group(1).lower()]

    def unescape_tag(tag):
        # Replace every escaped operator within this single tag, not just
        # one occurrence.
        return re.sub(r'&(gt|lt);', unescape_entity, tag.group(0),
                      flags=re.IGNORECASE)

    # DOTALL lets a tag span several lines.  The non-greedy body stops at
    # the first closing delimiter, so each tag is handled separately and
    # whatever sits between two tags is never unescaped.
    return re.sub(r'\{\{.*?\}\}|\{%.*?%\}', unescape_tag, text,
                  flags=re.DOTALL)
html_parser = HTMLParser()
self.unpack_template() self.unpack_template()
# Render content.xml # Render content.xml
self.prepare_template_tags(self.content) self.prepare_template_tags(self.content)
template = self.environment.from_string(html_parser.unescape(self.content.toxml())) template = self.environment.from_string(unescape_gt_lt(self.content.toxml()))
result = template.render(**kwargs) result = template.render(**kwargs)
result = result.replace('\n', '<text:line-break/>') result = result.replace('\n', '<text:line-break/>')
# Replace original body with rendered body # Replace original body with rendered body
original_body = self.content.getElementsByTagName('office:body')[0] original_body = self.content.getElementsByTagName('office:body')[0]
rendered_body = parseString(result.encode('ascii', 'xmlcharrefreplace')) \ rendered_body = parseString(result.encode('ascii', 'xmlcharrefreplace')).getElementsByTagName('office:body')[0]
.getElementsByTagName('office:body')[0]
document = self.content.getElementsByTagName('office:document-content')[0] document = self.content.getElementsByTagName('office:document-content')[0]
document.replaceChild(rendered_body, original_body) document.replaceChild(rendered_body, original_body)
# Render style.xml # Render style.xml
self.prepare_template_tags(self.styles) self.prepare_template_tags(self.styles)
template = self.environment.from_string(html_parser.unescape(self.styles.toxml())) template = self.environment.from_string(unescape_gt_lt(self.styles.toxml()))
result = template.render(**kwargs) result = template.render(**kwargs)
result = result.replace('\n', '<text:line-break/>') result = result.replace('\n', '<text:line-break/>')
self.styles = parseString(result.encode('ascii', 'xmlcharrefreplace')) self.styles = parseString(result.encode('ascii', 'xmlcharrefreplace'))