**WARN**: This is a backward incompatible change. In this commit we rewrite mos$

This is the new calling API:
```
    engine = Renderer()
    engine.render(template)
```
where `Renderer` is the new class provided by secretary. It accepts a jinja2 `Environment`
instance as its only parameter.

You now pass the template, along with the template data, when calling the `render` method.

This change allows reusing environments and/or rendering multiple documents with a single `Renderer` instance.
This commit is contained in:
Christopher Ramírez 2014-07-19 18:29:51 -06:00
parent b6b802c871
commit 97bda44242

View file

@ -55,7 +55,7 @@ class SecretaryError(Exception):
class UndefinedSilently(Undefined): class UndefinedSilently(Undefined):
# Silently undefined, # Silently undefined,
# see http://stackoverflow.com/questions/6182498/jinja2-how-to-make-it-fail-silently-like-djangotemplate # see http://stackoverflow.com/questions/6182498
def silently_undefined(*args, **kwargs): def silently_undefined(*args, **kwargs):
return '' return ''
@ -96,139 +96,267 @@ class Renderer(object):
""" """
def __init__(self, template, **kwargs): def __init__(self, environment=None, **kwargs):
""" """
Builds a Renderer instance and assign init the internal enviroment. Create a Renderer instance.
Params:
template: Either the path to the file, or a file-like object. args:
If it is a path, the file will be open with mode read 'r'. environment: Use this jinja2 enviroment. If not specified, we
create a new environment for this class instance.
returns:
None
""" """
self.log = logging.getLogger(__name__) self.log = logging.getLogger(__name__)
self.log.debug('Initing a Renderer instance\nTemplate: %s', template) self.log.debug('Initing a Renderer instance\nTemplate')
self.template = template
self.environment = Environment(undefined=UndefinedSilently, autoescape=True)
# Register provided filters if environment:
self.environment.filters['pad'] = pad_string self.environment = environment
self.environment.filters['markdown'] = self.markdown_filter else:
self.environment = Environment(undefined=UndefinedSilently,
self.file_list = {} autoescape=True)
# Register filters
self.environment.filters['pad'] = pad_string
def unpack_template(self): self.environment.filters['markdown'] = self.markdown_filter
"""
Loads the template into a ZIP file, allowing to make
CRUD operations into the ZIP archive.
"""
def _unpack_template(self, template):
# And Open/libreOffice is just a ZIP file. Here we unarchive the file
# and return a dict with every file in the archive
self.log.debug('Unpacking template file') self.log.debug('Unpacking template file')
with zipfile.ZipFile(self.template, 'r') as unpacked_template:
# go through the files in source archive_files = {}
for zi in unpacked_template.filelist: with zipfile.ZipFile(template, 'r') as archive:
file_contents = unpacked_template.read( zi.filename ) for zfile in archive.filelist:
self.file_list[zi.filename] = file_contents archive_files[zfile.filename] = archive.read(zfile.filename)
self.log.debug('File "%s" unpacked', zi.filename)
if zi.filename == 'content.xml': return archive_files
self.log.debug('Parsing content.xml\n%s', file_contents)
self.content = parseString( file_contents ) self.log.debug('Unpack completed')
elif zi.filename == 'styles.xml':
self.log.debug('Parsing styles.xml\n%s', file_contents)
self.styles = parseString( file_contents )
def pack_document(self): def _pack_document(self, files):
""" # Store to a zip files in files
Make an archive from _unpacked_template self.log.debug('packing document')
""" zip_file = io.BytesIO()
# Save rendered content and headers
self.rendered = io.BytesIO()
self.log.debug('Packing document...')
with zipfile.ZipFile(self.rendered, 'a') as packed_template:
for filename, content in self.file_list.items():
if filename in ['content.xml', 'styles.xml']:
self.log.debug(
'Trying to pack "%s" into archive and encoding it into ascii\n%s',
filename, self.styles.toxml())
content = self.styles.toxml().encode('ascii', 'xmlcharrefreplace')
with zipfile.ZipFile(zip_file, 'a') as zipdoc:
for fname, content in files.items():
if sys.version_info >= (2, 7): if sys.version_info >= (2, 7):
packed_template.writestr(filename, content, zipfile.ZIP_DEFLATED) zipdoc.writestr(fname, content, zipfile.ZIP_DEFLATED)
self.log.debug('File "%s" packed into archive with ZIP_DEFLATED', filename)
else: else:
packed_template.writestr(filename, content) zipdoc.writestr(fname, content)
self.log.debug('File "%s" packed into archive', filename)
self.log.debug('Document packing completed')
return zip_file
def _prepare_template_tags(self, xml_document):
    # Rewrite, in place, the <text:text-input> fields of xml_document whose
    # text is a jinja tag, so the serialized document can be handed to jinja2.
    #
    # For each such field we do:
    # * if field is a print field ({{ field }}), we replace it with a
    #   <text:span> node.
    #
    # * if field is a control flow ({% %}), then we find the immediate node
    #   of the type indicated in the field's `text:description` attribute
    #   and replace the whole node and its children with the field's content.
    #
    #   If the `text:description` attribute starts with `before::` or
    #   `after::`, then we move the field content before or after the node
    #   named in the description.
    #
    #   If no `text:description` is available, find the immediate common
    #   parent of this and any other field and replace its child and
    #   original parent of field with the field content.
    #
    #   e.g.: original
    #   <table>
    #       <table:row>
    #           <field>{% for bar in bars %}</field>
    #       </table:row>
    #       <paragraph>
    #           <field>{{ bar }}</field>
    #       </paragraph>
    #       <table:row>
    #           <field>{% endfor %}</field>
    #       </table:row>
    #   </table>
    #
    #   After processing:
    #   <table>
    #       {% for bar in bars %}
    #       <paragraph>
    #           <text:span>{{ bar }}</text:span>
    #       </paragraph>
    #       {% endfor %}
    #   </table>
    #
    # args:
    #     xml_document: DOM document to modify; it must provide
    #                   getElementsByTagName().
    # returns:
    #     None. xml_document is modified in place.

    self.log.debug('Preparing template tags')
    fields = xml_document.getElementsByTagName('text:text-input')

    # First, count secretary fields
    # (the second pass reads `secretary_field_count` from ancestor nodes;
    # presumably inc_node_fields_count is what maintains it -- confirm).
    for field in fields:
        if not field.hasChildNodes():
            continue

        # Only the first child node of the field is inspected.
        field_content = field.childNodes[0].data.strip()

        # The whole content must be one tag: it has to start with `{{` or
        # `{%` and end with `}}` or `%}`. Note the character classes also
        # admit a literal `|` in those positions.
        if not re.findall(r'^{[{|%].*[%|}]}$', field_content, re.DOTALL):
            # Field does not contain jinja template tags
            continue

        # A block tag is a single `{% ... %}` with no braces inside it.
        is_block_tag = re.findall(r'^{%[^{}]*%}$', field_content, re.DOTALL)
        self.inc_node_fields_count(field.parentNode,
                'block' if is_block_tag else 'variable')

    # Do field replacement and moving
    for field in fields:
        if not field.hasChildNodes():
            continue

        field_content = field.childNodes[0].data.strip()

        if not re.findall(r'^{[{|%].*[%|}]}$', field_content, re.DOTALL):
            # Field does not contain jinja template tags
            continue

        is_block_tag = re.findall(r'^{%[^{}]*%}$', field_content, re.DOTALL)
        # `discard` is the node that ends up removed from the document; it
        # starts as the field itself and may be widened to an ancestor.
        discard = field
        field_reference = field.getAttribute('text:description').strip().lower()

        if re.findall(r'\|markdown', field_content):
            # a markdown field should take the whole paragraph
            field_reference = 'text:p'

        if field_reference:
            # User specified a reference. Replace the immediate parent node
            # of the type indicated in the reference with this field's content.
            # Unknown references leave `discard` pointing at the field.
            node_type = FLOW_REFERENCES.get(field_reference, False)
            if node_type:
                # NOTE(review): presumably _parent_of_type can return None
                # when no such ancestor exists; that case is not handled
                # below -- confirm.
                discard = self._parent_of_type(field, node_type)

            jinja_node = self.create_text_node(xml_document, field_content)

        elif is_block_tag:
            # Find the common immediate parent of this and any other field.
            # NOTE(review): the loop dereferences discard.parentNode on every
            # step, so it fails before `discard` could ever become None --
            # confirm what happens when the walk runs out of ancestors.
            while discard.parentNode.secretary_field_count <= 1:
                discard = discard.parentNode

            # NOTE(review): if this test were ever false, jinja_node would
            # not be assigned in this iteration.
            if discard is not None:
                jinja_node = self.create_text_node(xml_document,
                                                   field_content)

        else:
            jinja_node = self.create_text_span_node(xml_document,
                                                    field_content)

        # Place the new node next to `discard`: after it for `after::`
        # references, before it in every other case.
        parent = discard.parentNode
        if not field_reference.startswith('after::'):
            parent.insertBefore(jinja_node, discard)
        else:
            if discard.isSameNode(parent.lastChild):
                parent.appendChild(jinja_node)
            else:
                parent.insertBefore(jinja_node,
                                    discard.nextSibling)

        if field_reference.startswith(('after::', 'before::')):
            # Do not remove the whole field container. Only the `text:p`
            # ancestor of the original field is removed in this case.
            discard = self._parent_of_type(field, 'text:p')
            parent = discard.parentNode

        parent.removeChild(discard)
def _unescape_entities(self, xml_text):
# unescape XML entities gt and lt
unescape_rules = {
r'({[{|%].*)(&gt;)(.*[%|}]})': r'\1>\3',
r'({[{|%].*)(&lt;)(.*[%|}]})': r'\1<\3',
}
for p, r in unescape_rules.iteritems():
xml_text = re.sub(p, r, xml_text, flags=re.IGNORECASE or re.DOTALL)
return xml_text
def _encode_escape_chars(self, xml_text):
encode_rules = {
r'(<text:([ahp]|ruby-base|span|meta|meta-field)>.*)(\\n)(.*</text:([ahp]|ruby-base|span|meta|meta-field)>)': r'\1<text:line-break/>\3',
r'(<text:([ahp]|ruby-base|span|meta|meta-field)>.*)(\\n)(.*</text:([ahp]|ruby-base|span|meta|meta-field)>)': r'\1<text:line-break/>\3',
r'(<text:([ahp]|ruby-base|span|meta|meta-field)>.*)(\\n)(.*</text:([ahp]|ruby-base|span|meta|meta-field)>)': r'\1<text:tab>\3',
ur'[\u0009|\u000d|\u000a]': r'<text:s/>'
}
for p, r in encode_rules.iteritems():
xml_text = re.sub(p, r, xml_text, flags=re.IGNORECASE)
return xml_text
def _render_xml(self, xml_document, **kwargs):
# Prepare the xml object to be processed by jinja2
self.log.debug('Rendering XML object')
try:
self._prepare_template_tags(xml_document)
template_string = self._unescape_entities(xml_document.toxml())
jinja_template = self.environment.from_string(template_string)
result = jinja_template.render(**kwargs)
result = self._encode_escape_chars(result)
return parseString(result.encode('ascii', 'xmlcharrefreplace'))
except:
self.log.debug('Error rendering template:\n%s', template_string)
raise
finally:
self.log.debug('Rendering xml object finished')
def render(self, **kwargs): def render(self, template, **kwargs):
""" """
Unpack and render the internal template and Render a template
returns the rendered ODF document.
args:
template: A template file. Could be a string or a file instance
**kwargs: Template variables. Similar to jinja2
returns:
A binary stream which contains the rendered document.
""" """
self.log.debug('render called with\n%s', kwargs)
def unescape_gt_lt(text):
# unescape XML entities gt and lt
unescape_entities = {
r'({[{|%].*)(&gt;)(.*[%|}]})': r'\1>\3',
r'({[{|%].*)(&lt;)(.*[%|}]})': r'\1<\3',
}
for pattern, repl in unescape_entities.iteritems():
text = re.sub(pattern, repl, text, flags=re.IGNORECASE or re.DOTALL)
self.log.debug('GT and LT tags successfully unescaped\n%s', text) self.log.debug('Initing a template rendering')
return text self.files = self._unpack_template(template)
self.unpack_template()
# Keep content and styles object since many functions or
# filters may work with then
self.content = parseString(self.files['content.xml'])
self.styles = parseString(self.files['styles.xml'])
# Render content.xml # Render content.xml
self.log.debug('Trying to render content.xml with jinja') self.content = self._render_xml(self.content, **kwargs)
self.prepare_template_tags(self.content)
template = self.environment.from_string(unescape_gt_lt(self.content.toxml()))
result = template.render(**kwargs)
self.log.debug('Jinja2 successfully parsed content.xml')
result = result.replace('\n', '<text:line-break/>')
self.log.debug('Line breaks replaced successfully')
# Replace original body with rendered body
original_body = self.content.getElementsByTagName('office:body')[0]
rendered_body = parseString(result.encode('ascii', 'xmlcharrefreplace')).getElementsByTagName('office:body')[0]
self.log.debug(
'Replacing original document body with rendered version\n%s', result)
document = self.content.getElementsByTagName('office:document-content')[0]
document.replaceChild(rendered_body, original_body)
self.log.debug('Original body replaced with the rendered version')
# Render styles.xml # Render styles.xml
self.log.debug('Trying to render styles.xml with jinja') self.styles = self._render_xml(self.styles, **kwargs)
self.prepare_template_tags(self.styles)
template = self.environment.from_string(unescape_gt_lt(self.styles.toxml()))
result = template.render(**kwargs)
self.log.debug('Jinja2 successfully parsed styles.xml')
result = result.replace('\n', '<text:line-break/>')
self.log.debug('Lines break successfully encoded to <text:linebreaks>.')
self.log.debug('Now replacing template styles.xml with the rendered version')
self.styles = parseString(result.encode('ascii', 'xmlcharrefreplace'))
self.log.debug('New styles.xml file successfully parsed')
self.pack_document() self.log.debug('Template rendering finished')
return self.rendered.getvalue()
self.files['content.xml'] = self.content.toxml().encode('ascii', 'xmlcharrefreplace')
self.files['styles.xml'] = self.styles.toxml().encode('ascii', 'xmlcharrefreplace')
document = self._pack_document(self.files)
return document.getvalue()
def node_parents(self, node, parent_type): def _parent_of_type(self, node, of_type):
""" # Returns the first immediate parent of type `of_type`.
Returns the first node's parent with name of parent_type # Returns None if nothing is found.
If parent "text:p" is not found, returns None.
"""
if hasattr(node, 'parentNode'): if hasattr(node, 'parentNode'):
if node.parentNode.nodeName.lower() == parent_type: if node.parentNode.nodeName.lower() == of_type:
return node.parentNode return node.parentNode
else: else:
return self.node_parents(node.parentNode, parent_type) return self._parent_of_type(node.parentNode, of_type)
else: else:
return None return None
@ -269,83 +397,7 @@ class Renderer(object):
self.inc_node_fields_count(node.parentNode, field_type) self.inc_node_fields_count(node.parentNode, field_type)
def prepare_template_tags(self, xml_document):
    """
        Search every field node in the inner template and
        replace them with a <text:span> field. Flow tags are
        replaced with a blank node and moved into the ancestor
        tag defined in description field attribute.

        xml_document is modified in place; nothing is returned.
    """
    self.log.debug('Preparing template tags\n%s', xml_document.toxml())
    fields = xml_document.getElementsByTagName('text:text-input')

    # First, count secretary fields
    # (the second loop reads `secretary_field_count` from ancestor nodes;
    # presumably inc_node_fields_count is what maintains it -- confirm).
    for field in fields:
        if not field.hasChildNodes():
            continue

        # Only the first child node is inspected; line feeds are dropped.
        field_content = field.childNodes[0].data.replace('\n', '')

        if not re.findall(r'(\{.*?\}*})', field_content):
            # Field does not contain jinja template tags
            continue

        is_block_tag = re.findall(r'(^\{\%.*?\%\}*})$', field_content.strip())
        self.inc_node_fields_count(field.parentNode,
                'variable' if not is_block_tag else 'block')

    # Second pass: replace or move every field that holds a jinja tag.
    for field in fields:
        if field.hasChildNodes():
            field_content = field.childNodes[0].data.replace('\n', '')

            if not re.findall(r'(\{.*?\}*})', field_content):
                # Field does not contain jinja template tags
                continue

            is_block_tag = re.findall(r'(^\{\%.*?\%\}*})$', field_content.strip())

            # `field` is rebound to an ancestor below; keep the original
            # node for the `before::` / `after::` case.
            keep_field = field
            field_reference = field.getAttribute('text:description')

            if re.findall(r'\|markdown', field_content):
                # a markdown field should take the whole paragraph
                field_reference = 'text:p'

            if not field_reference:
                if is_block_tag:
                    # Find the node where this control flow field we
                    # consider will be really needed.
                    # NOTE(review): field.parentNode is dereferenced on
                    # every step, so the loop fails before `field` could
                    # become None -- confirm behavior at the document root.
                    while field.parentNode.secretary_field_count <= 1:
                        field = field.parentNode

                    if field is not None:
                        jinja_tag_node = self.create_text_node(xml_document, field_content)
                else:
                    jinja_tag_node = self.create_text_span_node(xml_document, field_content)
            else:
                # Unknown references are used as given (the lookup falls
                # back to the reference text itself).
                odt_reference = FLOW_REFERENCES.get(field_reference.strip(), field_reference)
                if odt_reference in SUPPORTED_FIELD_REFERECES:
                    field = self.node_parents(field, odt_reference)

                jinja_tag_node = self.create_text_node(xml_document, field_content)

            # Place the new node after `field` for `after::` references,
            # before it in every other case.
            parent = field.parentNode
            if not field_reference.startswith('after::'):
                parent.insertBefore(jinja_tag_node, field)
            else:
                if field.isSameNode(parent.lastChild):
                    parent.appendChild(jinja_tag_node)
                else:
                    parent.insertBefore(jinja_tag_node, field.nextSibling)

            if field_reference.startswith('after::') or field_reference.startswith('before::'):
                # Avoid removing whole container, just original text:p parent
                field = self.node_parents(keep_field, 'text:p')
                parent = field.parentNode

            parent.removeChild(field)
def get_style_by_name(self, style_name): def get_style_by_name(self, style_name):
""" """
Search in <office:automatic-styles> for style_name. Search in <office:automatic-styles> for style_name.
@ -353,7 +405,8 @@ class Renderer(object):
return the style node return the style node
""" """
auto_styles = self.content.getElementsByTagName('office:automatic-styles')[0] auto_styles = self.content.getElementsByTagName(
'office:automatic-styles')[0]
if not auto_styles.hasChildNodes(): if not auto_styles.hasChildNodes():
return None return None
@ -503,8 +556,8 @@ if __name__ == "__main__":
{'country': 'Mexico', 'capital': 'MExico City', 'cities': ['puebla', 'cancun']}, {'country': 'Mexico', 'capital': 'MExico City', 'cities': ['puebla', 'cancun']},
] ]
render = Renderer('simple_template.odt') render = Renderer()
result = render.render(countries=countries, document=document) result = render.render('simple_template.odt', countries=countries, document=document)
output = open('rendered.odt', 'wb') output = open('rendered.odt', 'wb')
output.write(result) output.write(result)