First usable version of markdown filter

This commit is contained in:
Christopher Ramírez 2013-09-12 15:50:17 -06:00
parent 3b7eb6d9ad
commit 686e598a73
2 changed files with 129 additions and 99 deletions

View file

@ -6,11 +6,20 @@
# as a dict in the 'attributes' key. Also, some tags may need to create # as a dict in the 'attributes' key. Also, some tags may need to create
# new styles in the document. # new styles in the document.
transform_map = { common_styles = {
'p': { 'italic': {
'replace_with': 'text:p', 'replace_with': 'text:span',
'attributes': { 'attributes': {
'style-name': 'Standard' 'style-name': 'markdown_italic'
},
'style': {
'name': 'markdown_italic',
'properties': {
'fo:font-style': 'italic',
'style:font-style-asian': 'italic',
'style:font-style-complex': 'italic'
}
} }
}, },
@ -30,21 +39,20 @@ transform_map = {
} }
}, },
'i': { 'p': {
'replace_with': 'text:span', 'replace_with': 'text:p',
'attributes': { 'attributes': {
'style-name': 'markdown_italic' 'style-name': 'Standard'
},
'style': {
'name': 'markdown_italic',
'properties': {
'fo:font-style': 'italic',
'style:font-style-asian': 'italic',
'style:font-style-complex': 'italic'
}
} }
}, }
}
transform_map = {
'p': common_styles['p'],
'strong': common_styles['strong'],
'em': common_styles['italic'],
'b': common_styles['strong'],
'i': common_styles['italic'],
# Heading Styles (Use styles defined in the document) # Heading Styles (Use styles defined in the document)
'h1': { 'h1': {

View file

@ -38,10 +38,10 @@ import re
import sys import sys
import zipfile import zipfile
import io import io
from copy import deepcopy
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
from jinja2 import Environment, Undefined from jinja2 import Environment, Undefined
# ---- Exceptions # ---- Exceptions
class SecretaryError(Exception): class SecretaryError(Exception):
pass pass
@ -99,9 +99,12 @@ class Render(object):
self.template = template self.template = template
self.environment = Environment(undefined=UndefinedSilently, autoescape=True) self.environment = Environment(undefined=UndefinedSilently, autoescape=True)
self.environment.filters['pad'] = pad_string
self.file_list = {}
# Register provided filters
self.environment.filters['pad'] = pad_string
self.environment.filters['markdown'] = self.markdown_filter
self.file_list = {}
def unpack_template(self): def unpack_template(self):
""" """
@ -161,7 +164,14 @@ class Render(object):
template = self.environment.from_string(self.content.toxml()) template = self.environment.from_string(self.content.toxml())
result = template.render(**kwargs) result = template.render(**kwargs)
result = result.replace('\n', '<text:line-break/>') result = result.replace('\n', '<text:line-break/>')
self.content = parseString(result.encode('ascii', 'xmlcharrefreplace'))
# Replace original body with rendered body
original_body = self.content.getElementsByTagName('office:body')[0]
rendered_body = parseString(result.encode('ascii', 'xmlcharrefreplace')) \
.getElementsByTagName('office:body')[0]
document = self.content.getElementsByTagName('office:document-content')[0]
document.replaceChild(rendered_body, original_body)
# Render style.xml # Render style.xml
self.prepare_template_tags(self.styles) self.prepare_template_tags(self.styles)
@ -223,6 +233,10 @@ class Render(object):
field_description = field.getAttribute('text:description') field_description = field.getAttribute('text:description')
if re.findall(r'\|markdown', field_content):
# a markdown should take the whole paragraph
field_description = 'text:p'
if not field_description: if not field_description:
new_node = self.create_text_span_node(xml_document, field_content) new_node = self.create_text_span_node(xml_document, field_content)
else: else:
@ -237,104 +251,112 @@ class Render(object):
parent.removeChild(field) parent.removeChild(field)
def get_style_by_name(self, style_name): def get_style_by_name(self, style_name):
""" """
Search in <office:automatic-styles> for style_name. Search in <office:automatic-styles> for style_name.
Return None if style_name is not found. Otherwise Return None if style_name is not found. Otherwise
return the style node return the style node
""" """
auto_styles = self.content.getElementsByTagName('office:automatic-styles')[0] auto_styles = self.content.getElementsByTagName('office:automatic-styles')[0]
if not auto_styles.hasChildNodes(): if not auto_styles.hasChildNodes():
return None return None
for style_node in auto_styles.childNodes: for style_node in auto_styles.childNodes:
if style_node.hasattr('style:name') and if style_node.hasAttribute('style:name') and \
(style_node.getAttribute('style:name') == style_name): (style_node.getAttribute('style:name') == style_name):
return style_node return style_node
return None
def insert_style_in_content(self, style_name, attributes=None, def insert_style_in_content(self, style_name, attributes=None,
**style_properties): **style_properties):
""" """
Insert a new style into content.xml's <office:automatic-styles> node. Insert a new style into content.xml's <office:automatic-styles> node.
Returns a reference to the newly created node Returns a reference to the newly created node
""" """
auto_styles = self.content.getElementsByTagName('office:automatic-styles')[0] auto_styles = self.content.getElementsByTagName('office:automatic-styles')[0]
style_node = self.content.createElement(transform_map[tag]['style:style']) style_node = self.content.createElement('style:style')
style_node.setAttribute('style:name', style_name)
if attributes: style_node.setAttribute('style:name', style_name)
for k, v in attributes.iteritems(): style_node.setAttribute('style:family', 'text')
style_node.setAttribute('style:%s' % k, v) style_node.setAttribute('style:parent-style-name', 'Standard')
if style_properties: if attributes:
style_prop = self.content.createElement('style:text-properties') for k, v in attributes.iteritems():
for k, v in style_properties.iteritems(): style_node.setAttribute('style:%s' % k, v)
style_prop.setAttribute('style:%s' % k, v)
style_node.appendChild(style_prop) if style_properties:
style_prop = self.content.createElement('style:text-properties')
for k, v in style_properties.iteritems():
style_prop.setAttribute('%s' % k, v)
auto_styles.appendChild(style_node) style_node.appendChild(style_prop)
return style_node return auto_styles.appendChild(style_node)
def markdown_filter(self, markdown_text):
"""
Convert a markdown text into a ODT formated text
"""
def markdown_filter(markdown_text): from xml.dom import Node
""" from markdown_map import transform_map
Convert a markdown text into a ODT formated text
"""
from copy import deepcopy try:
from xml.dom import Node from markdown2 import markdown
from markdown_map import transform_map except ImportError:
raise SecretaryError('Could not import markdown2 library. Install it using "pip install markdown2"')
try: styles_cache = {} # cache styles searching
from markdown2 import markdown html_text = markdown(markdown_text)
except ImportError: xml_object = parseString( html_text )
raise SecretaryError('Could not import markdown2 library. Install it using "pip install markdown2"')
html_text = markdown(markdown_text) # Transform HTML tags as specified in transform_map
# Some tags may require extra attributes in ODT.
# Additional attributes are indicated in the 'attributes' property
for tag in transform_map:
html_nodes = xml_object.getElementsByTagName(tag)
for html_node in html_nodes:
odt_node = xml_object.createElement(transform_map[tag]['replace_with'])
xml_object = parseString( html_text ) # Transfer child nodes
if html_node.hasChildNodes():
for child_node in html_node.childNodes:
# Transform HTML tags as specified in transform_map # We use different methods to clone the childs
# Some tags may require extra attributes in ODT. # because 'deepcopy' duplicates TEXT_NODE nodes
# Additional attributes are indicated in the 'attributes' property # inside a ELEMENT_NODE Node, and because
# 'cloneNode' does not work with TEXT_NODE nodes.
if child_node.nodeType == Node.ELEMENT_NODE:
odt_node.appendChild(child_node.cloneNode(True))
else:
odt_node.appendChild(deepcopy(child_node))
for tag in transform_map: # Add attributes defined in transform_map
html_nodes = xml_object.getElementsByTagName(tag) if 'attributes' in transform_map[tag]:
for html_node in html_nodes: for k, v in transform_map[tag]['attributes'].iteritems():
odt_node = xml_object.createElement(transform_map[tag]['replace_with']) odt_node.setAttribute('text:%s' % k, v)
# Transfer child nodes # Does the node need to create an style?
if html_node.hasChildNodes(): if 'style' in transform_map[tag]:
for child_node in html_node.childNodes: name = transform_map[tag]['style']['name']
if not name in styles_cache:
style_node = self.get_style_by_name(name)
# We use different methods to clone the childs if style_node is None:
# because 'deepcopy' duplicates TEXT_NODE nodes # Create and cache the style node
# inside a ELEMENT_NODE Node, and because style_node = self.insert_style_in_content(
# 'cloneNode' does not work with TEXT_NODE nodes. name, transform_map[tag]['style'].get('attributes', None),
if child_node.nodeType == Node.ELEMENT_NODE: **transform_map[tag]['style']['properties'])
odt_node.appendChild(child_node.cloneNode(True)) styles_cache[name] = style_node
else:
odt_node.appendChild(deepcopy(child_node))
# Add attributes defined in transform_map html_node.parentNode.replaceChild(odt_node, html_node)
if 'attributes' in transform_map[tag]:
for k, v in transform_map[tag]['attributes'].iteritems():
odt_node.setAttribute('text:%s' % k, v)
# Does the node need to create an style? return xml_object.firstChild.toxml()
if 'append_style' in transform_map[tag]:
pass
html_node.parentNode.replaceChild(odt_node, html_node)
return xml_object.firstChild.toxml()
def render_template(template, **kwargs): def render_template(template, **kwargs):