Skip to content

Commit

Permalink
Stabilized XML and HTML output attribute order. With lxml 4.4.0, the h…
Browse files Browse the repository at this point in the history
…andling of attribute order changed for Py 3.6 and higher. Initial attributes set on an element are now sorted by key value. This matches what lxml did previously, and still does for Py 2.7 and Py 3.[0-5]. Enforcing sorted initial attributes under Py 3.6+ makes our output more stable under varying versions of lxml and Python.

 - Legacy-Id: 3201
  • Loading branch information
levkowetz committed Aug 10, 2019
1 parent ea19bda commit 295fd79
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 19 deletions.
15 changes: 15 additions & 0 deletions cli/xml2rfc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import base64
import re
import six
import sys
import textwrap

from collections import OrderedDict
from lxml.etree import _Comment

if six.PY2:
Expand Down Expand Up @@ -565,6 +567,18 @@ def build_dataurl(mime, data, base64enc=False):
}


def sdict(d):
    """Create an ordered dict from the given dict, with sorted initial insertion.

    The result is meant to be passed as the initial attribute set of an lxml
    element so that serialized attribute order does not depend on the Python
    or lxml version in use.

    Args:
        d: A mapping of attribute names to values.  It is not modified.

    Returns:
        On Python 3.6 and later, a new plain ``dict`` whose keys were
        inserted in sorted order.  On earlier versions, an ``OrderedDict``
        with keys inserted in sorted order -- except that an ``OrderedDict``
        argument is returned as-is (same object, existing order kept).
    """
    # For python 3.6 and later, lxml obeys dictionary insertion order for
    # attributes, for earlier it sorts initial attributes, so we need to
    # use a regular dict for 3.6 and later
    #
    # Compare against a tuple rather than testing major/minor separately, so
    # that any interpreter newer than 3.6 takes this branch.
    if sys.version_info >= (3, 6):
        return dict((k, d[k]) for k in sorted(d.keys()))
    # Legacy interpreters: an OrderedDict already carries an explicit order
    # chosen by the caller, so hand it back untouched.
    if isinstance(d, OrderedDict):
        return d
    return OrderedDict((k, d[k]) for k in sorted(d.keys()))

# ----------------------------------------------------------------------
# Element operations

Expand Down Expand Up @@ -592,3 +606,4 @@ def is_htmlblock(h):
return h.tag in set([ 'address', 'article', 'aside', 'blockquote', 'div', 'dl', 'figure',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'nav', 'ol', 'p', 'pre', 'script', 'section',
'table', 'ul', ])

16 changes: 8 additions & 8 deletions cli/xml2rfc/writers/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from xml2rfc.util.postal import ( get_normalized_address_info, address_hcard_properties,
enhance_address_format, address_field_mapping, )
from xml2rfc.util.unicode import expand_unicode_element
from xml2rfc.utils import namespaces, is_htmlblock, find_duplicate_html_ids, build_dataurl
from xml2rfc.utils import namespaces, is_htmlblock, find_duplicate_html_ids, build_dataurl, sdict

#from xml2rfc import utils

Expand Down Expand Up @@ -89,7 +89,7 @@ class ClassElementMaker(ElementMaker):

def __call__(self, tag, *children, **attrib):
classes = attrib.pop('classes', None)
attrib = dict( (k,v) for k,v in attrib.items() if v != None)
attrib = sdict(dict( (k,v) for k,v in attrib.items() if v != None))
elem = super(ClassElementMaker, self).__call__(tag, *children, **attrib)
if classes:
elem.set('class', classes)
Expand Down Expand Up @@ -444,7 +444,7 @@ def render_rfc(self, h, x):
#
# <link rel="alternate" type="application/rfc+xml" href="source.xml">

add.link(head, None, rel='alternate', type='application/rfc+xml', href=self.xmlrfc.source)
add.link(head, None, href=self.xmlrfc.source, rel='alternate', type='application/rfc+xml')

# 6.3.5. Link to License
#
Expand All @@ -455,7 +455,7 @@ def render_rfc(self, h, x):
# <link rel="license"
# href="https://trustee.ietf.org/trust-legal-provisions.html">

add.link(head, None, rel='license', href="#copyright")
add.link(head, None, href="#copyright", rel='license')

# 6.3.6. Style
#
Expand Down Expand Up @@ -488,12 +488,12 @@ def render_rfc(self, h, x):
cssout = os.path.join(os.path.dirname(self.filename), 'xml2rfc.css')
with open(cssout, 'w', encoding='utf-8') as f:
f.write(css)
add.link(head, None, rel="stylesheet", href="xml2rfc.css", type="text/css")
add.link(head, None, href="xml2rfc.css", rel="stylesheet", type="text/css")
elif self.options.no_css:
pass
else:
add.style(head, None, css, type="text/css")
add.link(head, None, rel="stylesheet", href="rfc-local.css", type="text/css")
add.link(head, None, href="rfc-local.css", rel="stylesheet", type="text/css")

# 6.3.7. Links
#
Expand Down Expand Up @@ -1598,7 +1598,7 @@ def render_li(self, h, x):
#
# This element is rendered as its HTML counterpart, in the HTML header.
def render_link(self, h, x):
link = add.link(h, x, rel=x.get('rel'), href=x.get('href'))
link = add.link(h, x, href=x.get('href'), rel=x.get('rel'))
return link

# 9.31. <middle>
Expand Down Expand Up @@ -1704,7 +1704,7 @@ def render_ol(self, h, x):
if len(type) > 1 and '%' in type:
ol = add.dl(h, x, classes='olPercent')
else:
attrib = dict([ (k,v) for (k,v) in x.attrib.items() if k in ['start', 'type', ] ])
attrib = sdict(dict( (k,v) for (k,v) in x.attrib.items() if k in ['start', 'type', ] ))
ol = add.ol(h, x, classes=x.get('spacing'), **attrib)
for c in x.getchildren():
self.render(ol, c)
Expand Down
15 changes: 7 additions & 8 deletions cli/xml2rfc/writers/preptool.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from urllib.parse import urlsplit, urlunsplit, urljoin
from urllib.request import urlopen

from collections import OrderedDict
from lxml import etree


Expand All @@ -42,7 +41,7 @@
from xml2rfc.util.name import full_author_name_expansion
from xml2rfc.util.num import ol_style_formatter
from xml2rfc.util.unicode import unicode_content_tags, bare_unicode_tags, expand_unicode_element, isascii, downcode
from xml2rfc.utils import build_dataurl, namespaces
from xml2rfc.utils import build_dataurl, namespaces, sdict
from xml2rfc.writers.base import default_options, BaseV3Writer
from xml2rfc.writers.v2v3 import slugify

Expand Down Expand Up @@ -142,15 +141,14 @@ def get_attribute_defaults(self, tag):
ignored_attributes = set(['keepWithNext', 'keepWithPrevious', 'toc', 'pageno', ])
attr = self.schema.xpath("/x:grammar/x:define/x:element[@name='%s']//x:attribute" % tag, namespaces=namespaces)
defaults = dict( (a.get('name'), a.get("{%s}defaultValue"%namespaces['a'], None)) for a in attr )
keys = list( set(defaults.keys()) - ignored_attributes)
keys.sort()
self.attribute_defaults[tag] = OrderedDict( (k, defaults[k]) for k in keys if defaults[k] )
keys = set(defaults.keys()) - ignored_attributes
self.attribute_defaults[tag] = sdict(dict( (k, defaults[k]) for k in keys if defaults[k] ))
return copy.copy(self.attribute_defaults[tag])

def element(self, tag, line=None, **kwargs):
attrib = self.get_attribute_defaults(tag)
attrib.update(kwargs)
e = etree.Element(tag, **attrib)
e = etree.Element(tag, **sdict(attrib))
if line:
e.sourceline = line
elif self.options.debug:
Expand Down Expand Up @@ -1881,6 +1879,7 @@ def back_insert_index(self, e, p):
self.warn(e, "Found an existing Index section, not inserting another one")
return
def mkxref(self, text, **kwargs):
kwargs = sdict(kwargs)
xref = self.element('xref', **kwargs)
xref.text = text
xref.tail = '\n'+' '*16
Expand Down Expand Up @@ -2031,7 +2030,7 @@ def check_links_required(self, e, p):
item_href = "urn:issn:2070-1721"
urnlink = e.find('.//link[@rel="item"][@href="%s"]' % (item_href, ))
if urnlink is None :
e.insert(0, self.element('link', rel='alternate', href=item_href))
e.insert(0, self.element('link', href=item_href, rel='alternate'))
# 3. If in RFC production mode, check if there is a <link> element
# with a DOI for this RFC; if not, add one of the form <link
# rel="describedBy" href="https://dx.doi.org/10.17487/rfcdd"> where
Expand All @@ -2044,7 +2043,7 @@ def check_links_required(self, e, p):
doi_href = "https://dx.doi.org/10.17487/rfc%s" % self.rfcnumber
doilink = e.find('.//link[@href="%s"]' % (doi_href, ))
if doilink is None:
e.insert(0, self.element('link', rel='alternate', href=doi_href))
e.insert(0, self.element('link', href=doi_href, rel='alternate'))

#
# 4. If in RFC production mode, check if there is a <link> element
Expand Down
7 changes: 4 additions & 3 deletions cli/xml2rfc/writers/v2v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
import datetime
import traceback as tb

from collections import OrderedDict
from io import open
from lxml import etree
from lxml.etree import Element, Comment, CDATA

import xml2rfc
from xml2rfc import log
from xml2rfc.util.unicode import unicode_content_tags, isascii
from xml2rfc.utils import hastext, isempty
from xml2rfc.utils import hastext, isempty, sdict
from xml2rfc.writers.base import default_options, BaseV3Writer


Expand Down Expand Up @@ -122,7 +123,7 @@ def write(self, filename):
# --- Element Operations -------------------------------------------

def element(self, tag, line=None, **kwargs):
e = Element(tag, **kwargs)
e = Element(tag, sdict(kwargs))
if line:
e.sourceline = line
elif self.options.debug:
Expand Down Expand Up @@ -749,7 +750,7 @@ def element_list(self, e, p):
# convert to dl, ul, or ol
nstyle = None
style = e.get('style', '').strip()
attribs = {}
attribs = OrderedDict()
comments = []
if not style:
# otherwise look for the nearest list parent with a style and use it
Expand Down

0 comments on commit 295fd79

Please sign in to comment.