Stabilized XML and HTML output attribute order. With lxml 4.4.0, the h…

…andling of attribute order changed for Py 3.6 and higher. Initial attributes set on an element are now sorted by key value. This matches what lxml did previously, and still does for Py 2.7 and Py 3.[0-5]. Enforcing sorted initial attributes under Py 3.6+ makes our output more stable under varying versions of lxml and Python. - Legacy-Id: 3201
ietf-tools · Aug 10, 2019 · 295fd79 · 295fd79
1 parent ea19bda
commit 295fd79
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 19 deletions.
diff --git a/cli/xml2rfc/utils.py b/cli/xml2rfc/utils.py
@@ -7,8 +7,10 @@
 import base64
 import re
 import six
+import sys
 import textwrap
 
+from collections import OrderedDict
 from lxml.etree import _Comment
 
 if six.PY2:
@@ -565,6 +567,18 @@ def build_dataurl(mime, data, base64enc=False):
 }
 
 
+def sdict(d):
+    "Create an ordered dict from the given dict, with sorted initial insertion"
+    # On Python 3.6+ lxml keeps dict insertion order for initial attributes;
+    # on earlier Pythons it sorts them itself.  To get key-sorted output on
+    # both, 3.6+ gets a plain dict built by inserting the keys in sorted order.
+    if sys.version_info.major == 3 and sys.version_info.minor >= 6:
+        return dict( (k, d[k]) for k in sorted(list(d.keys())) )
+    else:
+        if isinstance(d, OrderedDict):
+            return d    # an OrderedDict is handed back as-is, not re-sorted
+        return OrderedDict( (k, d[k]) for k in sorted(list(d.keys())) )
+
 # ----------------------------------------------------------------------
 # Element operations
 
@@ -592,3 +606,4 @@ def is_htmlblock(h):
     return h.tag in set([ 'address', 'article', 'aside', 'blockquote', 'div', 'dl', 'figure',
         'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'nav', 'ol', 'p', 'pre', 'script', 'section',
         'table', 'ul', ])
+
diff --git a/cli/xml2rfc/writers/html.py b/cli/xml2rfc/writers/html.py
@@ -41,7 +41,7 @@
 from xml2rfc.util.postal import ( get_normalized_address_info, address_hcard_properties,
                                 enhance_address_format, address_field_mapping, )
 from xml2rfc.util.unicode import expand_unicode_element
-from xml2rfc.utils import namespaces, is_htmlblock, find_duplicate_html_ids, build_dataurl
+from xml2rfc.utils import namespaces, is_htmlblock, find_duplicate_html_ids, build_dataurl, sdict
 
 #from xml2rfc import utils
 
@@ -89,7 +89,7 @@ class ClassElementMaker(ElementMaker):
 
     def __call__(self, tag, *children, **attrib):
         classes = attrib.pop('classes', None)
-        attrib = dict( (k,v) for k,v in attrib.items() if v != None)
+        attrib = sdict(dict( (k,v) for k,v in attrib.items() if v != None))
         elem = super(ClassElementMaker, self).__call__(tag, *children, **attrib)
         if classes:
             elem.set('class', classes)
@@ -444,7 +444,7 @@ def render_rfc(self, h, x):
     # 
     #    <link rel="alternate" type="application/rfc+xml" href="source.xml">
 
-        add.link(head, None, rel='alternate', type='application/rfc+xml', href=self.xmlrfc.source)
+        add.link(head, None, href=self.xmlrfc.source, rel='alternate', type='application/rfc+xml')
 
     # 6.3.5.  Link to License
     # 
@@ -455,7 +455,7 @@ def render_rfc(self, h, x):
     #    <link rel="license"
     #       href="https://trustee.ietf.org/trust-legal-provisions.html">
 
-        add.link(head, None, rel='license', href="#copyright")
+        add.link(head, None, href="#copyright", rel='license')
 
     # 6.3.6.  Style
     # 
@@ -488,12 +488,12 @@ def render_rfc(self, h, x):
             cssout = os.path.join(os.path.dirname(self.filename), 'xml2rfc.css')
             with open(cssout, 'w', encoding='utf-8') as f:
                 f.write(css)
-            add.link(head, None, rel="stylesheet", href="xml2rfc.css", type="text/css")
+            add.link(head, None, href="xml2rfc.css", rel="stylesheet", type="text/css")
         elif self.options.no_css:
             pass
         else:
             add.style(head, None, css, type="text/css")
-        add.link(head, None, rel="stylesheet", href="rfc-local.css", type="text/css")
+        add.link(head, None, href="rfc-local.css", rel="stylesheet", type="text/css")
 
     # 6.3.7.  Links
     # 
@@ -1598,7 +1598,7 @@ def render_li(self, h, x):
     # 
     #    This element is rendered as its HTML counterpart, in the HTML header.
     def render_link(self, h, x):
-        link = add.link(h, x, rel=x.get('rel'), href=x.get('href'))
+        link = add.link(h, x, href=x.get('href'), rel=x.get('rel'))
         return link
 
     # 9.31.  <middle>
@@ -1704,7 +1704,7 @@ def render_ol(self, h, x):
         if len(type) > 1 and '%' in type:
             ol = add.dl(h, x, classes='olPercent')
         else:
-            attrib = dict([ (k,v) for (k,v) in x.attrib.items() if k in ['start', 'type', ] ])
+            attrib = sdict(dict( (k,v) for (k,v) in x.attrib.items() if k in ['start', 'type', ] ))
             ol = add.ol(h, x, classes=x.get('spacing'), **attrib)
         for c in x.getchildren():
             self.render(ol, c)

diff --git a/cli/xml2rfc/writers/preptool.py b/cli/xml2rfc/writers/preptool.py
@@ -28,7 +28,6 @@
     from urllib.parse import urlsplit, urlunsplit, urljoin
     from urllib.request import urlopen
 
-from collections import OrderedDict
 from lxml import etree
 
 
@@ -42,7 +41,7 @@
 from xml2rfc.util.name import full_author_name_expansion
 from xml2rfc.util.num import ol_style_formatter
 from xml2rfc.util.unicode import unicode_content_tags, bare_unicode_tags, expand_unicode_element, isascii, downcode
-from xml2rfc.utils import build_dataurl, namespaces
+from xml2rfc.utils import build_dataurl, namespaces, sdict
 from xml2rfc.writers.base import default_options, BaseV3Writer
 from xml2rfc.writers.v2v3 import slugify
 
@@ -142,15 +141,14 @@ def get_attribute_defaults(self, tag):
             ignored_attributes = set(['keepWithNext', 'keepWithPrevious', 'toc', 'pageno', ])
             attr = self.schema.xpath("/x:grammar/x:define/x:element[@name='%s']//x:attribute" % tag, namespaces=namespaces)
             defaults = dict( (a.get('name'), a.get("{%s}defaultValue"%namespaces['a'], None)) for a in attr )
-            keys = list( set(defaults.keys()) - ignored_attributes)
-            keys.sort()
-            self.attribute_defaults[tag] = OrderedDict( (k, defaults[k]) for k in keys if defaults[k] )
+            keys = set(defaults.keys()) - ignored_attributes
+            self.attribute_defaults[tag] = sdict(dict( (k, defaults[k]) for k in keys if defaults[k] ))
         return copy.copy(self.attribute_defaults[tag])
 
     def element(self, tag, line=None, **kwargs):
         attrib = self.get_attribute_defaults(tag)
         attrib.update(kwargs)
-        e = etree.Element(tag, **attrib)
+        e = etree.Element(tag, **sdict(attrib))
         if line:
             e.sourceline = line
         elif self.options.debug:
@@ -1881,6 +1879,7 @@ def back_insert_index(self, e, p):
             self.warn(e, "Found an existing Index section, not inserting another one")
             return
         def mkxref(self, text, **kwargs):
+            kwargs = sdict(kwargs)
             xref = self.element('xref', **kwargs)
             xref.text = text
             xref.tail = '\n'+' '*16
@@ -2031,7 +2030,7 @@ def check_links_required(self, e, p):
             item_href = "urn:issn:2070-1721"
             urnlink = e.find('.//link[@rel="item"][@href="%s"]' % (item_href, ))
             if urnlink is None :
-                e.insert(0, self.element('link', rel='alternate', href=item_href))
+                e.insert(0, self.element('link', href=item_href, rel='alternate'))
     #    3.  If in RFC production mode, check if there is a <link> element
     #        with a DOI for this RFC; if not, add one of the form <link
     #        rel="describedBy" href="https://dx.doi.org/10.17487/rfcdd"> where
@@ -2044,7 +2043,7 @@ def check_links_required(self, e, p):
             doi_href = "https://dx.doi.org/10.17487/rfc%s" % self.rfcnumber
             doilink = e.find('.//link[@href="%s"]' % (doi_href, ))
             if doilink is None:
-                e.insert(0, self.element('link', rel='alternate', href=doi_href))
+                e.insert(0, self.element('link', href=doi_href, rel='alternate'))
 
     # 
     #    4.  If in RFC production mode, check if there is a <link> element

diff --git a/cli/xml2rfc/writers/v2v3.py b/cli/xml2rfc/writers/v2v3.py
@@ -8,14 +8,15 @@
 import datetime
 import traceback as tb
 
+from collections import OrderedDict
 from io import open
 from lxml import etree
 from lxml.etree import Element, Comment, CDATA
 
 import xml2rfc
 from xml2rfc import log
 from xml2rfc.util.unicode import unicode_content_tags, isascii
-from xml2rfc.utils import hastext, isempty
+from xml2rfc.utils import hastext, isempty, sdict
 from xml2rfc.writers.base import default_options, BaseV3Writer
 
 
@@ -122,7 +123,7 @@ def write(self, filename):
     # --- Element Operations -------------------------------------------
 
     def element(self, tag, line=None, **kwargs):
-        e = Element(tag, **kwargs)
+        e = Element(tag, sdict(kwargs))
         if line:
             e.sourceline = line
         elif self.options.debug:
@@ -749,7 +750,7 @@ def element_list(self, e, p):
         # convert to dl, ul, or ol
         nstyle = None
         style = e.get('style', '').strip()
-        attribs = {}
+        attribs = OrderedDict()
         comments = []
         if not style:
             # otherwise look for the nearest list parent with a style and use it