@@ -71,7 +71,7 @@
time. It is inappropriate to use Internet-Drafts as reference
material or to cite them other than as "work in progress."¶
- This Internet-Draft will expire on January 27, 2024.¶
+ This Internet-Draft will expire on January 28, 2024.
¶
diff --git a/xml2rfc/util/unicode.py b/xml2rfc/util/unicode.py
index b89afddd..6071abc5 100644
--- a/xml2rfc/util/unicode.py
+++ b/xml2rfc/util/unicode.py
@@ -35,7 +35,6 @@
'street',
'title',
'u',
- 't',
])
# Attribute values should not contain unicode, with some exceptions
@@ -85,10 +84,6 @@
'title',
])
-bare_unicode_tags_with_notice = set([
- 't',
-])
-
def is_svg(e):
'''
Returns true if an element is a SVG element
diff --git a/xml2rfc/writers/base.py b/xml2rfc/writers/base.py
index 19274e11..8821605c 100644
--- a/xml2rfc/writers/base.py
+++ b/xml2rfc/writers/base.py
@@ -25,14 +25,9 @@
pass
from xml2rfc import strings, log
-from xml2rfc.uniscripts import is_script
from xml2rfc.util.date import extract_date, augment_date, format_date, get_expiry_date
from xml2rfc.util.name import short_author_ascii_name_parts, full_author_name_expansion, short_author_name_parts
-from xml2rfc.util.unicode import (
- punctuation, unicode_replacements, unicode_content_tags,
- bare_unicode_tags, bare_unicode_tags_with_notice,
- bare_latin_tags, unicode_attributes, downcode, downcode_punctuation,
- is_svg)
+from xml2rfc.util.unicode import is_svg
from xml2rfc.utils import namespaces, find_duplicate_ids, slugify
@@ -2039,63 +2034,6 @@ def page_bottom_center(self):
text = 'Expires %s' % format_date(*parts, legacy=self.options.legacy_date_format)
return text
- def downcode_punctuation(self):
- self.downcode(replacements=punctuation)
- self.downcode_reference_punctuation()
-
- def downcode(self, replacements=unicode_replacements):
- """
-
- Traverses an lxml.etree and replaces unicode characters with the proper
- equivalents specified in rfc2629-xhtml.ent, resulting in no non-ascii
- characters except in elements that explicitly permit non-ascii content.
-
- """
- for e in self.tree.iter():
- if is_svg(e):
- continue
- if e.text:
- if not e.tag in unicode_content_tags:
- try:
- e.text.encode('ascii')
- except UnicodeEncodeError:
- e.text = downcode(e.text, replacements=replacements)
- elif e.tag in bare_unicode_tags:
- pass
- elif e.tag in bare_unicode_tags_with_notice:
- pass
- elif e.tag in bare_latin_tags and is_script(e.text, 'Latin'):
- pass
- elif not e.get('ascii'):
- try:
- e.text.encode('ascii')
- except UnicodeEncodeError:
- e.text = downcode(e.text, replacements=replacements)
- if e.tail:
- if not e.getparent().tag in unicode_content_tags:
- try:
- e.tail.encode('ascii')
- except UnicodeEncodeError:
- e.tail = downcode(e.tail, replacements=replacements)
-
- def downcode_attributes(self, replacements=unicode_replacements):
- for e in self.tree.iter():
- for key in e.attrib.keys():
- if not (e.tag, key) in unicode_attributes:
- try:
- e.get(key).encode('ascii')
- except UnicodeEncodeError:
- e.set(key, downcode(e.get(key), replacements=replacements))
-
- def downcode_reference_punctuation(self):
- for r in self.tree.xpath('.//references'):
- for e in r.iter():
- for key in e.attrib.keys():
- try:
- e.get(key).encode('ascii')
- except UnicodeEncodeError:
- e.set(key, downcode_punctuation(e.get(key)))
-
def pretty_print_prep(self, e, p):
ind = self.options.indent
## The actual printing is done in self.write()
diff --git a/xml2rfc/writers/preptool.py b/xml2rfc/writers/preptool.py
index 54e8de7d..809446fa 100644
--- a/xml2rfc/writers/preptool.py
+++ b/xml2rfc/writers/preptool.py
@@ -41,9 +41,8 @@
from xml2rfc.util.name import full_author_name_expansion
from xml2rfc.util.num import ol_style_formatter
from xml2rfc.util.unicode import (
- unicode_content_tags, unicode_attributes, bare_unicode_tags,
- bare_unicode_tags_with_notice, expand_unicode_element, isascii,
- downcode, latinscript_attributes, is_svg)
+ unicode_content_tags, unicode_attributes, expand_unicode_element,
+ isascii, latinscript_attributes, is_svg)
from xml2rfc.utils import build_dataurl, namespaces, sdict, clean_text
from xml2rfc.writers.base import default_options, BaseV3Writer, RfcWriterError
@@ -503,33 +502,13 @@ def check_attribute_values(self, e, p):
#
def check_ascii_text(self, e, p):
- self.downcode_punctuation()
for c in self.root.iter():
if is_svg(c):
continue
- p = c.getparent()
- if c.text and not isascii(c.text):
+ if c.text and not isascii(c.text) and c.tag not in unicode_content_tags:
show = c.text.encode('ascii', errors='replace')
- if c.tag in unicode_content_tags:
- if c.tag in bare_unicode_tags_with_notice:
- if self.options.warn_bare_unicode:
- self.warn(c, 'Found non-ascii characters in an element that should be inspected to ensure they are intentional, in <%s>: %s' % (c.tag, show))
- elif not c.get('ascii') and not c.tag in bare_unicode_tags and not is_script(c.text, 'Latin'):
- self.err(c, 'Found non-ascii content without matching ascii attribute in <%s>: %s' % (c.tag, show))
- else:
- self.err(c, 'Found non-ascii characters outside of elements that can have non-ascii content, in <%s>: %s' % (c.tag, show))
- c.text = downcode(c.text)
- if c.tail and not isascii(c.tail):
- show = c.tail.encode('ascii', errors='replace')
- if p.tag in unicode_content_tags:
- if p.tag in bare_unicode_tags_with_notice:
- if self.options.warn_bare_unicode:
- self.warn(p, 'Found non-ascii characters in an element that should be inspected to ensure they are intentional, in <%s>: %s' % (p.tag, show))
- elif not p.get('ascii') and not p.tag in bare_unicode_tags:
- self.err(p, 'Found non-unicode content without matching ascii attribute in <%s>: %s' % (p.tag, show))
- else:
- self.err(p, 'Found non-ascii characters outside of elements that can have non-ascii content, in <%s>: %s' % (p.tag, show))
- c.tail = downcode(c.tail)
+ if self.options.warn_bare_unicode:
+ self.warn(c, 'Found non-ascii characters in an element that should be inspected to ensure they are intentional, in <%s>: %s' % (c.tag, show))
def normalize_text_items(self, e, p):
"""
diff --git a/xml2rfc/writers/v2v3.py b/xml2rfc/writers/v2v3.py
index 3e7f0dc9..2a17016f 100644
--- a/xml2rfc/writers/v2v3.py
+++ b/xml2rfc/writers/v2v3.py
@@ -13,7 +13,6 @@
import xml2rfc
from xml2rfc import log
-from xml2rfc.util.unicode import unicode_content_tags, unicode_replacements, isascii, is_svg
from xml2rfc.utils import hastext, isempty, sdict, slugify, iscomment
from xml2rfc.writers.base import default_options, BaseV3Writer
@@ -316,7 +315,6 @@ def convert2to3(self):
'.//*[self::artwork or self::dl or self::figure or self::ol or self::sourcecode or self::t or self::ul]',
'//*[@*="yes" or @*="no"]', # convert old attribute false/true
'.;pretty_print_prep()',
- '.;wrap_non_ascii()',
]
# Remove any DOCTYPE declaration
@@ -1148,55 +1146,3 @@ def attribute_yes_no(self, e, p):
# if c.tail != None:
# if c.tail.strip() == '':
# c.tail = None
-
- def wrap_non_ascii(self, e, p):
- self.downcode(replacements=unicode_replacements)
- self.downcode_punctuation()
- for e in self.tree.iter():
- if is_svg(e):
- continue
- def uwrap(text, line):
- words = []
- elements = []
- ascii = None
- for word in re.split('(\s+)', text, flags=re.U):
- if isascii(word):
- words.append(word)
- else:
- u = self.element('u', line=line)
- u.text = word
- if ascii is None:
- ascii = ''.join(words)
- words = []
- elements.append(u)
- else:
- elements[-1].tail = ''.join(words)
- words = []
- elements.append(u)
- if words:
- if ascii is None:
- ascii = ''.join(words)
- else:
- elements[-1].tail = ''.join(words)
- return ascii, elements
- if e.text and e.tag not in unicode_content_tags:
- try:
- e.text.encode('ascii')
- except UnicodeEncodeError:
- e.text, elements = uwrap(e.text, e.sourceline)
- if len(e):
- c = e[0]
- for u in elements:
- c.addprevious(u)
- else:
- for u in elements:
- e.append(u)
- if e.tail and e.getparent().tag not in unicode_content_tags:
- try:
- e.tail.encode('ascii')
- except UnicodeEncodeError:
- e.tail, elements = uwrap(e.tail, e.sourceline)
- for u in elements:
- e.addnext(u)
- e = u
-