Skip to content

Commit

Permalink
refactored html export
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewhegarty committed Sep 11, 2023
1 parent 5431834 commit db57f39
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 23 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Improvements

- The html format now supports importing from HTML content (#243)
- Refactored HTML export to allow optional escaping of special characters

### Changes

Expand Down
53 changes: 30 additions & 23 deletions src/tablib/formats/_html.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" Tablib - HTML export support.
"""
import html
from html.parser import HTMLParser
from xml.etree import ElementTree as ET


class HTMLFormat:
Expand All @@ -11,41 +11,39 @@ class HTMLFormat:
extensions = ('html', )

@classmethod
def export_set(cls, dataset):
"""HTML representation of a Dataset."""

table = ET.Element('table')
def export_set(cls, dataset, escape=False):
"""Returns HTML representation of Dataset.
If ``escape`` is True, cell data will be passed through html.escape().
"""
html_output = "<table>"
if dataset.headers is not None:
head = ET.Element('thead')
tr = ET.Element('tr')
html_output += "<thead><tr>"
for header in dataset.headers:
th = ET.Element('th')
th.text = str(header) if header is not None else ''
tr.append(th)
head.append(tr)
table.append(head)
html_output += "<th>"
html_output += cls.format_str(header, escape=escape)
html_output += "</th>"
html_output += "</tr></thead>"

body = ET.Element('tbody')
html_output += "<tbody>"
for row in dataset:
tr = ET.Element('tr')
html_output += "<tr>"
for item in row:
td = ET.Element('td')
td.text = str(item) if item is not None else ''
tr.append(td)
body.append(tr)
table.append(body)

return ET.tostring(table, method='html', encoding='unicode')
html_output += "<td>"
html_output += cls.format_str(item, escape=escape)
html_output += "</td>"
html_output += "</tr>"
html_output += "</tbody></table>"
return html_output

@classmethod
def export_book(cls, databook):
def export_book(cls, databook, escape=False):
"""HTML representation of a Databook."""

result = ''
for i, dset in enumerate(databook._datasets):
title = dset.title if dset.title else f'Set {i}'
result += f'<{cls.BOOK_ENDINGS}>{title}</{cls.BOOK_ENDINGS}>\n'
result += dset.html
result += cls.export_set(dset, escape=escape)
result += '\n'

return result
Expand All @@ -63,6 +61,15 @@ def import_set(cls, dset, in_stream, table_id=None):
else:
raise ValueError('No <table> found in input HTML')

@classmethod
def format_str(cls, s, escape=False):
    """Return the text to place inside an HTML cell for ``s``.

    ``None`` is rendered as an empty string; any other value is converted
    with ``str()``. If ``escape`` is True the converted text is passed
    through ``html.escape()``; otherwise it is returned unchanged.
    """
    if s is None:
        return ''
    text = str(s)
    # NOTE: the Codecov check on this commit reported the escaping return
    # (added line 70 of src/tablib/formats/_html.py) as not covered by tests.
    return html.escape(text) if escape else text


class TablibHTMLParser(HTMLParser):
def __init__(self, dataset, *args, table_id=None, **kwargs):
Expand Down
5 changes: 5 additions & 0 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,11 @@ def test_html_import_table_id(self):
tablib.import_set(html_input, format="html", table_id="notfound")
self.assertEqual('No <table> found with id="notfound" in input HTML', str(exc.exception))

def test_html_export_with_special_chars(self):
    """Cell text containing ``&amp;`` appears verbatim in the default HTML export."""
    row = ('J &amp; J', 'A', 90)
    self.founders = tablib.Dataset(headers=self.headers, title='Founders')
    self.founders.append(row)
    rendered = self.founders.html
    self.assertIn("J &amp; J", rendered)


class RSTTests(BaseTestCase):
def test_rst_force_grid(self):
Expand Down

0 comments on commit db57f39

Please sign in to comment.