Skip to content

Commit

Permalink
Fixes #567 - Implement ods import
Browse files Browse the repository at this point in the history
  • Loading branch information
claudep committed Oct 27, 2023
1 parent dc23c4c commit 40b1e43
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 8 deletions.
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
### Improvements

- The html format now supports importing from HTML content (#243)
- The ods format now supports importing from .ods files (#567). Import support
is still experimental.

### Changes

Expand Down
11 changes: 9 additions & 2 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,19 @@ If a title has been set, it will be exported as the table caption.
ods
===

Export data in OpenDocument Spreadsheet format. The ``ods`` format is currently
export-only.
Import/export data in OpenDocument Spreadsheet format.

.. versionadded:: 3.6.0

Import functionality was added.

This format is optional, install Tablib with ``pip install "tablib[ods]"`` to
make the format available.

The ``import_set()`` method also supports a ``skip_lines`` parameter: the
number of leading lines to skip before data is read.

.. admonition:: Binary Warning

:class:`Dataset.ods` contains binary data, so make sure to write in binary mode::
Expand Down
74 changes: 74 additions & 0 deletions src/tablib/formats/_ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
"""

import numbers
from datetime import date, datetime
from io import BytesIO

from odf import opendocument, style, table, text

import tablib

bold = style.Style(name="bold", family="paragraph")
bold.addElement(style.TextProperties(
fontweight="bold",
Expand Down Expand Up @@ -49,6 +52,73 @@ def export_book(cls, databook):
wb.save(stream)
return stream.getvalue()

@classmethod
def import_sheet(cls, dset, sheet, headers=True, skip_lines=0):
    """Fill dataset `dset` with the contents of one ODS sheet node.

    The sheet's ``name`` attribute becomes the dataset title. Child nodes
    whose index is below `skip_lines` are ignored. When `headers` is true,
    the node at index `skip_lines` supplies the headers; otherwise it is
    appended as the first data row. Later rows that are shorter than the
    dataset width are right-padded with empty strings before being
    appended.
    """
    dset.title = sheet.getAttribute('name')

    for index, node in enumerate(sheet.childNodes):
        if index < skip_lines:
            continue

        values = [cls.read_cell(item) for item in node.childNodes]

        if headers and index == skip_lines:
            dset.headers = values
            continue

        # The first imported row defines the width, so only later rows
        # are candidates for padding.
        if index > skip_lines:
            missing = dset.width - len(values)
            if missing > 0:
                values += [''] * missing
        dset.append(values)

@classmethod
def read_cell(cls, cell):
    """Return the Python value stored in an ODS cell node.

    A leaf node (no child nodes) is resolved in this order: its text
    ``data`` if it has any; otherwise its ``valuetype``/``value``
    attributes, where ``float`` yields a ``float`` and ``date`` yields a
    ``datetime`` (value contains ``T``) or a ``date``. A node with
    children returns the first truthy value found by reading the children
    recursively.

    Blank cells -- a leaf with no text and no value type (or an empty
    ``string`` type), or a node whose children carry no value -- are
    returned as ``''``, the same placeholder ``import_sheet`` uses when
    padding short rows.

    Raises:
        ValueError: if a leaf declares a value type other than ``float``,
            ``date`` or ``string``.
    """
    if not cell.childNodes:
        if data := getattr(cell, 'data', None):
            return data
        value_type = cell.getAttribute('valuetype')
        if not value_type or value_type == 'string':
            # Nothing to decode: an empty cell must not abort the import.
            return ''
        value = cell.getAttribute('value')
        if value_type == 'float':
            return float(value)
        if value_type == 'date':
            if 'T' in value:
                return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")
            return datetime.strptime(value, "%Y-%m-%d").date()
        msg = f"value_type {value_type} not handled"
        raise ValueError(msg)

    for subnode in cell.childNodes:
        value = cls.read_cell(subnode)
        if value:
            return value
    # No child produced a value: report a blank cell rather than None.
    return ''

@classmethod
def import_set(cls, dset, in_stream, headers=True, skip_lines=0):
    """Populate dataset `dset` from the first sheet of an ODS stream.

    `dset` is wiped first. The document is loaded from `in_stream`, and
    the first child of the spreadsheet whose local tag name is ``table``
    is handed to ``import_sheet`` together with `headers` and
    `skip_lines`. If the document contains no table, `dset` is left
    empty.
    """
    dset.wipe()

    ods_book = opendocument.load(in_stream)
    # Pick the first real table. The spreadsheet's first child is not
    # necessarily a table, so it must not be taken unconditionally, and a
    # dataset holds a single sheet, so later tables are not imported.
    first_table = next(
        (
            node for node in ods_book.spreadsheet.childNodes
            if node.qname[1] == 'table'
        ),
        None,
    )
    if first_table is not None:
        cls.import_sheet(dset, first_table, headers, skip_lines)

@classmethod
def import_book(cls, dbook, in_stream, headers=True):
    """Populate databook `dbook` from an ODS stream.

    `dbook` is wiped, then every child of the loaded spreadsheet whose
    local tag name is ``table`` is imported into a fresh ``Dataset`` and
    added to the book, in document order.
    """
    dbook.wipe()

    document = opendocument.load(in_stream)
    tables = (
        node for node in document.spreadsheet.childNodes
        if node.qname[1] == 'table'
    )
    for tbl in tables:
        sheet_data = tablib.Dataset()
        cls.import_sheet(sheet_data, tbl, headers)
        dbook.add_sheet(sheet_data)

@classmethod
def dset_sheet(cls, dataset, ws):
"""Completes given worksheet from given Dataset."""
Expand All @@ -66,6 +136,10 @@ def dset_sheet(cls, dataset, ws):
for j, col in enumerate(row):
if isinstance(col, numbers.Number):
cell = table.TableCell(valuetype="float", value=col)
elif isinstance(col, datetime):
cell = table.TableCell(valuetype="date", value=col.strftime('%Y-%m-%dT%H:%M:%S'))
elif isinstance(col, date):
cell = table.TableCell(valuetype="date", value=col.strftime('%Y-%m-%d'))
else:
cell = table.TableCell(valuetype="string")
cell.addElement(text.P(text=str(col), stylename=style))
Expand Down
Binary file added tests/files/book.ods
Binary file not shown.
25 changes: 19 additions & 6 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,13 +1107,26 @@ def test_tsv_export(self):


class ODSTests(BaseTestCase):
def test_ods_export_datatypes(self):
def test_ods_export_import_set(self):
date = datetime.date(2019, 10, 4)
date_time = datetime.datetime(2019, 10, 4, 12, 30, 8)
data.append(('string', '004', 42, 21.55, Decimal('34.5'), date_time))
data.headers = ('string', 'start0', 'integer', 'float', 'decimal', 'date/time')
# ODS is currently write-only, just test that output doesn't crash.
assert data.ods is not None
assert len(data.ods)
data.append(('string', '004', 42, 21.55, Decimal('34.5'), date, date_time))
data.headers = ('string', 'start0', 'integer', 'float', 'decimal', 'date', 'date/time')
_ods = data.ods
data.ods = _ods
self.assertEqual(data.dict[0]['string'], 'string')
self.assertEqual(data.dict[0]['start0'], '004')
self.assertEqual(data.dict[0]['integer'], 42)
self.assertEqual(data.dict[0]['float'], 21.55)
self.assertEqual(data.dict[0]['decimal'], 34.5)
self.assertEqual(data.dict[0]['date'], date)
self.assertEqual(data.dict[0]['date/time'], date_time)

def test_ods_import_book(self):
    """Loading the ``book.ods`` fixture as a Databook must yield two sheets."""
    fixture = Path(__file__).parent.joinpath('files', 'book.ods')
    with fixture.open('rb') as stream:
        book = tablib.Databook().load(stream, 'ods')
    sheet_count = len(book.sheets())
    self.assertEqual(sheet_count, 2)


class XLSTests(BaseTestCase):
Expand Down

0 comments on commit 40b1e43

Please sign in to comment.