Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the archive exporter #4448

Merged
merged 23 commits into from
Oct 23, 2020
Merged
Changes from 3 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 41 additions & 27 deletions aiida/tools/importexport/dbexport/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ class ExportMetadata:
# Entity type -> database key -> meta parameters
all_fields_info: Dict[str, Dict[str, Dict[str, str]]] = field(repr=False)
aiida_version: str = field(default_factory=get_version)
export_version: str = EXPORT_VERSION


@dataclass
Expand All @@ -113,6 +112,24 @@ def __repr__(self) -> str:
) + ')'


@dataclass
class ExportReport:
    """Class for storing data about the archive process.

    Records the wall-clock timestamps (seconds since the epoch, as returned by
    ``time.time()``) of the collect and write phases of an export, plus any
    additional data returned by the archive writer.
    """

    # time in seconds (epoch timestamps bracketing the data-collection phase)
    time_collect_start: float
    time_collect_stop: float
    # skipped (left as None) if there was no data to write
    time_write_start: Optional[float] = None
    time_write_stop: Optional[float] = None
    # additional data returned by the writer (writer-specific keys, e.g.
    # compression timings); None when the write phase was skipped
    writer_data: Optional[Dict[str, Any]] = None

    @property
    def total_time(self) -> float:
        """Return total time taken in seconds.

        Measured from the start of collection to the end of the write phase,
        or to the end of collection when nothing was written.
        """
        # Explicit ``is None`` test (rather than ``or``) so that a falsy
        # timestamp value such as 0.0 would still be honoured.
        end_time = self.time_collect_stop if self.time_write_stop is None else self.time_write_stop
        return end_time - self.time_collect_start


class ArchiveWriterAbstract(ABC):
"""An abstract interface for AiiDA archive writers."""

ltalirz marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -174,8 +191,8 @@ def _write_to_json_archive(folder: Union[Folder, ZipFolder], export_data: Archiv
fhandle.write(json.dumps(data))

metadata = {
'export_version': EXPORT_VERSION,
'aiida_version': export_data.metadata.aiida_version,
'export_version': export_data.metadata.export_version,
'all_fields_info': export_data.metadata.all_fields_info,
'unique_identifiers': export_data.metadata.unique_identifiers,
'export_parameters': {
Expand Down Expand Up @@ -368,7 +385,7 @@ def export(
forbidden_licenses: Optional[Union[list, Callable]] = None,
writer_init: Optional[Dict[str, Any]] = None,
**traversal_rules: bool,
) -> dict:
) -> ExportReport:
"""Export AiiDA data to an archive file.

.. deprecated:: 1.2.1
Expand Down Expand Up @@ -475,9 +492,9 @@ def export(
**traversal_rules
)

output_data = {}
report_data: Dict[str, Any] = {'time_write_start': None, 'time_write_stop': None, 'writer_data': None}
Copy link
Member

@ltalirz ltalirz Oct 22, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity - is this the suggested way to build instances of dataclasses, i.e. to build a dict and then pass it to the constructor?

E.g. couldn't you just initialize it below with
report = ExportReport(time_collect_start=..., time_collect_stop=...)
and then just add to the instance?

It's not very important here, but in general it would avoid retyping the attributes of the data class as dict keys (possibly introducing typos).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well you would have to initialise it with a dummy time_collect_start etc, then assume that it will get overridden later on, which I don't think is a good idea

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, you could just initialize it once you have both collect times

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah potatas, potatas, both are valid 😉


output_data['time_collect_start'] = time.time()
report_data['time_collect_start'] = time.time()
export_data = _collect_archive_data(
entities=entities,
silent=silent,
Expand All @@ -487,22 +504,22 @@ def export(
include_logs=include_logs,
**traversal_rules
)
output_data['time_collect_stop'] = time.time()
report_data['time_collect_stop'] = time.time()

extract_time = output_data['time_collect_start'] - output_data['time_collect_stop']
extract_time = report_data['time_collect_stop'] - report_data['time_collect_start']
EXPORT_LOGGER.debug(f'Data extracted in {extract_time:6.2g} s.')

if export_data is not None:
try:
output_data['time_write_start'] = time.time()
output_data['writer'] = writer.write(export_data=export_data, silent=silent) # type: ignore
output_data['time_write_stop'] = time.time()
report_data['time_write_start'] = time.time()
report_data['writer_data'] = writer.write(export_data=export_data, silent=silent) # type: ignore
report_data['time_write_stop'] = time.time()
except (exceptions.ArchiveExportError, LicensingException) as exc:
if os.path.exists(filename):
os.remove(filename)
raise exc

write_time = output_data['time_write_start'] - output_data['time_write_stop']
write_time = report_data['time_write_stop'] - report_data['time_write_start']
EXPORT_LOGGER.debug(f'Data written in {write_time:6.2g} s.')

else:
Expand All @@ -513,7 +530,7 @@ def export(
if silent:
logging.disable(logging.NOTSET)

return output_data
return ExportReport(**report_data)


@override_log_formatter('%(message)s')
Expand Down Expand Up @@ -1106,7 +1123,7 @@ def export_zip(
filename: Optional[str] = None,
use_compression: bool = True,
**kwargs: Any,
) -> Tuple[float, ...]:
) -> Tuple[float, float]:
"""Export in a zipped folder

.. deprecated:: 1.2.1
Expand All @@ -1124,25 +1141,25 @@ def export_zip(
warnings.warn(
'function is deprecated and will be removed in AiiDA v2.0.0, use `export` instead', AiidaDeprecationWarning
) # pylint: disable=no-member
output_data = export(
report = export(
entities=entities,
filename=filename,
file_format=ExportFileFormat.ZIP,
use_compression=use_compression,
**kwargs,
)
if 'time_write_stop' in output_data:
return (output_data['time_collect_start'], output_data['time_write_stop'])
if report.time_write_stop is not None:
return (report.time_collect_start, report.time_write_stop)

# there was no data to write
return (output_data['time_collect_start'], output_data['time_collect_stop'])
return (report.time_collect_start, report.time_collect_stop)


def export_tar(
entities: Optional[Iterable[Any]] = None,
filename: Optional[str] = None,
**kwargs: Any,
) -> Tuple[float, ...]:
) -> Tuple[float, float, float, float]:
"""Export the entries passed in the 'entities' list to a gzipped tar file.

.. deprecated:: 1.2.1
Expand All @@ -1157,21 +1174,18 @@ def export_tar(
warnings.warn(
'function is deprecated and will be removed in AiiDA v2.0.0, use `export` instead', AiidaDeprecationWarning
) # pylint: disable=no-member
output_data = export(
report = export(
entities=entities,
filename=filename,
file_format=ExportFileFormat.TAR_GZIPPED,
**kwargs,
)

if 'writer' in output_data:
if report.writer_data is not None:
return (
output_data['time_collect_start'], output_data['time_write_stop'],
output_data['writer']['compression_time_start'], output_data['writer']['compression_time_stop']
)
report.time_collect_start, report.time_write_stop, report.writer_data['compression_time_start'],
report.writer_data['compression_time_stop']
) # type: ignore

# there was no data to write
return (
output_data['time_collect_start'], output_data['time_collect_stop'], output_data['time_collect_stop'],
output_data['time_collect_stop']
)
return (report.time_collect_start, report.time_collect_stop, report.time_collect_stop, report.time_collect_stop)