Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using bibtexparser instead of pybtex for bibtex. #3740

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 45 additions & 44 deletions pybamm/citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
# https://firedrakeproject.org/citing.html
#
import pybamm
import os
import warnings
import os
from sys import _getframe
arjxn-py marked this conversation as resolved.
Show resolved Hide resolved
from pybamm.util import import_optional_dependency


class Citations:

"""Entry point to citations management.
This object may be used to record BibTeX citation information and then register that
a particular citation is relevant for a particular simulation.
Expand Down Expand Up @@ -74,24 +72,25 @@ def read_citations(self):
"""Reads the citations in `pybamm.CITATIONS.bib`. Other works can be cited
by passing a BibTeX citation to :meth:`register`.
"""
parse_file = import_optional_dependency("pybtex.database", "parse_file")
citations_file = os.path.join(pybamm.root_dir(), "pybamm", "CITATIONS.bib")
bib_data = parse_file(citations_file, bib_format="bibtex")
for key, entry in bib_data.entries.items():
self._add_citation(key, entry)
parse_file = import_optional_dependency("bibtexparser", "parse_file")
bib_data = parse_file(citations_file)
entries = bib_data.entries
for entry in entries:
self._add_citation(entry.key, entry)

def _add_citation(self, key, entry):
"""Adds `entry` to `self._all_citations` under `key`, warning the user if a
previous entry is overwritten
"""

Entry = import_optional_dependency("pybtex.database", "Entry")
# Check input types are correct
Entry = import_optional_dependency("bibtexparser.model", "Entry")
if not isinstance(key, str) or not isinstance(entry, Entry):
raise TypeError()

# Warn if overwriting a previous citation
new_citation = entry.to_string("bibtex")
new_citation = entry
if key in self._all_citations and new_citation != self._all_citations[key]:
warnings.warn(f"Replacing citation for {key}")

Expand Down Expand Up @@ -151,23 +150,22 @@ def _parse_citation(self, key):
key: str
A BibTeX formatted citation
"""
PybtexError = import_optional_dependency("pybtex.scanner", "PybtexError")
parse_string = import_optional_dependency("pybtex.database", "parse_string")

# Parse string as a bibtex citation, and check that a citation was found
try:
# Parse string as a bibtex citation, and check that a citation was found
bib_data = parse_string(key, bib_format="bibtex")
parse_string = import_optional_dependency("bibtexparser", "parse_string")
bib_data = parse_string(key)
if not bib_data.entries:
raise PybtexError("no entries found")
raise Exception("no entries found")

# Add and register all citations
for key, entry in bib_data.entries.items():
for entry in bib_data.entries:
# Add to _all_citations dictionary
self._add_citation(key, entry)
self._add_citation(entry.key, entry)
# Add to _papers_to_cite set
self._papers_to_cite.add(key)
self._papers_to_cite.add(entry.key)
return
except PybtexError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does bibtexparser also have some attribute corresponding to PybtexError?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No it doesn't. The only thing close to exception handling in bibtexparser is this.

# Unable to parse / unknown key
except Exception:
raise KeyError(f"Not a bibtex citation or known citation: {key}")

def _tag_citations(self):
Expand All @@ -179,10 +177,10 @@ def _tag_citations(self):
for key, entry in self._citation_tags.items():
print(f"{key} was cited due to the use of {entry}")

def print(self, filename=None, output_format="text", verbose=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should remove this option currently. Is there a way to convert to BibTeX, and if there isn't, is it possible that such a way be devised?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we can convert it into bibtex.

def print(self, filename=None, verbose=False):
"""Print all citations that were used for running simulations. The verbose
option is provided to print tags for citations in the output such that it can
can be seen where the citations were registered due to the use of PyBaMM models
be seen where the citations were registered due to the use of PyBaMM models
and solvers in the code.

.. note::
Expand Down Expand Up @@ -217,9 +215,6 @@ def print(self, filename=None, output_format="text", verbose=False):
Marquis2019 was cited due to the use of SPM

"""
# Parse citations that were not known keys at registration, but do not
# fail if they cannot be parsed
pybtex = import_optional_dependency("pybtex")
try:
for key in self._unknown_citations:
self._parse_citation(key)
Expand All @@ -231,37 +226,43 @@ def print(self, filename=None, output_format="text", verbose=False):
# delete the invalid citation from the set
self._unknown_citations.remove(key)

if output_format == "text":
citations = pybtex.format_from_strings(
self._cited, style="plain", output_backend="plaintext"
)
elif output_format == "bibtex":
citations = "\n".join(self._cited)
else:
raise pybamm.OptionError(
f"Output format {output_format} not recognised."
"It should be 'text' or 'bibtex'."
)
Comment on lines -234 to -244
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should not be removed unless it is necessary to do so since this counts as a breaking change. I don't mind it a lot though since this is not a commonly used feature

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing that we can do now is having an additional option like filtered_output along with bibtex. This option can print the essential hardcoded values (inside init) which might lead to data loss but we can provide much cleaner output if that's what user is more concerned about. Additionally we can also have a option raw which outputs the parsed bibtex as is (which is out only current output).

With this we can solve both problems. Not having data loss and also providing cleaner output.

citations = self._cited

if filename is None:
print(citations)
for entry in citations:
print(self._string_formatting(entry))
if verbose:
self._tag_citations() # pragma: no cover
else:
with open(filename, "w") as f:
f.write(citations)
for entry in citations:
f.write(self._string_formatting(entry))

def _string_formatting(self, entry):
bibtexparser = import_optional_dependency("bibtexparser")
if not isinstance(entry, bibtexparser.model.Entry):
raise TypeError(
"Input for string formatting must be an instance of bibtexparser.model.Entry"
)
txt_format = " "
for key, value in entry.items():
if key != "ID" and key != "ENTRYTYPE":
txt_format = txt_format + " " + str(value)
return f" {txt_format} \n"

@property
def citation_err_msg(self):
return self._citation_err_msg


def print_citations(filename=None, output_format="text", verbose=False):
def print_citations(filename=None, verbose=False):
"""See :meth:`Citations.print`"""
if citations._citation_err_msg is not None:
raise ImportError(
f"Citations could not be registered. If you are on Google Colab - "
"pybtex does not work with Google Colab due to a known bug - "
"https://bitbucket.org/pybtex-devs/pybtex/issues/148/. "
f"Citations could not be registered."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if we can test this – pybtex is quite inactive and did not work on Google Colab, but bibtexparser might work well

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll try this out.

"Please manually cite all the references."
"\nError encountered -\n"
f"{citations._citation_err_msg}"
f"{citations.citation_err_msg}"
)
else:
if verbose: # pragma: no cover
Expand All @@ -270,9 +271,9 @@ def print_citations(filename=None, output_format="text", verbose=False):
"Verbose output is available only for the terminal and not for printing to files",
)
else:
citations.print(filename, output_format, verbose=True)
citations.print(filename, verbose=True)
else:
pybamm.citations.print(filename, output_format)
citations.print(filename)


citations = Citations()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ plot = [
]
# For the Citations class
cite = [
"pybtex>=0.24.0",
"bibtexparser>=2.0.0b5",
]
# Battery Parameter eXchange format
bpx = [
Expand Down
23 changes: 11 additions & 12 deletions tests/unit/test_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import unittest
import contextlib
import warnings
from pybtex.database import Entry
from bibtexparser.model import Entry
from tempfile import NamedTemporaryFile


Expand Down Expand Up @@ -47,15 +47,9 @@ def test_citations(self):
def test_print_citations(self):
pybamm.citations._reset()

# Text Style
with temporary_filename() as filename:
pybamm.print_citations(filename, "text")
with open(filename) as f:
self.assertTrue(len(f.readlines()) > 0)

# Bibtext Style
with temporary_filename() as filename:
pybamm.print_citations(filename, "bibtex")
pybamm.print_citations(filename)
with open(filename) as f:
self.assertTrue(len(f.readlines()) > 0)

Expand All @@ -64,11 +58,11 @@ def test_print_citations(self):
with contextlib.redirect_stdout(f):
pybamm.print_citations()
self.assertTrue(
"Python Battery Mathematical Modelling (PyBaMM)." in f.getvalue()
" {Python Battery Mathematical Modelling (PyBaMM)}" in f.getvalue()
)

with self.assertRaisesRegex(pybamm.OptionError, "'text' or 'bibtex'"):
pybamm.print_citations("test_citations.txt", "bad format")
# with self.assertRaisesRegex(pybamm.OptionError, "'text' or 'bibtex'"):
# pybamm.print_citations("test_citations.txt", "bad format")

pybamm.citations._citation_err_msg = "Error"
with self.assertRaisesRegex(ImportError, "Error"):
Expand Down Expand Up @@ -111,7 +105,7 @@ def test_input_validation(self):
"""Test type validation of ``_add_citation``"""
pybamm.citations.register(1)

with self.assertRaises(TypeError):
with self.assertRaises(KeyError):
pybamm.citations._parse_citation(1)

with self.assertRaises(TypeError):
Expand All @@ -120,6 +114,11 @@ def test_input_validation(self):
with self.assertRaises(TypeError):
pybamm.citations._add_citation(1001, Entry("misc"))

def test_string_formatting(self):
"""Test type validation of ``_string_formatting``"""
with self.assertRaisesRegex(TypeError, "Input for string formatting"):
pybamm.citations._string_formatting("NotAnEntry")

def test_andersson_2019(self):
citations = pybamm.citations
citations._reset()
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ def test_git_commit_info(self):

def test_import_optional_dependency(self):
with self.assertRaisesRegex(
ModuleNotFoundError, "Optional dependency pybtex is not available."
ModuleNotFoundError, "Optional dependency bibtexparser is not available."
):
pybtex = sys.modules["pybtex"]
sys.modules["pybtex"] = None
bibtexparser = sys.modules["bibtexparser"]
sys.modules["bibtexparser"] = None
Comment on lines +106 to +107
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did have to think a bit about this because it wasn't really prominent in the first read. We are using the assertRaisesRegex context manager clause to test the ModuleNotFoundError message, and pybamm.print_citations used to call the have_optional_dependency utility function for pybtex, which was removed – which means that the function is not called and subsequently an error was not raised.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the case indeed. If you feel this should be changed to some other stable optional dependency then it can also be looked into.

pybamm.print_citations()
with self.assertRaisesRegex(
ModuleNotFoundError, "Optional dependency anytree is not available."
Expand All @@ -118,8 +118,8 @@ def test_import_optional_dependency(self):
sym = pybamm.div(c * pybamm.grad(c)) + (c / d + c - d) ** 5
sym.visualise(test_name)

sys.modules["pybtex"] = pybtex
pybamm.util.import_optional_dependency("pybtex")
sys.modules["bibtexparser"] = bibtexparser
pybamm.util.import_optional_dependency("bibtexparser")
pybamm.print_citations()


Expand Down
Loading