Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allowing to compute lambda fitting on series and stop dropping PM if it is in the REE pattern #100

Merged
merged 2 commits into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/dev/contributors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ comments, testing, bug reports, or feature requests.
* `Sarah Shi <https://github.com/sarahshi>`__
* `Ondrej Lexa <https://github.com/ondrolexa>`__
* `Bob Myhill <https://github.com/bobmyhill>`__
* `Malte Mues <https://github.com/mmuesly>`__
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding your name here!

15 changes: 7 additions & 8 deletions pyrolite/geochem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, obj):
"""Custom dataframe accessor for pyrolite geochemistry."""
self._validate(obj)
self._obj = obj
self._selection_index = self._obj.columns if isinstance(self._obj, pd.DataFrame) else self._obj.index

@staticmethod
def _validate(obj):
Expand All @@ -44,8 +45,7 @@ def list_elements(self):
-------
The list will have the same ordering as the source DataFrame.
"""
fltr = self._obj.columns.isin(_common_elements)
return self._obj.columns[fltr].tolist()
return [i for i in self._selection_index if i in _common_elements]

@property
def list_isotope_ratios(self):
Expand All @@ -60,8 +60,7 @@ def list_isotope_ratios(self):
-------
The list will have the same ordering as the source DataFrame.
"""
fltr = [parse.is_isotoperatio(c) for c in self._obj.columns]
return self._obj.columns[fltr].tolist()
return [c for c in self._selection_index if parse.is_isotoperatio(c)]

@property
def list_REE(self):
Expand All @@ -76,7 +75,7 @@ def list_REE(self):
-------
The returned list will reorder REE based on atomic number.
"""
return [i for i in REE() if i in self._obj.columns]
return [i for i in REE(dropPm=("Pm" not in self._selection_index)) if i in self._selection_index]

@property
def list_REY(self):
Expand All @@ -91,7 +90,7 @@ def list_REY(self):
-------
The returned list will reorder REE based on atomic number.
"""
return [i for i in REY() if i in self._obj.columns]
return [i for i in REY(dropPm=("Pm" not in self._selection_index)) if i in self._selection_index]

@property
def list_oxides(self):
Expand All @@ -106,8 +105,8 @@ def list_oxides(self):
-------
The list will have the same ordering as the source DataFrame.
"""
fltr = self._obj.columns.isin(_common_oxides)
return self._obj.columns[fltr].tolist()
fltr = self._selection_index.isin(_common_oxides)
return self._selection_index[fltr].tolist()

@property
def list_compositional(self):
Expand Down
29 changes: 29 additions & 0 deletions pyrolite/util/lambdas/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pandas import DataFrame, Series


def b_s_x2_to_df(B, s, X2, index, names, add_uncertainties, add_X2):
lambdas = DataFrame(
B,
index=index,
columns=names,
dtype="float32",
)
if add_uncertainties:
lambdas.loc[:, [n + "_" + chr(963) for n in names]] = s
if add_X2:
lambdas["X2"] = X2
return lambdas


def b_s_x2_to_series(B, s, X2, names, add_uncertainties, add_X2):
lambdas = Series(
B,
index=names,
dtype="float32",
)
if add_uncertainties:
for i, n in enumerate(names):
lambdas.loc[n + "_" + chr(963)] = s[i]
if add_X2:
lambdas["X2"] = X2
return lambdas
101 changes: 87 additions & 14 deletions pyrolite/util/lambdas/oneill.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ..missing import md_pattern
from .eval import get_function_components, lambda_poly
from .params import parse_sigmas
from .helpers import b_s_x2_to_df, b_s_x2_to_series

logger = Handle(__name__)

Expand Down Expand Up @@ -43,6 +44,90 @@ def get_polynomial_matrix(radii, params=None):
return A


def lambdas_ONeill2016_series(
series,
radii,
params=None,
sigmas=None,
add_X2=False,
add_uncertainties=False,
**kwargs
):
r"""
This is a variant of :func:`~pyrolite.geochem.lambdas_ONeill2016`
adapted to work on a Series instead of a DataFrame.
Implementation of the original algorithm. [#ref_1]_

Parameters
-----------
series : :class:`pandas.Series`
Series of REE data, with sample analyses organised by row.
radii : :class:`list`, :class:`numpy.ndarray`
Radii at which to evaluate the orthogonal polynomial.
params : :class:`tuple`
Tuple of constants for the orthogonal polynomial.
sigmas : :class:`float` | :class:`numpy.ndarray`
Single value or 1D array of normalised observed value uncertainties
(:math:`\sigma_{REE} / REE`).
add_uncertainties : :class:`bool`
Append parameter standard errors to the dataframe.

Returns
--------
:class:`pandas.Series`

See Also
---------
:func:`~pyrolite.util.lambdas.params.orthogonal_polynomial_constants`
:func:`~pyrolite.geochem.transform.lambda_lnREE`

References
-----------
.. [#ref_1] O’Neill HSC (2016) The Smoothness and Shapes of Chondrite-normalized
Rare Earth Element Patterns in Basalts. J Petrology 57:1463–1508.
doi: `10.1093/petrology/egw047 <https://dx.doi.org/10.1093/petrology/egw047>`__

"""
assert params is not None
names, x0, func_components = get_function_components(radii, params=params)
X = np.array(func_components).T
sigmas = parse_sigmas(series.size, sigmas=sigmas)

xd = len(func_components)
rad = np.array(radii)

B = np.ones(xd) * np.nan
s = np.ones(xd) * np.nan

missing_fltr = np.isfinite(series)
numberOfPresentElemetns = missing_fltr.sum()

A = get_polynomial_matrix(rad[missing_fltr], params=params)
invA = np.linalg.inv(A)
V = np.vander(rad, xd, increasing=True).T
Z = (series.values[missing_fltr][np.newaxis, :] * V[:, missing_fltr]).sum(axis=-1)
_B = (invA @ Z.T).T
############################################################################
_sigmas = sigmas[missing_fltr]
_x = X[missing_fltr, :]
W = np.eye(_sigmas.shape[0]) * 1 / _sigmas**2 # weights
invXWX = np.linalg.inv(_x.T @ W @ _x)

est = (X[missing_fltr] @ _B.T).T # modelled values
# residuals over all rows
residuals = (series.loc[missing_fltr] - est).values
dof = (
numberOfPresentElemetns - xd
) # effective degrees of freedom (for this mising filter)
# chi-sqared as SSQ / sigmas / residual degrees of freedom
reduced_chi_squared = (residuals**2 / _sigmas**2).sum() / dof
_s = np.sqrt(reduced_chi_squared.reshape(-1, 1) * np.diag(invXWX))

B[:] = _B
s[:] = _s
return b_s_x2_to_series(B, s, reduced_chi_squared, names, add_uncertainties, add_X2)


@update_docstring_references
def lambdas_ONeill2016(
df, radii, params=None, sigmas=None, add_X2=False, add_uncertainties=False, **kwargs
Expand Down Expand Up @@ -84,15 +169,14 @@ def lambdas_ONeill2016(
names, x0, func_components = get_function_components(radii, params=params)
X = np.array(func_components).T
y = df.values
sigmas = parse_sigmas(y, sigmas=sigmas)
sigmas = parse_sigmas(y.shape[1], sigmas=sigmas)

xd = len(func_components)
rad = np.array(radii) # so we can use a boolean index

B = np.ones((y.shape[0], xd)) * np.nan
s = np.ones((y.shape[0], xd)) * np.nan
χ2 = np.ones((y.shape[0], 1)) * np.nan

md_inds, patterns = md_pattern(df)
# for each missing data pattern, we create the matrix A - rather than each row
for ind in np.unique(md_inds):
Expand Down Expand Up @@ -126,15 +210,4 @@ def lambdas_ONeill2016(
B[row_fltr, :] = _B
s[row_fltr, :] = _s
χ2[row_fltr, 0] = reduced_chi_squared

lambdas = pd.DataFrame(
B,
index=df.index,
columns=names,
dtype="float32",
)
if add_uncertainties:
lambdas.loc[:, [n + "_" + chr(963) for n in names]] = s
if add_X2:
lambdas["X2"] = χ2
return lambdas
return b_s_x2_to_df(B, s, χ2, df.index, names, add_uncertainties, add_X2)
Loading
Loading