"""
This is the `qpcr.Calibrator` class that is able to compute qPCR amplification efficiencies from `qpcr.Assay` objects.
Amplification Efficiencies in ``qpcr``
------------------------
By default ``qpcr`` sets the amplification efficiency of a new ``qpcr.Assay`` to ``1`` (100%). However, they can be set to any percentage (also > 1) using the ``efficiency`` method of the ``qpcr.Assay``.
``qpcr`` stores the efficiency as percentage but actually calculates with :math:`2 \ \cdot \ efficiency` when computing Delta-Ct values.
Assigning an assay's efficiency
------
The ``qpcr.Calibrator`` is dedicated to either computing the amplification efficiency of an assay or assigning existing effiencies that have been calculated elsewhere.
In order to compute new efficiencies the Calibrator requires a set of *decorated replicates* that come from a **dilution series**. You can check out `this tutorial <https://github.com/NoahHenrikKleinschmidt/qpcr/blob/main/Examples/9_custom_efficiencies.ipynb>`_ to learn more.
If appropriate data is available we can use the ``qpcr.Calibrator`` to compute and assign new efficiencies by:
.. code-block:: python
calibrator = qpcr.Calibrator()
assay = calibrator.calibrate( assay )
This will read the data, perform a linear regression to determine the efficiency, and assign the computed efficiency to the assay. Also, the efficiency is now stored by the Calibrator.
The stored efficiencies can be easily saved to a file using:
.. code-block:: python
calibrator.save( "my_efficiencies.csv" )
We usually do not have a dilution series in each of our datasets, however. So most often you will wish to assign efficiencies that have been already computed from other qPCR runs.
For these cases, the Calibrator can read a reference "database file" and assign existing efficiencies to new Assays as long as their ``id`` is present among the reference efficiencies.
.. code-block:: python
# read already existing efficiencies from a file
calibrator.load( "my_efficiencies.csv" )
# and now simple assign an existing efficiency to the assay
assays = calibrator.assign( assay )
If we have both assays with existing efficiencies and such with new dilution series data, we can actually just use the Calibrator's ``pipe`` method to process all of them at once.
.. code-block:: python
many_assays = [ ... ]
calibrator.load( "my_efficiencies.csv" )
# pipe all assays, which will assign where possible, and calibrate anew where necessary (and data is available)
many_assays = calibrator.pipe( many_assays )
# finally, save the (now updated) database of efficiencies
# this will by default update the file "my_efficiencies.csv" which was already loaded.
calibrator.save()
"""
import qpcr.defaults as defaults
import qpcr._auxiliary as aux
import qpcr._auxiliary.warnings as aw
import pandas as pd
import numpy as np
import scipy.stats as scistats
from qpcr.Curves import EfficiencyCurve
from qpcr.main import Assay
logger = aux.default_logger()
[docs]class Calibrator(aux._ID):
"""
Calculates qPCR primer efficiency based on a dilution series.
The dilution series may either be represented as an entire assay
or as a subset of groups within an assay denoted as `calibrator : {some_name}`.
In this mode, calibrator replicates will be removed after calibration is done.
It is possible to specify the dilution steps directly in the groupnames as:
`calibrator: {some_name}: dil` where `dil` is the inverse dilution step, e.g.
`calibrator: my_sample: 2` for a `1 : 2` dilution or `calibrator: my_sample: 100`
for a `1 : 100`. Note, this will have to be present in **each** groupname!
Alternatively, if no dilution is specified in the groupnames or they cannot be inferred
for some other reason, it is possible to supply a dilution step via
the `qpcr.Calibrator.dilution` method.
"""
__slots__ = ["_id", "_loaded_file", "_eff_dict", "_computed_values", "_orig_dilution", "_manual_dilution_set"]
def __init__(self):
super().__init__()
self._eff_dict = {} # stores the efficiencies as assay_id : efficiency
self._computed_values = {} # stores newly computed efficiency data as:
# assay_id : EfficiencyCurve(...)
# The EfficiencyCurve object stores:
# - dilutions
# - Ct values
# - the Linreg model
self._dilution_step = None # the dilution step(s) used
self._manual_dilution_set = False
self._orig_dilution = None
self._loaded_file = None
def __str__(self):
effs = str(pd.DataFrame(self._eff_dict))
_length = len(effs.split("\n")[0])
s = f"""
{"-" * _length}
{self.__class__.__name__}:\t{self._id}
Loaded File:\t{self._loaded_file}
""".strip()
if self._manual_dilution_set:
s = f"{s}\nDilution:\t{self._orig_dilution}"
s = f""""{s}\n{"-" * _length}\n{effs}\n{"-" * _length}"""
return s
def __repr__(self):
file = self._loaded_file
effs = self._eff_dict
return f"{self.__class__.__name__}({file=}, {effs=})"
[docs] def save(self, filename: str = None, mode: str = "write"):
"""
Saves the calculated efficiencies to a `csv` file.
Parameters
-------
filename : str
The filepath in which to store the efficiencies. If a file
was already loaded then by default the same file will be
used to save values again.
mode : str
Can be either `"write"` to fully overwrite an existing file,
with the newly computed data, or `"append"` to only add newly
computed efficiencies.
"""
if filename is None and self._loaded_file is not None:
filename = self._loaded_file
if mode == "write":
self._save(filename, self._eff_dict)
elif mode == "append":
current = self._load(filename)
new = {**current, **self._eff_dict}
self._save(filename, new)
else:
e = aw.CalibratorError("unknown_savemode", mode=mode)
logger.info(e)
[docs] def load(self, filename, merge: bool = True, supersede: bool = False):
"""
Loads a `csv` file of previously computed efficiencies.
Parameters
-------
filename : str
The filepath to load efficiencies from.
merge : bool
In case efficiencies are already loaded, merge the
new and existing ones. If `False` the current ones will
be replaced completely.
supersede : bool
In case efficiencies of the same assay are already loaded
they will be overwritten by the newly incoming ones if `supersede = True`.
"""
try:
current = self._load(filename)
if merge:
current = {**self._eff_dict, **current} if supersede else {**current, **self._eff_dict}
self.adopt(current)
self._loaded_file = filename
return current
except Exception as e:
logger.error(e)
e = aw.CalibratorError("unknown_filetype", filename=filename)
logger.critical(e)
raise e
[docs] def get(self, which="efficiencies"):
"""
Returns
-------
dict
Either the stored efficiencies (if
`which = "efficiencies"`) or
the computed values of newly computed
efficiencies (if `which = "values"`).
"""
if which == "efficiencies":
return self.efficiencies
elif which == "values":
return self.computed_values
[docs] def merge(self, *filenames, outfile=None, adopt=True):
"""
Merges multiple efficiency files together into a single one.
Parameters
-------
filenames : iterable
Filepaths to load data from which should be merged together.
outfile : str
The filepath in which to store the merged efficiencies.
Not saved if set to `None`.
adopt : bool
Will adopt the merged dictionary as its own if `True` (default).
Returns
-------
all_effiencies : dict
The merged dictionary of all efficiencies from all files.
"""
all_efficiencies = {}
for filename in filenames:
new = self._load(filename)
all_efficiencies = {**all_efficiencies, **new}
if outfile is not None:
self._save(outfile, all_efficiencies)
if adopt:
self.adopt(all_efficiencies)
return all_efficiencies
[docs] def reset(self):
"""
Resets the Calibrator to initial settings. This will
clear all stored efficiency values and computed data!
"""
self.__init__()
return self
[docs] def clear(self):
"""
Will clear all stored efficiency values and computed data.
"""
self._eff_dict = {}
self._computed_values = {}
return self
[docs] def adopt(self, effs: dict):
"""
Adopts an externally generated dictionary of `assay : efficiency`
structure as its own.
Parameters
-------
effs : dict
A dictionary where keys are Assay Ids (`str`)
and values are `float` efficiencies.
"""
if aux.same_type(effs, {}):
self._eff_dict = effs
else:
aw.SoftWarning("Calibrator:cannot_adopt", effs=effs, eff_type=type(effs).__name__)
return self
[docs] def dilution(self, step: float or np.ndarray or tuple = None):
"""
Gets or sets the dilution steps used. This must be a `float` fraction
e.g. `0.5` for a `1 : 2` dilution series or `0.1` for a `1 : 10` series etc.
If there are multiple steps because there is a gap in the dilution series. It is
necessary to supply a step for each group individually e.g. `[1,0.5,0.25,0.0625,0.03125]`.
if there are 5 dilution steps (originally six but 0.125 was discarded).
Note, both of the above also work with the inverse dilutions e.g. `2` or `[1,2,4,16,32]`.
By default the `qpcr.Calibrator` tries to infer the dilutions automatically.
This only works, however, if the calibrator groupnames specify `calibrator: {some name} : dil` where
`dil` is the inverse dilution step (e.g. `calibrator: my_sample: 2` for a `1 : 2` dilution). Note,
it is important that the dilution step is given as the inverse (i.e. *not* as `1:2 or 1/2` or something else! )
Parameters
----------
step : float or np.ndarray
The dilution step used.
Returns
-------
dilution : float or np.ndarray
The currently used dilution step.
"""
dilution = self._dilution(step)
self._orig_dilution = self._dilution_step
# if the dilution is now set to a valid
# value we set the _manual_dilution_set check to True
if dilution is not None:
self._manual_dilution_set = True
return dilution
[docs] def pipe(self, assay: Assay, remove_calibrators: bool = True, ignore_uncalibrated: bool = False):
"""
A wrapper for calibrate / assign.
This method will first try to assign pre-computed efficiencies
and if no matching ones are found it will try to calculate a new efficiency
from the assay.
Parameters
----------
assay : qpcr.Assay
A `qpcr.Assay` object.
remove_calibrators : bool
If calibrators are present in the assay alongside other groups,
remove the calibrator replicates after assignment or efficiency calculation.
ignore_uncalibrated : bool
If `True` assays that could neither be newly calibrated nor be assigned an existing
efficiency will be ignored. Otherwise, and error will be raised.
Returns
-------
assay : qpcr.Assay
The now calibrated `qpcr.Assay`.
"""
if isinstance(assay, list):
return [self.pipe(A, remove_calibrators=remove_calibrators, ignore_uncalibrated=ignore_uncalibrated) for A in assay]
if self._eff_dict != {}:
# first try to assign (will leave the assay unchanged if nothing is found)
eff = self._get_efficiency(assay)
if eff is not None:
assay = self.assign(assay, remove_calibrators=remove_calibrators)
else:
try:
assay = self.calibrate(assay, remove_calibrators=remove_calibrators)
except Exception as e:
logger.info(e)
e = aw.CalibratorError("cannot_process_assay", id=assay.id())
if not ignore_uncalibrated:
logger.critical(e)
raise e
else:
logger.info(e)
else:
try:
assay = self.calibrate(assay, remove_calibrators=remove_calibrators)
except Exception as e:
logger.info(e)
e = aw.CalibratorError("cannot_process_assay", id=assay.id())
if not ignore_uncalibrated:
logger.critical(e)
raise e
else:
logger.info(e)
return assay
[docs] def calibrate(self, assay: Assay, remove_calibrators: bool = True):
"""
Computes an efficiency from an `qpcr.Assay` object.
This method will try to compute a new efficiency. To do this, it will check autonomously if
`calibrator : {}` replicates are present and use these for computation. If none are
found it will assume the entire assay is to be used as calibrator.
Note
----
Calibrators are searched for through the group `names` not the replicate ids!
Parameters
----------
assay : qpcr.Assay
A `qpcr.Assay` object.
remove_calibrators : bool
If calibrators are present in the assay alongside other groups,
remove the calibrator replicates after efficiency calculation.
"""
if isinstance(assay, list):
return [self.calibrate(assay=i, remove_calibrators=remove_calibrators) for i in assay]
# get the assay's groupnames and check for the calibrator prefix.
names = assay.names(as_set=False).unique()
# check if any groups are declared as calibrators
calibrators = np.array([self._has_calibrator_prefix(i) for i in names])
has_calibrators = any(calibrators)
# now get the relevant dataframe for the computation
# this will either be the entire df (if no calibrator groups are present)
# or just the subset of calibrators
df = assay.get()
if has_calibrators:
df = self._subset_calibrators(names, calibrators, df)
# get Ct column name
ct_name = defaults.raw_col_names[1]
# drop NaN cols as they are incompatible with linregress anyway...
df = df[df[ct_name] == df[ct_name]]
# now sort the dataframe by Ct values as they need to strictly
# increase for dilution series.
df = df.sort_values(ct_name).reset_index()
df = df.rename(columns={"index": "orig_index"})
# now generate dilution steps ( i.e. "concentrations" )
# to do that we first need to check if dilutions have not
# been supplied, and then try to infer them based on the groupnames
# if we got an input for dilution() we use that to generate a
# dilution steps array...
# NOTE: The non-log-scaled dilutions are now stored in self._dilution_steps
# while the log-scaled versions are returned. Hence, the dilutions
# variable below is the log-scaled version!
if not self._manual_dilution_set:
dilutions = self._infer_dilution_steps(df)
else:
dilutions = self._generate_dilution_steps(df)
# now interpolate a line through the log dilutions and the ct values
cts = df[ct_name].to_numpy()
regression_line = scistats.linregress(x=dilutions, y=cts)
# and now compute the efficiency from the regression line
efficiency = self._compute_efficiency(regression_line)
# and now assign the efficiency to the assay
assay.efficiency(efficiency)
# save the efficiency in self._eff_dict
# self._eff_dict.update( { assay.id() : assay.efficiency() } )
self._eff_dict[assay.id()] = assay.efficiency()
# and, finally, save the computed values and the efficiency
self._save_computation(assay, dilutions, cts, regression_line)
# now remove the calibrators from the assay
# but only do so in case there were calibrators found!
# If the entire assay was used, we do not delete the entries...
if has_calibrators and remove_calibrators:
self._remove_calibrators(assay, df)
return assay
[docs] def assign(self, assay: Assay, remove_calibrators: bool = True):
"""
Assigns an efficiency to an `qpcr.Assay` based on its Id.
This requires that an efficiency corresponding to the Assay's Id
is present in the currently loaded / computed effiencies.
Parameters
----------
assay : qpcr.Assay
A `qpcr.Assay` object.
remove_calibrators : bool
If calibrators are present in the assay alongside other groups,
remove the calibrator replicates.
"""
eff = self._get_efficiency(assay)
if eff is not None:
# set assay's efficiency
assay.efficiency(eff)
# check if any groups are declared as calibrators
# that should be removed from the df
names = assay.names(as_set=False).unique()
calibrators = np.array([self._has_calibrator_prefix(i) for i in names])
has_calibrators = any(calibrators)
if has_calibrators and remove_calibrators:
df = assay.get()
df = self._subset_calibrators(names, calibrators, df)
df = df.sort_values(defaults.raw_col_names[1]).reset_index()
df = df.rename(columns={"index": "orig_index"})
self._remove_calibrators(assay, df)
else:
aw.SoftWarning("Calibrator:could_not_assign", id=assay.id())
return assay
[docs] def plot(self, mode: str = None, **kwargs):
"""
A shortcut to call a `qpcr.Plotters.EfficiencyLines` plotter
to visualise the regression lines from de novo efficiency computations.
Parameters
-------
mode : str
The plotting mode. May be either "static" (matplotlib) or "interactive" (plotly).
**kwargs
Any additional keyword arguments to be passed to the plotter.
Returns
-------
fig : plt.figure or plotly.figure
The figure generated by `EfficiencyLines`.
"""
import qpcr.Plotters as Plotters
plotter = Plotters.EfficiencyLines(mode=mode)
plotter.link(self)
fig = plotter.plot(**kwargs)
return fig
def __qplot__(self, **kwargs):
return self.plot
@property
def efficiencies(self):
"""
Returns
------
dict
The currently stored efficienies.
"""
return self._eff_dict
@property
def computed_values(self):
"""
Returns
------
dict
The currently stored values from newly
computed efficiencies.
"""
return self._computed_values
def _dilution(self, step=None):
"""
The functional core of self.dilution() the only difference is
that self.dilution also sets a boolean attribute self._manual_dilution_set to True...
Which signals that the manually supplied dilutions should be used rather than that
they should be inferred from the dataset...
"""
if step is not None:
unknown_datatype = not isinstance(step, (float, int, np.ndarray, pd.Series, tuple, list))
if unknown_datatype:
aw.HardWarning("Calibration:cannot_interpret_dilution", step=step, step_type=type(step).__name__)
# check if we need to convert to numpy array
# because we have an iterable without math operations support.
if isinstance(step, (tuple, list)):
step = np.array(step)
# check for an ndarray and make sure to invert if
# the dilution steps are given as 2 4 instead of
# 0.5 0.25 etc., also do the same for a single number...
is_inverse_array = isinstance(step, (np.ndarray, pd.Series)) and any(step > 1)
is_inverse_number = isinstance(step, (float, int)) and step > 1
need_inverse = is_inverse_array or is_inverse_number
if need_inverse:
step = 1 / step
# and store new steps
self._dilution_step = step
return self._dilution_step
def _remove_calibrators(self, assay, df):
"""
Drops all calibrator replicates from the dataframe of the Assay.
Note
-------
This leaves the index unchanged! Possible we might wish to also reset the
index during this step...
"""
to_remove = df["orig_index"].to_numpy()
index = np.zeros(assay.n())
for i in to_remove:
index[i] = 1
index = np.argwhere(index == 1)
index = np.squeeze(index)
assay.ignore(index, drop=True)
def _save_computation(self, assay, dilutions, ct_values, linreg):
"""
Creates a new entry in self._computed_values for the newly computed
efficiency.
"""
self._computed_values[assay.id()] = EfficiencyCurve(dilutions=dilutions, ct_values=ct_values, model=linreg, efficiency=assay.efficiency())
# also add the Id of the Assay to the _EfficiencyCurve
self._computed_values[assay.id()].id(assay.id())
def _compute_efficiency(self, regression_line):
"""
Calculates the efficiency from the regression line slope.
"""
slope = regression_line.slope
efficiency = -1 / slope
efficiency = np.exp(efficiency)
efficiency -= 1
efficiency = round(efficiency, 4)
return efficiency
def _subset_calibrators(self, names, calibrators, df):
"""
Generates a dataframe subset containing only calibrator replicates.
"""
# generate a total query formula for all found calibrators
q = names[calibrators]
q = "' or group_name == '".join(q)
q = "group_name == '" + q + "'"
df = df.query(q)
return df
def _infer_dilution_steps(self, df):
"""
Infers the dilution steps from the group names if they are specified
as `calibrator: some_name: dilution` e.g. `calibrator: mysample: 5`.
"""
try:
# get dilution steps from groupnames in format calibrator: name : dil
steps = df["group_name"].apply(lambda x: float(x.split(":")[2]))
steps = steps.to_numpy()
# preprocess to get proper format
self._dilution(steps)
# get and transform to log-scale
dilutions = self._dilution()
dilutions = np.log(dilutions)
self._reset_dilution()
return dilutions
except Exception as e:
logger.error(e)
e = aw.CalibratorError("could_not_infer_dilution")
logger.critical(e)
raise e
def _generate_dilution_steps(self, df):
"""
Generates a numpy ndarray of log-scaled dilution steps
for the calibrators.
"""
# generate steps range
# we shall use the concept of (dilution)^m to generate
# the dilution steps. To get m we use a re-anchored df["group"]
# column
self._reset_groups(df)
steps = df["group"]
counts = df["group"].value_counts(sort=False)
repeats = steps.size if isinstance(self._dilution(), float) else counts
# repeat the dilution steps to match the group replicate numbers
dilutions = np.repeat(self._dilution(), repeats)
# if we only had a single value for the dilution we
# also need to now scale the dilutions to generate the
# actual dilution scale. If we already got a dilution series
# as input, we must not do this as we otherwise doubly scale...
if isinstance(self._dilution(), float):
dilutions = dilutions**steps
# save dilutions
self._dilution(dilutions)
# and transform to log scale
dilutions = np.log(dilutions)
# and reset the diltion back to the what was
# originally set (or None from init)
self._reset_dilution()
return dilutions
def _reset_dilution(self):
"""
Resets the to the default dilution step to ensure
the same starting conditions are met for each assay
as it is passed through the Calibrator.
"""
self._dilution_step = self._orig_dilution
def _reset_groups(self, df):
"""
Resets the numeric group identifiers to start continuously from 0.
This method sets the initial group to 0 and then successively resets any
gaps to match e.g. a 0,1,3 -> 0,1,2...
"""
# get counts of each group_name in the df
counts = df["group_name"].value_counts(sort=False)
# generate new numeric identifiers for each group
new_groups = np.arange(len(counts))
# transform to match the right repeats
new_groups = np.repeat(new_groups, counts)
# and set new groups
df["group"] = new_groups
def _has_calibrator_prefix(self, string):
"""
Checks if the calibrator prefix is the start of a string
The string is a replicate group name in this case...
"""
calibrator_prefix = defaults.calibrator_prefix
return string.startswith(calibrator_prefix)
def _get_efficiency(self, assay: Assay):
"""
Returns the efficiency from the currently loaded effiencies
that corresponds to the assay's Id. Returns None if no match is present.
"""
id = assay.id()
effs = self.get()
if id not in effs.keys():
return None
else:
return effs[id]
def _load(self, filename):
"""
Loads a csv file but does not adobt the data as its own yet.
Returns a dictionary.
"""
# current = json.load( open(filename, "r") )
current = pd.read_csv(filename)
current = current.to_dict("split")["data"]
current = {id: eff for id, eff in current}
return current
def _save(self, filename, dict_to_save):
"""
Saves a dictionary to a csv file.
"""
# json.dump( dict_to_save , open(filename, "w") )
df = pd.DataFrame(dict_to_save, index=["eff"])
df = df.transpose().reset_index()
df.to_csv(filename, index=False)
__default_Calibrator__ = Calibrator()
"""The default Calibrator"""