"""
Simplified Fitting.
Uses scipy.curve_fit (no x errors) or scipy.odr (with x errors).
"""
import enum
import numpy as np
import uncertainties.unumpy as unp
from numpy.linalg import LinAlgError
from tqdm import tqdm
from smpl import data, debug, doc, functions, stat, util, wrap
from smpl.util.util import dict_unique_by_value
unv = unp.nominal_values
usd = unp.std_devs
[docs]
class Fitter(enum.Enum):
"""
Different implementations to perform a fit.
"""
AUTO = 0
SCIPY_CURVEFIT = 1
SCIPY_ODR = 2
MINUIT_LEASTSQUARES = 3
default = {
"params": [
None,
"Initial fit parameters",
],
# 'frange': [None, "Limit the fit to given range. First integer is the lowest and second the highest index.", ],
# 'fselector': [None, "Function that takes ``x`` and ``y`` as parameters and returns an array mask in order to limit the data points for fitting. Alternatively a mask for selecting elements from datax and datay.", ],
"fixed_params": [
True,
"Enable fixing parameters by choosing the same-named variables from ``kwargs``.",
],
# 'sortbyx': [True, "Enable sorting the x and y data so that x is sorted.", ],
"maxfev": [
10000,
"Maximum function evaluations during fitting.",
],
"epsfcn": [
0.0001,
"Suitable step length for jacobian approximation.",
],
"xvar": [
None,
"Variable in fit function parameters that corresponds to the x axis. If it is None the last of the alphabetical sorted parameters is used.",
],
# 'bins': [0, "Number of bins for histogram", ],
# 'binunc': [stat.poisson_dist, "Number of bins for histogram", ],
"autotqdm": [
True,
"Auto fitting display tqdm",
],
# 'xerror': [True, "enable xerrors"],
# 'yerror': [True, "enable yerrors"],
"fitter": [Fitter.AUTO, "Choose from :class:`Fitter`s."],
}
# @doc.insert_str("\tDefault kwargs\n\n\t")
[docs]
@doc.append_doc(data.data_kwargs)
@doc.append_str("\t")
@doc.append_str(doc.array_table(default, init=False))
@doc.append_str(
doc.array_table({"fit_kwargs": ["default", "description"]}, bottom=False)
)
@doc.append_str("\n\n")
def fit_kwargs(kwargs):
"""Set default :func:`fit_kwargs` if not set."""
kwargs = data.data_kwargs(kwargs)
for k, v in default.items():
if k not in kwargs:
kwargs[k] = v[0]
return kwargs
# @append_doc(default_kwargs)
[docs]
def auto(datax, datay, funcs=None, **kwargs):
"""
Automatically loop over functions and fit the best one.
Parameters
----------
funcs : function array
functions to consider as fit. Default all ``smpl.functions``.
**kwargs : optional
see :func:`fit_kwargs`.
Returns
-------
The best fit function and it's parameters and a ``lambda`` where the parameters are already applied to the function.
"""
kwargs = fit_kwargs(kwargs)
min_sq = None
best_f = None
best_ff = None
if funcs is None:
# TODO this contains aliased functions
funcs = list(dict_unique_by_value(functions.__dict__).values())
# gives weird fitting errors...
funcs.remove(functions.fac)
for f in tqdm(funcs, disable=not kwargs["autotqdm"]):
if callable(f):
try:
ff = fit(datax, datay, f, **kwargs)
fy = f(datax, *ff)
sum_sq = (
np.sum((fy - datay) ** 2)
+ np.sum((fy + usd(fy) - datay) ** 2)
+ np.sum((fy - usd(fy) - datay) ** 2)
)
except (ValueError, LinAlgError, OverflowError) as ve:
debug.msg(ve)
continue
if min_sq is None or sum_sq < min_sq:
min_sq = sum_sq
best_f = f
best_ff = ff
# if not best_f is None:
# fit(datax,datay,best_f,**kwargs)
return best_f, best_ff, lambda x: best_f(x, *best_ff)
[docs]
def fit(datax, datay, function, **kwargs):
"""
Returns a fit of ``function`` to ``datax`` and ``datay``.
Parameters
----------
datax : array_like
X data either as ``unp.uarray`` or ``np.array`` or ``list``
datay : array_like
Y data either as ``unp.uarray`` or ``np.array`` or ``list``
function : func
Fit function with parameters: ``x``, ``params``
**kwargs : optional
see :func:`fit_kwargs`.
"""
kwargs = fit_kwargs(kwargs)
x, y, xerr, yerr = fit_split(datax, datay, **kwargs)
tmp, params, fixed, Ntot = _wrap_func_and_param(function, **kwargs)
fitter = kwargs["fitter"]
if fitter is Fitter.AUTO:
if xerr is not None:
fitter = Fitter.SCIPY_ODR
else:
fitter = Fitter.SCIPY_CURVEFIT
if fitter is Fitter.MINUIT_LEASTSQUARES:
fitt = _fit_minuit_leastsquares(x, y, tmp, params=params, yerr=yerr)
elif fitter is Fitter.SCIPY_CURVEFIT:
fitt = _fit_curvefit(x, y, tmp, params=params, yerr=yerr)
elif fitter is Fitter.SCIPY_ODR:
fitt = _fit_odr(x, y, tmp, params=params, xerr=xerr, yerr=yerr)
else:
raise ValueError(f"Unknown fitter: {fitter}")
return _unwrap_param(fitt, fixed, Ntot)
def _wrap_func_and_param(function, **kwargs):
"""
Wraps a function with a lambda function.
"""
params = None
if util.has("params", kwargs):
params = kwargs["params"]
fixed = {}
vnames = wrap.get_varnames(function, kwargs["xvar"])
Ntot = len(vnames) - 1
if util.has("fixed_params", kwargs) and kwargs["fixed_params"]:
for i in range(1, len(vnames)):
if util.has(vnames[i], kwargs):
fixed[i] = kwargs[vnames[i]]
# Count parameters for function
if params is None:
N = len(vnames)
params = [1 for i in range(N - 1)]
tmp_params = []
for i, pi in enumerate(params):
if not util.has(i + 1, fixed):
tmp_params += [pi]
params = tmp_params
N = len(params)
wlambda = wrap.get_lambda(function, kwargs["xvar"])
def _wrapped_func(x0, *x):
"""
wrapper for function with fixed parameters applied.
"""
tmp_x = []
j = 1
# print(x)
for i in range(1, Ntot + 1):
# print(i," ",j)
if not util.has(i, fixed):
tmp_x += [x[j - 1]]
# print(x[j])
j = j + 1
else:
tmp_x += [fixed[i]]
# print(Ntot)
# print(tmp_x)
return unv(wlambda(x0, *tmp_x))
return _wrapped_func, params, fixed, Ntot
def _unwrap_param(fitt, fixed, Ntot):
rfit = []
j = 0
for i in range(1, Ntot + 1):
if not util.has(i, fixed):
rfit += [fitt[j]]
j = j + 1
else:
rfit += [fixed[i]]
return rfit
[docs]
@doc.insert_doc(stat.Chi2)
def Chi2(datax, datay, function, ff, **kwargs):
kwargs = fit_kwargs(kwargs)
x, y, _, yerr = fit_split(datax, datay, **kwargs)
sigmas = yerr
return stat.Chi2(y, unv(function(x, *ff)), sigmas)
[docs]
@doc.insert_doc(stat.R2)
def R2(datax, datay, function, ff, **kwargs):
kwargs = fit_kwargs(kwargs)
x, y, _, _ = fit_split(datax, datay, **kwargs)
return stat.R2(y, unv(function(x, *ff)))
[docs]
def data_split(datax, datay, **kwargs):
"""
Split data + errors
"""
return data.__data_split(datax, datay, **kwargs)
[docs]
def fit_split(datax, datay, **kwargs):
"""
Splits datax and datay into (x,y,xerr,yerr).
Parameters
----------
**kwargs : optional
see :func:`fit_kwargs`.
"""
kwargs = fit_kwargs(kwargs)
return data.filtered_data_split(datax, datay, **kwargs)
if __name__ == "__main__":
import doctest
doctest.testmod()
import uncertainties as unc
def _fit_minuit_leastsquares(datax, datay, function, yerr, params=None, **kwargs):
# everything in iminuit is done through the Minuit object, so we import it
from iminuit import Minuit
# we also need a cost function to fit and import the LeastSquares function
from iminuit.cost import LeastSquares
from iminuit.util import Matrix
# TODO check/add params
if params is None:
params = []
least_squares = LeastSquares(
datax, datay, yerr if yerr is not None else 1, function
)
m = Minuit(least_squares, *params)
m.migrad()
m.hesse()
# print(m.values)
# print(m.covariance)
# print("chi2 = ", m.fval)
# print("ndof = ", len(datax) - m.nfit)
# fix slice issue from iminuites rewritten __getitem__ by using super
return unc.correlated_values(m.values, super(Matrix, m.covariance))
import warnings
from scipy import optimize
from scipy.odr import ODR, Model, RealData
# fittet ein dataset mit gegebenen x und y werten, eine funktion und ggf. anfangswerten und y-Fehler
# gibt die passenden parameter der funktion, sowie dessen unsicherheiten zurueck
#
# https://stackoverflow.com/questionsquestions/14581358/getting-standard-errors-on-fitted-parameters-using-the-optimize-leastsq-method-i#
# Updated on 4/6/2016
# User: https://stackoverflow.com/users/1476240/pedro-m-duarte
def _fit_curvefit(datax, datay, function, params=None, yerr=None, **kwargs):
try:
pfit, pcov = optimize.curve_fit(
function,
datax,
datay,
p0=params,
sigma=yerr,
epsfcn=util.get("epsfcn", kwargs, 0.0001),
**kwargs,
maxfev=util.get("maxfev", kwargs, 10000),
)
except RuntimeError as e:
debug.msg(str(e))
return params
error = []
for i in range(len(pfit)):
try:
error.append(np.absolute(pcov[i][i]) ** 0.5)
except Exception as e:
warnings.warn(str(e))
error.append(0.00)
# print(pcov)
# restore cov: unc.covariance_matrix([*ff])
return unc.correlated_values(pfit, pcov)
# https://stackoverflow.com/a/52592811
def _fit_odr(datax, datay, function, params=None, yerr=None, xerr=None):
model = Model(lambda p, x: function(x, *p))
realdata = RealData(datax, datay, sy=yerr, sx=xerr)
odr = ODR(realdata, model, beta0=params)
out = odr.run()
# This was the old wrong way! Now use correct co. matrix through unc-package!
# Note Issues on scipy odr and curve_fit, regarding different definitions/namings of standard deviation or error and covaraince matrix
# https://github.com/scipy/scipy/issues/6842
# https://github.com/scipy/scipy/pull/12207
# https://stackoverflow.com/questions/62460399/comparison-of-curve-fit-and-scipy-odr-absolute-sigma
return unc.correlated_values(out.beta, out.cov_beta)