Source code for smpl.fit

"""
Simplified Fitting.

Uses scipy.curve_fit (no x errors) or scipy.odr (with x errors).
"""
import enum

import numpy as np
import uncertainties.unumpy as unp
from numpy.linalg import LinAlgError
from tqdm import tqdm

from smpl import data, debug, doc, functions, stat, util, wrap

unv = unp.nominal_values
usd = unp.std_devs


[docs]class Fitter(enum.Enum): """ Different implementations to perform a fit. """ AUTO = 0 SCIPY_CURVEFIT = 1 SCIPY_ODR = 2 MINUIT_LEASTSQUARES = 3
default = { "params": [ None, "Initial fit parameters", ], # 'frange': [None, "Limit the fit to given range. First integer is the lowest and second the highest index.", ], # 'fselector': [None, "Function that takes ``x`` and ``y`` as parameters and returns an array mask in order to limit the data points for fitting. Alternatively a mask for selecting elements from datax and datay.", ], "fixed_params": [ True, "Enable fixing parameters by choosing the same-named variables from ``kwargs``.", ], # 'sortbyx': [True, "Enable sorting the x and y data so that x is sorted.", ], "maxfev": [ 10000, "Maximum function evaluations during fitting.", ], "epsfcn": [ 0.0001, "Suitable step length for jacobian approximation.", ], "xvar": [ None, "Variable in fit function parameters that corresponds to the x axis. If it is None the last of the alphabetical sorted parameters is used.", ], # 'bins': [0, "Number of bins for histogram", ], # 'binunc': [stat.poisson_dist, "Number of bins for histogram", ], "autotqdm": [ True, "Auto fitting display tqdm", ], # 'xerror': [True, "enable xerrors"], # 'yerror': [True, "enable yerrors"], "fitter": [Fitter.AUTO, "Choose from :class:`Fitter`s."], } # @doc.insert_str("\tDefault kwargs\n\n\t")
[docs]@doc.append_doc(data.data_kwargs) @doc.append_str("\t") @doc.append_str(doc.array_table(default, init=False)) @doc.append_str( doc.array_table({"fit_kwargs": ["default", "description"]}, bottom=False) ) @doc.append_str("\n\n") def fit_kwargs(kwargs): """Set default :func:`fit_kwargs` if not set.""" kwargs = data.data_kwargs(kwargs) for k, v in default.items(): if not k in kwargs: kwargs[k] = v[0] return kwargs
# @append_doc(default_kwargs)
[docs]def auto(datax, datay, funcs=None, **kwargs): """ Automatically loop over functions and fit the best one. Parameters ---------- funcs : function array functions to consider as fit. Default all ``smpl.functions``. **kwargs : optional see :func:`fit_kwargs`. Returns ------- The best fit function and it's parameters and a ``lambda`` where the parameters are already applied to the function. """ kwargs = fit_kwargs(kwargs) min_sq = None best_f = None best_ff = None if funcs is None: funcs = list(functions.__dict__.values()) for f in tqdm(funcs, disable=not kwargs["autotqdm"]): if callable(f): try: ff = fit(datax, datay, f, **kwargs) fy = f(datax, *ff) sum_sq = ( np.sum((fy - datay) ** 2) + np.sum((fy + usd(fy) - datay) ** 2) + np.sum((fy - usd(fy) - datay) ** 2) ) except (ValueError, LinAlgError, OverflowError) as ve: debug.msg(ve) continue if min_sq is None or sum_sq < min_sq: min_sq = sum_sq best_f = f best_ff = ff # if not best_f is None: # fit(datax,datay,best_f,**kwargs) return best_f, best_ff, lambda x: best_f(x, *best_ff)
[docs]def fit(datax, datay, function, **kwargs): """ Returns a fit of ``function`` to ``datax`` and ``datay``. Parameters ---------- datax : array_like X data either as ``unp.uarray`` or ``np.array`` or ``list`` datay : array_like Y data either as ``unp.uarray`` or ``np.array`` or ``list`` function : func Fit function with parameters: ``x``, ``params`` **kwargs : optional see :func:`fit_kwargs`. """ kwargs = fit_kwargs(kwargs) x, y, xerr, yerr = fit_split(datax, datay, **kwargs) tmp, params, fixed, Ntot = _wrap_func_and_param(function, **kwargs) fitter = kwargs["fitter"] if fitter is Fitter.AUTO: if xerr is not None: fitter = Fitter.SCIPY_ODR else: fitter = Fitter.SCIPY_CURVEFIT if fitter is Fitter.MINUIT_LEASTSQUARES: fitt = _fit_minuit_leastsquares(x, y, tmp, params=params, yerr=yerr) elif fitter is Fitter.SCIPY_CURVEFIT: fitt = _fit_curvefit(x, y, tmp, params=params, yerr=yerr) elif fitter is Fitter.SCIPY_ODR: fitt = _fit_odr(x, y, tmp, params=params, xerr=xerr, yerr=yerr) else: raise ValueError("Unknown fitter: {}".format(fitter)) return _unwrap_param(fitt, fixed, Ntot)
def _wrap_func_and_param(function, **kwargs): """ Wraps a function with a lambda function. """ params = None if util.has("params", kwargs): params = kwargs["params"] fixed = {} vnames = wrap.get_varnames(function, kwargs["xvar"]) Ntot = len(vnames) - 1 if util.has("fixed_params", kwargs) and kwargs["fixed_params"]: for i in range(1, len(vnames)): if util.has(vnames[i], kwargs): fixed[i] = kwargs[vnames[i]] # Count parameters for function if params is None: N = len(vnames) params = [1 for i in range(N - 1)] tmp_params = [] for i, pi in enumerate(params): if not util.has(i + 1, fixed): tmp_params += [pi] params = tmp_params N = len(params) wlambda = wrap.get_lambda(function, kwargs["xvar"]) def _wrapped_func(x0, *x): """ wrapper for function with fixed parameters applied. """ tmp_x = [] j = 1 # print(x) for i in range(1, Ntot + 1): # print(i," ",j) if not util.has(i, fixed): tmp_x += [x[j - 1]] # print(x[j]) j = j + 1 else: tmp_x += [fixed[i]] # print(Ntot) # print(tmp_x) return unv(wlambda(x0, *tmp_x)) return _wrapped_func, params, fixed, Ntot def _unwrap_param(fitt, fixed, Ntot): rfit = [] j = 0 for i in range(1, Ntot + 1): if not util.has(i, fixed): rfit += [fitt[j]] j = j + 1 else: rfit += [fixed[i]] return rfit
[docs]@doc.insert_doc(stat.Chi2) def Chi2(datax, datay, function, ff, **kwargs): kwargs = fit_kwargs(kwargs) x, y, _, yerr = fit_split(datax, datay, **kwargs) sigmas = yerr return stat.Chi2(y, unv(function(x, *ff)), sigmas)
[docs]@doc.insert_doc(stat.R2) def R2(datax, datay, function, ff, **kwargs): kwargs = fit_kwargs(kwargs) x, y, _, _ = fit_split(datax, datay, **kwargs) return stat.R2(y, unv(function(x, *ff)))
[docs]def data_split(datax, datay, **kwargs): """ Split data + errors """ return data.__data_split(datax, datay, **kwargs)
[docs]def fit_split(datax, datay, **kwargs): """ Splits datax and datay into (x,y,xerr,yerr). Parameters ---------- **kwargs : optional see :func:`fit_kwargs`. """ kwargs = fit_kwargs(kwargs) return data.filtered_data_split(datax, datay, **kwargs)
if __name__ == "__main__": import doctest doctest.testmod() import uncertainties as unc def _fit_minuit_leastsquares(datax, datay, function, yerr, params=None, **kwargs): # everything in iminuit is done through the Minuit object, so we import it from iminuit import Minuit # we also need a cost function to fit and import the LeastSquares function from iminuit.cost import LeastSquares from iminuit.util import Matrix # TODO check/add params if params is None: params = [] least_squares = LeastSquares( datax, datay, yerr if yerr is not None else 1, function ) m = Minuit(least_squares, *params) m.migrad() m.hesse() # print(m.values) # print(m.covariance) # print("chi2 = ", m.fval) # print("ndof = ", len(datax) - m.nfit) # fix slice issue from iminuites rewritten __getitem__ by using super return unc.correlated_values(m.values, super(Matrix, m.covariance)) import warnings from scipy import optimize from scipy.odr import ODR, Model, RealData # fittet ein dataset mit gegebenen x und y werten, eine funktion und ggf. anfangswerten und y-Fehler # gibt die passenden parameter der funktion, sowie dessen unsicherheiten zurueck # # https://stackoverflow.com/questionsquestions/14581358/getting-standard-errors-on-fitted-parameters-using-the-optimize-leastsq-method-i# # Updated on 4/6/2016 # User: https://stackoverflow.com/users/1476240/pedro-m-duarte def _fit_curvefit(datax, datay, function, params=None, yerr=None, **kwargs): try: pfit, pcov = optimize.curve_fit( function, datax, datay, p0=params, sigma=yerr, epsfcn=util.get("epsfcn", kwargs, 0.0001), **kwargs, maxfev=util.get("maxfev", kwargs, 10000) ) except RuntimeError as e: debug.msg(str(e)) return params error = [] for i in range(len(pfit)): try: error.append(np.absolute(pcov[i][i]) ** 0.5) except Exception as e: warnings.warn(str(e)) error.append(0.00) # print(pcov) # restore cov: unc.covariance_matrix([*ff]) return unc.correlated_values(pfit, pcov) # https://stackoverflow.com/a/52592811 def _fit_odr(datax, datay, function, params=None, yerr=None, xerr=None): model = Model(lambda p, x: function(x, *p)) realdata = RealData(datax, datay, sy=yerr, sx=xerr) odr = ODR(realdata, model, beta0=params) out = odr.run() # This was the old wrong way! Now use correct co. matrix through unc-package! # Note Issues on scipy odr and curve_fit, regarding different definitions/namings of standard deviation or error and covaraince matrix # https://github.com/scipy/scipy/issues/6842 # https://github.com/scipy/scipy/pull/12207 # https://stackoverflow.com/questions/62460399/comparison-of-curve-fit-and-scipy-odr-absolute-sigma return unc.correlated_values(out.beta, out.cov_beta)