Source code for gasfir.fitting

"""Residual functions for fitting GASFIR parameters to experimental data.

``lmfit`` is an optional dependency (``pip install gasfir[retrieval]``).
The residual functions defined here accept ``lmfit.Parameters`` objects
duck-typed — they never call any ``lmfit`` constructor themselves, so the
import is only required for type-checking tools, not at runtime.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Callable, Optional, Union

if TYPE_CHECKING:
    import lmfit

import numpy as np
import numpy.typing as npt
import pandas

from .kernels import (
    KernelParams,
)
from .kernels import (
    get_diabatic_ionization_probability as _get_diabatic_ionization_probability,
)
from .kernels import (
    get_diabatic_ionization_probability_batch,
    get_quasi_static_rate_for_field,
    precompute_pulse_batch,
)

_log = logging.getLogger(__name__)


def ret_residual_function(
    data_Nadiabatic: pandas.DataFrame,
    uncertainty_Nadiabatic: Union[npt.NDArray[np.float64], float] = 5e-2,
    data_QS: Optional[pandas.DataFrame] = None,
    uncertainty_QS: Union[npt.NDArray[np.float64], float] = 5e-2,
    dt: float = 2,
    dT: float = 0.5,
    ret_electron_density: bool = False,
) -> Callable[[lmfit.Parameters], npt.NDArray[np.float64]]:
    """Build and return a residual callable for use with :mod:`lmfit`.

    Heavy pre-computation (pulse batch pre-computation) is done once here.
    The returned function only takes ``lmfit.Parameters`` and is cheap to
    call repeatedly during optimization.

    Args:
        data_Nadiabatic: DataFrame with columns ``"pulses"`` (laser pulse objects)
            and ``"Y"`` (target non-adiabatic ionization probabilities).
        uncertainty_Nadiabatic: Uncertainty (or per-point weight array) for the
            non-adiabatic data. Used to normalize residuals. Default is 5%.
        data_QS: Optional DataFrame with columns ``"field"`` and ``"Y"``
            (log of quasi-static ionization rates). When provided, residuals from
            both models are concatenated.
        uncertainty_QS: Uncertainty for the quasi-static data. Default is 5%.
        dt: Coarse time step (atomic units) for pulse pre-computation. Default is 2.
        dT: Fine time step (atomic units) for pulse pre-computation. Default is 0.5.
        ret_electron_density: If ``True``, return electron density instead of
            ionization probability. Default is ``False``.

    Returns:
        A callable ``residual(params) -> np.ndarray`` suitable for
        :func:`lmfit.minimize`.
    """
    _log.info("Initializing optimization engine — pre-computing pulse batch...")

    pulse_list = data_Nadiabatic.pulses.tolist()
    precomputed_tuple = precompute_pulse_batch(pulse_list, dt, dT)
    target_y = data_Nadiabatic["Y"].values

    def _residual_na(params: lmfit.Parameters) -> npt.NDArray[np.float64]:
        param_dict = params.valuesdict()
        valid_keys = KernelParams._fields
        filtered_dict = {k: v for k, v in param_dict.items() if k in valid_keys}
        kernel_params = KernelParams(**filtered_dict)
        probs = get_diabatic_ionization_probability_batch(
            kernel_params, precomputed_tuple, ret_electron_density
        )
        return (probs - target_y) / uncertainty_Nadiabatic

    if data_QS is None:
        _log.info("Using non-adiabatic probabilities only.")
        return _residual_na

    _log.info("Concatenating non-adiabatic and quasi-static residuals.")
    field_ar = data_QS["field"].to_numpy()
    QS_y = data_QS["Y"].to_numpy()

    def residuals_concat_NA_QS(params: lmfit.Parameters) -> npt.NDArray[np.float64]:
        param_dict = params.valuesdict()
        rate_qs = get_quasi_static_rate_for_field(field_ar, param_dict)
        if np.any(rate_qs <= 0):
            res_qs = np.full_like(QS_y, np.inf) / uncertainty_QS
        else:
            res_qs = (np.log(rate_qs) - QS_y) / uncertainty_QS
        return np.concatenate((_residual_na(params), res_qs))

    return residuals_concat_NA_QS


_get_diabatic_ionization_probability_vec = np.vectorize(
    _get_diabatic_ionization_probability,
    otypes=[float],
    # Exclude scalar arguments so np.vectorize only iterates over `pulse`.
    excluded={"param_dict", "dt", "dT", "ret_electron_density", "kernel_type"},
)


[docs] def residual_P( params: lmfit.Parameters, data_Nadiabatic: pandas.DataFrame, uncertainty_Nadiabatic: Union[npt.NDArray[np.float64], float] = 5e-2, dt: float = 2, dT: float = 0.5, ) -> npt.NDArray[np.float64]: """Compute normalized residuals for non-adiabatic ionization probabilities. Args: params: lmfit parameter set passed to the kernel. data_Nadiabatic: DataFrame with ``"pulses"`` and ``"Y"`` columns. uncertainty_Nadiabatic: Per-point weight or scalar uncertainty. Default is 5%. dt: Coarse time step in atomic units. Default is 2. dT: Fine time step in atomic units. Default is 0.5. Returns: Array of normalized residuals ``(model - data) / uncertainty``. """ return ( _get_diabatic_ionization_probability_vec( pulse=data_Nadiabatic.pulses.to_numpy(), param_dict=params.valuesdict(), dt=dt, dT=dT, ) - data_Nadiabatic["Y"].to_numpy() ) / uncertainty_Nadiabatic
def residual_electronDensity( params: lmfit.Parameters, data_Nadiabatic: pandas.DataFrame, uncertainty_Nadiabatic: Union[npt.NDArray[np.float64], float] = 5e-2, dt: float = 2, dT: float = 0.5, ) -> npt.NDArray[np.float64]: """Compute normalized residuals for electron density. Args: params: lmfit parameter set passed to the kernel. data_Nadiabatic: DataFrame with ``"pulses"`` and ``"Y"`` columns. uncertainty_Nadiabatic: Per-point weight or scalar uncertainty. Default is 5%. dt: Coarse time step in atomic units. Default is 2. dT: Fine time step in atomic units. Default is 0.5. Returns: Array of normalized residuals ``(model - data) / uncertainty``. """ return ( _get_diabatic_ionization_probability_vec( pulse=data_Nadiabatic.pulses.to_numpy(), param_dict=params.valuesdict(), dt=dt, dT=dT, ret_electron_density=True, ) - data_Nadiabatic["Y"].to_numpy() ) / uncertainty_Nadiabatic
[docs] def residual_QS( params: lmfit.Parameters, data_QS: pandas.DataFrame, uncertainty_QS: Union[npt.NDArray[np.float64], float] = 5e-2, ) -> npt.NDArray[np.float64]: """Compute normalized residuals for quasi-static ionization rates. Args: params: lmfit parameter set passed to the kernel. data_QS: DataFrame with ``"field"`` and ``"Y"`` columns; ``"Y"`` stores the *log* of measured ionization rates. uncertainty_QS: Per-point weight or scalar uncertainty. Default is 5%. Returns: Array of normalized residuals in log-rate space. """ rate = get_quasi_static_rate_for_field( data_QS["field"].to_numpy(), params.valuesdict() ) if np.any(rate <= 0): return np.full_like(data_QS["Y"], np.inf) / uncertainty_QS return (np.log(rate) - data_QS["Y"]) / uncertainty_QS
[docs] def get_residuals_concat_QS_NA( params: lmfit.Parameters, data_Nadiabatic: pandas.DataFrame, uncertainty_Nadiabatic: float, data_QS: pandas.DataFrame, uncertainty_QS: Union[npt.NDArray[np.float64], float] = 5e-2, dt: float = 2, dT: float = 0.5, ) -> npt.NDArray[np.float64]: """Concatenate non-adiabatic and quasi-static residuals into a single array. Convenience wrapper around :func:`residual_P` and :func:`residual_QS` for simultaneous fitting to both datasets. Args: params: lmfit parameter set. data_Nadiabatic: DataFrame with ``"pulses"`` and ``"Y"`` columns. uncertainty_Nadiabatic: Uncertainty for the non-adiabatic data. data_QS: DataFrame with ``"field"`` and ``"Y"`` columns. uncertainty_QS: Uncertainty for the quasi-static data. Default is 5%. dt: Coarse time step in atomic units. Default is 2. dT: Fine time step in atomic units. Default is 0.5. Returns: Concatenated array of residuals from both datasets. """ return np.concatenate( ( residual_P(params, data_Nadiabatic, uncertainty_Nadiabatic, dt, dT), residual_QS(params, data_QS, uncertainty_QS), ) )