"""Broad Balmer component fitting with BIC model selection.
Detects broad Hα and Hβ components by comparing narrow-only vs
narrow+broad models using the Bayesian Information Criterion (BIC).
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Callable
import numpy as np
from scipy.optimize import least_squares
from .constraints import ConstraintSet
from .continuum import fit_continuum
from .fitter import FitResult, LineResult, _grating_bounds, fit_lines
from .io import Spectrum, _flam_to_ujy, _ujy_to_flam
from .lines import REST_LINES_A, get_line_list, observable_lines
from .models import build_model, pixel_weight
from .resolution import resolve_R, sigma_inst_A
logger = logging.getLogger(__name__)
# Intermediate broad component velocity bounds (km/s).
# FWHM ~ 500–2000 km/s corresponds to σ_v ~ 210–850 km/s.
# Physical origin: AGN narrow-line region outflows, turbulent ISM.
# References: Heckman et al. (1981), Zakamska & Greene (2014),
# Förster Schreiber et al. (2019) for star-forming outflows.
BROAD1_SIGMA_V_LO = 210.0 # km/s — lower bound (FWHM ~ 500 km/s)
BROAD1_SIGMA_V_SEED = 420.0 # km/s — seed (FWHM ~ 1000 km/s)
BROAD1_SIGMA_V_HI = 850.0 # km/s — upper bound (FWHM ~ 2000 km/s)
# Very broad (BLR) component velocity bounds (km/s).
# FWHM ~ 2000–5000 km/s corresponds to σ_v ~ 850–2120 km/s.
# Physical origin: virial motions in the broad-line region.
# References: Greene & Ho (2005), Shen et al. (2011).
BROAD2_SIGMA_V_LO = 850.0 # km/s — lower bound (FWHM ~ 2000 km/s)
BROAD2_SIGMA_V_SEED = 1270.0 # km/s — seed (FWHM ~ 3000 km/s)
BROAD2_SIGMA_V_HI = 2120.0 # km/s — upper bound (FWHM ~ 5000 km/s)
# BIC threshold for accepting a more complex model.
BIC_DELTA_THRESHOLD = 6.0
# Broad line definitions — lines that get a broad component.
# NII_6549 and NII_6585 are included in the Ha complex so that the
# BIC comparison accounts for the full blend and prevents broad Ha
# from absorbing narrow NII flux.
BROAD_CANDIDATES = ["Ha", "HBETA", "HDELTA", "HGAMMA"]
# Lines whose narrow kinematics are tied to Ha during broad fitting
# to guard against the broad component leaking into NII.
HA_NII_COMPLEX = ["Ha", "NII_6549", "NII_6585"]
# Forbidden [OIII] lines eligible for an independent broad (outflow)
# component. Independent of the Balmer BIC test: an outflow can be
# present in [OIII] without showing in Hα and vice versa.
OIII_BROAD_CANDIDATES = ["OIII_5007", "OIII_4959"]
# Permitted He I lines eligible for an independent broad component.
# All trace the He+ recombination zone (IP ~24.6 eV), so within each
# broad tier they share kinematics (anchored on first present). Order
# preference: bright optical lines first so HEI_5877 / HEI_6680 tend
# to anchor when available.
HEI_BROAD_CANDIDATES = [
"HEI_5877", "HEI_6680", "HEI_4472", "HEI_7067",
"HEI_3889", "HEI_4027", "HEI_4145",
]
# Speed of light in km/s for velocity calculations.
_C_KMS = 2.99792458e5
# Velocity half-window for Balmer pixel mask (km/s).
_BALMER_MASK_DV = 3000.0
# Velocity half-window for [OIII] pixel mask (km/s).
_OIII_MASK_DV = 3000.0
# Velocity half-window for He I pixel mask (km/s).
_HEI_MASK_DV = 3000.0
[docs]
@dataclass
class BroadFitResult:
"""Result of broad component model selection.
Attributes
----------
best_fit : FitResult
The selected best-fit result.
selected_model : str
Model name: ``"narrow"``, ``"broad1"``, ``"broad2"``, or ``"both"``.
bic_narrow : float
Median BIC for narrow-only model (from bootstrap when used).
bic_broad1 : float
Median BIC for narrow + BROAD1 model (NaN if not attempted).
bic_broad2 : float
Median BIC for narrow + BROAD2 model (NaN if not attempted).
bic_both : float
Median BIC for narrow + BROAD1 + BROAD2 model (NaN if not attempted).
all_fits : dict[str, FitResult]
All fitted models for inspection.
bic_bootstrap : dict[str, np.ndarray]
Full BIC distributions from bootstrap iterations. Empty if
bootstrap BIC was not run.
"""
best_fit: FitResult
selected_model: str
bic_narrow: float
bic_broad1: float
bic_broad2: float
bic_both: float
all_fits: dict[str, FitResult]
bic_bootstrap: dict[str, np.ndarray] = field(default_factory=dict)
# --- Independent [OIII] outflow component selection ---
# selected variant: one of "off" | "broad1" | "broad2" | "both"
oiii_selected: str = "off"
bic_oiii_off: float = float("nan")
bic_oiii_broad1: float = float("nan")
bic_oiii_broad2: float = float("nan")
bic_oiii_both: float = float("nan")
bic_oiii_bootstrap: dict[str, np.ndarray] = field(default_factory=dict)
# --- Independent He I broad component selection ---
hei_selected: str = "off"
bic_hei_off: float = float("nan")
bic_hei_broad1: float = float("nan")
bic_hei_broad2: float = float("nan")
bic_hei_both: float = float("nan")
bic_hei_bootstrap: dict[str, np.ndarray] = field(default_factory=dict)
@property
def oiii_broad_selected(self) -> bool:
"""Convenience: True if any OIII broad component was selected."""
return self.oiii_selected != "off"
@property
def hei_broad_selected(self) -> bool:
"""Convenience: True if any HeI broad component was selected."""
return self.hei_selected != "off"
# Delegate common FitResult attributes to best_fit for API compatibility.
@property
def lines(self) -> dict:
"""Per-line results (delegates to ``best_fit.lines``)."""
return self.best_fit.lines
@property
def params(self) -> np.ndarray:
"""Best-fit parameter vector."""
return self.best_fit.params
@property
def model_flux(self) -> np.ndarray:
"""Best-fit emission-line model."""
return self.best_fit.model_flux
@property
def continuum(self) -> np.ndarray:
"""Best-fit continuum."""
return self.best_fit.continuum
@property
def residuals(self) -> np.ndarray:
"""Fit residuals."""
return self.best_fit.residuals
@property
def chi2(self) -> float:
"""Reduced chi-squared."""
return self.best_fit.chi2
@property
def spectrum(self):
"""Input spectrum."""
return self.best_fit.spectrum
@property
def line_names(self) -> list[str]:
"""Ordered line names."""
return self.best_fit.line_names
@property
def constraints(self):
"""Applied constraints."""
return self.best_fit.constraints
@property
def success(self) -> bool:
"""Whether the optimiser converged."""
return self.best_fit.success
def _compute_bic(
residuals: np.ndarray,
errors: np.ndarray,
valid: np.ndarray,
n_free: int,
) -> float:
"""Compute BIC = χ² + k·ln(N) on whitened residuals."""
r = residuals[valid] / errors[valid]
chi2 = float(np.sum(r**2))
N = int(np.sum(valid))
return chi2 + n_free * np.log(N)
def _balmer_pixel_mask(
wave_um: np.ndarray,
z: float,
dv: float = _BALMER_MASK_DV,
) -> np.ndarray:
"""Boolean mask selecting pixels within ±dv km/s of Hα and Hβ.
Parameters
----------
wave_um : np.ndarray
Observed wavelength in microns.
z : float
Source redshift.
dv : float
Velocity half-window in km/s (default 3000).
Returns
-------
np.ndarray
Boolean mask (True for Balmer pixels).
"""
mask = np.zeros(len(wave_um), dtype=bool)
for line_name in ("Ha", "HBETA"):
lam_rest_A = REST_LINES_A[line_name]
lam_obs_um = lam_rest_A * (1.0 + z) * 1e-4 # Å → µm
# ±dv km/s → wavelength window
lam_lo = lam_obs_um * (1.0 - dv / _C_KMS)
lam_hi = lam_obs_um * (1.0 + dv / _C_KMS)
mask |= (wave_um >= lam_lo) & (wave_um <= lam_hi)
return mask
def _oiii_pixel_mask(
wave_um: np.ndarray,
z: float,
dv: float = _OIII_MASK_DV,
) -> np.ndarray:
"""Boolean mask selecting pixels within ±dv km/s of [OIII] 5007 and 4959."""
mask = np.zeros(len(wave_um), dtype=bool)
for line_name in OIII_BROAD_CANDIDATES:
if line_name not in REST_LINES_A:
continue
lam_obs_um = REST_LINES_A[line_name] * (1.0 + z) * 1e-4
lam_lo = lam_obs_um * (1.0 - dv / _C_KMS)
lam_hi = lam_obs_um * (1.0 + dv / _C_KMS)
mask |= (wave_um >= lam_lo) & (wave_um <= lam_hi)
return mask
def _hei_pixel_mask(
wave_um: np.ndarray,
z: float,
candidates: list[str],
dv: float = _HEI_MASK_DV,
) -> np.ndarray:
"""Boolean mask selecting pixels within ±dv km/s of supplied He I lines."""
mask = np.zeros(len(wave_um), dtype=bool)
for line_name in candidates:
if line_name not in REST_LINES_A:
continue
lam_obs_um = REST_LINES_A[line_name] * (1.0 + z) * 1e-4
lam_lo = lam_obs_um * (1.0 - dv / _C_KMS)
lam_hi = lam_obs_um * (1.0 + dv / _C_KMS)
mask |= (wave_um >= lam_lo) & (wave_um <= lam_hi)
return mask
def _bic_bootstrap_single(
noise: np.ndarray,
spec: Spectrum,
z: float,
narrow_lines: list[str],
grating: str | None,
R: float | Callable | None,
continuum: np.ndarray,
deg: int,
narrow_fit: FitResult,
balmer_mask: np.ndarray,
variants: list[str],
sigma_factor: float = 1.0,
centroid_vmax: float = 500.0,
moving_average: bool | int = False,
tie_uv_doublets: bool = True,
tie_uv_centroids: bool = True,
tie_uv_widths: bool = True,
sigma_overrides: dict[str, tuple[float, float]] | None = None,
centroid_overrides: dict[str, tuple[float, float]] | None = None,
niv_doublet_ratio: float | None = None,
ciii_doublet_ratio: float | None = None,
oiii_variants: list[str] | None = None,
oiii_baseline_broad: str | None = None,
oiii_mask: np.ndarray | None = None,
hei_variants: list[str] | None = None,
hei_baseline_broad: str | None = None,
hei_baseline_oiii: str | None = None,
hei_mask: np.ndarray | None = None,
centroid_max_sigma: float = 1.0,
) -> dict[str, float]:
"""Run one BIC bootstrap iteration for all model variants.
Module-level function for joblib pickling.
Parameters
----------
noise : np.ndarray
Standard-normal noise vector (length = n_pix).
spec : Spectrum
Original spectrum.
z : float
Source redshift.
narrow_lines : list of str
Narrow line list.
grating, R, continuum, deg
Fitting configuration.
narrow_fit : FitResult
Narrow-only fit on real data (used to seed broad variants).
balmer_mask : np.ndarray
Boolean mask for Balmer pixels.
variants : list of str
Model variant names to fit (includes ``"narrow"``).
sigma_factor : float
Width upper-bound multiplier (default 1.0).
tie_uv_doublets : bool
Tie UV doublet kinematics (default ``False``).
sigma_overrides : dict, optional
Per-line sigma bounds in Angstroms ``{name: (lo, hi)}``.
niv_doublet_ratio : float, optional
Fixed F(1483)/F(1486) ratio for the NIV] doublet.
Returns
-------
dict[str, float]
``{variant_name: bic_value}`` for each variant.
"""
# Create perturbed spectrum.
perturbed = spec.copy()
perturbed.flux_ujy = spec.flux_ujy + noise * spec.err_ujy
results: dict[str, float] = {}
for variant in variants:
broad_type = None if variant == "narrow" else variant
try:
fit_v, _ = _fit_model_variant(
perturbed, z, narrow_lines, grating, R, continuum, deg,
broad_type=broad_type, n_boot=0,
narrow_fit=narrow_fit if broad_type is not None else None,
n_jobs=1, sigma_factor=sigma_factor,
centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average,
tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
)
except (ValueError, RuntimeError):
results[variant] = np.nan
continue
# Compute BIC on Balmer pixels only.
flam_err = _ujy_to_flam(perturbed.err_ujy, perturbed.wave_um)
flam_data = _ujy_to_flam(
perturbed.flux_ujy - fit_v.continuum, perturbed.wave_um
)
flam_model = _ujy_to_flam(fit_v.model_flux, perturbed.wave_um)
valid = perturbed.mask_valid() & balmer_mask
resid = flam_data - flam_model
if fit_v.constraints is not None:
n_free = int(np.sum(fit_v.constraints.free_mask()))
else:
n_free = len(fit_v.params)
results[variant] = _compute_bic(resid, flam_err, valid, n_free)
# ------------------------------------------------------------------
# Optional second-stage [OIII] broad BIC test. Independent of the
# Balmer comparison above: variants here are ``"oiii_off"`` and
# ``"oiii_on"``, both built on top of ``oiii_baseline_broad`` (the
# Balmer model picked in stage 1). BIC is evaluated on OIII pixels
# only so the test is sensitive to outflow signatures regardless of
# what happens in the Balmer complex.
# ------------------------------------------------------------------
if oiii_variants and oiii_mask is not None:
flam_err = _ujy_to_flam(perturbed.err_ujy, perturbed.wave_um)
for variant in oiii_variants:
# variant is one of "off"|"broad1"|"broad2"|"both"; map to
# the oiii_broad_type kwarg accepted by _fit_model_variant.
oiii_type = None if variant == "off" else variant
try:
fit_v, _ = _fit_model_variant(
perturbed, z, narrow_lines, grating, R, continuum, deg,
broad_type=oiii_baseline_broad, n_boot=0,
narrow_fit=narrow_fit,
n_jobs=1, sigma_factor=sigma_factor,
centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average,
tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
oiii_broad_type=oiii_type,
)
except (ValueError, RuntimeError):
results[variant] = np.nan
continue
flam_data = _ujy_to_flam(
perturbed.flux_ujy - fit_v.continuum, perturbed.wave_um
)
flam_model = _ujy_to_flam(fit_v.model_flux, perturbed.wave_um)
valid = perturbed.mask_valid() & oiii_mask
resid = flam_data - flam_model
if fit_v.constraints is not None:
n_free = int(np.sum(fit_v.constraints.free_mask()))
else:
n_free = len(fit_v.params)
results[variant] = _compute_bic(resid, flam_err, valid, n_free)
# ------------------------------------------------------------------
# Optional third-stage He I broad BIC test. Variants are
# ``"hei_off"`` / ``"hei_broad1"`` / ``"hei_broad2"`` / ``"hei_both"``,
# all built on top of (hei_baseline_broad, hei_baseline_oiii) — the
# winners of stages 1 and 2 — so the test is sensitive specifically
# to a broad HeI signature.
# ------------------------------------------------------------------
if hei_variants and hei_mask is not None:
flam_err = _ujy_to_flam(perturbed.err_ujy, perturbed.wave_um)
for variant in hei_variants:
hei_type = None if variant == "off" else variant
try:
fit_v, _ = _fit_model_variant(
perturbed, z, narrow_lines, grating, R, continuum, deg,
broad_type=hei_baseline_broad, n_boot=0,
narrow_fit=narrow_fit,
n_jobs=1, sigma_factor=sigma_factor,
centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average,
tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
oiii_broad_type=hei_baseline_oiii,
hei_broad_type=hei_type,
)
except (ValueError, RuntimeError):
results[variant] = np.nan
continue
flam_data = _ujy_to_flam(
perturbed.flux_ujy - fit_v.continuum, perturbed.wave_um
)
flam_model = _ujy_to_flam(fit_v.model_flux, perturbed.wave_um)
valid = perturbed.mask_valid() & hei_mask
resid = flam_data - flam_model
if fit_v.constraints is not None:
n_free = int(np.sum(fit_v.constraints.free_mask()))
else:
n_free = len(fit_v.params)
results[variant] = _compute_bic(resid, flam_err, valid, n_free)
return results
def _add_broad_lines(
line_names: list[str],
broad_type: str | None,
*,
oiii_broad_type: str | None = None,
hei_broad_type: str | None = None,
) -> list[str]:
"""Add broad Balmer, [OIII], and/or He I entries to a line list.
Parameters
----------
line_names : list of str
Base narrow line names.
broad_type : {"broad1", "broad2", "both", None}
Balmer broad variant. ``None`` skips Balmer broad lines.
oiii_broad_type : {"broad1", "broad2", "both", None}
[OIII] broad variant. ``None`` skips OIII broad lines.
Mirrors the Balmer mapping: BROAD1 = intermediate (FWHM
500–2000 km/s, outflow), BROAD2 = very broad (FWHM
2000–5000 km/s, fast wind). The ``_BROAD`` / ``_BROAD2``
suffixes trigger the matching velocity bounds in
``jwspecfit.fitter`` automatically.
Returns
-------
list of str
Extended line list with broad entries.
"""
extended = list(line_names)
if broad_type is not None:
for base in BROAD_CANDIDATES:
if base not in line_names:
continue
if broad_type in ("broad1", "both"):
bname = f"{base}_BROAD"
if bname not in extended:
extended.append(bname)
REST_LINES_A[bname] = REST_LINES_A[base]
if broad_type in ("broad2", "both"):
bname = f"{base}_BROAD2"
if bname not in extended:
extended.append(bname)
REST_LINES_A[bname] = REST_LINES_A[base]
if oiii_broad_type is not None:
for base in OIII_BROAD_CANDIDATES:
if base not in line_names:
continue
if oiii_broad_type in ("broad1", "both"):
bname = f"{base}_BROAD"
if bname not in extended:
extended.append(bname)
REST_LINES_A[bname] = REST_LINES_A[base]
if oiii_broad_type in ("broad2", "both"):
bname = f"{base}_BROAD2"
if bname not in extended:
extended.append(bname)
REST_LINES_A[bname] = REST_LINES_A[base]
if hei_broad_type is not None:
for base in HEI_BROAD_CANDIDATES:
if base not in line_names:
continue
if hei_broad_type in ("broad1", "both"):
bname = f"{base}_BROAD"
if bname not in extended:
extended.append(bname)
REST_LINES_A[bname] = REST_LINES_A[base]
if hei_broad_type in ("broad2", "both"):
bname = f"{base}_BROAD2"
if bname not in extended:
extended.append(bname)
REST_LINES_A[bname] = REST_LINES_A[base]
return extended
def _make_broad_constraints(line_names: list[str]) -> ConstraintSet:
"""Build constraints that do NOT tie broad Balmer widths to OIII."""
cs = ConstraintSet(line_names, tie_nii=True, tie_balmer_to_oiii=True)
# Override: broad lines should NOT have their widths tied.
# We handle this by customising the free_mask and apply methods.
# For now, the base ConstraintSet only ties narrow Balmer widths.
# Broad lines have "_BROAD" suffix so they won't match the tie targets.
return cs
def _fit_model_variant(
spec: Spectrum,
z: float,
line_names: list[str],
grating: str | None,
R: float | Callable | None,
continuum: np.ndarray,
deg: int,
broad_type: str | None = None,
n_boot: int = 200,
narrow_fit: FitResult | None = None,
n_jobs: int = -1,
sigma_factor: float = 1.0,
centroid_vmax: float = 500.0,
moving_average: bool | int = False,
tie_uv_doublets: bool = True,
tie_uv_centroids: bool = True,
tie_uv_widths: bool = True,
sigma_overrides: dict[str, tuple[float, float]] | None = None,
centroid_overrides: dict[str, tuple[float, float]] | None = None,
niv_doublet_ratio: float | None = None,
ciii_doublet_ratio: float | None = None,
oiii_broad_type: str | None = None,
hei_broad_type: str | None = None,
centroid_max_sigma: float = 1.0,
) -> tuple[FitResult, float]:
"""Fit a specific model variant and return (FitResult, BIC).
Parameters
----------
broad_type : str or None
``None`` for narrow-only, or ``"broad1"``, ``"broad2"``, ``"both"``.
narrow_fit : FitResult or None
Narrow-only fit results used to seed narrow line parameters
when fitting broad variants.
Returns
-------
tuple
``(FitResult, BIC)``.
"""
any_broad = (
broad_type is not None
or oiii_broad_type is not None
or hei_broad_type is not None
)
if any_broad:
fit_lines_list = _add_broad_lines(
line_names, broad_type,
oiii_broad_type=oiii_broad_type,
hei_broad_type=hei_broad_type,
)
else:
fit_lines_list = list(line_names)
# Build parameter hints from narrow-only fit to seed narrow lines.
p0_hint = None
if narrow_fit is not None and any_broad:
p0_hint = {}
nL_narrow = len(narrow_fit.line_names)
for i, name in enumerate(narrow_fit.line_names):
p0_hint[name] = (
narrow_fit.params[i], # amplitude
narrow_fit.params[nL_narrow + i], # centroid
narrow_fit.params[2 * nL_narrow + i], # sigma
)
parts = []
if broad_type is not None:
parts.append(broad_type)
if oiii_broad_type is not None:
parts.append(f"oiii_{oiii_broad_type}")
if hei_broad_type is not None:
parts.append(f"hei_{hei_broad_type}")
variant_label = "+".join(parts) if parts else "narrow"
result = fit_lines(
spec, z, grating=grating, R=R, lines=fit_lines_list, deg=deg, n_boot=n_boot,
n_jobs=n_jobs, sigma_factor=sigma_factor, centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average, tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
_label=variant_label, _p0_hint=p0_hint,
)
# Compute BIC.
flam_err = _ujy_to_flam(spec.err_ujy, spec.wave_um)
flam_data = _ujy_to_flam(spec.flux_ujy - result.continuum, spec.wave_um)
flam_model = _ujy_to_flam(result.model_flux, spec.wave_um)
valid = spec.mask_valid()
resid = flam_data - flam_model
if result.constraints is not None:
n_free = int(np.sum(result.constraints.free_mask()))
else:
n_free = len(result.params)
bic = _compute_bic(resid, flam_err, valid, n_free)
return result, bic
[docs]
def fit_with_broad(
spectrum: Spectrum,
z: float,
*,
grating: str | None = None,
R: float | Callable | None = None,
lines: list[str] | None = None,
deg: int = 2,
fit_balmer_broad: bool = False,
fit_oiii_broad: bool = False,
fit_hei_broad: bool = False,
n_boot: int = 1000,
n_boot_bic: int = 100,
n_jobs: int = -1,
snr_threshold: float = 5.0,
oiii_snr_threshold: float = 5.0,
hei_snr_threshold: float = 5.0,
bic_delta: float = BIC_DELTA_THRESHOLD,
sigma_factor: float = 1.0,
centroid_vmax: float = 500.0,
centroid_max_sigma: float = 1.0,
moving_average: bool | int = False,
tie_uv_doublets: bool = True,
tie_uv_centroids: bool = True,
tie_uv_widths: bool = True,
sigma_overrides: dict[str, tuple[float, float]] | None = None,
centroid_overrides: dict[str, tuple[float, float]] | None = None,
niv_doublet_ratio: float | None = None,
ciii_doublet_ratio: float | None = None,
_print_R: bool = True,
) -> BroadFitResult:
"""Fit emission lines with optional BIC-selected broad components.
Two independent BIC tests can be enabled:
- **Balmer broad** (``fit_balmer_broad``): tests narrow vs.
narrow + intermediate broad (FWHM 500–2000 km/s) vs.
narrow + very broad (FWHM 2000–5000 km/s) vs. narrow + both.
Gated by ``snr_threshold`` on Hα.
- **[OIII] broad** (``fit_oiii_broad``): tests baseline vs.
baseline + broad [OIII] 5007/4959 (outflow signature). Gated
by ``oiii_snr_threshold`` on [OIII] 5007 / 4959.
The two tests are independent — both can be selected, only one,
or neither.
Parameters
----------
spectrum : Spectrum
Input spectrum.
z : float
Source redshift.
grating : str, optional
Grating name.
R : float or callable, optional
Resolving power.
lines : list of str, optional
Narrow line list. If ``None``, auto-detected.
deg : int
Continuum polynomial degree.
fit_balmer_broad : bool
If ``True``, run BIC model selection for a broad Balmer
component (intermediate / very-broad / both). Default
``False`` — opt in only when you want to test for a BLR or
outflow signature on Hα/Hβ.
fit_oiii_broad : bool
If ``True``, run an independent BIC test for a broad component
on [OIII] 5007/4959. Decoupled from the Balmer test. Default
``False`` — opt in only when looking for outflow signatures.
fit_hei_broad : bool
If ``True``, run an independent BIC test for a broad component
on all observable He I lines (5877/6680/4472/...). Within
each tier all HeI broads share kinematics (anchored on first
present). Default ``False``.
n_boot : int
Number of bootstrap iterations for flux uncertainties (default 1000).
n_boot_bic : int
Number of bootstrap iterations for BIC model selection (default 100).
Set to 0 for single-point BIC (legacy behaviour).
n_jobs : int
Number of parallel jobs for bootstrap. ``-1`` uses all cores,
``1`` runs sequentially. Default ``-1``.
snr_threshold : float
Minimum Hα SNR to attempt the Balmer broad fit (default 5.0).
oiii_snr_threshold : float
Minimum [OIII] 5007 / 4959 SNR to attempt the OIII broad fit
(default 5.0).
bic_delta : float
ΔBIC threshold for accepting a more complex model (default 6.0).
Returns
-------
BroadFitResult
"""
grating = grating or spectrum.grating
R = R or spectrum.R
# Print resolving power once for the top-level broad-fitting call.
if _print_R:
R_arr = resolve_R(spectrum.wave_um, grating=grating, R=R)
R_med = float(np.median(R_arr))
R_lo, R_hi = float(np.min(R_arr)), float(np.max(R_arr))
if abs(R_hi - R_lo) < 10:
print(f"Resolving power: R = {R_med:.0f}")
else:
print(f"Resolving power: R \u2248 {R_med:.0f} (range {R_lo:.0f}\u2013{R_hi:.0f})")
# Determine narrow line list.
if lines is None:
if grating is not None:
candidate = get_line_list(grating)
else:
candidate = get_line_list("prism")
narrow_lines = observable_lines(
candidate, z, spectrum.wave_um.min(), spectrum.wave_um.max()
)
else:
narrow_lines = list(lines)
# Continuum (shared across all variants).
continuum = fit_continuum(
spectrum.wave_um, spectrum.flux_ujy, spectrum.err_ujy,
z, narrow_lines, grating=grating, R=R, deg=deg,
moving_average=moving_average,
)
# --- Phase 1: fast fits without bootstrap for BIC comparison ---
fit_narrow, bic_narrow = _fit_model_variant(
spectrum, z, narrow_lines, grating, R, continuum, deg,
broad_type=None, n_boot=0, n_jobs=n_jobs,
sigma_factor=sigma_factor, centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average, tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
)
bic_b1 = np.nan
bic_b2 = np.nan
bic_both = np.nan
all_fits: dict[str, FitResult] = {"narrow": fit_narrow}
bic_boot_dist: dict[str, np.ndarray] = {}
if not fit_balmer_broad and not fit_oiii_broad:
# Narrow-only: skip every BIC stage and just return the (optionally
# bootstrapped) narrow fit.
if n_boot > 0:
fit_narrow, bic_narrow = _fit_model_variant(
spectrum, z, narrow_lines, grating, R, continuum, deg,
broad_type=None, n_boot=n_boot, n_jobs=n_jobs,
sigma_factor=sigma_factor, centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average, tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
)
all_fits["narrow"] = fit_narrow
return BroadFitResult(
best_fit=fit_narrow,
selected_model="narrow",
bic_narrow=bic_narrow,
bic_broad1=bic_b1,
bic_broad2=bic_b2,
bic_both=bic_both,
all_fits=all_fits,
)
# Check Hα SNR before attempting Balmer broad fits.
ha_snr = 0.0
if "Ha" in fit_narrow.lines:
ha_snr = fit_narrow.lines["Ha"].snr
skip_balmer_bic = (not fit_balmer_broad) or (ha_snr < snr_threshold)
if skip_balmer_bic:
if fit_balmer_broad:
logger.info(
"Hα SNR=%.1f < %.1f; skipping Balmer broad fitting "
"(OIII broad still tested if requested).",
ha_snr, snr_threshold,
)
best_name = "narrow"
# Determine which Balmer variants to fit.
variants_to_fit: list[str] = ["narrow"]
if not skip_balmer_bic:
variants_to_fit.extend(["broad1", "broad2", "both"])
# --- Phase 2: BIC model selection ---
if not skip_balmer_bic and n_boot_bic > 0:
# Bootstrap BIC on Balmer pixels only.
from joblib import Parallel, delayed
from tqdm import tqdm
balmer_mask = _balmer_pixel_mask(spectrum.wave_um, z)
rng = np.random.default_rng()
noise_vectors = rng.standard_normal((n_boot_bic, len(spectrum.wave_um)))
bic_records: list[dict[str, float]] = Parallel(
n_jobs=n_jobs, return_as="generator"
)(
delayed(_bic_bootstrap_single)(
noise_vectors[i],
spectrum, z, narrow_lines, grating, R, continuum, deg,
fit_narrow, balmer_mask, variants_to_fit, sigma_factor,
centroid_vmax, moving_average, tie_uv_doublets,
tie_uv_centroids, tie_uv_widths, sigma_overrides,
centroid_overrides, niv_doublet_ratio, ciii_doublet_ratio,
centroid_max_sigma=centroid_max_sigma,
)
for i in range(n_boot_bic)
)
bic_records = list(tqdm(
bic_records, total=n_boot_bic, desc="BIC bootstrap",
))
# Aggregate into arrays and compute medians.
for variant in variants_to_fit:
bic_boot_dist[variant] = np.array(
[rec[variant] for rec in bic_records]
)
bic_medians = {v: float(np.nanmedian(bic_boot_dist[v])) for v in variants_to_fit}
bic_narrow = bic_medians.get("narrow", bic_narrow)
bic_b1 = bic_medians.get("broad1", bic_b1)
bic_b2 = bic_medians.get("broad2", bic_b2)
bic_both = bic_medians.get("both", bic_both)
# Also do a single real-data fit for each variant (for all_fits dict).
from concurrent.futures import ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=len(variants_to_fit)) as pool:
futures = {}
for variant in variants_to_fit:
if variant == "narrow":
continue # already have fit_narrow
broad_type = variant
futures[variant] = pool.submit(
_fit_model_variant,
spectrum, z, narrow_lines, grating, R, continuum, deg,
broad_type, 0, fit_narrow, 1, sigma_factor, centroid_vmax,
moving_average, tie_uv_doublets, tie_uv_centroids, tie_uv_widths,
sigma_overrides, centroid_overrides, niv_doublet_ratio, ciii_doublet_ratio,
)
for variant, fut in futures.items():
fit_v, _ = fut.result()
all_fits[variant] = fit_v
elif not skip_balmer_bic:
# Single-point BIC (n_boot_bic=0).
from concurrent.futures import ThreadPoolExecutor
broad_variants = [v for v in variants_to_fit if v != "narrow"]
with ThreadPoolExecutor(max_workers=len(broad_variants) or 1) as pool:
bic_futures: dict[str, any] = {}
for variant in broad_variants:
fut = pool.submit(
_fit_model_variant,
spectrum, z, narrow_lines, grating, R, continuum, deg,
variant, 0, fit_narrow, n_jobs, sigma_factor, centroid_vmax,
moving_average, tie_uv_doublets, tie_uv_centroids, tie_uv_widths,
sigma_overrides, centroid_overrides, niv_doublet_ratio, ciii_doublet_ratio,
)
bic_futures[variant] = fut
for variant, fut in bic_futures.items():
fit_v, bic_v = fut.result()
all_fits[variant] = fit_v
if variant == "broad1":
bic_b1 = bic_v
elif variant == "broad2":
bic_b2 = bic_v
elif variant == "both":
bic_both = bic_v
# --- Phase 3: select best Balmer model by BIC ---
if not skip_balmer_bic:
candidates = {
"narrow": bic_narrow,
"broad1": bic_b1,
"broad2": bic_b2,
"both": bic_both,
}
candidates = {k: v for k, v in candidates.items() if np.isfinite(v)}
best_name = min(candidates, key=candidates.get)
if best_name != "narrow":
delta = bic_narrow - candidates[best_name]
if delta < bic_delta:
best_name = "narrow"
logger.info("ΔBIC=%.1f < %.1f; keeping narrow model.", delta, bic_delta)
bic_src = "median" if n_boot_bic > 0 else "single-point"
logger.info(
"BIC selection (%s): narrow=%.1f, broad1=%.1f, broad2=%.1f, both=%.1f → %s",
bic_src, bic_narrow, bic_b1, bic_b2, bic_both, best_name,
)
# --- Phase 3.5: independent [OIII] outflow BIC test ---
# Mirrors the Balmer Phase 3 logic but on OIII pixels. Variants:
# "off" / "broad1" / "broad2" / "both", with the same BROAD1 /
# BROAD2 velocity bounds as Balmer (outflow / fast wind).
oiii_selected = "off"
bic_oiii = {
"off": float("nan"),
"broad1": float("nan"),
"broad2": float("nan"),
"both": float("nan"),
}
bic_oiii_boot: dict[str, np.ndarray] = {}
baseline_broad = None if best_name == "narrow" else best_name
oiii_candidates_present = [
n for n in OIII_BROAD_CANDIDATES if n in narrow_lines
]
if fit_oiii_broad and oiii_candidates_present:
# SNR gate: use the best narrow OIII SNR from the Stage-1 baseline.
baseline_fit = all_fits.get(best_name, fit_narrow)
oiii_snr = 0.0
for cand in oiii_candidates_present:
lr = baseline_fit.lines.get(cand)
if lr is not None and lr.snr > oiii_snr:
oiii_snr = lr.snr
if oiii_snr < oiii_snr_threshold:
logger.info(
"[OIII] SNR=%.1f < %.1f; skipping OIII broad test.",
oiii_snr, oiii_snr_threshold,
)
else:
oiii_mask = _oiii_pixel_mask(spectrum.wave_um, z)
balmer_mask_for_oiii = _balmer_pixel_mask(spectrum.wave_um, z)
oiii_variants = ["off", "broad1", "broad2", "both"]
if n_boot_bic > 0:
from joblib import Parallel, delayed
from tqdm import tqdm
rng = np.random.default_rng()
noise_vectors = rng.standard_normal(
(n_boot_bic, len(spectrum.wave_um))
)
oiii_records: list[dict[str, float]] = Parallel(
n_jobs=n_jobs, return_as="generator"
)(
delayed(_bic_bootstrap_single)(
noise_vectors[i],
spectrum, z, narrow_lines, grating, R, continuum, deg,
fit_narrow, balmer_mask_for_oiii,
[], # skip Balmer variants in this pass
sigma_factor, centroid_vmax, moving_average,
tie_uv_doublets, tie_uv_centroids, tie_uv_widths,
sigma_overrides, centroid_overrides,
niv_doublet_ratio, ciii_doublet_ratio,
oiii_variants, baseline_broad, oiii_mask,
centroid_max_sigma=centroid_max_sigma,
)
for i in range(n_boot_bic)
)
oiii_records = list(tqdm(
oiii_records, total=n_boot_bic, desc="OIII BIC bootstrap",
))
for v in oiii_variants:
bic_oiii_boot[v] = np.array(
[rec.get(v, np.nan) for rec in oiii_records]
)
bic_oiii[v] = float(np.nanmedian(bic_oiii_boot[v]))
else:
# Single-point BIC on real data.
flam_err = _ujy_to_flam(spectrum.err_ujy, spectrum.wave_um)
for v in oiii_variants:
oiii_type = None if v == "off" else v
try:
fit_v, _ = _fit_model_variant(
spectrum, z, narrow_lines, grating, R, continuum, deg,
broad_type=baseline_broad, n_boot=0,
narrow_fit=fit_narrow,
n_jobs=n_jobs, sigma_factor=sigma_factor,
centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average,
tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
oiii_broad_type=oiii_type,
)
except (ValueError, RuntimeError):
bic_oiii[v] = float("nan")
continue
flam_data = _ujy_to_flam(
spectrum.flux_ujy - fit_v.continuum, spectrum.wave_um
)
flam_model = _ujy_to_flam(fit_v.model_flux, spectrum.wave_um)
valid = spectrum.mask_valid() & oiii_mask
resid = flam_data - flam_model
nf = (
int(np.sum(fit_v.constraints.free_mask()))
if fit_v.constraints is not None
else len(fit_v.params)
)
bic_oiii[v] = _compute_bic(resid, flam_err, valid, nf)
# Selection: pick lowest BIC; require ΔBIC ≥ bic_delta vs "off".
finite = {k: v for k, v in bic_oiii.items() if np.isfinite(v)}
if finite:
best_oiii = min(finite, key=finite.get)
if best_oiii != "off":
delta = bic_oiii["off"] - finite[best_oiii]
if delta < bic_delta:
best_oiii = "off"
logger.info(
"[OIII] ΔBIC=%.1f < %.1f; keeping no OIII broad.",
delta, bic_delta,
)
oiii_selected = best_oiii
bic_src = "median" if n_boot_bic > 0 else "single-point"
logger.info(
"[OIII] BIC (%s): off=%.1f, broad1=%.1f, broad2=%.1f, "
"both=%.1f → %s",
bic_src, bic_oiii["off"], bic_oiii["broad1"],
bic_oiii["broad2"], bic_oiii["both"], oiii_selected,
)
oiii_broad_type_sel = None if oiii_selected == "off" else oiii_selected
# --- Phase 3.6: independent He I broad BIC test ---
# Built on top of (best_name Balmer + selected OIII broad), so the
# comparison isolates the He I broad signal. Uses HeI pixels only.
hei_selected = "off"
bic_hei = {
"off": float("nan"),
"broad1": float("nan"),
"broad2": float("nan"),
"both": float("nan"),
}
bic_hei_boot: dict[str, np.ndarray] = {}
hei_candidates_present = [
n for n in HEI_BROAD_CANDIDATES if n in narrow_lines
]
if fit_hei_broad and hei_candidates_present:
baseline_fit = all_fits.get(best_name, fit_narrow)
hei_snr = 0.0
for cand in hei_candidates_present:
lr = baseline_fit.lines.get(cand)
if lr is not None and lr.snr > hei_snr:
hei_snr = lr.snr
if hei_snr < hei_snr_threshold:
logger.info(
"He I SNR=%.1f < %.1f; skipping HeI broad test.",
hei_snr, hei_snr_threshold,
)
else:
hei_mask = _hei_pixel_mask(
spectrum.wave_um, z, hei_candidates_present,
)
balmer_mask_for_hei = _balmer_pixel_mask(spectrum.wave_um, z)
hei_variants = ["off", "broad1", "broad2", "both"]
if n_boot_bic > 0:
from joblib import Parallel, delayed
from tqdm import tqdm
rng = np.random.default_rng()
noise_vectors = rng.standard_normal(
(n_boot_bic, len(spectrum.wave_um))
)
hei_records: list[dict[str, float]] = Parallel(
n_jobs=n_jobs, return_as="generator"
)(
delayed(_bic_bootstrap_single)(
noise_vectors[i],
spectrum, z, narrow_lines, grating, R, continuum, deg,
fit_narrow, balmer_mask_for_hei,
[], # skip Balmer variants in this pass
sigma_factor, centroid_vmax, moving_average,
tie_uv_doublets, tie_uv_centroids, tie_uv_widths,
sigma_overrides, centroid_overrides,
niv_doublet_ratio, ciii_doublet_ratio,
None, None, None,
hei_variants, baseline_broad, oiii_broad_type_sel,
hei_mask,
centroid_max_sigma=centroid_max_sigma,
)
for i in range(n_boot_bic)
)
hei_records = list(tqdm(
hei_records, total=n_boot_bic, desc="HeI BIC bootstrap",
))
for v in hei_variants:
bic_hei_boot[v] = np.array(
[rec.get(v, np.nan) for rec in hei_records]
)
bic_hei[v] = float(np.nanmedian(bic_hei_boot[v]))
else:
flam_err = _ujy_to_flam(spectrum.err_ujy, spectrum.wave_um)
for v in hei_variants:
hei_type = None if v == "off" else v
try:
fit_v, _ = _fit_model_variant(
spectrum, z, narrow_lines, grating, R, continuum, deg,
broad_type=baseline_broad, n_boot=0,
narrow_fit=fit_narrow,
n_jobs=n_jobs, sigma_factor=sigma_factor,
centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average,
tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
oiii_broad_type=oiii_broad_type_sel,
hei_broad_type=hei_type,
)
except (ValueError, RuntimeError):
bic_hei[v] = float("nan")
continue
flam_data = _ujy_to_flam(
spectrum.flux_ujy - fit_v.continuum, spectrum.wave_um
)
flam_model = _ujy_to_flam(fit_v.model_flux, spectrum.wave_um)
valid = spectrum.mask_valid() & hei_mask
resid = flam_data - flam_model
nf = (
int(np.sum(fit_v.constraints.free_mask()))
if fit_v.constraints is not None
else len(fit_v.params)
)
bic_hei[v] = _compute_bic(resid, flam_err, valid, nf)
finite = {k: v for k, v in bic_hei.items() if np.isfinite(v)}
if finite:
best_hei = min(finite, key=finite.get)
if best_hei != "off":
delta = bic_hei["off"] - finite[best_hei]
if delta < bic_delta:
best_hei = "off"
logger.info(
"HeI ΔBIC=%.1f < %.1f; keeping no HeI broad.",
delta, bic_delta,
)
hei_selected = best_hei
bic_src = "median" if n_boot_bic > 0 else "single-point"
logger.info(
"HeI BIC (%s): off=%.1f, broad1=%.1f, broad2=%.1f, "
"both=%.1f → %s",
bic_src, bic_hei["off"], bic_hei["broad1"],
bic_hei["broad2"], bic_hei["both"], hei_selected,
)
hei_broad_type_sel = None if hei_selected == "off" else hei_selected
# --- Phase 4: re-fit only the selected model with bootstrap ---
selected_key_parts = []
if best_name != "narrow":
selected_key_parts.append(best_name)
if oiii_broad_type_sel is not None:
selected_key_parts.append(f"oiii_{oiii_broad_type_sel}")
if hei_broad_type_sel is not None:
selected_key_parts.append(f"hei_{hei_broad_type_sel}")
selected_key = "+".join(selected_key_parts) if selected_key_parts else "narrow"
any_broad_sel = (
best_name != "narrow"
or oiii_broad_type_sel is not None
or hei_broad_type_sel is not None
)
if n_boot > 0:
broad_type = None if best_name == "narrow" else best_name
best_fit, _ = _fit_model_variant(
spectrum, z, narrow_lines, grating, R, continuum, deg,
broad_type=broad_type, n_boot=n_boot,
narrow_fit=fit_narrow if any_broad_sel else None,
n_jobs=n_jobs, sigma_factor=sigma_factor,
centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average, tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
oiii_broad_type=oiii_broad_type_sel,
hei_broad_type=hei_broad_type_sel,
)
all_fits[selected_key] = best_fit
elif any_broad_sel and selected_key not in all_fits:
best_fit, _ = _fit_model_variant(
spectrum, z, narrow_lines, grating, R, continuum, deg,
broad_type=None if best_name == "narrow" else best_name,
n_boot=0,
narrow_fit=fit_narrow,
n_jobs=n_jobs, sigma_factor=sigma_factor,
centroid_vmax=centroid_vmax,
centroid_max_sigma=centroid_max_sigma,
moving_average=moving_average,
tie_uv_doublets=tie_uv_doublets,
tie_uv_centroids=tie_uv_centroids,
tie_uv_widths=tie_uv_widths,
sigma_overrides=sigma_overrides,
centroid_overrides=centroid_overrides,
niv_doublet_ratio=niv_doublet_ratio,
ciii_doublet_ratio=ciii_doublet_ratio,
oiii_broad_type=oiii_broad_type_sel,
hei_broad_type=hei_broad_type_sel,
)
all_fits[selected_key] = best_fit
return BroadFitResult(
best_fit=all_fits[selected_key],
selected_model=best_name,
bic_narrow=bic_narrow,
bic_broad1=bic_b1,
bic_broad2=bic_b2,
bic_both=bic_both,
all_fits=all_fits,
bic_bootstrap=bic_boot_dist,
oiii_selected=oiii_selected,
bic_oiii_off=bic_oiii["off"],
bic_oiii_broad1=bic_oiii["broad1"],
bic_oiii_broad2=bic_oiii["broad2"],
bic_oiii_both=bic_oiii["both"],
bic_oiii_bootstrap=bic_oiii_boot,
hei_selected=hei_selected,
bic_hei_off=bic_hei["off"],
bic_hei_broad1=bic_hei["broad1"],
bic_hei_broad2=bic_hei["broad2"],
bic_hei_both=bic_hei["both"],
bic_hei_bootstrap=bic_hei_boot,
)