"""Debiased / desparsified lasso for least squares.
Van de Geer–Bühlmann–Ritov–Dezeure (2014) confidence intervals and
p-values for high-dimensional lasso regression. Constructs an
approximate inverse Gram `Θ̂` via per-column **nodewise lassos**
(Meinshausen–Bühlmann 2006), uses it to debias the penalized
estimator, and reports asymptotic Gaussian inference on every
coordinate of `β̂_d`.
The debiased estimator is
β̂_d = β̂ + (1 / n) · Θ̂ · Xᵀ (y − X β̂)
with asymptotic distribution (under standard sparsity / restricted-
eigenvalue conditions)
√n · (β̂_d − β) ⇝ N(0, σ² · Θ̂ Σ̂ Θ̂ᵀ).
This is the one inference feature `glmnet` and `ncvreg` do not
offer; the canonical R implementation is `hdi::lasso.proj`. We
match its semantics: free function on `(X, y)` returning a result
dataclass plus a thin sklearn-style `DebiasedLassoRegressor`
wrapper.
Scope (v1): least-squares only, dense `X`. GLM (logistic / Poisson)
debiasing via the weighted-LS surrogate + Fisher information is a
planned follow-up.
References
----------
- Van de Geer, S., Bühlmann, P., Ritov, Y., Dezeure, R. (2014).
*On asymptotically optimal confidence regions and tests for
high-dimensional models.* Annals of Statistics 42(3): 1166–1202.
- Zhang, C.-H., Zhang, S. (2014). *Confidence intervals for low-
dimensional parameters in high-dimensional linear models.*
JRSS B 76(1): 217–242.
- Reid, S., Tibshirani, R., Friedman, J. (2016). *A study of error
variance estimation in lasso regression.* Statistica Sinica 26:
35–67.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
import numpy as np
from numpy.typing import NDArray
from scipy import stats
from sklearn.base import BaseEstimator, RegressorMixin
from skein_glm.estimators import (
CoxMCPRegressor,
ElasticNetRegressor,
LogisticLassoRegressor,
PoissonLassoRegressor,
)
_ACTIVE_EPS = 1e-10
# --- result dataclass -------------------------------------------------
[docs]
@dataclass
class DebiasedLassoResult:
"""Output of :func:`debiased_lasso`.
All coefficient arrays are returned on the **original (un-
standardized) feature scale**. ``Theta`` is on the standardized
scale used internally (where the nodewise lasso defaults are
dimensionless); it is exposed for inspection and reuse.
Attributes
----------
coef_debiased : ndarray (p,)
Desparsified estimator `β̂_d`.
coef_lasso : ndarray (p,)
The underlying penalized lasso fit `β̂` (also original-scale).
intercept_ : float
se : ndarray (p,)
Asymptotic standard error from the diagonal of
`σ̂² · Θ̂ Σ̂ Θ̂ᵀ / n`.
ci_lower, ci_upper : ndarray (p,)
Two-sided `(1 − alpha)` CIs.
pvalues : ndarray (p,)
Two-sided Wald p-values for `H_0: β_j = 0`.
z_scores : ndarray (p,)
`β̂_d / se`.
sigma_hat : float
Residual scale `‖y − ŷ‖ / sqrt(n − ‖β̂‖₀)`.
Theta : ndarray (p, p)
Approximate inverse Gram on the standardized scale.
lambda_main : float
lambda_nodewise : ndarray (p,)
alpha : float
CI level used.
"""
coef_debiased: NDArray[np.float64]
coef_lasso: NDArray[np.float64]
intercept_: float
se: NDArray[np.float64]
ci_lower: NDArray[np.float64]
ci_upper: NDArray[np.float64]
pvalues: NDArray[np.float64]
z_scores: NDArray[np.float64]
sigma_hat: float
Theta: NDArray[np.float64]
lambda_main: float
lambda_nodewise: NDArray[np.float64]
alpha: float
# --- helpers ----------------------------------------------------------
def _theoretical_lambda(n: int, p: int) -> float:
"""VBR theoretical scale `√(2 log p / n)`.
Dimensionless — appropriate when columns are standardized. For
the main lasso this is the *base* scale; users typically prefer
a CV-tuned `lambda_` and override the default.
"""
return float(np.sqrt(2.0 * np.log(max(p, 2)) / n))
def _fit_nodewise_column(
Xs: NDArray[np.float64],
j: int,
lambda_j: float,
max_iter: int,
tol: float,
) -> tuple[NDArray[np.float64], float]:
"""Run the j-th nodewise lasso: regress ``X_s[:, j]`` on the
remaining columns.
Returns
-------
gamma : ndarray (p - 1,)
Lasso coefficients with ``j`` deleted.
tau2 : float
VBR `τ̂_j² = ‖X_j − X_{−j} γ̂‖² / n + λ_j · ‖γ̂‖₁`. The `+ λ‖γ‖₁`
correction is the residual-norm-plus-penalty term that makes
Θ̂ exactly satisfy the row-wise KKT inversion identity (VBR
Lemma 6.2). Using the plain residual norm gives a slightly
biased Θ̂ at finite `n`.
"""
n = Xs.shape[0]
Xmj = np.delete(Xs, j, axis=1)
xj = Xs[:, j].copy()
fit = ElasticNetRegressor(
lambda_=lambda_j,
alpha=1.0,
fit_intercept=False,
standardize=False,
max_iter=max_iter,
tol=tol,
).fit(Xmj, xj)
gamma = np.asarray(fit.coef_, dtype=np.float64)
resid = xj - Xmj @ gamma
tau2 = float(np.dot(resid, resid)) / n + lambda_j * float(np.sum(np.abs(gamma)))
return gamma, tau2
def _assemble_theta_rows(
gammas: list[NDArray[np.float64]],
tau2s: NDArray[np.float64],
) -> NDArray[np.float64]:
"""Build Θ̂ row-by-row from the nodewise outputs.
Row j is `(−γ̂_{j,1}, …, 1, …, −γ̂_{j,p−1}) / τ̂_j²` with the `1`
in position j. Θ̂ is **not symmetrized** here; VBR's variance
formula uses the symmetric quadratic form `Θ̂ Σ̂ Θ̂ᵀ` so
symmetrization at this stage is unnecessary.
"""
p = len(gammas)
Theta = np.zeros((p, p), dtype=np.float64)
for j, (g, t2) in enumerate(zip(gammas, tau2s)):
row = np.empty(p, dtype=np.float64)
row[:j] = -g[:j]
row[j] = 1.0
row[j + 1 :] = -g[j:]
Theta[j, :] = row / t2
return Theta
# --- main entry point ------------------------------------------------
[docs]
def debiased_lasso(
X: Any,
y: Any,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
fit_intercept: bool = True,
standardize: bool = True,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> DebiasedLassoResult:
"""Van de Geer–Bühlmann–Ritov debiased lasso for least squares.
Parameters
----------
X : array-like (n, p)
y : array-like (n,)
lambda_ : float or None, default None
Regularisation for the **main** lasso fit. If ``None``, the
theoretical scale `√(2 log p / n)` is used on standardized
features. For best inference quality, pass a CV-tuned λ from
a prior :class:`~skein_glm.ElasticNetPathCV` fit.
lambda_nodewise : float, array-like (p,), or None, default None
Per-column λ for the **nodewise** lassos that build `Θ̂`.
Scalar broadcasts to every column; an array selects per
column. ``None`` uses `√(2 log p / n)` uniformly (the VBR
theoretical choice on standardized columns).
alpha : float, default 0.05
Two-sided CI / p-value level.
fit_intercept : bool, default True
Center `y` and `X`; intercept is recovered post-fit. The
intercept itself is **not** an inferential target — VBR
theory applies to the slopes.
standardize : bool, default True
Scale columns of `X` to unit variance before fitting. The
dimensionless `lambda_nodewise` default is calibrated to
standardized columns; turning this off without supplying
column-specific `lambda_nodewise` typically gives wrong
scale on the CIs.
max_iter, tol : int, float
Forwarded to every underlying lasso fit.
n_jobs : int or None
joblib parallelism over the `p` nodewise lassos. ``-1``
uses all cores. The main lasso is single-threaded; the
nodewise loop dominates for `p ≳ 50`.
Returns
-------
DebiasedLassoResult
Notes
-----
Memory is `O(p²)` for `Θ̂` plus `O(n p)` for the standardized
design — practical up to `p ~ 5000` on a modern laptop. For
larger `p`, the nodewise loop runtime (`p` lasso fits) is the
binding constraint rather than memory.
The asymptotic distribution requires (a) lasso consistency at
rate `‖β̂ − β‖₁ = o(1/√(log p))`, (b) row-wise sparsity of the
true `Σ⁻¹`, (c) sub-Gaussian design and noise. Failures show up
as poor coverage in the empirical CI simulation tests.
"""
from joblib import Parallel, delayed
X = np.ascontiguousarray(X, dtype=np.float64)
y = np.ascontiguousarray(y, dtype=np.float64)
if X.ndim != 2:
raise ValueError(f"X must be 2D, got shape {X.shape}")
if y.ndim != 1 or y.shape[0] != X.shape[0]:
raise ValueError(
f"y must be 1D with length {X.shape[0]}, got shape {y.shape}"
)
if not 0.0 < alpha < 1.0:
raise ValueError(f"alpha must be in (0, 1); got {alpha}")
if max_iter < 1:
raise ValueError(f"max_iter must be ≥ 1; got {max_iter}")
if tol <= 0:
raise ValueError(f"tol must be > 0; got {tol}")
n, p = X.shape
if p < 2:
raise ValueError(
f"debiased lasso requires p ≥ 2 features; got p = {p}"
)
# Center / scale.
if fit_intercept:
x_mean = X.mean(axis=0)
y_mean = float(y.mean())
else:
x_mean = np.zeros(p)
y_mean = 0.0
Xc = X - x_mean
yc = y - y_mean
if standardize:
x_scale = Xc.std(axis=0, ddof=0)
# Constant column ⇒ keep scale 1 so we don't divide by zero;
# the lasso will still drop it cleanly via the standard KKT.
x_scale = np.where(x_scale > 0, x_scale, 1.0)
else:
x_scale = np.ones(p)
Xs = Xc / x_scale
# Main lasso fit (standardized scale).
lambda_main = (
_theoretical_lambda(n, p) if lambda_ is None else float(lambda_)
)
if lambda_main <= 0:
raise ValueError(f"lambda_ must be > 0; got {lambda_main}")
main_fit = ElasticNetRegressor(
lambda_=lambda_main,
alpha=1.0,
fit_intercept=False,
standardize=False,
max_iter=max_iter,
tol=tol,
).fit(Xs, yc)
beta_hat_s = np.asarray(main_fit.coef_, dtype=np.float64)
# Nodewise lassos.
if lambda_nodewise is None:
lam_nw = np.full(p, _theoretical_lambda(n, p), dtype=np.float64)
elif np.isscalar(lambda_nodewise):
lam_nw = np.full(
p, float(lambda_nodewise), # type: ignore[arg-type]
dtype=np.float64,
)
else:
lam_nw = np.ascontiguousarray(lambda_nodewise, dtype=np.float64)
if lam_nw.shape != (p,):
raise ValueError(
f"lambda_nodewise must be scalar or shape ({p},); "
f"got {lam_nw.shape}"
)
if np.any(lam_nw <= 0):
raise ValueError("lambda_nodewise entries must be > 0")
results = Parallel(n_jobs=n_jobs)(
delayed(_fit_nodewise_column)(Xs, j, float(lam_nw[j]), max_iter, tol)
for j in range(p)
)
gammas = [g for g, _ in results]
tau2s = np.array([t for _, t in results], dtype=np.float64)
Theta = _assemble_theta_rows(gammas, tau2s) # (p, p), standardized scale
# Debiased estimator (standardized scale).
resid_s = yc - Xs @ beta_hat_s
beta_d_s = beta_hat_s + (Theta @ (Xs.T @ resid_s)) / n
# σ̂ from lasso residuals — Reid–Tibshirani–Friedman convention.
nz = int(np.sum(np.abs(beta_hat_s) > _ACTIVE_EPS))
sigma2 = float(np.dot(resid_s, resid_s)) / max(n - nz, 1)
sigma_hat = float(np.sqrt(sigma2))
# Variance: σ̂² · diag(Θ̂ Σ̂ Θ̂ᵀ) / n.
# Compute via U = X_s · Θ̂ᵀ ∈ R^{n×p}; then [Θ̂ Σ̂ Θ̂ᵀ]_jj = ‖U_j‖² / n.
# That avoids materializing the p×p Σ̂.
U = Xs @ Theta.T
diag_quad = np.einsum("ij,ij->j", U, U) / n
var_d_s = sigma2 * diag_quad / n
se_s = np.sqrt(np.maximum(var_d_s, 0.0))
# Back to original scale.
beta_d = beta_d_s / x_scale
beta_lasso = beta_hat_s / x_scale
se = se_s / x_scale
intercept = (
y_mean - float(np.dot(beta_d, x_mean)) if fit_intercept else 0.0
)
# Inference (Wald).
z_alpha = float(stats.norm.ppf(1.0 - alpha / 2.0))
safe_se = np.where(se > 0, se, 1.0)
z_scores = np.where(se > 0, beta_d / safe_se, 0.0)
pvalues = 2.0 * stats.norm.sf(np.abs(z_scores))
ci_lower = beta_d - z_alpha * se
ci_upper = beta_d + z_alpha * se
return DebiasedLassoResult(
coef_debiased=beta_d,
coef_lasso=beta_lasso,
intercept_=intercept,
se=se,
ci_lower=ci_lower,
ci_upper=ci_upper,
pvalues=pvalues,
z_scores=z_scores,
sigma_hat=sigma_hat,
Theta=Theta,
lambda_main=lambda_main,
lambda_nodewise=lam_nw,
alpha=alpha,
)
# --- sklearn-style wrapper -------------------------------------------
[docs]
class DebiasedLassoRegressor(BaseEstimator, RegressorMixin):
"""Sklearn-style facade over :func:`debiased_lasso`.
Exposes the debiased estimator as ``coef_`` / ``intercept_`` so
the result composes with sklearn `Pipeline` and metric helpers.
The VBR-specific attributes (``se_``, ``ci_lower_``,
``ci_upper_``, ``pvalues_``, ``z_scores_``, ``Theta_``,
``sigma_hat_``) live on the fitted estimator.
Parameters mirror :func:`debiased_lasso`.
Examples
--------
>>> from skein_glm import DebiasedLassoRegressor
>>> est = DebiasedLassoRegressor(random_state=0).fit(X, y)
>>> est.coef_, est.ci_lower_, est.ci_upper_, est.pvalues_
"""
coef_: NDArray[np.float64]
coef_lasso_: NDArray[np.float64]
intercept_: float
se_: NDArray[np.float64]
ci_lower_: NDArray[np.float64]
ci_upper_: NDArray[np.float64]
pvalues_: NDArray[np.float64]
z_scores_: NDArray[np.float64]
sigma_hat_: float
Theta_: NDArray[np.float64]
lambda_main_: float
lambda_nodewise_: NDArray[np.float64]
n_features_in_: int
def __init__(
self,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
fit_intercept: bool = True,
standardize: bool = True,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> None:
self.lambda_ = lambda_
self.lambda_nodewise = lambda_nodewise
self.alpha = alpha
self.fit_intercept = fit_intercept
self.standardize = standardize
self.max_iter = max_iter
self.tol = tol
self.n_jobs = n_jobs
def fit(self, X: Any, y: Any) -> "DebiasedLassoRegressor":
res = debiased_lasso(
X, y,
lambda_=self.lambda_,
lambda_nodewise=self.lambda_nodewise,
alpha=self.alpha,
fit_intercept=self.fit_intercept,
standardize=self.standardize,
max_iter=self.max_iter,
tol=self.tol,
n_jobs=self.n_jobs,
)
self.coef_ = res.coef_debiased
self.coef_lasso_ = res.coef_lasso
self.intercept_ = res.intercept_
self.se_ = res.se
self.ci_lower_ = res.ci_lower
self.ci_upper_ = res.ci_upper
self.pvalues_ = res.pvalues
self.z_scores_ = res.z_scores
self.sigma_hat_ = res.sigma_hat
self.Theta_ = res.Theta
self.lambda_main_ = res.lambda_main
self.lambda_nodewise_ = res.lambda_nodewise
self.n_features_in_ = int(res.coef_debiased.shape[0])
return self
def predict(self, X: Any) -> NDArray[np.float64]:
X = np.ascontiguousarray(X, dtype=np.float64)
return X @ self.coef_ + self.intercept_
# =====================================================================
# M5.x-b — Debiased GLM (logistic + Poisson)
# =====================================================================
#
# Extends the VBR construction from LS to canonical-link GLMs via the
# weighted-LS surrogate. The estimating equation at β̂ is
#
# score(β̂) = Xᵀ (y − μ̂(β̂))
#
# and the Fisher information at β̂ is J = (1/n) · Xᵀ W X where W is the
# diagonal of working weights — μ̂(1−μ̂) for binomial logit, μ̂ for
# Poisson log. The debiased estimator is
#
# β̂_d = β̂ + (1/n) · Θ̂ · Xᵀ (y − μ̂)
#
# with asymptotic distribution √n (β̂_d − β) ⇝ N(0, J⁻¹) and Θ̂ ≈ J⁻¹
# constructed nodewise on the **weighted design** X̃ = W^{1/2} X. This
# is the JM-extended-to-GLM construction (Van de Geer 2014 §3).
#
# Reuses :func:`_fit_nodewise_column` and :func:`_assemble_theta_rows`
# from the LS path — the math is identical once you swap `X` for `X̃`.
_GLM_WEIGHT_FLOOR = 1e-8
[docs]
@dataclass
class DebiasedGLMResult:
"""Output of :func:`debiased_logistic_lasso` / :func:`debiased_poisson_lasso`.
Same fields as :class:`DebiasedLassoResult` minus ``sigma_hat``
(GLMs have no residual scale — the noise is encoded in the working
weights `W`) and plus ``mu_fitted`` and ``family``.
Attributes
----------
coef_debiased : ndarray (p,)
coef_glm : ndarray (p,)
Underlying penalized GLM fit on the original feature scale.
intercept_ : float
Intercept of the underlying fit; not an inferential target (VBR
theory applies to slopes only).
se : ndarray (p,)
`sqrt(diag(Θ̂ X̃ᵀX̃ Θ̂ᵀ)) / n`.
ci_lower, ci_upper, pvalues, z_scores : ndarray (p,)
mu_fitted : ndarray (n,)
`μ̂` at the penalized fit. Useful for diagnostics.
Theta : ndarray (p, p)
Approximate inverse Fisher on the standardized scale.
lambda_main : float
lambda_nodewise : ndarray (p,)
alpha : float
family : str
``"binomial"`` or ``"poisson"``.
"""
coef_debiased: NDArray[np.float64]
coef_glm: NDArray[np.float64]
intercept_: float
se: NDArray[np.float64]
ci_lower: NDArray[np.float64]
ci_upper: NDArray[np.float64]
pvalues: NDArray[np.float64]
z_scores: NDArray[np.float64]
mu_fitted: NDArray[np.float64]
Theta: NDArray[np.float64]
lambda_main: float
lambda_nodewise: NDArray[np.float64]
alpha: float
family: str
def _debiased_glm_core(
Xs: NDArray[np.float64],
y: NDArray[np.float64],
beta_hat_s: NDArray[np.float64],
intercept_s: float,
mu_hat: NDArray[np.float64],
w_diag: NDArray[np.float64],
x_mean: NDArray[np.float64],
x_scale: NDArray[np.float64],
lam_nw: NDArray[np.float64],
alpha: float,
max_iter: int,
tol: float,
n_jobs: int | None,
family: str,
lambda_main: float,
) -> DebiasedGLMResult:
"""Shared GLM debiasing core — nodewise + variance + inference.
Inputs are on the **standardized** feature scale (mean-centered,
scaled to unit variance). Output is destandardized to the original
scale.
"""
from joblib import Parallel, delayed
n, p = Xs.shape
# Floor working weights to avoid degenerate columns in X̃ near
# saturation (logistic prob ≈ 0/1) or very small Poisson means.
# The asymptotic theory requires the working weights to be bounded
# away from zero; in practice tiny floors here don't move the
# answer materially when the data are well-conditioned but prevent
# blow-ups when they aren't.
w_safe = np.maximum(w_diag, _GLM_WEIGHT_FLOOR)
w_sqrt = np.sqrt(w_safe)
X_tilde = Xs * w_sqrt[:, None]
# Nodewise lassos on the weighted design.
results = Parallel(n_jobs=n_jobs)(
delayed(_fit_nodewise_column)(X_tilde, j, float(lam_nw[j]), max_iter, tol)
for j in range(p)
)
gammas = [g for g, _ in results]
tau2s = np.array([t for _, t in results], dtype=np.float64)
Theta = _assemble_theta_rows(gammas, tau2s)
# Debias: β̂_d = β̂ + (1/n) Θ̂ Xᵀ (y − μ̂). Note this is the
# *unweighted* score (the canonical-link gradient), not the
# weighted X̃ᵀ (y − μ̂).
score = Xs.T @ (y - mu_hat)
beta_d_s = beta_hat_s + (Theta @ score) / n
# Variance: diag(Θ̂ X̃ᵀX̃ Θ̂ᵀ) / n. Compute via U = X̃ Θ̂ᵀ to skip
# the (p, p) Gram materialization.
U = X_tilde @ Theta.T
var_d_s = np.einsum("ij,ij->j", U, U) / (n * n)
se_s = np.sqrt(np.maximum(var_d_s, 0.0))
# Destandardize to original feature scale.
beta_d = beta_d_s / x_scale
beta_glm = beta_hat_s / x_scale
se = se_s / x_scale
intercept = intercept_s - float(np.dot(beta_d, x_mean))
# Wald inference.
z_alpha = float(stats.norm.ppf(1.0 - alpha / 2.0))
safe_se = np.where(se > 0, se, 1.0)
z_scores = np.where(se > 0, beta_d / safe_se, 0.0)
pvalues = 2.0 * stats.norm.sf(np.abs(z_scores))
ci_lower = beta_d - z_alpha * se
ci_upper = beta_d + z_alpha * se
return DebiasedGLMResult(
coef_debiased=beta_d,
coef_glm=beta_glm,
intercept_=intercept,
se=se,
ci_lower=ci_lower,
ci_upper=ci_upper,
pvalues=pvalues,
z_scores=z_scores,
mu_fitted=mu_hat,
Theta=Theta,
lambda_main=lambda_main,
lambda_nodewise=lam_nw,
alpha=alpha,
family=family,
)
def _standardize_X(
X: NDArray[np.float64],
standardize: bool,
) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
"""Mean-center and (optionally) unit-scale columns.
Returns ``(Xs, x_mean, x_scale)``. When ``standardize=False`` we
still center; ``x_scale`` is all ones. Constant columns get
``scale=1`` so we don't divide by zero — the penalized fit will
drop them via the usual KKT.
"""
x_mean = X.mean(axis=0)
Xc = X - x_mean
if standardize:
x_scale = Xc.std(axis=0, ddof=0)
x_scale = np.where(x_scale > 0, x_scale, 1.0)
else:
x_scale = np.ones(X.shape[1], dtype=np.float64)
return Xc / x_scale, x_mean, x_scale
def _build_lambda_nodewise(
lambda_nodewise: float | NDArray[np.float64] | None,
n: int,
p: int,
) -> NDArray[np.float64]:
if lambda_nodewise is None:
return np.full(p, _theoretical_lambda(n, p), dtype=np.float64)
if np.isscalar(lambda_nodewise):
return np.full(
p, float(lambda_nodewise), # type: ignore[arg-type]
dtype=np.float64,
)
arr = np.ascontiguousarray(lambda_nodewise, dtype=np.float64)
if arr.shape != (p,):
raise ValueError(
f"lambda_nodewise must be scalar or shape ({p},); got {arr.shape}"
)
return arr
def _sigmoid(z: NDArray[np.float64]) -> NDArray[np.float64]:
"""Numerically stable sigmoid."""
out = np.empty_like(z)
pos = z >= 0
out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
e = np.exp(z[~pos])
out[~pos] = e / (1.0 + e)
return out
# --- logistic --------------------------------------------------------
[docs]
def debiased_logistic_lasso(
X: Any,
y: Any,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
fit_intercept: bool = True,
standardize: bool = True,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> DebiasedGLMResult:
"""VBR debiased lasso for binomial logistic regression.
Construction (Van de Geer 2014 §3): fit penalized logistic lasso
to obtain ``β̂``, compute ``μ̂_i = σ(η̂_i)`` and working weights
``W_ii = μ̂_i (1 − μ̂_i)``, then build the approximate inverse
Fisher ``Θ̂`` by nodewise lassos on ``X̃ = W^{1/2} X``. The
debiased estimator is
``β̂_d = β̂ + (1/n) · Θ̂ · Xᵀ (y − μ̂)``
with asymptotic variance ``diag(Θ̂ X̃ᵀX̃ Θ̂ᵀ) / n`` (no `σ²` —
the noise is encoded in `W`).
Parameters mirror :func:`debiased_lasso`. The penalized fit is
obtained from :class:`~skein_glm.LogisticLassoRegressor`, the new
convex-L1 primitive — debiasing on top of the prior
``MCP(γ=1e9)`` approximation would have inherited its bias.
Notes
-----
Working weights are floored at ``1e-8`` to avoid degenerate
columns in `X̃` when fitted probabilities saturate near 0 or 1.
If you see suspiciously narrow CIs, inspect ``mu_fitted`` for
boundary cases.
"""
X = np.ascontiguousarray(X, dtype=np.float64)
y = np.ascontiguousarray(y, dtype=np.float64)
if X.ndim != 2:
raise ValueError(f"X must be 2D, got shape {X.shape}")
if y.ndim != 1 or y.shape[0] != X.shape[0]:
raise ValueError(
f"y must be 1D with length {X.shape[0]}, got shape {y.shape}"
)
if not np.all((y == 0.0) | (y == 1.0)):
raise ValueError("logistic regression requires y ∈ {0, 1}")
if not 0.0 < alpha < 1.0:
raise ValueError(f"alpha must be in (0, 1); got {alpha}")
n, p = X.shape
if p < 2:
raise ValueError(f"debiased lasso requires p ≥ 2 features; got p = {p}")
Xs, x_mean, x_scale = _standardize_X(X, standardize)
# The logistic primitive fits an intercept itself; we pass
# standardized X. (`fit_intercept` here means: do we let the GLM
# fit one at all.)
lambda_main = (
_theoretical_lambda(n, p) if lambda_ is None else float(lambda_)
)
if lambda_main <= 0:
raise ValueError(f"lambda_ must be > 0; got {lambda_main}")
fit = LogisticLassoRegressor(
lambda_=lambda_main,
fit_intercept=fit_intercept,
standardize=False,
max_iter=max_iter,
tol=tol,
max_outer=20,
outer_tol=tol,
).fit(Xs, y)
beta_hat_s = np.asarray(fit.coef_, dtype=np.float64)
intercept_s = float(fit.intercept_)
eta = Xs @ beta_hat_s + intercept_s
mu_hat = _sigmoid(eta)
w_diag = mu_hat * (1.0 - mu_hat)
lam_nw = _build_lambda_nodewise(lambda_nodewise, n, p)
if np.any(lam_nw <= 0):
raise ValueError("lambda_nodewise entries must be > 0")
return _debiased_glm_core(
Xs=Xs, y=y, beta_hat_s=beta_hat_s, intercept_s=intercept_s,
mu_hat=mu_hat, w_diag=w_diag,
x_mean=x_mean, x_scale=x_scale,
lam_nw=lam_nw, alpha=alpha,
max_iter=max_iter, tol=tol, n_jobs=n_jobs,
family="binomial", lambda_main=lambda_main,
)
# --- Poisson --------------------------------------------------------
[docs]
def debiased_poisson_lasso(
X: Any,
y: Any,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
fit_intercept: bool = True,
standardize: bool = True,
offset: NDArray[np.float64] | None = None,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> DebiasedGLMResult:
"""VBR debiased lasso for Poisson log-link regression.
Construction: fit penalized Poisson lasso to obtain ``β̂``,
compute ``μ̂_i = exp(η̂_i + offset_i)`` and working weights
``W_ii = μ̂_i``, build ``Θ̂`` nodewise on ``X̃ = W^{1/2} X``,
debias as in the logistic case.
Parameters mirror :func:`debiased_logistic_lasso` plus a Poisson
``offset`` (log-exposure) that participates in the linear
predictor but is not an inferential target.
Notes
-----
Poisson working weights can have very large dynamic range
(`μ̂ = exp(η̂)`). If some `μ̂_i` are tiny, the corresponding rows
of `X̃` are heavily down-weighted and the asymptotic theory's
regularity conditions may be strained. The same `1e-8` floor as
the logistic case applies — well-conditioned problems are
unaffected.
"""
X = np.ascontiguousarray(X, dtype=np.float64)
y = np.ascontiguousarray(y, dtype=np.float64)
if X.ndim != 2:
raise ValueError(f"X must be 2D, got shape {X.shape}")
if y.ndim != 1 or y.shape[0] != X.shape[0]:
raise ValueError(
f"y must be 1D with length {X.shape[0]}, got shape {y.shape}"
)
if not np.all(np.isfinite(y)) or np.any(y < 0.0):
raise ValueError("Poisson regression requires y ≥ 0 (finite)")
if not 0.0 < alpha < 1.0:
raise ValueError(f"alpha must be in (0, 1); got {alpha}")
n, p = X.shape
if p < 2:
raise ValueError(f"debiased lasso requires p ≥ 2 features; got p = {p}")
offset_arr: NDArray[np.float64] | None = None
if offset is not None:
offset_arr = np.ascontiguousarray(offset, dtype=np.float64)
if offset_arr.shape != (n,):
raise ValueError(
f"offset must be 1D with length {n}; got shape {offset_arr.shape}"
)
Xs, x_mean, x_scale = _standardize_X(X, standardize)
lambda_main = (
_theoretical_lambda(n, p) if lambda_ is None else float(lambda_)
)
if lambda_main <= 0:
raise ValueError(f"lambda_ must be > 0; got {lambda_main}")
fit_kwargs: dict[str, Any] = dict(
lambda_=lambda_main,
fit_intercept=fit_intercept,
standardize=False,
max_iter=max_iter,
tol=tol,
max_outer=20,
outer_tol=tol,
)
if offset_arr is not None:
fit_kwargs["offset"] = offset_arr
fit = PoissonLassoRegressor(**fit_kwargs).fit(Xs, y)
beta_hat_s = np.asarray(fit.coef_, dtype=np.float64)
intercept_s = float(fit.intercept_)
eta = Xs @ beta_hat_s + intercept_s
if offset_arr is not None:
eta = eta + offset_arr
mu_hat = np.exp(eta)
w_diag = mu_hat
lam_nw = _build_lambda_nodewise(lambda_nodewise, n, p)
if np.any(lam_nw <= 0):
raise ValueError("lambda_nodewise entries must be > 0")
return _debiased_glm_core(
Xs=Xs, y=y, beta_hat_s=beta_hat_s, intercept_s=intercept_s,
mu_hat=mu_hat, w_diag=w_diag,
x_mean=x_mean, x_scale=x_scale,
lam_nw=lam_nw, alpha=alpha,
max_iter=max_iter, tol=tol, n_jobs=n_jobs,
family="poisson", lambda_main=lambda_main,
)
# --- sklearn-style wrappers -----------------------------------------
class _DebiasedGLMRegressorBase(BaseEstimator):
"""Shared facade: exposes ``coef_`` / ``intercept_`` and the
inference outputs as suffixed attributes. Subclasses pin the
family-specific free function."""
coef_: NDArray[np.float64]
coef_glm_: NDArray[np.float64]
intercept_: float
se_: NDArray[np.float64]
ci_lower_: NDArray[np.float64]
ci_upper_: NDArray[np.float64]
pvalues_: NDArray[np.float64]
z_scores_: NDArray[np.float64]
Theta_: NDArray[np.float64]
mu_fitted_: NDArray[np.float64]
lambda_main_: float
lambda_nodewise_: NDArray[np.float64]
family_: str
n_features_in_: int
def _store_result(self, res: DebiasedGLMResult) -> None:
self.coef_ = res.coef_debiased
self.coef_glm_ = res.coef_glm
self.intercept_ = res.intercept_
self.se_ = res.se
self.ci_lower_ = res.ci_lower
self.ci_upper_ = res.ci_upper
self.pvalues_ = res.pvalues
self.z_scores_ = res.z_scores
self.Theta_ = res.Theta
self.mu_fitted_ = res.mu_fitted
self.lambda_main_ = res.lambda_main
self.lambda_nodewise_ = res.lambda_nodewise
self.family_ = res.family
self.n_features_in_ = int(res.coef_debiased.shape[0])
def decision_function(self, X: Any) -> NDArray[np.float64]:
X = np.ascontiguousarray(X, dtype=np.float64)
return X @ self.coef_ + self.intercept_
[docs]
class DebiasedLogisticLassoRegressor(_DebiasedGLMRegressorBase):
"""Sklearn-style facade over :func:`debiased_logistic_lasso`."""
def __init__(
self,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
fit_intercept: bool = True,
standardize: bool = True,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> None:
self.lambda_ = lambda_
self.lambda_nodewise = lambda_nodewise
self.alpha = alpha
self.fit_intercept = fit_intercept
self.standardize = standardize
self.max_iter = max_iter
self.tol = tol
self.n_jobs = n_jobs
def fit(self, X: Any, y: Any) -> "DebiasedLogisticLassoRegressor":
res = debiased_logistic_lasso(
X, y,
lambda_=self.lambda_,
lambda_nodewise=self.lambda_nodewise,
alpha=self.alpha,
fit_intercept=self.fit_intercept,
standardize=self.standardize,
max_iter=self.max_iter,
tol=self.tol,
n_jobs=self.n_jobs,
)
self._store_result(res)
return self
def predict_proba(self, X: Any) -> NDArray[np.float64]:
return _sigmoid(self.decision_function(X))
def predict(self, X: Any) -> NDArray[np.float64]:
return (self.predict_proba(X) >= 0.5).astype(np.float64)
[docs]
class DebiasedPoissonLassoRegressor(_DebiasedGLMRegressorBase):
"""Sklearn-style facade over :func:`debiased_poisson_lasso`."""
def __init__(
self,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
fit_intercept: bool = True,
standardize: bool = True,
offset: NDArray[np.float64] | None = None,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> None:
self.lambda_ = lambda_
self.lambda_nodewise = lambda_nodewise
self.alpha = alpha
self.fit_intercept = fit_intercept
self.standardize = standardize
self.offset = offset
self.max_iter = max_iter
self.tol = tol
self.n_jobs = n_jobs
def fit(self, X: Any, y: Any) -> "DebiasedPoissonLassoRegressor":
res = debiased_poisson_lasso(
X, y,
lambda_=self.lambda_,
lambda_nodewise=self.lambda_nodewise,
alpha=self.alpha,
fit_intercept=self.fit_intercept,
standardize=self.standardize,
offset=self.offset,
max_iter=self.max_iter,
tol=self.tol,
n_jobs=self.n_jobs,
)
self._store_result(res)
return self
[docs]
def predict(self, X: Any) -> NDArray[np.float64]:
"""μ̂ = exp(η̂) — matches PoissonLassoRegressor's convention."""
eta = self.decision_function(X)
if self.offset is not None:
eta = eta + np.ascontiguousarray(self.offset, dtype=np.float64)
return np.exp(eta)
# --- Cox PH ---------------------------------------------------------
@dataclass
class DebiasedCoxResult:
"""Output of :func:`debiased_cox_lasso`.
Same inferential fields as :class:`DebiasedGLMResult` minus
``mu_fitted`` (Cox PH has no ``μ`` in the canonical GLM sense)
plus ``risk_score`` ``= X β̂`` — the prognostic index on the
original feature scale.
Cox has no intercept (the baseline hazard absorbs constants);
the ``intercept_`` field is the destandardization correction
``-⟨β̂_d, x̄⟩`` so that ``X @ coef_ + intercept_`` reproduces the
linear predictor of the original-feature-scale model.
Attributes
----------
coef_debiased : ndarray (p,)
coef_glm : ndarray (p,)
Underlying penalized Cox lasso fit on the original feature
scale.
intercept_ : float
Destandardization correction. Cox `decision_function` returns
``X @ coef_ + intercept_`` — the prognostic index.
se : ndarray (p,)
ci_lower, ci_upper, pvalues, z_scores : ndarray (p,)
risk_score : ndarray (n,)
``η̂_i = ⟨x_i, β̂⟩`` at the penalized fit. The Cox prognostic
index for the training rows.
Theta : ndarray (p, p)
Approximate inverse Fisher on the standardized scale, built
nodewise on the weighted design ``X̃ = W^{1/2} X``.
lambda_main : float
lambda_nodewise : ndarray (p,)
alpha : float
family : str
Always ``"cox"``.
ties : str
Tie-handling rule used in the partial likelihood.
"""
coef_debiased: NDArray[np.float64]
coef_glm: NDArray[np.float64]
intercept_: float
se: NDArray[np.float64]
ci_lower: NDArray[np.float64]
ci_upper: NDArray[np.float64]
pvalues: NDArray[np.float64]
z_scores: NDArray[np.float64]
risk_score: NDArray[np.float64]
Theta: NDArray[np.float64]
lambda_main: float
lambda_nodewise: NDArray[np.float64]
alpha: float
family: str
ties: str
def debiased_cox_lasso(
X: Any,
time: Any,
event: Any,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
ties: str = "breslow",
standardize: bool = True,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> DebiasedCoxResult:
"""Debiased Cox lasso with Wald confidence intervals.
Construction (van de Geer–Cai–Wang extension of VBR to Cox):
1. Fit a penalized Cox lasso to obtain ``β̂``.
2. At ``β̂``, extract the per-sample partial-likelihood Fisher
information ``w_i`` and working response ``z_i`` from the Cox
IRLS surrogate (via the Rust ``CoxPH::surrogate_at`` exposed as
``_core.cox_surrogate_weights_at``).
3. Build the **weighted design** ``X̃ = W^{1/2} X`` where
``W = diag(w)``. The partial-likelihood Fisher information is
``Xᵀ W X``, so ``X̃`` plays exactly the role of ``X`` in the
LS / GLM debiased construction.
4. Build ``Θ̂`` by node-wise lasso on ``X̃`` (Van de Geer 2014 §3
construction; ``_fit_nodewise_column`` is penalty-axis agnostic
and reused unchanged).
5. Debias: ``β̂_d = β̂ + (1/n) Θ̂ · Xᵀ (event − μ̂_cox)`` where
``μ̂_cox_i = exp(η̂_i) · Λ̂_0(t_i)`` is the partial-likelihood
contribution. Computed via ``μ̂_cox = event + w·(η − z)`` from
the surrogate identity ``z = η − g / w``, ``g = event −
exp(η)·Λ̂_0``.
6. Variance: ``diag(Θ̂ X̃ᵀX̃ Θ̂ᵀ) / n²``. Same formula as the
logistic / Poisson case; the partial likelihood supplies its
own Fisher information, so there is no σ² nuisance term.
Parameters
----------
X : array-like of shape (n, p)
time : array-like of shape (n,)
Event / censoring time; must be ≥ 0.
event : array-like of shape (n,)
0/1 censoring indicator (1 = event observed).
lambda_ : float, optional
Lasso λ for the penalized Cox fit. Defaults to the
theoretical ``c · √(log p / n)`` rate.
lambda_nodewise : float or array, optional
Per-node λ for the nodewise lassos. Defaults to a tuned grid.
alpha : float, default 0.05
Wald CI level — outputs ``[α/2, 1 − α/2]`` percentile bands.
ties : {"breslow", "efron"}, default "breslow"
Tie-handling rule for the partial likelihood. Forwarded to
both the penalized fit and the Fisher-information extraction.
standardize : bool, default True
max_iter, tol : solver controls (forwarded to nodewise lassos
and the penalized Cox fit).
n_jobs : parallel jobs for the nodewise sweep.
Returns
-------
:class:`DebiasedCoxResult`
References
----------
van de Geer, S., Bühlmann, P., Ritov, Y., & Dezeure, R. (2014).
"On asymptotically optimal confidence regions and tests for
high-dimensional models." *Annals of Statistics* 42(3): 1166–1202.
— The original LS / GLM debiased construction.
Cai, T., & Wang, J. (2017). "High-dimensional inference for
covariance and precision matrices and applications to survival
analysis." *J. Am. Stat. Assoc.* — Cox-PH extension.
"""
from joblib import Parallel, delayed
from skein_glm._core import cox_surrogate_weights_at
X = np.ascontiguousarray(X, dtype=np.float64)
time = np.ascontiguousarray(time, dtype=np.float64)
event = np.ascontiguousarray(event, dtype=np.float64)
if X.ndim != 2:
raise ValueError(f"X must be 2D, got shape {X.shape}")
n, p = X.shape
if time.shape != (n,) or event.shape != (n,):
raise ValueError(
f"time / event must be 1D with length {n}; got "
f"{time.shape} / {event.shape}"
)
if not np.all(np.isfinite(time)) or np.any(time < 0):
raise ValueError("Cox PH requires time ≥ 0 (finite)")
valid_events = np.isin(event, [0.0, 1.0])
if not np.all(valid_events):
raise ValueError("Cox PH requires event ∈ {0, 1}")
if int(event.sum()) == 0:
raise ValueError("Cox PH requires at least one event (event = 1)")
if not 0.0 < alpha < 1.0:
raise ValueError(f"alpha must be in (0, 1); got {alpha}")
if ties not in ("breslow", "efron"):
raise ValueError(f"ties must be 'breslow' or 'efron'; got {ties!r}")
if p < 2:
raise ValueError(f"debiased lasso requires p ≥ 2 features; got p = {p}")
Xs, x_mean, x_scale = _standardize_X(X, standardize)
lambda_main = (
_theoretical_lambda(n, p) if lambda_ is None else float(lambda_)
)
if lambda_main <= 0:
raise ValueError(f"lambda_ must be > 0; got {lambda_main}")
# Cox-lasso via MCP at large γ (the standard skein idiom — group-MCP
# with γ→∞ recovers L1 exactly, and the Rust path supports it
# natively). Cox has no intercept; the baseline hazard absorbs
# additive constants.
fit = CoxMCPRegressor(
lambda_=lambda_main,
gamma=1e10,
ties=ties,
standardize=False,
max_iter=max_iter,
tol=tol,
max_outer=20,
outer_tol=tol,
).fit(Xs, time, event)
beta_hat_s = np.asarray(fit.coef_, dtype=np.float64)
eta_hat = Xs @ beta_hat_s
# Extract per-sample Cox Fisher information w_i and working
# response z_i at the fitted β̂. The Rust surrogate returns
# (w, z) such that z = η − g/w, g = event − exp(η)·Λ̂_0(t).
w, z = cox_surrogate_weights_at(Xs, time, event, beta_hat_s, ties=ties)
w = np.asarray(w, dtype=np.float64)
z = np.asarray(z, dtype=np.float64)
# μ̂_cox = exp(η)·Λ̂_0(t) = event − g = event + w·(η − z).
# The GLM core consumes (y, mu_hat) and forms (y − mu_hat) =
# event − μ̂_cox = -g, the Cox score residual.
mu_hat_cox = event + w * (eta_hat - z)
lam_nw = _build_lambda_nodewise(lambda_nodewise, n, p)
if np.any(lam_nw <= 0):
raise ValueError("lambda_nodewise entries must be > 0")
# Inline the GLM-core debiasing math; intercept_s=0 because Cox
# has none. Mirrors _debiased_glm_core but produces DebiasedCoxResult.
w_safe = np.maximum(w, _GLM_WEIGHT_FLOOR)
w_sqrt = np.sqrt(w_safe)
X_tilde = Xs * w_sqrt[:, None]
results = Parallel(n_jobs=n_jobs)(
delayed(_fit_nodewise_column)(X_tilde, j, float(lam_nw[j]), max_iter, tol)
for j in range(p)
)
gammas = [g for g, _ in results]
tau2s = np.array([t for _, t in results], dtype=np.float64)
Theta = _assemble_theta_rows(gammas, tau2s)
score = Xs.T @ (event - mu_hat_cox)
beta_d_s = beta_hat_s + (Theta @ score) / n
U = X_tilde @ Theta.T
var_d_s = np.einsum("ij,ij->j", U, U) / (n * n)
se_s = np.sqrt(np.maximum(var_d_s, 0.0))
beta_d = beta_d_s / x_scale
beta_glm = beta_hat_s / x_scale
se = se_s / x_scale
intercept = -float(np.dot(beta_d, x_mean))
# Risk score is the η̂ on the *original* feature scale —
# X @ beta_d + intercept reconstructs the centered linear
# predictor; the Cox partial likelihood is invariant to the
# additive intercept so this matches the conventional usage.
risk_score = X @ beta_d + intercept
z_alpha = float(stats.norm.ppf(1.0 - alpha / 2.0))
safe_se = np.where(se > 0, se, 1.0)
z_scores = np.where(se > 0, beta_d / safe_se, 0.0)
pvalues = 2.0 * stats.norm.sf(np.abs(z_scores))
ci_lower = beta_d - z_alpha * se
ci_upper = beta_d + z_alpha * se
return DebiasedCoxResult(
coef_debiased=beta_d,
coef_glm=beta_glm,
intercept_=intercept,
se=se,
ci_lower=ci_lower,
ci_upper=ci_upper,
pvalues=pvalues,
z_scores=z_scores,
risk_score=risk_score,
Theta=Theta,
lambda_main=lambda_main,
lambda_nodewise=lam_nw,
alpha=alpha,
family="cox",
ties=ties,
)
class DebiasedCoxLassoRegressor(BaseEstimator):
"""Sklearn-style facade over :func:`debiased_cox_lasso`.
Cox PH has a different signature from the scalar-``y`` GLMs
(requires ``time`` and ``event`` instead of ``y``), so this
estimator doesn't inherit from :class:`_DebiasedGLMRegressorBase`.
Attributes mirror the other debiased estimators with the GLM
suffix convention (``coef_``, ``se_``, ``pvalues_``, etc.). The
Cox-specific :attr:`risk_score_` is set to the prognostic index
on the training data after :meth:`fit`.
"""
coef_: NDArray[np.float64]
coef_glm_: NDArray[np.float64]
intercept_: float
se_: NDArray[np.float64]
ci_lower_: NDArray[np.float64]
ci_upper_: NDArray[np.float64]
pvalues_: NDArray[np.float64]
z_scores_: NDArray[np.float64]
risk_score_: NDArray[np.float64]
Theta_: NDArray[np.float64]
lambda_main_: float
lambda_nodewise_: NDArray[np.float64]
n_features_in_: int
def __init__(
self,
*,
lambda_: float | None = None,
lambda_nodewise: float | NDArray[np.float64] | None = None,
alpha: float = 0.05,
ties: str = "breslow",
standardize: bool = True,
max_iter: int = 1000,
tol: float = 1e-7,
n_jobs: int | None = None,
) -> None:
self.lambda_ = lambda_
self.lambda_nodewise = lambda_nodewise
self.alpha = alpha
self.ties = ties
self.standardize = standardize
self.max_iter = max_iter
self.tol = tol
self.n_jobs = n_jobs
def fit(
self, X: Any, time: Any, event: Any
) -> "DebiasedCoxLassoRegressor":
res = debiased_cox_lasso(
X, time, event,
lambda_=self.lambda_,
lambda_nodewise=self.lambda_nodewise,
alpha=self.alpha,
ties=self.ties,
standardize=self.standardize,
max_iter=self.max_iter,
tol=self.tol,
n_jobs=self.n_jobs,
)
self.coef_ = res.coef_debiased
self.coef_glm_ = res.coef_glm
self.intercept_ = res.intercept_
self.se_ = res.se
self.ci_lower_ = res.ci_lower
self.ci_upper_ = res.ci_upper
self.pvalues_ = res.pvalues
self.z_scores_ = res.z_scores
self.risk_score_ = res.risk_score
self.Theta_ = res.Theta
self.lambda_main_ = res.lambda_main
self.lambda_nodewise_ = res.lambda_nodewise
self.family_ = res.family
self.ties_ = res.ties
self.n_features_in_ = int(res.coef_debiased.shape[0])
return self
def decision_function(self, X: Any) -> NDArray[np.float64]:
"""Prognostic index ``η = X · β + intercept``."""
X = np.ascontiguousarray(X, dtype=np.float64)
return X @ self.coef_ + self.intercept_
def predict(self, X: Any) -> NDArray[np.float64]:
"""Alias for :meth:`decision_function` (Cox has no μ)."""
return self.decision_function(X)
__all__ = [
"DebiasedLassoResult",
"debiased_lasso",
"DebiasedLassoRegressor",
"DebiasedGLMResult",
"debiased_logistic_lasso",
"debiased_poisson_lasso",
"DebiasedLogisticLassoRegressor",
"DebiasedPoissonLassoRegressor",
"DebiasedCoxResult",
"debiased_cox_lasso",
"DebiasedCoxLassoRegressor",
]