Sure Independent Screening

`pydts.screening.SISTwoStagesFitter()` ¤

Bases: BaseSISTwoStages

Source code in src/pydts/screening.py

def __init__(self):
    super().__init__()
    self.TwoStagesFitter_type = 'CoxPHFitter'

`TwoStagesFitter_type = 'CoxPHFitter'` `instance-attribute` ¤

`chosen_covariates = None` `instance-attribute` ¤

`chosen_covariates_j = None` `instance-attribute` ¤

`covariates = None` `instance-attribute` ¤

`df = pd.DataFrame()` `instance-attribute` ¤

`duration_col = None` `instance-attribute` ¤

`event_type_col = None` `instance-attribute` ¤

`events = None` `instance-attribute` ¤

`expanded_df = pd.DataFrame()` `instance-attribute` ¤

`final_model = None` `instance-attribute` ¤

`marginal_estimates_df = pd.DataFrame()` `instance-attribute` ¤

`null_model_df = None` `instance-attribute` ¤

`permuted_df = pd.DataFrame()` `instance-attribute` ¤

`permuted_expanded_df = pd.DataFrame()` `instance-attribute` ¤

`pid_col = None` `instance-attribute` ¤

`threshold = None` `instance-attribute` ¤

`times = None` `instance-attribute` ¤

`_get_params_cols_from_res_df(res_df)` ¤

Source code in src/pydts/screening.py

def _get_params_cols_from_res_df(self, res_df):
    if self.TwoStagesFitter_type == 'Exact':
        _params_cols = [c for c in res_df.columns if '   coef   ' in c]
    else:
        _params_cols = [c for c in res_df.columns if 'params' in c]
    return _params_cols

`fit(df, threshold=None, quantile=1, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None, fit_final_model=True)` ¤

This method performs the principled sure independence screening (PSIS) process of Zhao et al. (2012) for discrete-time data with data-driven threshold.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	training data for fitting the model	required
`threshold`	`float`	a user defined threshold. Defaults to None, i.e. data-driven threshold	`None`
`quantile`	`float`	the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Only in use when threshold = None. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.	`1`
`covariates`	`list`	list of covariates to estimate the marginal regression coefficient for.	`None`
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`WORKERS`
`seed`	`int`	pseudo random state.	`None`
`fit_final_model`	`boolean`	True if to fit and return the TwoStagesFitter with the selected covariates.	`True`

Returns:

Name	Type	Description
`final_model`	`TwoStagesFitter`	estimated model with the chosen covariates after PSIS.

Source code in src/pydts/screening.py

def fit(self,
        df: pd.DataFrame,
        threshold: float = None,
        quantile: float = 1,
        covariates: List = None,
        event_type_col: str = 'J',
        duration_col: str = 'X',
        pid_col: str = 'pid',
        x0: Union[np.array, int] = 0,
        fit_beta_kwargs: dict = {},
        verbose: int = 2,
        nb_workers: int = WORKERS,
        seed: int = None,
        fit_final_model: bool = True):

    """
    This method performs the principled sure independence screening (PSIS) process of Zhao et al. (2012) for discrete-time data with data-driven threshold.

    Args:
        df (pd.DataFrame): training data for fitting the model
        threshold (float): a user defined threshold. Defaults to None, i.e. data-driven threshold
        quantile (float): the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Only in use when threshold = None. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.
        covariates (list): list of covariates to estimate the marginal regression coefficient for.
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        verbose (int, Optional): The verbosity level of pandaallel
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.
        seed (int): pseudo random state.
        fit_final_model (boolean): True if to fit and return the TwoStagesFitter with the selected covariates.

    Returns:
        final_model (TwoStagesFitter): estimated model with the chosen covariates after PSIS.
    """

    self.events = [c for c in sorted(df[event_type_col].unique()) if c != 0]
    if covariates is None:
        covariates = [col for col in df if col not in [event_type_col, duration_col, pid_col]]
    self.covariates = covariates
    self.event_type_col = event_type_col
    self.duration_col = duration_col
    self.pid_col = pid_col
    self.times = sorted(df[duration_col].unique())

    if threshold is not None:
        self.threshold = threshold
    else:
        self.threshold = self.get_data_driven_threshold(df=df,
                                                        covariates=covariates,
                                                        quantile=quantile,
                                                        event_type_col=event_type_col,
                                                        duration_col=duration_col,
                                                        pid_col=pid_col,
                                                        x0=x0,
                                                        fit_beta_kwargs=fit_beta_kwargs,
                                                        verbose=verbose,
                                                        nb_workers=nb_workers,
                                                        seed=seed)
    self.df = df
    self.expanded_df = get_expanded_df(df=df,
                                       event_type_col=event_type_col,
                                       duration_col=duration_col,
                                       pid_col=pid_col)

    self.marginal_estimates_df = self.get_marginal_estimates(expanded_df=self.expanded_df,
                                                             covariates=covariates,
                                                             event_type_col=event_type_col,
                                                             duration_col=duration_col,
                                                             pid_col=pid_col,
                                                             verbose=verbose,
                                                             x0=x0,
                                                             fit_beta_kwargs=fit_beta_kwargs,
                                                             nb_workers=nb_workers)

    chosen_covariates = []
    chosen_covariates_j = {}

    _params_cols = self._get_params_cols_from_res_df(self.marginal_estimates_df)

    for c in _params_cols:
        if self.TwoStagesFitter_type == 'Exact':
            event = int(c[0])
        else:
            event = int(c[1:].split('_')[0])
        chosen_covariates_j[event] = self.marginal_estimates_df[self.marginal_estimates_df[c].abs() >= self.threshold].index.tolist()
        chosen_covariates.extend(chosen_covariates_j[event])

    self.chosen_covariates = sorted(np.unique(chosen_covariates))
    self.chosen_covariates_j = chosen_covariates_j

    if fit_final_model:
        if self.TwoStagesFitter_type == 'Exact':
            self.final_model = TwoStagesFitterExact()
        else:
            self.final_model = TwoStagesFitter()
        self.final_model.fit(df=df,
                             covariates=self.chosen_covariates_j,
                             event_type_col=event_type_col,
                             duration_col=duration_col,
                             pid_col=pid_col,
                             x0=x0,
                             fit_beta_kwargs=fit_beta_kwargs,
                             verbose=verbose,
                             nb_workers=nb_workers)

    return self.final_model

`fit_marginal_model(expanded_df, covariate, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=1)` ¤

This method fits a marginal model to data using a single covariate. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

Parameters:

Name	Type	Description	Default
`expanded_df`	`DataFrame`	expanded training data for fitting the model	required
`covariate`	`str`	a single covariate to be used in estimating the regression coefficients	required
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`1`

Returns:

Name	Type	Description
`result`	`DataFrame`	Estimated parameter and standard errors. TwoStagesFitter.get_beta_SE() output.

Source code in src/pydts/screening.py

def fit_marginal_model(self,
                       expanded_df,
                       covariate: str,
                       event_type_col: str = 'J',
                       duration_col: str = 'X',
                       pid_col: str = 'pid',
                       x0: Union[np.array, int] = 0,
                       fit_beta_kwargs: dict = {},
                       verbose: int = 2,
                       nb_workers: int = 1):
    """
    This method fits a marginal model to data using a single covariate. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

    Args:
        expanded_df (pd.DataFrame): expanded training data for fitting the model
        covariate (str): a single covariate to be used in estimating the regression coefficients
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        verbose (int, Optional): The verbosity level of pandaallel
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.

    Returns:
        result (pd.DataFrame): Estimated parameter and standard errors. TwoStagesFitter.get_beta_SE() output.
    """

    if self.events is None:
        self.events = [c for c in sorted(expanded_df[event_type_col].unique()) if c != 0]

    if self.TwoStagesFitter_type == 'Exact':
        marginal_model = MarginalTwoStagesFitterExact()
    else:
        marginal_model = MarginalTwoStagesFitter()

    marginal_model.fit(
        expanded_df=expanded_df[[pid_col, covariate, event_type_col, duration_col, 'j_0'] +
                                [f'j_{e}' for e in self.events]],
        covariates=[covariate],
        event_type_col=event_type_col,
        duration_col=duration_col,
        pid_col=pid_col,
        x0=x0,
        fit_beta_kwargs=fit_beta_kwargs,
        verbose=verbose,
        nb_workers=nb_workers)

    result = marginal_model.get_beta_SE()
    del marginal_model
    return result

`get_data_driven_threshold(df, covariates=None, quantile=1, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None)` ¤

This method calculates a data-driven threshold for each risk. It fits marginal models to the permuted data and returns the required quantile of the absolute values of the coefficients estimated from the null model.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	training data for fitting the model	required
`covariates`	`list`	list of covariates to estimate the marginal regression coefficient for.	`None`
`quantile`	`float`	represents the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.	`1`
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`WORKERS`
`seed`	`int`	pseudo random state.	`None`

Returns:

Name	Type	Description
`threshold`	`Series`	Estimated thresholds.

Source code in src/pydts/screening.py

def get_data_driven_threshold(self,
                              df,
                              covariates: List = None,
                              quantile: float = 1,
                              event_type_col: str = 'J',
                              duration_col: str = 'X',
                              pid_col: str = 'pid',
                              x0: Union[np.array, int] = 0,
                              fit_beta_kwargs: dict = {},
                              verbose: int = 2,
                              nb_workers: int = WORKERS,
                              seed: int = None):

    """
    This method calculates a data-driven threshold for each risk. It fits marginal models to the permuted data and returns the required quantile of the absolute values of the coefficients estimated from the null model.

    Args:
        df (pd.DataFrame): training data for fitting the model
        covariates (list): list of covariates to estimate the marginal regression coefficient for.
        quantile (float): represents the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        verbose (int, Optional): The verbosity level of pandaallel
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.
        seed (int): pseudo random state.

    Returns:
        threshold (pd.Series): Estimated thresholds.
    """

    if self.events is None:
        self.events = [c for c in sorted(df[event_type_col].unique()) if c != 0]
    if covariates is None:
        covariates = [col for col in df if col not in [event_type_col, duration_col, pid_col]]

    self.permute_df(df=df, event_type_col=event_type_col,
                    duration_col=duration_col, pid_col=pid_col, seed=seed)
    self.null_model_df = self.get_marginal_estimates(expanded_df=self.permuted_expanded_df,
                                                     covariates=covariates,
                                                     event_type_col=event_type_col,
                                                     duration_col=duration_col,
                                                     pid_col=pid_col,
                                                     verbose=verbose,
                                                     x0=x0,
                                                     fit_beta_kwargs=fit_beta_kwargs,
                                                     nb_workers=nb_workers)

    _params_cols = self._get_params_cols_from_res_df(self.null_model_df)
    self.threshold = np.quantile(self.null_model_df[_params_cols].abs().values, q=quantile)
    return self.threshold

`get_marginal_estimates(expanded_df, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', verbose=2, x0=0, fit_beta_kwargs={}, nb_workers=WORKERS)` ¤

This method fits a marginal model to data to each of the covariates. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

Parameters:

Name	Type	Description	Default
`expanded_df`	`DataFrame`	expanded training data for fitting the model	required
`covariates`	`list`	list of covariates to estimate the marginal regression coefficient for.	`None`
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`WORKERS`

Returns:

Name	Type	Description
`results_df`	`DataFrame`	Estimated parameters and standard errors of the marginal models. A concatenation of all the TwoStagesFitter.get_beta_SE() outputs.

Source code in src/pydts/screening.py

def get_marginal_estimates(self,
                           expanded_df,
                           covariates: Union[List, dict] = None,
                           event_type_col: str = 'J',
                           duration_col: str = 'X',
                           pid_col: str = 'pid',
                           verbose: int = 2,
                           x0: Union[np.array, int] = 0,
                           fit_beta_kwargs: dict = {},
                           nb_workers: int = WORKERS):

    """
    This method fits a marginal model to data to each of the covariates. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

    Args:
        expanded_df (pd.DataFrame): expanded training data for fitting the model
        covariates (list): list of covariates to estimate the marginal regression coefficient for.
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        verbose (int, Optional): The verbosity level of pandaallel
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.

    Returns:
        results_df (pd.DataFrame): Estimated parameters and standard errors of the marginal models. A concatenation of all the TwoStagesFitter.get_beta_SE() outputs.
    """

    if self.events is None:
        self.events = [c for c in sorted(expanded_df[event_type_col].unique()) if c != 0]

    if covariates is None:
        covariates = [col for col in expanded_df if col not in ([event_type_col, duration_col, pid_col, 'j_0'] +
                      [f'j_{e}' for e in self.events])]

    parallel = Parallel(n_jobs=nb_workers, verbose=verbose)
    results_df = pd.DataFrame()
    if isinstance(covariates, list):
        _results = parallel(delayed(self.fit_marginal_model)(expanded_df, cov,
                                                             event_type_col, duration_col, pid_col,
                                                             x0, fit_beta_kwargs, verbose, nb_workers)
                                                             for cov in covariates)
        results_df = pd.concat(_results)
    elif isinstance(covariates, dict):
        raise ValueError("Please provide a list of covariates for the marginal testing, including the union of all possible options across all risks.")
    # elif isinstance(covariates, dict):
    #     for event in self.events:
    #         _results = parallel(delayed(self.fit_marginal_model)(expanded_df, cov,
    #                                                              event_type_col, duration_col, pid_col,
    #                                                              x0, fit_beta_kwargs, verbose, nb_workers)
    #                                                              for cov in covariates[event])
    #         event_results_df = pd.concat(_results)
    #         results_df = pd.concat([results_df, event_results_df], axis=1)

    return results_df.astype(float)

`permute_df(df, event_type_col='J', duration_col='X', pid_col='pid', seed=None)` ¤

This method applies random permutation on the event-time and event-type columns of the training data such that the covariates are decoupled from the outcome; the permuted data follow the null model.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	training data for fitting the model	required
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`seed`	`(int, Optional)`	pseudo random state.	`None`

Returns:

Name	Type	Description
`permuted_df`	`DataFrame`	null model data.

Source code in src/pydts/screening.py

def permute_df(self,
               df,
               event_type_col: str = 'J',
               duration_col: str = 'X',
               pid_col: str = 'pid',
               seed: int = None):

    """
    This method applies random permutation on the event-time and event-type columns of the training data such that the covariates are decoupled from the outcome; the permuted data follow the null model.

    Args:
        df (pd.DataFrame): training data for fitting the model
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        seed (int, Optional): pseudo random state.

    Returns:
        permuted_df (pd.DataFrame): null model data.
    """

    permuted_df = df.copy()
    np.random.seed(seed)
    permuted_index = np.random.permutation(permuted_df.index)
    permuted_df.loc[:, duration_col] = df.loc[permuted_index, duration_col].values
    permuted_df.loc[:, event_type_col] = df.loc[permuted_index, event_type_col].values
    self.permuted_df = permuted_df
    self.permuted_expanded_df = get_expanded_df(df=self.permuted_df,
                                                event_type_col=event_type_col,
                                                duration_col=duration_col,
                                                pid_col=pid_col)
    return permuted_df

`pydts.screening.SISTwoStagesFitterExact()` ¤

Bases: BaseSISTwoStages

Source code in src/pydts/screening.py

def __init__(self):
    super().__init__()
    self.TwoStagesFitter_type = 'Exact'

`TwoStagesFitter_type = 'Exact'` `instance-attribute` ¤

`chosen_covariates = None` `instance-attribute` ¤

`chosen_covariates_j = None` `instance-attribute` ¤

`covariates = None` `instance-attribute` ¤

`df = pd.DataFrame()` `instance-attribute` ¤

`duration_col = None` `instance-attribute` ¤

`event_type_col = None` `instance-attribute` ¤

`events = None` `instance-attribute` ¤

`expanded_df = pd.DataFrame()` `instance-attribute` ¤

`final_model = None` `instance-attribute` ¤

`marginal_estimates_df = pd.DataFrame()` `instance-attribute` ¤

`null_model_df = None` `instance-attribute` ¤

`permuted_df = pd.DataFrame()` `instance-attribute` ¤

`permuted_expanded_df = pd.DataFrame()` `instance-attribute` ¤

`pid_col = None` `instance-attribute` ¤

`threshold = None` `instance-attribute` ¤

`times = None` `instance-attribute` ¤

`_get_params_cols_from_res_df(res_df)` ¤

Source code in src/pydts/screening.py

def _get_params_cols_from_res_df(self, res_df):
    if self.TwoStagesFitter_type == 'Exact':
        _params_cols = [c for c in res_df.columns if '   coef   ' in c]
    else:
        _params_cols = [c for c in res_df.columns if 'params' in c]
    return _params_cols

`fit(df, threshold=None, quantile=1, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None, fit_final_model=True)` ¤

This method performs the principled sure independence screening (PSIS) process of Zhao et al. (2012) for discrete-time data with data-driven threshold.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	training data for fitting the model	required
`threshold`	`float`	a user defined threshold. Defaults to None, i.e. data-driven threshold	`None`
`quantile`	`float`	the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Only in use when threshold = None. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.	`1`
`covariates`	`list`	list of covariates to estimate the marginal regression coefficient for.	`None`
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`WORKERS`
`seed`	`int`	pseudo random state.	`None`
`fit_final_model`	`boolean`	True if to fit and return the TwoStagesFitter with the selected covariates.	`True`

Returns:

Name	Type	Description
`final_model`	`TwoStagesFitter`	estimated model with the chosen covariates after PSIS.

Source code in src/pydts/screening.py

def fit(self,
        df: pd.DataFrame,
        threshold: float = None,
        quantile: float = 1,
        covariates: List = None,
        event_type_col: str = 'J',
        duration_col: str = 'X',
        pid_col: str = 'pid',
        x0: Union[np.array, int] = 0,
        fit_beta_kwargs: dict = {},
        verbose: int = 2,
        nb_workers: int = WORKERS,
        seed: int = None,
        fit_final_model: bool = True):

    """
    This method performs the principled sure independence screening (PSIS) process of Zhao et al. (2012) for discrete-time data with data-driven threshold.

    Args:
        df (pd.DataFrame): training data for fitting the model
        threshold (float): a user defined threshold. Defaults to None, i.e. data-driven threshold
        quantile (float): the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Only in use when threshold = None. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.
        covariates (list): list of covariates to estimate the marginal regression coefficient for.
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        verbose (int, Optional): The verbosity level of pandaallel
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.
        seed (int): pseudo random state.
        fit_final_model (boolean): True if to fit and return the TwoStagesFitter with the selected covariates.

    Returns:
        final_model (TwoStagesFitter): estimated model with the chosen covariates after PSIS.
    """

    self.events = [c for c in sorted(df[event_type_col].unique()) if c != 0]
    if covariates is None:
        covariates = [col for col in df if col not in [event_type_col, duration_col, pid_col]]
    self.covariates = covariates
    self.event_type_col = event_type_col
    self.duration_col = duration_col
    self.pid_col = pid_col
    self.times = sorted(df[duration_col].unique())

    if threshold is not None:
        self.threshold = threshold
    else:
        self.threshold = self.get_data_driven_threshold(df=df,
                                                        covariates=covariates,
                                                        quantile=quantile,
                                                        event_type_col=event_type_col,
                                                        duration_col=duration_col,
                                                        pid_col=pid_col,
                                                        x0=x0,
                                                        fit_beta_kwargs=fit_beta_kwargs,
                                                        verbose=verbose,
                                                        nb_workers=nb_workers,
                                                        seed=seed)
    self.df = df
    self.expanded_df = get_expanded_df(df=df,
                                       event_type_col=event_type_col,
                                       duration_col=duration_col,
                                       pid_col=pid_col)

    self.marginal_estimates_df = self.get_marginal_estimates(expanded_df=self.expanded_df,
                                                             covariates=covariates,
                                                             event_type_col=event_type_col,
                                                             duration_col=duration_col,
                                                             pid_col=pid_col,
                                                             verbose=verbose,
                                                             x0=x0,
                                                             fit_beta_kwargs=fit_beta_kwargs,
                                                             nb_workers=nb_workers)

    chosen_covariates = []
    chosen_covariates_j = {}

    _params_cols = self._get_params_cols_from_res_df(self.marginal_estimates_df)

    for c in _params_cols:
        if self.TwoStagesFitter_type == 'Exact':
            event = int(c[0])
        else:
            event = int(c[1:].split('_')[0])
        chosen_covariates_j[event] = self.marginal_estimates_df[self.marginal_estimates_df[c].abs() >= self.threshold].index.tolist()
        chosen_covariates.extend(chosen_covariates_j[event])

    self.chosen_covariates = sorted(np.unique(chosen_covariates))
    self.chosen_covariates_j = chosen_covariates_j

    if fit_final_model:
        if self.TwoStagesFitter_type == 'Exact':
            self.final_model = TwoStagesFitterExact()
        else:
            self.final_model = TwoStagesFitter()
        self.final_model.fit(df=df,
                             covariates=self.chosen_covariates_j,
                             event_type_col=event_type_col,
                             duration_col=duration_col,
                             pid_col=pid_col,
                             x0=x0,
                             fit_beta_kwargs=fit_beta_kwargs,
                             verbose=verbose,
                             nb_workers=nb_workers)

    return self.final_model

`fit_marginal_model(expanded_df, covariate, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=1)` ¤

This method fits a marginal model to data using a single covariate. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

Parameters:

Name	Type	Description	Default
`expanded_df`	`DataFrame`	expanded training data for fitting the model	required
`covariate`	`str`	a single covariate to be used in estimating the regression coefficients	required
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`1`

Returns:

Name	Type	Description
`result`	`DataFrame`	Estimated parameter and standard errors. TwoStagesFitter.get_beta_SE() output.

Source code in src/pydts/screening.py

def fit_marginal_model(self,
                       expanded_df,
                       covariate: str,
                       event_type_col: str = 'J',
                       duration_col: str = 'X',
                       pid_col: str = 'pid',
                       x0: Union[np.array, int] = 0,
                       fit_beta_kwargs: dict = {},
                       verbose: int = 2,
                       nb_workers: int = 1):
    """
    This method fits a marginal model to data using a single covariate. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

    Args:
        expanded_df (pd.DataFrame): expanded training data for fitting the model
        covariate (str): a single covariate to be used in estimating the regression coefficients
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        verbose (int, Optional): The verbosity level of pandaallel
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.

    Returns:
        result (pd.DataFrame): Estimated parameter and standard errors. TwoStagesFitter.get_beta_SE() output.
    """

    if self.events is None:
        self.events = [c for c in sorted(expanded_df[event_type_col].unique()) if c != 0]

    if self.TwoStagesFitter_type == 'Exact':
        marginal_model = MarginalTwoStagesFitterExact()
    else:
        marginal_model = MarginalTwoStagesFitter()

    marginal_model.fit(
        expanded_df=expanded_df[[pid_col, covariate, event_type_col, duration_col, 'j_0'] +
                                [f'j_{e}' for e in self.events]],
        covariates=[covariate],
        event_type_col=event_type_col,
        duration_col=duration_col,
        pid_col=pid_col,
        x0=x0,
        fit_beta_kwargs=fit_beta_kwargs,
        verbose=verbose,
        nb_workers=nb_workers)

    result = marginal_model.get_beta_SE()
    del marginal_model
    return result

`get_data_driven_threshold(df, covariates=None, quantile=1, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None)` ¤

This method calculates a data-driven threshold for each risk. It fits marginal models to the permuted data and returns the required quantile of the absolute values of the coefficients estimated from the null model.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	training data for fitting the model	required
`covariates`	`list`	list of covariates to estimate the marginal regression coefficient for.	`None`
`quantile`	`float`	represents the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.	`1`
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`WORKERS`
`seed`	`int`	pseudo random state.	`None`

Returns:

Name	Type	Description
`threshold`	`Series`	Estimated thresholds.

Source code in src/pydts/screening.py

def get_data_driven_threshold(self,
                              df,
                              covariates: List = None,
                              quantile: float = 1,
                              event_type_col: str = 'J',
                              duration_col: str = 'X',
                              pid_col: str = 'pid',
                              x0: Union[np.array, int] = 0,
                              fit_beta_kwargs: dict = {},
                              verbose: int = 2,
                              nb_workers: int = WORKERS,
                              seed: int = None):

    """
    This method calculates a data-driven threshold for each risk. It fits marginal models to the permuted data and returns the required quantile of the absolute values of the coefficients estimated from the null model.

    Args:
        df (pd.DataFrame): training data for fitting the model
        covariates (list): list of covariates to estimate the marginal regression coefficient for.
        quantile (float): represents the quantile of the absolute values of the coefficients from the null model that determines the data-driven threshold. Defaults to 1, which corresponds to the maximum absolute value of the null model's coefficients.
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        verbose (int, Optional): The verbosity level of pandaallel
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.
        seed (int): pseudo random state.

    Returns:
        threshold (pd.Series): Estimated thresholds.
    """

    if self.events is None:
        self.events = [c for c in sorted(df[event_type_col].unique()) if c != 0]
    if covariates is None:
        covariates = [col for col in df if col not in [event_type_col, duration_col, pid_col]]

    self.permute_df(df=df, event_type_col=event_type_col,
                    duration_col=duration_col, pid_col=pid_col, seed=seed)
    self.null_model_df = self.get_marginal_estimates(expanded_df=self.permuted_expanded_df,
                                                     covariates=covariates,
                                                     event_type_col=event_type_col,
                                                     duration_col=duration_col,
                                                     pid_col=pid_col,
                                                     verbose=verbose,
                                                     x0=x0,
                                                     fit_beta_kwargs=fit_beta_kwargs,
                                                     nb_workers=nb_workers)

    _params_cols = self._get_params_cols_from_res_df(self.null_model_df)
    self.threshold = np.quantile(self.null_model_df[_params_cols].abs().values, q=quantile)
    return self.threshold

`get_marginal_estimates(expanded_df, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', verbose=2, x0=0, fit_beta_kwargs={}, nb_workers=WORKERS)` ¤

This method fits a marginal model to data to each of the covariates. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

Parameters:

Name	Type	Description	Default
`expanded_df`	`DataFrame`	expanded training data for fitting the model	required
`covariates`	`list`	list of covariates to estimate the marginal regression coefficient for.	`None`
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`verbose`	`(int, Optional)`	The verbosity level of pandaallel	`2`
`x0`	`(Union[array, int], Optional)`	initial guess to pass to scipy.optimize.minimize function	`0`
`fit_beta_kwargs`	`(dict, Optional)`	Keyword arguments to pass on to the estimation procedure.	`{}`
`nb_workers`	`(int, Optional)`	The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.	`WORKERS`

Returns:

Name	Type	Description
`results_df`	`DataFrame`	Estimated parameters and standard errors of the marginal models. A concatenation of all the TwoStagesFitter.get_beta_SE() outputs.

Source code in src/pydts/screening.py

def get_marginal_estimates(self,
                           expanded_df,
                           covariates: Union[List, dict] = None,
                           event_type_col: str = 'J',
                           duration_col: str = 'X',
                           pid_col: str = 'pid',
                           verbose: int = 2,
                           x0: Union[np.array, int] = 0,
                           fit_beta_kwargs: dict = {},
                           nb_workers: int = WORKERS):

    """
    This method fits a marginal model to data to each of the covariates. Note that the expanded discrete-time data is expected as an input (see the Methods section of PyDTS documentation and pydts.utils.get_expanded_df).

    Args:
        expanded_df (pd.DataFrame): expanded training data for fitting the model
        covariates (list): list of covariates to estimate the marginal regression coefficient for.
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        verbose (int, Optional): The verbosity level of pandaallel
        x0 (Union[numpy.array, int], Optional): initial guess to pass to scipy.optimize.minimize function
        fit_beta_kwargs (dict, Optional): Keyword arguments to pass on to the estimation procedure.
        nb_workers (int, Optional): The number of workers to pandaallel. If not sepcified, defaults to the number of workers available.

    Returns:
        results_df (pd.DataFrame): Estimated parameters and standard errors of the marginal models. A concatenation of all the TwoStagesFitter.get_beta_SE() outputs.
    """

    if self.events is None:
        self.events = [c for c in sorted(expanded_df[event_type_col].unique()) if c != 0]

    if covariates is None:
        covariates = [col for col in expanded_df if col not in ([event_type_col, duration_col, pid_col, 'j_0'] +
                      [f'j_{e}' for e in self.events])]

    parallel = Parallel(n_jobs=nb_workers, verbose=verbose)
    results_df = pd.DataFrame()
    if isinstance(covariates, list):
        _results = parallel(delayed(self.fit_marginal_model)(expanded_df, cov,
                                                             event_type_col, duration_col, pid_col,
                                                             x0, fit_beta_kwargs, verbose, nb_workers)
                                                             for cov in covariates)
        results_df = pd.concat(_results)
    elif isinstance(covariates, dict):
        raise ValueError("Please provide a list of covariates for the marginal testing, including the union of all possible options across all risks.")
    # elif isinstance(covariates, dict):
    #     for event in self.events:
    #         _results = parallel(delayed(self.fit_marginal_model)(expanded_df, cov,
    #                                                              event_type_col, duration_col, pid_col,
    #                                                              x0, fit_beta_kwargs, verbose, nb_workers)
    #                                                              for cov in covariates[event])
    #         event_results_df = pd.concat(_results)
    #         results_df = pd.concat([results_df, event_results_df], axis=1)

    return results_df.astype(float)

`permute_df(df, event_type_col='J', duration_col='X', pid_col='pid', seed=None)` ¤

This method applies random permutation on the event-time and event-type columns of the training data such that the covariates are decoupled from the outcome; the permuted data follow the null model.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	training data for fitting the model	required
`event_type_col`	`str`	The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`seed`	`(int, Optional)`	pseudo random state.	`None`

Returns:

Name	Type	Description
`permuted_df`	`DataFrame`	null model data.

Source code in src/pydts/screening.py

def permute_df(self,
               df,
               event_type_col: str = 'J',
               duration_col: str = 'X',
               pid_col: str = 'pid',
               seed: int = None):

    """
    This method applies random permutation on the event-time and event-type columns of the training data such that the covariates are decoupled from the outcome; the permuted data follow the null model.

    Args:
        df (pd.DataFrame): training data for fitting the model
        event_type_col (str): The event type column name (must be a column in df), Right-censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        seed (int, Optional): pseudo random state.

    Returns:
        permuted_df (pd.DataFrame): null model data.
    """

    permuted_df = df.copy()
    np.random.seed(seed)
    permuted_index = np.random.permutation(permuted_df.index)
    permuted_df.loc[:, duration_col] = df.loc[permuted_index, duration_col].values
    permuted_df.loc[:, event_type_col] = df.loc[permuted_index, event_type_col].values
    self.permuted_df = permuted_df
    self.permuted_expanded_df = get_expanded_df(df=self.permuted_df,
                                                event_type_col=event_type_col,
                                                duration_col=duration_col,
                                                pid_col=pid_col)
    return permuted_df

Sure Independent Screening

pydts.screening.SISTwoStagesFitter() ¤

TwoStagesFitter_type = 'CoxPHFitter' instance-attribute ¤

chosen_covariates = None instance-attribute ¤

chosen_covariates_j = None instance-attribute ¤

covariates = None instance-attribute ¤

df = pd.DataFrame() instance-attribute ¤

duration_col = None instance-attribute ¤

event_type_col = None instance-attribute ¤

events = None instance-attribute ¤

expanded_df = pd.DataFrame() instance-attribute ¤

final_model = None instance-attribute ¤

marginal_estimates_df = pd.DataFrame() instance-attribute ¤

null_model_df = None instance-attribute ¤

permuted_df = pd.DataFrame() instance-attribute ¤

permuted_expanded_df = pd.DataFrame() instance-attribute ¤

pid_col = None instance-attribute ¤

threshold = None instance-attribute ¤

times = None instance-attribute ¤

_get_params_cols_from_res_df(res_df) ¤

fit(df, threshold=None, quantile=1, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None, fit_final_model=True) ¤

fit_marginal_model(expanded_df, covariate, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=1) ¤

get_data_driven_threshold(df, covariates=None, quantile=1, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None) ¤

get_marginal_estimates(expanded_df, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', verbose=2, x0=0, fit_beta_kwargs={}, nb_workers=WORKERS) ¤

permute_df(df, event_type_col='J', duration_col='X', pid_col='pid', seed=None) ¤

pydts.screening.SISTwoStagesFitterExact() ¤

TwoStagesFitter_type = 'Exact' instance-attribute ¤

chosen_covariates = None instance-attribute ¤

chosen_covariates_j = None instance-attribute ¤

covariates = None instance-attribute ¤

df = pd.DataFrame() instance-attribute ¤

duration_col = None instance-attribute ¤

event_type_col = None instance-attribute ¤

events = None instance-attribute ¤

expanded_df = pd.DataFrame() instance-attribute ¤

final_model = None instance-attribute ¤

marginal_estimates_df = pd.DataFrame() instance-attribute ¤

null_model_df = None instance-attribute ¤

permuted_df = pd.DataFrame() instance-attribute ¤

permuted_expanded_df = pd.DataFrame() instance-attribute ¤

pid_col = None instance-attribute ¤

threshold = None instance-attribute ¤

times = None instance-attribute ¤

_get_params_cols_from_res_df(res_df) ¤

fit(df, threshold=None, quantile=1, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None, fit_final_model=True) ¤

fit_marginal_model(expanded_df, covariate, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=1) ¤

get_data_driven_threshold(df, covariates=None, quantile=1, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None) ¤

get_marginal_estimates(expanded_df, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', verbose=2, x0=0, fit_beta_kwargs={}, nb_workers=WORKERS) ¤

permute_df(df, event_type_col='J', duration_col='X', pid_col='pid', seed=None) ¤

`pydts.screening.SISTwoStagesFitter()` ¤

`TwoStagesFitter_type = 'CoxPHFitter'` `instance-attribute` ¤

`chosen_covariates = None` `instance-attribute` ¤

`chosen_covariates_j = None` `instance-attribute` ¤

`covariates = None` `instance-attribute` ¤

`df = pd.DataFrame()` `instance-attribute` ¤

`duration_col = None` `instance-attribute` ¤

`event_type_col = None` `instance-attribute` ¤

`events = None` `instance-attribute` ¤

`expanded_df = pd.DataFrame()` `instance-attribute` ¤

`final_model = None` `instance-attribute` ¤

`marginal_estimates_df = pd.DataFrame()` `instance-attribute` ¤

`null_model_df = None` `instance-attribute` ¤

`permuted_df = pd.DataFrame()` `instance-attribute` ¤

`permuted_expanded_df = pd.DataFrame()` `instance-attribute` ¤

`pid_col = None` `instance-attribute` ¤

`threshold = None` `instance-attribute` ¤

`times = None` `instance-attribute` ¤

`_get_params_cols_from_res_df(res_df)` ¤

`fit(df, threshold=None, quantile=1, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None, fit_final_model=True)` ¤

`fit_marginal_model(expanded_df, covariate, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=1)` ¤

`get_data_driven_threshold(df, covariates=None, quantile=1, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None)` ¤

`get_marginal_estimates(expanded_df, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', verbose=2, x0=0, fit_beta_kwargs={}, nb_workers=WORKERS)` ¤

`permute_df(df, event_type_col='J', duration_col='X', pid_col='pid', seed=None)` ¤

`pydts.screening.SISTwoStagesFitterExact()` ¤

`TwoStagesFitter_type = 'Exact'` `instance-attribute` ¤

`chosen_covariates = None` `instance-attribute` ¤

`chosen_covariates_j = None` `instance-attribute` ¤

`covariates = None` `instance-attribute` ¤

`df = pd.DataFrame()` `instance-attribute` ¤

`duration_col = None` `instance-attribute` ¤

`event_type_col = None` `instance-attribute` ¤

`events = None` `instance-attribute` ¤

`expanded_df = pd.DataFrame()` `instance-attribute` ¤

`final_model = None` `instance-attribute` ¤

`marginal_estimates_df = pd.DataFrame()` `instance-attribute` ¤

`null_model_df = None` `instance-attribute` ¤

`permuted_df = pd.DataFrame()` `instance-attribute` ¤

`permuted_expanded_df = pd.DataFrame()` `instance-attribute` ¤

`pid_col = None` `instance-attribute` ¤

`threshold = None` `instance-attribute` ¤

`times = None` `instance-attribute` ¤

`_get_params_cols_from_res_df(res_df)` ¤

`fit(df, threshold=None, quantile=1, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None, fit_final_model=True)` ¤

`fit_marginal_model(expanded_df, covariate, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=1)` ¤

`get_data_driven_threshold(df, covariates=None, quantile=1, event_type_col='J', duration_col='X', pid_col='pid', x0=0, fit_beta_kwargs={}, verbose=2, nb_workers=WORKERS, seed=None)` ¤

`get_marginal_estimates(expanded_df, covariates=None, event_type_col='J', duration_col='X', pid_col='pid', verbose=2, x0=0, fit_beta_kwargs={}, nb_workers=WORKERS)` ¤

`permute_df(df, event_type_col='J', duration_col='X', pid_col='pid', seed=None)` ¤