Data Expansion Procedure of Lee et al. (2018)

`pydts.fitters.DataExpansionFitter()` ¤

Bases: ExpansionBasedFitter

This class implements the estimation procedure of Lee et al. (2018) [1]. See also the Example section.

Source code in src/pydts/fitters.py

def __init__(self):
    """Run the parent initialiser, then set the default model keyword arguments (Binomial family)."""
    super().__init__()
    # These keyword arguments are unpacked into the GLM constructor when an event model is built.
    self.models_kwargs = {'family': sm.families.Binomial()}

`covariates = None` `instance-attribute` ¤

`duration_col = None` `instance-attribute` ¤

`event_models = {}` `instance-attribute` ¤

`event_type_col = None` `instance-attribute` ¤

`events = None` `instance-attribute` ¤

`expanded_df = pd.DataFrame()` `instance-attribute` ¤

`formula = None` `instance-attribute` ¤

`models_kwargs = dict(family=sm.families.Binomial())` `instance-attribute` ¤

`pid_col = None` `instance-attribute` ¤

`times = None` `instance-attribute` ¤

`_expand_data(df, event_type_col, duration_col, pid_col)` ¤

This method expands the raw data as explained in Lee et al. 2018

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	Dataframe to expand.	required
`event_type_col`	`str`	The event type column name (must be a column in df), Right censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	required
`duration_col`	`str`	Last follow up time column name (must be a column in df).	required
`pid_col`	`str`	Sample ID column name (must be a column in df).	required

Returns:

Type	Description
`DataFrame`	Expanded df (pandas.DataFrame): the expanded dataframe.

Source code in src/pydts/base_fitters.py

def _expand_data(self,
                 df: pd.DataFrame,
                 event_type_col: str,
                 duration_col: str,
                 pid_col: str) -> pd.DataFrame:
    """
    Check that the required columns exist and expand the raw data as explained in Lee et al. 2018.

    Args:
        df (pandas.DataFrame): Dataframe to expand.
        event_type_col (str): The event type column name (must be a column in df),
                              Right censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).

    Returns:
        Expanded df (pandas.DataFrame): the expanded dataframe.
    """
    # Fail early (AssertionError) when one of the three named columns is absent.
    self._validate_cols(df, event_type_col, duration_col, pid_col)
    column_names = {
        'event_type_col': event_type_col,
        'duration_col': duration_col,
        'pid_col': pid_col,
    }
    expanded = get_expanded_df(df=df, **column_names)
    return expanded

`_fit_event(model_fit_kwargs={})` ¤

This method fits a GLM model for a specific event.

Parameters:

Name	Type	Description	Default
`model_fit_kwargs`	`(dict, Optional)`	Keyword arguments to pass to model.fit() method.	`{}`

Returns:

Type	Description
	fitted GLM model

Source code in src/pydts/fitters.py

def _fit_event(self, model_fit_kwargs={}):
    """
    This method fits a GLM model for a specific event.

    The model is built from ``self.formula`` on ``self.expanded_df`` and receives
    ``self.models_kwargs`` as constructor keyword arguments.

    Args:
        model_fit_kwargs (dict, Optional): Keyword arguments to pass to model.fit() method.
            ``None`` is treated as "no extra arguments".

    Returns:
        fitted GLM model
    """
    # The default dict is only unpacked, never mutated, so sharing it across calls is harmless.
    # None must be normalised, otherwise the ** unpacking below raises a TypeError.
    if model_fit_kwargs is None:
        model_fit_kwargs = {}
    model = sm.GLM.from_formula(formula=self.formula, data=self.expanded_df, **self.models_kwargs)
    return model.fit(**model_fit_kwargs)

`_validate_cols(df, event_type_col, duration_col, pid_col)` ¤

Source code in src/pydts/base_fitters.py

def _validate_cols(self, df, event_type_col, duration_col, pid_col):
    """
    Assert that the event type, duration and sample ID columns are all present in df.

    Raises:
        AssertionError: for the first of the three columns (checked in that order) missing from df.
    """
    required = (
        ('Event type column', event_type_col),
        ('Duration column', duration_col),
        ('Observation ID column', pid_col),
    )
    for label, col_name in required:
        assert col_name in df.columns, f'{label} is missing from df: {col_name}'

`_validate_covariates_in_df(df)` ¤

Source code in src/pydts/base_fitters.py

def _validate_covariates_in_df(self, df):
    """
    Assert that every covariate stored on the fitter is a column of df.

    ``self.covariates`` may be a list (one shared covariate list) or a dict keyed by event
    (checked for each event in ``self.events``). Any other type is not checked.

    Raises:
        AssertionError: listing the covariates that are missing from df.
    """
    covariates = self.covariates
    if isinstance(covariates, list):
        missing = [name for name in covariates if name not in df.columns]
    elif isinstance(covariates, dict):
        missing = [name
                   for event in self.events
                   for name in covariates[event]
                   if name not in df.columns]
    else:
        missing = []
    assert not missing, \
        f"Cannot predict - required covariates are missing from df: {missing}"

`_validate_t(t, return_iter=True)` ¤

Source code in src/pydts/base_fitters.py

def _validate_t(self, t, return_iter=True):
    """
    Assert that every requested time is one of the times stored in ``self.times``.

    Args:
        t: a single time or an iterable of times.
        return_iter (bool): when True return the iterable form of t (a scalar is wrapped in a
            one-element numpy array); when False return t unchanged.

    Raises:
        AssertionError: listing the requested times that are not in ``self.times``.
    """
    times = t if isinstance(t, Iterable) else np.array([t])
    unknown = [candidate for candidate in times if candidate not in self.times]
    assert not unknown, \
        f"Cannot predict for times which were not included during .fit(): {unknown}"
    return times if return_iter else t

`evaluate(test_df, oracle_col='T', **kwargs)` ¤

Source code in src/pydts/base_fitters.py

def evaluate(self, test_df: pd.DataFrame, oracle_col: str = 'T', **kwargs) -> float:
    """Placeholder with no implementation here: always raises NotImplementedError."""
    raise NotImplementedError()

`fit(df, event_type_col='J', duration_col='X', pid_col='pid', skip_expansion=False, covariates=None, formula=None, models_kwargs=None, model_fit_kwargs={})` ¤

This method fits a model to the discrete data.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	training data for fitting the model	required
`event_type_col`	`str`	The event type column name (must be a column in df), Right censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.	`'J'`
`duration_col`	`str`	Last follow up time column name (must be a column in df).	`'X'`
`pid_col`	`str`	Sample ID column name (must be a column in df).	`'pid'`
`skip_expansion`	`boolean`	Skips the dataframe expansion step. Use this option only if the provided dataframe (df) is already correctly expanded. When set to True, the df is expected to be in the format produced by the pydts.utils.get_expanded_df() method, as if it were applied to the unexpanded data.	`False`
`covariates`	`(list, Optional)`	A list of covariates, all must be columns in df. Defaults to all the columns of df except event_type_col, duration_col, and pid_col.	`None`
`formula`	`(str, Optional)`	Model formula to be fitted. Patsy format string.	`None`
`models_kwargs`	`(dict, Optional)`	Keyword arguments to pass to model instance initiation.	`None`
`model_fit_kwargs`	`(dict, Optional)`	Keyword arguments to pass to model.fit() method.	`{}`

Returns:

Name	Type	Description
`event_models`	`dict`	Fitted models dictionary. Keys - event names, Values - fitted models for the event.

Source code in src/pydts/fitters.py

def fit(self,
        df: pd.DataFrame,
        event_type_col: str = 'J',
        duration_col: str = 'X',
        pid_col: str = 'pid',
        skip_expansion: bool = False,
        covariates: Optional[list] = None,
        formula: Optional[str] = None,
        models_kwargs: Optional[dict] = None,
        model_fit_kwargs: Optional[dict] = {}) -> dict:
    """
    This method fits a model to the discrete data.

    One model is fitted per event (every distinct non-zero value of event_type_col). The response of
    the model for event ``e`` is the column ``j_e`` of the expanded dataframe.

    Args:
        df (pd.DataFrame): training data for fitting the model
        event_type_col (str): The event type column name (must be a column in df), Right censored sample (i) is indicated by event value 0, df.loc[i, event_type_col] = 0.
        duration_col (str): Last follow up time column name (must be a column in df).
        pid_col (str): Sample ID column name (must be a column in df).
        skip_expansion (boolean): Skips the dataframe expansion step. Use this option only if the provided dataframe (df) is already correctly expanded. When set to True, the df is expected to be in the format produced by the pydts.utils.get_expanded_df() method, as if it were applied to the unexpanded data.
        covariates (list, Optional): A list of covariates, all must be columns in df. Defaults to all the columns of df except event_type_col, duration_col, and pid_col (and, when skip_expansion is True, the ``j_<event value>`` response columns).
        formula (str, Optional): Model formula to be fitted. Patsy format string.
        models_kwargs (dict, Optional): Keyword arguments to pass to model instance initiation.
        model_fit_kwargs (dict, Optional): Keyword arguments to pass to model.fit() method. None is treated as an empty dict.

    Returns:
        event_models (dict): Fitted models dictionary. Keys - event names, Values - fitted models for the event.

    Raises:
        ValueError: if df has a column named 'C', or if a requested covariate is not a column of df.
        AssertionError: if event_type_col, duration_col or pid_col is not a column of df.
    """

    if models_kwargs is not None:
        self.models_kwargs = models_kwargs
    # The annotation allows None, but the value is later unpacked with **, so normalise it here.
    if model_fit_kwargs is None:
        model_fit_kwargs = {}

    if 'C' in df.columns:
        raise ValueError('C is an invalid column name, to avoid errors with categorical symbol C() in formula')
    self._validate_cols(df, event_type_col, duration_col, pid_col)
    if covariates is not None:
        cov_not_in_df = [cov for cov in covariates if cov not in df.columns]
        if cov_not_in_df:
            raise ValueError(f"Error during fit - missing covariates from df: {cov_not_in_df}")

    # Record the column names used for this fit, so they are available on the instance afterwards.
    self.event_type_col = event_type_col
    self.duration_col = duration_col
    self.pid_col = pid_col

    event_values = sorted(df[event_type_col].unique())
    self.events = [c for c in event_values if c != 0]
    if covariates is None:
        reserved = {event_type_col, duration_col, pid_col}
        if skip_expansion:
            # An already-expanded df carries the j_<event> response columns used on the left-hand
            # side of the formula below; they must never become covariates of their own model.
            reserved.update(f'j_{value}' for value in event_values)
        self.covariates = [col for col in df if col not in reserved]
    else:
        self.covariates = covariates
    self.times = sorted(df[duration_col].unique())

    if not skip_expansion:
        self.expanded_df = self._expand_data(df=df, event_type_col=event_type_col, duration_col=duration_col,
                                             pid_col=pid_col)
    else:
        print('Skipping data expansion step, only use this option if the provided dataframe (df) is already correctly expanded.')
        self.expanded_df = df

    # The right-hand side of the default formula is the same for every event.
    cov = ' + '.join(self.covariates)
    for event in self.events:
        if formula is not None:
            self.formula = f'j_{event} ~ {formula}'
        else:
            # Default: the covariates plus one categorical level per duration value, no intercept.
            self.formula = f'j_{event} ~ {cov} + C({duration_col}) -1 '
        self.event_models[event] = self._fit_event(model_fit_kwargs=model_fit_kwargs)
    return self.event_models

`get_alpha_df()` ¤

This function returns the Alpha coefficients and their Standard Errors for all the events.

Returns:

Name	Type	Description
`se_df`	`DataFrame`	Alpha coefficients and Standard Errors Dataframe

Source code in src/pydts/fitters.py

def get_alpha_df(self):
    """
    This function returns the Alpha coefficients and their Standard Errors for all the events.

    For every event the second table of the fitted model's summary is parsed from its CSV text,
    and all rows except the last ``len(self.covariates) + 1`` are kept. The per-event tables are
    joined side by side under a two-level column index (event, summary column).

    Returns:
        se_df (pandas.DataFrame): Alpha coefficients and Standard Errors Dataframe
    """

    alpha_table = pd.DataFrame()
    for event in self.events:
        csv_text = self.event_models[event].summary().tables[1].as_csv()
        parsed = pd.DataFrame([line.split(',') for line in csv_text.split('\n')])
        # The first parsed row is the header; the first column holds the parameter names.
        parsed.columns = parsed.iloc[0]
        parsed = parsed.iloc[1:].set_index(parsed.columns[0])
        parsed.columns = pd.MultiIndex.from_product([[event], parsed.columns])
        n_trailing = len(self.covariates) + 1
        alpha_table = pd.concat([alpha_table, parsed.iloc[:-n_trailing]], axis=1)
    return alpha_table

`get_beta_SE()` ¤

This function returns the Beta coefficients and their Standard Errors for all the events.

Returns:

Name	Type	Description
`se_df`	`DataFrame`	Beta coefficients and Standard Errors Dataframe

Source code in src/pydts/fitters.py

def get_beta_SE(self):
    """
    This function returns the Beta coefficients and their Standard Errors for all the events.

    For every event the second table of the fitted model's summary is parsed from its CSV text,
    and only the last ``len(self.covariates)`` rows are kept. The per-event tables are joined
    side by side under a two-level column index (event, summary column).

    Returns:
        se_df (pandas.DataFrame): Beta coefficients and Standard Errors Dataframe
    """

    beta_table = pd.DataFrame()
    for event in self.events:
        csv_text = self.event_models[event].summary().tables[1].as_csv()
        parsed = pd.DataFrame([line.split(',') for line in csv_text.split('\n')])
        # The first parsed row is the header; the first column holds the parameter names.
        parsed.columns = parsed.iloc[0]
        parsed = parsed.iloc[1:].set_index(parsed.columns[0])
        parsed.columns = pd.MultiIndex.from_product([[event], parsed.columns])
        n_covariates = len(self.covariates)
        beta_table = pd.concat([beta_table, parsed.iloc[-n_covariates:]], axis=1)
    return beta_table

`predict(df, **kwargs)` ¤

Source code in src/pydts/base_fitters.py

def predict(self, df: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """Placeholder with no implementation here: always raises NotImplementedError."""
    raise NotImplementedError()

`predict_cumulative_incident_function(df)` ¤

This function adds columns of the predicted hazard function, overall survival, probabilities of event occurrence and cumulative incident function (CIF) to the given dataframe.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataframe with covariates columns included	required

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with additional prediction columns

Source code in src/pydts/base_fitters.py

def predict_cumulative_incident_function(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    This function adds columns of the predicted hazard function, overall survival, probabilities of event
    occurrence and cumulative incident function (CIF) to the given dataframe.

    An event is skipped when its CIF column for the second-to-last fitted time is already in df.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns included

    Returns:
        df (pandas.DataFrame): dataframe with additional prediction columns

    """
    self._validate_covariates_in_df(df.head())

    for event in self.events:
        last_cif_col = f'cif_j{event}_at_t{self.times[-2]}'
        if last_cif_col in df.columns:
            continue
        df = self.predict_event_cumulative_incident_function(df=df, event=event)
    return df

`predict_event_cumulative_incident_function(df, event)` ¤

This function adds, for a specific event, columns of the predicted hazard function, overall survival, probabilities of event occurrence and cumulative incident function (CIF) to the given dataframe.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataframe with covariates columns included	required
`event`	`Union[str, int]`	event name	required

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with additional prediction columns

Source code in src/pydts/base_fitters.py

def predict_event_cumulative_incident_function(self, df: pd.DataFrame, event: Union[str, int]) -> pd.DataFrame:
    """
    This function adds, for a specific event, columns of the predicted hazard function, overall survival,
    probabilities of event occurrence and cumulative incident function (CIF) to the given dataframe.

    The CIF columns are the running sum, over all fitted times except the last, of the event's
    probability columns.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns included
        event (Union[str, int]): event name

    Returns:
        df (pandas.DataFrame): dataframe with additional prediction columns

    """
    assert event in self.events, \
        f"Cannot predict for event {event} - it was not included during .fit()"
    self._validate_covariates_in_df(df.head())

    # The probability columns are computed only when the one for the second-to-last time is absent.
    probs_present = f'prob_j{event}_at_t{self.times[-2]}' in df.columns
    if not probs_present:
        df = self.predict_prob_events(df=df)

    horizon = self.times[:-1]
    running_sum = df[[f'prob_j{event}_at_t{t}' for t in horizon]].cumsum(axis=1)
    running_sum.columns = [f'cif_j{event}_at_t{t}' for t in horizon]
    return pd.concat([df, running_sum], axis=1)

`predict_hazard_all(df)` ¤

This function calculates the hazard for all the events at all time values included in the training set for each event.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	samples to predict for	required

Returns:

Name	Type	Description
`df`	`DataFrame`	samples with the prediction columns

Source code in src/pydts/base_fitters.py

def predict_hazard_all(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    This function calculates the hazard for all the events at all time values included in the training set for
    each event. The last fitted time is left out of the request.

    Args:
        df (pd.DataFrame): samples to predict for

    Returns:
        df (pd.DataFrame): samples with the prediction columns

    """
    self._validate_covariates_in_df(df.head())
    all_but_last_time = self.times[:-1]
    return self.predict_hazard_t(df, t=all_but_last_time)

`predict_hazard_jt(df, event, t, n_jobs=-1)` ¤

This method calculates the hazard for the given event at the given time values if they were included in the training set of the event.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	samples to predict for	required
`event`	`Union[str, int]`	event name	required
`t`	`array`	times to calculate the hazard for	required
`n_jobs`	`int`	number of CPUs to use, defaults to every available CPU	`-1`

Returns:

Name	Type	Description
`df`	`DataFrame`	samples with the prediction columns

Source code in src/pydts/fitters.py

def predict_hazard_jt(self,
                      df: pd.DataFrame,
                      event: Union[str, int],
                      t: Union[Iterable, int],
                      n_jobs: int = -1) -> pd.DataFrame:
    """
    This method calculates the hazard for the given event at the given time values if they were included in the training set of the event.

    Only the requested times whose ``hazard_j{event}_t{time}`` column is not already in df are
    predicted; when none is missing, df itself is returned unchanged.

    Args:
        df (pd.DataFrame): samples to predict for
        event (Union[str, int]): event name
        t (np.array): times to calculate the hazard for
        n_jobs: number of CPUs to use, defaults to every available CPU

    Returns:
        df (pd.DataFrame): samples with the prediction columns

    Raises:
        AssertionError: if the event was not fitted (time and covariate validation is delegated
            to ``_validate_t`` and ``_validate_covariates_in_df``).
    """
    t = self._validate_t(t, return_iter=True)
    assert event in self.events, \
        f"Cannot predict for event {event} - it was not included during .fit()"
    self._validate_covariates_in_df(df.head())

    missing_times = [t_i for t_i in t if f'hazard_j{event}_t{t_i}' not in df.columns]
    if len(missing_times) == 0:
        return df

    # The model formula refers to the duration column by the name it had in the training data,
    # so the time value must be injected under that name. Fall back to 'X' (the default name used
    # by fit) when the fitter did not record one.
    duration_col = getattr(self, 'duration_col', None)
    if duration_col is None:
        duration_col = 'X'

    model = self.event_models[event]
    covariates_df = df[self.covariates]
    # One prediction per missing time, each on the covariates with the time set to a constant.
    res = Parallel(n_jobs=n_jobs)(
        delayed(model.predict)(covariates_df.assign(**{duration_col: c})) for c in missing_times
    )
    temp_hazard_df = pd.concat(res, axis=1)
    # Work on a copy so the caller's dataframe is not modified.
    temp_df = df.copy()
    temp_df[[f'hazard_j{event}_t{c_}' for c_ in missing_times]] = temp_hazard_df.values
    return temp_df

`predict_hazard_t(df, t)` ¤

This function calculates the hazard for all the events at the requested time values if they were included in the training set of each event.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	samples to predict for	required
`t`	`(int, array)`	times to calculate the hazard for	required

Returns:

Name	Type	Description
`df`	`DataFrame`	samples with the prediction columns

Source code in src/pydts/base_fitters.py

def predict_hazard_t(self, df: pd.DataFrame, t: Union[int, np.array]) -> pd.DataFrame:
    """
    This function calculates the hazard for all the events at the requested time values if they were included in
    the training set of each event.

    The dataframe is passed through ``predict_hazard_jt`` once per fitted event model.

    Args:
        df (pd.DataFrame): samples to predict for
        t (int, np.array): times to calculate the hazard for

    Returns:
        df (pd.DataFrame): samples with the prediction columns
    """
    t = self._validate_t(t)
    self._validate_covariates_in_df(df.head())

    # Only the event names are needed here; the fitted models are looked up by predict_hazard_jt.
    for event in self.event_models:
        df = self.predict_hazard_jt(df=df, event=event, t=t)
    return df

`predict_marginal_prob_all_events(df)` ¤

This function calculates the marginal probability per event given the covariates for all the events.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataframe with covariates columns included	required

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with additional prediction columns

Source code in src/pydts/base_fitters.py

def predict_marginal_prob_all_events(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    This function calculates the marginal probability per event given the covariates for all the events.

    The dataframe is passed through ``predict_marginal_prob_event_j`` once per fitted event, in the
    order of ``self.events``.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns included

    Returns:
        df (pandas.DataFrame): dataframe with additional prediction columns
    """
    self._validate_covariates_in_df(df.head())
    result = df
    for event in self.events:
        result = self.predict_marginal_prob_event_j(df=result, event=event)
    return result

`predict_marginal_prob_event_j(df, event)` ¤

This function calculates the marginal probability of an event given the covariates.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataframe with covariates columns included	required
`event`	`Union[str, int]`	event name	required

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with additional prediction columns

Source code in src/pydts/base_fitters.py

def predict_marginal_prob_event_j(self, df: pd.DataFrame, event: Union[str, int]) -> pd.DataFrame:
    """
    This function calculates the marginal probability of an event given the covariates.

    The marginal probability is the row-wise sum of the event's probability columns over all fitted
    times except the last; it is appended as the column ``marginal_prob_j{event}``.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns included
        event (Union[str, int]): event name

    Returns:
        df (pandas.DataFrame): dataframe with additional prediction columns
    """

    assert event in self.events, \
        f"Cannot predict for event {event} - it was not included during .fit()"
    self._validate_covariates_in_df(df.head())

    # The probability columns are computed only when the one for the second-to-last time is absent.
    probs_present = f'prob_j{event}_at_t{self.times[-2]}' in df.columns
    if not probs_present:
        df = self.predict_prob_event_j_all(df=df, event=event)

    prob_cols = [f'prob_j{event}_at_t{time}' for time in self.times[:-1]]
    marginal = df[prob_cols].sum(axis=1).rename(f'marginal_prob_j{event}')
    return pd.concat([df, marginal], axis=1)

`predict_overall_survival(df, t=None, return_hazards=False)` ¤

This function adds columns of the overall survival until time t. Args: `df` (pandas.DataFrame): dataframe with covariates columns; `t` (int): time; `return_hazards` (bool): whether to keep the hazard columns.

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with the additional overall survival columns

Source code in src/pydts/base_fitters.py

def predict_overall_survival(self,
                             df: pd.DataFrame,
                             t: int = None,
                             return_hazards: bool = False) -> pd.DataFrame:
    """
    This function adds columns of the overall survival until time t.

    For each considered time the survival factor is one minus the sum of the hazards of all events
    at that time; the ``overall_survival_t{time}`` columns hold the running product of these factors.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns
        t (int): time; when None, all fitted times except the last are used
        return_hazards (bool): whether to keep the hazard columns

    Returns:
        df (pandas.DataFrame): dataframe with the additional overall survival columns

    """
    if t is not None:
        self._validate_t(t, return_iter=False)
    self._validate_covariates_in_df(df.head())

    hazards_df = self.predict_hazard_all(df)
    if t is None:
        horizon = self.times[:-1]
    else:
        horizon = [time for time in self.times[:-1] if time <= t]

    # Built incrementally from an empty frame so that an empty horizon adds no columns.
    survival = pd.DataFrame()
    for time in horizon:
        hazard_cols = [f'hazard_j{event}_t{time}' for event in self.events]
        no_event_prob = 1 - hazards_df[hazard_cols].sum(axis=1)
        survival = pd.concat([survival, no_event_prob.rename(f'overall_survival_t{time}')], axis=1)
    result = pd.concat([df, survival.cumprod(axis=1)], axis=1)

    if not return_hazards:
        return result

    # Append only the hazard columns that the result does not already contain.
    is_hazard = hazards_df.columns.str.startswith("hazard_")
    extra_cols = hazards_df.columns[is_hazard].difference(result.columns)
    if len(extra_cols) > 0:
        result = pd.concat([result, hazards_df[extra_cols]], axis=1)
    return result

`predict_prob_event_j_all(df, event)` ¤

This function adds columns of a specific event occurrence probabilities.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataframe with covariates columns	required
`event`	`Union[str, int]`	event name	required

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with probabilities columns

Source code in src/pydts/base_fitters.py

def predict_prob_event_j_all(self, df: pd.DataFrame, event: Union[str, int]) -> pd.DataFrame:
    """
    This function adds columns of a specific event occurrence probabilities.

    Overall survival (with hazards) is predicted first when its column for the second-to-last fitted
    time is absent; then one probability column is added per fitted time except the last, skipping
    the columns that already exist.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns
        event (Union[str, int]): event name

    Returns:
        df (pandas.DataFrame): dataframe with probabilities columns

    """
    assert event in self.events, \
        f"Cannot predict for event {event} - it was not included during .fit()"
    self._validate_covariates_in_df(df.head())

    survival_present = f'overall_survival_t{self.times[-2]}' in df.columns
    if not survival_present:
        df = self.predict_overall_survival(df, return_hazards=True)

    for time in self.times[:-1]:
        if f'prob_j{event}_at_t{time}' in df.columns:
            continue
        df = self.predict_prob_event_j_at_t(df=df, event=event, t=time)
    return df

`predict_prob_event_j_at_t(df, event, t)` ¤

This function adds a column with the probability of occurrence of a specific event at a specific time.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataframe with covariates columns	required
`event`	`Union[str, int]`	event name	required
`t`	`int`	time	required

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with an additional probability column

Source code in src/pydts/base_fitters.py

def predict_prob_event_j_at_t(self, df: pd.DataFrame, event: Union[str, int], t: int) -> pd.DataFrame:
    """
    This function adds a column with the probability of occurrence of a specific event at a specific time.

    At t == 1 the probability equals the event hazard at that time; otherwise it is the overall
    survival at t - 1 multiplied by the event hazard at t. Missing hazard or overall survival
    columns are predicted on demand. The new column is written into the dataframe that is returned,
    which is the input df itself when no intermediate prediction was needed.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns
        event (Union[str, int]): event name
        t (int): time

    Returns:
        df (pandas.DataFrame): dataframe with an additional probability column

    """
    assert event in self.events, \
        f"Cannot predict for event {event} - it was not included during .fit()"
    self._validate_t(t, return_iter=False)
    self._validate_covariates_in_df(df.head())

    prob_col = f'prob_j{event}_at_t{t}'
    if prob_col in df.columns:
        # Already computed: nothing to add.
        return df

    hazard_col = f'hazard_j{event}_t{t}'
    if t == 1:
        if hazard_col not in df.columns:
            df = self.predict_hazard_jt(df=df, event=event, t=t)
        df[prob_col] = df[hazard_col]
        return df

    survival_col = f'overall_survival_t{t - 1}'
    if survival_col not in df.columns:
        # This call is asked to return the hazard columns as well as the survival columns.
        df = self.predict_overall_survival(df, t=t, return_hazards=True)
    elif hazard_col not in df.columns:
        df = self.predict_hazard_t(df, t=np.array([_t for _t in self.times[:-1] if _t <= t]))
    df[prob_col] = df[survival_col] * df[hazard_col]
    return df

`predict_prob_events(df)` ¤

This function adds columns of the occurrence probabilities of all the events. Args: `df` (pandas.DataFrame): dataframe with covariates columns.

Returns:

Name	Type	Description
`df`	`DataFrame`	dataframe with probabilities columns

Source code in src/pydts/base_fitters.py

def predict_prob_events(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    This function adds columns of the occurrence probabilities of all the events.

    The dataframe is passed through ``predict_prob_event_j_all`` once per fitted event, in the order
    of ``self.events``.

    Args:
        df (pandas.DataFrame): dataframe with covariates columns

    Returns:
        df (pandas.DataFrame): dataframe with probabilities columns

    """
    self._validate_covariates_in_df(df.head())

    result = df
    for event in self.events:
        result = self.predict_prob_event_j_all(df=result, event=event)
    return result

`print_summary(summary_func='summary', summary_kwargs={})` ¤

This method prints the summary of the fitted models for all the events.

Parameters:

Name	Type	Description	Default
`summary_func`	`(str, Optional)`	print summary method of the fitted model type ("summary", "print_summary").	`'summary'`
`summary_kwargs`	`(dict, Optional)`	Keyword arguments to pass to the model summary function.	`{}`

Returns:

Type	Description
`None`	None

Source code in src/pydts/fitters.py

def print_summary(self,
                  summary_func: str = "summary",
                  summary_kwargs: dict = {}) -> None:
    """
    This method prints the summary of the fitted models for all the events.

    For each fitted model the attribute named by summary_func is looked up; when it exists its result
    is printed under a per-event header, otherwise a one-line notice is printed instead.

    Args:
        summary_func (str, Optional): print summary method of the fitted model type ("summary", "print_summary").
        summary_kwargs (dict, Optional): Keyword arguments to pass to the model summary function.

    Returns:
        None
    """
    for event_name, fitted_model in self.event_models.items():
        summarize = getattr(fitted_model, summary_func, None)
        if summarize is None:
            print(f'Not {summary_func} function in event {event_name} model')
            continue
        print(f'\n\nModel summary for event: {event_name}')
        print(summarize(**summary_kwargs))

Data Expansion Procedure of Lee et al. (2018)

pydts.fitters.DataExpansionFitter() ¤

covariates = None instance-attribute ¤

duration_col = None instance-attribute ¤

event_models = {} instance-attribute ¤

event_type_col = None instance-attribute ¤

events = None instance-attribute ¤

expanded_df = pd.DataFrame() instance-attribute ¤

formula = None instance-attribute ¤

models_kwargs = dict(family=sm.families.Binomial()) instance-attribute ¤

pid_col = None instance-attribute ¤

times = None instance-attribute ¤

_expand_data(df, event_type_col, duration_col, pid_col) ¤

_fit_event(model_fit_kwargs={}) ¤

_validate_cols(df, event_type_col, duration_col, pid_col) ¤

_validate_covariates_in_df(df) ¤

_validate_t(t, return_iter=True) ¤

evaluate(test_df, oracle_col='T', **kwargs) ¤

fit(df, event_type_col='J', duration_col='X', pid_col='pid', skip_expansion=False, covariates=None, formula=None, models_kwargs=None, model_fit_kwargs={}) ¤

get_alpha_df() ¤

get_beta_SE() ¤

predict(df, **kwargs) ¤

predict_cumulative_incident_function(df) ¤

predict_event_cumulative_incident_function(df, event) ¤

predict_hazard_all(df) ¤

predict_hazard_jt(df, event, t, n_jobs=-1) ¤

predict_hazard_t(df, t) ¤

predict_marginal_prob_all_events(df) ¤

predict_marginal_prob_event_j(df, event) ¤

predict_overall_survival(df, t=None, return_hazards=False) ¤

predict_prob_event_j_all(df, event) ¤

predict_prob_event_j_at_t(df, event, t) ¤

predict_prob_events(df) ¤

print_summary(summary_func='summary', summary_kwargs={}) ¤

`pydts.fitters.DataExpansionFitter()` ¤

`covariates = None` `instance-attribute` ¤

`duration_col = None` `instance-attribute` ¤

`event_models = {}` `instance-attribute` ¤

`event_type_col = None` `instance-attribute` ¤

`events = None` `instance-attribute` ¤

`expanded_df = pd.DataFrame()` `instance-attribute` ¤

`formula = None` `instance-attribute` ¤

`models_kwargs = dict(family=sm.families.Binomial())` `instance-attribute` ¤

`pid_col = None` `instance-attribute` ¤

`times = None` `instance-attribute` ¤

`_expand_data(df, event_type_col, duration_col, pid_col)` ¤

`_fit_event(model_fit_kwargs={})` ¤

`_validate_cols(df, event_type_col, duration_col, pid_col)` ¤

`_validate_covariates_in_df(df)` ¤

`_validate_t(t, return_iter=True)` ¤

`evaluate(test_df, oracle_col='T', **kwargs)` ¤

`fit(df, event_type_col='J', duration_col='X', pid_col='pid', skip_expansion=False, covariates=None, formula=None, models_kwargs=None, model_fit_kwargs={})` ¤

`get_alpha_df()` ¤

`get_beta_SE()` ¤

`predict(df, **kwargs)` ¤

`predict_cumulative_incident_function(df)` ¤

`predict_event_cumulative_incident_function(df, event)` ¤

`predict_hazard_all(df)` ¤

`predict_hazard_jt(df, event, t, n_jobs=-1)` ¤

`predict_hazard_t(df, t)` ¤

`predict_marginal_prob_all_events(df)` ¤

`predict_marginal_prob_event_j(df, event)` ¤

`predict_overall_survival(df, t=None, return_hazards=False)` ¤

`predict_prob_event_j_all(df, event)` ¤

`predict_prob_event_j_at_t(df, event, t)` ¤

`predict_prob_events(df)` ¤

`print_summary(summary_func='summary', summary_kwargs={})` ¤