%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from matplotlib import pyplot as plt
import pandas as pd
# Location of the Telco customer-churn CSV on GitHub.
_churn_csv_url = (
    'https://raw.githubusercontent.com/'
    'treselle-systems/customer_churn_analysis/'
    'master/WA_Fn-UseC_-Telco-Customer-Churn.csv'
)

# Download the raw table and peek at the first rows (shown as cell output in a notebook).
churn_data = pd.read_csv(_churn_csv_url)
churn_data.head()

# Index the rows by customer ID and discard the TotalCharges column.
churn_data = (
    churn_data
    .set_index('customerID')
    .drop(columns=['TotalCharges'])
)
# The dataset is naturally hierarchical: some columns only apply to some users. E.g., if you don't have internet
# then the column OnlineBackup isn't applicable, as its value is "No internet service". We
# are going to map this back to No. We will treat the hierarchical nature by stratifying on the
# different services a user may have.
# Collapse every cell whose string form starts with "No " (e.g. "No internet service")
# to plain "No"; all other cells are left unchanged.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map, so
# use whichever element-wise method the installed pandas version provides.
_map_cells = churn_data.map if hasattr(pd.DataFrame, "map") else churn_data.applymap
churn_data = _map_cells(lambda x: "No" if str(x).startswith("No ") else x)

# Service columns that are excluded from dummy encoding below.
strata_cols = ['InternetService', 'StreamingMovies', 'StreamingTV', 'PhoneService']

# One-hot encode every column except the strata columns, 'tenure' and
# 'MonthlyCharges'. drop_first=True keeps k-1 indicator columns per encoded
# column, dropping the first level of each.
df = pd.get_dummies(
    churn_data,
    columns=churn_data.columns.difference(strata_cols + ['tenure', 'MonthlyCharges']),
    drop_first=True,
)
from lifelines import CoxPHFitter
# Fit a Cox proportional-hazards model: 'tenure' is the duration column,
# 'Churn_Yes' the event indicator, and the columns in strata_cols are strata.
cph = CoxPHFitter()
cph.fit(
    df,
    duration_col='tenure',
    event_col='Churn_Yes',
    strata=strata_cols,
)
cph  # bare expression: shows the fitter's repr as cell output in a notebook
cph.print_summary()
# plt.subplots returns a (figure, axes) tuple. `ax` keeps holding that tuple, as
# before, in case other cells index it; the pieces are unpacked into clearer names
# for use here.
ax = plt.subplots(figsize=(8, 6))
_fig, coef_axes = ax

# Draw the fitted model's plot onto the prepared axes.
cph.plot(ax=coef_axes)

# lifelines 0.25 renamed `plot_covariate_groups` to
# `plot_partial_effects_on_outcome`, and the old name is, I believe, gone from
# later releases (confirm against the changelog). Resolve whichever one the
# installed version provides so this cell works on old and new lifelines.
if hasattr(cph, "plot_partial_effects_on_outcome"):
    _plot_partial_effects = cph.plot_partial_effects_on_outcome
else:
    _plot_partial_effects = cph.plot_covariate_groups

# Plot the outcome with 'Contract_Two year' set to 0 and to 1.
_plot_partial_effects('Contract_Two year', values=[0, 1]);