%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from matplotlib import pyplot as plt
import pandas as pd
# Location of the Telco customer-churn CSV that the rest of the script analyses.
CHURN_CSV_URL = ('https://raw.githubusercontent.com/'
                 'treselle-systems/customer_churn_analysis/'
                 'master/WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Download the CSV straight into a DataFrame.
churn_data = pd.read_csv(CHURN_CSV_URL)

# Peek at the first rows (rendered only when this is the last expression of a
# notebook cell; a no-op when run as a plain script).
churn_data.head()
# Use the customer identifier as the row index and discard TotalCharges.
churn_data = churn_data.set_index('customerID')
churn_data = churn_data.drop(['TotalCharges'], axis=1)

# The dataset is naturally hierarchical: some columns only apply to some users.
# For example, if a customer has no internet then the column OnlineBackup isn't
# applicable, and its value is "No internet service". We map every such
# "No <something>" value back to plain "No", and handle the hierarchical nature
# by stratifying on the different services a user may have.
#
# DataFrame.applymap is deprecated since pandas 2.1 and removed in pandas 3.0,
# so the mapping is done with a regex replace, which is available in every
# pandas version. The pattern is anchored at the start of the value and
# consumes the rest of it, so only strings beginning with "No " are rewritten;
# non-string cells are left untouched.
churn_data = churn_data.replace(to_replace=r"^No .*", value="No", regex=True)

# Turn the Yes/No churn label into a boolean event indicator.
churn_data['Churn'] = (churn_data['Churn'] == "Yes")

# Service columns used as strata when fitting the survival model.
strata_cols = ['InternetService', 'StreamingMovies', 'StreamingTV', 'PhoneService']
print(churn_data.columns)
from lifelines import CoxPHFitter
# Right-hand side of the regression formula: the covariates that enter the
# Cox model (the strata columns are passed separately and are not listed here).
cox_formula = "gender + SeniorCitizen + Partner + Dependents + MultipleLines + OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport + Contract + PaperlessBilling + PaymentMethod + MonthlyCharges"

# Build the fitter, then fit it with `tenure` as the duration and `Churn` as
# the event indicator, stratified on the service columns.
cph = CoxPHFitter()
cph.fit(
    churn_data,
    duration_col='tenure',
    event_col='Churn',
    formula=cox_formula,
    strata=strata_cols,
)

# Bare expression: shows the fitter's repr when it ends a notebook cell.
cph
cph.print_summary()
# plt.subplots returns a (figure, axes) pair. Unpack it for readability, but
# keep `ax` bound to the whole pair exactly as before.
fig, model_axes = plt.subplots(figsize=(8, 6))
ax = (fig, model_axes)

# Draw the fitted model's plot onto the axes created above.
cph.plot(ax=model_axes)

# Partial effect of the Contract covariate on the outcome, one curve per
# contract value, without the baseline curve. The trailing semicolon suppresses
# the return value's repr in a notebook.
contract_values = ["Month-to-month", "One year", "Two year"]
cph.plot_partial_effects_on_outcome('Contract', values=contract_values, plot_baseline=False);