Import Libraries¶

In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN
import warnings
warnings.filterwarnings("ignore") 

Read the dataset¶

In [2]:
df = pd.read_csv("cleandata.csv")
In [3]:
df.head()
Out[3]:
Unnamed: 0 SeniorCitizen MonthlyCharges TotalCharges Churn gender_Male Partner_Yes Dependents_Yes PhoneService_Yes MultipleLines_No phone service ... PaperlessBilling_Yes PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check tenure_group_12 - 23 tenure_group_24 - 35 tenure_group_36 - 47 tenure_group_48 - 59 tenure_group_60 - 71 tenure_group_72 - 72
0 0 0 29.85 29.85 0 0 1 0 0 1 ... 1 0 1 0 0 0 0 0 0 0
1 1 0 56.95 1889.50 0 1 0 0 1 0 ... 0 0 0 1 0 1 0 0 0 0
2 2 0 53.85 108.15 1 1 0 0 1 0 ... 1 0 0 1 0 0 0 0 0 0
3 3 0 42.30 1840.75 0 1 0 0 0 1 ... 0 0 0 0 0 0 1 0 0 0
4 4 0 70.70 151.65 1 0 0 0 1 0 ... 1 0 1 0 0 0 0 0 0 0

5 rows × 37 columns

Drop the 'Unnamed: 0' column (not required in the analysis)¶

In [4]:
df.drop('Unnamed: 0', axis=1, inplace=True)
In [5]:
df.head()
Out[5]:
SeniorCitizen MonthlyCharges TotalCharges Churn gender_Male Partner_Yes Dependents_Yes PhoneService_Yes MultipleLines_No phone service MultipleLines_Yes ... PaperlessBilling_Yes PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check tenure_group_12 - 23 tenure_group_24 - 35 tenure_group_36 - 47 tenure_group_48 - 59 tenure_group_60 - 71 tenure_group_72 - 72
0 0 29.85 29.85 0 0 1 0 0 1 0 ... 1 0 1 0 0 0 0 0 0 0
1 0 56.95 1889.50 0 1 0 0 1 0 0 ... 0 0 0 1 0 1 0 0 0 0
2 0 53.85 108.15 1 1 0 0 1 0 0 ... 1 0 0 1 0 0 0 0 0 0
3 0 42.30 1840.75 0 1 0 0 0 1 0 ... 0 0 0 0 0 0 1 0 0 0
4 0 70.70 151.65 1 0 0 0 1 0 0 ... 1 0 1 0 0 0 0 0 0 0

5 rows × 36 columns

Separate features (x) and target (y)¶

In [6]:
x = df.drop('Churn', axis=1)
y = df['Churn']

Split the data into training and testing sets:¶

  • Split the data into 70% training and 30% testing sets
In [7]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)
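Note: since churned customers are a minority, an unstratified split can give the test set a slightly different churn ratio than the training set. A stratified split is an optional variation (a minimal sketch, not part of the original run; the fixed random_state is my own assumption for reproducibility):

In [ ]:
# Optional variation: stratified 70/30 split that preserves the churn ratio
# in both sets (illustrative only; the cell above is the split actually used)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, stratify=y, random_state=42
)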

Decision Tree classifier¶

In [8]:
model = DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

Train the Decision Tree classifier on the training data¶

In [9]:
model.fit(x_train,y_train)
Out[9]:
DecisionTreeClassifier(max_depth=6, min_samples_leaf=8, random_state=100)

Make predictions on the test set:¶

In [10]:
predict = model.predict(x_test)
predict
Out[10]:
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

Print classification report¶

In [11]:
# Print accuracy score
accuracy = accuracy_score(y_test, predict)
print("Accuracy:", accuracy)

# Print classification report
print("Classification Report:\n", classification_report(y_test, predict))

# Print confusion matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, predict))
Accuracy: 0.7742546142924751
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85      1564
           1       0.57      0.51      0.54       549

    accuracy                           0.77      2113
   macro avg       0.70      0.69      0.69      2113
weighted avg       0.77      0.77      0.77      2113

Confusion Matrix:
 [[1358  206]
 [ 271  278]]
Notice:
The accuracy is fairly low (about 77%).

However, since the dataset is imbalanced, accuracy alone does not give a complete picture. It is necessary to look at the minority class, i.e., churned customers, to understand the model's performance better.
Because churned customers are a minority in the dataset (imbalanced data), I need to balance the data so that both churned and non-churned customers are fairly represented.
But first, let's test some other classifiers.
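A quick way to see the imbalance before balancing it is to count the target classes (a minimal sketch, not run in the original notebook; the printed counts depend on the data):

In [ ]:
# Inspect the class balance of the target variable
print(y.value_counts())
print(y.value_counts(normalize=True).round(2))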

Test Different Classifiers¶

In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

Random Forest¶

In [13]:
model_rf = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=6, min_samples_leaf=8)
model_rf.fit(x_train, y_train)
Out[13]:
RandomForestClassifier(max_depth=6, min_samples_leaf=8)

Gradient Boosting Machine¶

In [14]:
model_gbm = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
model_gbm.fit(x_train, y_train)
Out[14]:
GradientBoostingClassifier()

K-Nearest Neighbors¶

In [15]:
model_knn = KNeighborsClassifier(n_neighbors=5)
model_knn.fit(x_train, y_train)
Out[15]:
KNeighborsClassifier()

SVM¶

In [16]:
model_svm = SVC(kernel='rbf', C=1.0, probability=True)
model_svm.fit(x_train, y_train)
Out[16]:
SVC(probability=True)

Evaluate Classifiers:¶

In [17]:
models = [model_rf, model_gbm, model_knn, model_svm]
model_names = ['Random Forest', 'GBM',  'KNN', 'SVM']

for model, name in zip(models, model_names):
    y_pred = model.predict(x_test)
    accuracy = model.score(x_test, y_test)
    print(f"Classifier: {name}")
    print(f"Accuracy: {accuracy:.2f}")
    print(metrics.classification_report(y_test, y_pred))
    print("------------")
Classifier: Random Forest
Accuracy: 0.79
              precision    recall  f1-score   support

           0       0.81      0.93      0.87      1564
           1       0.68      0.40      0.50       549

    accuracy                           0.79      2113
   macro avg       0.75      0.66      0.68      2113
weighted avg       0.78      0.79      0.77      2113

------------
Classifier: GBM
Accuracy: 0.80
              precision    recall  f1-score   support

           0       0.84      0.91      0.87      1564
           1       0.66      0.49      0.56       549

    accuracy                           0.80      2113
   macro avg       0.75      0.70      0.72      2113
weighted avg       0.79      0.80      0.79      2113

------------
Classifier: KNN
Accuracy: 0.77
              precision    recall  f1-score   support

           0       0.82      0.89      0.85      1564
           1       0.58      0.44      0.50       549

    accuracy                           0.77      2113
   macro avg       0.70      0.66      0.68      2113
weighted avg       0.76      0.77      0.76      2113

------------
Classifier: SVM
Accuracy: 0.74
              precision    recall  f1-score   support

           0       0.74      1.00      0.85      1564
           1       0.00      0.00      0.00       549

    accuracy                           0.74      2113
   macro avg       0.37      0.50      0.43      2113
weighted avg       0.55      0.74      0.63      2113

------------
Notice:
Among the classifiers, Random Forest, Gradient Boosting, and K-Nearest Neighbors (KNN) performed better. However, since the dataset is imbalanced, accuracy alone may not provide a complete picture. To address this, I use SMOTEENN, a combined resampling technique (SMOTE oversampling followed by Edited Nearest Neighbours cleaning), to balance the data and create a more representative training set.

By resampling the data, I aim to improve the models' performance and make better predictions for customer churn.
After resampling, I will re-evaluate the Decision Tree, Random Forest, Gradient Boosting, and KNN classifiers. This will allow me to choose the most effective model for customer churn prediction, helping to make more informed decisions on customer retention strategies.

Apply SMOTEENN¶

In [18]:
sm = SMOTEENN()
x_resampled, y_resampled = sm.fit_resample(x_train, y_train)
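To confirm that SMOTEENN produced a roughly balanced training set, the class counts can be compared before and after resampling (a minimal sketch, not part of the original run):

In [ ]:
# Compare class counts before and after SMOTEENN resampling
print("Before:", y_train.value_counts().to_dict())
print("After: ", pd.Series(y_resampled).value_counts().to_dict())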

Split the resampled data¶

In [19]:
xr_train,xr_test,yr_train,yr_test=train_test_split(x_resampled, y_resampled,test_size=0.3)

Decision Tree after resampling¶

In [20]:
model_smote = DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)
In [21]:
model_smote.fit(xr_train,yr_train)

yr_pred_smote = model_smote.predict(xr_test)

model_score_r = model_smote.score(xr_test, yr_test)

print(round(model_score_r, 2))
print(metrics.classification_report(yr_test, yr_pred_smote))
print(metrics.confusion_matrix(yr_test, yr_pred_smote))
0.9
              precision    recall  f1-score   support

           0       0.86      0.95      0.90       585
           1       0.95      0.85      0.90       632

    accuracy                           0.90      1217
   macro avg       0.90      0.90      0.90      1217
weighted avg       0.90      0.90      0.90      1217

[[554  31]
 [ 93 539]]
Report: With SMOTEENN resampling in place, the Decision Tree's results improve significantly. The accuracy reaches about 90%, and recall, precision, and F1 score for the minority (churn) class are now much stronger.

As we saw above, Random Forest performed better even without SMOTEENN, so let's see how it performs after resampling.

Random Forest after resampling data¶

In [22]:
model_rf_smote = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=6, min_samples_leaf=8)
In [23]:
model_rf_smote.fit(xr_train,yr_train)

yr_pred_smote = model_rf_smote.predict(xr_test)

model_score_r = model_rf_smote.score(xr_test, yr_test)

print(round(model_score_r, 2))
print(metrics.classification_report(yr_test, yr_pred_smote))
print(metrics.confusion_matrix(yr_test, yr_pred_smote))
0.94
              precision    recall  f1-score   support

           0       0.96      0.92      0.94       585
           1       0.93      0.96      0.94       632

    accuracy                           0.94      1217
   macro avg       0.94      0.94      0.94      1217
weighted avg       0.94      0.94      0.94      1217

[[537  48]
 [ 25 607]]
Report: The Random Forest (RF) classifier performed better (94%) than the Decision Tree.

Gradient Boosting Classifier after resampling data¶

In [24]:
model_gbm_smote = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5)
In [25]:
model_gbm_smote.fit(xr_train, yr_train)

yr_pred_smote = model_gbm_smote.predict(xr_test)

model_score_r = model_gbm_smote.score(xr_test, yr_test)

print(round(model_score_r, 2))
print(metrics.classification_report(yr_test, yr_pred_smote))
print(metrics.confusion_matrix(yr_test, yr_pred_smote))
0.96
              precision    recall  f1-score   support

           0       0.96      0.95      0.96       585
           1       0.96      0.96      0.96       632

    accuracy                           0.96      1217
   macro avg       0.96      0.96      0.96      1217
weighted avg       0.96      0.96      0.96      1217

[[558  27]
 [ 25 607]]
Report: The Gradient Boosting classifier has demonstrated superior performance (96%) compared to the Decision Tree and Random Forest models.

K-Nearest Neighbors after resampling¶

In [26]:
model_knn_smote = KNeighborsClassifier(n_neighbors=5)
In [27]:
model_knn_smote.fit(xr_train,yr_train)

yr_pred_smote = model_knn_smote.predict(xr_test)

model_score_r = model_knn_smote.score(xr_test, yr_test)

print(round(model_score_r, 2))
print(metrics.classification_report(yr_test, yr_pred_smote))
print(metrics.confusion_matrix(yr_test, yr_pred_smote))
0.95
              precision    recall  f1-score   support

           0       0.94      0.95      0.95       585
           1       0.96      0.94      0.95       632

    accuracy                           0.95      1217
   macro avg       0.95      0.95      0.95      1217
weighted avg       0.95      0.95      0.95      1217

[[558  27]
 [ 36 596]]
Report: K-Nearest Neighbors has also performed very well (95%), indicating its effectiveness in predicting customer churn.

Final result¶

Result:
K-Nearest Neighbors achieved an accuracy of 95% and demonstrated strong recall, precision, and F1 score for churned customers. The balanced dataset and the selected model provide more accurate predictions of customer churn.

Save the model¶

In [28]:
import pickle
In [29]:
knnmodel = 'knnchurnmodel.sav'
In [30]:
pickle.dump(model_knn_smote, open(knnmodel, 'wb'))
In [31]:
rfmodel = 'rfchurnmodel.sav'
In [32]:
pickle.dump(model_rf_smote, open(rfmodel, 'wb'))
In [33]:
gbm_model = 'gbmchurnmodel.sav'
In [34]:
pickle.dump(model_gbm_smote, open(gbm_model, 'wb'))

Checking the model¶

GBM model¶

In [35]:
load_model = pickle.load(open(gbm_model, 'rb'))
In [36]:
gbm_model_score = load_model.score(xr_test, yr_test)
In [37]:
gbm_model_score
Out[37]:
0.9572719802793755

Random forest model¶

In [38]:
load_model = pickle.load(open(rfmodel, 'rb'))
In [39]:
rf_model_score = load_model.score(xr_test, yr_test)
In [40]:
rf_model_score
Out[40]:
0.9400164338537387

KNN model¶

In [41]:
load_model = pickle.load(open(knnmodel, 'rb'))
In [42]:
knn_model_score = load_model.score(xr_test, yr_test)
In [43]:
knn_model_score
Out[43]:
0.9482333607230896

I have saved three models: "rfchurnmodel.sav" (Random Forest), "gbmchurnmodel.sav" (Gradient Boosting), and "knnchurnmodel.sav" (K-Nearest Neighbors).

Now, I will use knnchurnmodel.sav as my final model and create APIs for accessing it from the UI.

With this implementation, users can efficiently utilize the predictive power of the model through the user interface.
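As a rough illustration of how such an API could look, here is a minimal Flask sketch under my own assumptions: the endpoint name, payload format, and port are hypothetical, and the incoming JSON keys must match the feature columns used during training.

In [ ]:
# app.py -- illustrative Flask service around the saved KNN model (assumed setup)
import pickle
import pandas as pd
from flask import Flask, request, jsonify

app = Flask(__name__)
model = pickle.load(open('knnchurnmodel.sav', 'rb'))

@app.route('/predict', methods=['POST'])
def predict():
    # Expect a JSON object whose keys match the training feature columns
    features = pd.DataFrame([request.get_json()])
    prediction = int(model.predict(features)[0])
    return jsonify({'churn': prediction})

if __name__ == '__main__':
    app.run(port=5000)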