1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
| """ # @Time : 2020/9/5 # @Author : Jimou Chen """ import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from sklearn.metrics import classification_report
# Integer codes for the categorical columns. Values that are not listed here
# are passed through unchanged.
_CATEGORY_CODES = {
    'Geography': {'France': 1, 'Spain': 2, 'Germany': 3},
    'Gender': {'Female': 0, 'Male': 1},
}

# Columns fed to the model, in the order they appear in the feature matrix.
_FEATURE_COLUMNS = [
    'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
    'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary',
]


def deal_train(path, scaler=None, *, fit=True):
    """Load a churn CSV and return standardized features and labels.

    The ``Geography`` and ``Gender`` columns are replaced by integer codes
    (France=1, Spain=2, Germany=3; Female=0, Male=1), the ten feature
    columns are selected, and the features are standardized.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        scaler: Optional scaler object to use instead of a fresh
            ``StandardScaler``. Passing the same instance for the training
            and the test file lets the test features be standardized with
            the training statistics.
        fit: When true (the default), the scaler is fitted on this file
            before transforming it. When false, ``scaler`` is only applied
            and must already have been fitted.

    Returns:
        A tuple ``(x_data, y_data)``: the scaler's output for the feature
        columns, and the ``Exited`` column of the file.

    Raises:
        ValueError: If ``fit`` is false but no ``scaler`` was supplied.
    """
    if scaler is None:
        if not fit:
            raise ValueError('fit=False requires an already fitted scaler')
        scaler = StandardScaler()

    train_data = pd.read_csv(path)

    # Build each encoded column as a whole and assign it back, instead of
    # writing ints into a text column through boolean ``.loc`` masks: such
    # in-place writes keep the column as object dtype and can be rejected
    # when the column uses pandas' dedicated string dtype.
    for column, codes in _CATEGORY_CODES.items():
        train_data[column] = train_data[column].map(
            lambda value, codes=codes: codes.get(value, value)
        )

    x_data = train_data[_FEATURE_COLUMNS]
    y_data = train_data['Exited']

    if fit:
        x_data = scaler.fit_transform(x_data)
    else:
        x_data = scaler.transform(x_data)

    return x_data, y_data
if __name__ == '__main__':
    # Train a logistic-regression churn classifier on one CSV and evaluate
    # it on a second, held-out CSV.
    x_train_data, y_train_data = deal_train('data/Churn-Modelling.csv')
    # NOTE(review): deal_train is called separately for each file, so the
    # test features are standardized with the test file's own statistics
    # rather than with the statistics of the training data.
    x_test, y_test = deal_train('data/Churn-Modelling-Test-Data.csv')

    lr = LogisticRegression()
    lr.fit(x_train_data, y_train_data)

    pred = lr.predict(x_test)
    # classification_report takes (y_true, y_pred); passing them the other
    # way round swaps the reported precision and recall of every class.
    print(classification_report(y_test, pred))
    # Mean accuracy of the model on the test set.
    print(lr.score(x_test, y_test))
|