1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
"""Exam pass prediction.

examdata.csv is the exam data set, containing the columns Exam1, Exam2 and Pass.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Load the exam records; the path is relative to the current working directory.
data = pd.read_csv('examdata.csv')
# Print the preview explicitly: a bare `data.head()` is only rendered by a
# notebook and its result is silently discarded when run as a script.
print(data.head())
# Figure 1: every record as one point, Exam1 on the x-axis and Exam2 on the
# y-axis, without distinguishing the outcome.
fig1 = plt.figure()
plt.scatter(x=data['Exam1'], y=data['Exam2'])
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.show()
# Boolean selector: True for the rows whose Pass column equals 1.
mask = data['Pass'].eq(1)

# Figure 2: the same scatter, drawn as two series so that rows selected by
# `mask` and the remaining rows get separate legend entries.
fig2 = plt.figure()
passed = plt.scatter(data.loc[mask, 'Exam1'], data.loc[mask, 'Exam2'])
failed = plt.scatter(data.loc[~mask, 'Exam1'], data.loc[~mask, 'Exam2'])
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.legend([passed, failed], ['Pass', 'Fail'])
plt.show()
# Target: the Pass column.
y = data['Pass']
# Feature matrix: every column of the table except Pass.
x = data.drop(columns='Pass')
# The two score columns on their own, reused later for the boundary maths.
x1 = data['Exam1']
x2 = data['Exam2']
# Train a logistic-regression classifier with default settings on the raw
# features. fit() returns the estimator, so construction and training chain.
LR = LogisticRegression().fit(x, y)
# Predict on the same rows the model was trained on and report how many of
# those predictions match the labels (training accuracy, not a held-out score).
y_pred = LR.predict(x)
print(y_pred)
accuracy = accuracy_score(y_true=y, y_pred=y_pred)
print(accuracy)
# Classify a single new sample (Exam1=70, Exam2=65). LR was fitted on a
# DataFrame, so the sample is wrapped in a DataFrame with the same columns;
# passing a bare nested list triggers scikit-learn's
# "X does not have valid feature names" warning.
y_text = LR.predict(pd.DataFrame([[70, 65]], columns=x.columns))
# predict() returns an array; test its single element explicitly instead of
# relying on the truth value of an array comparison.
print('passed' if y_text[0] == 1 else 'failed')

# Fitted parameters of theta0 + theta1*Exam1 + theta2*Exam2.
# NOTE(review): intercept_ is kept as the array scikit-learn returns (it
# broadcasts in the arithmetic below) so the printed output is unchanged.
theta0 = LR.intercept_
theta1, theta2 = LR.coef_[0][0], LR.coef_[0][1]
print(theta0, theta1, theta2)

# Decision boundary: solve theta0 + theta1*x1 + theta2*x2 = 0 for x2.
x2_new = -(theta0 + theta1 * x1) / theta2
print(x2_new)

# Figure 3: pass/fail scatter with the straight-line boundary on top.
fig3 = plt.figure()
passed = plt.scatter(data.loc[:, 'Exam1'][mask], data.loc[:, 'Exam2'][mask])
failed = plt.scatter(data.loc[:, 'Exam1'][~mask], data.loc[:, 'Exam2'][~mask])
plt.plot(x1, x2_new)
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.legend((passed, failed), ('Pass', 'Fail'))
plt.show()
# Hand-built second-order terms: the two squares and the cross product.
x1_2 = x1.mul(x1)
x2_2 = x2.mul(x2)
x1_x2 = x1.mul(x2)

# Feature table for the second model. The column order set here is the order
# in which the fitted coefficients are read back later.
x_new = pd.DataFrame(
    {
        'x1': x1,
        'x2': x2,
        'x1_2': x1_2,
        'x2_2': x2_2,
        'x1_x2': x1_x2,
    }
)
# Fit a second classifier on the second-order feature table and report its
# accuracy on the training rows.
LR2 = LogisticRegression()
LR2.fit(x_new, y)
y2_pred = LR2.predict(x_new)
accuracy2 = accuracy_score(y, y2_pred)
print(accuracy2)

# Sort Exam1 so the boundary is drawn left to right as one continuous curve.
x1_new = x1.sort_values()

# Coefficients in x_new's column order: x1, x2, x1_2, x2_2, x1_x2.
theta0 = LR2.intercept_
theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0]
print(theta0, theta1, theta2, theta3, theta4, theta5)

# The boundary
#   theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x2^2 + theta5*x1*x2 = 0
# is a quadratic a*x2^2 + b*x2 + c = 0 in x2 for each x1.
a = theta4
b = theta5 * x1_new + theta2
c = theta0 + theta1 * x1_new + theta3 * x1_new * x1_new
# Only the '+' root is drawn. Where the discriminant is negative, np.sqrt
# yields NaN and that part of the curve is simply not plotted.
x2_new_boundary = (-b + np.sqrt(b ** 2 - 4 * a * c)) / (2 * a)

# Figure 4: pass/fail scatter with the curved boundary on top.
fig4 = plt.figure()
passed = plt.scatter(data.loc[:, 'Exam1'][mask], data.loc[:, 'Exam2'][mask])
failed = plt.scatter(data.loc[:, 'Exam1'][~mask], data.loc[:, 'Exam2'][~mask])
plt.plot(x1_new, x2_new_boundary)
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.legend((passed, failed), ('Pass', 'Fail'))
plt.show()

# Boundary curve on its own. It is plotted against the sorted x1_new it was
# computed from; the unsorted x1 would pair each boundary value with the wrong
# Exam1 value, because matplotlib matches points by position, not by index.
plt.plot(x1_new, x2_new_boundary)
plt.show()
|