thresh.png

单个theta时

  # Scores emitted by the scoring function, one per sample (here simply 0..11).
  output_score = list(range(12))
  output_score  # notebook-style display of the scores
  1. [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
  # Ground-truth class label of each sample (1 = positive, 0 = negative),
  # aligned index-by-index with the scores in output_score.
  y = [0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1]
  len(y)  # number of labelled samples
  1. 12
  # Probability grid 0.0, 0.1, ..., 1.0; the cost functions in this file
  # evaluate one point per entry, using it as the positive-class probability.
  p = [percent / 100 for percent in range(0, 101, 10)]
  p
  1. [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
  # Misclassification costs: c01 weights the false-negative term and c02 the
  # false-positive term in the cost formulas used later in this file.
  c01 = 3
  c02 = 2
  # Decision threshold: scores below theta are classified 0, all others 1.
  theta = 6.5
  def calculate_output_result(output_score, theta):
      """Threshold raw scores into binary class predictions.

      Args:
          output_score: Iterable of numeric scores, one per sample.
          theta: Decision threshold.

      Returns:
          A list with one entry per score: 0 where the score is strictly
          below ``theta`` and 1 otherwise (a score equal to ``theta`` maps
          to 1).
      """
      return [0 if score < theta else 1 for score in output_score]
  # Predictions for the single threshold chosen above.
  output_result = calculate_output_result(output_score, theta)
  output_result
  1. [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
  1. import pandas as pd
  def calculate_m_positive_negative(y):
      """Count the positive (label 1) and negative (label 0) samples.

      The counts are computed directly instead of through the top-level
      ``pd.value_counts`` helper, which recent pandas releases deprecate.
      A class that does not occur in ``y`` is reported as 0 rather than
      raising ``KeyError``.

      Args:
          y: Iterable of class labels.

      Returns:
          ``(m_positive, m_negative)``: how many labels equal 1 and how many
          equal 0. Labels with any other value are ignored.
      """
      m_positive = sum(1 for label in y if label == 1)
      m_negative = sum(1 for label in y if label == 0)
      return m_positive, m_negative
  # Class sizes of the ground-truth labels.
  m_positive, m_negative = calculate_m_positive_negative(y)
  m_positive, m_negative
  1. (6, 6)

matrix.png

  def calculate_confusion(y, output_result):
      """Tally the four confusion-matrix cells for binary predictions.

      Args:
          y: Ground-truth labels; 1 marks a positive sample and any other
              value is treated as negative.
          output_result: Predicted labels, indexable by position and at
              least as long as ``y``.

      Returns:
          ``(con1, con2, con3, con4)``:
          con1 - positives predicted correctly (true positives),
          con2 - positives predicted incorrectly (false negatives),
          con3 - negatives predicted incorrectly (false positives),
          con4 - negatives predicted correctly (true negatives).

      Raises:
          IndexError: If ``output_result`` is shorter than ``y``.
      """
      con1 = con2 = con3 = con4 = 0
      for i, truth in enumerate(y):
          correct = truth == output_result[i]
          if truth == 1:
              if correct:
                  con1 += 1
              else:
                  con2 += 1
          elif correct:
              con4 += 1
          else:
              con3 += 1
      return con1, con2, con3, con4
  # Confusion counts for the single-threshold predictions.
  con1, con2, con3, con4 = calculate_confusion(y, output_result)
  con1, con2, con3, con4
  1. (4, 2, 1, 5)

matrix1.png

  def calculate_FNR_FPR(con1, con2, con3, con4):
      """Derive the false-negative and false-positive rates.

      Args:
          con1: Count of positives predicted correctly.
          con2: Count of positives predicted incorrectly.
          con3: Count of negatives predicted incorrectly.
          con4: Count of negatives predicted correctly.

      Returns:
          ``(FNR, FPR)`` where ``FNR = con2 / (con1 + con2)`` and
          ``FPR = con3 / (con3 + con4)``, each rounded to 4 decimals.

      Raises:
          ZeroDivisionError: If either class has no samples, i.e.
              ``con1 + con2 == 0`` or ``con3 + con4 == 0``.
      """
      FNR = round(con2 / (con1 + con2), 4)
      FPR = round(con3 / (con3 + con4), 4)
      return FNR, FPR
  5. FNR,FPR = calculate_FNR_FPR(con1,con2,con3,con4)
  6. FNR,FPR
  1. (0.3333, 0.1667)
  def calculate_Pcost(p, c01, c02):
      """Compute the positive-probability cost for every probability in ``p``.

      For each probability ``q`` the value is
      ``q * c01 / (q * c01 + (1 - q) * c02)``, rounded to 4 decimals.

      Args:
          p: Iterable of probabilities.
          c01: Cost weight applied to ``q``.
          c02: Cost weight applied to ``1 - q``.

      Returns:
          A list of rounded costs, one per entry of ``p`` and in the same
          order.

      Raises:
          ZeroDivisionError: If the denominator evaluates to zero for some
              entry.
      """
      return [
          round((prob * c01) / (prob * c01 + (1 - prob) * c02), 4)
          for prob in p
      ]
  # Positive-probability cost at every point of the probability grid.
  # The function defined in this file is calculate_Pcost; the earlier
  # spelling "calculate_Post" raised NameError.
  Pcosts = calculate_Pcost(p, c01, c02)
  Pcosts
  1. [0.0, 0.1429, 0.2727, 0.3913, 0.5, 0.6, 0.6923, 0.7778, 0.8571, 0.931, 1.0]
  def calculate_cost_norm(p, c01, c02, FNR, FPR):
      """Compute the normalised expected cost for every probability in ``p``.

      For each probability ``q`` the value is
      ``(FNR * q * c01 + FPR * (1 - q) * c02) / (q * c01 + (1 - q) * c02)``,
      rounded to 4 decimals.

      Args:
          p: Iterable of probabilities.
          c01: Cost weight paired with ``FNR``.
          c02: Cost weight paired with ``FPR``.
          FNR: False-negative rate.
          FPR: False-positive rate.

      Returns:
          A list of rounded costs, one per entry of ``p`` and in the same
          order.

      Raises:
          ZeroDivisionError: If the denominator evaluates to zero for some
              entry.
      """
      return [
          round(
              (FNR * prob * c01 + FPR * (1 - prob) * c02)
              / (prob * c01 + (1 - prob) * c02),
              4,
          )
          for prob in p
      ]
  # Normalised cost at every point of the probability grid for the single
  # threshold's FNR/FPR.
  costs_norm = calculate_cost_norm(p, c01, c02, FNR, FPR)
  costs_norm
  1. [0.1667,
  2. 0.1905,
  3. 0.2121,
  4. 0.2319,
  5. 0.25,
  6. 0.2667,
  7. 0.282,
  8. 0.2963,
  9. 0.3095,
  10. 0.3218,
  11. 0.3333]
  1. import matplotlib as mpl
  2. from matplotlib import pyplot as plt
  def plot_lines(X, Y, color):
      """Draw ``Y`` against ``X`` with ``plt.plot``.

      Args:
          X: Horizontal coordinates.
          Y: Vertical coordinates.
          color: Passed to ``plt.plot`` as its third positional argument
              (this file uses ``'r'`` and ``'b:'``).

      Returns:
          None. The figure is only displayed once ``plt.show()`` is called.
      """
      plt.plot(X, Y, color)
  # 'r' line: normalised cost against the positive-probability cost.
  plot_lines(Pcosts, costs_norm, 'r')
  # 'b:' line: the same costs against the raw probability grid p.
  plot_lines(p, costs_norm, 'b:')
  plt.show()

output_14_0.png

多个theta时

  # Candidate thresholds 0.5, 1.5, ..., 11.5.
  thetas = [step + 0.5 for step in range(12)]
  thetas
  1. [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5]
  def calculae_Pcost_cost_norm(thetas, output_score, y, calculate_Pcost, calculate_cost_norm):
      """Evaluate the cost values for every threshold in ``thetas``.

      For each threshold the scores are binarised, the confusion counts and
      FNR/FPR are derived, and both cost functions are evaluated.

      NOTE: besides its parameters, this function reads the module-level
      names ``p``, ``c01`` and ``c02`` and calls the module-level helpers
      ``calculate_output_result``, ``calculate_confusion`` and
      ``calculate_FNR_FPR``; they must be defined before it is called.
      The function name keeps its original spelling so existing callers
      keep working.

      Args:
          thetas: Iterable of decision thresholds.
          output_score: Scores passed to ``calculate_output_result``.
          y: Ground-truth labels passed to ``calculate_confusion``.
          calculate_Pcost: Callable invoked as ``calculate_Pcost(p, c01, c02)``.
          calculate_cost_norm: Callable invoked as
              ``calculate_cost_norm(p, c01, c02, FNR, FPR)``.

      Returns:
          ``(Pcosts_n, costs_norm_n, theta_FPR_FNR)``. The first two are
          lists holding one result per threshold, in the order of
          ``thetas``. ``theta_FPR_FNR`` maps each threshold to the two-item
          list ``[FNR, FPR]`` (FNR first, despite the variable's name).
      """
      Pcosts_n = []
      costs_norm_n = []
      theta_FPR_FNR = {}
      for theta in thetas:
          output_result = calculate_output_result(output_score, theta)
          con1, con2, con3, con4 = calculate_confusion(y, output_result)
          FNR, FPR = calculate_FNR_FPR(con1, con2, con3, con4)
          theta_FPR_FNR[theta] = [FNR, FPR]
          # Called once per threshold so each entry is a separate object.
          Pcosts_n.append(calculate_Pcost(p, c01, c02))
          costs_norm_n.append(calculate_cost_norm(p, c01, c02, FNR, FPR))
      return Pcosts_n, costs_norm_n, theta_FPR_FNR
  # Evaluate every threshold in thetas.
  Pcosts_n, costs_norm_n, theta_FPR_FNR = calculae_Pcost_cost_norm(
      thetas, output_score, y, calculate_Pcost, calculate_cost_norm
  )
  # One 'r' cost line per threshold, all drawn before the figure is shown.
  for Pcosts_line, costs_norm_line in zip(Pcosts_n, costs_norm_n):
      plot_lines(Pcosts_line, costs_norm_line, 'r')
  plt.show()

output_18_0.png

  # Rates per threshold; each value is stored as [FNR, FPR].
  theta_FPR_FNR
  1. {0.5: [0.0, 0.8333],
  2. 1.5: [0.0, 0.6667],
  3. 2.5: [0.0, 0.5],
  4. 3.5: [0.0, 0.3333],
  5. 4.5: [0.1667, 0.3333],
  6. 5.5: [0.1667, 0.1667],
  7. 6.5: [0.3333, 0.1667],
  8. 7.5: [0.5, 0.1667],
  9. 8.5: [0.5, 0.0],
  10. 9.5: [0.6667, 0.0],
  11. 10.5: [0.8333, 0.0],
  12. 11.5: [1.0, 0.0]}