# 单个theta时 (single threshold theta)
# Scores produced by the scoring function, one per sample.
output_score = list(range(12))
output_score
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# Ground-truth class labels (1 = positive, 0 = negative), one per sample.
y = [0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1]
len(y)
12
# Grid of probabilities 0.0, 0.1, ..., 1.0 used as the x-axis of the curves.
# NOTE(review): presumably the probability of the positive class -- confirm.
p = [i / 100 for i in range(0, 101, 10)]
p
[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
# Misclassification costs: c01 weights the FNR term and c02 weights the FPR
# term in the cost formulas below.
c01 = 3
c02 = 2
theta = 6.5 # decision threshold
def calculate_output_result(output_score, theta):
    """Binarise scores against a threshold.

    Args:
        output_score: Iterable of numeric scores, one per sample.
        theta: Threshold; a score strictly below it is labelled 0,
            any other score is labelled 1.

    Returns:
        A list of 0/1 predictions in the same order as ``output_score``.
    """
    return [0 if score < theta else 1 for score in output_score]


output_result = calculate_output_result(output_score, theta)
output_result
[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
import pandas as pd


def calculate_m_positive_negative(y):
    """Count the positive (label 1) and negative (label 0) samples.

    Args:
        y: Sequence of class labels.

    Returns:
        A ``(m_positive, m_negative)`` tuple of plain ints. A class that
        does not occur in ``y`` is reported as 0 instead of raising
        ``KeyError``.
    """
    # Series.value_counts replaces the deprecated top-level pd.value_counts.
    counts = pd.Series(y).value_counts()
    m_positive = int(counts.get(1, 0))
    m_negative = int(counts.get(0, 0))
    return m_positive, m_negative


m_positive, m_negative = calculate_m_positive_negative(y)
m_positive, m_negative
(6, 6)

def calculate_confusion(y, output_result):
    """Tally the four confusion-matrix cells.

    Args:
        y: Ground-truth labels; a label equal to 1 is treated as positive,
            every other label as negative.
        output_result: Predicted labels, paired element-wise with ``y``.

    Returns:
        A tuple ``(con1, con2, con3, con4)``:
        con1 -- truth is 1 and the prediction matches (true positive),
        con2 -- truth is 1 and the prediction differs (false negative),
        con3 -- truth is not 1 and the prediction differs (false positive),
        con4 -- truth is not 1 and the prediction matches (true negative).
    """
    con1 = con2 = con3 = con4 = 0
    for truth, predicted in zip(y, output_result):
        matches = truth == predicted
        if truth == 1:
            if matches:
                con1 += 1
            else:
                con2 += 1
        elif matches:
            con4 += 1
        else:
            con3 += 1
    return con1, con2, con3, con4


con1, con2, con3, con4 = calculate_confusion(y, output_result)
con1, con2, con3, con4
(4, 2, 1, 5)

def calculate_FNR_FPR(con1, con2, con3, con4):
    """Compute the false-negative and false-positive rates.

    Args:
        con1: True-positive count.
        con2: False-negative count.
        con3: False-positive count.
        con4: True-negative count.

    Returns:
        ``(FNR, FPR)`` rounded to 4 decimals, where
        ``FNR = con2 / (con1 + con2)`` and ``FPR = con3 / (con3 + con4)``.
        A rate whose denominator is zero (no samples of that class) is
        returned as 0.0 instead of raising ``ZeroDivisionError``.
    """
    positives = con1 + con2
    negatives = con3 + con4
    FNR = round(con2 / positives, 4) if positives else 0.0
    FPR = round(con3 / negatives, 4) if negatives else 0.0
    return FNR, FPR


FNR, FPR = calculate_FNR_FPR(con1, con2, con3, con4)
FNR, FPR
(0.3333, 0.1667)
# Positive probability cost
def calculate_Pcost(p, c01, c02):
    """Compute the positive probability cost for each probability in ``p``.

    Each value is ``p_i*c01 / (p_i*c01 + (1 - p_i)*c02)``, rounded to
    4 decimals.

    Args:
        p: Iterable of probabilities.
        c01: Cost factor applied to ``p_i``.
        c02: Cost factor applied to ``1 - p_i``.

    Returns:
        A list of costs in the same order as ``p``.
    """
    return [
        round((prob * c01) / (prob * c01 + (1 - prob) * c02), 4)
        for prob in p
    ]


# The original call was misspelled ``calculate_Post`` and raised NameError.
Pcosts = calculate_Pcost(p, c01, c02)
Pcosts
[0.0, 0.1429, 0.2727, 0.3913, 0.5, 0.6, 0.6923, 0.7778, 0.8571, 0.931, 1.0]
def calculate_cost_norm(p, c01, c02, FNR, FPR):
    """Compute the normalised expected cost for each probability in ``p``.

    Each value is
    ``(FNR*p_i*c01 + FPR*(1 - p_i)*c02) / (p_i*c01 + (1 - p_i)*c02)``,
    rounded to 4 decimals.

    Args:
        p: Iterable of probabilities.
        c01: Cost factor paired with ``FNR``.
        c02: Cost factor paired with ``FPR``.
        FNR: False-negative rate of the classifier.
        FPR: False-positive rate of the classifier.

    Returns:
        A list of normalised costs in the same order as ``p``.
    """
    return [
        round(
            (FNR * prob * c01 + FPR * (1 - prob) * c02)
            / (prob * c01 + (1 - prob) * c02),
            4,
        )
        for prob in p
    ]


costs_norm = calculate_cost_norm(p, c01, c02, FNR, FPR)
costs_norm
[0.1667, 0.1905, 0.2121, 0.2319, 0.25, 0.2667, 0.282, 0.2963, 0.3095, 0.3218, 0.3333]
import matplotlib as mpl
from matplotlib import pyplot as plt


def plot_lines(X, Y, color):
    """Plot ``Y`` against ``X`` with pyplot, passing ``color`` as the format string.

    Args:
        X: x-coordinates.
        Y: y-coordinates.
        color: Third positional argument forwarded to ``plt.plot``
            (e.g. ``'r'`` or ``'b:'``).

    Returns:
        None.
    """
    plt.plot(X, Y, color)


# Normalised cost against the positive probability cost (red) and against
# the raw probability grid (blue dotted).
plot_lines(Pcosts, costs_norm, 'r')
plot_lines(p, costs_norm, 'b:')
plt.show()

# 多个theta时 (multiple thresholds theta)
# Candidate thresholds 0.5, 1.5, ..., 11.5.
thetas = [i + 0.5 for i in range(12)]
thetas
[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5]
def calculae_Pcost_cost_norm(thetas, output_score, y, calculate_Pcost, calculate_cost_norm):
    """Compute the cost curves for every threshold in ``thetas``.

    For each threshold the scores are binarised, the confusion counts and
    the FNR/FPR are derived, and both cost lists are evaluated. The
    probability grid ``p`` and the costs ``c01``/``c02`` are read from
    module scope.

    Args:
        thetas: Iterable of thresholds.
        output_score: Scores, one per sample.
        y: Ground-truth labels, paired with ``output_score``.
        calculate_Pcost: Callable ``(p, c01, c02) -> list``.
        calculate_cost_norm: Callable ``(p, c01, c02, FNR, FPR) -> list``.

    Returns:
        ``(Pcosts_n, costs_norm_n, theta_FPR_FNR)``: two lists holding one
        result list per threshold, and a dict mapping each threshold to
        ``[FNR, FPR]`` (note the order, despite the dict's name).
    """
    # NOTE: the misspelled function name is kept so existing callers still work.
    Pcosts_n = []
    costs_norm_n = []
    theta_FPR_FNR = {}
    for theta in thetas:
        output_result = calculate_output_result(output_score, theta)
        con1, con2, con3, con4 = calculate_confusion(y, output_result)
        FNR, FPR = calculate_FNR_FPR(con1, con2, con3, con4)
        theta_FPR_FNR[theta] = [FNR, FPR]
        Pcosts_n.append(calculate_Pcost(p, c01, c02))
        costs_norm_n.append(calculate_cost_norm(p, c01, c02, FNR, FPR))
    return Pcosts_n, costs_norm_n, theta_FPR_FNR


Pcosts_n, costs_norm_n, theta_FPR_FNR = calculae_Pcost_cost_norm(
    thetas, output_score, y, calculate_Pcost, calculate_cost_norm
)
# Draw one red line per threshold, then show the figure once after the loop.
for Pcosts_i, costs_norm_i in zip(Pcosts_n, costs_norm_n):
    plot_lines(Pcosts_i, costs_norm_i, 'r')
plt.show()

# Display the per-threshold rates; each value is stored as [FNR, FPR].
theta_FPR_FNR
{0.5: [0.0, 0.8333], 1.5: [0.0, 0.6667], 2.5: [0.0, 0.5], 3.5: [0.0, 0.3333], 4.5: [0.1667, 0.3333], 5.5: [0.1667, 0.1667], 6.5: [0.3333, 0.1667], 7.5: [0.5, 0.1667], 8.5: [0.5, 0.0], 9.5: [0.6667, 0.0], 10.5: [0.8333, 0.0], 11.5: [1.0, 0.0]}