# 单个theta时 — single-threshold case (one value of theta)
# Scores produced by the model for the 12 samples (0..11).
output_score = list(range(12))
# Recorded notebook output: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

# Ground-truth class label of each sample (1 = positive, 0 = negative).
y = [0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1]
# Recorded notebook output of len(y): 12

# Positive-class probabilities 0.0, 0.1, ..., 1.0 at which costs are evaluated.
p = [i / 100 for i in range(0, 101, 10)]
# Recorded notebook output:
# [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# Misclassification costs: c01 weights the false-negative (FNR) term and
# c02 the false-positive (FPR) term in the cost formulas used later.
c01 = 3
c02 = 2

theta = 6.5  # decision threshold applied to output_score
def calculate_output_result(output_score, theta):
    """Threshold raw scores into binary predictions.

    Args:
        output_score: Iterable of numeric scores, one per sample.
        theta: Decision threshold.

    Returns:
        A list containing 0 for every score strictly below ``theta`` and 1
        otherwise (a score equal to ``theta`` is labelled 1).
    """
    return [0 if score < theta else 1 for score in output_score]
# Binary predictions obtained with the single threshold theta.
output_result = calculate_output_result(output_score=output_score, theta=theta)
# Recorded notebook output: [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
import pandas as pd
def calculate_m_positive_negative(y):
    """Count the positive and negative examples in the label sequence.

    Args:
        y: Iterable of class labels; 1 marks a positive example and 0 a
            negative one. Any other value is ignored.

    Returns:
        A tuple ``(m_positive, m_negative)`` of plain ints. A class that
        does not occur in ``y`` is reported as 0 rather than raising.
    """
    # Counted by hand instead of via the deprecated ``pd.value_counts`` so a
    # missing class yields 0 instead of a KeyError on the lookup.
    m_positive = 0
    m_negative = 0
    for label in y:
        if label == 1:
            m_positive += 1
        elif label == 0:
            m_negative += 1
    return m_positive, m_negative
# Class sizes of the ground-truth labels.
m_positive, m_negative = calculate_m_positive_negative(y=y)
# Recorded notebook output of (m_positive, m_negative): (6, 6)

def calculate_confusion(y, output_result):
    """Tally the four confusion-matrix cells for binary predictions.

    Args:
        y: Sequence of true labels. A label equal to 1 is treated as
            positive; every other value is treated as negative.
        output_result: Sequence of predicted labels, indexed in step with
            ``y``.

    Returns:
        A tuple ``(con1, con2, con3, con4)`` holding, in this order, the
        number of true positives, false negatives, false positives and
        true negatives.

    Raises:
        IndexError: If ``output_result`` is shorter than ``y``.
    """
    true_pos = false_neg = false_pos = true_neg = 0
    for index, actual in enumerate(y):
        agrees = actual == output_result[index]
        if actual == 1:
            if agrees:
                true_pos += 1
            else:
                false_neg += 1
        elif agrees:
            true_neg += 1
        else:
            false_pos += 1
    return true_pos, false_neg, false_pos, true_neg
# Confusion-matrix cells for the single-threshold predictions.
con1, con2, con3, con4 = calculate_confusion(y=y, output_result=output_result)
# Recorded notebook output of (con1, con2, con3, con4): (4, 2, 1, 5)

def calculate_FNR_FPR(con1, con2, con3, con4):
    """Compute the false-negative and false-positive rates.

    Args:
        con1: Number of true positives.
        con2: Number of false negatives.
        con3: Number of false positives.
        con4: Number of true negatives.

    Returns:
        A tuple ``(FNR, FPR)``, each rounded to 4 decimal places, where
        ``FNR = con2 / (con1 + con2)`` and ``FPR = con3 / (con3 + con4)``.
        A rate whose denominator is zero (no positives, or no negatives)
        is reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    positives = con1 + con2
    negatives = con3 + con4
    FNR = round(con2 / positives, 4) if positives else 0.0
    FPR = round(con3 / negatives, 4) if negatives else 0.0
    return FNR, FPR
# Error rates of the single-threshold classifier.
FNR, FPR = calculate_FNR_FPR(con1=con1, con2=con2, con3=con3, con4=con4)
# Recorded notebook output of (FNR, FPR): (0.3333, 0.1667)
def calculate_Pcost(p, c01, c02):
    """Compute the positive-probability cost for each probability in ``p``.

    For every probability ``prob`` the value is
    ``prob * c01 / (prob * c01 + (1 - prob) * c02)``, rounded to 4 decimal
    places.

    Args:
        p: Iterable of positive-class probabilities.
        c01: Cost weight applied to the positive-class term.
        c02: Cost weight applied to the negative-class term.

    Returns:
        A list of rounded costs, one per element of ``p``.

    Raises:
        ZeroDivisionError: If the denominator is zero for some probability
            (for example when both cost weights are 0).
    """
    return [
        round((prob * c01) / (prob * c01 + (1 - prob) * c02), 4)
        for prob in p
    ]
# Positive-probability costs for every probability in p. The original call
# named the undefined function `calculate_Post`, which raises NameError;
# the function defined in this file is `calculate_Pcost`.
Pcosts = calculate_Pcost(p, c01, c02)
# Recorded notebook output:
# [0.0, 0.1429, 0.2727, 0.3913, 0.5, 0.6, 0.6923, 0.7778, 0.8571, 0.931, 1.0]
def calculate_cost_norm(p, c01, c02, FNR, FPR):
    """Compute the normalized expected cost for each probability in ``p``.

    For every probability ``prob`` the value is
    ``(FNR * prob * c01 + FPR * (1 - prob) * c02)
    / (prob * c01 + (1 - prob) * c02)``, rounded to 4 decimal places.

    Args:
        p: Iterable of positive-class probabilities.
        c01: Cost weight multiplying the false-negative term.
        c02: Cost weight multiplying the false-positive term.
        FNR: False-negative rate of the classifier.
        FPR: False-positive rate of the classifier.

    Returns:
        A list of rounded normalized costs, one per element of ``p``.

    Raises:
        ZeroDivisionError: If the denominator is zero for some probability
            (for example when both cost weights are 0).
    """
    return [
        round(
            ((FNR * prob * c01) + FPR * (1 - prob) * c02)
            / (prob * c01 + (1 - prob) * c02),
            4,
        )
        for prob in p
    ]
# Normalized expected cost of the single-threshold classifier at each p.
costs_norm = calculate_cost_norm(p=p, c01=c01, c02=c02, FNR=FNR, FPR=FPR)
# Recorded notebook output:
# [0.1667, 0.1905, 0.2121, 0.2319, 0.25, 0.2667,
#  0.282, 0.2963, 0.3095, 0.3218, 0.3333]
import matplotlib as mpl
from matplotlib import pyplot as plt
def plot_lines(X, Y, color):
    """Draw one line on the current matplotlib axes.

    Args:
        X: x-coordinates of the points.
        Y: y-coordinates of the points.
        color: Passed positionally as the third argument of ``plt.plot``,
            so it is interpreted as a matplotlib format string and may
            carry a line style as well as a colour (e.g. ``'r'``, ``'b:'``).

    Returns:
        None. The figure is not shown here; the caller invokes
        ``plt.show()``.
    """
    plt.plot(X, Y, color)
# Red solid line: normalized cost against the positive-probability cost.
plot_lines(X=Pcosts, Y=costs_norm, color='r')
# Blue dotted line: the same normalized cost against the raw probability p.
plot_lines(X=p, Y=costs_norm, color='b:')
plt.show()

# 多个theta时 — multi-threshold case (sweeping several values of theta)
# Candidate thresholds placed halfway between consecutive integer scores.
thetas = [step + 0.5 for step in range(12)]
# Recorded notebook output:
# [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5]
def calculae_Pcost_cost_norm(thetas, output_score, y, calculate_Pcost, calculate_cost_norm):
    """Compute one cost-curve line per threshold.

    For every threshold the scores are binarized, the confusion counts and
    the FNR/FPR are derived, and the two cost functions are evaluated.

    NOTE: the probability grid ``p`` and the cost weights ``c01`` / ``c02``
    are not parameters; they are read from module-level globals.

    Args:
        thetas: Iterable of decision thresholds.
        output_score: Scores to be thresholded, one per sample.
        y: True labels, aligned with ``output_score``.
        calculate_Pcost: Callable ``(p, c01, c02)`` returning the x-values
            of a line.
        calculate_cost_norm: Callable ``(p, c01, c02, FNR, FPR)`` returning
            the y-values of a line.

    Returns:
        A tuple ``(Pcosts_n, costs_norm_n, theta_FPR_FNR)``: the list of
        x-value lists, the list of y-value lists (both in ``thetas``
        order), and a dict mapping each threshold to ``[FNR, FPR]`` (note
        the order, which is the reverse of what the dict's name suggests).
    """
    Pcosts_n = []
    costs_norm_n = []
    theta_FPR_FNR = {}
    for theta in thetas:
        output_result = calculate_output_result(output_score, theta)
        con1, con2, con3, con4 = calculate_confusion(y, output_result)
        FNR, FPR = calculate_FNR_FPR(con1, con2, con3, con4)
        theta_FPR_FNR[theta] = [FNR, FPR]
        # The cost callables are injected, so they are invoked once per
        # threshold exactly as before rather than hoisted out of the loop.
        Pcosts_n.append(calculate_Pcost(p, c01, c02))
        costs_norm_n.append(calculate_cost_norm(p, c01, c02, FNR, FPR))
    return Pcosts_n, costs_norm_n, theta_FPR_FNR
# Evaluate every candidate threshold, then draw one red cost line for each.
Pcosts_n, costs_norm_n, theta_FPR_FNR = calculae_Pcost_cost_norm(
    thetas, output_score, y, calculate_Pcost, calculate_cost_norm
)
for line_x, line_y in zip(Pcosts_n, costs_norm_n):
    plot_lines(line_x, line_y, 'r')
plt.show()

# Per-threshold error rates collected by calculae_Pcost_cost_norm. Despite the
# variable's name, each value is stored in the order [FNR, FPR].
# The bare name and the dict literal below are no-op expression statements;
# the literal appears to be the notebook's recorded output — TODO confirm.
theta_FPR_FNR
{0.5: [0.0, 0.8333],
1.5: [0.0, 0.6667],
2.5: [0.0, 0.5],
3.5: [0.0, 0.3333],
4.5: [0.1667, 0.3333],
5.5: [0.1667, 0.1667],
6.5: [0.3333, 0.1667],
7.5: [0.5, 0.1667],
8.5: [0.5, 0.0],
9.5: [0.6667, 0.0],
10.5: [0.8333, 0.0],
11.5: [1.0, 0.0]}