单个theta时

# Scores emitted by the scoring function, one per sample (here simply 0..11).
output_score = list(range(12))
output_score  # notebook echo of the scores

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

# True class labels (1 = positive, 0 = negative), indexed in parallel with
# the scores.
y = [0,0,0,0,1,0,1,1,0,1,1,1]
len(y)  # notebook echo: number of labelled samples

# Grid of positive-class probabilities: 0.0, 0.1, ..., 1.0.
p = [percent / 100 for percent in range(0, 101, 10)]
p  # notebook echo of the grid

[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

c01 = 3
c02 = 2
# Misclassification costs: in the cost formulas of this file c01 weights the
# false-negative rate and c02 weights the false-positive rate.

theta = 6.5 # decision threshold applied to the scores

def calculate_output_result(output_score,theta):
    """Threshold raw scores into 0/1 predictions.

    Args:
        output_score: Sequence of numeric scores, one per sample.
        theta: Decision threshold.

    Returns:
        A list containing 0 for every score strictly below ``theta`` and 1
        for every other score, in the order of ``output_score``.
    """
    return [0 if score < theta else 1 for score in output_score]
# Predicted labels obtained with the single threshold `theta`.
output_result = calculate_output_result(output_score,theta)
output_result  # notebook echo of the predictions

[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

import pandas as pd
def calculate_m_positive_negative(y):
    """Count the positive (label 1) and negative (label 0) samples in ``y``.

    Args:
        y: Sequence of class labels, where 1 marks a positive sample and 0 a
            negative one.

    Returns:
        A ``(m_positive, m_negative)`` tuple of integer counts. A class that
        does not occur in ``y`` is reported as 0.
    """
    # The top-level ``pd.value_counts`` helper is deprecated; counting through
    # a Series is the supported spelling.
    counts = pd.Series(y).value_counts()
    # ``.get`` avoids the KeyError that plain indexing raises when one of the
    # two classes is missing from ``y``.
    m_positive = int(counts.get(1, 0))
    m_negative = int(counts.get(0, 0))
    return m_positive, m_negative
# Number of positive and negative samples among the true labels.
m_positive,m_negative = calculate_m_positive_negative(y)
m_positive,m_negative  # notebook echo of both counts

(6, 6)

def calculate_confusion(y,output_result):
    """Tally the four confusion-matrix cells for binary predictions.

    Args:
        y: Sequence of true labels; a label equal to 1 is treated as positive
            and any other label as negative.
        output_result: Sequence of predicted labels, indexed in parallel
            with ``y``.

    Returns:
        A tuple ``(con1, con2, con3, con4)`` holding, in order, the counts of
        true positives, false negatives, false positives and true negatives.
    """
    true_pos = false_neg = false_pos = true_neg = 0
    for idx, actual in enumerate(y):
        # Index rather than zip so a too-short prediction list still fails.
        matches = actual == output_result[idx]
        if actual == 1:
            if matches:
                true_pos += 1
            else:
                false_neg += 1
        elif matches:
            true_neg += 1
        else:
            false_pos += 1
    return true_pos, false_neg, false_pos, true_neg
# Confusion counts for the single-threshold predictions, in the order
# returned by calculate_confusion.
con1,con2,con3,con4 = calculate_confusion(y,output_result)
con1,con2,con3,con4  # notebook echo of the four counts

(4, 2, 1, 5)

def calculate_FNR_FPR(con1,con2,con3,con4):
    """Derive the false-negative and false-positive rates from four counts.

    Args:
        con1: Count added to ``con2`` to form the FNR denominator.
        con2: Numerator of the false-negative rate.
        con3: Numerator of the false-positive rate.
        con4: Count added to ``con3`` to form the FPR denominator.

    Returns:
        ``(FNR, FPR)``, each rounded to 4 decimal places, where
        ``FNR = con2 / (con1 + con2)`` and ``FPR = con3 / (con3 + con4)``.

    Raises:
        ZeroDivisionError: If either denominator is zero.
    """
    fnr_denominator = con1 + con2
    false_negative_rate = round(con2 / fnr_denominator, 4)
    fpr_denominator = con3 + con4
    false_positive_rate = round(con3 / fpr_denominator, 4)
    return false_negative_rate, false_positive_rate
# False-negative and false-positive rates for the single threshold.
FNR,FPR = calculate_FNR_FPR(con1,con2,con3,con4)
FNR,FPR  # notebook echo of both rates

(0.3333, 0.1667)

# Probability cost of the positive class
def calculate_Pcost(p,c01,c02):
    """Map each probability in ``p`` to its cost-weighted counterpart.

    Args:
        p: Sequence of probabilities.
        c01: Cost multiplied with each probability.
        c02: Cost multiplied with each complementary probability ``1 - p``.

    Returns:
        A list with ``p*c01 / (p*c01 + (1-p)*c02)`` for every entry of ``p``,
        rounded to 4 decimal places.
    """
    def _single(prob):
        weighted = prob * c01
        return round(weighted / (prob * c01 + (1 - prob) * c02), 4)

    return [_single(prob) for prob in p]
# Probability cost for every candidate probability in `p`.
Pcosts = calculate_Pcost(p,c01,c02)
Pcosts  # notebook echo of the probability costs

[0.0, 0.1429, 0.2727, 0.3913, 0.5, 0.6, 0.6923, 0.7778, 0.8571, 0.931, 1.0]

def calculate_cost_norm(p,c01,c02,FNR,FPR):
    """Evaluate the normalised expected cost for every probability in ``p``.

    Args:
        p: Sequence of probabilities.
        c01: Cost that weights the ``FNR`` term.
        c02: Cost that weights the ``FPR`` term.
        FNR: False-negative rate.
        FPR: False-positive rate.

    Returns:
        A list with
        ``(FNR*p*c01 + FPR*(1-p)*c02) / (p*c01 + (1-p)*c02)``
        for every entry of ``p``, rounded to 4 decimal places.
    """
    costs_norm = []
    for prob in p:
        weighted_error = (FNR * prob * c01) + FPR * (1 - prob) * c02
        total_weight = prob * c01 + (1 - prob) * c02
        costs_norm.append(round(weighted_error / total_weight, 4))
    return costs_norm
# Normalised cost at every candidate probability for the single threshold.
costs_norm = calculate_cost_norm(p,c01,c02,FNR,FPR)
costs_norm  # notebook echo of the normalised costs

[0.1667,
 0.1905,
 0.2121,
 0.2319,
 0.25,
 0.2667,
 0.282,
 0.2963,
 0.3095,
 0.3218,
 0.3333]

import matplotlib as mpl
from matplotlib import pyplot as plt
def plot_lines(X,Y,color):
    """Draw ``Y`` against ``X`` through ``plt.plot``.

    Args:
        X: x-coordinates of the line.
        Y: y-coordinates of the line.
        color: Passed to ``plt.plot`` as its third positional argument, so it
            may be a Matplotlib format string such as ``'r'`` or ``'b:'``.

    Returns:
        None.
    """
    plt.plot(X, Y, color)
# Red solid line: normalised cost plotted against the probability cost.
plot_lines(Pcosts,costs_norm,'r')
# Blue dotted line: the same normalised cost plotted against the raw p.
plot_lines(p,costs_norm,'b:')
plt.show()

多个theta时

# Candidate thresholds 0.5, 1.5, ..., 11.5.
thetas = [step + 0.5 for step in range(12)]
thetas  # notebook echo of the thresholds

[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5]

def calculae_Pcost_cost_norm(thetas,output_score,y,calculate_Pcost,calculate_cost_norm):
    """Compute one probability-cost / normalised-cost curve per threshold.

    For every threshold the scores are classified, the confusion counts are
    converted into FNR and FPR, and both cost lists are evaluated.

    Note:
        ``p``, ``c01`` and ``c02`` are not parameters; they are read from
        module scope, as are ``calculate_output_result``,
        ``calculate_confusion`` and ``calculate_FNR_FPR``.

    Args:
        thetas: Iterable of decision thresholds.
        output_score: Scores handed to ``calculate_output_result``.
        y: True labels handed to ``calculate_confusion``.
        calculate_Pcost: Callable invoked as ``calculate_Pcost(p, c01, c02)``.
        calculate_cost_norm: Callable invoked as
            ``calculate_cost_norm(p, c01, c02, FNR, FPR)``.

    Returns:
        A tuple ``(Pcosts_n, costs_norm_n, theta_FPR_FNR)``. The first two are
        lists with one entry per threshold, holding the results of the two
        callables. ``theta_FPR_FNR`` maps each threshold to ``[FNR, FPR]``
        (in that order, despite the name).
    """
    Pcosts_n = []
    costs_norm_n = []
    theta_FPR_FNR = {}
    for theta in thetas:
        output_result = calculate_output_result(output_score, theta)
        con1, con2, con3, con4 = calculate_confusion(y, output_result)
        FNR, FPR = calculate_FNR_FPR(con1, con2, con3, con4)
        theta_FPR_FNR[theta] = [FNR, FPR]
        # Called once per threshold so each entry is its own list object.
        Pcosts_n.append(calculate_Pcost(p, c01, c02))
        costs_norm_n.append(calculate_cost_norm(p, c01, c02, FNR, FPR))
    return Pcosts_n, costs_norm_n, theta_FPR_FNR
# Evaluate the cost curves for every candidate threshold.
Pcosts_n, costs_norm_n , theta_FPR_FNR =calculae_Pcost_cost_norm(thetas,output_score,y,calculate_Pcost,calculate_cost_norm)

# Overlay one red line per threshold on a single figure.
for i in range(len(Pcosts_n)):
    plot_lines(Pcosts_n[i],costs_norm_n[i],'r')
plt.show()

theta_FPR_FNR  # notebook echo: threshold -> [FNR, FPR]

{0.5: [0.0, 0.8333],
 1.5: [0.0, 0.6667],
 2.5: [0.0, 0.5],
 3.5: [0.0, 0.3333],
 4.5: [0.1667, 0.3333],
 5.5: [0.1667, 0.1667],
 6.5: [0.3333, 0.1667],
 7.5: [0.5, 0.1667],
 8.5: [0.5, 0.0],
 9.5: [0.6667, 0.0],
 10.5: [0.8333, 0.0],
 11.5: [1.0, 0.0]}

机器学习笔记

代价敏感错误率与代价函数

单个theta时

多个theta时