''' DEPENDENCIES '''
import numpy as np
import gurobipy
from gurobipy import *
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
'''
####### 'get_bAUC' function description #######
INPUTS:
    samples:
        numpy array of samples of a random variable
    estimate:
        if True, get_bAUC will return an estimate of bPOE by simply ordering the samples and taking a running
        expectation of the upper tail until that expectation no longer exceeds the threshold of zero
        if False, get_bAUC will solve the linear programming formulation to calculate exact bAUC
OUTPUTS:
    bAUC:
        Buffered AUC
    gamma:
        VaR (Value-at-Risk), or equivalently the quantile, at probability level equal to bAUC
'''
def get_bAUC(samples, estimate=True):
    """Compute the buffered AUC (bAUC) of a sample of ranking losses.

    Args:
        samples: 1-D array-like of samples of a random variable (any shape is
            flattened).
        estimate: if truthy, estimate bAUC by sorting the samples and averaging
            the upper tail until that average is no longer positive. Otherwise
            solve the linear program with Gurobi (uses ``Model``, ``GRB`` and
            ``quicksum`` from the file-level ``from gurobipy import *``).

    Returns:
        Tuple ``(bAUC, gamma)``. ``bAUC`` is ``1 - bPOE`` at threshold zero.
        ``gamma`` is the sample at which the tail average stopped (estimate
        branch) or ``-1 / a`` for the LP multiplier ``a`` (LP branch).
    """
    values = np.asarray(samples, dtype=float).ravel()
    n = values.shape[0]

    ################# estimate bPOE by simply sorting samples #################
    if estimate:
        ordered = np.sort(values)
        tail_mean = 0.0
        tail_size = 0
        # Grow the upper tail one sample at a time, largest first, and stop as
        # soon as its running mean is no longer above the zero threshold.
        for i in range(n):
            tail_mean = (tail_mean * i + ordered[n - 1 - i]) / float(i + 1)
            tail_size = i + 1
            if tail_mean <= 0:
                break
        # Index the VaR with an integer count rather than int(probability * N):
        # a probability built by repeated float subtraction can truncate to
        # the neighbouring element.
        gamma = float(ordered[n - tail_size])
        bAUC = 1.0 - tail_size / float(n)
        # The multiplier 1 / (-gamma) is not needed here; computing it used to
        # raise ZeroDivisionError whenever gamma was exactly zero.
        return bAUC, gamma

    ###################### Get bAUC exactly by solving an LP #############################
    model = Model('get bAUC')
    model.setParam("OutputFlag", 0)
    # lb=0 already enforces E[i] >= 0, so no explicit constraint is added.
    excess = [
        model.addVar(lb=0, ub=GRB.INFINITY, obj=0, vtype=GRB.CONTINUOUS, name="E" + str(i))
        for i in range(n)
    ]
    a = model.addVar(lb=0, ub=GRB.INFINITY, obj=0, vtype=GRB.CONTINUOUS, name="a")
    model.update()
    model.setObjective((1 / float(n)) * quicksum(excess), GRB.MINIMIZE)
    for i in range(n):
        model.addConstr(excess[i] >= a * float(values[i]) + 1)
    # Solve once, after every constraint is in place.
    model.optimize()
    bPOE = model.getObjective().getValue()
    bAUC = 1 - bPOE
    a = a.X
    if a == 0:
        # Avoid dividing by zero; gamma becomes a very large negative number.
        a = 10 ** -10
    gamma = -1 / float(a)
    return bAUC, gamma
'''
####### 'plot_bROC' function description #######
USAGE NOTE: The code, right now, is made to immediately show the bROC plot (using matplotlib) when the 'plot_bROC' function is called.
But there are other options.
    1) If you wish to save the plot, instead of printing to screen, replace "plt.show()" with "plt.savefig(*args, **kwargs)".
    2) If you wish to plot multiple bROC curves on a single plot, simply comment out "plt.show()" within the function, call the function
       multiple times (e.g. 2 times), and call plt.show() or plt.savefig(*args, **kwargs) after all of the function calls.
       All curves (e.g. 2 from 2 function calls) will be on one chart.
INPUTS:
    scores_n:
        numpy array of scores given to samples from the negative class
    scores_p:
        numpy array of scores given to samples from the positive class
    ranking_losses:
        numpy array of ranking losses for classifier h,
        i.e. let h(X_i) be the score given to sample i by classifier h. The ranking losses
        are the differences h(negative sample) - h(positive sample) for all pairs of negative and positive class samples.
    plot_ROC:
        if True, plot the normal ROC curve alongside the bROC curve (i.e. the curve for which gamma=0).
        if False, only plot the bROC curve.
    color:
        array of color options for matplotlib plotting. Use if plot_ROC=True to give different colors
        to the ROC and bROC curves. (e.g. color=['r','b'] to give bROC curve a blue color and ROC curve red color)
    estimate:
        if True, get_bAUC will return an estimate of bPOE by simply ordering the samples and taking a running
        expectation of the upper tail until that expectation no longer exceeds the threshold of zero
        if False, get_bAUC will solve the linear programming formulation to calculate exact bPOE
OUTPUTS:
    Displays plot of bROC curve and possibly ROC curve if plot_ROC=True.
    AUC:
        Area under the ordinary ROC curve, computed with sklearn's roc_auc_score
    bAUC:
        Buffered AUC
    NOTE: gamma, the VaR (Value-at-Risk), or equivalently the quantile, at probability level equal to bAUC,
    is used to shift the bROC thresholds but is not returned.
'''
def plot_bROC(scores_n, scores_p, ranking_losses, plot_ROC=True, color=None, estimate=True):
    """Plot the buffered ROC (bROC) curve and return AUC and bAUC.

    Args:
        scores_n: array of scores given to samples from the negative class.
        scores_p: array of scores given to samples from the positive class.
        ranking_losses: array of ranking losses, passed to ``get_bAUC``.
        plot_ROC: if truthy, also plot the ordinary ROC curve.
        color: ``None`` (matplotlib picks the colours), a single matplotlib
            format string used for every curve, or a sequence whose first
            entry styles the ROC curve and whose second entry styles the bROC
            curve (e.g. ``['r', 'b']`` gives a red ROC and a blue bROC curve).
        estimate: forwarded to ``get_bAUC`` to choose between the sorting
            estimate and the LP formulation.

    Returns:
        Tuple ``(AUC, bAUC)``.
    """
    scores_n = np.asarray(scores_n)
    scores_p = np.asarray(scores_p)

    # Resolve one format per curve. Passing the whole `color` object to
    # plt.plot fails for None and treats a list as extra data.
    if color is None or isinstance(color, str):
        roc_fmt = broc_fmt = color
    else:
        palette = list(color)
        roc_fmt = palette[0] if palette else None
        broc_fmt = palette[1] if len(palette) > 1 else roc_fmt

    ###################### Get bAUC and optimal gamma, then plot TPR and FPR for bROC curve ######################
    thresholds = sorted(np.hstack((scores_p, scores_n)))
    bAUC, gamma = get_bAUC(np.array(ranking_losses), estimate=estimate)
    # The bROC true-positive rate uses the threshold shifted by gamma.
    bTPR = [get_POE(scores_p, t - gamma) for t in thresholds]
    bFPR = [get_POE(scores_n, t) for t in thresholds]
    if broc_fmt is None:
        plt.plot(bFPR, bTPR)
    else:
        plt.plot(bFPR, bTPR, broc_fmt)

    ###################### Plot the normal ROC curve alongside the bROC curve ######################
    if plot_ROC:
        TPR = [get_POE(scores_p, t) for t in thresholds]
        FPR = bFPR  # both curves share the same false-positive rates
        if roc_fmt is None:
            plt.plot(FPR, TPR)
        else:
            plt.plot(FPR, TPR, roc_fmt)

    ###################### Plot the curve, or curves, with matplotlib ######################
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.title('bROC Curve')
    plt.xlabel(r'False Positive Rate: $P\;( h(X^-) > t )$', fontsize='large')
    plt.ylabel(r'True Positive Rate: $P\;( h(X^+) > t - \gamma^* )$', fontsize='large')
    ###SHOW PLOT###
    plt.show()  #COMMENT OUT IF NEEDED#
    #############

    ###################### Get AUC as well ######################
    labels = [-1 for _ in scores_n] + [1 for _ in scores_p]
    AUC = roc_auc_score(labels, np.hstack((scores_n, scores_p)))

    ###################### Return AUC and bAUC ######################
    return AUC, bAUC
'''
####### 'get_POE' function description #######
INPUTS:
    samples:
        numpy array of samples of a random variable
    z:
        threshold
OUTPUTS:
    POE:
        Probability Of your random variable Exceeding the threshold of z (i.e. POE at z)
'''
def get_POE(samples, z):
    """Return the empirical probability that a sample strictly exceeds z.

    Args:
        samples: array-like of samples of a random variable (flattened).
        z: threshold.

    Returns:
        Fraction of samples strictly greater than ``z``; 0.0 when there are
        no samples.
    """
    values = np.asarray(samples).ravel()
    num_samples = values.size
    if num_samples == 0:
        return 0.0
    # Count first and divide once: adding 1/N per hit accumulates rounding
    # error (ten hits out of ten summed to 0.9999999999999999, not 1.0).
    return int(np.count_nonzero(values > z)) / float(num_samples)