# PlotUtils.py
#
# This file defines helper functions for plotting.
  2. import matplotlib.pyplot as plt
  3. from sklearn.metrics import roc_curve, auc
  4. def plot_roc_curve(test_labels: list, test_df: list, title: str, figsize=(8, 8), savefile = None, show: bool = True):
  5. """Plots the roc curve of a classifier.
  6. Args:
  7. test_labels (list): Labels for the test examples.
  8. test_df (list): Decision function values for the test examples.
  9. title (str): Title of the plot.
  10. figsize (tuple, optional): Size of the plot. Defaults to (8, 8).
  11. savefile (_type_, optional): Output file without ending. Will be saved as pdf and png. If None, the plot is not saved. Defaults to None.
  12. show (bool, optional): If False, do not show the plot. Defaults to True.
  13. Returns:
  14. fpr (list of float), tpr (list of float), thresholds (list of float), auc_score (float): Points on roc curves, their thresholds, and the area under ROC curve.
  15. """
  16. fpr, tpr, thresholds = roc_curve(test_labels, test_df)
  17. auc_score = auc(fpr, tpr)
  18. if not show:
  19. plt.ioff()
  20. plt.figure(figsize=figsize)
  21. plt.plot(fpr, tpr, lw=1)
  22. plt.fill_between(fpr, tpr, label=f"AUC = {auc_score:.4f}", alpha=0.5)
  23. plt.plot([0, 1], [0, 1], color="gray", linestyle="dotted")
  24. plt.xlim([0.0, 1.0])
  25. plt.ylim([0.0, 1.0])
  26. plt.xlabel("FPR")
  27. plt.ylabel("TPR")
  28. plt.title(f"{title}")
  29. plt.legend(loc="lower right")
  30. if savefile is not None:
  31. plt.savefig(f"{savefile}.png", bbox_inches="tight")
  32. plt.savefig(f"{savefile}.pdf", bbox_inches="tight")
  33. if show:
  34. plt.show()
  35. return fpr, tpr, thresholds, auc_score
  36. def get_percentiles(fpr, tpr, thresholds, percentiles=[0.9, 0.95, 0.98, 0.99], verbose = True):
  37. """Returns the maximum possible TNR (elimination rate) for given minimum TPR.
  38. Args:
  39. fpr (list of float): FPR values from ROC curve.
  40. tpr (list of float): TPR values from ROC curve.
  41. thresholds (list of float): Thresholds from ROC curve.
  42. percentiles (list of float, optional): List of minimum TPR values to use as input. Defaults to [0.9, 0.95, 0.98, 0.99].
  43. verbose (bool, optional): If True, print the results. Defaults to True.
  44. Returns:
  45. list of float: TNR values aka elimination rates.
  46. """
  47. assert percentiles == sorted(percentiles)
  48. tnrs = []
  49. for percentile in percentiles:
  50. for i, tp in enumerate(tpr):
  51. if tp >= percentile:
  52. tnrs.append(1 - fpr[i]) # append tnr
  53. if verbose:
  54. print(f"{percentile} percentile : TPR = {tp:.4f}, FPR = {fpr[i]:.4f} <-> TNR = {(1 - fpr[i]):.4f} @ thresh {thresholds[i]}")
  55. break
  56. return tnrs