Source code for conformal.classification

"""Classification module contains methods for conformal classification.

Conformal classifiers predict a set of classes (not always a single class) under a given
significance level (error rate). Every classifier works in combination with a nonconformity measure
and on average predicts the correct class with the given error rate. Lower error rates result in
smaller sets of predicted classes.

Structure:

- ConformalClassifier
    - Transductive (:py:class:`TransductiveClassifier`)
    - Inductive (:py:class:`InductiveClassifier`)
    - Cross (:py:class:`CrossClassifier`)
"""

from copy import deepcopy

import numpy as np
from sklearn.model_selection import KFold

from Orange.data import Table, Instance

from orangecontrib.conformal.base import ConformalPredictor
from orangecontrib.conformal.nonconformity import ClassNC


[docs]class PredictionClass: """Conformal classification prediction object, which is produced by the :py:func:`ConformalClassifier.predict` method. Attributes: p (List): List of pairs (p-value, class) eps (float): Default significance level (error rate). Examples: >>> train, test = next(LOOSampler(Table('iris'))) >>> tcp = TransductiveClassifier(InverseProbability(NaiveBayesLearner()), train) >>> prediction = tcp.predict(test[0], 0.1) >>> print(prediction.confidence(), prediction.credibility()) >>> prediction = tcp.predict(test[0]) >>> print(prediction.classes(0.1), prediction.classes(0.9)) """
[docs] def __init__(self, p, eps): """Initialize the prediction. Args: p (List): List of pairs (p-value, class) eps (float): Default significance level (error rate). """ self.p = p self.eps = eps
[docs] def classes(self, eps=None): """ Compute the set of classes under the default or given `eps` value. Args: eps (float): Significance level (error rate). Returns: List of predicted classes. """ if eps is None: eps = self.eps assert(eps is not None) return [y for p_y, y in self.p if p_y > eps]
[docs] def verdict(self, ref, eps=None): """Conformal classification prediction is correct when the actual class appears among the predicted classes. Args: ref: Reference/actual class eps (float): Significance level (error rate). Returns: True if the prediction with default or specified `eps` is correct. """ return ref in self.classes(eps)
[docs] def confidence(self): """Confidence is an efficiency measure of a single prediction. Computes minimum :math:`\\mathit{eps}` that would still result in a prediction of a single label. :math:`\\mathit{eps} = \\text{second\_largest}(p_i)` Returns: float: Confidence :math:`1-\\mathit{eps}`. """ return 1-sorted([p_y for p_y, y in self.p], reverse=True)[1]
[docs] def credibility(self): """Credibility is an efficiency measure of a single prediction. Small credibility indicates an unusual example. Computes minimum :math:`\\mathit{eps}` that would result in an empty prediction set. :math:`\\mathit{eps} = \\text{max}(p_i)` Returns: float: Credibility :math:`\\mathit{eps}`. """ return max(p_y for p_y, y in self.p)
[docs]class ConformalClassifier(ConformalPredictor): """Base class for conformal classifiers."""
[docs] def __init__(self, nc_measure, mondrian=False): """Verify that the nonconformity measure can be used for classification.""" assert isinstance(nc_measure, ClassNC), "Inappropriate nonconformity measure for classification" self.nc_measure = nc_measure self.mondrian = mondrian
def __str__(self): return "{} ({})".format(self.__class__.__name__, self.nc_measure)
[docs] def p_values(self, example): """Extending classes should implement this method to return a list of pairs (p-value, class) for a given example. Conformal classifier assigns an assumed class value to the given example and computes its nonconformity. P-value is the ratio of more nonconformal (stranger) instances that the given example. """ raise NotImplementedError
[docs] def predict(self, example, eps=None): """Compute a classification prediction object from p-values for a given example and significance level. Args: example (Instance): Orange row instance. eps (float): Default significance level (error rate). Returns: PredictionClass: Classification prediction object. """ ps = self.p_values(example) return PredictionClass(ps, eps)
[docs] def __call__(self, example, eps): """Compute predicted classes for a given example and significance level. Args: example (Instance): Orange row instance. eps (float): Significance level (error rate). Returns: List of predicted classes. """ pred = self.predict(example) return pred.classes(eps)
[docs]class TransductiveClassifier(ConformalClassifier): """Transductive classification. Examples: >>> train, test = next(LOOSampler(Table('iris'))) >>> tcp = TransductiveClassifier(ProbabilityMargin(NaiveBayesLearner()), train) >>> print(tcp(test[0], 0.1)) """
[docs] def __init__(self, nc_measure, train=None, mondrian=False): """Initialize transductive classifier with a nonconformity measure and a training set. Fit the conformal classifier to the training set if present. Args: nc_measure (ClassNC): Classification nonconformity measure. train (Optional[Table]): Table of examples used as a training set. mondrian (bool): Use a mondrian setting for computing p-values. """ super().__init__(nc_measure, mondrian=mondrian) if train is not None: self.fit(train)
[docs] def fit(self, train): """Fit the conformal classifier to the training set and store the domain. Args: train (Optional[Table]): Table of examples used as a training set. """ self.train = train self.domain = self.train.domain
[docs] def p_values(self, example): """Compute p-values for every possible class. Transductive classifier appends the given example with an assumed class value to the training set and compares its nonconformity against all other instances. Args: example (Instance): Orange row instance. Returns: List of pairs (p-value, class) """ ps = [] temp = example.get_class() for yi, y in enumerate(self.domain.class_var.values): example.set_class(yi) data = Table(self.domain, np.vstack((self.train, example))) self.nc_measure.fit(data) scores = np.array([self.nc_measure.nonconformity(row) for row in data if not self.mondrian or self.mondrian and row.get_class() == y]) alpha, alpha_n = scores[:-1], scores[-1] p_y = sum(scores >= alpha_n) / len(scores) ps.append((p_y, y)) example.set_class(temp) return ps
[docs]class InductiveClassifier(ConformalClassifier): """Inductive classification. Attributes: alpha: Nonconformity scores of the calibration instances. Computed by the :py:func:`fit` method. Examples: >>> train, test = next(LOOSampler(Table('iris'))) >>> train, calibrate = next(RandomSampler(train, 2, 1)) >>> icp = InductiveClassifier(InverseProbability(LogisticRegressionLearner()), train, calibrate) >>> print(icp(test[0], 0.1)) """
[docs] def __init__(self, nc_measure, train=None, calibrate=None, mondrian=False): """Initialize inductive classifier with a nonconformity measure, training set and calibration set. If present, fit the conformal classifier to the training set and compute the nonconformity scores of calibration set. Args: nc_measure (ClassNC): Classification nonconformity measure. train (Optional[Table]): Table of examples used as a training set. calibrate (Optional[Table]): Table of examples used as a calibration set. mondrian (bool): Use a mondrian setting for computing p-values. """ super().__init__(nc_measure, mondrian=mondrian) if train is not None and calibrate is not None: self.fit(train, calibrate)
[docs] def fit(self, train, calibrate): """Fit the conformal classifier to the training set, compute and store nonconformity scores (:py:attr:`alpha`) on the calibration set and store the domain. Args: train (Optional[Table]): Table of examples used as a training set. calibrate (Optional[Table]): Table of examples used as a calibration set. """ self.domain = train.domain self.calibrate = calibrate self.nc_measure.fit(train) self.alpha = np.array([self.nc_measure.nonconformity(inst) for inst in calibrate])
[docs] def p_values(self, example): """Compute p-values for every possible class. Inductive classifier assigns an assumed class value to the given example and compares its nonconformity against all other instances in the calibration set. Args: example (Instance): Orange row instance. Returns: List of pairs (p-value, class) """ classes = [] ps = [] temp = example.get_class() for yi, y in enumerate(self.domain.class_var.values): example.set_class(yi) alpha_n = self.nc_measure.nonconformity(example) if self.mondrian: alpha = np.array([a for a, cal in zip(self.alpha, self.calibrate) if cal.get_class() == y]) else: alpha = self.alpha p_y = (sum(alpha >= alpha_n)+1) / (len(alpha)+1) ps.append((p_y, y)) example.set_class(temp) return ps
[docs]class CrossClassifier(InductiveClassifier): """Cross classification. Examples: >>> train, test = next(LOOSampler(Table('iris'))) >>> ccp = CrossClassifier(InverseProbability(LogisticRegressionLearner()), 3, train) >>> print(ccp(test[0], 0.1)) """
[docs] def __init__(self, nc_measure, k, train=None, mondrian=False): """Initialize cross classifier with a nonconformity measure, number of folds and training set. If present, fit the conformal classifier to the training set. Args: nc_measure (ClassNC): Classification nonconformity measure. k (int): Number of folds. train (Optional[Table]): Table of examples used as a training set. mondrian (bool): Use a mondrian setting for computing p-values. """ # store the unfitted nonconformity measure for making copies to fit on individual folds super().__init__(nc_measure, mondrian=mondrian) self.nc_measure_base = deepcopy(self.nc_measure) self.k = k if train is not None: self.fit(train)
[docs] def fit(self, train): """Fit the cross classifier to the training set. Split the training set into k folds for use as training and calibration set with an inductive classifier. Concatenate the computed nonconformity scores and store them (:py:attr:`InductiveClassifier.alpha`). Args: train (Table): Table of examples used as a training set. """ self.domain = train.domain self.calibrate = train self.nc_measure.fit(train) self.alpha = np.array([]) for train_index, calibrate_index in KFold(self.k, shuffle=True).split(train): icp = InductiveClassifier(deepcopy(self.nc_measure_base), train[train_index], train[calibrate_index]) self.alpha = np.concatenate((self.alpha, icp.alpha))
[docs]class LOOClassifier(CrossClassifier): """Leave-one-out classifier is a cross conformal classifier with the number of folds equal to the size of the training set. Examples: >>> train, test = next(LOOSampler(Table('iris'))) >>> loocp = LOOClassifier(InverseProbability(LogisticRegressionLearner()), train) >>> print(loocp(test[0], 0.1)) """
[docs] def __init__(self, nc_measure, train=None, mondrian=False): super().__init__(nc_measure, 0, train, mondrian)
[docs] def fit(self, train): self.k = len(train) super().fit(train)