"""Regression module contains methods for conformal regression.
Conformal regressors predict a range of values (not always a single value) under a given
significance level (error rate). Every regressor works in combination with a nonconformity measure
and, on average, predicts a range that contains the correct value, erring at the given error rate.
Lower error rates result in wider ranges of predicted values.
Structure:
- ConformalRegressor
- Inductive (:py:class:`InductiveRegressor`)
- Cross (:py:class:`CrossRegressor`)
"""
from copy import deepcopy
import numpy as np
from sklearn.model_selection import KFold
from Orange.data import Instance, Unknown
from orangecontrib.conformal.base import ConformalPredictor
from orangecontrib.conformal.nonconformity import RegrNC
class PredictionRegr:
    """Conformal regression prediction object,
    which is produced by the :py:func:`ConformalRegressor.predict` method.

    Attributes:
        lo (float): Lowest value of the predicted range.
        hi (float): Highest value of the predicted range.

    Examples:
        >>> train, test = next(LOOSampler(Table('housing')))
        >>> ccr = CrossRegressor(AbsError(LinearRegressionLearner()), 5, train)
        >>> prediction = ccr.predict(test[0], 0.1)
        >>> print(prediction.width())
    """

    def __init__(self, lo, hi):
        """Initialize the prediction.

        Args:
            lo (float): Lowest value of the predicted range.
            hi (float): Highest value of the predicted range.
        """
        self.lo = lo
        self.hi = hi

    def range(self):
        """Predicted range as a tuple: (:py:attr:`lo`, :py:attr:`hi`)."""
        return self.lo, self.hi

    def verdict(self, ref):
        """Conformal regression prediction is correct when the actual value appears
        in the predicted range (both bounds inclusive).

        Args:
            ref: Reference/actual value

        Returns:
            True if the prediction is correct.
        """
        return self.lo <= ref <= self.hi

    def width(self):
        """Width of the predicted range: :py:attr:`hi` - :py:attr:`lo`.

        Returns:
            The difference ``hi - lo``, or 0 when either bound is NaN.
        """
        # A NaN bound would propagate into the difference; report a zero width instead.
        if np.isnan(self.lo) or np.isnan(self.hi):
            return 0
        return self.hi - self.lo
class TransductiveRegressor(ConformalRegressor):
    """Transductive regression. TODO

    Placeholder subclass: it adds nothing to :py:class:`ConformalRegressor` yet.
    """
class InductiveRegressor(ConformalRegressor):
    """Inductive regression.

    Attributes:
        alpha: Nonconformity scores of the calibration instances. Computed by the
            :py:func:`fit` method and stored *sorted* in decreasing order
            (largest score first).
        domain: Domain of the training set, stored by the :py:func:`fit` method.

    Examples:
        >>> train, test = next(LOOSampler(Table('housing')))
        >>> train, calibrate = next(RandomSampler(train, 2, 1))
        >>> icr = InductiveRegressor(AbsError(LinearRegressionLearner()), train, calibrate)
        >>> print(icr(test[0], 0.1))
    """

    def __init__(self, nc_measure, train=None, calibrate=None):
        """Initialize inductive regressor with a nonconformity measure, training set and calibration set.

        If both sets are present, fit the conformal regressor to the training set and compute
        the nonconformity scores of the calibration set. If either one is missing, no fitting
        is done and :py:func:`fit` has to be called later.

        Args:
            nc_measure (RegrNC): Regression nonconformity measure.
            train (Optional[Table]): Table of examples used as a training set.
            calibrate (Optional[Table]): Table of examples used as a calibration set.
        """
        super().__init__(nc_measure)
        if train is not None and calibrate is not None:
            self.fit(train, calibrate)

    def fit(self, train, calibrate):
        """Fit the conformal regressor to the training set, compute and store the sorted
        nonconformity scores (:py:attr:`alpha`) on the calibration set and store the domain.

        Args:
            train (Table): Table of examples used as a training set.
            calibrate (Table): Table of examples used as a calibration set.
        """
        self.domain = train.domain
        self.nc_measure.fit(train)
        scores = [self.nc_measure.nonconformity(inst) for inst in calibrate]
        # Scores are kept in decreasing order (largest nonconformity first).
        self.alpha = np.array(sorted(scores, reverse=True))
class CrossRegressor(InductiveRegressor):
    """Cross regression.

    Examples:
        >>> train, test = next(LOOSampler(Table('housing')))
        >>> ccr = CrossRegressor(AbsError(LinearRegressionLearner()), 4, train)
        >>> print(ccr(test[0], 0.1))
    """

    def __init__(self, nc_measure, k, train=None):
        """Initialize cross regressor with a nonconformity measure, number of folds and training set.

        If present, fit the conformal regressor to the training set.

        Args:
            nc_measure (RegrNC): Regression nonconformity measure.
            k (int): Number of folds.
            train (Optional[Table]): Table of examples used as a training set.
        """
        super().__init__(nc_measure)
        # Snapshot the measure before this regressor fits it, so that every fold
        # can fit its own independent copy.
        self.nc_measure_base = deepcopy(self.nc_measure)
        self.k = k
        if train is not None:
            self.fit(train)

    def fit(self, train):
        """Fit the cross regressor to the training set.

        The regressor's own nonconformity measure is fitted on the whole training set.
        The training set is then split into k folds; for each split an inductive regressor
        is built from a fresh copy of the base measure, using the remaining folds as the
        training set and the held-out fold as the calibration set. The nonconformity scores
        of all folds are concatenated, sorted in decreasing order and stored
        (:py:attr:`InductiveRegressor.alpha`).

        Args:
            train (Table): Table of examples used as a training set.
        """
        self.domain = train.domain
        self.nc_measure.fit(train)
        self.alpha = np.array([])
        # NOTE: shuffle=True with no random_state, so fold assignment is not
        # reproducible between calls.
        for train_index, calibrate_index in KFold(self.k, shuffle=True).split(train):
            icr = InductiveRegressor(deepcopy(self.nc_measure_base),
                                     train[train_index], train[calibrate_index])
            self.alpha = np.concatenate((self.alpha, icr.alpha))
        # Each fold's scores are already sorted, but the merged array is not.
        self.alpha = np.array(sorted(self.alpha, reverse=True))
class LOORegressor(CrossRegressor):
    """Leave-one-out regressor is a cross conformal regressor with the number of folds equal
    to the size of the training set.

    Examples:
        >>> train, test = next(LOOSampler(Table('housing')))
        >>> ccr = LOORegressor(AbsError(LinearRegressionLearner()), train)
        >>> print(ccr(test[0], 0.1))
    """

    def __init__(self, nc_measure, train=None):
        """Initialize leave-one-out regressor with a nonconformity measure and training set.

        If present, fit the conformal regressor to the training set.

        Args:
            nc_measure (RegrNC): Regression nonconformity measure.
            train (Optional[Table]): Table of examples used as a training set.
        """
        # The fold count is only a placeholder here; fit() replaces it with len(train).
        super().__init__(nc_measure, 0, train)

    def fit(self, train):
        """Fit the regressor using as many folds as there are training examples.

        Args:
            train (Table): Table of examples used as a training set.
        """
        self.k = len(train)
        super().fit(train)