Source code for menelaus.concept_drift.adwin_accuracy

from menelaus.change_detection.adwin import ADWIN


class ADWINAccuracy(ADWIN):
    """Accuracy-monitoring variant of ADWIN (ADaptive WINdowing).

    ADWIN is a change detection algorithm which uses a sliding window to
    estimate the running mean and variance of a given real-valued number. It
    can be applied as a concept drift detector by monitoring a performance
    metric for a given classifier. ADWINAccuracy specifically expects
    ``y_true``, ``y_pred``, and uses that input to monitor the running
    accuracy of a classifier. To use ADWIN to monitor other values, see
    ``change_detection.ADWIN``.

    As each sample is added, ADWIN stores a running estimate (mean and
    variance) for a given statistic, calculated over a sliding window which
    will grow to the right until drift is detected. The condition for drift is
    defined over pairs of subwindows at certain cutpoints within the current
    window. If, for any such pair, the difference between the running
    estimates of the statistic is over a certain threshold (controlled by
    delta), we identify drift, and remove the oldest elements of the window
    until all differences are again below the threshold.

    The running estimates in each subwindow are maintained by storing
    summaries of the elements in "buckets," which, in this implementation, are
    themselves stored in the ``bucket_row_list`` attribute, whose total size
    scales with the ``max_buckets`` parameter.

    When drift occurs, the index of the element at the beginning of ADWIN's
    new window is stored in ``self.retraining_recs``.

    Ref. :cite:t:`bifet2007learning`
    """

    def __init__(
        self,
        delta=0.002,
        max_buckets=5,
        new_sample_thresh=32,
        window_size_thresh=10,
        subwindow_size_thresh=5,
        conservative_bound=False,
    ):
        """
        Args:
            delta (float, optional): confidence value on 0 to 1. ADWIN will
                incorrectly detect drift with at most probability ``delta``,
                and correctly detect drift with at least probability
                ``1 - delta``. Defaults to 0.002.
            max_buckets (int, optional): the maximum number of buckets to
                maintain in each BucketRow. Corresponds to the "M" parameter
                in Bifet 2006. Defaults to 5.
            new_sample_thresh (int, optional): the drift detection procedure
                will run every ``new_sample_thresh`` samples, not in between.
                Defaults to 32.
            window_size_thresh (int, optional): the minimum number of samples
                in the window required to check for drift. Defaults to 10.
            subwindow_size_thresh (int, optional): the minimum number of
                samples in each subwindow required to check it for drift.
                Defaults to 5.
            conservative_bound (bool, optional): whether to assume a 'large
                enough' sample when constructing drift cutoff. Defaults to
                ``False``.

        Raises:
            ValueError: If ``ADWIN.delta`` is not on the range 0 to 1.
        """
        # Forward the caller's settings to the parent detector. Passing
        # literal defaults here would silently discard every user-supplied
        # configuration value.
        super().__init__(
            delta=delta,
            max_buckets=max_buckets,
            new_sample_thresh=new_sample_thresh,
            window_size_thresh=window_size_thresh,
            subwindow_size_thresh=subwindow_size_thresh,
            conservative_bound=conservative_bound,
        )

    def update(self, y_true, y_pred, X=None):
        """Update the detector with a new sample.

        Args:
            y_true: one true label from input data.
            y_pred: one predicted label from input data.
            X: next sample in the stream of data. Not used for this
                accuracy-based ADWIN. See ``change_detection.ADWIN`` for that
                application.
        """
        # This class is here to avoid asking the user to provide such a direct
        # function of (y_true, y_pred) in the X argument, which is unintuitive.
        _, y_true, y_pred = super()._validate_input(None, y_true, y_pred)

        # The arrays should have a single element after validation, so pull
        # that element out before comparing rather than calling int() on the
        # result of an array-wide comparison.
        new_value = int(y_true[0] == y_pred[0])

        # The parent detector tracks a single real-valued statistic: feed it
        # the 0/1 correctness indicator as its monitored value.
        super().update(new_value, y_true=None, y_pred=None)