Source code for acton.kde_predictor

"""A predictor that uses KDE to classify instances."""

import numpy
import sklearn.base
import sklearn.neighbors
import sklearn.utils.multiclass
import sklearn.utils.validation


class KDEClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
    """A classifier using kernel density estimation to classify instances."""

    def __init__(self, bandwidth=1.0):
        """A classifier using kernel density estimation to classify instances.

        A kernel density estimate is fit to each class. These estimates are
        used to score instances, and the highest-scoring class is used as the
        label for each instance.

        Parameters
        ----------
        bandwidth : float
            Bandwidth for the kernel density estimate.
        """
        self.bandwidth = bandwidth

    def fit(self, X, y):
        """Fits kernel density models to the data.

        Parameters
        ----------
        X : array_like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        y : array_like, shape (n_samples,)
            Target vector relative to X.
        """
        X, y = sklearn.utils.validation.check_X_y(X, y)
        self.classes_ = sklearn.utils.multiclass.unique_labels(y)
        # Fit one kernel density estimate per class, using only the
        # instances belonging to that class.
        self.kdes_ = [
            sklearn.neighbors.KernelDensity(bandwidth=self.bandwidth).fit(
                X[y == label])
            for label in self.classes_]
        return self

    def predict(self, X):
        """Predicts class labels.

        Parameters
        ----------
        X : array_like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        """
        sklearn.utils.validation.check_is_fitted(self, ['kdes_', 'classes_'])
        X = sklearn.utils.validation.check_array(X)
        scores = self.predict_proba(X)
        most_probable_indices = scores.argmax(axis=1)
        assert most_probable_indices.shape[0] == X.shape[0]
        return self.classes_[most_probable_indices]

    @staticmethod
    def _softmax(data, axis=0):
        """Computes the softmax of an array along an axis.

        Notes
        -----
        Adapted from https://gist.github.com/stober/1946926.

        Parameters
        ----------
        data : array_like
            Array of numbers.
        axis : int
            Axis to softmax along.
        """
        # Subtracting the maximum before exponentiating keeps the
        # exponentials in a numerically safe range without changing the
        # result.
        e_x = numpy.exp(
            data - numpy.expand_dims(numpy.max(data, axis=axis), axis))
        out = e_x / numpy.expand_dims(e_x.sum(axis=axis), axis)
        return out

    def predict_proba(self, X):
        """Predicts class probabilities.

        Class probabilities are the per-class kernel densities, normalised
        to sum to one by taking a softmax over the log densities.

        Parameters
        ----------
        X : array_like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        """
        sklearn.utils.validation.check_is_fitted(self, ['kdes_', 'classes_'])
        X = sklearn.utils.validation.check_array(X)
        scores = numpy.zeros((X.shape[0], len(self.classes_)))
        for index, kde in enumerate(self.kdes_):
            scores[:, index] = kde.score_samples(X)
        scores = self._softmax(scores, axis=1)
        assert scores.shape == (X.shape[0], len(self.classes_))
        assert numpy.allclose(scores.sum(axis=1), numpy.ones((X.shape[0],)))
        return scores
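

# The demo below is an illustrative sketch, not part of the original module:
# it assumes scikit-learn's make_blobs helper to generate synthetic data and
# shows how KDEClassifier might be fit and queried.
if __name__ == '__main__':
    import sklearn.datasets

    # Three well-separated Gaussian blobs make an easy sanity check.
    X, y = sklearn.datasets.make_blobs(n_samples=300, centers=3,
                                       random_state=0)

    clf = KDEClassifier(bandwidth=0.5).fit(X, y)

    # Hard labels come from predict; predict_proba returns normalised
    # per-class densities whose rows sum to one.
    print(clf.predict(X[:5]))
    print(clf.predict_proba(X[:5]).sum(axis=1))  # Each entry should be ~1.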