Source code for hfs.hnb

"HNB feature selection"

import numpy as np
from sklearn.naive_bayes import BernoulliNB

from .lazyHierarchicalFeatureSelector import LazyHierarchicalFeatureSelector


class HNB(LazyHierarchicalFeatureSelector):
    """
    Select the k non-redundant features with the highest relevance
    following the algorithm proposed by Wan and Freitas.
    """

    def __init__(self, hierarchy=None, k=0):
        """Initializes a HNB-Selector.

        Parameters
        ----------
        hierarchy : np.ndarray
            The hierarchy graph as an adjacency matrix.
        k : int
            The number of features to select.
        """
        super(HNB, self).__init__(hierarchy)
        self.k = k

    def select_and_predict(self, predict=True, saveFeatures=False, estimator=None):
        """
        Select features lazily for each test instance and optionally
        predict the target value of the test instances.

        It selects the top-k-ranked features, such that redundancy along
        each path is removed, in descending order of their individual
        predictive power measured by their relevance defined in helpers.py.

        Parameters
        ----------
        predict : bool
            true if predictions shall be obtained.
        saveFeatures : bool
            true if features selected for each test instance shall be saved.
        estimator : sklearn-compatible estimator, optional
            Estimator to use for predictions. When omitted (``None``), a new
            ``BernoulliNB`` is created for this call.

        Returns
        -------
        predictions for test input samples, if predict = false, returns
        empty array.
        """
        # A default of ``BernoulliNB()`` in the signature would be evaluated
        # once and shared by every call and every HNB instance; build a
        # fresh estimator per call instead.
        if estimator is None:
            estimator = BernoulliNB()

        collected = []
        for idx in range(len(self._xtest)):
            self._get_nonredundant_features_relevance(idx)
            self._get_top_k()

            if predict:
                collected.append(self._predict(idx, estimator)[0])

            if saveFeatures:
                status_values = list(self._instance_status.values())
                self._features[idx] = np.array(status_values)
                # Number of features whose status is truthy for this instance.
                self._feature_length[idx] = sum(
                    1 for status in status_values if status
                )

            # Set every node's status back to 1 before the next test instance.
            # NOTE(review): the nesting of this reset (per instance, outside
            # the saveFeatures branch) was reconstructed from a source whose
            # indentation was lost -- confirm against the upstream file.
            for node in self._hierarchy:
                self._instance_status[node] = 1

        # Append once onto an empty float array (instead of once per
        # instance) so the resulting dtype/shape match the incremental
        # np.append chain without its repeated reallocation.
        return np.append(np.array([]), collected)