"HNB feature selection"
import numpy as np
from sklearn.naive_bayes import BernoulliNB
from .lazyHierarchicalFeatureSelector import LazyHierarchicalFeatureSelector
[docs]class HNB(LazyHierarchicalFeatureSelector):
"""
Select the k non-redundant features with the highest relevance following the algorithm proposed by Wan and Freitas.
"""
[docs] def __init__(self, hierarchy=None, k=0):
"""Initializes a HNB-Selector.
Parameters
----------
hierarchy : np.ndarray
The hierarchy graph as an adjacency matrix.
k : int
The numbers of features to select.
"""
super(HNB, self).__init__(hierarchy)
self.k = k
[docs] def select_and_predict(
self, predict=True, saveFeatures=False, estimator=BernoulliNB()
):
"""
Select features lazy for each test instance amd optionally predict target value of test instances.
It selects the top-k-ranked features, such that redundancy along each path is removed,
in descending order of their individual predictive power measured by their relevance defined in helpers.py.
Parameters
----------
predict : bool
true if predictions shall be obtained.
saveFeatures : bool
true if features selected for each test instance shall be saved.
estimator : sklearn-compatible estimator
Estimator to use for predictions.
Returns
-------
predictions for test input samples, if predict = false, returns empty array.
"""
predictions = np.array([])
for idx in range(len(self._xtest)):
self._get_nonredundant_features_relevance(idx)
self._get_top_k()
if predict:
predictions = np.append(predictions, self._predict(idx, estimator)[0])
if saveFeatures:
self._features[idx] = np.array(list(self._instance_status.values()))
self._feature_length[idx] = len(
[nodes for nodes, status in self._instance_status.items() if status]
)
for node in self._hierarchy:
self._instance_status[node] = 1
return predictions