# Source code for nervos.dataloader.iris

"""
This module implements the `IrisLoader` class, which extends the `Dataloader` 
base class. It is specifically designed to handle the Iris dataset, providing 
methods for:

    - Loading the dataset
    - Generating additional synthetic samples to augment the dataset
    - Converting samples into spike trains

The `IrisLoader` supports preprocessing for Spiking Neural Network (SNN) 
training workflows.
"""

from ..utils import *
from .loader import Dataloader
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


class IrisLoader(Dataloader):
    """Dataloader for the Iris dataset with receptive-field spike encoding.

    The loader keeps the feature matrix in ``self.X`` and the class labels in
    ``self.y``. Both are loaded in ``__init__`` and may be replaced in place
    by ``fluff_data`` (augmentation) and ``normalise`` (min-max scaling).
    """

    def __init__(self, parameters: Parameters, num_rf: tuple, var: tuple) -> None:
        """
        Initializes the IrisLoader with the given parameters and loads the
        Iris dataset.

        Args:
            parameters (Parameters): Configuration parameters for the
                dataloader. This class reads ``min_frequency``,
                ``max_frequency``, ``training_duration``,
                ``training_images_amount`` and ``testing_images_amount``
                from it.
            num_rf (tuple): Number of receptive fields for each feature
                dimension.
            var (tuple): Variance values for Gaussian encoding, one per entry
                of ``num_rf``.
        """
        super().__init__(parameters)
        self.parameters = parameters
        self.X, self.y = load_iris(return_X_y=True)
        self.var = var
        self.num_rf = num_rf

    def fluff_data(self, samples_per_class: int) -> None:
        """
        Generates additional synthetic data by adding noise to the per-class
        mean and appends it to the existing samples.

        Each synthetic sample is ``mu + 1.5 * u * z * std`` where ``mu`` and
        ``std`` are the per-feature mean and standard deviation of the class,
        ``u`` is one uniform draw from [0, 1) shared by all features of the
        sample, and ``z`` is a standard-normal vector. Draws come from the
        global ``np.random`` state. ``self.X`` and ``self.y`` are replaced
        by the augmented arrays.

        Args:
            samples_per_class (int): Number of synthetic samples to generate
                per class.
        """
        augmented_X = list(self.X)
        augmented_y = list(self.y)

        for label in np.unique(self.y):
            # Statistics are taken from the current data only; samples added
            # in this call never feed back into them.
            members = self.X[self.y == label]
            mu = members.mean(axis=0)
            std = members.std(axis=0)

            for _ in range(samples_per_class):
                noise = 1.5 * np.random.rand() * np.random.randn(*mu.shape)
                augmented_X.append(mu + noise * std)
                augmented_y.append(label)

        self.X = np.array(augmented_X)
        self.y = np.array(augmented_y)

    def generate_data(self, total_samples: int) -> tuple[np.ndarray, np.ndarray]:
        """
        Generates a new dataset with balanced classes using
        Gaussian-distributed noise around each class mean.

        The stored data (``self.X``, ``self.y``) is only read, never modified.
        Draws come from the global ``np.random`` state.

        Args:
            total_samples (int): Total number of samples to generate. It is
                split evenly with floor division, so the result holds
                ``(total_samples // n_classes) * n_classes`` samples, which
                may be fewer than requested.

        Returns:
            tuple[np.ndarray, np.ndarray]: Generated feature matrix and
            corresponding labels, grouped class by class.
        """
        labels = np.unique(self.y)
        per_class = total_samples // len(labels)

        feature_chunks = []
        generated_labels = []
        for label in labels:
            members = self.X[self.y == label]
            mu = members.mean(axis=0)
            std = members.std(axis=0)
            feature_chunks.append(
                mu + np.random.randn(per_class, *mu.shape) * std
            )
            generated_labels.extend([label] * per_class)

        return np.vstack(feature_chunks), np.array(generated_labels)

    def normalise(self) -> None:
        """
        Normalizes the dataset features to the range [0, 1] (per-feature
        min-max scaling), replacing ``self.X``.

        A feature whose values are all identical has a zero range; it is
        mapped to 0 instead of producing NaN through a division by zero.
        """
        arr_min = self.X.min(axis=0)
        arr_max = self.X.max(axis=0)
        span = arr_max - arr_min
        # Guard constant columns: (x - min) is already 0 there, so any
        # non-zero divisor yields the intended 0.
        span = np.where(span == 0, 1, span)
        self.X = (self.X - arr_min) / span

    def encode_coordinates(self, point: np.ndarray) -> np.ndarray:
        """
        Encodes a given feature vector using Gaussian receptive fields.

        For feature ``i``, ``num_rf[i]`` field centres are spaced evenly over
        [0, 1] and the response of each field is
        ``exp(-(point[i] - centre)**2 / (2 * var[i]))``.

        Args:
            point (np.ndarray): Feature vector to encode. It needs at least
                ``len(num_rf)`` entries; the field centres span [0, 1].

        Returns:
            np.ndarray: The responses of all fields concatenated feature by
            feature, of length ``sum(num_rf)``.
        """
        responses = []
        for i, n_fields in enumerate(self.num_rf):
            centres = np.linspace(0, 1, n_fields)
            responses.append(
                np.exp(-((point[i] - centres) ** 2) / (2 * self.var[i]))
            )
        return np.concatenate(responses)

    def generate_poisson_spikes(
        self, spike_probs: np.ndarray, time_steps: int
    ) -> np.ndarray:
        """
        Converts encoded feature vectors into rate-coded spike trains.

        Despite the name, the trains are deterministic: each input is mapped
        to a firing frequency between ``parameters.min_frequency`` and
        ``parameters.max_frequency`` (proportional to its value relative to
        the largest input) and then fires at a fixed interval of
        ``ceil((parameters.training_duration + 1) / frequency)`` time steps.
        Time step 0 never carries a spike.

        Args:
            spike_probs (np.ndarray): One-dimensional array of probability
                values for spike generation.
            time_steps (int): Number of time steps for the spike train.

        Returns:
            np.ndarray: Integer array of shape
            ``(len(spike_probs), time_steps)`` holding 1 where a spike occurs
            and 0 elsewhere.
        """
        spike_probs = np.asarray(spike_probs, dtype=float)
        n_neurons = len(spike_probs)
        if n_neurons == 0:
            return np.zeros((0, time_steps), dtype=int)

        peak = np.max(spike_probs)
        if peak == 0:
            # Nothing to scale by: every neuron falls back to min_frequency.
            normalized_probs = np.zeros_like(spike_probs)
        else:
            normalized_probs = spike_probs / peak

        min_freq = self.parameters.min_frequency
        max_freq = self.parameters.max_frequency
        firing_frequencies = min_freq + normalized_probs * (max_freq - min_freq)

        # A neuron with a non-positive frequency never fires; give it a
        # placeholder interval of 1 so the modulo below stays well defined.
        active = firing_frequencies > 0
        intervals = np.ones(n_neurons, dtype=int)
        intervals[active] = np.ceil(
            (self.parameters.training_duration + 1) / firing_frequencies[active]
        ).astype(int)

        # Broadcast (neurons x time) instead of looping over time steps.
        steps = np.arange(time_steps)
        fires = (steps[np.newaxis, :] % intervals[:, np.newaxis]) == 0
        fires[:, :1] = False  # t = 0 is always silent
        fires[~active, :] = False
        return fires.astype(int)

    def dataloader(
        self,
        size: int = None,
        train: bool = True,
        preprocess: bool = False,
        seed: int = 42,
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Loads and optionally preprocesses the Iris dataset for SNN training.

        The stored features are min-max normalised in place first. Without
        ``size`` the data is split into train and test parts, with the test
        fraction equal to ``testing_images_amount / (testing_images_amount +
        training_images_amount)``. With ``size`` (test mode only) the split
        is bypassed and synthetic samples from ``generate_data`` are returned
        instead.

        Args:
            size (int, optional): Number of synthetic samples to generate
                (for test set only). ``None`` or 0 means "use the real test
                split".
            train (bool, optional): Whether to load training or test data.
                Defaults to True.
            preprocess (bool, optional): Whether to convert features into
                spike trains. Defaults to False.
            seed (int, optional): Random seed for the train/test split.
                Defaults to 42. Synthetic sampling uses the global
                ``np.random`` state and is not controlled by this seed.

        Returns:
            tuple[np.ndarray, np.ndarray]: Feature matrix and labels. With
            ``preprocess=True`` each sample is a spike-train matrix with
            ``training_duration + 1`` time steps rather than a raw feature
            vector.

        Raises:
            ValueError: If ``size`` is given together with ``train=True``.
        """
        if size and train:
            # Fail before normalise() mutates the stored data.
            raise ValueError(
                "`size` is only supported with train=False; synthetic "
                "sampling is not available for the training split."
            )

        self.normalise()

        if size:
            X, y = self.generate_data(size)
        else:
            n_test = self.parameters.testing_images_amount
            n_train = self.parameters.training_images_amount
            tr_X, te_X, tr_y, te_y = train_test_split(
                self.X,
                self.y,
                test_size=n_test / (n_test + n_train),
                random_state=seed,
            )
            X, y = (tr_X, tr_y) if train else (te_X, te_y)

        if preprocess:
            steps = self.parameters.training_duration + 1
            X = [
                self.generate_poisson_spikes(self.encode_coordinates(point), steps)
                for point in X
            ]

        return np.array(X), y