Source code for nervos.dataloader.iris
"""
This module implements the `IrisLoader` class, which extends the `Dataloader`
base class. It is specifically designed to handle the Iris dataset, providing
methods for:
- Loading the dataset
- Generating more synthetic data in the dataset
- Converting samples into spike trains
The `IrisLoader` supports preprocessing for Spiking Neural Network (SNN)
training workflows.
"""
from ..utils import *
from .loader import Dataloader
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
[docs]
class IrisLoader(Dataloader):
    """
    Dataloader for the Iris dataset with helpers for SNN preprocessing.

    The loader keeps the feature matrix in ``self.X`` and the labels in
    ``self.y``. It can augment the data with noisy synthetic samples, rescale
    features to [0, 1], encode a sample with Gaussian receptive fields and
    turn the encoding into a binary spike train.
    """

    def __init__(self, parameters: Parameters, num_rf: tuple, var: tuple) -> None:
        """
        Initializes the IrisLoader with the given parameters and loads the Iris dataset.

        Args:
            parameters (Parameters): Configuration parameters for the dataloader.
                This class reads ``min_frequency``, ``max_frequency``,
                ``training_duration``, ``training_images_amount`` and
                ``testing_images_amount`` from it.
            num_rf (tuple): Number of receptive fields for each feature dimension.
            var (tuple): Variance values for Gaussian encoding, indexed in
                parallel with ``num_rf``.
        """
        super().__init__(parameters)
        self.parameters = parameters
        self.X, self.y = load_iris(return_X_y=True)
        self.var = var
        self.num_rf = num_rf

    def fluff_data(self, samples_per_class: int) -> None:
        """
        Generates additional synthetic data by adding noise to the existing samples.

        For every class, each synthetic sample is the class mean plus Gaussian
        noise scaled by the per-feature class standard deviation and by a
        single random factor drawn uniformly from [0, 1.5). The new samples
        are appended to ``self.X`` / ``self.y`` in place.

        Args:
            samples_per_class (int): Number of synthetic samples to generate per class.
        """
        new_X = list(self.X)
        new_Y = list(self.y)
        for _cls in np.unique(self.y):
            # Statistics are taken from the data as it was before this call;
            # self.X is only replaced once every class has been processed.
            X_class = self.X[self.y == _cls]
            mu = X_class.mean(axis=0)
            std = X_class.std(axis=0)
            for _ in range(samples_per_class):
                # One scalar factor per sample, then per-feature Gaussian noise.
                scale = 1.5 * np.random.rand()
                new_X.append(mu + scale * np.random.randn(*mu.shape) * std)
                new_Y.append(_cls)
        self.X = np.array(new_X)
        self.y = np.array(new_Y)

    def generate_data(self, total_samples: int) -> tuple[np.ndarray, np.ndarray]:
        """
        Generates a new dataset with balanced classes using Gaussian-distributed noise.

        Each class contributes ``total_samples // n_classes`` samples drawn
        around the class mean with the per-feature class standard deviation,
        so the result may hold fewer than ``total_samples`` rows when the
        total is not divisible by the number of classes. ``self.X`` and
        ``self.y`` are not modified.

        Args:
            total_samples (int): Total number of samples to generate.

        Returns:
            tuple[np.ndarray, np.ndarray]: Generated feature matrix and
            corresponding labels, grouped by class in ascending label order.
        """
        classes = np.unique(self.y)
        samples_per_class = total_samples // len(classes)
        per_class = []
        for _cls in classes:
            X_class = self.X[self.y == _cls]
            mu = X_class.mean(axis=0)
            std = X_class.std(axis=0)
            per_class.append(
                mu + np.random.randn(samples_per_class, *mu.shape) * std
            )
        # np.repeat yields [c0]*k + [c1]*k + ..., matching the row order above.
        return np.vstack(per_class), np.repeat(classes, samples_per_class)

    def normalise(self) -> None:
        """
        Normalizes the dataset features to the range [0, 1].

        Scaling is min-max per feature column and replaces ``self.X``.
        A column whose values are all equal has zero range; it is mapped to 0
        instead of producing NaN from a division by zero.
        """
        arr_min = self.X.min(axis=0)
        arr_max = self.X.max(axis=0)
        span = arr_max - arr_min
        # Avoid 0/0 on constant features: (x - min) is 0 there, so any
        # non-zero divisor gives the intended 0.
        span = np.where(span == 0, 1, span)
        self.X = (self.X - arr_min) / span

    def encode_coordinates(self, point: np.ndarray) -> np.ndarray:
        """
        Encodes a given feature vector using Gaussian receptive fields.

        Feature ``i`` is compared against ``self.num_rf[i]`` centres spaced
        evenly over [0, 1]; each centre responds with
        ``exp(-(x - centre)**2 / (2 * self.var[i]))``.

        Args:
            point (np.ndarray): Feature vector to encode. Only the first
                ``len(self.num_rf)`` entries are used.

        Returns:
            np.ndarray: 1-D array of length ``sum(self.num_rf)`` holding the
            responses of all receptive fields, feature by feature.
        """
        responses = []
        for i, field_count in enumerate(self.num_rf):
            centres = np.linspace(0, 1, field_count)
            responses.append(
                np.exp(-((point[i] - centres) ** 2) / (2 * self.var[i]))
            )
        return np.concatenate(responses)

    def generate_poisson_spikes(
        self, spike_probs: np.ndarray, time_steps: int
    ) -> np.ndarray:
        """
        Converts encoded feature vectors into rate-coded spike trains.

        Despite the name, no random sampling takes place: every input value
        is mapped linearly to a firing frequency between
        ``parameters.min_frequency`` and ``parameters.max_frequency`` and the
        neuron then fires periodically, once every
        ``ceil((training_duration + 1) / frequency)`` steps. Step 0 never
        carries a spike.

        Args:
            spike_probs (np.ndarray): Probability values for spike generation.
                They are divided by their maximum before use.
            time_steps (int): Number of time steps for the spike train.

        Returns:
            np.ndarray: Integer array of shape ``(len(spike_probs), time_steps)``
            containing 1 where a spike occurs and 0 elsewhere.
        """
        spike_probs = np.asarray(spike_probs, dtype=float)
        if spike_probs.size == 0:
            return np.zeros((0, time_steps), dtype=int)

        # Guard against an all-zero input, which would otherwise give 0/0.
        peak = np.max(spike_probs)
        if peak > 0:
            normalized_probs = spike_probs / peak
        else:
            normalized_probs = np.zeros_like(spike_probs)

        firing_frequencies = self.parameters.min_frequency + normalized_probs * (
            self.parameters.max_frequency - self.parameters.min_frequency
        )

        # A non-positive frequency means the neuron stays silent; keep a
        # placeholder interval of 1 there so no division by zero happens.
        active = firing_frequencies > 0
        intervals = np.ones(len(spike_probs), dtype=int)
        intervals[active] = np.ceil(
            (self.parameters.training_duration + 1) / firing_frequencies[active]
        ).astype(int)
        intervals = np.maximum(intervals, 1)

        steps = np.arange(time_steps)
        spikes = steps[np.newaxis, :] % intervals[:, np.newaxis] == 0
        spikes[:, :1] = False  # t = 0 is always left empty
        spikes[~active] = False
        return spikes.astype(int)

    def dataloader(
        self,
        size: int = None,
        train: bool = True,
        preprocess: bool = False,
        seed: int = 42,
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Loads and optionally preprocesses the Iris dataset for SNN training.

        The stored data is normalised in place, then split into train and
        test parts with a test fraction of
        ``testing_images_amount / (testing_images_amount + training_images_amount)``.

        Args:
            size (int, optional): Number of samples to load (for test set only).
                When given, the test split is replaced by synthetic samples
                from ``generate_data(size)``.
            train (bool, optional): Whether to load training or test data. Defaults to True.
            preprocess (bool, optional): Whether to convert features into spike trains. Defaults to False.
            seed (int, optional): Random seed for reproducibility. Defaults to 42.

        Returns:
            tuple[np.ndarray, np.ndarray]: Feature matrix and labels. With
            ``preprocess=True`` every sample is a spike train of shape
            ``(sum(num_rf), training_duration + 1)`` instead of a feature row.

        Raises:
            ValueError: If ``size`` is given together with ``train=True``.
        """
        if size and train:
            # The original raised a bare string here, which Python 3 rejects
            # with an unrelated TypeError; raise a real exception instead.
            raise ValueError(
                "`size` is only supported for the test set; call with train=False"
            )

        self.normalise()
        test_fraction = self.parameters.testing_images_amount / (
            self.parameters.testing_images_amount
            + self.parameters.training_images_amount
        )
        tr_X, te_X, tr_y, te_y = train_test_split(
            self.X,
            self.y,
            test_size=test_fraction,
            random_state=seed,
        )
        X, y = (tr_X, tr_y) if train else (te_X, te_y)
        if size:
            X, y = self.generate_data(size)

        if not preprocess:
            return np.array(X), y

        duration = self.parameters.training_duration + 1
        spike_trains = [
            self.generate_poisson_spikes(self.encode_coordinates(point), duration)
            for point in X
        ]
        return np.array(spike_trains), y