Initial commit

This commit is contained in:
Yura Dupyn 2026-03-20 11:48:07 +01:00
commit fa8eae3146
17 changed files with 141328 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
target/

108
Cargo.lock generated Normal file
View file

@ -0,0 +1,108 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "getrandom"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.150"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "pv021_project"
version = "0.1.0"
dependencies = [
"rand",
"rand_distr",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_distr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

10
Cargo.toml Normal file
View file

@ -0,0 +1,10 @@
[package]
name = "pv021_project"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rand = "0.8.5"
rand_distr = "0.4.3"

BIN
OVERVIEW.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 649 KiB

16
README.md Normal file
View file

@ -0,0 +1,16 @@
Rust Deep Learning
School Project I did for a Neural Networks Class at the end of 2023:
- Implements a neural net (backpropagation / multilayer perceptron) in Rust
- Constraint: Can't use any Linear Algebra libraries or frameworks
- Training/Dataset: Fashion-MNIST dataset (achieves about 91% accuracy in less than 10 min of training)
See `OVERVIEW.png` for the underlying math I came up with to do backprop and organize memory.
# Dataset
Fashion MNIST (https://arxiv.org/pdf/1708.07747.pdf). Dataset of images consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. The dataset is in CSV format:
- `fashion_mnist_train_vectors.csv` - training input vectors
- `fashion_mnist_test_vectors.csv` - testing input vectors
- `fashion_mnist_train_labels.csv` - training labels
- `fashion_mnist_test_labels.csv` - testing labels

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

16
src/env.rs Normal file
View file

@ -0,0 +1,16 @@
// Image geometry of the Fashion-MNIST inputs: 28x28 grayscale pixels per image.
pub const NUMBER_OF_PIXELS_PER_IMAGE: usize = 28 * 28;
// (labels path, vectors path) pairs for the test / training CSV files.
pub const TEST_CSVS: (&str, &str) = (
    "data/fashion_mnist_test_labels.csv",
    "data/fashion_mnist_test_vectors.csv",
);
pub const TRAIN_CSVS: (&str, &str) = (
    "data/fashion_mnist_train_labels.csv",
    "data/fashion_mnist_train_vectors.csv",
);
// Output files written after training: one predicted label per line.
pub const TEST_PREDICTIONS_CSV: &str = "test_predictions.csv";
pub const TRAIN_PREDICTIONS_CSV: &str = "train_predictions.csv";
// Output files for the full per-class probability distributions.
pub const TEST_DISTRIBUTIONS_CSV: &str = "test_distributions.csv";
pub const TRAIN_DISTRIBUTIONS_CSV: &str = "train_distributions.csv";

1
src/float.rs Normal file
View file

@ -0,0 +1 @@
// Scalar type used throughout the whole network; change it here to switch
// precision globally. (Original author's note: f64 vs f32 made no observable
// difference in accuracy.)
pub type Float = f32; // f64 or f32, doesn't seem to make any difference

305
src/linear_algebra.rs Normal file
View file

@ -0,0 +1,305 @@
mod raw_operation {
    use crate::float::Float;

    /// Overwrites every component of `out` with 0.
    pub fn zero(out: &mut [Float]) {
        out.iter_mut().for_each(|y| *y = 0.0);
    }

    /// Dot product over the common prefix of `xs` and `ys`.
    pub fn inner_product(xs: &[Float], ys: &[Float]) -> Float {
        xs.iter().zip(ys).map(|(x, y)| x * y).sum()
    }

    /// Componentwise scaling: `out = k * xs`.
    pub fn scale(xs: &[Float], k: Float, out: &mut [Float]) {
        for (y, x) in out.iter_mut().zip(xs) {
            *y = x * k;
        }
    }

    /// Componentwise sum: `out = xs + ys`.
    pub fn add(xs: &[Float], ys: &[Float], out: &mut [Float]) {
        for ((z, x), y) in out.iter_mut().zip(xs).zip(ys) {
            *z = x + y;
        }
    }

    /// Componentwise difference: `out = xs - ys`.
    pub fn sub(xs: &[Float], ys: &[Float], out: &mut [Float]) {
        for ((z, x), y) in out.iter_mut().zip(xs).zip(ys) {
            *z = x - y;
        }
    }
}
use crate::float::Float;
use rand_distr::{Distribution, Normal};
use std::iter::FromIterator;
use std::ops::{Index, IndexMut};
use std::slice::{Iter, IterMut, SliceIndex};
// Thin newtype over `Vec<Float>` so slice indexing and iteration can be
// forwarded while keeping a domain-specific vector type.
#[derive(Clone, Debug)]
pub struct Vector(Vec<Float>);
// Forward every flavour of slice indexing (`v[i]`, `v[a..b]`, `v[..]`, ...)
// straight to the underlying `Vec`.
impl<Idx> Index<Idx> for Vector
where
    Idx: SliceIndex<[Float]>,
{
    type Output = Idx::Output;
    fn index(&self, index: Idx) -> &Self::Output {
        &self.0[index]
    }
}
impl<Idx> IndexMut<Idx> for Vector
where
    Idx: SliceIndex<[Float]>,
{
    fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
        &mut self.0[index]
    }
}
impl FromIterator<Float> for Vector {
    /// Collects an iterator of scalars into a `Vector`.
    fn from_iter<I: IntoIterator<Item = Float>>(iter: I) -> Self {
        Vector(iter.into_iter().collect())
    }
}
impl Vector {
    /// Immutable iterator over the components.
    pub fn iter(&self) -> Iter<'_, Float> {
        self.0.iter()
    }
    /// Mutable iterator over the components.
    pub fn iter_mut(&mut self) -> IterMut<'_, Float> {
        self.0.iter_mut()
    }
    /// Borrows the components as a slice.
    pub fn as_slice(&self) -> &[Float] {
        self.0.as_slice()
    }
    /// Borrows the components as a mutable slice.
    pub fn as_mut_slice(&mut self) -> &mut [Float] {
        self.0.as_mut_slice()
    }
    /// Copies `src` into this vector; panics if the lengths differ.
    pub fn copy_from_slice(&mut self, src: &[Float]) {
        self.0.copy_from_slice(src)
    }
    /// Consumes the wrapper and returns the underlying `Vec`.
    pub fn to_vec(self) -> Vec<Float> {
        self.0
    }
}
impl Vector {
    /// Wraps an owned `Vec` of scalars.
    pub fn new(vector: Vec<Float>) -> Self {
        Self(vector)
    }
    /// All-zero vector of the given dimension.
    pub fn zero(size: usize) -> Self {
        Self(vec![0.0; size])
    }
    /// Componentwise sum: `out = self + w`.
    pub fn add_mut(&self, w: &[Float], out: &mut [Float]) {
        raw_operation::add(&self[..], w, out)
    }
    /// Componentwise difference: `out = self - w`.
    ///
    /// BUGFIX: this previously delegated to `raw_operation::add`, so it
    /// silently computed a sum instead of a difference.
    pub fn sub_mut(&self, w: &[Float], out: &mut [Float]) {
        raw_operation::sub(&self[..], w, out)
    }
    /// Componentwise scaling: `out = k * self`.
    pub fn scale_mut(&self, k: Float, out: &mut [Float]) {
        raw_operation::scale(&self[..], k, out)
    }
    /// Dot product of `self` with `w`.
    pub fn inner_product(&self, w: &[Float]) -> Float {
        raw_operation::inner_product(&self[..], w)
    }
}
// Column-major matrix: `components` stores each column contiguously
// (`output_dimension` entries per column), which makes applying the
// transpose (`coapply_mut`) a sequence of cache-friendly inner products.
#[derive(Clone, Debug)]
pub struct ColumnEfficientMatrix {
    pub input_dimension: usize,  // number of columns
    pub output_dimension: usize, // number of rows
    pub components: Vec<Float>,
}
// Indexing is `(column, row)`, i.e. `(input index, output index)`.
impl Index<(usize, usize)> for ColumnEfficientMatrix {
    type Output = Float;
    fn index(&self, (column, row): (usize, usize)) -> &Self::Output {
        &self.components[column * self.output_dimension + row]
    }
}
impl IndexMut<(usize, usize)> for ColumnEfficientMatrix {
    fn index_mut(&mut self, (column, row): (usize, usize)) -> &mut Self::Output {
        &mut self.components[column * self.output_dimension + row]
    }
}
impl ColumnEfficientMatrix {
    /// Builds a matrix from row-major data: `rows[j][i]` is row `j`, column `i`.
    /// The data is transposed into the internal column-major storage.
    /// Assumes every row has the same length as `rows[0]` (panics otherwise).
    pub fn from_rows(rows: Vec<Vec<Float>>) -> Self {
        let output_dimension = rows.len();
        if output_dimension == 0 {
            Self {
                input_dimension: 0,
                output_dimension: 0,
                components: vec![],
            }
        } else {
            let input_dimension = rows[0].len();
            let mut components = Vec::with_capacity(input_dimension * output_dimension);
            for i in 0..input_dimension {
                for j in 0..output_dimension {
                    components.push(rows[j][i])
                }
            }
            Self {
                input_dimension,
                output_dimension,
                components,
            }
        }
    }
    /// Zero matrix of the given shape.
    pub fn zero(input_dimension: usize, output_dimension: usize) -> ColumnEfficientMatrix {
        Self {
            input_dimension,
            output_dimension,
            components: vec![0.0; input_dimension * output_dimension],
        }
    }
    /// Matrix with entries sampled i.i.d. from the given normal distribution.
    pub fn random_with_normal_distribution(
        input_dimension: usize,
        output_dimension: usize,
        distribution: Normal<Float>,
    ) -> ColumnEfficientMatrix {
        // PERF: acquire the thread-local RNG handle once instead of once per
        // sample (the original called `rand::thread_rng()` inside the loop).
        let mut rng = rand::thread_rng();
        let mut components = Vec::with_capacity(input_dimension * output_dimension);
        for _ in 0..input_dimension * output_dimension {
            components.push(distribution.sample(&mut rng))
        }
        Self {
            input_dimension,
            output_dimension,
            components,
        }
    }
    /// Resets every entry to zero (reused between training batches).
    pub fn zero_mut(&mut self) {
        for a in &mut self.components {
            *a = 0.0
        }
    }
    /// `c = self + k * b`, componentwise over the flat storage.
    pub fn add_scaled_mut(&self, k: Float, b: &Self, c: &mut Self) {
        for ((a, b), c) in self
            .components
            .iter()
            .zip(&b.components)
            .zip(&mut c.components)
        {
            *c = *a + *b * k
        }
    }
    /// `self += k * b`, componentwise.
    pub fn add_to_self_scaled_mut(&mut self, k: Float, b: &Self) {
        for (a, b) in self.components.iter_mut().zip(&b.components) {
            *a += *b * k
        }
    }
    /// Matrix-vector product: `out = self * v`.
    pub fn apply_mut(&self, v: &[Float], out: &mut [Float]) {
        for j in 0..self.output_dimension {
            let mut result: Float = 0.0;
            for i in 0..self.input_dimension {
                result += self[(i, j)] * v[i]
            }
            out[j] = result;
        }
    }
    // Apply the transpose: `out = transpose(self) * w`. Each column is stored
    // contiguously, so this is one inner product per column slice.
    pub fn coapply_mut(&self, w: &[Float], out: &mut [Float]) {
        for i in 0..self.input_dimension {
            let start_index = i * self.output_dimension;
            out[i] = raw_operation::inner_product(
                &self.components[start_index..start_index + self.output_dimension],
                w,
            );
        }
    }
    /// Like `coapply_mut`, but skips column 0 (the bias column) and writes the
    /// results shifted down by one index.
    pub fn drop_first_column_coapply_mut(&self, w: &[Float], out: &mut [Float]) {
        for i in 1..self.input_dimension {
            let start_index = i * self.output_dimension;
            out[i - 1] = raw_operation::inner_product(
                &self.components[start_index..start_index + self.output_dimension],
                w,
            );
        }
    }
}
// A square matrix that is zero off the diagonal, viewed through a borrowed
// slice of its diagonal entries.
#[derive(Clone, Debug)]
pub struct DiagonalMatrix<'a> {
    pub diagonal: &'a [Float],
}
impl<'a> DiagonalMatrix<'a> {
    /// Views `diagonal` as the diagonal of a square matrix.
    pub fn new(diagonal: &'a [Float]) -> DiagonalMatrix<'a> {
        Self { diagonal }
    }
    /// Componentwise product: `out[i] = diagonal[i] * v[i]`.
    pub fn apply_mut(&self, v: &[Float], out: &mut [Float]) {
        let products = self.diagonal.iter().zip(v).map(|(d, x)| d * x);
        for (i, p) in products.enumerate() {
            out[i] = p;
        }
    }
}
// Given vector `w : W` and a covector `f : V -> Float`,
// the linear map `w tensor f : V -> W` computes `v : V ~> f(v) * w`
// (a rank-one matrix represented by its two factors, no allocation needed).
#[derive(Clone, Debug)]
pub struct VectorTensorCovectorMatrix<'a> {
    pub output_vector: &'a [Float], // w : W
    pub input_covector: &'a [Float], // f : V -> Float
}
impl<'a> VectorTensorCovectorMatrix<'a> {
    /// Borrows the two factors of the rank-one matrix `w tensor f`.
    pub fn new(
        output_vector: &'a [Float],
        input_covector: &'a [Float],
    ) -> VectorTensorCovectorMatrix<'a> {
        Self {
            output_vector,
            input_covector,
        }
    }
    /// `out = f(v) * w` — one inner product followed by one scaling pass.
    pub fn apply_mut(&self, v: &[Float], out: &mut [Float]) {
        let scalar = raw_operation::inner_product(self.input_covector, v);
        raw_operation::scale(self.output_vector, scalar, out)
    }
    /// Accumulates the rank-one matrix into `matrix`:
    /// `matrix[(i, j)] += input_covector[i] * output_vector[j]`.
    pub fn add_to_mut(&self, matrix: &mut ColumnEfficientMatrix) {
        // TODO: Surely this can be optimized by iterating over the columns of the matrix directly
        for (i, x) in self.input_covector.iter().enumerate() {
            for (j, y) in self.output_vector.iter().enumerate() {
                matrix[(i, j)] += x * y
            }
        }
    }
}

76
src/main.rs Normal file
View file

@ -0,0 +1,76 @@
mod env;
mod float;
mod linear_algebra;
mod neural_network;
mod preprocessing;
mod transforms;
use preprocessing::{dataset_from_file, export_distributions_to, export_to};
use std::io;
use crate::neural_network::{NNPoint, NeuralNetworkInTraining, NeuralNetworkParameters};
// Entry point: load Fashion-MNIST CSVs, train the network, report accuracy,
// and export per-point predictions for both datasets.
fn main() -> Result<(), io::Error> {
    // The 60k training rows are split 50k/10k into train/validation;
    // the test set is the separate 10k-row file.
    let size_of_full_training_dataset = 60000;
    let size_of_training_dataset = 50000;
    let size_of_validation_dataset = 10000;
    let size_of_testing_dataset = 10000;
    println!("Importing datasets...");
    let full_training_dataset: Vec<NNPoint> =
        dataset_from_file(env::TRAIN_CSVS, size_of_full_training_dataset)?;
    // First 50k points are used for gradient updates...
    let mut training_dataset: Vec<NNPoint> = {
        let mut training_dataset = Vec::with_capacity(size_of_training_dataset);
        training_dataset.extend_from_slice(&full_training_dataset[0..size_of_training_dataset]);
        training_dataset
    };
    // ...and the remaining 10k are held out to track validation accuracy.
    let validation_dataset: Vec<NNPoint> = {
        let mut validation_dataset = Vec::with_capacity(size_of_validation_dataset);
        validation_dataset.extend_from_slice(
            &full_training_dataset
                [size_of_training_dataset..size_of_training_dataset + size_of_validation_dataset],
        );
        validation_dataset
    };
    let test_dataset: Vec<NNPoint> = dataset_from_file(env::TEST_CSVS, size_of_testing_dataset)?;
    // Architectures tried during development, kept for reference:
    // let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 70, 10]);
    // let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 10]);
    // let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 60, 40, 10]);
    // let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 10]);
    // let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 60, 40, 10]); // batch=20, rate=2 is pretty good. Seems to reliably reach 89%
    // let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 80, 60, 40, 10]); // batch=20, rate=2, pretty good like 88 % then suddenly drops to 10%
    // let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 150, 10]); // this gets me over 90%, nice
    let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 150, 10]);
    println!("Begin training");
    let params = NeuralNetworkParameters {
        epochs: 30,
        batch_size: 30,
        learning_rate: 2.00,
    };
    nn.train(params, &mut training_dataset, Some(&validation_dataset));
    // nn.train(params, &mut training_dataset, None);
    nn.show_accuracy_on(&full_training_dataset, "training");
    nn.show_accuracy_on(&test_dataset, "test");
    let predictions_on_full_training_set = nn.test(&full_training_dataset);
    let predictions_on_test_set = nn.test(&test_dataset);
    println!("Exporting to {}", env::TRAIN_PREDICTIONS_CSV);
    export_to(
        &predictions_on_full_training_set,
        env::TRAIN_PREDICTIONS_CSV,
    )?;
    println!("Exporting to {}", env::TEST_PREDICTIONS_CSV);
    export_to(&predictions_on_test_set, env::TEST_PREDICTIONS_CSV)?;
    // TODO: Comment this out
    // let distributions_on_full_training_set = nn.test_distributions(&full_training_dataset);
    // let distributions_on_test_set = nn.test_distributions(&test_dataset);
    // export_distributions_to(&distributions_on_full_training_set, env::TRAIN_DISTRIBUTIONS_CSV)?;
    // export_distributions_to(&distributions_on_test_set, env::TEST_DISTRIBUTIONS_CSV)?;
    Ok(())
}

336
src/neural_network.rs Normal file
View file

@ -0,0 +1,336 @@
use crate::float::Float;
use crate::linear_algebra::Vector;
use crate::transforms;
use crate::transforms::{ReluTransform, SoftmaxTransform};
// One labelled training/test example.
#[derive(Debug, Clone)]
pub struct NNPoint {
    // Class index, widened from the CSV's u8 label.
    label: usize,
    // Pixel values after `preprocessing::normalize_input` (zero-mean,
    // scaled by the root of the summed squares).
    normalized_image: Vec<Float>,
}
impl NNPoint {
    /// Wraps a raw label and an already-normalized image.
    pub fn new(label: u8, normalized_image: Vec<Float>) -> Self {
        Self {
            label: label as usize,
            normalized_image,
        }
    }
}
// The network plus all scratch buffers needed for forward and backward
// passes, preallocated once so training never allocates per point.
#[derive(Debug)]
pub struct NeuralNetworkInTraining {
    neurons_per_layer: Vec<usize>, // e.g. [784, 150, 150, 10]
    inputs: Vec<Vector>, // Each input will start with 1
    output: Vector, // final-layer probabilities (softmax output)
    input_gradients: Vec<Vector>, // per-layer error gradients, reused each pass
    transforms: Vec<ReluTransform>, // hidden layers
    output_transform: SoftmaxTransform, // final classification layer
}
// Hyper-parameters for one training run.
#[derive(Debug, Copy, Clone)]
pub struct NeuralNetworkParameters {
    pub epochs: usize,
    pub batch_size: usize,
    pub learning_rate: Float, // initial rate; decayed each epoch during training
}
impl NeuralNetworkParameters {
    /// One-line human-readable summary used in training progress logs.
    fn show(&self) -> String {
        format!(
            "epoch = {}, batch = {}, rate = {}",
            self.epochs, self.batch_size, self.learning_rate
        )
    }
}
impl NeuralNetworkInTraining {
    /// Allocates all layers, scratch buffers and randomly initialized
    /// transforms for the given architecture. Currently panics (`todo!`)
    /// when fewer than two layers are requested.
    pub fn new(neurons_per_layer: Vec<usize>) -> Self {
        // e.g. neurons_per_layer = [699, 79, 49, 19, 14, 10]
        // By a layer here we mean a collection of neurons that's between two neighbouring Transforms or the initial input or final input neurons.
        // Note that there are N + 1 layers where N is the number of Transforms used.
        if neurons_per_layer.len() < 2 {
            todo!()
        }
        let neurons_per_layer_except_last: Vec<usize> = neurons_per_layer
            .clone()
            .into_iter()
            .rev()
            .skip(1)
            .rev()
            .collect();
        let neurons_per_layer_except_first: Vec<usize> =
            neurons_per_layer.clone().into_iter().skip(1).collect();
        // Convention: the first component should always be 1.0 - this allows the first
        // column of the weight matrix to be interpreted as bias.
        let inputs: Vec<Vector> = neurons_per_layer_except_last
            .iter()
            .map(|neuron_count| {
                let mut v = Vector::zero(neuron_count + 1);
                v[0] = 1.0;
                v
            })
            .collect();
        let input_gradients: Vec<Vector> = neurons_per_layer_except_first
            .iter()
            .map(|neuron_count| Vector::zero(*neuron_count))
            .collect();
        // One ReLU transform between each pair of consecutive hidden layers;
        // the extra +1 input accounts for the constant bias component.
        let transforms: Vec<ReluTransform> = neurons_per_layer_except_last
            .iter()
            .zip(neurons_per_layer_except_last.iter().skip(1))
            .map(|(input_neuron_count, output_neuron_count)| {
                ReluTransform::new(*input_neuron_count + 1, *output_neuron_count)
            })
            .collect();
        let neurons_in_last_layer = *neurons_per_layer.last().unwrap();
        let neurons_in_next_to_last_layer = *neurons_per_layer_except_last.last().unwrap();
        let output = Vector::zero(neurons_in_last_layer);
        let output_transform =
            SoftmaxTransform::new(neurons_in_next_to_last_layer + 1, neurons_in_last_layer);
        Self {
            neurons_per_layer,
            inputs,
            output,
            input_gradients,
            transforms,
            output_transform,
        }
    }
    // Forward pass. You need to initialize inputs[0] before use.
    fn output_mut(&mut self) {
        // The following iterates over pairs of neighbouring inputs where we
        // have a mutable reference to both of them.
        // With Rust borrow-checking rules this can't be done directly,
        // so I had to resort to `split_at_mut(k)`
        for i in 0..self.inputs.len() - 1 {
            let (left_inputs, right_inputs) = self.inputs.split_at_mut(i + 1);
            let transform = &mut self.transforms[i];
            let input = &left_inputs[i][..];
            // Skip component 0: it stays fixed at 1.0 (the bias convention).
            let output = &mut right_inputs[0][1..];
            transform.output_mut(input, output);
        }
        self.output_transform.output_mut(
            &self.inputs[self.inputs.len() - 1][..],
            &mut self.output[..],
        );
    }
    // Backward pass: propagates gradients layer by layer and accumulates
    // weight gradients. Initialize input_gradients[-1] with error gradient before use.
    fn update_weights_mut(&mut self) {
        let last_index = self.input_gradients.len() - 1;
        // TODO: Last layer is no different. This should be part of the same loop
        {
            let transform = &mut self.output_transform;
            transform.potential_gradient_mut(&self.input_gradients[last_index][..]);
            transform
                .gradient_with_respect_to_input_mut(&mut self.input_gradients[last_index - 1][..]);
            transform.add_gradient_with_respect_to_weights_mut(&self.inputs[last_index][..]);
        }
        // Hidden layers, walked backwards; same split_at_mut trick as in
        // `output_mut` to hold two gradient buffers at once.
        for i in (0..self.input_gradients.len() - 2).rev() {
            let (left_input_gradient, right_input_gradient) =
                self.input_gradients.split_at_mut(i + 1);
            let transform = &mut self.transforms[i + 1];
            let left_grad = &mut left_input_gradient[i][..];
            let right_grad = &right_input_gradient[0][..];
            let input = &self.inputs[i + 1][..];
            transform.potential_gradient_mut(right_grad);
            transform.gradient_with_respect_to_input_mut(left_grad);
            transform.add_gradient_with_respect_to_weights_mut(input);
        }
        {
            let transform = &mut self.transforms[0];
            transform.potential_gradient_mut(&self.input_gradients[0][..]);
            transform.add_gradient_with_respect_to_weights_mut(&self.inputs[0][..]);
            // Note that we are not computing gradient with respect to input, since this is the
            // first layer and we don't care about changes to input, only to weights.
        }
    }
    // One full forward + backward pass for a single training point.
    fn forward_and_backwards_mut(&mut self, point: &NNPoint) {
        self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
        self.output_mut();
        let last_index = self.input_gradients.len() - 1;
        // WARNING: Use proper error function
        // transforms::gradient_error_mut(&self.output[..], desired_output, &mut self.input_gradients[last_index][..]);
        transforms::cross_entropy_derivative_simple(
            &self.output[..],
            point.label,
            &mut self.input_gradients[last_index][..],
        );
        self.update_weights_mut()
    }
    fn iterate_over_batch_mut(&mut self, learning_rate: Float, batch: &[NNPoint]) {
        // iterates over the batch, while updating gradient of weights.
        for point in batch {
            self.forward_and_backwards_mut(point);
        }
        // Update the current weights by the opposite of the epsilon / batch_size *weight_gradients
        let batch_size = batch.len() as Float;
        let epsilon = -learning_rate / batch_size;
        for transform in &mut self.transforms {
            transform
                .weight
                .add_to_self_scaled_mut(epsilon, &transform.weight_gradient);
            // Resets the weight gradient to zero so it can be used in next batch.
            transform.weight_gradient.zero_mut();
        }
    }
    // One epoch: shuffle the training set, then run mini-batch SGD over it.
    fn iterate_over_epoch(
        &mut self,
        training_set: &mut [NNPoint],
        batch_size: usize,
        learning_rate: Float,
    ) {
        use rand::seq::SliceRandom;
        use rand::thread_rng;
        training_set.shuffle(&mut thread_rng()); // Shuffling is linear in the size of the slice
        for batch in training_set.chunks(batch_size) {
            self.iterate_over_batch_mut(learning_rate, batch);
        }
    }
    /// Runs the full training loop, printing accuracy (when a testing set
    /// is supplied), the decayed learning rate and elapsed time per epoch.
    pub fn train(
        &mut self,
        parameters: NeuralNetworkParameters,
        training_set: &mut [NNPoint],
        testing_set: Option<&[NNPoint]>,
    ) {
        // Helper: measure and log accuracy on the held-out set, if any.
        fn test(
            nn: &mut NeuralNetworkInTraining,
            parameters: NeuralNetworkParameters,
            testing_set: Option<&[NNPoint]>,
            accuracy_per_epoch: &mut Vec<f32>,
        ) {
            if let Some(testing_set) = testing_set {
                let accuracy = nn.accuracy(testing_set);
                accuracy_per_epoch.push(accuracy);
                println!();
                println!("{}", parameters.show());
                println!("{:?}", nn.neurons_per_layer);
                println!("{:?}", accuracy_per_epoch);
            }
        }
        // Exponential learning-rate decay: rate shrinks by 10x every 20 epochs.
        fn next_learning_rate(initial_learning_rate: Float, epoch: usize) -> Float {
            // initial_learning_rate / (1.0 + epoch as Float / 30.0)
            // initial_learning_rate * (0.1 as Float).powf(epoch as Float / 50.0)
            initial_learning_rate * (0.1 as Float).powf(epoch as Float / 20.0)
        }
        use std::time::Instant;
        let now = Instant::now();
        let number_of_epochs = parameters.epochs;
        let batch_size = parameters.batch_size;
        let mut learning_rate = parameters.learning_rate;
        let mut accuracy_per_epoch = Vec::with_capacity(number_of_epochs + 1);
        test(self, parameters, testing_set, &mut accuracy_per_epoch);
        for epoch in 0..number_of_epochs {
            println!("Epoch {}/{}", epoch + 1, number_of_epochs);
            println!("Current learning rate = {}", learning_rate);
            self.iterate_over_epoch(training_set, batch_size, learning_rate);
            test(self, parameters, testing_set, &mut accuracy_per_epoch);
            learning_rate = next_learning_rate(parameters.learning_rate, epoch);
            let elapsed = now.elapsed();
            let total_seconds = elapsed.as_secs();
            let minutes = total_seconds / 60;
            let seconds = total_seconds % 60;
            println!("Duration of training: {} min {} sec", minutes, seconds);
        }
    }
    /// Prints the accuracy achieved on `testing_set`, prefixed by a name.
    pub fn show_accuracy_on(&mut self, testing_set: &[NNPoint], dataset_name: &str) {
        println!(
            "{} dataset accuracy: {:?}",
            dataset_name,
            self.accuracy(testing_set)
        );
    }
    /// Argmax over the current output distribution; panics (`todo!`) only
    /// if the output vector is empty.
    pub fn output_label(&self) -> usize {
        let mut state: Option<(usize, Float)> = None;
        for (i, y) in self.output.iter().enumerate() {
            match state {
                Some((_, max_so_far)) => {
                    if *y > max_so_far {
                        state = Some((i, *y))
                    }
                }
                None => state = Some((i, *y)),
            }
        }
        match state {
            Some((label, _)) => label,
            None => {
                todo!()
            }
        }
    }
    /// Fraction of `testing_set` classified correctly. Runs forward passes
    /// only; weights are untouched.
    pub fn accuracy(&mut self, testing_set: &[NNPoint]) -> f32 {
        let mut num_of_correct_classifications = 0;
        for point in testing_set {
            self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
            self.output_mut(); // This doesn't change the weights
            let neural_network_produced_label = self.output_label();
            if point.label == neural_network_produced_label {
                num_of_correct_classifications += 1
            }
            // println!("max-label: {}, desired-label: {}, prob-distr: {:?}", neural_network_produced_label, point.label, self.output);
        }
        num_of_correct_classifications as f32 / testing_set.len() as f32
    }
    /// Predicted label for every point of `dataset`, in order.
    pub fn test(&mut self, dataset: &[NNPoint]) -> Vec<usize> {
        dataset
            .iter()
            .map(|point| {
                self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
                self.output_mut();
                self.output_label()
            })
            .collect()
    }
    /// Full output probability distribution for every point of `dataset`.
    pub fn test_distributions(&mut self, dataset: &[NNPoint]) -> Vec<Vec<Float>> {
        dataset
            .iter()
            .map(|point| {
                self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
                self.output_mut();
                self.output.clone().to_vec()
            })
            .collect()
    }
}

121
src/preprocessing.rs Normal file
View file

@ -0,0 +1,121 @@
use crate::env;
use crate::float::Float;
use crate::neural_network::NNPoint;
use std::fs::File;
use std::io;
use std::io::{BufRead, BufReader, BufWriter, Write};
// Ways a dataset CSV line can be malformed (distinct from real I/O failures).
#[derive(Debug)]
enum ParsingError {
    CouldNotParseLabel,  // label line is not a valid u8
    CouldNotParseVector, // a pixel value is not a valid u8
    ImageHasWrongSize,   // row length != NUMBER_OF_PIXELS_PER_IMAGE
}
/// Reads paired label/vector CSV lines into `NNPoint`s, taking at most
/// `number_of_points` of them.
///
/// The nested result separates two failure modes: the outer `io::Error` is a
/// genuine I/O failure, the inner `ParsingError` means a line was malformed.
fn parse_dataset(
    labels_buffer: impl BufRead,
    vectors_buffer: impl BufRead,
    number_of_points: usize,
) -> Result<Result<Vec<NNPoint>, ParsingError>, io::Error> {
    let mut output = vec![];
    for (label_result, vector_result) in labels_buffer
        .lines()
        .zip(vectors_buffer.lines())
        .take(number_of_points)
    {
        let label_str = label_result?;
        let image_str = vector_result?;
        let label: u8 = match label_str.parse() {
            Ok(label) => label,
            Err(_) => return Ok(Err(ParsingError::CouldNotParseLabel)),
        };
        // Raw pixel bytes; normalized to Floats only once the row is validated.
        let mut image: Vec<u8> = vec![];
        for str in image_str.split(',') {
            match str.parse() {
                Ok(pixel_value) => {
                    image.push(pixel_value);
                }
                Err(_) => return Ok(Err(ParsingError::CouldNotParseVector)),
            }
        }
        if image.len() != env::NUMBER_OF_PIXELS_PER_IMAGE {
            return Ok(Err(ParsingError::ImageHasWrongSize));
        }
        output.push(NNPoint::new(label, normalize_input(&image)));
    }
    Ok(Ok(output))
}
/// Mean of the raw pixel values.
fn average(vec: &[u8]) -> Float {
    // u32 cannot overflow here: the sum is bounded by
    // NUMBER_OF_PIXELS_PER_IMAGE * 255 ~~ 200 k.
    let sum: u32 = vec.iter().map(|&x| x as u32).sum();
    (sum as Float) / (vec.len() as Float)
}
/// Sum of squared components. Assumes the mean of `vec` is already 0, so
/// this is the variance scaled by the vector length.
fn variance(vec: &[Float]) -> Float {
    vec.iter().map(|x| x * x).sum()
}
/// Shifts a raw image to zero mean and divides by the root of the summed
/// squares of the centered pixels.
///
/// Guards against a zero denominator (a perfectly uniform image): in that
/// case the centered, all-zero vector is returned as-is instead of
/// propagating NaNs into the network.
fn normalize_input(vec: &[u8]) -> Vec<Float> {
    let average = average(vec);
    let mut result: Vec<Float> = vec.iter().map(|x| (*x as Float) - average).collect();
    let stddev = variance(&result).sqrt();
    // BUGFIX: dividing by a zero stddev turned every component of a constant
    // image into NaN; skip the division in that degenerate case.
    if stddev > 0.0 {
        for i in &mut result {
            *i /= stddev;
        }
    }
    result
}
/// Opens the `(labels, vectors)` CSV pair and parses at most
/// `number_of_points` points from it.
///
/// # Errors
/// Propagates any underlying I/O error; a malformed line is reported as an
/// `io::Error` of kind `InvalidData` (the original implementation panicked
/// with `todo!()` here, which made a bad data file abort the program with
/// an unhelpful "not yet implemented" message).
pub fn dataset_from_file(
    (labels_file_path, vectors_file_path): (&str, &str),
    number_of_points: usize,
) -> Result<Vec<NNPoint>, io::Error> {
    let (labels_file, vectors_file): (File, File) = (
        File::open(labels_file_path)?,
        File::open(vectors_file_path)?,
    );
    match parse_dataset(
        BufReader::new(labels_file),
        BufReader::new(vectors_file),
        number_of_points,
    )? {
        Ok(points) => Ok(points),
        Err(parsing_error) => Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("could not parse dataset: {:?}", parsing_error),
        )),
    }
}
/// Writes one predicted label per line to `file_path`.
///
/// # Errors
/// Returns any I/O error from creating, writing, or flushing the file.
pub fn export_to(outputs: &[usize], file_path: &str) -> Result<(), io::Error> {
    let mut file = BufWriter::new(File::create(file_path)?);
    for x in outputs {
        writeln!(file, "{}", x)?
    }
    // Flush explicitly: BufWriter's Drop flushes too, but silently discards
    // any error, so a failed write could previously go unnoticed.
    file.flush()?;
    Ok(())
}
/// Writes one probability distribution per line, formatted as
/// `[p0, p1, ..., ]` — the trailing ", " before the closing bracket is kept
/// for backward compatibility with existing consumers of these files.
///
/// # Errors
/// Returns any I/O error from creating, writing, or flushing the file.
pub fn export_distributions_to(outputs: &[Vec<Float>], file_path: &str) -> Result<(), io::Error> {
    let mut file = BufWriter::new(File::create(file_path)?);
    for ps in outputs {
        let mut s = String::new();
        for p in ps {
            let p_str = format!("{}, ", p);
            s += &p_str
        }
        writeln!(file, "[{}]", s)?
    }
    // Flush explicitly so write errors surface instead of being swallowed
    // by BufWriter's Drop.
    file.flush()?;
    Ok(())
}

315
src/transforms.rs Normal file
View file

@ -0,0 +1,315 @@
use crate::float::Float;
use crate::linear_algebra::{ColumnEfficientMatrix, Vector};
use rand_distr::Normal;
/// Half the squared Euclidean distance between `xs` and `ys`.
pub fn l2_error(xs: &[Float], ys: &[Float]) -> Float {
    let sum_of_squares: Float = xs
        .iter()
        .zip(ys)
        .map(|(x, y)| {
            let d = x - y;
            d * d
        })
        .sum();
    0.5 * sum_of_squares
}
/// Gradient of `l2_error` with respect to `xs`: `out = xs - ys`.
pub fn gradient_l2_error_mut(xs: &[Float], ys: &[Float], out: &mut [Float]) {
    for (i, d) in xs.iter().zip(ys).map(|(x, y)| x - y).enumerate() {
        out[i] = d;
    }
}
/// Logistic function 1 / (1 + e^{-x}).
fn sigmoid(x: Float) -> Float {
    1.0 / (1.0 + (-x).exp())
}
/// Activation used for sigmoid neurons (alias for `sigmoid`).
fn activation_of_sigmoid_potential(potential: Float) -> Float {
    sigmoid(potential)
}
/// Applies the sigmoid activation componentwise: `out = sigmoid(potentials)`.
fn vectorized_sigmoid_activation_of_potential(potentials: &[Float], out: &mut [Float]) {
    for (potential, y) in potentials.iter().zip(out) {
        *y = activation_of_sigmoid_potential(*potential)
    }
}
/// sigma'(x) = sigma(x) * (1 - sigma(x)).
fn derivative_of_sigmoid_activation_of_potential(potential: Float) -> Float {
    let s = sigmoid(potential);
    s * (1.0 - s)
}
/// Chain rule through the sigmoid: fills `derivatives_state` with
/// sigma'(potential), then computes
/// `input_gradient = diag(derivatives_state) * output_gradient`.
fn vectorized_gradient_of_sigmoid_activation_of_potential_mut(
    potentials: &[Float],
    derivatives_state: &mut [Float],
    output_gradient: &[Float],
    input_gradient: &mut [Float],
) {
    use crate::linear_algebra::DiagonalMatrix;
    for (potential, state) in potentials.iter().zip(derivatives_state.iter_mut()) {
        *state = derivative_of_sigmoid_activation_of_potential(*potential)
    }
    DiagonalMatrix::new(derivatives_state).apply_mut(output_gradient, input_gradient);
}
// =====cross-entropy=====
/// Cross-entropy of two probability distributions: -sum(q_i * ln(p_i)).
fn cross_entropy(p: &[Float], q: &[Float]) -> Float {
    let weighted_log_sum: Float = p.iter().zip(q).map(|(x, y)| y * x.ln()).sum();
    -weighted_log_sum
}
/// Cross-entropy against a deterministic second distribution, i.e. one that
/// puts all its mass on index `q`.
fn cross_entropy_simple(p: &[Float], q: usize) -> Float {
    -p[q].ln()
}
/// Gradient of `cross_entropy` with respect to `p`: `out_i = -q_i / p_i`.
fn cross_entropy_derivative_mut(p: &[Float], q: &[Float], out: &mut [Float]) {
    for ((c, a), b) in out.iter_mut().zip(p).zip(q) {
        *c = -b / a
    }
}
/// Gradient of `cross_entropy_simple` with respect to `p`: zero everywhere
/// except `-1 / p[q]` at index `q`.
pub fn cross_entropy_derivative_simple(p: &[Float], q: usize, out: &mut [Float]) {
    // `out` is a reused scratch buffer, so every component must be reset —
    // stale values from the previous call would otherwise leak through.
    out.fill(0.0);
    out[q] = -1.0 / p[q];
}
fn softmax_mut(input: &[Float], out: &mut [Float]) {
let mut s = 0.0;
for (x, y) in input.iter().zip(out.iter_mut()) {
let e = x.exp();
*y = e;
s += e
}
for y in out {
*y /= s
}
}
/// Backpropagation through softmax using the full Jacobian:
/// d softmax_i / d x_j = softmax_i * (delta_ij - softmax_j).
fn softmax_gradient_mut(
    softmax_output: &[Float],
    gradient_output: &[Float],
    gradient_input: &mut [Float],
) {
    for (j, dx) in gradient_input.iter_mut().enumerate() {
        *dx = 0.0;
        for (i, dy) in gradient_output.iter().enumerate() {
            // Note that the gradient matrix is symmetric, so don't worry about the order of
            // indices
            *dx += softmax_output[i] * (if i == j { 1.0 } else { 0.0 } - softmax_output[j]) * dy
        }
    }
}
// relu
/// Rectified linear unit, componentwise: `out[i] = max(input[i], 0)`.
fn relu_mut(input: &[Float], out: &mut [Float]) {
    for (y, x) in out.iter_mut().zip(input) {
        *y = x.max(0.0)
    }
}
/// Backward pass of ReLU: the output gradient passes through wherever the
/// forward input was strictly positive and is zeroed everywhere else.
fn relu_gradient_mut(input: &[Float], gradient_output: &[Float], gradient_input: &mut [Float]) {
    for ((dx, dy), x) in gradient_input.iter_mut().zip(gradient_output).zip(input) {
        *dx = if *x > 0.0 { *dy } else { 0.0 }
    }
}
// =====sigmoid=====
// A fully-connected layer with sigmoid activation. Scratch buffers are
// preallocated so forward and backward passes never allocate.
#[derive(Debug)]
pub struct SigmoidTransform {
    pub weight: ColumnEfficientMatrix, // first column acts as the bias
    potential_vector: Vector,          // W * input, before the activation
    derivatives_state: Vector,         // sigma'(potential), backprop scratch
    potential_gradient: Vector,        // error gradient w.r.t. the potential
    pub weight_gradient: ColumnEfficientMatrix, // accumulated over a batch
}
impl SigmoidTransform {
    /// Allocates the layer with weights drawn from N(0, 1/input_dimension)
    /// (Xavier-style scaling: std dev = 1 / sqrt(fan-in)).
    pub fn new(input_dimension: usize, output_dimension: usize) -> Self {
        let mean = 0.0;
        let std_dev = 1.0 / (input_dimension as Float).sqrt();
        let normal_distr = Normal::new(mean, std_dev).unwrap();
        Self {
            weight: ColumnEfficientMatrix::random_with_normal_distribution(
                input_dimension,
                output_dimension,
                normal_distr,
            ),
            potential_vector: Vector::zero(output_dimension),
            derivatives_state: Vector::zero(output_dimension), // TODO: Can I get rid of this?
            potential_gradient: Vector::zero(output_dimension),
            weight_gradient: ColumnEfficientMatrix::zero(input_dimension, output_dimension),
        }
    }
    /// Forward pass: `output = sigmoid(W * input)`. The potential is cached
    /// for the backward pass.
    pub fn output_mut(&mut self, input: &[Float], output: &mut [Float]) {
        self.weight.apply_mut(input, &mut self.potential_vector[..]); // potential = W[input]
        vectorized_sigmoid_activation_of_potential(&self.potential_vector[..], output);
        // y = f(potential)
    }
    // Note below that (1) and (2) are independent, but they both depend on (0).
    // (0)
    pub fn potential_gradient_mut(&mut self, output_gradient: &[Float]) {
        // updates the potential gradient
        vectorized_gradient_of_sigmoid_activation_of_potential_mut(
            &self.potential_vector[..],
            &mut self.derivatives_state[..],
            output_gradient,
            &mut self.potential_gradient[..],
        ); // potential_gradient = grad[f](potential)[output_gradient]
    }
    // Note that it makes sense to have the two gradients split, since for the input layer we will
    // not need to compute the input gradient, only the weight gradient is important.
    // WARNING: You need to call `potential_gradient_mut` before using the below function
    // (1)
    pub fn gradient_with_respect_to_input_mut(&self, input_gradient: &mut [Float]) {
        // updates the input gradient
        //
        // Note that the first column of `self.weight` is the bias,
        // and the previous layer doesn't care about its gradient.
        // So we just return the gradient below the first component of the input
        // by dropping the bias column.
        self.weight
            .drop_first_column_coapply_mut(&self.potential_gradient[..], input_gradient);
        // transpose[T without the first column][potential_gradient]
    }
    // Note that the proof ensures that the `potential_gradient` has been updated
    // WARNING: You need to call `potential_gradient_mut` before using the below function
    // (2)
    pub fn add_gradient_with_respect_to_weights_mut(&mut self, input: &[Float]) {
        use crate::linear_algebra::VectorTensorCovectorMatrix;
        let matrix = VectorTensorCovectorMatrix::new(&self.potential_gradient[..], input); // grad[f](potential)[output_grad] **tensor** input
        matrix.add_to_mut(&mut self.weight_gradient);
    }
}
// =====softmax=====
/// Fully connected layer with a softmax activation.
///
/// All buffers are preallocated so the forward and backward passes
/// allocate nothing per sample.
#[derive(Debug)]
pub struct SoftmaxTransform {
    /// Weight matrix `W`; its first column is dropped when propagating
    /// the gradient back to the input (bias column — see
    /// `gradient_with_respect_to_input_mut`).
    pub weight: ColumnEfficientMatrix,
    /// Scratch buffer holding the pre-activation `W[input]`.
    potential_vector: Vector,
    softmax_output: Vector, // Used for computation of the softmax gradient
    /// Gradient of the loss with respect to the pre-activation.
    potential_gradient: Vector,
    /// Accumulated gradient of the loss with respect to `weight`.
    pub weight_gradient: ColumnEfficientMatrix,
}
impl SoftmaxTransform {
    /// Creates a softmax layer with weights sampled from a normal
    /// distribution with mean 0 and standard deviation 1/sqrt(input_dimension),
    /// and zero-initialized scratch and gradient buffers.
    pub fn new(input_dimension: usize, output_dimension: usize) -> Self {
        let std_dev = 1.0 / (input_dimension as Float).sqrt();
        let distribution = Normal::new(0.0, std_dev).unwrap();
        let weight = ColumnEfficientMatrix::random_with_normal_distribution(
            input_dimension,
            output_dimension,
            distribution,
        );
        Self {
            weight,
            potential_vector: Vector::zero(output_dimension),
            softmax_output: Vector::zero(output_dimension),
            potential_gradient: Vector::zero(output_dimension),
            weight_gradient: ColumnEfficientMatrix::zero(input_dimension, output_dimension),
        }
    }
    /// Forward pass: `output = softmax(W[input])`. A copy of the output is
    /// kept in `self.softmax_output`, since the softmax gradient is computed
    /// from the outputs rather than from the potentials.
    pub fn output_mut(&mut self, input: &[Float], output: &mut [Float]) {
        // potential = W[input]
        self.weight.apply_mut(input, &mut self.potential_vector[..]);
        // output = softmax(potential)
        softmax_mut(&self.potential_vector[..], output);
        self.softmax_output.copy_from_slice(output);
    }
    /// Pulls `output_gradient` back through the softmax:
    /// `potential_gradient = grad[softmax](potential)[output_gradient]`.
    ///
    /// Must be called before either of the two gradient methods below,
    /// which both read `self.potential_gradient`.
    pub fn potential_gradient_mut(&mut self, output_gradient: &[Float]) {
        softmax_gradient_mut(
            &self.softmax_output[..],
            output_gradient,
            &mut self.potential_gradient[..],
        );
    }
    /// Writes the gradient with respect to the layer input, dropping the
    /// first (bias) column of the weight matrix since the previous layer
    /// does not need its gradient.
    ///
    /// WARNING: call `potential_gradient_mut` before this method.
    pub fn gradient_with_respect_to_input_mut(&self, input_gradient: &mut [Float]) {
        // transpose[W without the first column][potential_gradient]
        self.weight
            .drop_first_column_coapply_mut(&self.potential_gradient[..], input_gradient);
    }
    /// Accumulates the outer product `potential_gradient (tensor) input`
    /// into `self.weight_gradient`.
    ///
    /// WARNING: call `potential_gradient_mut` before this method.
    pub fn add_gradient_with_respect_to_weights_mut(&mut self, input: &[Float]) {
        use crate::linear_algebra::VectorTensorCovectorMatrix;
        let outer_product = VectorTensorCovectorMatrix::new(&self.potential_gradient[..], input);
        outer_product.add_to_mut(&mut self.weight_gradient);
    }
}
/// Fully connected layer with a ReLU activation.
///
/// All buffers are preallocated so the forward and backward passes
/// allocate nothing per sample.
#[derive(Debug)]
pub struct ReluTransform {
    /// Weight matrix `W`; its first column is dropped when propagating
    /// the gradient back to the input (bias column — see
    /// `gradient_with_respect_to_input_mut`).
    pub weight: ColumnEfficientMatrix,
    /// Scratch buffer holding the pre-activation `W[input]`.
    potential_vector: Vector,
    /// Gradient of the loss with respect to the pre-activation.
    potential_gradient: Vector,
    /// Accumulated gradient of the loss with respect to `weight`.
    pub weight_gradient: ColumnEfficientMatrix,
}
impl ReluTransform {
    /// Creates a ReLU layer with weights sampled from a normal
    /// distribution with mean 0 and standard deviation 1/sqrt(input_dimension),
    /// and zero-initialized scratch and gradient buffers.
    pub fn new(input_dimension: usize, output_dimension: usize) -> Self {
        let std_dev = 1.0 / (input_dimension as Float).sqrt();
        let distribution = Normal::new(0.0, std_dev).unwrap();
        let weight = ColumnEfficientMatrix::random_with_normal_distribution(
            input_dimension,
            output_dimension,
            distribution,
        );
        Self {
            weight,
            potential_vector: Vector::zero(output_dimension),
            potential_gradient: Vector::zero(output_dimension),
            weight_gradient: ColumnEfficientMatrix::zero(input_dimension, output_dimension),
        }
    }
    /// Forward pass: `output = relu(W[input])`. The pre-activation is kept
    /// in `self.potential_vector` for the backward pass.
    pub fn output_mut(&mut self, input: &[Float], output: &mut [Float]) {
        // potential = W[input]
        self.weight.apply_mut(input, &mut self.potential_vector[..]);
        // output = relu(potential)
        relu_mut(&self.potential_vector[..], output);
    }
    /// Pulls `output_gradient` back through the ReLU activation:
    /// `potential_gradient = grad[relu](potential)[output_gradient]`.
    ///
    /// Must be called before either of the two gradient methods below,
    /// which both read `self.potential_gradient`.
    pub fn potential_gradient_mut(&mut self, output_gradient: &[Float]) {
        relu_gradient_mut(
            &self.potential_vector[..],
            output_gradient,
            &mut self.potential_gradient[..],
        );
    }
    /// Writes the gradient with respect to the layer input, dropping the
    /// first (bias) column of the weight matrix since the previous layer
    /// does not need its gradient.
    ///
    /// WARNING: call `potential_gradient_mut` before this method.
    pub fn gradient_with_respect_to_input_mut(&self, input_gradient: &mut [Float]) {
        // transpose[W without the first column][potential_gradient]
        self.weight
            .drop_first_column_coapply_mut(&self.potential_gradient[..], input_gradient);
    }
    /// Accumulates the outer product `potential_gradient (tensor) input`
    /// into `self.weight_gradient`.
    ///
    /// WARNING: call `potential_gradient_mut` before this method.
    pub fn add_gradient_with_respect_to_weights_mut(&mut self, input: &[Float]) {
        use crate::linear_algebra::VectorTensorCovectorMatrix;
        let outer_product = VectorTensorCovectorMatrix::new(&self.potential_gradient[..], input);
        outer_product.add_to_mut(&mut self.weight_gradient);
    }
}

# ===== tmp_repl.txt (23 lines) — frequently used commands =====
cargo init --edition 2018
cargo add rand
cargo add rand_distr
cargo run --release
module add rust
cargo build
cargo fmt
# linter
cargo clippy -- -D warnings
python3 evaluator/evaluate.py test_predictions.csv data/fashion_mnist_test_labels.csv
python3 evaluator/evaluate.py train_predictions.csv data/fashion_mnist_train_labels.csv