Initial commit
This commit is contained in:
commit
fa8eae3146
17 changed files with 141328 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
target/
|
||||
108
Cargo.lock
generated
Normal file
108
Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.150"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "pv021_project"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"rand",
|
||||
"rand_distr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_distr"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
10
Cargo.toml
Normal file
10
Cargo.toml
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
[package]
|
||||
name = "pv021_project"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rand = "0.8.5"
|
||||
rand_distr = "0.4.3"
|
||||
BIN
OVERVIEW.png
Normal file
BIN
OVERVIEW.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 649 KiB |
16
README.md
Normal file
16
README.md
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
Rust Deep Learning
|
||||
|
||||
School Project I did for a Neural Networks Class at the end of 2023:
|
||||
- Implements a neural net (backpropagation / multilayer perceptron) in Rust
|
||||
- Constraint: Can't use any Linear Algebra libraries or frameworks
|
||||
- Training/Dataset: Fashion-MNIST dataset (achieves about 91% accuracy in less than 10 min of training)
|
||||
|
||||
See `OVERVIEW.png` for the underlying math I came up with to do backprop and organize memory.
|
||||
|
||||
# Dataset
|
||||
Fashion MNIST (https://arxiv.org/pdf/1708.07747.pdf). Dataset of images ‒ consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. The dataset is in CSV format:
|
||||
- `fashion_mnist_train_vectors.csv` - training input vectors
|
||||
- `fashion_mnist_test_vectors.csv` - testing input vectors
|
||||
- `fashion_mnist_train_labels.csv` - training labels
|
||||
- `fashion_mnist_test_labels.csv` - testing labels
|
||||
|
||||
10000
data/fashion_mnist_test_labels.csv
Normal file
10000
data/fashion_mnist_test_labels.csv
Normal file
File diff suppressed because it is too large
Load diff
10000
data/fashion_mnist_test_vectors.csv
Normal file
10000
data/fashion_mnist_test_vectors.csv
Normal file
File diff suppressed because it is too large
Load diff
60000
data/fashion_mnist_train_labels.csv
Normal file
60000
data/fashion_mnist_train_labels.csv
Normal file
File diff suppressed because it is too large
Load diff
60000
data/fashion_mnist_train_vectors.csv
Normal file
60000
data/fashion_mnist_train_vectors.csv
Normal file
File diff suppressed because it is too large
Load diff
16
src/env.rs
Normal file
16
src/env.rs
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
pub const NUMBER_OF_PIXELS_PER_IMAGE: usize = 28 * 28;
|
||||
|
||||
pub const TEST_CSVS: (&str, &str) = (
|
||||
"data/fashion_mnist_test_labels.csv",
|
||||
"data/fashion_mnist_test_vectors.csv",
|
||||
);
|
||||
pub const TRAIN_CSVS: (&str, &str) = (
|
||||
"data/fashion_mnist_train_labels.csv",
|
||||
"data/fashion_mnist_train_vectors.csv",
|
||||
);
|
||||
|
||||
pub const TEST_PREDICTIONS_CSV: &str = "test_predictions.csv";
|
||||
pub const TRAIN_PREDICTIONS_CSV: &str = "train_predictions.csv";
|
||||
|
||||
pub const TEST_DISTRIBUTIONS_CSV: &str = "test_distributions.csv";
|
||||
pub const TRAIN_DISTRIBUTIONS_CSV: &str = "train_distributions.csv";
|
||||
1
src/float.rs
Normal file
1
src/float.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub type Float = f32; // f64 or f32, doesn't seem to make any difference
|
||||
305
src/linear_algebra.rs
Normal file
305
src/linear_algebra.rs
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
mod raw_operation {
|
||||
use crate::float::Float;
|
||||
|
||||
pub fn zero(out: &mut [Float]) {
|
||||
for y in out {
|
||||
*y = 0.0
|
||||
}
|
||||
}
|
||||
|
||||
pub fn inner_product(xs: &[Float], ys: &[Float]) -> Float {
|
||||
let mut result: Float = 0.0;
|
||||
for (x, y) in xs.iter().zip(ys) {
|
||||
result += x * y;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn scale(xs: &[Float], k: Float, out: &mut [Float]) {
|
||||
for (x, y) in xs.iter().zip(out) {
|
||||
*y = *x * k
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(xs: &[Float], ys: &[Float], out: &mut [Float]) {
|
||||
for ((x, y), z) in xs.iter().zip(ys).zip(out) {
|
||||
*z = *x + *y
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sub(xs: &[Float], ys: &[Float], out: &mut [Float]) {
|
||||
for ((x, y), z) in xs.iter().zip(ys).zip(out) {
|
||||
*z = *x - *y
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use crate::float::Float;
|
||||
use rand_distr::{Distribution, Normal};
|
||||
use std::iter::FromIterator;
|
||||
use std::ops::{Index, IndexMut};
|
||||
use std::slice::{Iter, IterMut, SliceIndex};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Vector(Vec<Float>);
|
||||
|
||||
impl<Idx> Index<Idx> for Vector
|
||||
where
|
||||
Idx: SliceIndex<[Float]>,
|
||||
{
|
||||
type Output = Idx::Output;
|
||||
|
||||
fn index(&self, index: Idx) -> &Self::Output {
|
||||
&self.0[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl<Idx> IndexMut<Idx> for Vector
|
||||
where
|
||||
Idx: SliceIndex<[Float]>,
|
||||
{
|
||||
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
|
||||
&mut self.0[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl FromIterator<Float> for Vector {
|
||||
fn from_iter<I: IntoIterator<Item = Float>>(iter: I) -> Self {
|
||||
let mut v = vec![];
|
||||
for x in iter {
|
||||
v.push(x)
|
||||
}
|
||||
Vector(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector {
|
||||
pub fn iter(&self) -> Iter<'_, Float> {
|
||||
self.0.iter()
|
||||
}
|
||||
|
||||
pub fn iter_mut(&mut self) -> IterMut<'_, Float> {
|
||||
self.0.iter_mut()
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &[Float] {
|
||||
self.0.as_slice()
|
||||
}
|
||||
|
||||
pub fn as_mut_slice(&mut self) -> &mut [Float] {
|
||||
self.0.as_mut_slice()
|
||||
}
|
||||
|
||||
pub fn copy_from_slice(&mut self, src: &[Float]) {
|
||||
self.0.copy_from_slice(src)
|
||||
}
|
||||
|
||||
pub fn to_vec(self) -> Vec<Float> {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector {
|
||||
pub fn new(vector: Vec<Float>) -> Self {
|
||||
Self(vector)
|
||||
}
|
||||
|
||||
pub fn zero(size: usize) -> Self {
|
||||
Self(vec![0.0; size])
|
||||
}
|
||||
|
||||
pub fn add_mut(&self, w: &[Float], out: &mut [Float]) {
|
||||
raw_operation::add(&self[..], w, out)
|
||||
}
|
||||
|
||||
pub fn sub_mut(&self, w: &[Float], out: &mut [Float]) {
|
||||
raw_operation::add(&self[..], w, out)
|
||||
}
|
||||
|
||||
pub fn scale_mut(&self, k: Float, out: &mut [Float]) {
|
||||
raw_operation::scale(&self[..], k, out)
|
||||
}
|
||||
|
||||
pub fn inner_product(&self, w: &[Float]) -> Float {
|
||||
raw_operation::inner_product(&self[..], w)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ColumnEfficientMatrix {
|
||||
pub input_dimension: usize,
|
||||
pub output_dimension: usize,
|
||||
pub components: Vec<Float>,
|
||||
}
|
||||
|
||||
impl Index<(usize, usize)> for ColumnEfficientMatrix {
|
||||
type Output = Float;
|
||||
|
||||
fn index(&self, (column, row): (usize, usize)) -> &Self::Output {
|
||||
&self.components[column * self.output_dimension + row]
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMut<(usize, usize)> for ColumnEfficientMatrix {
|
||||
fn index_mut(&mut self, (column, row): (usize, usize)) -> &mut Self::Output {
|
||||
&mut self.components[column * self.output_dimension + row]
|
||||
}
|
||||
}
|
||||
|
||||
impl ColumnEfficientMatrix {
|
||||
pub fn from_rows(rows: Vec<Vec<Float>>) -> Self {
|
||||
let output_dimension = rows.len();
|
||||
if output_dimension == 0 {
|
||||
Self {
|
||||
input_dimension: 0,
|
||||
output_dimension: 0,
|
||||
components: vec![],
|
||||
}
|
||||
} else {
|
||||
let input_dimension = rows[0].len();
|
||||
let mut components = Vec::with_capacity(input_dimension * output_dimension);
|
||||
for i in 0..input_dimension {
|
||||
for j in 0..output_dimension {
|
||||
components.push(rows[j][i])
|
||||
}
|
||||
}
|
||||
Self {
|
||||
input_dimension,
|
||||
output_dimension,
|
||||
components,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn zero(input_dimension: usize, output_dimension: usize) -> ColumnEfficientMatrix {
|
||||
Self {
|
||||
input_dimension,
|
||||
output_dimension,
|
||||
components: vec![0.0; input_dimension * output_dimension],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn random_with_normal_distribution(
|
||||
input_dimension: usize,
|
||||
output_dimension: usize,
|
||||
distribution: Normal<Float>,
|
||||
) -> ColumnEfficientMatrix {
|
||||
let mut components = Vec::with_capacity(input_dimension * output_dimension);
|
||||
for _ in 0..input_dimension {
|
||||
for _ in 0..output_dimension {
|
||||
components.push(distribution.sample(&mut rand::thread_rng()))
|
||||
}
|
||||
}
|
||||
|
||||
Self {
|
||||
input_dimension,
|
||||
output_dimension,
|
||||
components,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn zero_mut(&mut self) {
|
||||
for a in &mut self.components {
|
||||
*a = 0.0
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_scaled_mut(&self, k: Float, b: &Self, c: &mut Self) {
|
||||
for ((a, b), c) in self
|
||||
.components
|
||||
.iter()
|
||||
.zip(&b.components)
|
||||
.zip(&mut c.components)
|
||||
{
|
||||
*c = *a + *b * k
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_to_self_scaled_mut(&mut self, k: Float, b: &Self) {
|
||||
for (a, b) in self.components.iter_mut().zip(&b.components) {
|
||||
*a += *b * k
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_mut(&self, v: &[Float], out: &mut [Float]) {
|
||||
for j in 0..self.output_dimension {
|
||||
let mut result: Float = 0.0;
|
||||
for i in 0..self.input_dimension {
|
||||
result += self[(i, j)] * v[i]
|
||||
}
|
||||
out[j] = result;
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the transpose
|
||||
pub fn coapply_mut(&self, w: &[Float], out: &mut [Float]) {
|
||||
for i in 0..self.input_dimension {
|
||||
let start_index = i * self.output_dimension;
|
||||
out[i] = raw_operation::inner_product(
|
||||
&self.components[start_index..start_index + self.output_dimension],
|
||||
w,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn drop_first_column_coapply_mut(&self, w: &[Float], out: &mut [Float]) {
|
||||
for i in 1..self.input_dimension {
|
||||
let start_index = i * self.output_dimension;
|
||||
out[i - 1] = raw_operation::inner_product(
|
||||
&self.components[start_index..start_index + self.output_dimension],
|
||||
w,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DiagonalMatrix<'a> {
|
||||
pub diagonal: &'a [Float],
|
||||
}
|
||||
|
||||
impl<'a> DiagonalMatrix<'a> {
|
||||
pub fn new(diagonal: &'a [Float]) -> DiagonalMatrix<'a> {
|
||||
Self { diagonal }
|
||||
}
|
||||
|
||||
pub fn apply_mut(&self, v: &[Float], out: &mut [Float]) {
|
||||
for (i, (d, x)) in self.diagonal.iter().zip(v.iter()).enumerate() {
|
||||
out[i] = d * x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Given vector `w : W` and a covector `f : V -> Float`,
|
||||
// the linear map `w tensor f : V -> W` computes `v : V ~> f(v) * w`
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct VectorTensorCovectorMatrix<'a> {
|
||||
pub output_vector: &'a [Float], // w : W
|
||||
pub input_covector: &'a [Float], // f : V -> Float
|
||||
}
|
||||
|
||||
impl<'a> VectorTensorCovectorMatrix<'a> {
|
||||
pub fn new(
|
||||
output_vector: &'a [Float],
|
||||
input_covector: &'a [Float],
|
||||
) -> VectorTensorCovectorMatrix<'a> {
|
||||
Self {
|
||||
output_vector,
|
||||
input_covector,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_mut(&self, v: &[Float], out: &mut [Float]) {
|
||||
let scalar = raw_operation::inner_product(self.input_covector, v);
|
||||
raw_operation::scale(self.output_vector, scalar, out)
|
||||
}
|
||||
|
||||
pub fn add_to_mut(&self, matrix: &mut ColumnEfficientMatrix) {
|
||||
// TODO: Surely this can be optimized by iterating over the columns of the matrix directly
|
||||
for (i, x) in self.input_covector.iter().enumerate() {
|
||||
for (j, y) in self.output_vector.iter().enumerate() {
|
||||
matrix[(i, j)] += x * y
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
76
src/main.rs
Normal file
76
src/main.rs
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
mod env;
|
||||
mod float;
|
||||
mod linear_algebra;
|
||||
mod neural_network;
|
||||
mod preprocessing;
|
||||
mod transforms;
|
||||
|
||||
use preprocessing::{dataset_from_file, export_distributions_to, export_to};
|
||||
use std::io;
|
||||
|
||||
use crate::neural_network::{NNPoint, NeuralNetworkInTraining, NeuralNetworkParameters};
|
||||
|
||||
fn main() -> Result<(), io::Error> {
|
||||
let size_of_full_training_dataset = 60000;
|
||||
let size_of_training_dataset = 50000;
|
||||
let size_of_validation_dataset = 10000;
|
||||
let size_of_testing_dataset = 10000;
|
||||
|
||||
println!("Importing datasets...");
|
||||
let full_training_dataset: Vec<NNPoint> =
|
||||
dataset_from_file(env::TRAIN_CSVS, size_of_full_training_dataset)?;
|
||||
let mut training_dataset: Vec<NNPoint> = {
|
||||
let mut training_dataset = Vec::with_capacity(size_of_training_dataset);
|
||||
training_dataset.extend_from_slice(&full_training_dataset[0..size_of_training_dataset]);
|
||||
training_dataset
|
||||
};
|
||||
let validation_dataset: Vec<NNPoint> = {
|
||||
let mut validation_dataset = Vec::with_capacity(size_of_validation_dataset);
|
||||
validation_dataset.extend_from_slice(
|
||||
&full_training_dataset
|
||||
[size_of_training_dataset..size_of_training_dataset + size_of_validation_dataset],
|
||||
);
|
||||
validation_dataset
|
||||
};
|
||||
let test_dataset: Vec<NNPoint> = dataset_from_file(env::TEST_CSVS, size_of_testing_dataset)?;
|
||||
|
||||
// let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 70, 10]);
|
||||
// let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 10]);
|
||||
// let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 60, 40, 10]);
|
||||
// let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 10]);
|
||||
// let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 60, 40, 10]); // batch=20, rate=2 is pretty good. Seems to reliably reach 89%
|
||||
// let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 80, 60, 40, 10]); // batch=20, rate=2, pretty good like 88 % then suddenly drops to 10%
|
||||
// let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 150, 10]); // this gets me over 90%, nice
|
||||
let mut nn = NeuralNetworkInTraining::new(vec![env::NUMBER_OF_PIXELS_PER_IMAGE, 150, 150, 10]);
|
||||
|
||||
println!("Begin training");
|
||||
|
||||
let params = NeuralNetworkParameters {
|
||||
epochs: 30,
|
||||
batch_size: 30,
|
||||
learning_rate: 2.00,
|
||||
};
|
||||
nn.train(params, &mut training_dataset, Some(&validation_dataset));
|
||||
// nn.train(params, &mut training_dataset, None);
|
||||
|
||||
nn.show_accuracy_on(&full_training_dataset, "training");
|
||||
nn.show_accuracy_on(&test_dataset, "test");
|
||||
|
||||
let predictions_on_full_training_set = nn.test(&full_training_dataset);
|
||||
let predictions_on_test_set = nn.test(&test_dataset);
|
||||
println!("Exporting to {}", env::TRAIN_PREDICTIONS_CSV);
|
||||
export_to(
|
||||
&predictions_on_full_training_set,
|
||||
env::TRAIN_PREDICTIONS_CSV,
|
||||
)?;
|
||||
println!("Exporting to {}", env::TEST_PREDICTIONS_CSV);
|
||||
export_to(&predictions_on_test_set, env::TEST_PREDICTIONS_CSV)?;
|
||||
|
||||
// TODO: Comment this out
|
||||
// let distributions_on_full_training_set = nn.test_distributions(&full_training_dataset);
|
||||
// let distributions_on_test_set = nn.test_distributions(&test_dataset);
|
||||
// export_distributions_to(&distributions_on_full_training_set, env::TRAIN_DISTRIBUTIONS_CSV)?;
|
||||
// export_distributions_to(&distributions_on_test_set, env::TEST_DISTRIBUTIONS_CSV)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
336
src/neural_network.rs
Normal file
336
src/neural_network.rs
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
use crate::float::Float;
|
||||
use crate::linear_algebra::Vector;
|
||||
|
||||
use crate::transforms;
|
||||
use crate::transforms::{ReluTransform, SoftmaxTransform};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NNPoint {
|
||||
label: usize,
|
||||
normalized_image: Vec<Float>,
|
||||
}
|
||||
|
||||
impl NNPoint {
|
||||
pub fn new(label: u8, normalized_image: Vec<Float>) -> Self {
|
||||
Self {
|
||||
label: label as usize,
|
||||
normalized_image,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NeuralNetworkInTraining {
|
||||
neurons_per_layer: Vec<usize>,
|
||||
|
||||
inputs: Vec<Vector>, // Each input will start with 1
|
||||
output: Vector,
|
||||
input_gradients: Vec<Vector>,
|
||||
|
||||
transforms: Vec<ReluTransform>,
|
||||
output_transform: SoftmaxTransform,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct NeuralNetworkParameters {
|
||||
pub epochs: usize,
|
||||
pub batch_size: usize,
|
||||
pub learning_rate: Float,
|
||||
}
|
||||
|
||||
impl NeuralNetworkParameters {
|
||||
fn show(&self) -> String {
|
||||
format!(
|
||||
"epoch = {}, batch = {}, rate = {}",
|
||||
self.epochs, self.batch_size, self.learning_rate
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl NeuralNetworkInTraining {
|
||||
pub fn new(neurons_per_layer: Vec<usize>) -> Self {
|
||||
// e.g. neurons_per_layer = [699, 79, 49, 19, 14, 10]
|
||||
|
||||
// By a layer here we mean a collection of neurons that's between two neighbouring Transforms or the initial input or final input neurons.
|
||||
// Note that there are N + 1 layers where N is the number of Transforms used.
|
||||
if neurons_per_layer.len() < 2 {
|
||||
todo!()
|
||||
}
|
||||
|
||||
let neurons_per_layer_except_last: Vec<usize> = neurons_per_layer
|
||||
.clone()
|
||||
.into_iter()
|
||||
.rev()
|
||||
.skip(1)
|
||||
.rev()
|
||||
.collect();
|
||||
let neurons_per_layer_except_first: Vec<usize> =
|
||||
neurons_per_layer.clone().into_iter().skip(1).collect();
|
||||
|
||||
// Convention: the first component should always be 1.0 - this allows the first
|
||||
// column of the weight matrix to be interpreted as bias.
|
||||
let inputs: Vec<Vector> = neurons_per_layer_except_last
|
||||
.iter()
|
||||
.map(|neuron_count| {
|
||||
let mut v = Vector::zero(neuron_count + 1);
|
||||
v[0] = 1.0;
|
||||
v
|
||||
})
|
||||
.collect();
|
||||
|
||||
let input_gradients: Vec<Vector> = neurons_per_layer_except_first
|
||||
.iter()
|
||||
.map(|neuron_count| Vector::zero(*neuron_count))
|
||||
.collect();
|
||||
|
||||
let transforms: Vec<ReluTransform> = neurons_per_layer_except_last
|
||||
.iter()
|
||||
.zip(neurons_per_layer_except_last.iter().skip(1))
|
||||
.map(|(input_neuron_count, output_neuron_count)| {
|
||||
ReluTransform::new(*input_neuron_count + 1, *output_neuron_count)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let neurons_in_last_layer = *neurons_per_layer.last().unwrap();
|
||||
let neurons_in_next_to_last_layer = *neurons_per_layer_except_last.last().unwrap();
|
||||
|
||||
let output = Vector::zero(neurons_in_last_layer);
|
||||
let output_transform =
|
||||
SoftmaxTransform::new(neurons_in_next_to_last_layer + 1, neurons_in_last_layer);
|
||||
|
||||
Self {
|
||||
neurons_per_layer,
|
||||
|
||||
inputs,
|
||||
output,
|
||||
input_gradients,
|
||||
|
||||
transforms,
|
||||
output_transform,
|
||||
}
|
||||
}
|
||||
|
||||
// You need to initialize inputs[0] before use.
|
||||
fn output_mut(&mut self) {
|
||||
// The following iterates over pairs of neighbouring inputs where we
|
||||
// have a mutable reference to both of them.
|
||||
// With Rust borrow-checking rules this can't be done directly,
|
||||
// so I had to resort to `split_at_mut(k)`
|
||||
for i in 0..self.inputs.len() - 1 {
|
||||
let (left_inputs, right_inputs) = self.inputs.split_at_mut(i + 1);
|
||||
|
||||
let transform = &mut self.transforms[i];
|
||||
let input = &left_inputs[i][..];
|
||||
let output = &mut right_inputs[0][1..];
|
||||
transform.output_mut(input, output);
|
||||
}
|
||||
self.output_transform.output_mut(
|
||||
&self.inputs[self.inputs.len() - 1][..],
|
||||
&mut self.output[..],
|
||||
);
|
||||
}
|
||||
|
||||
// Initialize input_gradients[-1] with error gradient before use.
|
||||
fn update_weights_mut(&mut self) {
|
||||
let last_index = self.input_gradients.len() - 1;
|
||||
|
||||
// TODO: Last layer is no different. This should be part of the same loop
|
||||
{
|
||||
let transform = &mut self.output_transform;
|
||||
|
||||
transform.potential_gradient_mut(&self.input_gradients[last_index][..]);
|
||||
transform
|
||||
.gradient_with_respect_to_input_mut(&mut self.input_gradients[last_index - 1][..]);
|
||||
transform.add_gradient_with_respect_to_weights_mut(&self.inputs[last_index][..]);
|
||||
}
|
||||
|
||||
for i in (0..self.input_gradients.len() - 2).rev() {
|
||||
let (left_input_gradient, right_input_gradient) =
|
||||
self.input_gradients.split_at_mut(i + 1);
|
||||
|
||||
let transform = &mut self.transforms[i + 1];
|
||||
let left_grad = &mut left_input_gradient[i][..];
|
||||
let right_grad = &right_input_gradient[0][..];
|
||||
let input = &self.inputs[i + 1][..];
|
||||
|
||||
transform.potential_gradient_mut(right_grad);
|
||||
transform.gradient_with_respect_to_input_mut(left_grad);
|
||||
transform.add_gradient_with_respect_to_weights_mut(input);
|
||||
}
|
||||
|
||||
{
|
||||
let transform = &mut self.transforms[0];
|
||||
transform.potential_gradient_mut(&self.input_gradients[0][..]);
|
||||
transform.add_gradient_with_respect_to_weights_mut(&self.inputs[0][..]);
|
||||
// Note that we are not computing gradient with respect to input, since this is the
|
||||
// first layer and we don't care about changes to input, only to weights.
|
||||
}
|
||||
}
|
||||
|
||||
fn forward_and_backwards_mut(&mut self, point: &NNPoint) {
|
||||
self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
|
||||
self.output_mut();
|
||||
|
||||
let last_index = self.input_gradients.len() - 1;
|
||||
// WARNING: Use proper error function
|
||||
// transforms::gradient_error_mut(&self.output[..], desired_output, &mut self.input_gradients[last_index][..]);
|
||||
transforms::cross_entropy_derivative_simple(
|
||||
&self.output[..],
|
||||
point.label,
|
||||
&mut self.input_gradients[last_index][..],
|
||||
);
|
||||
self.update_weights_mut()
|
||||
}
|
||||
|
||||
fn iterate_over_batch_mut(&mut self, learning_rate: Float, batch: &[NNPoint]) {
|
||||
// iterates over the batch, while updating gradient of weights.
|
||||
for point in batch {
|
||||
self.forward_and_backwards_mut(point);
|
||||
}
|
||||
|
||||
// Update the current weights by the opposite of the epsilon / batch_size *weight_gradients
|
||||
let batch_size = batch.len() as Float;
|
||||
let epsilon = -learning_rate / batch_size;
|
||||
|
||||
for transform in &mut self.transforms {
|
||||
transform
|
||||
.weight
|
||||
.add_to_self_scaled_mut(epsilon, &transform.weight_gradient);
|
||||
// Resets the weight gradient to zero so it can be used in next batch.
|
||||
transform.weight_gradient.zero_mut();
|
||||
}
|
||||
}
|
||||
|
||||
fn iterate_over_epoch(
|
||||
&mut self,
|
||||
training_set: &mut [NNPoint],
|
||||
batch_size: usize,
|
||||
learning_rate: Float,
|
||||
) {
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::thread_rng;
|
||||
training_set.shuffle(&mut thread_rng()); // Shuffling is linear in the size of the slice
|
||||
|
||||
for batch in training_set.chunks(batch_size) {
|
||||
self.iterate_over_batch_mut(learning_rate, batch);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn train(
|
||||
&mut self,
|
||||
parameters: NeuralNetworkParameters,
|
||||
training_set: &mut [NNPoint],
|
||||
testing_set: Option<&[NNPoint]>,
|
||||
) {
|
||||
fn test(
|
||||
nn: &mut NeuralNetworkInTraining,
|
||||
parameters: NeuralNetworkParameters,
|
||||
testing_set: Option<&[NNPoint]>,
|
||||
accuracy_per_epoch: &mut Vec<f32>,
|
||||
) {
|
||||
if let Some(testing_set) = testing_set {
|
||||
let accuracy = nn.accuracy(testing_set);
|
||||
accuracy_per_epoch.push(accuracy);
|
||||
println!();
|
||||
println!("{}", parameters.show());
|
||||
println!("{:?}", nn.neurons_per_layer);
|
||||
println!("{:?}", accuracy_per_epoch);
|
||||
}
|
||||
}
|
||||
|
||||
fn next_learning_rate(initial_learning_rate: Float, epoch: usize) -> Float {
|
||||
// initial_learning_rate / (1.0 + epoch as Float / 30.0)
|
||||
// initial_learning_rate * (0.1 as Float).powf(epoch as Float / 50.0)
|
||||
initial_learning_rate * (0.1 as Float).powf(epoch as Float / 20.0)
|
||||
}
|
||||
|
||||
use std::time::Instant;
|
||||
let now = Instant::now();
|
||||
|
||||
let number_of_epochs = parameters.epochs;
|
||||
let batch_size = parameters.batch_size;
|
||||
let mut learning_rate = parameters.learning_rate;
|
||||
let mut accuracy_per_epoch = Vec::with_capacity(number_of_epochs + 1);
|
||||
|
||||
test(self, parameters, testing_set, &mut accuracy_per_epoch);
|
||||
for epoch in 0..number_of_epochs {
|
||||
println!("Epoch {}/{}", epoch + 1, number_of_epochs);
|
||||
println!("Current learning rate = {}", learning_rate);
|
||||
self.iterate_over_epoch(training_set, batch_size, learning_rate);
|
||||
test(self, parameters, testing_set, &mut accuracy_per_epoch);
|
||||
|
||||
learning_rate = next_learning_rate(parameters.learning_rate, epoch);
|
||||
|
||||
let elapsed = now.elapsed();
|
||||
let total_seconds = elapsed.as_secs();
|
||||
let minutes = total_seconds / 60;
|
||||
let seconds = total_seconds % 60;
|
||||
println!("Duration of training: {} min {} sec", minutes, seconds);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn show_accuracy_on(&mut self, testing_set: &[NNPoint], dataset_name: &str) {
|
||||
println!(
|
||||
"{} dataset accuracy: {:?}",
|
||||
dataset_name,
|
||||
self.accuracy(testing_set)
|
||||
);
|
||||
}
|
||||
|
||||
pub fn output_label(&self) -> usize {
|
||||
let mut state: Option<(usize, Float)> = None;
|
||||
for (i, y) in self.output.iter().enumerate() {
|
||||
match state {
|
||||
Some((_, max_so_far)) => {
|
||||
if *y > max_so_far {
|
||||
state = Some((i, *y))
|
||||
}
|
||||
}
|
||||
None => state = Some((i, *y)),
|
||||
}
|
||||
}
|
||||
match state {
|
||||
Some((label, _)) => label,
|
||||
None => {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn accuracy(&mut self, testing_set: &[NNPoint]) -> f32 {
|
||||
let mut num_of_correct_classifications = 0;
|
||||
for point in testing_set {
|
||||
self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
|
||||
self.output_mut(); // This doesn't change the weights
|
||||
|
||||
let neural_network_produced_label = self.output_label();
|
||||
if point.label == neural_network_produced_label {
|
||||
num_of_correct_classifications += 1
|
||||
}
|
||||
// println!("max-label: {}, desired-label: {}, prob-distr: {:?}", neural_network_produced_label, point.label, self.output);
|
||||
}
|
||||
num_of_correct_classifications as f32 / testing_set.len() as f32
|
||||
}
|
||||
|
||||
pub fn test(&mut self, dataset: &[NNPoint]) -> Vec<usize> {
|
||||
dataset
|
||||
.iter()
|
||||
.map(|point| {
|
||||
self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
|
||||
self.output_mut();
|
||||
self.output_label()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn test_distributions(&mut self, dataset: &[NNPoint]) -> Vec<Vec<Float>> {
|
||||
dataset
|
||||
.iter()
|
||||
.map(|point| {
|
||||
self.inputs[0][1..].copy_from_slice(&point.normalized_image[..]);
|
||||
self.output_mut();
|
||||
self.output.clone().to_vec()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
121
src/preprocessing.rs
Normal file
121
src/preprocessing.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use crate::env;
|
||||
use crate::float::Float;
|
||||
use crate::neural_network::NNPoint;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::io::{BufRead, BufReader, BufWriter, Write};
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ParsingError {
|
||||
CouldNotParseLabel,
|
||||
CouldNotParseVector,
|
||||
ImageHasWrongSize,
|
||||
}
|
||||
|
||||
fn parse_dataset(
|
||||
labels_buffer: impl BufRead,
|
||||
vectors_buffer: impl BufRead,
|
||||
number_of_points: usize,
|
||||
) -> Result<Result<Vec<NNPoint>, ParsingError>, io::Error> {
|
||||
let mut output = vec![];
|
||||
for (label_result, vector_result) in labels_buffer
|
||||
.lines()
|
||||
.zip(vectors_buffer.lines())
|
||||
.take(number_of_points)
|
||||
{
|
||||
let label_str = label_result?;
|
||||
let image_str = vector_result?;
|
||||
|
||||
let label: u8 = match label_str.parse() {
|
||||
Ok(label) => label,
|
||||
Err(_) => return Ok(Err(ParsingError::CouldNotParseLabel)),
|
||||
};
|
||||
|
||||
let mut image: Vec<u8> = vec![];
|
||||
for str in image_str.split(',') {
|
||||
match str.parse() {
|
||||
Ok(pixel_value) => {
|
||||
image.push(pixel_value);
|
||||
}
|
||||
Err(_) => return Ok(Err(ParsingError::CouldNotParseVector)),
|
||||
}
|
||||
}
|
||||
|
||||
if image.len() != env::NUMBER_OF_PIXELS_PER_IMAGE {
|
||||
return Ok(Err(ParsingError::ImageHasWrongSize));
|
||||
}
|
||||
|
||||
output.push(NNPoint::new(label, normalize_input(&image)));
|
||||
}
|
||||
Ok(Ok(output))
|
||||
}
|
||||
|
||||
/// Mean pixel intensity of `vec`.
/// NOTE(review): returns NaN for an empty slice (0/0) — callers always pass
/// full images, so this is not guarded here.
fn average(vec: &[u8]) -> Float {
    // The sum fits in a u32: it is bounded by NUMBER_OF_PIXELS_PER_IMAGE * 255 (~200 k).
    let sum: u32 = vec.iter().map(|&x| x as u32).sum();
    (sum as Float) / (vec.len() as Float)
}
|
||||
|
||||
/// Sum of squares of `vec`.
///
/// Assumes `vec` has already been mean-centered; note this is the
/// *unnormalized* variance — it is deliberately not divided by `vec.len()`,
/// matching how `normalize_input` uses its square root as the scale factor.
fn variance(vec: &[Float]) -> Float {
    vec.iter().map(|x| x * x).sum()
}
|
||||
|
||||
fn normalize_input(vec: &[u8]) -> Vec<Float> {
|
||||
let average = average(vec);
|
||||
let mut result: Vec<Float> = vec.iter().map(|x| (*x as Float) - average).collect();
|
||||
let stddev = variance(&result).sqrt();
|
||||
for i in &mut result {
|
||||
*i /= stddev;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn dataset_from_file(
|
||||
(labels_file_path, vectors_file_path): (&str, &str),
|
||||
number_of_points: usize,
|
||||
) -> Result<Vec<NNPoint>, io::Error> {
|
||||
let (labels_file, vectors_file): (File, File) = (
|
||||
File::open(labels_file_path)?,
|
||||
File::open(vectors_file_path)?,
|
||||
);
|
||||
|
||||
match parse_dataset(
|
||||
BufReader::new(labels_file),
|
||||
BufReader::new(vectors_file),
|
||||
number_of_points,
|
||||
)? {
|
||||
Ok(points) => Ok(points),
|
||||
Err(parsing_error) => {
|
||||
println!("ERROR: {:?}", parsing_error);
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes one predicted label per line to `file_path`.
///
/// Flushes the writer explicitly before returning: `BufWriter`'s `Drop`
/// flushes too, but it silently swallows any write error, so without the
/// explicit flush a failed write could go unreported.
pub fn export_to(outputs: &[usize], file_path: &str) -> Result<(), io::Error> {
    let mut file = BufWriter::new(File::create(file_path)?);
    for x in outputs {
        writeln!(file, "{}", x)?
    }
    file.flush()
}
|
||||
|
||||
pub fn export_distributions_to(outputs: &[Vec<Float>], file_path: &str) -> Result<(), io::Error> {
|
||||
let mut file = BufWriter::new(File::create(file_path)?);
|
||||
for ps in outputs {
|
||||
let mut s = "".to_string();
|
||||
for p in ps {
|
||||
let p_str = format!("{}, ", p);
|
||||
s += &p_str
|
||||
}
|
||||
writeln!(file, "[{}]", s)?
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
315
src/transforms.rs
Normal file
315
src/transforms.rs
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
use crate::float::Float;
|
||||
use crate::linear_algebra::{ColumnEfficientMatrix, Vector};
|
||||
use rand_distr::Normal;
|
||||
|
||||
/// Half the squared Euclidean distance between `xs` and `ys`:
/// 0.5 * Σ (x_i - y_i)². Iteration stops at the shorter slice.
pub fn l2_error(xs: &[Float], ys: &[Float]) -> Float {
    let sum_of_squares: Float = xs
        .iter()
        .zip(ys)
        .map(|(x, y)| {
            let d = x - y;
            d * d
        })
        .sum();
    0.5 * sum_of_squares
}
|
||||
|
||||
/// Gradient of `l2_error` with respect to `xs`, written into `out`:
/// out_i = x_i - y_i.
/// `out` is indexed (not zipped), so it must be at least as long as the
/// shorter of `xs`/`ys` — same panic behavior as before on a short `out`.
pub fn gradient_l2_error_mut(xs: &[Float], ys: &[Float], out: &mut [Float]) {
    for (i, diff) in xs.iter().zip(ys).map(|(x, y)| x - y).enumerate() {
        out[i] = diff;
    }
}
|
||||
|
||||
/// Logistic sigmoid: 1 / (1 + e^(-x)).
fn sigmoid(x: Float) -> Float {
    let denominator = 1.0 + (-x).exp();
    1.0 / denominator
}
|
||||
|
||||
/// Activation applied to a sigmoid-layer potential. Kept as a named wrapper
/// so the choice of activation function is explicit at call sites.
fn activation_of_sigmoid_potential(potential: Float) -> Float {
    sigmoid(potential)
}
|
||||
|
||||
fn vectorized_sigmoid_activation_of_potential(potentials: &[Float], out: &mut [Float]) {
|
||||
for (potential, y) in potentials.iter().zip(out) {
|
||||
*y = activation_of_sigmoid_potential(*potential)
|
||||
}
|
||||
}
|
||||
|
||||
fn derivative_of_sigmoid_activation_of_potential(potential: Float) -> Float {
|
||||
let s = sigmoid(potential);
|
||||
s * (1.0 - s)
|
||||
}
|
||||
|
||||
/// Backpropagates `output_gradient` through the element-wise sigmoid.
///
/// Fills `derivatives_state` with σ'(potential_i), then — since the Jacobian
/// of an element-wise map is diagonal — applies it as a `DiagonalMatrix` to
/// `output_gradient`, writing the result into `input_gradient`.
/// `derivatives_state` is caller-provided scratch space so no allocation
/// happens per call.
fn vectorized_gradient_of_sigmoid_activation_of_potential_mut(
    potentials: &[Float],
    derivatives_state: &mut [Float],
    output_gradient: &[Float],
    input_gradient: &mut [Float],
) {
    use crate::linear_algebra::DiagonalMatrix;
    for (potential, state) in potentials.iter().zip(derivatives_state.iter_mut()) {
        *state = derivative_of_sigmoid_activation_of_potential(*potential)
    }
    DiagonalMatrix::new(derivatives_state).apply_mut(output_gradient, input_gradient);
}
|
||||
|
||||
// =====cross-entropy=====
|
||||
// takes in two probability distributions
|
||||
fn cross_entropy(p: &[Float], q: &[Float]) -> Float {
|
||||
let mut result = 0.0;
|
||||
for (x, y) in p.iter().zip(q) {
|
||||
result += y * x.ln()
|
||||
}
|
||||
-result
|
||||
}
|
||||
|
||||
// Second probability distribution is deterministic,
// i.e. it deterministically outputs the same value.
/// Cross-entropy of `p` against the one-hot distribution concentrated at
/// index `q`: -ln(p_q). Panics if `q` is out of bounds for `p`.
fn cross_entropy_simple(p: &[Float], q: usize) -> Float {
    -p[q].ln()
}
|
||||
|
||||
/// Gradient of `cross_entropy` with respect to `p`, written into `out`:
/// out_i = -q_i / p_i. Iteration stops at the shortest of the three slices.
fn cross_entropy_derivative_mut(p: &[Float], q: &[Float], out: &mut [Float]) {
    for (slot, (pi, qi)) in out.iter_mut().zip(p.iter().zip(q)) {
        *slot = -qi / pi;
    }
}
|
||||
|
||||
// Second probability distribution is deterministic,
|
||||
// i.e. it deterministically outputs the same value.
|
||||
pub fn cross_entropy_derivative_simple(p: &[Float], q: usize, out: &mut [Float]) {
|
||||
// TODO: Do we really need to reset everything to besides the q-th index?
|
||||
for a in out.iter_mut() {
|
||||
*a = 0.0;
|
||||
}
|
||||
out[q] = -1.0 / p[q];
|
||||
}
|
||||
|
||||
fn softmax_mut(input: &[Float], out: &mut [Float]) {
|
||||
let mut s = 0.0;
|
||||
for (x, y) in input.iter().zip(out.iter_mut()) {
|
||||
let e = x.exp();
|
||||
*y = e;
|
||||
s += e
|
||||
}
|
||||
|
||||
for y in out {
|
||||
*y /= s
|
||||
}
|
||||
}
|
||||
|
||||
/// Backpropagates `gradient_output` through the softmax.
///
/// Uses the softmax Jacobian d out_i / d in_j = s_i * (δ_ij - s_j),
/// evaluated from the cached forward output `softmax_output`.
/// Runs in O(n²) over the distribution size, which is small here
/// (one entry per class).
fn softmax_gradient_mut(
    softmax_output: &[Float],
    gradient_output: &[Float],
    gradient_input: &mut [Float],
) {
    for (j, dx) in gradient_input.iter_mut().enumerate() {
        *dx = 0.0;
        for (i, dy) in gradient_output.iter().enumerate() {
            // Note that the gradient matrix is symmetric, so don't worry about the order of
            // indices
            *dx += softmax_output[i] * (if i == j { 1.0 } else { 0.0 } - softmax_output[j]) * dy
        }
    }
}
|
||||
|
||||
// relu
|
||||
fn relu_mut(input: &[Float], out: &mut [Float]) {
|
||||
for (x, y) in input.iter().zip(out.iter_mut()) {
|
||||
*y = x.max(0.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Backpropagates through ReLU: passes `gradient_output` through where the
/// forward input was strictly positive, zero elsewhere (subgradient 0 is
/// used at x == 0). Iteration stops at the shortest slice.
fn relu_gradient_mut(input: &[Float], gradient_output: &[Float], gradient_input: &mut [Float]) {
    for ((dx, dy), x) in gradient_input.iter_mut().zip(gradient_output).zip(input) {
        *dx = if *x > 0.0 { *dy } else { 0.0 };
    }
}
|
||||
|
||||
// =====sigmoid=====
/// A fully-connected layer with sigmoid activation.
///
/// The first column of `weight` is the bias column (inputs carry a constant
/// in slot 0; the backward pass drops this column — see
/// `gradient_with_respect_to_input_mut`).
#[derive(Debug)]
pub struct SigmoidTransform {
    pub weight: ColumnEfficientMatrix,
    // Scratch: W[input], cached by `output_mut` for the backward pass.
    potential_vector: Vector,
    // Scratch: σ'(potential), filled during `potential_gradient_mut`.
    derivatives_state: Vector,
    // Gradient of the loss w.r.t. the potential, set by `potential_gradient_mut`.
    potential_gradient: Vector,

    // Accumulated gradient of the loss w.r.t. `weight`.
    pub weight_gradient: ColumnEfficientMatrix,
}
|
||||
|
||||
impl SigmoidTransform {
    /// Creates a layer with weights drawn from N(0, 1/input_dimension)
    /// (LeCun-style initialization) and zeroed scratch buffers.
    pub fn new(input_dimension: usize, output_dimension: usize) -> Self {
        let mean = 0.0;
        let std_dev = 1.0 / (input_dimension as Float).sqrt();
        let normal_distr = Normal::new(mean, std_dev).unwrap();

        Self {
            weight: ColumnEfficientMatrix::random_with_normal_distribution(
                input_dimension,
                output_dimension,
                normal_distr,
            ),
            potential_vector: Vector::zero(output_dimension),
            derivatives_state: Vector::zero(output_dimension), // TODO: Can I get rid of this?
            potential_gradient: Vector::zero(output_dimension),

            weight_gradient: ColumnEfficientMatrix::zero(input_dimension, output_dimension),
        }
    }

    /// Forward pass: output = σ(W[input]).
    /// Caches the potential vector for the backward pass.
    pub fn output_mut(&mut self, input: &[Float], output: &mut [Float]) {
        self.weight.apply_mut(input, &mut self.potential_vector[..]); // potential = W[input]
        vectorized_sigmoid_activation_of_potential(&self.potential_vector[..], output);
        // y = f(potential)
    }

    // Note below that (1) and (2) are independent, but they both depend on (0).

    // (0)
    /// Computes the gradient w.r.t. the potential from the gradient w.r.t.
    /// the layer output. Must be called after `output_mut` and before (1)/(2).
    pub fn potential_gradient_mut(&mut self, output_gradient: &[Float]) {
        // updates the potential gradient
        vectorized_gradient_of_sigmoid_activation_of_potential_mut(
            &self.potential_vector[..],
            &mut self.derivatives_state[..],
            output_gradient,
            &mut self.potential_gradient[..],
        ); // potential_gradient = grad[f](potential)[output_gradient]
    }

    // Note that it makes sense to have the two gradients split, since for the input layer we will
    // not need to compute the input gradient, only the weight gradient is important.
    // WARNING: You need to call `potential_gradient_mut` before using the below function
    // (1)
    /// Gradient of the loss w.r.t. this layer's input (for the previous layer).
    pub fn gradient_with_respect_to_input_mut(&self, input_gradient: &mut [Float]) {
        // updates the input gradient
        //
        // Note that the first column of `self.weight` is the bias,
        // and the previous layer doesn't care about its gradient.
        // So we just return the gradient below the first component of the input
        // by dropping the bias column.
        self.weight
            .drop_first_column_coapply_mut(&self.potential_gradient[..], input_gradient);
        // transpose[T without the first column][potential_gradient]
    }

    // WARNING: You need to call `potential_gradient_mut` before using the below function,
    // which ensures `potential_gradient` is up to date.
    // (2)
    /// Accumulates (adds, does not overwrite) the loss gradient w.r.t. the
    /// weights into `weight_gradient`.
    pub fn add_gradient_with_respect_to_weights_mut(&mut self, input: &[Float]) {
        use crate::linear_algebra::VectorTensorCovectorMatrix;

        let matrix = VectorTensorCovectorMatrix::new(&self.potential_gradient[..], input); // grad[f](potential)[output_grad] **tensor** input
        matrix.add_to_mut(&mut self.weight_gradient);
    }
}
|
||||
|
||||
// =====softmax=====
/// A fully-connected layer with softmax activation — the network's output
/// layer, producing a probability distribution over classes.
#[derive(Debug)]
pub struct SoftmaxTransform {
    pub weight: ColumnEfficientMatrix,
    // Scratch: W[input], cached by `output_mut` for the backward pass.
    potential_vector: Vector,
    softmax_output: Vector, // Used for computation of the softmax gradient
    // Gradient of the loss w.r.t. the potential, set by `potential_gradient_mut`.
    potential_gradient: Vector,

    // Accumulated gradient of the loss w.r.t. `weight`.
    pub weight_gradient: ColumnEfficientMatrix,
}
|
||||
|
||||
impl SoftmaxTransform {
    /// Creates a layer with weights drawn from N(0, 1/input_dimension)
    /// (LeCun-style initialization) and zeroed scratch buffers.
    pub fn new(input_dimension: usize, output_dimension: usize) -> Self {
        let mean = 0.0;
        let std_dev = 1.0 / (input_dimension as Float).sqrt();
        let normal_distr = Normal::new(mean, std_dev).unwrap();

        Self {
            weight: ColumnEfficientMatrix::random_with_normal_distribution(
                input_dimension,
                output_dimension,
                normal_distr,
            ),
            potential_vector: Vector::zero(output_dimension),
            softmax_output: Vector::zero(output_dimension),
            potential_gradient: Vector::zero(output_dimension),

            weight_gradient: ColumnEfficientMatrix::zero(input_dimension, output_dimension),
        }
    }

    /// Forward pass: output = softmax(W[input]).
    /// Caches the softmax output, which the backward pass needs to build the
    /// softmax Jacobian.
    pub fn output_mut(&mut self, input: &[Float], output: &mut [Float]) {
        self.weight.apply_mut(input, &mut self.potential_vector[..]); // potential = W[input]
        softmax_mut(&self.potential_vector[..], output); // y = f(potential)
        self.softmax_output.copy_from_slice(output)
    }

    /// Computes the gradient w.r.t. the potential from the gradient w.r.t.
    /// the layer output. Must be called after `output_mut` and before the
    /// two gradient functions below.
    pub fn potential_gradient_mut(&mut self, output_gradient: &[Float]) {
        softmax_gradient_mut(
            &self.softmax_output[..],
            output_gradient,
            &mut self.potential_gradient[..],
        ); // potential_gradient = grad[softmax](potential)[output_gradient]
    }

    /// Gradient of the loss w.r.t. this layer's input (for the previous
    /// layer). Drops the bias column of `weight`, since the previous layer
    /// has no use for the bias slot's gradient.
    /// WARNING: call `potential_gradient_mut` first.
    pub fn gradient_with_respect_to_input_mut(&self, input_gradient: &mut [Float]) {
        self.weight
            .drop_first_column_coapply_mut(&self.potential_gradient[..], input_gradient);
        // transpose[T without the first column][potential_gradient]
    }

    /// Accumulates (adds, does not overwrite) the loss gradient w.r.t. the
    /// weights into `weight_gradient`.
    /// WARNING: call `potential_gradient_mut` first.
    pub fn add_gradient_with_respect_to_weights_mut(&mut self, input: &[Float]) {
        use crate::linear_algebra::VectorTensorCovectorMatrix;

        let matrix = VectorTensorCovectorMatrix::new(&self.potential_gradient[..], input); // grad[f](potential)[output_grad] **tensor** input
        matrix.add_to_mut(&mut self.weight_gradient);
    }
}
|
||||
|
||||
/// A fully-connected layer with ReLU activation.
/// Unlike `SigmoidTransform`, no separate derivatives buffer is needed:
/// the ReLU gradient is computed directly from the cached potentials.
#[derive(Debug)]
pub struct ReluTransform {
    pub weight: ColumnEfficientMatrix,
    // Scratch: W[input], cached by `output_mut` for the backward pass.
    potential_vector: Vector,
    // Gradient of the loss w.r.t. the potential, set by `potential_gradient_mut`.
    potential_gradient: Vector,

    // Accumulated gradient of the loss w.r.t. `weight`.
    pub weight_gradient: ColumnEfficientMatrix,
}
|
||||
|
||||
impl ReluTransform {
    /// Creates a layer with weights drawn from N(0, 1/input_dimension)
    /// (LeCun-style initialization) and zeroed scratch buffers.
    pub fn new(input_dimension: usize, output_dimension: usize) -> Self {
        let mean = 0.0;
        let std_dev = 1.0 / (input_dimension as Float).sqrt();
        let normal_distr = Normal::new(mean, std_dev).unwrap();

        Self {
            weight: ColumnEfficientMatrix::random_with_normal_distribution(
                input_dimension,
                output_dimension,
                normal_distr,
            ),
            potential_vector: Vector::zero(output_dimension),
            potential_gradient: Vector::zero(output_dimension),

            weight_gradient: ColumnEfficientMatrix::zero(input_dimension, output_dimension),
        }
    }

    /// Forward pass: output = relu(W[input]). Caches the potential vector
    /// for the backward pass.
    pub fn output_mut(&mut self, input: &[Float], output: &mut [Float]) {
        self.weight.apply_mut(input, &mut self.potential_vector[..]); // potential = W[input]
        relu_mut(&self.potential_vector[..], output); // y = f(potential)
    }

    /// Computes the gradient w.r.t. the potential from the gradient w.r.t.
    /// the layer output. Must be called after `output_mut` and before the
    /// two gradient functions below.
    pub fn potential_gradient_mut(&mut self, output_gradient: &[Float]) {
        relu_gradient_mut(
            &self.potential_vector[..],
            output_gradient,
            &mut self.potential_gradient[..],
        ); // potential_gradient = grad[relu](potential)[output_gradient]
    }

    /// Gradient of the loss w.r.t. this layer's input (for the previous
    /// layer). Drops the bias column of `weight`, since the previous layer
    /// has no use for the bias slot's gradient.
    /// WARNING: call `potential_gradient_mut` first.
    pub fn gradient_with_respect_to_input_mut(&self, input_gradient: &mut [Float]) {
        self.weight
            .drop_first_column_coapply_mut(&self.potential_gradient[..], input_gradient);
        // transpose[T without the first column][potential_gradient]
    }

    /// Accumulates (adds, does not overwrite) the loss gradient w.r.t. the
    /// weights into `weight_gradient`.
    /// WARNING: call `potential_gradient_mut` first.
    pub fn add_gradient_with_respect_to_weights_mut(&mut self, input: &[Float]) {
        use crate::linear_algebra::VectorTensorCovectorMatrix;

        let matrix = VectorTensorCovectorMatrix::new(&self.potential_gradient[..], input); // grad[f](potential)[output_grad] **tensor** input
        matrix.add_to_mut(&mut self.weight_gradient);
    }
}
|
||||
23
tmp_repl.txt
Normal file
23
tmp_repl.txt
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
|
||||
cargo init --edition 2018
|
||||
|
||||
cargo add rand
|
||||
|
||||
cargo add rand_distr
|
||||
|
||||
cargo run --release
|
||||
|
||||
module add rust
|
||||
|
||||
cargo build
|
||||
|
||||
|
||||
cargo fmt
|
||||
|
||||
# linter
|
||||
cargo clippy -- -D warnings
|
||||
|
||||
|
||||
python3 evaluator/evaluate.py test_predictions.csv data/fashion_mnist_test_labels.csv
|
||||
|
||||
python3 evaluator/evaluate.py train_predictions.csv data/fashion_mnist_train_labels.csv
|
||||
Loading…
Add table
Add a link
Reference in a new issue