|
|
|
/*! @file
|
|
|
|
|
|
|
|
@id $Id$
|
|
|
|
*/
|
|
|
|
// 1 2 3 4 5 6 7 8
|
|
|
|
// 45678901234567890123456789012345678901234567890123456789012345678901234567890
|
|
|
|
|
|
|
|
#include <matrix.hxx>
|
|
|
|
#include <cmath>
|
|
|
|
|
|
|
|
/** @mainpage Neural Network with Hidden Layers
|
|
|
|
|
|
|
|
@section neuro-intro Overview
|
|
|
|
|
|
|
|
@subsection nature Natural Neural Network
|
|
|
|
|
|
|
|
From <a href="https://en.wikipedia.org/wiki/Neuron">Wikipedia</a>:
|
|
|
|
«A neuron is an electrically excitable cell that processes and
|
|
|
|
transmits information through electrical and chemical
|
|
|
|
signals. These signals between neurons occur via synapses,
|
|
|
|
specialized connections with other cells. Neurons can connect to
|
|
|
|
each other to form neural networks. Neurons are the core
|
|
|
|
components of the brain and spinal cord of the central nervous
|
|
|
|
system, and of the ganglia of the peripheral nervous system.» The
|
|
|
|
neuron connects with dendrites to the world or to the axon of
|
|
|
|
other neurons. The neurites (dendrite or axon) transport
|
|
|
|
electrical stimulation to the cell, which emits the signal to the
|
|
|
|
dendrites if the activation reaches a certain level.
|
|
|
|
|
|
|
|
@dot
|
|
|
|
digraph g {
|
|
|
|
rankdir=LR;
|
|
|
|
ranksep=0.8;
|
|
|
|
node [shape=hexagon];
|
|
|
|
edge [arrowhead=none];
|
|
|
|
subgraph clusterInput {
|
|
|
|
label="sensors";
|
|
|
|
color="white";
|
|
|
|
node [shape=point];
|
|
|
|
I0; I1; I2; I3; I4; I5; I6; I7; I8; I9;
|
|
|
|
}
|
|
|
|
subgraph clusterOutput {
|
|
|
|
label="actors";
|
|
|
|
color="white";
|
|
|
|
node [shape=point];
|
|
|
|
O0; O1; O2; O3; O4; O5; O6;
|
|
|
|
}
|
|
|
|
I1 -> Cell1 [label="axon";taillabel="synapse"];
|
|
|
|
{ I2; I3; I4; } -> Cell1;
|
|
|
|
{ I5; I6; I7; I8; } -> Cell2;
|
|
|
|
{ I4; I6; I9; I0; } -> Cell3;
|
|
|
|
Cell1 -> Cell8 [label="axon / dendrite"];
|
|
|
|
Cell1 -> { Cell2; Cell4; Cell5; }
|
|
|
|
Cell2 -> { Cell4; Cell5; Cell6; Cell8; }
|
|
|
|
Cell3 -> { Cell4; Cell6; Cell7; Cell8; }
|
|
|
|
{ Cell4; Cell5; Cell6 } -> { Cell7; Cell8; }
|
|
|
|
Cell7 -> { O0; O1; O2 };
|
|
|
|
Cell8 -> { O3; O4; O5; };
|
|
|
|
Cell8 -> O6 [label="dendrite"];
|
|
|
|
}
|
|
|
|
@enddot
|
|
|
|
|
|
|
|
@subsection art Artificial Neural Network
|
|
|
|
|
|
|
|
A complex neural network can be imitated as a vector @c I of @c i
|
|
|
|
input values, a vector @c O of @c o output values and any number
|
|
|
|
@c l of hidden layers, where each of them contains @c h
|
|
|
|
neurons.
|
|
|
|
|
|
|
|
A neural network with double precision is initialized as:
|
|
|
|
@code
|
|
|
|
NeuroNet<double, i, o, l, h> net;
|
|
|
|
@endcode
|
|
|
|
|
|
|
|
@dot
|
|
|
|
digraph g {
|
|
|
|
rankdir=LR;
|
|
|
|
ranksep=1.5;
|
|
|
|
subgraph clusterInput {
|
|
|
|
label="Input Layer";
|
|
|
|
I1 [label=<I<SUB>1</SUB>>];
|
|
|
|
I2 [label=<I<SUB>2</SUB>>];
|
|
|
|
Ix [label=<I<SUB>…</SUB>>];
|
|
|
|
Ii [label=<I<SUB>i</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHidden1 {
|
|
|
|
label="First Hidden Layer";
|
|
|
|
H11 [label=<H<SUB>11</SUB>>];
|
|
|
|
H12 [label=<H<SUB>12</SUB>>];
|
|
|
|
H1x [label=<H<SUB>1…</SUB>>];
|
|
|
|
H1h [label=<H<SUB>1h</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHidden2 {
|
|
|
|
label="Second Hidden Layer";
|
|
|
|
H21 [label=<H<SUB>21</SUB>>];
|
|
|
|
H22 [label=<H<SUB>22</SUB>>];
|
|
|
|
H2x [label=<H<SUB>2…</SUB>>];
|
|
|
|
H2h [label=<H<SUB>2h</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHiddenx {
|
|
|
|
label="More Hidden Layers";
|
|
|
|
Hx1 [label=<H<SUB>…1</SUB>>];
|
|
|
|
Hx2 [label=<H<SUB>…2</SUB>>];
|
|
|
|
Hxx [label=<H<SUB>……</SUB>>];
|
|
|
|
Hxh [label=<H<SUB>…h</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHiddenl {
|
|
|
|
label="Last Hidden Layer";
|
|
|
|
Hl1 [label=<H<SUB>l1</SUB>>];
|
|
|
|
Hl2 [label=<H<SUB>l2</SUB>>];
|
|
|
|
Hlx [label=<H<SUB>l…</SUB>>];
|
|
|
|
Hlh [label=<H<SUB>lh</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterOutput {
|
|
|
|
label="Output Layer";
|
|
|
|
O1 [label=<O<SUB>1</SUB>>];
|
|
|
|
O2 [label=<O<SUB>2</SUB>>];
|
|
|
|
Ox [label=<O<SUB>…</SUB>>];
|
|
|
|
Oo [label=<O<SUB>o</SUB>>];
|
|
|
|
}
|
|
|
|
{ I1; I2; Ix; Ii; }
|
|
|
|
-> { H11; H12; H1x; H1h; }
|
|
|
|
-> { H21; H22; H2x; H2h; }
|
|
|
|
-> { Hx1; Hx2; Hxx; Hxh; }
|
|
|
|
-> { Hl1; Hl2; Hlx; Hlh; }
|
|
|
|
-> { O1; O2; Ox; Oo; }
|
|
|
|
}
|
|
|
|
@enddot
|
|
|
|
|
|
|
|
@section neuro-forward Forward Propagation
|
|
|
|
|
|
|
|
The connections between two layers can be modelled as a
|
|
|
|
Matrix. Then Matrix H<sub>1</sub> contains the weights from @c I
|
|
|
|
to the first hidden layer, @c H<sub>2</sub> from the first to the
|
|
|
|
second, and so on, until @c H<sub>l+1</sub> contains the weights
|
|
|
|
from layer @c l to the output @c O.
|
|
|
|
|
|
|
|
There is also an activation function @c f. For back propagation,
|
|
|
|
this function needs a first derivative @c f'.
|
|
|
|
|
|
|
|
To get the activation of the first hidden layer, the input vector
|
|
|
|
is multiplied with the weight matrix of the first hidden layer,
|
|
|
|
this results in an output vector. Then the activation function is
|
|
|
|
applied to all values of the output vector:
|
|
|
|
|
|
|
|
<pre>
|
|
|
|
V<sub>1</sub> = f(I×H<sub>1</sub>)
|
|
|
|
</pre>
|
|
|
|
|
|
|
|
This is done for all layers, up to the output. The output vector
|
|
|
|
is then calculated as:
|
|
|
|
|
|
|
|
<pre>
|
|
|
|
O = f(f(f(f(I×H<sub>1</sub>)×H<sub>2</sub>)×H<sub>…</sub>)×H<sub>l+1</sub>)
|
|
|
|
</pre>
|
|
|
|
|
|
|
|
@code
|
|
|
|
const size_type i(4);
|
|
|
|
const size_type o(2);
|
|
|
|
NeuroNet<double, i, o> net;
|
|
|
|
Matrix<double, 1, i> input(1.0, 2.0, 0.0, -1.0);
|
|
|
|
Matrix<double, 1, o> output = net(input);
|
|
|
|
@endcode
|
|
|
|
|
|
|
|
@section neuro-backward Back Propagation
|
|
|
|
|
|
|
|
@page biblio Bibliography
|
|
|
|
|
|
|
|
- <a href="http://briandolhansky.com/blog/2014/10/30/artificial-neural-networks-matrix-form-part-5">Artificial Neural Networks: Matrix Form (Part 5)</a>
|
|
|
|
- <a href="http://briandolhansky.com/blog/2013/9/27/artificial-neural-networks-backpropagation-part-4">Artificial Neural Networks: Mathematics of Backpropagation (Part 4)</a>
|
|
|
|
- <a href="http://www.tornau.name/wp-content/uploads/2009/04/studiumsmaterialien/neuronale_netze_zusammefassung.pdf">Vorlesung Neuronale Netze - Zusammenfassung - Christoph Tornau</a>
|
|
|
|
- <a href="http://www.neuronalesnetz.de/">Neuronale Netze — Eine Einführung</a>
|
|
|
|
- <a href="http://alphard.ethz.ch/hafner/Vorles/Optim/ANN/Artificial%20Neural%20Network%20based%20Curve%20Prediction%20Documentation.pdf">Artificial Neural Network based Curve Prediction</a>
|
|
|
|
- <a href="http://cs231n.github.io/convolutional-networks/">Convolutional Neural Networks (CNNs / ConvNets)</a>
|
|
|
|
- <a href="https://www.tensorflow.org/versions/r0.9/tutorials/index.html">TensorFlow Tutorials</a>
|
|
|
|
- <a href="http://alphard.ethz.ch/hafner/Vorles/Optim/ANN/Artificial%20Neural%20Network%20based%20Curve%20Prediction%20Documentation.pdf">Artificial Neural Network based Curve Prediction</a>
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
namespace math {

  /** Tangens hyperbolicus, the default activation function.

      The computation is carried out in @c long double precision and
      the result is converted back to @c TYPE.

      @param v pre-activation value
      @return tanh(v) */
  template<typename TYPE> TYPE tanh(const TYPE& v) {
    // std::tanh is the portable name; <cmath> is only required to
    // declare the function in namespace std, not at global scope.
    return static_cast<TYPE>(std::tanh(static_cast<long double>(v)));
  }

  /** First derivative of math::tanh, needed for back propagation.

      tanh'(v) = 1 / cosh²(v) = sech²(v)

      @param v pre-activation value
      @return derivative of tanh at @p v */
  template<typename TYPE> TYPE tanh_diff(const TYPE& v) {
    TYPE ch(static_cast<TYPE>(std::cosh(static_cast<long double>(v))));
    return TYPE(1)/(ch*ch);
  }

}
|
|
|
|
|
|
|
|
template
|
|
|
|
<typename TYPE,
|
|
|
|
size_t INPUT_LAYERS,
|
|
|
|
size_t OUTPUT_LAYERS,
|
|
|
|
size_t HIDDEN_LAYERS = INPUT_LAYERS+OUTPUT_LAYERS,
|
|
|
|
size_t HIDDEN_LAYER_SIZE = INPUT_LAYERS+OUTPUT_LAYERS,
|
|
|
|
TYPE(*ACTIVATION)(const TYPE&) = math::tanh<TYPE>,
|
|
|
|
TYPE(*ACTIVATION_DIFF)(const TYPE&) = math::tanh_diff<TYPE>>
|
|
|
|
class NeuroNet {
|
|
|
|
public:
|
|
|
|
NeuroNet() {
|
|
|
|
}
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> operator()(const Matrix<TYPE, 1, INPUT_LAYERS>& in) {
|
|
|
|
Matrix<TYPE, 1, HIDDEN_LAYER_SIZE> l((in*_wi).apply(ACTIVATION));
|
|
|
|
for (int i(0); i<HIDDEN_LAYERS-1; ++i)
|
|
|
|
l = (l*_wh[i]).apply(ACTIVATION);
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> out((l*_wo).apply(ACTIVATION));
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> learn(const Matrix<TYPE, 1, INPUT_LAYERS>& in,
|
|
|
|
const Matrix<TYPE, 1, OUTPUT_LAYERS>& expect) {
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> out((*this)(in));
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> diff(expect-out);
|
|
|
|
return diff;
|
|
|
|
}
|
|
|
|
private:
|
|
|
|
Matrix<TYPE, INPUT_LAYERS, HIDDEN_LAYER_SIZE> _wi;
|
|
|
|
Matrix<TYPE, HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE> _wh[HIDDEN_LAYERS-1];
|
|
|
|
Matrix<TYPE, HIDDEN_LAYER_SIZE, OUTPUT_LAYERS> _wo;
|
|
|
|
};
|