|
|
|
/*! @file
|
|
|
|
|
|
|
|
@id $Id$
|
|
|
|
*/
|
|
|
|
// 1 2 3 4 5 6 7 8
|
|
|
|
// 45678901234567890123456789012345678901234567890123456789012345678901234567890
|
|
|
|
|
|
|
|
#include <matrix.hxx>
|
|
|
|
#include <cmath>
|
|
|
|
|
|
|
|
/** @mainpage Neural Network with Hidden Layers
|
|
|
|
|
|
|
|
@section neuro-intro Overview
|
|
|
|
|
|
|
|
@subsection nature Natural Neural Network
|
|
|
|
|
|
|
|
From <a href="https://en.wikipedia.org/wiki/Neuron">Wikipedia</a>:
|
|
|
|
«A neuron is an electrically excitable cell that processes and
|
|
|
|
transmits information through electrical and chemical
|
|
|
|
signals. These signals between neurons occur via synapses,
|
|
|
|
specialized connections with other cells. Neurons can connect to
|
|
|
|
each other to form neural networks. Neurons are the core
|
|
|
|
components of the brain and spinal cord of the central nervous
|
|
|
|
system, and of the ganglia of the peripheral nervous system.» The
|
|
|
|
neuron connects with dendrites to the world or to the axon of
|
|
|
|
other neurons. The neurites (dendrite or axon) transport
|
|
|
|
electrical stimulation to the cell, which emits the signal to the
|
|
|
|
dendrites if the activation reaches a certain level.
|
|
|
|
|
|
|
|
@dot
|
|
|
|
digraph g {
|
|
|
|
rankdir=LR;
|
|
|
|
ranksep=0.8;
|
|
|
|
node [shape=hexagon];
|
|
|
|
edge [arrowhead=none];
|
|
|
|
subgraph clusterInput {
|
|
|
|
label="sensors";
|
|
|
|
color="white";
|
|
|
|
node [shape=point];
|
|
|
|
I0; I1; I2; I3; I4; I5; I6; I7; I8; I9;
|
|
|
|
}
|
|
|
|
subgraph clusterOutput {
|
|
|
|
label="actors";
|
|
|
|
color="white";
|
|
|
|
node [shape=point];
|
|
|
|
O0; O1; O2; O3; O4; O5; O6;
|
|
|
|
}
|
|
|
|
I1 -> Cell1 [label="axon";taillabel="synapse"];
|
|
|
|
{ I2; I3; I4; } -> Cell1;
|
|
|
|
{ I5; I6; I7; I8; } -> Cell2;
|
|
|
|
{ I4; I6; I9; I0; } -> Cell3;
|
|
|
|
Cell1 -> Cell8 [label="axon / dendrite"];
|
|
|
|
Cell1 -> { Cell2; Cell4; Cell5; }
|
|
|
|
Cell2 -> { Cell4; Cell5; Cell6; Cell8; }
|
|
|
|
Cell3 -> { Cell4; Cell6; Cell7; Cell8; }
|
|
|
|
{ Cell4; Cell5; Cell6 } -> { Cell7; Cell8; }
|
|
|
|
Cell7 -> { O0; O1; O2 };
|
|
|
|
Cell8 -> { O3; O4; O5; };
|
|
|
|
Cell8 -> O6 [label="dendrite"];
|
|
|
|
}
|
|
|
|
@enddot
|
|
|
|
|
|
|
|
@subsection art Artificial Neural Network
|
|
|
|
|
|
|
|
A complex neural network can be imitated as a vector @c I of @c i
|
|
|
|
input values, a vector @c O of @c o output values and any number
|
|
|
|
@c l of hidden layers, where each of them contains @c h
|
|
|
|
neurons.
|
|
|
|
|
|
|
|
A neural network with double precision is initialized as:
|
|
|
|
@code
|
|
|
|
NeuroNet<double, i, o, l, h> net;
|
|
|
|
@endcode
|
|
|
|
|
|
|
|
@dot
|
|
|
|
digraph g {
|
|
|
|
rankdir=LR;
|
|
|
|
ranksep=1.5;
|
|
|
|
subgraph clusterInput {
|
|
|
|
label="Input Layer";
|
|
|
|
I1 [label=<I<SUB>1</SUB>>];
|
|
|
|
I2 [label=<I<SUB>2</SUB>>];
|
|
|
|
Ix [label=<I<SUB>…</SUB>>];
|
|
|
|
Ii [label=<I<SUB>i</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHidden1 {
|
|
|
|
label="First Hidden Layer";
|
|
|
|
H11 [label=<H<SUB>11</SUB>>];
|
|
|
|
H12 [label=<H<SUB>12</SUB>>];
|
|
|
|
H1x [label=<H<SUB>1…</SUB>>];
|
|
|
|
H1h [label=<H<SUB>1h</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHidden2 {
|
|
|
|
label="Second Hidden Layer";
|
|
|
|
H21 [label=<H<SUB>21</SUB>>];
|
|
|
|
H22 [label=<H<SUB>22</SUB>>];
|
|
|
|
H2x [label=<H<SUB>2…</SUB>>];
|
|
|
|
H2h [label=<H<SUB>2h</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHiddenx {
|
|
|
|
label="More Hidden Layers";
|
|
|
|
Hx1 [label=<H<SUB>…1</SUB>>];
|
|
|
|
Hx2 [label=<H<SUB>…2</SUB>>];
|
|
|
|
Hxx [label=<H<SUB>……</SUB>>];
|
|
|
|
Hxh [label=<H<SUB>…h</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterHiddenl {
|
|
|
|
label="Last Hidden Layer";
|
|
|
|
Hl1 [label=<H<SUB>l1</SUB>>];
|
|
|
|
Hl2 [label=<H<SUB>l2</SUB>>];
|
|
|
|
Hlx [label=<H<SUB>l…</SUB>>];
|
|
|
|
Hlh [label=<H<SUB>lh</SUB>>];
|
|
|
|
}
|
|
|
|
subgraph clusterOutput {
|
|
|
|
label="Output Layer";
|
|
|
|
O1 [label=<O<SUB>1</SUB>>];
|
|
|
|
O2 [label=<O<SUB>2</SUB>>];
|
|
|
|
Ox [label=<O<SUB>…</SUB>>];
|
|
|
|
Oo [label=<O<SUB>o</SUB>>];
|
|
|
|
}
|
|
|
|
{ I1; I2; Ix; Ii; }
|
|
|
|
-> { H11; H12; H1x; H1h; }
|
|
|
|
-> { H21; H22; H2x; H2h; }
|
|
|
|
-> { Hx1; Hx2; Hxx; Hxh; }
|
|
|
|
-> { Hl1; Hl2; Hlx; Hlh; }
|
|
|
|
-> { O1; O2; Ox; Oo; }
|
|
|
|
}
|
|
|
|
@enddot
|
|
|
|
|
|
|
|
@section neuro-forward Forward Propagation
|
|
|
|
|
|
|
|
The connections between two layers can be modelled as a
|
|
|
|
Matrix. Then Matrix H<sub>1</sub> contains the weights from @c I
|
|
|
|
to the first hidden layer, @c H<sub>2</sub> from the first to the
|
|
|
|
second, and so on, until @c H<sub>l+1</sub> contains the weights
|
|
|
|
from layer @c l to the output @c O.
|
|
|
|
|
|
|
|
There is also an activation function @c f. For back propagation,
|
|
|
|
this function needs a first derivative @c f'.
|
|
|
|
|
|
|
|
To get the activation of the first hidden layer, the input vector
|
|
|
|
is multiplied with the weight matrix of the first hidden layer,
|
|
|
|
this results in an output vector. Then the activation function is
|
|
|
|
applied to all values of the output vector:
|
|
|
|
|
|
|
|
<pre>
|
|
|
|
V<sub>1</sub> = f(I×H<sub>1</sub>)
|
|
|
|
</pre>
|
|
|
|
|
|
|
|
This is done for all layers, up to the output. The output vector
|
|
|
|
is then calculated as:
|
|
|
|
|
|
|
|
<pre>
|
|
|
|
O = f(f(f(f(I×H<sub>1</sub>)×H<sub>2</sub>)×H<sub>…</sub>)×H<sub>l+1</sub>)
|
|
|
|
</pre>
|
|
|
|
|
|
|
|
@code
|
|
|
|
const size_type i(4);
|
|
|
|
const size_type o(2);
|
|
|
|
NeuroNet<double, i, o> net;
|
|
|
|
Matrix<double, 1, i> input(1.0, 2.0, 0.0, -1.0);
|
|
|
|
Matrix<double, 1, o> output = net(input);
|
|
|
|
@endcode
|
|
|
|
|
|
|
|
@section neuro-backward Back Propagation
|
|
|
|
|
|
|
|
@page biblio Bibliography
|
|
|
|
|
|
|
|
- <a href="http://briandolhansky.com/blog/2014/10/30/artificial-neural-networks-matrix-form-part-5">Artificial Neural Networks: Matrix Form (Part 5)</a>
|
|
|
|
- <a href="http://briandolhansky.com/blog/2013/9/27/artificial-neural-networks-backpropagation-part-4">Artificial Neural Networks: Mathematics of Backpropagation (Part 4)</a>
|
|
|
|
- <a href="http://www.tornau.name/wp-content/uploads/2009/04/studiumsmaterialien/neuronale_netze_zusammefassung.pdf">Vorlesung Neuronale Netze - Zusammenfassung - Christoph Tornau</a>
|
|
|
|
- <a href="http://www.neuronalesnetz.de/">Neuronale Netze — Eine Einführung</a>
|
|
|
|
- <a href="http://alphard.ethz.ch/hafner/Vorles/Optim/ANN/Artificial%20Neural%20Network%20based%20Curve%20Prediction%20Documentation.pdf">Artificial Neural Network based Curve Prediction</a>
|
|
|
|
- <a href="http://cs231n.github.io/convolutional-networks/">Convolutional Neural Networks (CNNs / ConvNets)</a>
|
|
|
|
- <a href="https://www.tensorflow.org/versions/r0.9/tutorials/index.html">TensorFlow Tutorials</a>
|
|
|
|
- <a href="http://alphard.ethz.ch/hafner/Vorles/Optim/ANN/Artificial%20Neural%20Network%20based%20Curve%20Prediction%20Documentation.pdf">Artificial Neural Network based Curve Prediction</a>
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
namespace math {

  /** Tangens hyperbolicus, the default activation function.

      The computation is carried out in @c long double precision and
      the result is converted back to @c TYPE.

      @param v pre-activation value
      @return tanh(v) */
  template<typename TYPE> TYPE tanh(const TYPE& v) {
    // std::tanh is the portable name; <cmath> is only required to
    // declare the function in namespace std, not at global scope.
    return static_cast<TYPE>(std::tanh(static_cast<long double>(v)));
  }

  /** First derivative of math::tanh, needed for back propagation.

      tanh'(v) = 1 / cosh²(v) = sech²(v)

      @param v pre-activation value
      @return derivative of tanh at @p v */
  template<typename TYPE> TYPE tanh_diff(const TYPE& v) {
    TYPE ch(static_cast<TYPE>(std::cosh(static_cast<long double>(v))));
    return TYPE(1)/(ch*ch);
  }

}
|
|
|
|
|
|
|
|
template
|
|
|
|
<typename TYPE,
|
|
|
|
size_t INPUT_LAYERS,
|
|
|
|
size_t OUTPUT_LAYERS,
|
|
|
|
size_t HIDDEN_LAYERS = INPUT_LAYERS+OUTPUT_LAYERS,
|
|
|
|
size_t HIDDEN_LAYER_SIZE = INPUT_LAYERS+OUTPUT_LAYERS,
|
|
|
|
TYPE(*ACTIVATION)(const TYPE&) = math::tanh<TYPE>,
|
|
|
|
TYPE(*ACTIVATION_DIFF)(const TYPE&) = math::tanh_diff<TYPE>>
|
|
|
|
class NeuroNet {
|
|
|
|
public:
|
|
|
|
NeuroNet() {
|
|
|
|
}
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> operator()(const Matrix<TYPE, 1, INPUT_LAYERS>& in) {
|
|
|
|
Matrix<TYPE, 1, HIDDEN_LAYER_SIZE> l((in*_wi).apply(ACTIVATION));
|
|
|
|
for (int i(0); i<HIDDEN_LAYERS-1; ++i)
|
|
|
|
l = (l*_wh[i]).apply(ACTIVATION);
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> out((l*_wo).apply(ACTIVATION));
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> learn(const Matrix<TYPE, 1, INPUT_LAYERS>& in,
|
|
|
|
const Matrix<TYPE, 1, OUTPUT_LAYERS>& expect) {
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> out((*this)(in));
|
|
|
|
Matrix<TYPE, 1, OUTPUT_LAYERS> diff(expect-out);
|
|
|
|
return diff;
|
|
|
|
}
|
|
|
|
private:
|
|
|
|
Matrix<TYPE, INPUT_LAYERS, HIDDEN_LAYER_SIZE> _wi;
|
|
|
|
Matrix<TYPE, HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE> _wh[HIDDEN_LAYERS-1];
|
|
|
|
Matrix<TYPE, HIDDEN_LAYER_SIZE, OUTPUT_LAYERS> _wo;
|
|
|
|
};
|