/*! @file
@id $Id$
*/
//       1         2         3         4         5         6         7         8
// 45678901234567890123456789012345678901234567890123456789012345678901234567890
#include <matrix.hxx>
#include <cmath>
#include <cstddef>
/** @mainpage Neural Network with Hidden Layers
@section neuro-intro Overview
@subsection nature Natural Neural Network
From <a href="https://en.wikipedia.org/wiki/Neuron">Wikipedia</a>:
«A neuron is an electrically excitable cell that processes and
transmits information through electrical and chemical
signals. These signals between neurons occur via synapses,
specialized connections with other cells. Neurons can connect to
each other to form neural networks. Neurons are the core
components of the brain and spinal cord of the central nervous
system, and of the ganglia of the peripheral nervous system.» The
neuron connects through its dendrites to the world or to the axons
of other neurons. The neurites (dendrites and axons) carry
electrical stimulation to the cell, which passes the signal on to
the dendrites of other cells once its activation reaches a certain
threshold.
@dot
digraph g {
rankdir=LR;
ranksep=0.8;
node [shape=hexagon];
edge [arrowhead=none];
subgraph clusterInput {
label="sensors";
color="white";
node [shape=point];
I0; I1; I2; I3; I4; I5; I6; I7; I8; I9;
}
subgraph clusterOutput {
label="actors";
color="white";
node [shape=point];
O0; O1; O2; O3; O4; O5; O6;
}
I1 -> Cell1 [label="axon";taillabel="synapse"];
{ I2; I3; I4; } -> Cell1;
{ I5; I6; I7; I8; } -> Cell2;
{ I4; I6; I9; I0; } -> Cell3;
Cell1 -> Cell8 [label="axon / dendrite"];
Cell1 -> { Cell2; Cell4; Cell5; }
Cell2 -> { Cell4; Cell5; Cell6; Cell8; }
Cell3 -> { Cell4; Cell6; Cell7; Cell8; }
{ Cell4; Cell5; Cell6; } -> { Cell7; Cell8; }
Cell7 -> { O0; O1; O2 };
Cell8 -> { O3; O4; O5; };
Cell8 -> O6 [label="dendrite"];
}
@enddot
@subsection art Artificial Neural Network
A complex neural network can be imitated as a vector @c I of @c i
input values, a vector @c O of @c o output values, and any number
@c l of hidden layers, each containing @c h neurons.
A neural network with double precision is initialized as:
@code
NeuroNet<double, i, o, l, h> net;
@endcode
@dot
digraph g {
rankdir=LR;
ranksep=1.5;
subgraph clusterInput {
label="Input Layer";
I1 [label=<I<SUB>1</SUB>>];
I2 [label=<I<SUB>2</SUB>>];
Ix [label=<I<SUB>…</SUB>>];
Ii [label=<I<SUB>i</SUB>>];
}
subgraph clusterHidden1 {
label="First Hidden Layer";
H11 [label=<H<SUB>11</SUB>>];
H12 [label=<H<SUB>12</SUB>>];
H1x [label=<H<SUB>1…</SUB>>];
H1h [label=<H<SUB>1h</SUB>>];
}
subgraph clusterHidden2 {
label="Second Hidden Layer";
H21 [label=<H<SUB>21</SUB>>];
H22 [label=<H<SUB>22</SUB>>];
H2x [label=<H<SUB>2…</SUB>>];
H2h [label=<H<SUB>2h</SUB>>];
}
subgraph clusterHiddenx {
label="More Hidden Layers";
Hx1 [label=<H<SUB>…1</SUB>>];
Hx2 [label=<H<SUB>…2</SUB>>];
Hxx [label=<H<SUB>……</SUB>>];
Hxh [label=<H<SUB>…h</SUB>>];
}
subgraph clusterHiddenl {
label="Last Hidden Layer";
Hl1 [label=<H<SUB>l1</SUB>>];
Hl2 [label=<H<SUB>l2</SUB>>];
Hlx [label=<H<SUB>l…</SUB>>];
Hlh [label=<H<SUB>lh</SUB>>];
}
subgraph clusterOutput {
label="Output Layer";
O1 [label=<O<SUB>1</SUB>>];
O2 [label=<O<SUB>2</SUB>>];
Ox [label=<O<SUB>…</SUB>>];
Oo [label=<O<SUB>o</SUB>>];
}
{ I1; I2; Ix; Ii; }
-> { H11; H12; H1x; H1h; }
-> { H21; H22; H2x; H2h; }
-> { Hx1; Hx2; Hxx; Hxh; }
-> { Hl1; Hl2; Hlx; Hlh; }
-> { O1; O2; Ox; Oo; }
}
@enddot
@section neuro-forward Forward Propagation
The connections between two layers can be modelled as a matrix:
matrix H<sub>1</sub> contains the weights from the input @c I to
the first hidden layer, H<sub>2</sub> those from the first to the
second, and so on, until H<sub>l+1</sub> contains the weights from
the last hidden layer @c l to the output @c O.
There is also an activation function @c f; for back propagation,
this function needs a first derivative @c f'.
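This library defaults to the hyperbolic tangent (implemented below
as @c math::tanh and @c math::tanh_diff); other functions can be
supplied as template parameters:
<pre>
f(x) = tanh(x),   f'(x) = 1/cosh<sup>2</sup>(x) = 1−tanh<sup>2</sup>(x)
</pre>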
To get the activation of the first hidden layer, the input vector
is multiplied by the weight matrix of the first hidden layer,
which yields an output vector. The activation function is then
applied to every value of this vector:
<pre>
V<sub>1</sub> = f(I×H<sub>1</sub>)
</pre>
This is done for all layers, up to the output. The output vector
is then calculated as:
<pre>
O = f(f(f(f(I×H<sub>1</sub>)×H<sub>2</sub>)×H<sub>…</sub>)×H<sub>l+1</sub>)
</pre>
@code
const size_t i(4);
const size_t o(2);
NeuroNet<double, i, o> net;
Matrix<double, 1, i> input(1.0, 2.0, 0.0, -1.0);
Matrix<double, 1, o> output = net(input);
@endcode
@section neuro-backward Back Propagation
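Back propagation learns from examples: the actual output @c O for
a given input is compared with the expected output @c E, and the
error is propagated backwards through the layers to adjust the
weight matrices. The matrix-form update rules below are a sketch
following the cited articles by Brian Dolhansky (see @ref biblio),
with learning rate @c η and element-wise product ⊙. They are not
implemented yet; the @c learn method currently only returns the
error @c E−O. With V<sub>0</sub> = @c I and V<sub>k</sub> the
activation of layer @c k:
<pre>
δ<sub>l+1</sub> = f'(V<sub>l</sub>×H<sub>l+1</sub>) ⊙ (E−O)
δ<sub>k</sub>   = f'(V<sub>k−1</sub>×H<sub>k</sub>) ⊙ (δ<sub>k+1</sub>×H<sub>k+1</sub><sup>T</sup>)
H<sub>k</sub>   = H<sub>k</sub> + η·V<sub>k−1</sub><sup>T</sup>×δ<sub>k</sub>
</pre>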
@page biblio Bibliography
- <a href="http://briandolhansky.com/blog/2014/10/30/artificial-neural-networks-matrix-form-part-5">Artificial Neural Networks: Matrix Form (Part 5)</a>
- <a href="http://briandolhansky.com/blog/2013/9/27/artificial-neural-networks-backpropagation-part-4">Artificial Neural Networks: Mathematics of Backpropagation (Part 4)</a>
- <a href="http://www.tornau.name/wp-content/uploads/2009/04/studiumsmaterialien/neuronale_netze_zusammefassung.pdf">Vorlesung Neuronale Netze - Zusammenfassung - Christoph Tornau</a>
- <a href="http://www.neuronalesnetz.de/">Neuronale Netze — Eine Einführung</a>
- <a href="http://alphard.ethz.ch/hafner/Vorles/Optim/ANN/Artificial%20Neural%20Network%20based%20Curve%20Prediction%20Documentation.pdf">Artificial Neural Network based Curve Prediction</a>
- <a href="http://cs231n.github.io/convolutional-networks/">Convolutional Neural Networks (CNNs / ConvNets)</a>
- <a href="https://www.tensorflow.org/versions/r0.9/tutorials/index.html">TensorFlow Tutorials</a>
- <a href="http://alphard.ethz.ch/hafner/Vorles/Optim/ANN/Artificial%20Neural%20Network%20based%20Curve%20Prediction%20Documentation.pdf">Artificial Neural Network based Curve Prediction</a>
*/
namespace math {
  // hyperbolic tangent as the default activation function
  template<typename TYPE> TYPE tanh(const TYPE& v) {
    return std::tanh(static_cast<long double>(v));
  }
  // derivative of the activation function, needed for back
  // propagation: tanh'(v) = 1/cosh²(v)
  template<typename TYPE> TYPE tanh_diff(const TYPE& v) {
    TYPE ch(std::cosh(static_cast<long double>(v)));
    return 1/(ch*ch);
  }
}
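// Note: the same derivative can be computed from the activation value
// itself, since tanh'(v) = 1 - tanh(v)². The helper below is only an
// illustrative sketch (it is not used as a default anywhere); it takes
// the already activated value tanh(v), not v, as its argument:
template<typename TYPE> TYPE tanh_diff_from_value(const TYPE& tanh_v) {
  return 1-tanh_v*tanh_v;
}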
template
  <typename TYPE,
   size_t INPUT_LAYERS,
   size_t OUTPUT_LAYERS,
   size_t HIDDEN_LAYERS = INPUT_LAYERS+OUTPUT_LAYERS,
   size_t HIDDEN_LAYER_SIZE = INPUT_LAYERS+OUTPUT_LAYERS,
   TYPE(*ACTIVATION)(const TYPE&) = math::tanh<TYPE>,
   TYPE(*ACTIVATION_DIFF)(const TYPE&) = math::tanh_diff<TYPE>>
class NeuroNet {
  public:
    NeuroNet() {
    }
    // forward propagation: feed an input vector through all layers
    // and return the resulting output vector
    Matrix<TYPE, 1, OUTPUT_LAYERS> operator()(const Matrix<TYPE, 1, INPUT_LAYERS>& in) {
      // activation of the first hidden layer from the input
      Matrix<TYPE, 1, HIDDEN_LAYER_SIZE> l((in*_wi).apply(ACTIVATION));
      // propagate through the remaining hidden layers
      for (size_t i(0); i+1<HIDDEN_LAYERS; ++i)
        l = (l*_wh[i]).apply(ACTIVATION);
      // activation of the output layer
      Matrix<TYPE, 1, OUTPUT_LAYERS> out((l*_wo).apply(ACTIVATION));
      return out;
    }
    // back propagation is work in progress: so far, only the error
    // between the expected and the actual output is calculated
    Matrix<TYPE, 1, OUTPUT_LAYERS> learn(const Matrix<TYPE, 1, INPUT_LAYERS>& in,
                                         const Matrix<TYPE, 1, OUTPUT_LAYERS>& expect) {
      Matrix<TYPE, 1, OUTPUT_LAYERS> out((*this)(in));
      Matrix<TYPE, 1, OUTPUT_LAYERS> diff(expect-out);
      return diff;
    }
  private:
    Matrix<TYPE, INPUT_LAYERS, HIDDEN_LAYER_SIZE> _wi;                       // input to first hidden layer
    Matrix<TYPE, HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE> _wh[HIDDEN_LAYERS-1]; // hidden to hidden
    Matrix<TYPE, HIDDEN_LAYER_SIZE, OUTPUT_LAYERS> _wo;                      // last hidden layer to output
};
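// Usage sketch (illustrative, not part of the library): run a forward
// pass and get the current error from learn(); the dimensions and the
// sample values below are arbitrary.
//
//   NeuroNet<double, 4, 2> net;
//   Matrix<double, 1, 4> input(1.0, 2.0, 0.0, -1.0);
//   Matrix<double, 1, 2> expect(0.5, -0.5);
//   Matrix<double, 1, 2> output(net(input));              // forward propagation
//   Matrix<double, 1, 2> error(net.learn(input, expect)); // expect - output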