From 02e1fd98c43a13a3966a8d9fb9c9fbd24ee99879 Mon Sep 17 00:00:00 2001
From: ljvmiranda921
Date: Mon, 31 Jul 2017 14:03:55 +0900
Subject: [PATCH] Update example on training a neural network (#16)

This is an update to the previous commit regarding the documentation. A
use-case example for training a neural network is now implemented both as
a Jupyter Notebook and as a reST page in ReadTheDocs.

A small stylistic change was also made to membership checks: when testing
whether a certain key is in a dictionary, we now write

>>> if key not in some_dict

instead of the previous ``if not key in some_dict``, which reads
awkwardly. The new form is more natural and idiomatic.

Author: ljvmiranda921
---
 README.rst                                         |  11 +-
 docs/examples/train_neural_network.rst             | 270 ++++++++++++++
 docs/examples/usecases.rst                         |   3 +-
 docs/index.rst                                     |  11 +-
 .../train_neural_network-checkpoint.ipynb          | 348 ++++++++++++++++++
 examples/train_neural_network.ipynb                | 348 ++++++++++++++++++
 pyswarms/single/lb.py                              |   2 +-
 7 files changed, 987 insertions(+), 6 deletions(-)
 create mode 100644 docs/examples/train_neural_network.rst
 create mode 100644 examples/.ipynb_checkpoints/train_neural_network-checkpoint.ipynb
 create mode 100644 examples/train_neural_network.ipynb

diff --git a/README.rst b/README.rst
index 13b7d651..919f673a 100644
--- a/README.rst
+++ b/README.rst
@@ -3,8 +3,8 @@
 PySwarms
 ========
 
-.. image:: https://img.shields.io/pypi/v/pyswarms.svg
-    :target: https://pypi.python.org/pypi/pyswarms
+.. image:: https://badge.fury.io/py/pyswarms.svg
+    :target: https://badge.fury.io/py/pyswarms
 
 .. image:: https://img.shields.io/travis/ljvmiranda921/pyswarms.svg
     :target: https://travis-ci.org/ljvmiranda921/pyswarms
@@ -13,10 +13,17 @@ PySwarms
     :target: https://pyswarms.readthedocs.io/en/latest/?badge=latest
     :alt: Documentation Status
 
+.. image:: https://landscape.io/github/ljvmiranda921/pyswarms/master/landscape.svg?style=flat
+    :target: https://landscape.io/github/ljvmiranda921/pyswarms/master
+    :alt: Code Health
+
 .. image:: https://pyup.io/repos/github/ljvmiranda921/pyswarms/shield.svg
     :target: https://pyup.io/repos/github/ljvmiranda921/pyswarms/
     :alt: Updates
 
+.. image:: https://img.shields.io/badge/license-MIT-blue.svg
+    :target: https://raw.githubusercontent.com/ljvmiranda921/pyswarms/master/LICENSE
+
 PySwarms is a simple, Python-based, Particle Swarm Optimization (PSO) library.
diff --git a/docs/examples/train_neural_network.rst b/docs/examples/train_neural_network.rst
new file mode 100644
index 00000000..2ea4650a
--- /dev/null
+++ b/docs/examples/train_neural_network.rst
@@ -0,0 +1,270 @@
+
+Training a Neural Network
+=========================
+
+In this example, we'll train a neural network using particle swarm
+optimization. For this we'll use the standard global-best PSO,
+``pyswarms.single.GBestPSO``, to optimize the network's weights and
+biases. This aims to demonstrate how the API can handle custom-defined
+objective functions.
+
+For this example, we'll try to classify the three iris species in the
+Iris Dataset.
+
+.. code-block:: python
+
+    # Import modules
+    import numpy as np
+    import matplotlib.pyplot as plt
+    from sklearn.datasets import load_iris
+
+    # Import PySwarms
+    import pyswarms as ps
+
+    # Some more magic so that the notebook will reload external python modules;
+    # see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
+    %load_ext autoreload
+    %autoreload 2
+
+First, we'll load the dataset from ``scikit-learn``. The Iris Dataset
+contains 3 classes, one for each of the iris species (*iris setosa*,
+*iris virginica*, and *iris versicolor*). It has 50 samples per class,
+or 150 samples in total, making it a perfectly balanced dataset. Each
+sample is characterized by four features (or dimensions): sepal length,
+sepal width, petal length, and petal width.
+
+.. code-block:: python
+
+    # Load the iris dataset
+    data = load_iris()
+
+    # Store the features as X and the labels as y
+    X = data.data
+    y = data.target
+
+Constructing a custom objective function
+----------------------------------------
+
+Recall that a neural network can simply be seen as a mapping function
+from one space to another. For now, we'll build a simple neural network
+with the following characteristics:
+
+* Input layer size: 4
+
+* Hidden layer size: 20 (activation: :math:`\tanh(x)`)
+
+* Output layer size: 3 (activation: :math:`\mathrm{softmax}(x)`)
+
+Things we'll do:
+
+1. Create a ``forward_prop`` method that will do forward propagation for
+   one particle.
+
+2. Create an overhead objective function ``f()`` that will compute
+   ``forward_prop()`` for the whole swarm.
+
+What we'll do, then, is create a swarm whose number of dimensions equals
+the number of weights and biases. We will **unroll** these parameters
+into an n-dimensional array, and have each particle take on different
+values. Thus, each particle represents a candidate neural network with
+its own weights and biases. When feeding the parameters back into the
+network, we will reconstruct the learned weights and biases.
+
+When rolling back the parameters into weights and biases, it is useful
+to recall the shapes of the weight and bias matrices:
+
+* Shape of input-to-hidden weight matrix: (4, 20)
+
+* Shape of input-to-hidden bias array: (20, )
+
+* Shape of hidden-to-output weight matrix: (20, 3)
+
+* Shape of hidden-to-output bias array: (3, )
+
+By unrolling them together, we have
+:math:`(4 \times 20) + (20 \times 3) + 20 + 3 = 163` parameters, or 163
+dimensions for each particle in the swarm.
+
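+As a quick aside (this snippet is not part of the original notebook and
+is only meant as an illustration), we can sanity-check these numbers by
+deriving the slice boundaries of the flat parameter vector from the
+layer sizes; they match the hard-coded indices used in the roll-back
+below:
+
+.. code-block:: python
+
+    # Illustration only: derive the slice boundaries of the flat
+    # parameter vector from the layer sizes.
+    n_inputs, n_hidden, n_classes = 4, 20, 3
+
+    sizes = [n_inputs * n_hidden,   # W1: 80 values
+             n_hidden,              # b1: 20 values
+             n_hidden * n_classes,  # W2: 60 values
+             n_classes]             # b2:  3 values
+
+    offsets = np.cumsum([0] + sizes)  # array([  0,  80, 100, 160, 163])
+    assert offsets[-1] == 163         # total dimensions per particle
+    # W1 corresponds to params[0:80], b1 to params[80:100],
+    # W2 to params[100:160], and b2 to params[160:163].
+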
+The negative log-likelihood will be used to compute the error between
+the ground-truth values and the predictions. Also, because PSO doesn't
+rely on gradients, we won't be performing backpropagation (which may be
+a good or a bad thing depending on the circumstances).
+
+Now, let's write the forward propagation procedure as our objective
+function. Let :math:`X` be the input, :math:`z_l` the pre-activation at
+layer :math:`l`, and :math:`a_l` the activation for layer :math:`l`:
+
+.. code-block:: python
+
+    # Forward propagation
+    def forward_prop(params):
+        """Forward propagation as objective function
+
+        This computes the forward pass of the neural network, as well
+        as the loss. It receives a set of parameters that must be
+        rolled back into the corresponding weights and biases.
+
+        Inputs
+        ------
+        params: np.ndarray
+            The dimensions should include an unrolled version of the
+            weights and biases.
+
+        Returns
+        -------
+        float
+            The computed negative log-likelihood loss given the parameters
+        """
+        # Neural network architecture
+        n_inputs = 4
+        n_hidden = 20
+        n_classes = 3
+
+        # Roll back the weights and biases
+        W1 = params[0:80].reshape((n_inputs, n_hidden))
+        b1 = params[80:100].reshape((n_hidden,))
+        W2 = params[100:160].reshape((n_hidden, n_classes))
+        b2 = params[160:163].reshape((n_classes,))
+
+        # Perform forward propagation
+        z1 = X.dot(W1) + b1  # Pre-activation in Layer 1
+        a1 = np.tanh(z1)     # Activation in Layer 1
+        z2 = a1.dot(W2) + b2 # Pre-activation in Layer 2
+        logits = z2          # Logits for Layer 2
+
+        # Compute the softmax of the logits
+        exp_scores = np.exp(logits)
+        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
+
+        # Compute the negative log-likelihood
+        N = 150  # Number of samples
+        correct_logprobs = -np.log(probs[range(N), y])
+        loss = np.sum(correct_logprobs) / N
+
+        return loss
+
+Now that we have a method to do forward propagation for one particle
+(or for one set of dimensions), we can create a higher-level method
+that computes ``forward_prop()`` for the whole swarm:
+
+.. code-block:: python
+
+    def f(x):
+        """Higher-level method to do forward_prop for the
+        whole swarm.
+
+        Inputs
+        ------
+        x: numpy.ndarray of shape (n_particles, dims)
+            The swarm that will perform the search
+
+        Returns
+        -------
+        numpy.ndarray of shape (n_particles, )
+            The computed loss for each particle
+        """
+        n_particles = x.shape[0]
+        j = [forward_prop(x[i]) for i in range(n_particles)]
+        return np.array(j)
+
+Performing PSO on the custom function
+-------------------------------------
+
+Now that everything has been set up, we just call our global-best PSO
+and run the optimizer as usual. For now, we'll just set the PSO
+parameters arbitrarily.
+
+.. code-block:: python
+
+    # Initialize swarm
+    options = {'c1': 0.5, 'c2': 0.3, 'm': 0.9}
+
+    # Call an instance of PSO
+    dims = (4 * 20) + (20 * 3) + 20 + 3
+    optimizer = ps.single.GBestPSO(n_particles=100, dims=dims, **options)
+
+    # Perform optimization
+    cost, pos = optimizer.optimize(f, print_step=100, iters=1000, verbose=3)
+
+.. parsed-literal::
+
+    Iteration 1/1000, cost: 1.11338932053
+    Iteration 101/1000, cost: 0.0541135752532
+    Iteration 201/1000, cost: 0.0468046270747
+    Iteration 301/1000, cost: 0.0434828849533
+    Iteration 401/1000, cost: 0.0358833340106
+    Iteration 501/1000, cost: 0.0312474981647
+    Iteration 601/1000, cost: 0.0150869267541
+    Iteration 701/1000, cost: 0.01267166403
+    Iteration 801/1000, cost: 0.00632312205821
+    Iteration 901/1000, cost: 0.00194080306565
+    ================================
+    Optimization finished!
+    Final cost: 0.0015
+    Best value: -0.356506 0.441392 -0.605476 0.620517 -0.156904 0.206396 ...
+
+Checking the accuracy
+---------------------
+
+We can then check the accuracy by performing forward propagation once
+again to create a set of predictions, and then checking which of them
+match the ground-truth labels. For the ``logits``, we take the
+``argmax``: recall that the softmax function returns probabilities that
+sum to 1, so we take the class with the highest probability and treat
+it as the network's prediction.
+
+Moreover, we let the best position vector found by the swarm be the
+weight and bias parameters of the network.
+
+.. code-block:: python
+
+    def predict(X, pos):
+        """
+        Use the trained weights to perform class predictions.
+
+        Inputs
+        ------
+        X: numpy.ndarray
+            Input Iris dataset
+        pos: numpy.ndarray
+            Position matrix found by the swarm. Will be rolled
+            into weights and biases.
+        """
+        # Neural network architecture
+        n_inputs = 4
+        n_hidden = 20
+        n_classes = 3
+
+        # Roll back the weights and biases
+        W1 = pos[0:80].reshape((n_inputs, n_hidden))
+        b1 = pos[80:100].reshape((n_hidden,))
+        W2 = pos[100:160].reshape((n_hidden, n_classes))
+        b2 = pos[160:163].reshape((n_classes,))
+
+        # Perform forward propagation
+        z1 = X.dot(W1) + b1  # Pre-activation in Layer 1
+        a1 = np.tanh(z1)     # Activation in Layer 1
+        z2 = a1.dot(W2) + b2 # Pre-activation in Layer 2
+        logits = z2          # Logits for Layer 2
+
+        y_pred = np.argmax(logits, axis=1)
+        return y_pred
+
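+As a quick illustration (this block is not part of the original
+notebook), we can map the integer predictions back to the species names
+for a few samples, using the best position ``pos`` returned by the
+optimizer above:
+
+.. code-block:: python
+
+    # Illustration only: compare predicted and true species names
+    # for the first five samples.
+    y_pred = predict(X, pos)
+    print(data.target_names[y_pred[:5]])  # predicted species
+    print(data.target_names[y[:5]])       # ground-truth species
+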
+And from this we can compute the accuracy: we make predictions, compare
+them element-wise with the ground-truth labels ``y``, and take the mean.
+
+.. code-block:: python
+
+    (predict(X, pos) == y).mean()
+
+.. parsed-literal::
+
+    1.0
+
diff --git a/docs/examples/usecases.rst b/docs/examples/usecases.rst
index 02ceb90f..c1e88208 100644
--- a/docs/examples/usecases.rst
+++ b/docs/examples/usecases.rst
@@ -5,4 +5,5 @@
 If you wish to check the actual Jupyter Notebooks, please go to this `link