
Add BaseSearch and RandomSearch #25

Merged · merged 1 commit on Sep 13, 2017
9 changes: 9 additions & 0 deletions docs/features.rst
@@ -38,3 +38,12 @@ These functions can be used as benchmark tests for assessing the performance of
algorithm.

* :mod:`pyswarms.utils.functions.single_obj` - single-objective test functions

Search
~~~~~~

These search methods can be used to compare the relative performance of hyperparameter value combinations in optimizing a specified objective function.

* :mod:`pyswarms.utils.search.grid_search` - exhaustive search for optimal performance on a selected objective function over the cartesian product of the provided hyperparameter values

* :mod:`pyswarms.utils.search.random_search` - search for optimal performance on a selected objective function over combinations of randomly selected hyperparameter values, drawn within specified bounds for a specified number of selection iterations
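Taken together, the two utilities share one call pattern: build an options dict, construct the search object around an optimizer, then call search(). A minimal sketch, assuming the API added in this PR (the option values and n_selection_iters=100 are illustrative, not prescribed defaults):

from pyswarms.single import LocalBestPSO
from pyswarms.utils.functions.single_obj import sphere_func
from pyswarms.utils.search.grid_search import GridSearch
from pyswarms.utils.search.random_search import RandomSearch

# GridSearch: every candidate value is enumerated explicitly
options = {'c1': [1, 2, 3], 'c2': [1, 2, 3], 'w': [2, 3, 5],
           'k': [5, 10, 15], 'p': 1}
g = GridSearch(LocalBestPSO, n_particles=40, dimensions=20,
               options=options, objective_func=sphere_func, iters=10)
best_score, best_options = g.search()

# RandomSearch: [min, max] bounds are sampled n_selection_iters times
bounds = {'c1': [1, 5], 'c2': [6, 10], 'w': [2, 5], 'k': [11, 15], 'p': 1}
r = RandomSearch(LocalBestPSO, n_particles=40, dimensions=20,
                 options=bounds, objective_func=sphere_func, iters=10,
                 n_selection_iters=100)
best_score, best_options = r.search()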
5 changes: 5 additions & 0 deletions pyswarms/utils/search/__init__.py
@@ -0,0 +1,5 @@
"""
The :mod:`pyswarms.utils.search` module implements various techniques in
hyperparameter value optimization.
"""

114 changes: 114 additions & 0 deletions pyswarms/utils/search/base_search.py
@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-

"""
Base class for hyperparameter optimization search functions.
"""

import operator as op

class SearchBase(object):
def __init__(self, optimizer, n_particles, dimensions, options,
objective_func, iters,
bounds=None, velocity_clamp=None):
"""Initializes the Search.

Attributes
----------
optimizer: pyswarms.single
either LocalBestPSO or GlobalBestPSO
n_particles : int
number of particles in the swarm.
dimensions : int
number of dimensions in the space.
options : dict with keys :code:`{'c1', 'c2', 'w', 'k', 'p'}`
a dictionary containing the parameters for the specific
optimization technique
* c1 : float
cognitive parameter
* c2 : float
social parameter
* w : float
inertia parameter
* k : int
number of neighbors to be considered. Must be a
positive integer less than :code:`n_particles`
* p: int {1,2}
the Minkowski p-norm to use. 1 is the
sum-of-absolute values (or L1 distance) while 2 is
the Euclidean (or L2) distance.
objective_func: function
objective function to be evaluated
iters: int
number of iterations
bounds : tuple of np.ndarray, optional (default is None)
a tuple of size 2 where the first entry is the minimum bound
while the second entry is the maximum bound. Each array must
be of shape :code:`(dimensions,)`.
velocity_clamp : tuple (default is :code:`None`)
a tuple of size 2 where the first entry is the minimum velocity
and the second entry is the maximum velocity. It
sets the limits for velocity clamping.
"""

# Assign attributes
self.optimizer = optimizer
self.n_particles = n_particles
self.dims = dimensions
self.options = options
self.bounds = bounds
self.vclamp = velocity_clamp
self.objective_func = objective_func
self.iters = iters

def generate_score(self, options):
"""Generates score for optimizer's performance on objective function.

Parameters
----------

options: dict
a dict of 5 hyperparameter values ('c1', 'c2', 'w', 'k', 'p')
"""

# Initialize optimizer
f = self.optimizer(self.n_particles, self.dims, options,
self.bounds, self.vclamp)

# Return score
return f.optimize(self.objective_func, self.iters)[0]
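The trailing [0] keeps only the first element of whatever optimize() returns. Assuming the (best_cost, best_pos) convention of the pyswarms optimizers, a self-contained illustration:

import numpy as np

# Hypothetical return value in the assumed (best_cost, best_pos) form
result = (0.498641604188, np.zeros(20))
score = result[0]  # generate_score passes back the cost alone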

def search(self, maximum=False):
"""Compares optimizer's objective function performance scores
for all combinations of provided parameters.

Parameters
----------

maximum: bool
    a bool defaulting to False, which makes search return the minimum
    objective function score. If set to True, search returns the
    maximum score instead.
"""

# Generate the grid of all hyperparameter value combinations
grid = self.generate_grid()

# Calculate scores for all hyperparameter combinations
scores = [self.generate_score(i) for i in grid]

# Select the minimum score by default, or the maximum when the
# maximum flag is set
if maximum:
    idx, self.best_score = max(enumerate(scores), key=op.itemgetter(1))
else:
    idx, self.best_score = min(enumerate(scores), key=op.itemgetter(1))

# Return optimum hyperparameter values from the grid using the index
self.best_options = op.itemgetter(idx)(grid)
return self.best_score, self.best_options
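The enumerate/itemgetter idiom pairs every score with its grid index and compares on the score alone, so idx can recover the matching options dict. A standalone illustration with made-up scores:

import operator as op

scores = [4.2, 0.5, 3.1]
idx, best = min(enumerate(scores), key=op.itemgetter(1))
# idx == 1, best == 0.5; grid[idx] would be the winning options dict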

104 changes: 15 additions & 89 deletions pyswarms/utils/search/grid_search.py
@@ -4,14 +4,14 @@
Hyperparameter grid search.

Compares the relative performance of hyperparameter value combinations in
-reducing a specified objective function.
+optimizing a specified objective function.

For each hyperparameter, user can provide either a single value or a list
-of possible values, and their cartesian product is taken to produce a grid
-of all possible combinations. These combinations are then tested to produce
-a list of objective function scores. The default of the optimize method
-returns the hyperparameters that yield the minimum score, yet maximum score
-can also be evaluated.
+of possible values. The cartesian products of these hyperparameters are taken
+to produce a grid of all possible combinations. These combinations are then
+tested to produce a list of objective function scores. The search method
+default returns the minimum objective function score and hyperparameters that
+yield the minimum score, yet maximum score can also be evaluated.

Parameters
----------
@@ -36,9 +36,9 @@
    'p' : 1}
>>> g = GridSearch(LocalBestPSO, n_particles=40, dimensions=20,
        options=options, objective_func=sphere_func, iters=10)
->>> best_score, best_options = g.optimize()
+>>> best_score, best_options = g.search()
>>> best_score
-301.418815268
+0.498641604188
>>> best_options['c1']
1
>>> best_options['c2']
@@ -48,62 +48,20 @@
import operator as op
import itertools
import numpy as np
+from pyswarms.utils.search.base_search import SearchBase

-class GridSearch(object):
+class GridSearch(SearchBase):
    """Exhaustive search of optimal performance on selected objective function
    over all combinations of specified hyperparameter values."""
ljvmiranda921 (Owner) commented:

Hmmm... I may be wrong or my GitHub is not working properly, but does this need to have an __init__ method similar to RandomSearch()? 😕 😄

You also mentioned the possibility that GridSearch will take very long when we're searching over a lot of hyperparameters with too many iterations 👍. Maybe we can add a logger.warn? Check the implementations in GlobalBestPSO and LocalBestPSO.

There is a logger.info('My information') call in place of print() functions. I also initialized it with logger = logging.getLogger(__name__). You can do the same thing but call logger.warn() instead.

Thus we can have something like:

if condition_is_met:
    logger.warn('Using GridSearch will take so much time')
# of course you can write a better warning than mine hehe

This will still run a search, only with an additional warning.
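For reference, a minimal sketch of the suggested pattern, combining the module-level logger with a warning inside search(); the grid-size threshold is a hypothetical condition, not something this PR specifies:

import logging

logger = logging.getLogger(__name__)

def search(self, maximum=False):
    grid = self.generate_grid()
    # Hypothetical threshold: warn before scoring a very large grid
    if len(grid) > 1000:
        logger.warning('GridSearch over %d combinations may take a long time',
                       len(grid))
    ...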

SioKCronin (Collaborator, Author) commented on Sep 12, 2017:

Ha! Yes, I thought I was being clever: if no new attributes are added in the GridSearch __init__, it could just inherit SearchBase's __init__, which would be retrieved when creating a GridSearch instance. But looking at it now, I think it's clearer to call the super().__init__ explicitly.

def __init__(self, optimizer, n_particles, dimensions, options,
             objective_func, iters, bounds=None, velocity_clamp=None):
    """Initializes the paramsearch."""

    # Assign attributes
    super().__init__(optimizer, n_particles, dimensions, options,
                     objective_func, iters, bounds=bounds,
                     velocity_clamp=velocity_clamp)

SioKCronin (Collaborator, Author) commented on Sep 12, 2017:

Thanks for pointing me to logger.warning. In looking at search I realized I'm not sure how much of an issue runtime will be, even for GridSearch with lots of hyperparameter options. My hunch was that runtime on big search spaces would need to be flagged for users, but I don't have a good sense of when a user would input a really long list for each of the 'c' weights, for instance.

What I'd like to do next is write up an example for the docs on tuning PSO hyperparameters that shows these two methods in action, and use that writing as a chance to read the literature on conventions for setting PSO hyperparameters. I think this will help me get a better sense of what kinds of situations might come up. 📚

What I can see happening is including logger.warn, but also having clear documentation that speaks to the conventions for setting options (and perhaps resources linked in the Example) that would steer users away from setting search values that don't quite make sense and might take too long to run.

ljvmiranda921 (Owner) commented on Sep 13, 2017:

Actually, I realized that it's kinda hard to pinpoint when to issue a warning based on user input. I believe your plan is good for now 👍

Regarding the __init__, the reason why I often add the super() is that it enables me to pass defaults into the base class. So just in case I want defaults in GridSearch etc., I just declare them in GridSearch's init, then do the super. But for now I guess this implementation is fine.

def __init__(self, optimizer, n_particles, dimensions, options,
             objective_func, iters, bounds=None, velocity_clamp=None):
-    """Initializes the GridSearch.
-
-    Attributes
-    ----------
-    optimizer: PySwarms class
-        either LocalBestPSO or GlobalBestPSO
-    n_particles : int
-        number of particles in the swarm.
-    dimensions : int
-        number of dimensions in the space.
-    options : dict with keys :code:`{'c1', 'c2', 'w', 'k', 'p'}`
-        a dictionary containing the parameters for the specific
-        optimization technique
-            * c1 : float
-                cognitive parameter
-            * c2 : float
-                social parameter
-            * w : float
-                inertia parameter
-            * k : int
-                number of neighbors to be considered. Must be a
-                positive integer less than :code:`n_particles`
-            * p: int {1,2}
-                the Minkowski p-norm to use. 1 is the
-                sum-of-absolute values (or L1 distance) while 2 is
-                the Euclidean (or L2) distance.
-    objective_func: function
-        objective function to be evaluated
-    iters: int
-        number of iterations
-    bounds : tuple of np.ndarray, optional (default is None)
-        a tuple of size 2 where the first entry is the minimum bound
-        while the second entry is the maximum bound. Each array must
-        be of shape :code:`(dimensions,)`.
-    velocity_clamp : tuple (default is :code:`None`)
-        a tuple of size 2 where the first entry is the minimum velocity
-        and the second entry is the maximum velocity. It
-        sets the limits for velocity clamping.
-    """
+    """Initializes the paramsearch."""

    # Assign attributes
-    self.optimizer = optimizer
-    self.n_particles = n_particles
-    self.dims = dimensions
-    self.options = options
-    self.bounds = bounds
-    self.vclamp = velocity_clamp
-    self.objective_func = objective_func
-    self.iters = iters
+    super().__init__(optimizer, n_particles, dimensions, options,
+                     objective_func, iters, bounds=bounds,
+                     velocity_clamp=velocity_clamp)

def generate_grid(self):
"""Generates the grid of all hyperparameter value combinations."""
@@ -119,35 +77,3 @@ def generate_grid(self):
#Return list of dicts for all hyperparameter value combinations
return [dict(zip(*[params, list(x)])) for x in list_of_products]
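The body of generate_grid is collapsed in this view; only its return statement is shown. Based on that statement and the module docstring (each hyperparameter is a single value or a list), a sketch of what the expansion presumably looks like:

import itertools

def generate_grid(self):
    """Generates the grid of all hyperparameter value combinations."""
    params = self.options.keys()
    # Wrap bare scalars in one-element lists so product() can expand them
    items = [x if isinstance(x, list) else [x]
             for x in self.options.values()]
    # Cartesian product across every hyperparameter's value list
    list_of_products = list(itertools.product(*items))
    # Return list of dicts for all hyperparameter value combinations
    return [dict(zip(*[params, list(x)])) for x in list_of_products]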

-def generate_score(self, options):
-    """Generates score for optimizer's performance on objective function."""
-
-    #Intialize optimizer
-    f = self.optimizer(self.n_particles, self.dims, options,
-                       self.bounds, self.vclamp)
-
-    #Return score
-    return f.optimize(self.objective_func, self.iters)[0]
-
-def search(self, maximum=False):
-    """Compares optimizer's objective function performance scores
-    for all combinations of provided parameters."""
-
-    #Assign parameter keys
-    params = self.options.keys()
-
-    #Generate the grid of all hyperparameter value combinations
-    grid = self.generate_grid()
-
-    #Calculate scores for all hyperparameter combinations
-    scores = [self.generate_score(i) for i in grid]
-
-    #Select optimization function
-    if maximum:
-        idx, self.best_score = max(enumerate(scores), key=op.itemgetter(1))
-    else:
-        idx, self.best_score = min(enumerate(scores), key=op.itemgetter(1))
-
-    #Return optimum hyperparameter value property from grid using index
-    self.best_options = op.itemgetter(idx)(grid)
-    return self.best_score, self.best_options
109 changes: 109 additions & 0 deletions pyswarms/utils/search/random_search.py
@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-

"""
Hyperparameter random search.

Compares the relative performance of combinations of randomly generated
hyperparameter values in optimizing a specified objective function.

The user provides lists of bounds for the uniform random value generation of
'c1', 'c2', and 'w', and for the random integer value generation of 'k'.
Combinations of values are generated for the number of iterations specified,
and the generated grid of combinations is used in the search method to find
the optimal parameters for the objective function. The search method default
returns the minimum objective function score and hyperparameters that yield
the minimum score, yet maximum score can also be evaluated.

Parameters
----------
* c1 : float
cognitive parameter
* c2 : float
social parameter
* w : float
inertia parameter
* k : int
number of neighbors to be considered. Must be a
positive integer less than `n_particles`
* p: int {1,2}
the Minkowski p-norm to use. 1 is the
sum-of-absolute values (or L1 distance) while 2 is
the Euclidean (or L2) distance.

>>> options = {'c1': [1, 5],
'c2': [6, 10],
'w' : [2, 5],
'k' : [11, 15],
'p' : 1}
>>> g = RandomSearch(LocalBestPSO, n_particles=40, dimensions=20,
        options=options, objective_func=sphere_func, iters=10,
        n_selection_iters=100)
>>> best_score, best_options = g.search()
>>> best_score
1.41978545901
>>> best_options['c1']
1.543556887693
>>> best_options['c2']
9.504769054771
"""

import numpy as np
from pyswarms.utils.search.base_search import SearchBase

from past.builtins import xrange

class RandomSearch(SearchBase):
"""Search of optimal performance on selected objective function
over combinations of randomly selected hyperparameter values
within specified bounds for specified number of selection iterations."""

def __init__(self, optimizer, n_particles, dimensions, options,
objective_func, iters, n_selection_iters,
bounds=None, velocity_clamp=None):
"""Initializes the paramsearch.

Attributes
----------
n_selection_iters: int
number of iterations of random parameter selection
"""

# Assign attributes
super().__init__(optimizer, n_particles, dimensions, options,
objective_func, iters, bounds=bounds,
velocity_clamp=velocity_clamp)
self.n_selection_iters = n_selection_iters

def generate_grid(self):
"""Generates the grid of hyperparameter value combinations."""

options = dict(self.options)
params = {}

# Remove 'p' to hold it as a constant in the parameter combinations
p = options.pop('p')
params['p'] = [p for _ in xrange(self.n_selection_iters)]

#Assign generators based on parameter type
param_generators = {
'c1': np.random.uniform,
'c2': np.random.uniform,
'w': np.random.uniform,
'k': np.random.randint
}

#Generate random values for hyperparameters 'c1', 'c2', 'w', and 'k'
for idx, bounds in options.items():
params[idx] = param_generators[idx](
*bounds, size=self.n_selection_iters)

#Return list of dicts of hyperparameter combinations
return [{'c1': params['c1'][i],
'c2': params['c2'][i],
'w': params['w'][i],
'k': params['k'][i],
'p': params['p'][i]}
for i in xrange(self.n_selection_iters)]
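For intuition, the two generator calls unpack each [min, max] pair into positional arguments; the sampled outputs below are illustrative, not reproducible values:

import numpy as np

# 'c1' bounds [1, 5] become np.random.uniform(1, 5, size=n)
np.random.uniform(1, 5, size=3)    # e.g. array([2.71, 1.54, 4.02])

# 'k' bounds [11, 15] become np.random.randint(11, 15, size=n);
# note the upper bound is exclusive, so k == 15 is never drawn
np.random.randint(11, 15, size=3)  # e.g. array([12, 11, 14])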
