From 96f7cbee5a80beb9eea9648da70916dc54a4f1a0 Mon Sep 17 00:00:00 2001 From: SioKCronin Date: Sun, 10 Sep 2017 09:54:55 -0700 Subject: [PATCH] Add BaseSearch and RandomSearch This commit adds a BaseSearch class for GridSearch and RandomSearch, which defines an __init__ that sets all attributes for instances as well as score and search methods. This commit also adds RandomSearch, which provides search for optimal performance on objective function over combinations of hyperparameter values with specified bounds for specified number of combination iterations. This commit includes 'xrange' functionality for RandomSearch to support Python 2.7 Author: SioKCronin Email: siobhankcronin@gmail.com --- docs/features.rst | 9 ++ pyswarms/utils/search/__init__.py | 5 ++ pyswarms/utils/search/base_search.py | 114 ++++++++++++++++++++++++ pyswarms/utils/search/grid_search.py | 104 ++++----------------- pyswarms/utils/search/random_search.py | 109 ++++++++++++++++++++++ tests/utils/search/test_gridsearch.py | 6 +- tests/utils/search/test_randomsearch.py | 71 +++++++++++++++ 7 files changed, 326 insertions(+), 92 deletions(-) create mode 100644 pyswarms/utils/search/__init__.py create mode 100644 pyswarms/utils/search/base_search.py create mode 100644 pyswarms/utils/search/random_search.py create mode 100644 tests/utils/search/test_randomsearch.py diff --git a/docs/features.rst b/docs/features.rst index 85289761..d859aab9 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -38,3 +38,12 @@ These functions can be used as benchmark tests for assessing the performance of algorithm. * :mod:`pyswarms.utils.functions.single_obj` - single-objective test functions + +Search +~~~~~~ + +These search methods can be used to compare the relative performance of hyperparameter value combinations in reducing a specified objective function. 
+ +* :mod:`pyswarms.utils.search.grid_search` - exhaustive search of optimal performance on selected objective function over cartesian products of provided hyperparameter values + +* :mod:`pyswarms.utils.search.random_search` - search for optimal performance on selected objective function over combinations of randomly selected hyperparameter values within specified bounds for specified number of selection iterations diff --git a/pyswarms/utils/search/__init__.py b/pyswarms/utils/search/__init__.py new file mode 100644 index 00000000..ca62bc3d --- /dev/null +++ b/pyswarms/utils/search/__init__.py @@ -0,0 +1,5 @@ +""" +The :mod:`pyswarms.utils.search` module implements various techniques in +hyperparameter value optimization. +""" + diff --git a/pyswarms/utils/search/base_search.py b/pyswarms/utils/search/base_search.py new file mode 100644 index 00000000..8325e728 --- /dev/null +++ b/pyswarms/utils/search/base_search.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- + +""" +Base class for hyperparameter optimization search functions. +""" + +import operator as op +import itertools +import numpy as np + +class SearchBase(object): + def __init__(self, optimizer, n_particles, dimensions, options, + objective_func, iters, + bounds=None, velocity_clamp=None): + """Initializes the Search. + + Attributes + ---------- + optimizer: pyswarms.single + either LocalBestPSO or GlobalBestPSO + n_particles : int + number of particles in the swarm. + dimensions : int + number of dimensions in the space. + options : dict with keys :code:`{'c1', 'c2', 'w', 'k', 'p'}` + a dictionary containing the parameters for the specific + optimization technique + * c1 : float + cognitive parameter + * c2 : float + social parameter + * w : float + inertia parameter + * k : int + number of neighbors to be considered. Must be a + positive integer less than :code:`n_particles` + * p: int {1,2} + the Minkowski p-norm to use. 
1 is the + sum-of-absolute values (or L1 distance) while 2 is + the Euclidean (or L2) distance. + objective_func: function + objective function to be evaluated + iters: int + number of iterations + bounds : tuple of np.ndarray, optional (default is None) + a tuple of size 2 where the first entry is the minimum bound + while the second entry is the maximum bound. Each array must + be of shape :code:`(dimensions,)`. + velocity_clamp : tuple (default is :code:`None`) + a tuple of size 2 where the first entry is the minimum velocity + and the second entry is the maximum velocity. It + sets the limits for velocity clamping. + """ + + # Assign attributes + self.optimizer = optimizer + self.n_particles = n_particles + self.dims = dimensions + self.options = options + self.bounds = bounds + self.vclamp = velocity_clamp + self.objective_func = objective_func + self.iters = iters + + def generate_score(self, options): + """Generates score for optimizer's performance on objective function. + + Parameters + ---------- + + options: dict + a dict of 5 hyperparameter values ('c1', 'c2', 'w', 'k', 'p') + """ + + #Initialize optimizer + f = self.optimizer(self.n_particles, self.dims, options, + self.bounds, self.vclamp) + + #Return score + return f.optimize(self.objective_func, self.iters)[0] + + def search(self, maximum=False): + """Compares optimizer's objective function performance scores + for all combinations of provided parameters. + + Parameters + ---------- + + maximum: bool + a bool defaulting to False, returning the minimum value for the + objective function. If set to True, will return the maximum value + for the objective function. 
+ """ + + #Assign parameter keys + params = self.options.keys() + + #Generate the grid of all hyperparameter value combinations + grid = self.generate_grid() + + #Calculate scores for all hyperparameter combinations + scores = [self.generate_score(i) for i in grid] + + #Default behavior + idx, self.best_score = min(enumerate(scores), key=op.itemgetter(1)) + + #Catches the maximum bool flag + if maximum: + idx, self.best_score = max(enumerate(scores), key=op.itemgetter(1)) + + #Return optimum hyperparameter value property from grid using index + self.best_options = op.itemgetter(idx)(grid) + return self.best_score, self.best_options + diff --git a/pyswarms/utils/search/grid_search.py b/pyswarms/utils/search/grid_search.py index eb712aa0..44ef7c75 100644 --- a/pyswarms/utils/search/grid_search.py +++ b/pyswarms/utils/search/grid_search.py @@ -4,14 +4,14 @@ Hyperparameter grid search. Compares the relative performance of hyperparameter value combinations in -reducing a specified objective function. +optimizing a specified objective function. For each hyperparameter, user can provide either a single value or a list -of possible values, and their cartesian product is taken to produce a grid -of all possible combinations. These combinations are then tested to produce -a list of objective function scores. The default of the optimize method -returns the hyperparameters that yield the minimum score, yet maximum score -can also be evaluated. +of possible values. The cartesian products of these hyperparameters are taken +to produce a grid of all possible combinations. These combinations are then +tested to produce a list of objective function scores. The search method +default returns the minimum objective function score and hyperparameters that +yield the minimum score, yet maximum score can also be evaluated. 
Parameters ---------- @@ -36,9 +36,9 @@ 'p' : 1} >>> g = GridSearch(LocalBestPSO, n_particles=40, dimensions=20, options=options, objective_func=sphere_func, iters=10) ->>> best_score, best_options = g.optimize() +>>> best_score, best_options = g.search() >>> best_score -301.418815268 +0.498641604188 >>> best_options['c1'] 1 >>> best_options['c2'] @@ -48,62 +48,20 @@ import operator as op import itertools import numpy as np +from pyswarms.utils.search.base_search import SearchBase -class GridSearch(object): +class GridSearch(SearchBase): """Exhaustive search of optimal performance on selected objective function over all combinations of specified hyperparameter values.""" def __init__(self, optimizer, n_particles, dimensions, options, - objective_func, iters, bounds=None, velocity_clamp=None): - """Initializes the GridSearch. - - Attributes - ---------- - optimizer: PySwarms class - either LocalBestPSO or GlobalBestPSO - n_particles : int - number of particles in the swarm. - dimensions : int - number of dimensions in the space. - options : dict with keys :code:`{'c1', 'c2', 'w', 'k', 'p'}` - a dictionary containing the parameters for the specific - optimization technique - * c1 : float - cognitive parameter - * c2 : float - social parameter - * w : float - inertia parameter - * k : int - number of neighbors to be considered. Must be a - positive integer less than :code:`n_particles` - * p: int {1,2} - the Minkowski p-norm to use. 1 is the - sum-of-absolute values (or L1 distance) while 2 is - the Euclidean (or L2) distance. - objective_func: function - objective function to be evaluated - iters: int - number of iterations - bounds : tuple of np.ndarray, optional (default is None) - a tuple of size 2 where the first entry is the minimum bound - while the second entry is the maximum bound. Each array must - be of shape :code:`(dimensions,)`. 
- velocity_clamp : tuple (default is :code:`None`) - a tuple of size 2 where the first entry is the minimum velocity - and the second entry is the maximum velocity. It - sets the limits for velocity clamping. - """ + objective_func, iters,bounds=None, velocity_clamp=None): + """Initializes the paramsearch.""" # Assign attributes - self.optimizer = optimizer - self.n_particles = n_particles - self.dims = dimensions - self.options = options - self.bounds = bounds - self.vclamp = velocity_clamp - self.objective_func = objective_func - self.iters = iters + super().__init__(optimizer, n_particles, dimensions, options, + objective_func, iters, bounds=bounds, + velocity_clamp=velocity_clamp) def generate_grid(self): """Generates the grid of all hyperparameter value combinations.""" @@ -119,35 +77,3 @@ def generate_grid(self): #Return list of dicts for all hyperparameter value combinations return [dict(zip(*[params, list(x)])) for x in list_of_products] - def generate_score(self, options): - """Generates score for optimizer's performance on objective function.""" - - #Intialize optimizer - f = self.optimizer(self.n_particles, self.dims, options, - self.bounds, self.vclamp) - - #Return score - return f.optimize(self.objective_func, self.iters)[0] - - def search(self, maximum=False): - """Compares optimizer's objective function performance scores - for all combinations of provided parameters.""" - - #Assign parameter keys - params = self.options.keys() - - #Generate the grid of all hyperparameter value combinations - grid = self.generate_grid() - - #Calculate scores for all hyperparameter combinations - scores = [self.generate_score(i) for i in grid] - - #Select optimization function - if maximum: - idx, self.best_score = max(enumerate(scores), key=op.itemgetter(1)) - else: - idx, self.best_score = min(enumerate(scores), key=op.itemgetter(1)) - - #Return optimum hyperparameter value property from grid using index - self.best_options = op.itemgetter(idx)(grid) - return 
self.best_score, self.best_options diff --git a/pyswarms/utils/search/random_search.py b/pyswarms/utils/search/random_search.py new file mode 100644 index 00000000..e509afe2 --- /dev/null +++ b/pyswarms/utils/search/random_search.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- + +""" +Hyperparameter random search. + +Compares the relative performance of combinations of randomly generated +hyperparameter values in optimizing a specified objective function. + +User provides lists of bounds for the uniform random value generation of +'c1', 'c2', and 'w', and the random integer value generation of 'k'. +Combinations of values are generated for the number of iterations specified, +and the generated grid of combinations is used in the search method to find +the optimal parameters for the objective function. The search method default +returns the minimum objective function score and hyperparameters that yield +the minimum score, yet maximum score can also be evaluated. + +Parameters +---------- +* c1 : float + cognitive parameter +* c2 : float + social parameter +* w : float + inertia parameter +* k : int + number of neighbors to be considered. Must be a + positive integer less than `n_particles` +* p: int {1,2} + the Minkowski p-norm to use. 1 is the + sum-of-absolute values (or L1 distance) while 2 is + the Euclidean (or L2) distance. 
+ +>>> options = {'c1': [1, 5], + 'c2': [6, 10], + 'w' : [2, 5], + 'k' : [11, 15], + 'p' : 1} +>>> g = RandomSearch(LocalBestPSO, n_particles=40, dimensions=20, + options=options, objective_func=sphere_func, iters=10) +>>> best_score, best_options = g.search() +>>> best_score +1.41978545901 +>>> best_options['c1'] +1.543556887693 +>>> best_options['c2'] +9.504769054771 +""" + +import operator as op +import itertools +import numpy as np +import random +from pyswarms.utils.search.base_search import SearchBase + +from past.builtins import xrange + +class RandomSearch(SearchBase): + """Search of optimal performance on selected objective function + over combinations of randomly selected hyperparameter values + within specified bounds for specified number of selection iterations.""" + + def __init__(self, optimizer, n_particles, dimensions, options, + objective_func, iters, n_selection_iters, + bounds=None, velocity_clamp=None): + """Initializes the paramsearch. + + Attributes + ---------- + n_selection_iters: int + number of iterations of random parameter selection + """ + + # Assign attributes + super().__init__(optimizer, n_particles, dimensions, options, + objective_func, iters, bounds=bounds, + velocity_clamp=velocity_clamp) + self.n_selection_iters = n_selection_iters + + def generate_grid(self): + """Generates the grid of hyperparameter value combinations.""" + + options = dict(self.options) + params = {} + + #Remove 'p' to hold as a constant in the parameter combinations + p = options.pop('p') + params['p'] = [p for _ in xrange(self.n_selection_iters)] + + #Assign generators based on parameter type + param_generators = { + 'c1': np.random.uniform, + 'c2': np.random.uniform, + 'w': np.random.uniform, + 'k': np.random.randint + } + + #Generate random values for hyperparameters 'c1', 'c2', 'w', and 'k' + for idx, bounds in options.items(): + params[idx] = param_generators[idx]( + *bounds, size=self.n_selection_iters) + + #Return list of dicts of hyperparameter 
combinations + return [{'c1': params['c1'][i], + 'c2': params['c2'][i], + 'w': params['w'][i], + 'k': params['k'][i], + 'p': params['p'][i]} + for i in xrange(self.n_selection_iters)] + diff --git a/tests/utils/search/test_gridsearch.py b/tests/utils/search/test_gridsearch.py index 3d8729aa..cf3e57f3 100644 --- a/tests/utils/search/test_gridsearch.py +++ b/tests/utils/search/test_gridsearch.py @@ -28,8 +28,8 @@ def setUp(self): self.iters = 10 self.objective_func = sphere_func - def test_optimize(self): - """Tests if the optimize method returns expected values.""" + def test_search(self): + """Tests if the search method returns expected values.""" g = GridSearch(self.optimizer, self.n_particles, self.dimensions, self.options, self.objective_func, self.iters, bounds=None, velocity_clamp=None) @@ -37,7 +37,7 @@ def test_optimize(self): minimum_best_score, minimum_best_options = g.search() maximum_best_score, maximum_best_options = g.search(maximum=True) - # Test method returns a dict + # Test search method returns a dict self.assertEqual(type(minimum_best_options), dict) self.assertEqual(type(maximum_best_options), dict) diff --git a/tests/utils/search/test_randomsearch.py b/tests/utils/search/test_randomsearch.py new file mode 100644 index 00000000..8af3c208 --- /dev/null +++ b/tests/utils/search/test_randomsearch.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" Unit testing for pyswarms.random_search""" + +# Import modules +import unittest +import numpy as np + +from pyswarms.utils.search.random_search import RandomSearch +from pyswarms.single import LocalBestPSO +from pyswarms.single import GlobalBestPSO +from pyswarms.utils.functions.single_obj import sphere_func + +class TestRandomSearch(unittest.TestCase): + + def setUp(self): + """Sets up test fixtures""" + self.optimizer = LocalBestPSO + self.n_particles = 40 + self.dimensions = 20 + self.options = {'c1': [1, 5], + 'c2': [6, 10], + 'k' : [11, 15], + 'w' : [0.4, 0.9], + 'p' : 1} + 
self.bounds = (np.array([-5,-5]), np.array([5,5])) + self.iters = 10 + self.n_selection_iters = 100 + self.objective_func = sphere_func + + def test_generate_grid(self): + """Tests if generate_grid function returns expected values.""" + g = RandomSearch(self.optimizer, self.n_particles, self.dimensions, + self.options, self.objective_func, self.iters, + self.n_selection_iters, + self.bounds, velocity_clamp=None) + + #Test that the number of combinations in grid equals + #the number of parameter selection iterations specified + grid = g.generate_grid() + self.assertEqual(len(grid), self.n_selection_iters) + + #Test that generated values are correctly mapped to each parameter + #and are within the specified bounds + for i in ['c1','c2','k','w']: + values = [x[i] for x in grid] + for j in values: + self.assertGreaterEqual(j, self.options[i][0]) + self.assertLessEqual(j, self.options[i][1]) + + def test_search(self): + """Tests if the search method returns expected values.""" + g = RandomSearch(self.optimizer, self.n_particles, self.dimensions, + self.options, self.objective_func, self.iters, + self.n_selection_iters, + bounds=None, velocity_clamp=None) + + minimum_best_score, minimum_best_options = g.search() + maximum_best_score, maximum_best_options = g.search(maximum=True) + + # Test method returns a dict + self.assertEqual(type(minimum_best_options), dict) + self.assertEqual(type(maximum_best_options), dict) + + # The scores could be equal, but for our test case the + # max score is greater than the min. + self.assertGreater(maximum_best_score, minimum_best_score) + +if __name__ == '__main__': + unittest.main()