def _bucket_sort_helper(bucket: Array) -> Array:
    """
    Sorts a single bucket in place using insertion sort.

    Parameters
    ==========

    bucket: list
        The elements of one bucket. Elements must be mutually
        comparable with ``>``.

    Returns
    =======

    bucket: list
        The same object that was passed in, now in ascending order.
    """
    for i in range(1, len(bucket)):
        key = bucket[i]
        j = i - 1
        # Shift every element greater than key one slot to the right.
        while j >= 0 and bucket[j] > key:
            bucket[j + 1] = bucket[j]
            j -= 1
        bucket[j + 1] = key
    return bucket

def bucket_sort(array: Array, **kwargs) -> Array:
    """
    Performs bucket sort on the given array.

    Parameters
    ==========

    array: Array
        The array which is to be sorted.
    start: int
        The starting index of the portion
        which is to be sorted.
        Optional, by default 0
    end: int
        The ending index of the portion which
        is to be sorted.
        Optional, by default the index
        of the last position filled.

    Returns
    =======

    output: Array
        The sorted array.

    Examples
    ========

    >>> from pydatastructs import DynamicOneDimensionalArray as DODA, bucket_sort
    >>> arr = DODA(int, [5, 78, 1, 0])
    >>> out = bucket_sort(arr)
    >>> str(out)
    "['0', '1', '5', '78']"
    >>> arr.delete(2)
    >>> out = bucket_sort(arr)
    >>> str(out)
    "['0', '1', '78']"

    References
    ==========

    .. [1] https://en.wikipedia.org/wiki/Bucket_sort

    Note
    ====

    This function does not support custom comparators as is the case with
    other sorting functions in this file.
    The elements must be numeric (they are subtracted and divided to
    pick a bucket); negative values are supported.
    `None` entries inside the sorted range are moved to the end of that
    range. If nothing in the range is non-`None`, the array is returned
    with that range unchanged in content.
    """
    start = kwargs.get('start', 0)
    end = kwargs.get('end', len(array) - 1)

    # Collect the non-None values once; everything below works on this list.
    values = []
    for i in range(start, end + 1):
        item = array[i]
        if item is not None:
            values.append(item)

    count = len(values)
    position = start
    if count > 0:
        lo, hi = min(values), max(values)
        span = hi - lo
        last = count - 1

        # One bucket per element, as in the classic formulation.
        buckets_list = [[] for _ in range(count)]

        for item in values:
            if span == 0:
                # All values are equal; a single bucket is enough.
                j = 0
            else:
                # Scale (item - lo) from [0, span] onto [0, last]. The
                # mapping is monotone in item, so concatenating the sorted
                # buckets yields a sorted sequence. Offsetting by lo keeps
                # the index non-negative for negative inputs, and the clamp
                # guards against floating point rounding at the top end.
                j = min(int((item - lo) * last // span), last)
            buckets_list[j].append(item)

        # Sort each bucket with insertion sort and write the values back
        # to the front of the range in bucket order.
        for bucket in buckets_list:
            for item in _bucket_sort_helper(bucket):
                array[position] = item
                position += 1

    # Whatever is left of the range held None values; park them at the end.
    for i in range(position, end + 1):
        array[i] = None

    if _check_type(array, DynamicArray):
        array._modify(force=True)
    return array
def test_bucket_sort():
    """Run the shared sorting checks in ``_test_common_sort`` on ``bucket_sort``."""
    sort_under_test = bucket_sort
    _test_common_sort(sort_under_test)