From 6473abd5094bbdc70881a9d687f72f359f3540ef Mon Sep 17 00:00:00 2001 From: halilsen Date: Thu, 28 Jan 2021 14:13:33 +0100 Subject: [PATCH 1/7] Cleanup for relation implementation --- lib/balanced_vrp_clustering.rb | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/balanced_vrp_clustering.rb b/lib/balanced_vrp_clustering.rb index 8e11b5c..df6f64d 100644 --- a/lib/balanced_vrp_clustering.rb +++ b/lib/balanced_vrp_clustering.rb @@ -30,6 +30,8 @@ require 'color-generator' require 'geojson2image' +INCOMPATIBILITY_DISTANCE_PENALTY = 2**32 + module Ai4r module Clusterers class BalancedVRPClustering < KMeans @@ -69,7 +71,7 @@ def build(data_set, cut_symbol, cut_ratio = 1.0, options = {}) # First of all, set and display the seed options[:seed] ||= Random.new_seed - @logger&.debug "Clustering with seed=#{options[:seed]}" + @logger&.info "Clustering with seed=#{options[:seed]}" srand options[:seed] # DEPRECATED variables (to be removed before public release) @@ -429,7 +431,7 @@ def evaluate(data_item) distances = @centroids.collect.with_index{ |centroid, cluster_index| dist = distance(data_item, centroid, cluster_index) - dist += 2**32 unless @compatibility_function.call(data_item, centroid) + dist += INCOMPATIBILITY_DISTANCE_PENALTY unless @compatibility_function.call(data_item, centroid) dist } @@ -437,7 +439,7 @@ def evaluate(data_item) closest_cluster_index = get_min_index(distances) if capactity_violation?(data_item, closest_cluster_index) - mininimum_without_limit_violation = 2**32 # only consider compatible ones + mininimum_without_limit_violation = INCOMPATIBILITY_DISTANCE_PENALTY # only consider compatible ones closest_cluster_wo_violation_index = nil @number_of_clusters.times{ |k| next unless distances[k] < mininimum_without_limit_violation && @@ -480,13 +482,17 @@ def calculate_membership_clusters @data_set.data_items.each{ |data_item| cluster_index = evaluate(data_item) - @clusters[cluster_index] << data_item + 
assign_item(data_item, cluster_index) update_metrics(data_item, cluster_index) } manage_empty_clusters if has_empty_cluster? end + def assign_item(data_item, cluster_index) + @clusters[cluster_index] << data_item + end + def calc_initial_centroids @centroids, @old_centroids_lat_lon = [], nil if @centroid_indices.empty? From 26e806e09248b5967ce560ba2206b750b6ca47f1 Mon Sep 17 00:00:00 2001 From: halilsen Date: Mon, 8 Mar 2021 16:57:30 +0100 Subject: [PATCH 2/7] Remove dead code --- lib/balanced_vrp_clustering.rb | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/lib/balanced_vrp_clustering.rb b/lib/balanced_vrp_clustering.rb index df6f64d..4f5be3a 100644 --- a/lib/balanced_vrp_clustering.rb +++ b/lib/balanced_vrp_clustering.rb @@ -478,7 +478,6 @@ def calculate_membership_clusters @clusters = Array.new(@number_of_clusters) do Ai4r::Data::DataSet.new data_labels: @data_set.data_labels end - @cluster_indices = Array.new(@number_of_clusters){ [] } @data_set.data_items.each{ |data_item| cluster_index = evaluate(data_item) @@ -617,22 +616,6 @@ def manage_empty_clusters } end - def eliminate_empty_clusters - old_clusters, old_centroids, old_cluster_indices = @clusters, @centroids, @cluster_indices - @clusters, @centroids, @cluster_indices = [], [], [] - @remaining_skills = [] - @number_of_clusters.times do |i| - if old_clusters[i].data_items.empty? - @remaining_skills << old_centroids[i][4] - else - @clusters << old_clusters[i] - @cluster_indices << old_cluster_indices[i] - @centroids << old_centroids[i] - end - end - @number_of_clusters = @centroids.length - end - def stop_criteria_met centroids_converged_or_in_loop(Math.sqrt(@iteration).to_i) && # This check should stay first since it keeps track of the centroid movements.. @limit_violation_count.zero? && # Do not converge if a decision is taken due to limit violation. 
From 932e0f9619eb80f5bcf86ccb25a06a6410605881 Mon Sep 17 00:00:00 2001 From: halilsen Date: Mon, 8 Mar 2021 17:41:44 +0100 Subject: [PATCH 3/7] Introduce relations and preprocessing --- README.md | 7 ++-- lib/balanced_vrp_clustering.rb | 58 +++++++++++++++++++++++++++++++++- test/clustering_test.rb | 37 +++++++++++++++++++--- 3 files changed, 94 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index fa411b5..aae9dae 100644 --- a/README.md +++ b/README.md @@ -54,10 +54,13 @@ data_items = items.collect{ |i| "matrix_index": only if any matrix was provided } } -cut_symbol = :duration # or any other unit (:unit_1, :unit_2) to be used for balancig clusters +cut_symbol = :duration # or any other unit (:unit_1, :unit_2) to be used for balancing clusters +related_item_indices = { shipment: [[0, 1], [2, 3]], same_route: [[4, 5]]} # Available relations are as follows + # LINKING_RELATIONS = %i[order same_route sequence shipment] + # BINDING_RELATIONS = %i[order same_route sequence] ratio = 1 # by default, used to over/underestimate vehicles limits c.logger = Logger.new(STDOUT) # for debug output -c.build(DataSet.new(data_items: data_items), cut_symbol, ratio, options)``` +c.build(DataSet.new(data_items: data_items), cut_symbol, related_item_indices, ratio, options)``` cut_symbol is the referent unit to use when balancing clusters. This unit should exist in both vehicles and data_items structures. 
``` diff --git a/lib/balanced_vrp_clustering.rb b/lib/balanced_vrp_clustering.rb index 4f5be3a..7adc0fd 100644 --- a/lib/balanced_vrp_clustering.rb +++ b/lib/balanced_vrp_clustering.rb @@ -35,6 +35,9 @@ module Ai4r module Clusterers class BalancedVRPClustering < KMeans + LINKING_RELATIONS = %i[order same_route sequence shipment].freeze + BINDING_RELATIONS = %i[order same_route sequence].freeze + include OverloadableFunctions attr_reader :iteration @@ -59,7 +62,7 @@ def initialize @on_empty = 'closest' # the other options are not available end - def build(data_set, cut_symbol, cut_ratio = 1.0, options = {}) + def build(data_set, cut_symbol, related_item_indices = {}, cut_ratio = 1.0, options = {}) # Build a new clusterer, using data items found in data_set. # Items will be clustered in "number_of_clusters" different # clusters. Each item is defined by : @@ -101,6 +104,8 @@ def build(data_set, cut_symbol, cut_ratio = 1.0, options = {}) raise ArgumentError, 'All vehicles should have a limit for the unit corresponding to the cut symbol' end + connect_linked_items(data_set.data_items, related_item_indices) + ### values ### @data_set = data_set @cut_symbol = cut_symbol @@ -224,6 +229,57 @@ def build(data_set, cut_symbol, cut_ratio = 1.0, options = {}) self end + def connect_linked_items(data_items, related_item_indices) + (LINKING_RELATIONS | BINDING_RELATIONS).each{ |relation| + related_item_indices[relation]&.each{ |linked_indices| + raise ArgumentError, 'Each relation group of related_item_indices should contain only unique indices' unless linked_indices.uniq.size == linked_indices.size + } + } + + (LINKING_RELATIONS - BINDING_RELATIONS).each{ |relation| + related_item_indices[relation]&.each{ |linked_indices| + raise ArgumentError, 'A service should not appear in multiple non-binding linking relations' if linked_indices.any?{ |ind| data_items[ind][4].key?(:linked_item) } + + linked_indices << linked_indices.first # create a loop + (linked_indices.size - 1).times{ |i| 
+ item = data_items[linked_indices[i]] + next_item = data_items[linked_indices[i + 1]] + item[4][:linked_item] = next_item + } + } + } + + BINDING_RELATIONS.each{ |relation| + related_item_indices[relation]&.each{ |linked_indices| + linked_indices << linked_indices.first # create a loop + (linked_indices.size - 1).times{ |i| + item = data_items[linked_indices[i]] + next_item = data_items[linked_indices[i + 1]] + if !item[4].key?(:linked_item) && !next_item[4].key?(:linked_item) + item[4][:linked_item] = next_item + elsif item[4].key?(:linked_item) && next_item[4].key?(:linked_item) + # either there are two loops to join together or these items are already connected via loop + first_loop_end = item + item = item[4][:linked_item] while [first_loop_end, next_item].exclude? item[4][:linked_item] + next if item[4][:linked_item] == next_item # connected via loop, nothing to do + + second_loop_end = next_item + next_item = next_item[4][:linked_item] while next_item[4][:linked_item] != second_loop_end + + item[4][:linked_item] = second_loop_end + next_item[4][:linked_item] = first_loop_end + else + next_item, item = item, next_item if item[4].key?(:linked_item) + item[4][:linked_item] = next_item + loop_end = next_item + next_item = next_item[4][:linked_item] while next_item[4][:linked_item] != loop_end + next_item[4][:linked_item] = item unless next_item == item + end + } + } + } + end + def move_limit_violating_dataitems @limit_violation_count = @items_with_limit_violation.size mean_distance_diff = @items_with_limit_violation.collect{ |d| d[1] }.mean diff --git a/test/clustering_test.rb b/test/clustering_test.rb index a680395..c0612a1 100644 --- a/test/clustering_test.rb +++ b/test/clustering_test.rb @@ -101,14 +101,14 @@ def test_infeasible_skills # the skills of the service does not exist in any of the vehicles clusterer, data_set, options, ratio = Instance.load_clusterer('test/fixtures/infeasible_skills.bindump') - assert clusterer.build(data_set, 
options[:cut_symbol], ratio, options) + assert clusterer.build(data_set, options[:cut_symbol], {}, ratio, options) end def test_division_by_nan clusterer, data_set, options, ratio = Instance.load_clusterer('test/fixtures/division_by_nan.bindump') # options[:seed] = 182581703914854297101438278871236808945 - assert clusterer.build(data_set, options[:cut_symbol], ratio, options) + assert clusterer.build(data_set, options[:cut_symbol], {}, ratio, options) end def test_cluster_balance @@ -131,7 +131,7 @@ def test_cluster_balance while data_set.data_items.size > 100 number_of_items_expected = data_set.data_items.size - clusterer.build(data_set, options[:cut_symbol], ratio, options) + clusterer.build(data_set, options[:cut_symbol], {}, ratio, options) repartition = clusterer.clusters.collect{ |c| c.data_items.size } puts "#{number_of_items_expected} items divided in into #{repartition}" @@ -186,7 +186,7 @@ def test_length_centroid # more vehicles than data_items.. clusterer, data_set, options, ratio = Instance.load_clusterer('test/fixtures/length_centroid.bindump') - clusterer.build(data_set, options[:cut_symbol], ratio, options) + clusterer.build(data_set, options[:cut_symbol], {}, ratio, options) assert_equal 2, clusterer.clusters.count{ |c| !c.data_items.empty? }, 'There are only 2 data_items, should have at most 2 non-empty clusters.' end @@ -198,7 +198,7 @@ def test_avoid_capacities_overlap # TODO: we should handle these extreme cases better clusterer, data_set, options, ratio = Instance.load_clusterer('test/fixtures/avoid_capacities_overlap.bindump') - clusterer.build(data_set, options[:cut_symbol], ratio, options) + clusterer.build(data_set, options[:cut_symbol], {}, ratio, options) assert_equal 5, clusterer.clusters.count{ |c| !c.data_items.empty? 
}, 'There should be 5 non-empty clusters' @@ -226,4 +226,31 @@ def test_less_items_than_clusters clusterer.build(data_set, :visits) end end + + def test_a_service_cannot_appear_in_two_nonbinding_linking_relations + clusterer, data_set = Instance.two_clusters_4_items_with_matrix + + assert_raises ArgumentError do + clusterer.connect_linked_items(data_set.data_items, { shipment: [[0, 1], [0, 2]] }) + end + end + + def test_connect_linked_items_to_eachother + clusterer, data_set = Instance.two_clusters_4_items_with_matrix + + data_items = Marshal.load(Marshal.dump(data_set.data_items)) + # two separate 2-loops + clusterer.connect_linked_items(data_items, { shipment: [[0, 1], [2, 3]] }) + assert_equal([1, 0, 3, 2], data_items.collect{ |item| data_items.index(item[4][:linked_item]) }) + + data_items = Marshal.load(Marshal.dump(data_set.data_items)) + # two merged 2-loops (one 4-loop) + clusterer.connect_linked_items(data_items, { shipment: [[0, 1], [2, 3]], same_route: [[0, 2]] }) + assert_equal([1, 2, 3, 0], data_items.collect{ |item| data_items.index(item[4][:linked_item]) }) + + data_items = Marshal.load(Marshal.dump(data_set.data_items)) + # one 3-loop + clusterer.connect_linked_items(data_items, { same_route: [[0, 1, 2]] }) + assert_equal([1, 2, 0, nil], data_items.collect{ |item| data_items.index(item[4][:linked_item]) }) + end end From 45c4691ea994a5c164ad1eb6d6f36010c803601d Mon Sep 17 00:00:00 2001 From: halilsen Date: Tue, 9 Mar 2021 16:36:23 +0100 Subject: [PATCH 4/7] Clustering respects relations --- Gemfile.lock | 11 +++++++---- balanced_vrp_clustering.gemspec | 6 +++--- lib/balanced_vrp_clustering.rb | 26 +++++++++++++++++++++++--- test/clustering_test.rb | 20 ++++++++++++++++++++ 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 8127cc4..42f0dc6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . 
specs: - balanced_vrp_clustering (0.1.7) + balanced_vrp_clustering (0.2.0) awesome_print color-generator geojson2image @@ -18,12 +18,12 @@ GEM ansi (1.5.0) anyway_config (1.4.4) ast (2.4.0) - awesome_print (1.8.0) + awesome_print (1.9.2) backport (0.3.0) benchmark-ips (2.7.2) builder (3.2.4) byebug (11.0.1) - chunky_png (1.3.12) + chunky_png (1.4.0) color-generator (0.0.4) concurrent-ruby (1.1.6) docile (1.3.2) @@ -58,7 +58,7 @@ GEM nio4r (2.5.2) nokogiri (1.10.9) mini_portile2 (~> 2.4.0) - oj (3.10.8) + oj (3.11.2) parallel (1.19.1) parser (2.7.0.4) ast (~> 2.4.0) @@ -153,3 +153,6 @@ DEPENDENCIES RUBY VERSION ruby 2.3.3p222 + +BUNDLED WITH + 2.1.4 diff --git a/balanced_vrp_clustering.gemspec b/balanced_vrp_clustering.gemspec index 0f3eba8..5737b99 100644 --- a/balanced_vrp_clustering.gemspec +++ b/balanced_vrp_clustering.gemspec @@ -1,8 +1,8 @@ Gem::Specification.new do |s| s.name = 'balanced_vrp_clustering' - s.version = '0.1.7' - s.date = '2020-08-13' - s.summary = 'Gem to clusterize points of a given VRP.' + s.version = '0.2.0' + s.date = '2021-03-09' + s.summary = 'Gem for clustering points of a given VRP.' 
s.authors = 'Mapotempo' s.files = [ "lib/balanced_vrp_clustering.rb", diff --git a/lib/balanced_vrp_clustering.rb b/lib/balanced_vrp_clustering.rb index 7adc0fd..5f9f07d 100644 --- a/lib/balanced_vrp_clustering.rb +++ b/lib/balanced_vrp_clustering.rb @@ -524,7 +524,11 @@ def evaluate(data_item) protected def distance(data_item, centroid, cluster_index) - @distance_function.call(data_item, centroid) * @balance_coeff[cluster_index] + total_dist = 0 + + do_forall_linked_items_of(data_item){ |linked_item| total_dist += @distance_function.call(linked_item, centroid) } + + total_dist * @balance_coeff[cluster_index] end def calculate_membership_clusters @@ -535,16 +539,24 @@ def calculate_membership_clusters Ai4r::Data::DataSet.new data_labels: @data_set.data_labels end + @already_assigned = Hash.new{ |h, k| h[k] = false } + @data_set.data_items.each{ |data_item| + next if @already_assigned[data_item] # another item with a relation handled this item + cluster_index = evaluate(data_item) - assign_item(data_item, cluster_index) - update_metrics(data_item, cluster_index) + + do_forall_linked_items_of(data_item){ |linked_item| + assign_item(linked_item, cluster_index) + update_metrics(linked_item, cluster_index) + } } manage_empty_clusters if has_empty_cluster? 
end def assign_item(data_item, cluster_index) + @already_assigned[data_item] = true @clusters[cluster_index] << data_item end @@ -708,6 +720,14 @@ def calculate_local_speeds } end + def do_forall_linked_items_of(item) + linked_item = nil + until linked_item == item + linked_item = (linked_item && linked_item[4][:linked_item]) || item[4][:linked_item] || item + yield(linked_item) + end + end + def mark_the_items_which_needs_to_stay_at_the_top @data_set.data_items.each{ |i| i[4][:needs_to_stay_at_the_top] = false } @vehicles.flat_map{ |c| c[:capacities].keys }.uniq.each{ |unit| diff --git a/test/clustering_test.rb b/test/clustering_test.rb index c0612a1..a38f701 100644 --- a/test/clustering_test.rb +++ b/test/clustering_test.rb @@ -253,4 +253,24 @@ def test_connect_linked_items_to_eachother clusterer.connect_linked_items(data_items, { same_route: [[0, 1, 2]] }) assert_equal([1, 2, 0, nil], data_items.collect{ |item| data_items.index(item[4][:linked_item]) }) end + + def test_clustering_respects_relations + clusterer, data_set = Instance.two_clusters_4_items_with_matrix + clusterer.build(data_set, :duration, { shipment: [[0, 1], [2, 3]] }) + assert_equal [%w[point_1 point_2], %w[point_3 point_4]], + clusterer.clusters.collect{ |c| c.data_items.collect{ |i| i[2] }.sort! }.sort!, + 'Clustering should respect linking relations' + + clusterer, data_set = Instance.two_clusters_4_items_with_matrix + clusterer.build(data_set, :duration, { shipment: [[0, 1], [2, 3]], same_route: [[0, 2]] }) + assert_equal [[], %w[point_1 point_2 point_3 point_4]], + clusterer.clusters.collect{ |c| c.data_items.collect{ |i| i[2] }.sort! }.sort!, + 'Clustering should respect binding relations' + + clusterer, data_set = Instance.two_clusters_4_items_with_matrix + clusterer.build(data_set, :duration, { shipment: [[0, 1]], same_route: [[0, 2]] }) + assert_equal [%w[point_1 point_2 point_3], %w[point_4]], + clusterer.clusters.collect{ |c| c.data_items.collect{ |i| i[2] }.sort! 
}.sort!, + 'Clustering should respect relations' + end end From 555d107a032adeda437eee3f793991fcff59c279 Mon Sep 17 00:00:00 2001 From: halilsen Date: Wed, 10 Mar 2021 11:52:47 +0100 Subject: [PATCH 5/7] Centroid indices respect relations --- lib/balanced_vrp_clustering.rb | 6 ++++++ test/clustering_test.rb | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/lib/balanced_vrp_clustering.rb b/lib/balanced_vrp_clustering.rb index 5f9f07d..e55f38a 100644 --- a/lib/balanced_vrp_clustering.rb +++ b/lib/balanced_vrp_clustering.rb @@ -641,6 +641,12 @@ def populate_centroids(populate_method, number_of_clusters = @number_of_clusters skills = @remaining_skills.shift item = @data_set.data_items[index] + # check if linked data items are assigned to different centroids + do_forall_linked_items_of(item){ |linked_item| + msg = "Centroid #{ind} is initialised with a service which has a linked service that is used to initialise centroid #{insert_at_begining.index(linked_item)}" + raise ArgumentError, msg if insert_at_begining.include?(linked_item) + } + raise ArgumentError, "Centroid #{ind} is initialised with an incompatible service -- #{index}" unless @compatibility_function.call(item, [nil, nil, nil, nil, skills]) skills[:matrix_index] = item[4][:matrix_index] diff --git a/test/clustering_test.rb b/test/clustering_test.rb index a38f701..a5efe11 100644 --- a/test/clustering_test.rb +++ b/test/clustering_test.rb @@ -273,4 +273,14 @@ def test_clustering_respects_relations clusterer.clusters.collect{ |c| c.data_items.collect{ |i| i[2] }.sort! 
}.sort!, 'Clustering should respect relations' end + + def test_centroid_indices_respect_relations + clusterer, data_set = Instance.two_clusters_4_items_with_matrix + clusterer.centroid_indices = [0, 1] + error = assert_raises ArgumentError do + clusterer.build(data_set, :duration, { shipment: [[0, 1]] }) + end + expected_msg = 'Centroid 1 is initialised with a service which has a linked service that is used to initialise centroid 0' + assert_equal expected_msg, error.message, 'Error message is changed' + end end From c2c0055ca7add695ebad84a7a4cec2ac2aef927b Mon Sep 17 00:00:00 2001 From: halilsen Date: Wed, 10 Mar 2021 15:33:53 +0100 Subject: [PATCH 6/7] centroid initialisation respects relations --- lib/balanced_vrp_clustering.rb | 5 ++--- test/clustering_test.rb | 13 +++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/balanced_vrp_clustering.rb b/lib/balanced_vrp_clustering.rb index e55f38a..cfc772f 100644 --- a/lib/balanced_vrp_clustering.rb +++ b/lib/balanced_vrp_clustering.rb @@ -171,7 +171,6 @@ def build(data_set, cut_symbol, related_item_indices = {}, cut_ratio = 1.0, opti } @strict_limitations, @cut_limit = compute_limits(cut_symbol, cut_ratio, @vehicles, @data_set.data_items) - @remaining_skills = @vehicles.dup ### algo start ### @iteration = 0 @@ -561,7 +560,7 @@ def assign_item(data_item, cluster_index) end def calc_initial_centroids - @centroids, @old_centroids_lat_lon = [], nil + @centroids, @old_centroids_lat_lon, @remaining_skills = [], nil, @vehicles.dup if @centroid_indices.empty? 
populate_centroids('random') else @@ -625,7 +624,7 @@ def populate_centroids(populate_method, number_of_clusters = @number_of_clusters skills[:duration_from_and_to_depot] = item[4][:duration_from_and_to_depot][@centroids.length] @centroids << [item[0], item[1], item[2], Hash.new(0), skills] - available_items.delete(item) + do_forall_linked_items_of(item){ |linked_item| available_items.delete(linked_item) } @data_set.data_items.insert(0, @data_set.data_items.delete(item)) end diff --git a/test/clustering_test.rb b/test/clustering_test.rb index a5efe11..e8af261 100644 --- a/test/clustering_test.rb +++ b/test/clustering_test.rb @@ -283,4 +283,17 @@ def test_centroid_indices_respect_relations expected_msg = 'Centroid 1 is initialised with a service which has a linked service that is used to initialise centroid 0' assert_equal expected_msg, error.message, 'Error message is changed' end + + def test_centroid_initialisation_respects_relations + clusterer, data_set = Instance.two_clusters_4_items_with_matrix + clusterer.stub(:calc_initial_centroids, lambda{ + 10.times{ + clusterer.send(:__minitest_stub__calc_initial_centroids) + assert_match(/point_[1-3]point_4/, clusterer.centroids.map{ |i| i[2] }.sort!.join) + } + return + }) do + clusterer.build(data_set, :duration, { same_route: [[0, 1, 2]] }) + end + end end From 657b71af8721cbbae44c5bba36790fb1c1d6842f Mon Sep 17 00:00:00 2001 From: halilsen Date: Thu, 11 Mar 2021 17:07:51 +0100 Subject: [PATCH 7/7] manage_empty_clusters respects relations --- lib/balanced_vrp_clustering.rb | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/balanced_vrp_clustering.rb b/lib/balanced_vrp_clustering.rb index cfc772f..18d4224 100644 --- a/lib/balanced_vrp_clustering.rb +++ b/lib/balanced_vrp_clustering.rb @@ -551,7 +551,7 @@ def calculate_membership_clusters } } - manage_empty_clusters if has_empty_cluster? 
+ manage_empty_clusters end def assign_item(data_item, cluster_index) @@ -663,6 +663,8 @@ def populate_centroids(populate_method, number_of_clusters = @number_of_clusters end def manage_empty_clusters + return unless has_empty_cluster? + @clusters.each_with_index{ |empty_cluster, ind| next unless empty_cluster.data_items.empty? @@ -671,21 +673,29 @@ def manage_empty_clusters distances = @clusters.collect{ |cluster| next unless cluster.data_items.size > 1 + min_distance = Float::INFINITY + closest_item = cluster.data_items.select{ |d_i| @compatibility_function.call(d_i, empty_centroid) }.min_by{ |d_i| - @distance_function.call(d_i, empty_centroid) + total_dist = 0 + + do_forall_linked_items_of(d_i){ |linked_item| total_dist += @distance_function.call(linked_item, empty_centroid) } + + min_distance = [total_dist, min_distance].min + + total_dist } next if closest_item.nil? - [@distance_function.call(closest_item, empty_centroid), closest_item, cluster] + [min_distance, closest_item, cluster] } closest = distances.min_by{ |d| d.nil? ? Float::INFINITY : d[0] } next if closest.nil? - empty_cluster.data_items << closest[2].data_items.delete(closest[1]) + do_forall_linked_items_of(closest[1]){ |linked_item| empty_cluster.data_items << closest[2].data_items.delete(linked_item) } } end