Skip to content

Commit

Permalink
Merge pull request #16 from senhalil/dev
Browse files Browse the repository at this point in the history
Clustering respects relations
  • Loading branch information
fab-girard authored Apr 6, 2021
2 parents f05b84f + 657b71a commit c76cca8
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 45 deletions.
11 changes: 7 additions & 4 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
balanced_vrp_clustering (0.1.7)
balanced_vrp_clustering (0.2.0)
awesome_print
color-generator
geojson2image
Expand All @@ -18,12 +18,12 @@ GEM
ansi (1.5.0)
anyway_config (1.4.4)
ast (2.4.0)
awesome_print (1.8.0)
awesome_print (1.9.2)
backport (0.3.0)
benchmark-ips (2.7.2)
builder (3.2.4)
byebug (11.0.1)
chunky_png (1.3.12)
chunky_png (1.4.0)
color-generator (0.0.4)
concurrent-ruby (1.1.6)
docile (1.3.2)
Expand Down Expand Up @@ -58,7 +58,7 @@ GEM
nio4r (2.5.2)
nokogiri (1.10.9)
mini_portile2 (~> 2.4.0)
oj (3.10.8)
oj (3.11.2)
parallel (1.19.1)
parser (2.7.0.4)
ast (~> 2.4.0)
Expand Down Expand Up @@ -153,3 +153,6 @@ DEPENDENCIES

RUBY VERSION
ruby 2.3.3p222

BUNDLED WITH
2.1.4
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,13 @@ data_items = items.collect{ |i|
"matrix_index": only if any matrix was provided
}
}
cut_symbol = :duration # or any other unit (:unit_1, :unit_2) to be used for balancig clusters
cut_symbol = :duration # or any other unit (:unit_1, :unit_2) to be used for balancing clusters
related_item_indices = { shipment: [[0, 1], [2, 3]], same_route: [[4, 5]] } # Available relations are as follows
# LINKING_RELATIONS = %i[order same_route sequence shipment]
# BINDING_RELATIONS = %i[order same_route sequence]
ratio = 1 # by default, used to over/underestimate vehicles limits
c.logger = Logger.new(STDOUT) # for debug output
c.build(DataSet.new(data_items: data_items), cut_symbol, ratio, options)```
c.build(DataSet.new(data_items: data_items), cut_symbol, related_item_indices, ratio, options)```
cut_symbol is the referent unit to use when balancing clusters. This unit should exist in both vehicles and data_items structures.
```
Expand Down
6 changes: 3 additions & 3 deletions balanced_vrp_clustering.gemspec
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Gem::Specification.new do |s|
s.name = 'balanced_vrp_clustering'
s.version = '0.1.7'
s.date = '2020-08-13'
s.summary = 'Gem to clusterize points of a given VRP.'
s.version = '0.2.0'
s.date = '2021-03-09'
s.summary = 'Gem for clustering points of a given VRP.'
s.authors = 'Mapotempo'
s.files = [
"lib/balanced_vrp_clustering.rb",
Expand Down
142 changes: 111 additions & 31 deletions lib/balanced_vrp_clustering.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@
require 'color-generator'
require 'geojson2image'

INCOMPATIBILITY_DISTANCE_PENALTY = 2**32

module Ai4r
module Clusterers
class BalancedVRPClustering < KMeans
LINKING_RELATIONS = %i[order same_route sequence shipment].freeze
BINDING_RELATIONS = %i[order same_route sequence].freeze

include OverloadableFunctions

attr_reader :iteration
Expand All @@ -57,7 +62,7 @@ def initialize
@on_empty = 'closest' # the other options are not available
end

def build(data_set, cut_symbol, cut_ratio = 1.0, options = {})
def build(data_set, cut_symbol, related_item_indices = {}, cut_ratio = 1.0, options = {})
# Build a new clusterer, using data items found in data_set.
# Items will be clustered in "number_of_clusters" different
# clusters. Each item is defined by :
Expand All @@ -69,7 +74,7 @@ def build(data_set, cut_symbol, cut_ratio = 1.0, options = {})

# First of all, set and display the seed
options[:seed] ||= Random.new_seed
@logger&.debug "Clustering with seed=#{options[:seed]}"
@logger&.info "Clustering with seed=#{options[:seed]}"
srand options[:seed]

# DEPRECATED variables (to be removed before public release)
Expand Down Expand Up @@ -99,6 +104,8 @@ def build(data_set, cut_symbol, cut_ratio = 1.0, options = {})
raise ArgumentError, 'All vehicles should have a limit for the unit corresponding to the cut symbol'
end

connect_linked_items(data_set.data_items, related_item_indices)

### values ###
@data_set = data_set
@cut_symbol = cut_symbol
Expand Down Expand Up @@ -164,7 +171,6 @@ def build(data_set, cut_symbol, cut_ratio = 1.0, options = {})
}

@strict_limitations, @cut_limit = compute_limits(cut_symbol, cut_ratio, @vehicles, @data_set.data_items)
@remaining_skills = @vehicles.dup

### algo start ###
@iteration = 0
Expand Down Expand Up @@ -222,6 +228,57 @@ def build(data_set, cut_symbol, cut_ratio = 1.0, options = {})
self
end

def connect_linked_items(data_items, related_item_indices)
(LINKING_RELATIONS | BINDING_RELATIONS).each{ |relation|
related_item_indices[relation]&.each{ |linked_indices|
raise ArgumentError, 'Each relation group of related_item_indices should contain only unique indices' unless linked_indices.uniq.size == linked_indices.size
}
}

(LINKING_RELATIONS - BINDING_RELATIONS).each{ |relation|
related_item_indices[relation]&.each{ |linked_indices|
raise ArgumentError, 'A service should not appear in multiple non-binding linking relations' if linked_indices.any?{ |ind| data_items[ind][4].key?(:linked_item) }

linked_indices << linked_indices.first # create a loop
(linked_indices.size - 1).times{ |i|
item = data_items[linked_indices[i]]
next_item = data_items[linked_indices[i + 1]]
item[4][:linked_item] = next_item
}
}
}

BINDING_RELATIONS.each{ |relation|
related_item_indices[relation]&.each{ |linked_indices|
linked_indices << linked_indices.first # create a loop
(linked_indices.size - 1).times{ |i|
item = data_items[linked_indices[i]]
next_item = data_items[linked_indices[i + 1]]
if !item[4].key?(:linked_item) && !next_item[4].key?(:linked_item)
item[4][:linked_item] = next_item
elsif item[4].key?(:linked_item) && next_item[4].key?(:linked_item)
# either there are two loops to join together or these items are already connected via loop
first_loop_end = item
item = item[4][:linked_item] while [first_loop_end, next_item].exclude? item[4][:linked_item]
next if item[4][:linked_item] == next_item # connected via loop, nothing to do

second_loop_end = next_item
next_item = next_item[4][:linked_item] while next_item[4][:linked_item] != second_loop_end

item[4][:linked_item] = second_loop_end
next_item[4][:linked_item] = first_loop_end
else
next_item, item = item, next_item if item[4].key?(:linked_item)
item[4][:linked_item] = next_item
loop_end = next_item
next_item = next_item[4][:linked_item] while next_item[4][:linked_item] != loop_end
next_item[4][:linked_item] = item unless next_item == item
end
}
}
}
end

def move_limit_violating_dataitems
@limit_violation_count = @items_with_limit_violation.size
mean_distance_diff = @items_with_limit_violation.collect{ |d| d[1] }.mean
Expand Down Expand Up @@ -429,15 +486,15 @@ def evaluate(data_item)
distances = @centroids.collect.with_index{ |centroid, cluster_index|
dist = distance(data_item, centroid, cluster_index)

dist += 2**32 unless @compatibility_function.call(data_item, centroid)
dist += INCOMPATIBILITY_DISTANCE_PENALTY unless @compatibility_function.call(data_item, centroid)

dist
}

closest_cluster_index = get_min_index(distances)

if capactity_violation?(data_item, closest_cluster_index)
mininimum_without_limit_violation = 2**32 # only consider compatible ones
mininimum_without_limit_violation = INCOMPATIBILITY_DISTANCE_PENALTY # only consider compatible ones
closest_cluster_wo_violation_index = nil
@number_of_clusters.times{ |k|
next unless distances[k] < mininimum_without_limit_violation &&
Expand Down Expand Up @@ -466,7 +523,11 @@ def evaluate(data_item)
protected

# Balanced distance between a data item and a centroid.
#
# Sums the result of @distance_function over every item yielded by
# do_forall_linked_items_of(data_item) and multiplies that sum by the balance
# coefficient of the cluster at cluster_index.
#
# The distance function is invoked exactly once per yielded item; no extra
# call is made whose result would be thrown away.
def distance(data_item, centroid, cluster_index)
  total_dist = 0

  do_forall_linked_items_of(data_item){ |linked_item| total_dist += @distance_function.call(linked_item, centroid) }

  total_dist * @balance_coeff[cluster_index]
end

def calculate_membership_clusters
Expand All @@ -476,19 +537,30 @@ def calculate_membership_clusters
@clusters = Array.new(@number_of_clusters) do
Ai4r::Data::DataSet.new data_labels: @data_set.data_labels
end
@cluster_indices = Array.new(@number_of_clusters){ [] }

@already_assigned = Hash.new{ |h, k| h[k] = false }

@data_set.data_items.each{ |data_item|
next if @already_assigned[data_item] # another item with a relation handled this item

cluster_index = evaluate(data_item)
@clusters[cluster_index] << data_item
update_metrics(data_item, cluster_index)

do_forall_linked_items_of(data_item){ |linked_item|
assign_item(linked_item, cluster_index)
update_metrics(linked_item, cluster_index)
}
}

manage_empty_clusters if has_empty_cluster?
manage_empty_clusters
end

# Flags data_item as assigned in @already_assigned, then appends it to the
# cluster stored at cluster_index in @clusters.
def assign_item(data_item, cluster_index)
  @already_assigned.store(data_item, true)
  target_cluster = @clusters[cluster_index]
  target_cluster << data_item
end

def calc_initial_centroids
@centroids, @old_centroids_lat_lon = [], nil
@centroids, @old_centroids_lat_lon, @remaining_skills = [], nil, @vehicles.dup
if @centroid_indices.empty?
populate_centroids('random')
else
Expand Down Expand Up @@ -552,7 +624,7 @@ def populate_centroids(populate_method, number_of_clusters = @number_of_clusters
skills[:duration_from_and_to_depot] = item[4][:duration_from_and_to_depot][@centroids.length]
@centroids << [item[0], item[1], item[2], Hash.new(0), skills]

available_items.delete(item)
do_forall_linked_items_of(item){ |linked_item| available_items.delete(linked_item) }

@data_set.data_items.insert(0, @data_set.data_items.delete(item))
end
Expand All @@ -568,6 +640,12 @@ def populate_centroids(populate_method, number_of_clusters = @number_of_clusters
skills = @remaining_skills.shift
item = @data_set.data_items[index]

# check if linked data items are assigned to different centroids
do_forall_linked_items_of(item){ |linked_item|
msg = "Centroid #{ind} is initialised with a service which has a linked service that is used to initialise centroid #{insert_at_begining.index(linked_item)}"
raise ArgumentError, msg if insert_at_begining.include?(linked_item)
}

raise ArgumentError, "Centroid #{ind} is initialised with an incompatible service -- #{index}" unless @compatibility_function.call(item, [nil, nil, nil, nil, skills])

skills[:matrix_index] = item[4][:matrix_index]
Expand All @@ -585,6 +663,8 @@ def populate_centroids(populate_method, number_of_clusters = @number_of_clusters
end

def manage_empty_clusters
return unless has_empty_cluster?

@clusters.each_with_index{ |empty_cluster, ind|
next unless empty_cluster.data_items.empty?

Expand All @@ -593,40 +673,32 @@ def manage_empty_clusters
distances = @clusters.collect{ |cluster|
next unless cluster.data_items.size > 1

min_distance = Float::INFINITY

closest_item = cluster.data_items.select{ |d_i|
@compatibility_function.call(d_i, empty_centroid)
}.min_by{ |d_i|
@distance_function.call(d_i, empty_centroid)
total_dist = 0

do_forall_linked_items_of(d_i){ |linked_item| total_dist += @distance_function.call(linked_item, empty_centroid) }

min_distance = [total_dist, min_distance].min

total_dist
}
next if closest_item.nil?

[@distance_function.call(closest_item, empty_centroid), closest_item, cluster]
[min_distance, closest_item, cluster]
}

closest = distances.min_by{ |d| d.nil? ? Float::INFINITY : d[0] }

next if closest.nil?

empty_cluster.data_items << closest[2].data_items.delete(closest[1])
do_forall_linked_items_of(closest[1]){ |linked_item| empty_cluster.data_items << closest[2].data_items.delete(linked_item) }
}
end

# Drops every cluster whose data_items is empty, together with its centroid
# and its entry in @cluster_indices. The hash at index 4 of each dropped
# centroid is collected into @remaining_skills, and @number_of_clusters is
# updated to the number of centroids that were kept.
def eliminate_empty_clusters
  previous_clusters = @clusters
  previous_centroids = @centroids
  previous_cluster_indices = @cluster_indices

  @clusters = []
  @centroids = []
  @cluster_indices = []
  @remaining_skills = []

  (0...@number_of_clusters).each{ |k|
    cluster = previous_clusters[k]

    unless cluster.data_items.empty?
      @clusters << cluster
      @cluster_indices << previous_cluster_indices[k]
      @centroids << previous_centroids[k]
      next
    end

    # empty cluster: keep what is stored at index 4 of its centroid
    @remaining_skills << previous_centroids[k][4]
  }

  @number_of_clusters = @centroids.length
end

def stop_criteria_met
centroids_converged_or_in_loop(Math.sqrt(@iteration).to_i) && # This check should stay first since it keeps track of the centroid movements..
@limit_violation_count.zero? && # Do not converge if a decision is taken due to limit violation.
Expand Down Expand Up @@ -663,6 +735,14 @@ def calculate_local_speeds
}
end

# Yields every item of the :linked_item chain that `item` belongs to, starting
# with the item `item` points to and finishing with `item` itself. An item
# without a :linked_item entry is yielded once, on its own.
#
# NOTE(review): the walk stops when the current item compares equal (==) to
# `item`, and falls back to item's own link when a visited item has none, so
# it presumably relies on the chain being closed -- confirm against callers.
def do_forall_linked_items_of(item)
  cursor = nil
  until cursor == item
    successor = cursor && cursor[4][:linked_item]
    cursor = successor || item[4][:linked_item] || item
    yield cursor
  end
end

def mark_the_items_which_needs_to_stay_at_the_top
@data_set.data_items.each{ |i| i[4][:needs_to_stay_at_the_top] = false }
@vehicles.flat_map{ |c| c[:capacities].keys }.uniq.each{ |unit|
Expand Down
Loading

0 comments on commit c76cca8

Please sign in to comment.