Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ethen8181
GitHub Repository: ethen8181/machine-learning
Path: blob/master/ga/tsp_solver/tspga.py
2596 views
1
import random
2
import numpy as np
3
import pandas as pd
4
import matplotlib.pyplot as plt
5
from collections import namedtuple
6
from itertools import combinations
7
8
class TSPGA(object):
9
"""
10
Travel Salesman Problem using Genetic Algorithm
11
12
Parameters
13
----------
14
generation : int
15
number of iteration to train the algorithm
16
17
population_size : int
18
number of tours in the population
19
20
retain_rate : float between 0 ~ 1
21
the fraction of the best tour (shortest total distance)
22
in the population to retain, which is then used in the
23
crossover and mutation staget to generate the children
24
for the next generation
25
26
mutate_rate : float between 0 ~ 1
27
the probability that each tour will mutate
28
29
Example
30
-------
31
%matplotlib inline
32
import pandas as pd
33
from tsp_solver import TSPGA
34
import matplotlib.pyplot as plt
35
36
# toy dataset
37
tsp_data = pd.read_table(
38
'TSP_berlin52.txt',
39
skiprows = 1, # the first row is simply the number of cities
40
header = None,
41
names = [ 'city', 'x', 'y' ],
42
sep = ' '
43
)
44
45
# specify the parameters and fit the data
46
tsp_ga = TSPGA(
47
generation = 5000,
48
population_size = 250,
49
retain_rate = 0.5,
50
mutate_rate = 0.25
51
)
52
tsp_ga.fit(tsp_data)
53
54
# distance convergence plot, and the best tour's distance
55
# and the corresponding city tour
56
tsp_ga.convergence_plot()
57
tsp_ga.best_tour
58
59
Reference
60
---------
61
http://www.theprojectspot.com/tutorial-post/applying-a-genetic-algorithm-to-the-travelling-salesman-problem/5
62
"""
63
def __init__( self, generation, population_size, retain_rate, mutate_rate ):
64
self.generation = generation
65
self.retain_rate = retain_rate
66
self.mutate_rate = mutate_rate
67
self.population_size = population_size
68
self.retain_len = int( population_size * retain_rate )
69
70
71
def fit( self, tsp_data ):
72
"""
73
fit the genetic algorithm on the tsp data
74
75
Parameters
76
----------
77
tsp_data : DataFrame
78
contains the 'city' and its 'x', 'y' coordinate, note that
79
the column name must match or the code will break (ordering
80
of the column does not matter)
81
"""
82
83
# store the coordinate and total number of cities
84
self.city_num = tsp_data.shape[0]
85
self.cities = tsp_data[['x', 'y']].values
86
87
# a mapping from city to index to make life easier to
88
# compute and store the pairwise distance
89
self.city_dict = { city: index for index, city in enumerate(tsp_data['city']) }
90
self.pairwise_distance = self._compute_pairwise_distance()
91
92
# 1. data structure that stores the tour (city's order) and it's distance
93
# 2. randomly generate the initial population (list of tour_info)
94
self.tour_info = namedtuple( 'tour_info', [ 'dist', 'tour' ] )
95
population = self._generate_tours( city = tsp_data['city'] )
96
97
# train the genetic algorithm, during the process
98
# store the best tour and its distance for each generation,
99
# so we can get an idea of when the algorithm converged
100
self.generation_history = []
101
for _ in range(self.generation):
102
population, generation_best = self._evolve(population)
103
self.generation_history.append(generation_best)
104
105
self.best_tour = self.generation_history[self.generation - 1]
106
self.is_fitted = True
107
return self
108
109
110
def _compute_pairwise_distance(self):
111
"""
112
readable but not optimized way of computing and storing
113
the symmetric pairwise distance for between all city pairs
114
"""
115
pairwise_distance = np.zeros( ( self.city_num, self.city_num ) )
116
for i1, i2 in combinations( self.city_dict.values(), 2 ):
117
distance = np.linalg.norm( self.cities[i1] - self.cities[i2] )
118
pairwise_distance[i1][i2] = pairwise_distance[i2][i1] = distance
119
120
return pairwise_distance
121
122
123
def _generate_tours( self, city ):
124
"""
125
or in genetic algorithm terms, generate the populations.
126
generate a new random tour with the size of the population_size
127
and compute the distance for each tour
128
"""
129
tours = []
130
for _ in range(self.population_size):
131
tour = city.values.copy()
132
np.random.shuffle(tour)
133
tours.append(tour)
134
135
# combine the tour and distance into a single namedtuple,
136
# store all of them in a list, this serves as the population
137
# and sort the population increasingly according to the tour's distance
138
tours_dist = [ self._compute_tour_distance( tour = t ) for t in tours ]
139
population = [ self.tour_info( dist, tour )
140
for dist, tour in zip( tours_dist, tours ) ]
141
population = sorted(population)
142
return population
143
144
145
def _compute_tour_distance( self, tour ):
146
"""
147
1. compute the total distance for each tour,
148
note that tour stores the name of the city, thus you need to map it
149
with the city_dict to access the pairwise distance.
150
2. initialize the distance with the last city to the first city's distance
151
"""
152
first_city = self.city_dict[ tour[0] ]
153
last_index = self.city_num - 1
154
last_city = self.city_dict[ tour[last_index] ]
155
tour_dist = self.pairwise_distance[last_city][first_city]
156
157
for index, city_index in enumerate(tour):
158
city = self.city_dict[city_index]
159
next_index = index + 1
160
next_city = self.city_dict[ tour[next_index] ]
161
tour_dist += self.pairwise_distance[city][next_city]
162
163
if next_index == last_index:
164
break
165
166
return tour_dist
167
168
169
def _evolve( self, population ):
170
"""
171
core method that does the crossover, mutation to generate
172
the possibly best children for the next generation
173
"""
174
175
# retain a copy of the population, why it is needed is explained
176
# in the try, except block below
177
population_backup = population.copy()
178
179
# retain the best tour (number determined by the retain_len)
180
population = population[:self.retain_len]
181
parent = [ p.tour for p in population ]
182
183
# generate the children (tours) for the next generation
184
children = []
185
while len(children) < self.population_size:
186
child = self._crossover(parent)
187
child = self._mutate(child)
188
children.append(child)
189
190
# evaluate the children chromosome and retain the overall best,
191
# overall simply means the best from the parent and the children, where
192
# the size retained is determined by the population size
193
children_dist = [ self._compute_tour_distance( tour = c ) for c in children ]
194
population_children = [ self.tour_info( dist, tour )
195
for dist, tour in zip( children_dist, children ) ]
196
197
# if the generated two tours are the same or have the same length
198
# (rarely occurs) then sorting will break; if this happens, simply
199
# use the original population as it seems unreasonable to check
200
# every combination of the tour to spot the duplicate
201
try:
202
population.extend(population_children)
203
population = sorted(population)
204
population = population[:self.population_size]
205
except ValueError:
206
population = population_backup
207
208
generation_best = population[0]
209
return population, generation_best
210
211
212
def _crossover( self, parent ):
213
"""randomly select two parent and mate them"""
214
index1, index2 = random.sample( range(self.retain_len), k = 2 )
215
male, female = parent[index1], parent[index2]
216
217
# generate the position to slice one of the parent,
218
# then combine the two parent into one, during this process,
219
# make sure every city is being considered
220
pos_start, pos_end = random.sample( range(self.city_num), k = 2 )
221
if pos_start > pos_end:
222
pos_start, pos_end = pos_end, pos_start
223
224
# remove the city that are already in female,
225
# and concatenate the array together
226
subset = slice( pos_start, pos_end )
227
boolean = np.in1d( female, male[subset], invert = True )
228
not_in_male = female[boolean]
229
child = np.r_[ not_in_male[:pos_start], male[subset], not_in_male[pos_start:] ]
230
return child
231
232
233
def _mutate( self, child ):
234
"""
235
randomly swap the position of two cities if
236
the the mutuate_rate's threshold is met
237
"""
238
if self.mutate_rate > random.random():
239
swap1, swap2 = random.sample( range(self.city_num), k = 2 )
240
child[swap1], child[swap2] = child[swap2], child[swap1]
241
242
return child
243
244
245
def convergence_plot(self):
246
"""
247
convergence plot showing the decrease of each generation's
248
best tour's distance
249
"""
250
if not self.is_fitted:
251
ValueError('you have not fitted the algorithm using .fit')
252
253
dist = [ g.dist for g in self.generation_history ]
254
plt.plot( range( 1, len(dist) + 1 ), dist, '-' )
255
plt.title( 'Distance Convergence Plot' )
256
plt.xlabel('Iteration')
257
plt.ylabel('Distance')
258
plt.tight_layout()
259
plt.show()
260
261
262