Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ethen8181
GitHub Repository: ethen8181/machine-learning
Path: blob/master/ga/tsp_solver/__pycache__/tspga.cpython-35.pyc
2594 views


5O@Y� �@smddlZddlZddlZddljZddlm	Z	ddl
mZGdd�de�Z
dS)�N)�
namedtuple)�combinationsc@s�eZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zdd�ZdS)�TSPGAa
	Travel Salesman Problem using Genetic Algorithm

	Parameters
	----------
	generation : int
		number of iteration to train the algorithm

	population_size : int
		number of tours in the population

	retain_rate : float between 0 ~ 1
		the fraction of the best tour (shortest total distance) 
		in the population to retain, which is then used in the 
		crossover and mutation staget to generate the children 
		for the next generation
	
	mutate_rate : float between 0 ~ 1
		the probability that each tour will mutate

	Example
	-------
	%matplotlib inline
	import pandas as pd
	from tsp_solver import TSPGA
	import matplotlib.pyplot as plt

	# toy dataset
	tsp_data = pd.read_table( 
		'TSP_berlin52.txt', 
		skiprows = 1, # the first row is simply the number of cities
		header = None, 
		names = [ 'city', 'x', 'y' ], 
		sep = ' '
	)
	
	# specify the parameters and fit the data
	tsp_ga = TSPGA( 
		generation = 5000, 
		population_size = 250, 
		retain_rate = 0.5, 
		mutate_rate = 0.25
	)
	tsp_ga.fit(tsp_data)
	
	# distance convergence plot, and the best tour's distance
	# and the corresponding city tour
	tsp_ga.convergence_plot()
	tsp_ga.best_tour

	Reference
	---------
	http://www.theprojectspot.com/tutorial-post/applying-a-genetic-algorithm-to-the-travelling-salesman-problem/5
	cCs;||_||_||_||_t||�|_dS)N)�
generation�retain_rate�mutate_rate�population_size�int�
retain_len)�selfrrrr�r�6/Users/r631854/machine-learning/ga/tsp_solver/tspga.py�__init__?s
				zTSPGA.__init__cCs�|jd|_|ddgj|_dd�t|d�D�|_|j�|_tddd	g�|_	|j
d|d�}g|_x<t|j
�D]+}|j|�\}}|jj|�q�W|j|j
d
|_d|_|S)a
		fit the genetic algorithm on the tsp data

		Parameters
		----------
		tsp_data : DataFrame 
			contains the 'city' and its 'x', 'y' coordinate, note that 
			the column name must match or the code will break (ordering 
			of the column does not matter)
		r�x�ycSsi|]\}}||�qSrr)�.0�index�cityrrr
�
<dictcomp>Ys	zTSPGA.fit.<locals>.<dictcomp>r�	tour_info�dist�tour�T)�shape�city_num�values�cities�	enumerate�	city_dict�_compute_pairwise_distance�pairwise_distancerr�_generate_tours�generation_history�ranger�_evolve�append�	best_tour�	is_fitted)r�tsp_data�
population�_�generation_bestrrr
�fitGs
 		z	TSPGA.fitcCs�tj|j|jf�}xdt|jj�d�D]J\}}tjj|j||j|�}||||<|||<q4W|S)z{
		readable but not optimized way of computing and storing 
		the symmetric pairwise distance for between all city pairs
		�)	�np�zerosrrrr�linalg�normr)rr �i1�i2�distancerrr
rns
%$z TSPGA._compute_pairwise_distancecs�g}xCt�j�D]2}|jj�}tjj|�|j|�qW�fdd�|D�}�fdd�t||�D�}t	|�}|S)z�
		or in genetic algorithm terms, generate the populations.
		generate a new random tour with the size of the population_size
		and compute the distance for each tour
		cs"g|]}�jd|��qS)r)�_compute_tour_distance)r�t)rrr
�
<listcomp>�s	z)TSPGA._generate_tours.<locals>.<listcomp>cs(g|]\}}�j||��qSr)r)rrr)rrr
r7�s	)
r#rr�copyr.�random�shuffler%�zip�sorted)rrZtoursr*rZ
tours_distr)r)rr
r!{szTSPGA._generate_tourscCs�|j|d}|jd}|j||}|j||}xdt|�D]V\}}|j|}|d}	|j||	}
||j||
7}|	|krMPqMW|S)z�
		1. compute the total distance for each tour,
		note that tour stores the name of the city, thus you need to map it
		with the city_dict to access the pairwise distance.
		2. initialize the distance with the last city to the first city's distance
		rr)rrr r)rrZ
first_city�
last_indexZ	last_cityZ	tour_distrZ
city_indexrZ
next_indexZ	next_cityrrr
r5�s


zTSPGA._compute_tour_distancec	s|j�}|d�j�}dd�|D�}g}xDt|��jkr~�j|�}�j|�}|j|�q;W�fdd�|D�}�fdd�t||�D�}y0|j|�t	|�}|d�j�}Wnt
k
r|}YnX|d}||fS)zt
		core method that does the crossover, mutation to generate
		the possibly best children for the next generation
		NcSsg|]}|j�qSr)r)r�prrr
r7�s	z!TSPGA._evolve.<locals>.<listcomp>cs"g|]}�jd|��qS)r)r5)r�c)rrr
r7�s	cs(g|]\}}�j||��qSr)r)rrr)rrr
r7�s	r)r8r
�lenr�
_crossover�_mutater%r;�extendr<�
ValueError)	rr)Zpopulation_backup�parent�children�childZ
children_distZpopulation_childrenr+r)rr
r$�s&


z
TSPGA._evolvecCs�tjt|j�dd�\}}||||}}tjt|j�dd�\}}||krv||}}t||�}tj|||dd�}	||	}
tj|
d|�|||
|d�f}|S)z(randomly select two parent and mate them�kr-�invertTN)	r9�sampler#r
r�slicer.�in1d�r_)rrE�index1�index2�male�femaleZ	pos_startZpos_end�subset�booleanZnot_in_malerGrrr
rA�s$$

.zTSPGA._crossovercCsZ|jtj�krVtjt|j�dd�\}}||||||<||<|S)z[
		randomly swap the position of two cities if
		the the mutuate_rate's threshold is met
		rHr-)rr9rJr#r)rrGZswap1Zswap2rrr
rB�s$z
TSPGA._mutatecCs�|jstd�dd�|jD�}tjtdt|�d�|d�tjd�tjd�tj	d�tj
�tj�d	S)
zX
		convergence plot showing the decrease of each generation's 
		best tour's distance
		z,you have not fitted the algorithm using .fitcSsg|]}|j�qSr)r)r�grrr
r7�s	z*TSPGA.convergence_plot.<locals>.<listcomp>r�-zDistance Convergence PlotZ	IterationZDistanceN)r'rDr"�plt�plotr#r@�title�xlabel�ylabel�tight_layout�show)rrrrr
�convergence_plot�s	
&



zTSPGA.convergence_plotN)
�__name__�
__module__�__qualname__�__doc__rr,rr!r5r$rArBr]rrrr
rs6'
+r)r9�numpyr.�pandas�pd�matplotlib.pyplot�pyplotrV�collectionsr�	itertoolsr�objectrrrrr
�<module>s