It is essential to run
>> %matplotlib inline
at the beginning of the notebook; otherwise the plots will not be displayed inline.
%matplotlib inline
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rand
# Make up some fake data: one record per person in nameList.
nameList = ['James', 'Mary', 'John', 'Patricia', 'Robert', 'Jennifer', 'Michael', 'Elizabeth', 'William',
            'Linda', 'David', 'Barbara', 'Richard', 'Susan', 'Joseph', 'Margaret', 'Charles', 'Jessica',
            'Christopher', 'Dorothy']
# Size every random column from the name list so the columns cannot drift apart.
numPeople = len(nameList)
# randint excludes its upper bound, so ages are drawn from 17..69.
ageList = rand.randint(17, 70, size=numPeople)
# NOTE: this list has one more entry than nameList; the surplus last entry
# ('Nissan Versa') is never assigned to anyone.
carList = ['Toyota Camry', 'Toyota Corolla', 'Nissan Altima', 'Honda Civic', 'Honda Accord',
           'Ford Focus', 'Hyundai Elantra', 'Ford Fusion', 'Chevrolet Cruze', 'Ford Focus',
           'Nissan Sentra', 'Chevrolet Malibu', 'Hyundai Sonata', 'Ford Focus', 'Honda Civic',
           'Ford Focus', 'Ford Focus', 'Ford Mustang', 'Volkswagen Jetta', 'Subaru Outback', 'Nissan Versa']
# A 0/1 value per person (presumably "was this their first ticket?" -- confirm).
firstSpeedingTicket = rand.randint(2, size=numPeople)
# Time-of-day code per person, 0..3.
ticketTimeList = rand.randint(0, 4, size=numPeople)

# Put the fake data into a list of dictionaries, one dictionary per person.
# zip stops at the shortest input, i.e. after len(nameList) rows.
fakeTable = []
for name, age, ticket, ticketTime, car in zip(nameList, ageList, firstSpeedingTicket,
                                              ticketTimeList, carList):
    tmpDict = {'name': name, 'age': age, 'ticket': ticket, 'ticketTime': ticketTime, 'car': car}
    fakeTable.append(tmpDict)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(tmpDict)
# first we need to get the pieces we want
# let's do this with reusable functions
def produce_list(table, column):
    """Collect the value stored under ``column`` from every row of ``table``.

    Args:
        table: iterable of rows, each indexable by ``column`` (e.g. dicts).
        column: key to look up in each row.

    Returns:
        A new list holding one value per row, in row order. An empty
        ``table`` yields an empty list.

    Raises:
        KeyError: if a dict row has no ``column`` key.
    """
    # Values are returned exactly as stored; callers that need plain
    # Python ints can convert them afterwards (e.g. with int()).
    return [row[column] for row in table]
# distribution of ages
ages = produce_list(fakeTable, 'age')
# Aim for bins roughly five years wide, but never fewer than one: an age
# spread under five years would otherwise ask for zero bins, which the
# histogram rejects.
bins = max(1, int((max(ages) - min(ages)) // 5))
# Keep the per-bin counts so the y-axis can be sized to fit the tallest bar.
# zorder=3 draws the bars on top of the grid lines.
counts = plt.hist(ages, bins, facecolor='green', linewidth=0.5, zorder=3)[0]
plt.title('Distribution of Ages',
          y=1.05,
          fontsize=15)
ax1 = plt.gca()
ax1.set_xlabel('age (yrs)')
ax1.set_ylabel('number of people')
# Show at least 0..5 on the y-axis, growing when a bin holds more people
# so that no bar is clipped at the top.
yTop = max(5, int(max(counts)) + 1)
plt.axis([min(ages) - 1, max(ages) + 1, 0, yTop])
plt.grid(True)
plt.show()
# The spread of the ages can also be shown as a horizontal box plot:
# the box covers the quartiles with a line at the median, the whiskers use
# the 1.5 setting passed as `whis`, and points beyond them are drawn as '+'.
plt.boxplot(
    ages,
    notch=False,
    sym='+',
    vert=False,
    whis=1.5,
    widths=.5,
)
ax1 = plt.gca()
# Keep the grid behind the box so it does not cut through it.
ax1.set_axisbelow(True)
# Only vertical guide lines (x-axis grid) are wanted for a horizontal box.
ax1.grid(True, axis='x', which='major', linestyle='-', color='lightgrey', alpha=0.5)
# A single box needs no y tick marks.
ax1.set(
    title='Spread of ages for speeding tickets',
    xlabel='Age',
    yticks=[],
)
plt.show()
# cars
carModels = produce_list(fakeTable, 'car')

# The make is the first word of each model name ('Ford Focus' -> 'Ford').
# Names are used as stored (no case folding), so the pie labels keep
# their capitalisation.
allCarMakes = [model.split(' ')[0] for model in carModels]

# Count how often each make occurs in a single pass.
makeCounts = Counter(allCarMakes)
cars = list(makeCounts)  # the distinct makes

# Materialise labels and counts as real lists: on Python 3, keys()/values()
# are views, which plt.pie cannot convert to numbers. Both lists come from
# the same dict, so label i belongs to count i.
mylabels = list(makeCounts.keys())    # make name
mynums = list(makeCounts.values())    # num of those makes
# Colors sampled from the hsv colormap instead of the default cycle.
# NOTE(review): colormap inputs are expected in 0..1, so the samples above
# 1.0 (the second half of these twenty) presumably all map to the same
# color -- confirm before relying on more than ten distinct wedges.
mycolors = plt.cm.hsv(np.linspace(0, 2, 20))

# pie chart
plt.pie(mynums,
        labels=mylabels,
        colors=mycolors,
        autopct='%1.1f%%',  # format for the percentage labels
        pctdistance=0.7     # distance from the center of the pct label
        )
# Equal axis scaling keeps the pie circular.
plt.axis('equal')
plt.title('Percentage of Car Makes',
          y=1.09,
          fontsize=15)
plt.show()
# cars with speeding tickets and when they got them
# want to plot time on x-axis and total speeding tickets on y-axis
time = produce_list(fakeTable, 'ticketTime')
labels = ['Morning', 'Afternoon', 'Evening', 'Odd']
# Ticket-time code k is counted under labels[k], so derive the codes from
# the labels rather than listing them by hand.
data = [time.count(code) for code in range(len(labels))]
ind = np.arange(len(data))  # the x locations for the groups
width = 0.65                # the width of the bars: can also be len(x) sequence
# zorder=3 draws the bars on top of the grid lines.
plt.bar(ind, data, width=width, linewidth=0.4, color='r', align='center', zorder=3)
plt.grid(True)
plt.xticks(ind, labels)
# Integer ticks only, with one spare tick of headroom above the tallest bar.
plt.yticks(np.arange(0, max(data) + 2, 1))
ax1 = plt.gca()
ax1.set_ylabel("number of tickets", fontsize=14)
ax1.set_xlabel("time of day", fontsize=14)
ax1.set_title("Time received speeding tickets", fontsize=18)
plt.show()