Plotting Examples

It is essential to run

>> %matplotlib inline

at the beginning of the notebook; otherwise the plots will not be displayed inline.

In [70]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rand
In [71]:
# make up some fake data
# Fix the seed so that re-running the notebook reproduces the same table.
rand.seed(0)

nameList = ['James','Mary','John','Patricia','Robert','Jennifer','Michael','Elizabeth','William',
            'Linda','David','Barbara','Richard','Susan','Joseph','Margaret','Charles','Jessica','Christopher','Dorothy']
numPeople = len(nameList)

# ages are drawn from 17..69 (the upper bound of randint is exclusive)
ageList = rand.randint(17, 70, size=numPeople)

# NOTE: this list holds one more entry than nameList; the surplus entry is
# simply never paired with a person by the zip() below.
carList = ['Toyota Camry','Toyota Corolla','Nissan Altima','Honda Civic','Honda Accord',
           'Ford Focus','Hyundai Elantra','Ford Fusion','Chevrolet Cruze','Ford Focus',
           'Nissan Sentra','Chevrolet Malibu','Hyundai Sonata','Ford Focus','Honda Civic',
           'Ford Focus','Ford Focus','Ford Mustang','Volkswagen Jetta','Subaru Outback','Nissan Versa']

# 0/1 flag per person
firstSpeedingTicket = rand.randint(2, size=numPeople)

# time-of-day code per person, one of 0, 1, 2, 3
ticketTimeList = rand.randint(0, 4, size=numPeople)

# put the fake data into a list of dictionaries (one dictionary per person)
fakeTable = []
for name, age, ticket, ticketTime, car in zip(nameList, ageList, firstSpeedingTicket,
                                              ticketTimeList, carList):
    # int(...) turns the NumPy integers into plain Python ints so that every
    # row holds ordinary values and prints as plain numbers.
    tmpDict = {'name': name,
               'age': int(age),
               'ticket': int(ticket),
               'ticketTime': int(ticketTime),
               'car': car}
    fakeTable.append(tmpDict)
    # the parenthesised form prints the same thing on Python 2 and Python 3
    print(tmpDict)
{'car': 'Toyota Camry', 'age': 57, 'ticket': 1, 'name': 'James', 'ticketTime': 3}
{'car': 'Toyota Corolla', 'age': 48, 'ticket': 1, 'name': 'Mary', 'ticketTime': 1}
{'car': 'Nissan Altima', 'age': 67, 'ticket': 1, 'name': 'John', 'ticketTime': 0}
{'car': 'Honda Civic', 'age': 66, 'ticket': 1, 'name': 'Patricia', 'ticketTime': 0}
{'car': 'Honda Accord', 'age': 23, 'ticket': 0, 'name': 'Robert', 'ticketTime': 3}
{'car': 'Ford Focus', 'age': 46, 'ticket': 1, 'name': 'Jennifer', 'ticketTime': 2}
{'car': 'Hyundai Elantra', 'age': 47, 'ticket': 1, 'name': 'Michael', 'ticketTime': 1}
{'car': 'Ford Fusion', 'age': 41, 'ticket': 0, 'name': 'Elizabeth', 'ticketTime': 3}
{'car': 'Chevrolet Cruze', 'age': 63, 'ticket': 1, 'name': 'William', 'ticketTime': 2}
{'car': 'Ford Focus', 'age': 37, 'ticket': 1, 'name': 'Linda', 'ticketTime': 3}
{'car': 'Nissan Sentra', 'age': 36, 'ticket': 1, 'name': 'David', 'ticketTime': 1}
{'car': 'Chevrolet Malibu', 'age': 39, 'ticket': 1, 'name': 'Barbara', 'ticketTime': 1}
{'car': 'Hyundai Sonata', 'age': 63, 'ticket': 0, 'name': 'Richard', 'ticketTime': 2}
{'car': 'Ford Focus', 'age': 21, 'ticket': 0, 'name': 'Susan', 'ticketTime': 3}
{'car': 'Honda Civic', 'age': 35, 'ticket': 1, 'name': 'Joseph', 'ticketTime': 1}
{'car': 'Ford Focus', 'age': 20, 'ticket': 0, 'name': 'Margaret', 'ticketTime': 3}
{'car': 'Ford Focus', 'age': 47, 'ticket': 1, 'name': 'Charles', 'ticketTime': 3}
{'car': 'Ford Mustang', 'age': 56, 'ticket': 0, 'name': 'Jessica', 'ticketTime': 1}
{'car': 'Volkswagen Jetta', 'age': 35, 'ticket': 1, 'name': 'Christopher', 'ticketTime': 3}
{'car': 'Subaru Outback', 'age': 52, 'ticket': 0, 'name': 'Dorothy', 'ticketTime': 1}


Let's plot that data!

  • histogram
  • bar plot
  • scatter plot
  • line plot
  • pie chart
  • other??

In [72]:
# first we need to get the pieces we want
# let's do this with reusable functions

def produce_list( table, column ):
    """Collect one column of a table into a list.

    table  -- iterable of rows, each row supporting row[column] lookup
              (here: a list of dictionaries)
    column -- the key to look up in every row

    Returns a new list holding row[column] for every row, in table order.
    A row that lacks the key raises whatever error its lookup raises
    (KeyError for a dictionary).
    """
    # values are passed through unchanged; wrap in int(...) if plain ints are needed
    return [record[column] for record in table]
In [162]:
# distribution of ages

ages = produce_list(fakeTable, 'age')

# roughly one bin per 5 years of age range, but never fewer than one bin
# (a range narrower than 5 years would otherwise give 0 bins and make hist fail)
bins = max(1, int(max(ages) - min(ages)) // 5)

# plt.hist returns the per-bin counts first; they are used below to size the y-axis
counts, _, _ = plt.hist(ages, bins, facecolor='green', linewidth=0.5, zorder=3)

plt.title('Distribution of Ages',
          y=1.05,
          fontsize=15)

ax1 = plt.gca()

ax1.set_xlabel('age (yrs)')

ax1.set_ylabel('number of people')

# keep the y-axis at least 0..5, but grow it when a bar is taller so nothing is clipped
y_top = max(5, int(max(counts)) + 1)
plt.axis([min(ages)-1, max(ages)+1, 0, y_top])

# bars were drawn with zorder=3 so the grid sits behind them
plt.grid(True)

plt.show()
In [57]:
# A boxplot is another way to look at the spread of the ages:
# it shows the min, max, and quartiles of the data
# (the line inside the box marks the median).

ax1 = plt.gca()

# horizontal box, no notch, '+' markers for outliers, whiskers at 1.5 * IQR
ax1.boxplot(ages, notch=False, sym='+', vert=False, whis=1.5, widths=0.5)

# light vertical guide lines, drawn underneath the box
ax1.set_axisbelow(True)
ax1.xaxis.grid(True, which='major', linestyle='-', color='lightgrey',
               alpha=0.5)

# there is a single box, so the y-axis carries no information: hide its ticks
ax1.set_yticks([])
ax1.set_xlabel('Age')
ax1.set_title('Spread of ages for speeding tickets')

plt.show()
In [65]:
# cars
carModels = produce_list(fakeTable, 'car')

# get the make of each car model: the make is the first word of the model name.
# Names keep their original capitalisation so they can be used directly as labels.
allCarMakes = [model.split(' ')[0] for model in carModels]

# count up all the car makes in a single pass
makeCounts = dict()
for make in allCarMakes:
    makeCounts[make] = makeCounts.get(make, 0) + 1

# distinct makes, sorted so the wedge order is the same on every run
cars = sorted(makeCounts)

# real lists (not dictionary views), kept in matching order
mylabels = list(cars)                           # make name
mynums = [makeCounts[make] for make in cars]    # num of those makes

# colors I like better than the default: one evenly spaced hue per make.
# The colormap is sampled on [0, 1); the end point is left out because hsv
# wraps around, so 0 and 1 are the same red.
mycolors = plt.cm.hsv(np.linspace(0, 1, len(mylabels), endpoint=False))

# pie chart
plt.pie(mynums,
        labels=mylabels,
        colors=mycolors,
        autopct='%1.1f%%', # format for the percentage labels
        pctdistance=0.7 # distance from the center of the pct label
       )

plt.axis('equal') # equal aspect ratio so the pie is drawn as a circle
plt.title('Percentage of Car Makes',
          y=1.09,
          fontsize=15)

plt.show()
In [152]:
# cars with speeding tickets and when they got them

# want to plot time on x-axis and total speeding tickets on y-axis
time = produce_list(fakeTable, 'ticketTime')

# the ticketTime codes 0, 1, 2, 3 map, in order, onto these labels
labels = ['Morning', 'Afternoon', 'Evening', 'Odd']

# number of rows carrying each time code, in label order
data = [time.count(code) for code in range(len(labels))]

ind = np.arange(len(data))    # the x locations for the groups

width = 0.65                  # the width of the bars: can also be len(x) sequence

# zorder=3 draws the bars on top of the grid lines
plt.bar(ind, data, width=width, linewidth=0.4, color='r', align='center', zorder=3)

plt.grid(True)

plt.xticks(ind, labels)

# one tick per whole ticket, with a little headroom above the tallest bar
plt.yticks(np.arange(0, max(data)+2, 1))

ax1 = plt.gca()

ax1.set_ylabel("number of tickets", fontsize=14)

ax1.set_xlabel("time of day", fontsize=14)

ax1.set_title("Time received speeding tickets", fontsize=18)


plt.show()
In [ ]: