Friday, October 15, 2010

US Unemployment Rate since 1948

This is a quick response to a discussion on the Visual Analytics LinkedIn group regarding a Wall Street Journal (WSJ) Graph of 'Historical US Unemployment'.

The WSJ chose a heatmap where ordinarily a simple line chart would suffice and discussion revolved around whether the heatmap was successful in providing information to the reader anymore than a line chart would.

Consequently, I put together a quick line plot in python + matplotlib using the following data sources:
The following graph is a little different to the WSJ one with respect to:
  • WSJ chose colour bands for each percentage point, I have chosen 0-4 as good,  4-8 ok, 8+ a bit high, loosely based on the colours of the WSJ example. I have no idea if these values are considered good or not by economists.
  • Periods of recession are represented by vertical bands, as opposed to white dots in the WSJ example.
  • For readability, the dates have been labeled every 4th year (as per Bureau) - it would vary depending on zoom level if it was implemented as a dynamic chart for the web.

Graph of US Unemployment rates 1948 - 2009


Graph of Australian and US Unemployment Rates 1948 - 2010
Note: Australian data only available from late 70s, source: Australian Bureau of Statistics - Labour force data




Code:
import datetime
import pylab
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter as ff
from numpy import ma,logical_or, arange

rawdata = []
ymd = []

k = []
mdata = []

def chomp(string):
    return string.rstrip('\r\n')

f = open('SeriesReport20101014161301.csv', 'rb')
rawdata = f.readlines()
f.close()
for i in xrange(1,len(rawdata)): #skip header
    rawdata[i] = chomp(rawdata[i])
    yrates = rawdata[i].split(',')
    if i == 1:
        startyear = int(yrates[0])
    elif i == (len(rawdata)-1):
        endyear = int(yrates[0])+1

    for q in xrange(1,13):
        mdata.append(yrates[q])


for x in xrange(startyear,endyear):
    for g in xrange(1,13):
        k.append(datetime.date(x, g, 1))

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot_date(k, mdata, '-',label='unemployment rate', color='#571d1c', linewidth=1)
ax.set_xlim(datetime.date(1947, 12, 1),datetime.date(2010, 1, 1))
ax.xaxis.set_major_locator(mdates.YearLocator(4))
ax.xaxis.set_minor_locator(mdates.MonthLocator(1))

# acceptable? rate
p = plt.axhspan(0, 4, facecolor='green', alpha=0.05) 
#getting worse
p = plt.axhspan(4, 8, facecolor='#aaaaff', alpha=0.05) 
# bad?
p = plt.axhspan(8, 12, facecolor='red', alpha=0.05) 
#ax.xaxis.grid(color='white', linestyle='-', linewidth=0.5)

ax.set_ylim(0,12)
ax.yaxis.set_major_locator(pylab.MultipleLocator(1))
ax.yaxis.set_minor_locator(pylab.MultipleLocator(0.1))
#ax.yaxis.grid(color='white', linestyle='-', linewidth=0.5)

#recession http://en.wikipedia.org/wiki/List_of_recessions_in_the_United_States
plt.axvspan(datetime.date(1948, 11, 1), datetime.date(1949, 10, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1953, 7, 1), datetime.date(1954, 5, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1957, 8, 1), datetime.date(1958, 4, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1960, 4, 1), datetime.date(1961, 2, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1969, 12, 1), datetime.date(1970, 11, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1973, 11, 1), datetime.date(1975, 3, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1980, 1, 1), datetime.date(1980, 7, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1981, 7, 1), datetime.date(1982, 11, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(1990, 7, 1), datetime.date(1991, 3, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(2000, 11, 1), datetime.date(2001, 10, 1), facecolor='orange', alpha=0.1)
plt.axvspan(datetime.date(2007, 11, 1), datetime.date(2009, 6, 1), facecolor='orange', alpha=0.1)


labels = ax.get_xticklabels()
for label in labels:
    label.set_rotation(30)
plt.legend(loc='best')
plt.xlabel('Date' )
plt.ylabel('% Rate' )
plt.title('U.S. Unemployment rate for the period 1948 - 2009')
plt.show()
 

Sunday, October 3, 2010

Plotting time vs date in R

Having done the plot with python+matplotlib, thought I would have a go reproducing it in R, using only the builtin "plot". So, just to recap - this is a plot of the sun times (rise/set,twilight and blinding) as generated by the great python library pyephem. The R code reads in a csv file as output from a modified version of the python code used in my original post.

For completeness, the csv generation code is below:

import ephem
import datetime, math
import pylab
 
place = ephem.city('Melbourne')
 
start_date = datetime.datetime(2009,12,1,12)
end_date = datetime.datetime(2011, 1, 31,12)
 
base_offset = '0'
twilight_offset = '-6:00:0.0' # "twilight" = centre of the sun is -6deg ideal horizon
eyeline_offset = '15:34:0.0' # arbitrary +15deg
 
sun = ephem.Sun(place)
 
dates = []
sunrise = []
sunset = []
firstlight = []
lastlight = []
firsteyel = []
lasteyel = []

numdays = (end_date - start_date).days
dates = [start_date + datetime.timedelta(days=i) for i in xrange(numdays+1)] 
dates.sort()

def dt2m(dt):
    return (dt.hour*60) + dt.minute

def m2hm(x):
    h = int(x/60)
    m = int(x%60)
    return '%(h)02d%(m)02d' % {'h':h,'m':m}

sunrise = map(lambda x:dt2m(ephem.localtime(place.next_rising(sun,start=x))),dates)
sunset = map(lambda x:dt2m(ephem.localtime(place.next_setting(sun,start=x))),dates)

place.horizon = twilight_offset
firstlight = map(lambda x:dt2m(ephem.localtime(place.next_rising(sun,start=x))),dates)
lastlight = map(lambda x:dt2m(ephem.localtime(place.next_setting(sun,start=x))),dates)

place.horizon = eyeline_offset
firsteyel = map(lambda x:dt2m(ephem.localtime(place.next_rising(sun,start=x))),dates)
lasteyel = map(lambda x:dt2m(ephem.localtime(place.next_setting(sun,start=x))),dates)

writer = open("suntimes.csv", "w")
writer.write("date,firstlight,sunrise,firsteyel,lasteyel,sunset,lastlight\n")
for n in xrange(numdays):
 writer.write(str(dates[n]) +","+  (m2hm(firstlight[n]))+"," \
 + m2hm(sunrise[n])+"," + m2hm(firsteyel[n])+"," + m2hm(lasteyel[n])+"," \
 + m2hm(sunset[n])+"," + m2hm(lastlight[n])+"\n")
writer.close()
 
 
I chose to leave the format of the times, as they resemble the format of the Geoscience Australia times.

This is how the csv file looks:
date,firstlight,sunrise,firsteyel,lasteyel,sunset,lastlight
2009-12-01 00:00:00,0518,0551,0719,1858,2026,2059
2009-12-02 00:00:00,0518,0551,0719,1859,2027,2100
2009-12-03 00:00:00,0518,0551,0719,1859,2028,2101
2009-12-04 00:00:00,0518,0551,0719,1900,2029,2102
2009-12-05 00:00:00,0518,0550,0719,1901,2030,2103


And here's the resulting graph:


Here's the R code:

suntimes <- read.csv("suntimes.csv")
 
suntimes$date <- sub(' 00:00:00$', '', suntimes$date) #strip off trailing 0
suntimes$date <- as.Date(suntimes$date,"%Y-%m-%d") # make real date so we can axis.date
suntimes$firstlight <- sub('^([[:digit:]]{3})$', '0\\1', suntimes$firstlight) #pad 0
suntimes$sunrise <- sub('^([[:digit:]]{3})$', '0\\1', suntimes$sunrise)
suntimes$firsteyel <- sub('^([[:digit:]]{3})$', '0\\1', suntimes$firsteyel)
suntimes$lasteyel <- sub('^([[:digit:]]{3})$', '0\\1', suntimes$lasteyel)
suntimes$sunset <- sub('^([[:digit:]]{3})$', '0\\1', suntimes$sunset)
suntimes$lastlight <- sub('^([[:digit:]]{3})$', '0\\1', suntimes$lastlight)
  
#calc as minutes from midnight
suntimes$firstlighttimes <-  as.integer(substr(suntimes$firstlight,1,2))*60 + as.integer(substr(suntimes$firstlight,3,4))
suntimes$sunrisetimes <-  as.integer(substr(suntimes$sunrise,1,2))*60 + as.integer(substr(suntimes$sunrise,3,4))
suntimes$firsteyeltimes <-  as.integer(substr(suntimes$firsteyel,1,2))*60 + as.integer(substr(suntimes$firsteyel,3,4))
suntimes$lasteyeltimes <-  as.integer(substr(suntimes$lasteyel,1,2))*60 + as.integer(substr(suntimes$lasteyel,3,4))
suntimes$sunsettimes <-  as.integer(substr(suntimes$sunset,1,2))*60 + as.integer(substr(suntimes$sunset,3,4))
suntimes$lastlighttimes <-  as.integer(substr(suntimes$lastlight,1,2))*60 + as.integer(substr(suntimes$lastlight,3,4))
 
plot(suntimes$date,suntimes$firstlighttimes,type="l", lwd=1,col='blue',ylim=c(240,1330), axes= F, xlab= "Dates", ylab= "Time")
# make grid first, then overlay points
abline(h=(seq(from=240,to=1330, by=20)), lwd =0.1, lty="dotted", col='#fafafa')
abline(h=(seq(from=240,to=1330, by=60)), lwd =0.1, lty="dotted", col='#fceae4')
abline(v=(seq(from=min(suntimes$date),to=max(suntimes$date),"month")), lwd =0.1, lty="dotted", col='#fceae4')

points(suntimes$date,suntimes$firstlighttimes,type='l', lwd=1,col='blue')
#points(suntimes$date,suntimes$sunrisetimes,type='p',pch=20, cex = 0.75, lwd=0.5,col='#FF0000')
points(suntimes$date,suntimes$sunrisetimes,type='l', lwd=2,col='#FF0000')
points(suntimes$date,suntimes$firsteyeltimes,type='l', lwd=1,col='orange')
points(suntimes$date,suntimes$lasteyeltimes,type='l', lwd=1,col='orange')
#points(suntimes$date,suntimes$sunsettimes,type='p',pch=20, cex = 0.75, lwd=0.75, col='#FF0000')
points(suntimes$date,suntimes$sunsettimes,type='l', lwd=2, col='#FF0000')
points(suntimes$date,suntimes$lastlighttimes,type='l', lwd=1,col='blue')
 
#customise X Axis
axis.Date(side=1, at=seq(from=min(suntimes$date),to=max(suntimes$date),"days"),col.ticks='gray', format="%b", labels="")
axis.Date(side=1, at=seq(from=min(suntimes$date),to=max(suntimes$date),"month"),  col.ticks='red', format="%b\n%Y" )
 
#customise Y Axis
yaxisMajorTicks.hours <- seq(from=240,to=1330, by=60) # whole hours from 4am to 10pm
yaxisMajorTicks.hournames <- as.character(yaxisMajorTicks.hours/60) #eg 240/60=4am
yaxisMajorTicks.hournames <- sub('^(.+)$', '\\1:00', yaxisMajorTicks.hournames) #append ":00"
 
yaxisMinorTicks.tens <- seq(from=240,to=1330, by=10)
yaxisMinorTicks.tensnames <- as.character(yaxisMinorTicks.tens)
yaxisMinorTicks.tensnames <- sub('^(.+)$', "", yaxisMinorTicks.tensnames)
# draw major over minor ticks
axis(side=2, at=yaxisMinorTicks.tens, col.ticks='gray',labels=yaxisMinorTicks.tensnames)
axis(side=2, at=yaxisMajorTicks.hours, col.ticks='red',labels=yaxisMajorTicks.hournames, las=2)
box()
#legend  max(yaxisMajorTicks.hours),c(1,2,1,2,1)pch=c(NA,20,NA,20,NA)
legend("right","center", c("firstlight","sunrise","in eyes","sunset","lastlight"), cex=0.75, 
   col=c("blue","red","orange","red","blue"), lty=1, lwd=2, bty="o", bg='#ffffff');

 

Friday, October 1, 2010

Plotting time vs date in python and matplotlib

Having to commute to the city each day and frequently getting the blinding sun, I thought as a fun exercise, I could plot the sunrise/sets and thus estimate with some accuracy when the sun is most blinding (using a purely subjective (which seems to fit my anecdotal observations) value for the degrees above the horizon).

The essentials (using python 2.6) are pyephem (astronomical library), numpy (scientific library), and matplotlib (2D graphics and charting library).

Calculating the times is very easy using pyephem, set your observer object to the place you want and off you go, calling next_rising or next_setting, they are returned as datetime objects when wrapped in ephem's "localtime" function (otherwise they're custom dates).

The hard part? plotting times on the Y axis using matplotlib ... if you do it with datetime objects. I gave up after trying a few different ways, and resorted to a very easy workaround.

Plot the times as integer minutes : 0 being midnight, 60 = 1am ... which is 24hr * 60. Then format the Y axis as Hour:Minute using a custom formatter:
from matplotlib.ticker import FuncFormatter as ff


def m2hm(x, i):
    h = int(x/60)
    m = int(x%60)
    return '%(h)02d:%(m)02d' % {'h':h,'m':m}

ax.yaxis.set_major_formatter(ff(m2hm))

To get sensible tick marks, I set the major ticks to an hour
ax.yaxis.set_major_locator(pylab.MultipleLocator(60))
The minor are set to 15 minutes though I left off showing the labels for the Y minor as it clutters the graph, but left it commented in the code.







import ephem
import datetime, math
import pylab
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter as ff

place = ephem.city('Melbourne')

start_date = datetime.datetime(2009,12,1,12) # yyyy,m,d,h - midday is best
end_date = datetime.datetime(2011, 1, 31,12) # midday Jan 31st, 2011

twilight_offset = '-6:00:0.0' # "twilight" = centre of the sun is -6deg ideal horizon
eyeline_offset = '15:00:0.0' # arbitrary +15deg

sun = ephem.Sun(place)

dates = []
sunrise = []
sunset = []
firstlight = []
lastlight = []
firsteyel = []
lasteyel = []

numdays = (end_date - start_date).days
dates = [start_date + datetime.timedelta(days=i) for i in xrange(numdays+1)] 
dates.sort()

def dt2m(dt):
    return (dt.hour*60) + dt.minute

def m2hm(x, i):
    h = int(x/60)
    m = int(x%60)
    return '%(h)02d:%(m)02d' % {'h':h,'m':m}

sunrise = map(lambda x:dt2m(ephem.localtime(place.next_rising(sun,start=x))),dates)
sunset = map(lambda x:dt2m(ephem.localtime(place.next_setting(sun,start=x))),dates)

place.horizon = twilight_offset
firstlight = map(lambda x:dt2m(ephem.localtime(place.next_rising(sun,start=x))),dates)
lastlight = map(lambda x:dt2m(ephem.localtime(place.next_setting(sun,start=x))),dates)

place.horizon = eyeline_offset
firsteyel = map(lambda x:dt2m(ephem.localtime(place.next_rising(sun,start=x))),dates)
lasteyel = map(lambda x:dt2m(ephem.localtime(place.next_setting(sun,start=x))),dates)

fig = plt.figure()

ax = fig.add_subplot(111)
ax.plot_date(dates, firstlight, ':',label=' First light (' + twilight_offset + ')', color='#06544e')
ax.plot_date(dates, sunrise, linestyle='-',label=' Sunrise', color='red')
ax.plot_date(dates, firsteyel, linestyle='-', label=' In eyes ('+ eyeline_offset + ')', color='orange')
ax.plot_date(dates, lasteyel, linestyle='-', color='orange')
ax.plot_date(dates, sunset, linestyle='-', label=' Sunset', color='red')
ax.plot_date(dates, lastlight, ':',label=' Last light (' + twilight_offset + ')', color='blue')

ax.set_xlim([dates[0],dates[len(dates)-1]])
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
#ax.xaxis.grid(False)
ax.xaxis.grid(color='#ffcf06', linestyle=':', linewidth=0.5)

ax.set_ylim(240,1330) #4am to 10pm
ax.yaxis.set_major_locator(pylab.MultipleLocator(60))
ax.yaxis.set_minor_locator(pylab.MultipleLocator(15))
ax.yaxis.set_major_formatter(ff(m2hm))
#ax.yaxis.set_minor_formatter(ff(m2hm))
ax.yaxis.grid(color='r', linestyle=':', linewidth=0.5)

labels = ax.get_xticklabels()
for label in labels:
    label.set_rotation(30)
plt.legend(loc='best')
plt.xlabel('Date' )
plt.ylabel('Hour (24hr)' )
plt.title("Sun times\n" + str(start_date) + " - " + str(end_date) + "\n (localised for " + place.name + ')')
plt.show()