#  Code initially written by Ryan Sieberg.

######################################################################
####################  Old from 1.4, now restricted and in grey #######
######################################################################


#  First set up a jpeg graphics device in which we will print the image.
jpeg("hurshR.jpg")

#  Read data from CSV.
hursh<-read.csv("hursh.csv")
#  hursh contains two named columns, 'diameter' and 'velocity'.

#  The 'par' commands change graphical parameters.  cex stands for
#  "character expansion" and determines the size of text, in this
#  case, cex.lab changes the size of the label.
par(cex.lab=2)
#  cex.axis changes the size of the axis font.
par(cex.axis=1.75)
#  mar changes the margins of the graph area.
par(mar=(c(5,4.7,4,2)+.1))



#  First restric the data to those neurons with diameter between 5 and 15.
min.diameter <- 5
max.diameter <- 15
withinRange <- hursh$diameter > min.diameter & hursh$diameter < max.diameter
hursh <- hursh[withinRange, ]


#  Create a linear model for the hursh data.
lmhursh<-lm(hursh$velocity ~ hursh$diameter)

# Plot the data.
plot(hursh$diameter,
     hursh$velocity,
     main="",
     axes=FALSE,
     xlab="Diameter (microns)",
     ylab="  Velocity (m/s)",
     xlim=c(0,20),
     ylim=c(0,120))

axis(1,pos=0)
axis(2,pos=0)

#  Add a line.  The linear model object "lmhursh" contains a list
#  called "coef", which contains the coefficients of the model.  The
#  first entry is the intercept, and the second is the coefficient for
#  velocity.  They are stored as strings, so we convert them to
#  numerics before using them in abline.
intercept <- as.numeric(lmhursh$coef[1])
slope <- as.numeric(lmhursh$coef[2])
abline(intercept, slope, untf = "FALSE")



######################################################################
######################### New for 12.2 ###############################
######################################################################
#  We've now added the points and line, as in figure 1.4, but this
#  time on a restricted portion of the data, and this time in grey.
#  Now we add arrows showing how the regresion line gives us y-bar
#  from x-bar, and y-hat from an x.

#  Choose an x
x.example <- hursh$diameter[24]
y.example <- hursh$velocity[24]
y.example.fit <- intercept + slope * x.example
x.bar <- mean(hursh$diameter)
y.bar <- mean(hursh$velocity)

#  Set up arrows so they end at the y-axis.
arrow.size <- 0.23
arrow.start <- min.diameter - arrow.size

#  Plot line from x to regression line to y-hat.
arrows(arrow.start, y.example.fit, x.eample, y.example.fit, lty = 2, code = 1)
segments(x, 0, x, y.example.fit, lty = 3)

#  And one from x to the actual observation to the actual y.  The
#  distance from the regression line here is the residual.
arrows(arrow.start, y.example, x.example, y.example, lty = 2, code = 1)
segments(x, 0, x, y.example, lty = 3)

#  Plot line from x-bar to regression line to y-bar.
arrows(arrow.start, y.bar, x.bar, y.bar)
segments(x.bar, 0, x.bar, y.bar, lty = 3)

#  Set axes and close.
#  First add labels for y and y-hat on y-axis, and for x on x-axis.
y.label = expression(y[i])
x.label = expression(x[i])
y.hat.label = expression(hat(y)[i])
axis(side = 2, at = c(y.example, y.example.fit),
     labels = c(y.label, y.hat.label), las = 2, cex.axis = 1.3, tick = F)
axis(side = 1, at = x.example, labels = x.label, cex.axis = 1.3)

#  Then add labels for x-bar and y-bar on their axes.
axis(side = 2, at = y.bar, labels = expression(bar(y)),
     las = 2, cex.axis = 1.3, tick = F)
axis(side = 1, at = x.bar, labels = expression(bar(x)),
     cex.axis = 1.3)


#  With our figue complete, close the connection to the PNG.
dev.off()