First generate some pretend data (this will look familiar)
set.seed(3)
true.curve = function(x) {sin(x)*cos(5*x)}
ylim = c(-2,2)
n = 100
x = sort(rnorm(n,1.5,.5))
y = true.curve(x)+rnorm(length(x),0,0.15)
plot(x,y,xlab="x",ylab=expression(f(x)+epsilon),ylim=ylim)
curve(true.curve(x),col="grey",add=TRUE)
data = data.frame(x=x,y=y)
We can fit splines with smooth.spline
.
spar
(actually specifies a monotone function of it)x
and y
arguments, or as a data frame with x
and y
columnsWe can predict new values with predict
.
?predict.smooth.spline
x
values as x
argumentx
and y
elements (sorted)Very low \(\lambda\):
fit_rough = smooth.spline(data,spar = 0)
eval.grid = seq(from=min(data$x),to=max(data$x),length.out=1000)
plot(data, ylim=ylim)
lines(predict(fit_rough, x=eval.grid))
Very high \(\lambda\):
fit_verysmooth = smooth.spline(data,spar=2)
plot(data, ylim=ylim)
lines(predict(fit_verysmooth, x=eval.grid))
Slightly lower, but still very high \(\lambda\):
fit_smooth = smooth.spline(data,spar=1)
plot(data, ylim=ylim)
lines(predict(fit_smooth, x=eval.grid))
Cross validated \(\lambda\):
fit_cv = smooth.spline(data)
plot(data, ylim=ylim)
lines(predict(fit_cv, x=eval.grid))
First, we load data from Yahoo finance
require(pdfetch)
sp <- pdfetch_YAHOO("SPY", fields="adjclose",
from=as.Date("1993-02-09"), to=as.Date("2017-02-13"))
load('09.Rdata')
# Compute log returns
sp <- diff(log(sp))
# need to drop the initial NA which makes difficulties later
sp <- sp[-1]
#All but the last entry
sp.today <- head(sp,-1)
#All but the first entry
sp.tomorrow <- tail(sp,-1)
#Plot tomorrow versus today
plot(as.numeric(sp.today),as.numeric(sp.tomorrow),pch=20,col=rgb(0,0,0,.3))
#Add a nice horizontal line
abline(h=0,lty=3)
#Fit a linear model, see the coefficients
sp.linear = lm(sp.tomorrow ~ sp.today)
coefficients(sp.linear)
## (Intercept) sp.today
## 0.0003685476 -0.0610859914
abline(sp.linear,col='grey',lwd=2)
We can fit a spline instead of a line:
#Fit a spline
sp.spline <- smooth.spline(x=sp.today,y=sp.tomorrow,cv=TRUE)
sp.spline
## Call:
## smooth.spline(x = sp.today, y = sp.tomorrow, cv = TRUE)
##
## Smoothing Parameter spar= 1.319928 lambda= 0.008680907 (12 iterations)
## Equivalent Degrees of Freedom (Df): 6.464913
## Penalized Criterion: 0.8201753
## PRESS: 0.0001372498
sp.spline$lambda
## [1] 0.008680907
#Plot the spline (and the other stuff)
plot(as.numeric(sp.today),as.numeric(sp.tomorrow),pch=20,col=rgb(0,0,0,.3),xlab="Today's log-return", ylab="Tomorrow's log-return")
abline(h=0,lty=3)
abline(sp.linear,col='grey',lwd=2)
lines(sp.spline,type='l',lwd=2,col='blue') # The spline
What happens if we play around with the \(\lambda\) parameter:
plot(as.numeric(sp.today),as.numeric(sp.tomorrow),pch=20,col=rgb(0,0,0,.3),xlab="Today's log-return", ylab="Tomorrow's log-return")
abline(sp.linear,col="grey")
lines(sp.spline)
lines(smooth.spline(sp.today,sp.tomorrow,spar=1.5),col="blue")
lines(smooth.spline(sp.today,sp.tomorrow,spar=2),col="blue",lty=2)
lines(smooth.spline(sp.today,sp.tomorrow,spar=1.1),col="red")
lines(smooth.spline(sp.today,sp.tomorrow,spar=0.5),col="red",lty=2)
We notice that the spline is steeper on the left than on the right! Could this be sampling error? Let’s find bootstrap error bounds on the fit!
First, we build a resampling function to get bootstrap data sets:
# A data frame to hold our data. Makes it easier to sample rows
sp.frame <- data.frame(today=sp.today,tomorrow=sp.tomorrow)
# A function to sample rows with replacement
sp.resampler <- function() {
n <- nrow(sp.frame)
resample.rows <- sample(1:n,size=n,replace=TRUE)
return(sp.frame[resample.rows,])
}
We set up a function to evaluate predictions on an even grid
# Set up a grid of evenly-spaced points on which to evaluate the spline
grid.300 <- seq(from=min(sp.today),to=max(sp.today),length.out=300)
sp.spline.estimator <- function(data,eval.grid=grid.300) {
# Fit spline to data, with cross-validation to pick lambda
fit <- smooth.spline(x=data[,1],y=data[,2],cv=TRUE)
# Do the prediction on the grid and return the predicted values
return(predict(fit,x=eval.grid)$y) # We only want the predicted values
}
A function to draw \(B\) bootstrap samples and compute pointwise quantiles:
sp.spline.cis <- function(B,alpha,eval.grid=grid.300) {
spline.main <- sp.spline.estimator(sp.frame, eval.grid=eval.grid)
# Draw B boottrap samples, fit the spline to each
# Result has length(eval.grid) rows and B columns
spline.boots <- replicate(B, sp.spline.estimator(sp.resampler(),eval.grid=eval.grid))
cis.lower <- 2*spline.main - apply(spline.boots,1,quantile,probs=1-alpha/2)
cis.upper <- 2*spline.main - apply(spline.boots,1,quantile,probs=alpha/2)
return(list(main.curve=spline.main, lower.ci=cis.lower, upper.ci=cis.upper, x=eval.grid))
}
Now we run it all together!
#Get the confidence intervals
sp.cis <- sp.spline.cis(B=1000,alpha=0.05)
#Plot the confidence intervals
plot(as.vector(sp.today),as.vector(sp.tomorrow),xlab="Today's log-return",
ylab="Tomorrow's log-return",pch=16,cex=0.5,col="grey")
abline(sp.linear,col="darkgrey")
lines(x=sp.cis$x,y=sp.cis$main.curve,lwd=2)
lines(x=sp.cis$x,y=sp.cis$lower.ci,col='red')
lines(x=sp.cis$x,y=sp.cis$upper.ci)