# Code to run some analyses on the test data # Install rube according to http://www.stat.cmu.edu/~hseltman/rube/ . # Load the "rube" package (If any errors about missing packages are # seen, install those packages using the R menu item # Packages/InstallPackages then run "library(rube)" again. # (It's OK to ignore warnings about "built under R version".) library(rube) # Load the simulated data tbY = read.table("tbY.dat", header=TRUE) # times and test scores tbCov = read.table("tbCov.dat", header=TRUE) # covariates # Format data for WinBUGS # Note: use of as.numeric(factor()) allows non-numeric and/or # non-consecutive ids. dat = list(X = tbY$age, Y = tbY$score, subj = as.numeric(factor(tbY$id)), M = length(unique(tbY$id)), NN = nrow(tbY), male = tbCov$male, educ = tbCov$educ, SNP1 = tbCov$SNP1, SNP2 = tbCov$SNP2, SNP3 = tbCov$SNP3) # Test and summarize the model (by not providing data) rube("tbModel.txt", varList=list(RATE.VARS="male+educ+SNP1+SNP2+SNP3-1", MID.VARS="male+educ+SNP1+SNP2+SNP3-1"))
# Now also examine constants and data summary rube("tbModel.txt", data=dat, varList=list(RATE.VARS="male+educ+SNP1+SNP2+SNP3-1", MID.VARS="male+educ+SNP1+SNP2+SNP3-1"))
# A function for random initial values initF = function(n.chains, M) { rtn = vector("list", n.chains) for (i in 1:n.chains) { rtn[[i]] = list(yMinFac=pmax(0.01,rnorm(M,0.20,0.03)), yMaxFac=pmin(0.99,rnorm(M,0.90,0.03)), tMid = runif(M, 70, 90), Rate = rnorm(M, 5, 1), logvarY = rnorm(1, 0.5, 0.1), minAoverB0 = rnorm(1, 1.33, 0.2), minBeta0 = rnorm(1, 20, 1), maxAoverB0 = rnorm(1, 4, 0.2), maxBeta0 = rnorm(1, 20, 1), meantMid = rnorm(1, 80, 2), bmaletMid = rnorm(1, 0, 0.4), beductMid = rnorm(1, 0, 0.4), bSNP1tMid = rnorm(1, 0, 0.4), bSNP2tMid = rnorm(1, 0, 0.4), bSNP3tMid = rnorm(1, 0, 0.4), meanRate = rnorm(1, 3.8, 1), bmaleRate = rnorm(1, 0, 0.04), beducRate = rnorm(1, 0, 0.04), bSNP1Rate = rnorm(1, 0, 0.04), bSNP2Rate = rnorm(1, 0, 0.04), bSNP3Rate = rnorm(1, 0, 0.04), logvartMid = rnorm(1, 2, 0.2), logvarRate = rnorm(1, -1.5, 0.2)) } return(rtn) } # Generate some reasonable, but random, initial values i0 = initF(n.chains=3, M=dat$M) # Check that all initial values are present, and none are flagged # as unreasonable. rube("tbModel.txt", data=dat, inits=i0, varList=list(RATE.VARS="male+educ+SNP1+SNP2+SNP3-1", MID.VARS="male+educ+SNP1+SNP2+SNP3-1"))
# Run a small run (1000 iterations) with no discarded (burnin) values # (This is the first time WinBUGS actually runs.) PTS=names(i0[[1]]) m0 = rube("tbModel.txt", data=dat, inits=i0, n.chains=3, n.sim=1000, n.burnin=0, bin=100, parameters.to.save=PTS, varList=list(RATE.VARS="male+educ+SNP1+SNP2+SNP3-1", MID.VARS="male+educ+SNP1+SNP2+SNP3-1")) p3(m0) # view results
This screen view shows the traces of the three chains for one parameters. Along the bottom are buttons for selecting other parameters and views. See the rube documentation for more details on using the p3() function.
The left panel or this screen view shows the posterior 95% intervals for each of
the five covariate effects on trajectory mid-point for all
three chains (in three colors). The similarity of the three chains is an indicator
of good MCMC mixing. The middle panel shows how many lags are statistically
significant for autocorrelation for each parameter and chain (again, color=chain).
The short segments indicate no problem
with autocorrelation. The right panel shows the Gelman R-hat for each
parameter, and the low value and green color indicates no mixing problems. An
orange or red color is used in this panel to flag an elevated value.
The use of "limit" in the rube summary() command restricts the output
for parameters with many values, such as the four individualized
trajectory parameters.
The main things to look at are the mean, 2.5% and 97.5% columns for the
b***tMid and b***Rate rows. E.g.,
for bSNP3Rate the posterior mean is -0.611 and the 95% posterior interval
is [-0.778, -0.454], so we are quite confident that the true effect
of SNP3 on the Rate parameter is non-zero with a best estimate of -0.611,
corresponding to a more rapid rate of decline.
# Choose run parameters that will give good results
m1 = rube("tbModel.txt", data=dat, inits=i0, n.chains=3, n.thin=10,
n.sim=5500, n.burnin=500, bin=100, parameters.to.save=PTS,
varList=list(RATE.VARS="male+educ+SNP1+SNP2+SNP3-1",
MID.VARS="male+educ+SNP1+SNP2+SNP3-1"))
p3(m1)
# Examine results
summary(m1, limit=3)
# Visualization of trajectories
if (!exists("plotTraj")) source("plotTraj.R")
# Important: plotTraj() is an interactive plot function.
# You will NOT BE ABLE TO ENTER R COMMANDS UNTIL YOU CLICK
# AT THE BOTTOM OF THE PLOT.
# View individual (or overlaid) trajectories
plotTraj(m1,dat)
# Save a certain trajectory
plotTraj(m1, dat, subset=7, noInstructions=TRUE)
dev.copy(pdf, "TrajNumber7.pdf"); dev.off()
TrajNumber7.pdf
# View individual residual plots
plotTraj(m1, dat, res=TRUE, main="Residuals for ID =")
plotTraj(m1, dat, res=TRUE, main="Residuals for ID =",
subset=7, noInstructions=TRUE)
dev.copy(pdf, "ResNumber7.pdf"); dev.off()
ResNumber7.pdf
# Full residual vs. age plot
res=plotTraj(m1, dat, res=TRUE, main="Residual vs. Age",
nostop=TRUE, noInstructions=TRUE, ylim=c(-16,16))
dev.copy(pdf, "ResVsAge.pdf"); dev.off()
ResVsAge.pdf
# Residual vs. fit
plot(dat$Y, res, xlab="fitted values", ylab="residuals",
main="Residual vs. Fit")
abline(h=0)
dev.copy(pdf, "ResVsFit.pdf"); dev.off()
ResVsFit.pdf
# Residual Quantile Normal Plot
# (demonstrates heavy tails and need for T error distribution)
if (!exists("qqn"))
source("http://www.stat.cmu.edu/~hseltman/files/qqn.R")
qqn(res)
dev.copy(pdf, "ResQN.pdf"); dev.off()
ResQN.pdf
# Example of a prior sensitivity check (changing the hyperpriors
# for the rate covariates precisions from 0.01 to 0.1)
m2 = rube("tbModel.txt", data=dat, inits=i0, n.chains=3, n.thin=10,
n.sim=5500, n.burnin=500, bin=100, parameters.to.save=PTS,
subs=list(PREC.B.RATE=0.1),
varList=list(RATE.VARS="male+educ+SNP1+SNP2+SNP3-1",
MID.VARS="male+educ+SNP1+SNP2+SNP3-1"))
p3(m2)
summary(m2, limit=0) # matches summary(m1, limit=0) well
Please direct any questions to Howard Seltman, Carnegie Mellon University, Department of Statistics,