Drug effect example

(Continued)

# Simulate from model, supposing 60 subjects in each group 
set.seed(0)
n = 60 
mu.drug = 2
mu.nodrug = runif(n, min=0, max=1)
x.drug = 100*rexp(n, rate=1/mu.drug) 
x.nodrug = 100*rexp(n, rate=1/mu.nodrug)

(Continued)

# Find the range of all the measurements together, and define breaks
x.range = range(c(x.nodrug,x.drug))
breaks = seq(min(x.range),max(x.range),length=20)
# Produce hist of the non drug measurements, then drug measurements on top
hist(x.nodrug, breaks=breaks, probability=TRUE, xlim=x.range, 
     col="lightgray", xlab="Percentage reduction in tumor size", 
     main="Comparison of tumor reduction")
# Plot a histogram of the drug measurements, on top
hist(x.drug, breaks=breaks, probability=TRUE, col=rgb(1,0,0,0.2), add=TRUE) 
# Draw estimated densities on top, for each dist
lines(density(x.nodrug), lwd=3, col=1)
lines(density(x.drug), lwd=3, col=2)
legend("topright", legend=c("No drug","Drug"), lty=1, lwd=3, col=1:2)

Repetition and reproducibility

Iteration and simulation (and functions): good friends

Code sketch

Consider the code below for a generic simulation. (Think about how you would frame this for the drug effect example)

# Function to do one simulation run
one.sim = function(param1, param2=value2, param3=value3) {
  # Possibly intricate simulation code goes here
}

# Function to do repeated simulation runs
rep.sim = function(nreps, param1, param2=value2, param3=value3, seed=NULL) {
  # Set the seed, if we need to
  (if !is.null(seed)) set.seed(seed)
  
  # Run the simulation over and over
  sim.objs = vector(length=nreps, mode="list")
  for (r in 1:nreps) {
    sim.objs[r] = one.sim(param1, param2, param3)
  }
  
  # Aggregate the results somehow, and then return something
}

Saving results

Sometimes simulations take a long time to run, and we want to save intermediate or final output, for quick reference later

There two different ways of saving things from R (there are more than two, but here are two useful ones):

saveRDS(my.mat, file="my.matrix.rds")
save(mat.x, mat.y, list.z, file="my.objects.rdata")

Note: there is a big difference between how these two treat variable names

Loading results

Corresponding to the two different ways of saving, we have two ways of loading things into R:

ls() # Show all the variables in our workspace, none so far
##  [1] "breaks"     "file.name"  "file.names" "mu.drug"    "mu.nodrug" 
##  [6] "n"          "wd"         "x.drug"     "x.nodrug"   "x.range"
my.new.mat = readRDS("my.matrix.rds")
dim(my.new.mat)
## [1] 100 200
my.new.mat[1:5,1:5]
##            [,1]       [,2]       [,3]       [,4]       [,5]
## [1,] -0.5066503  0.8957993 -1.6550584 -0.7274510  0.4704499
## [2,]  0.4542122 -0.5653446  1.4971748  1.0012207 -0.7367822
## [3,] -0.1173487  1.1318548  1.0156416 -0.1820566  0.4049708
## [4,] -0.2511109 -0.6232948  0.8540076  0.5656846  0.3169460
## [5,]  1.6586227  1.0974390 -0.1337345  0.4488469  0.1243605
ls() # Show all the variables in our workspace, only my.new.mat
##  [1] "breaks"     "file.name"  "file.names" "mu.drug"    "mu.nodrug" 
##  [6] "my.new.mat" "n"          "wd"         "x.drug"     "x.nodrug"  
## [11] "x.range"
load("my.objects.rdata")
ls() # Show all the variables in our workspace, some are new
##  [1] "breaks"     "file.name"  "file.names" "list.z"     "mat.x"     
##  [6] "mat.y"      "mu.drug"    "mu.nodrug"  "my.new.mat" "n"         
## [11] "wd"         "x.drug"     "x.nodrug"   "x.range"