36-402, Section A
29 January 2019 (Lecture 5)
## [1] 1010391288
## [1] 880957225
## [1] 5386087
# Calculate quantiles of the Pareto distribution
# Inputs: desired quantile (p)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# Outputs: the pth quantile
qpareto.1 <- function(p, exponent, threshold) {
q <- threshold*((1-p)^(-1/(exponent-1)))
return(q)
}
## [1] 1010391288
## [1] 880957225
## [1] 5386087
c()
or list()
to return more than one thing (We will see some examples of this)# Calculate quantiles of the Pareto distribution
# Inputs: desired quantile (p)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# flag for whether to give lower or upper quantiles (lower.tail)
# Outputs: the pth quantile
qpareto.2 <- function(p, exponent, threshold, lower.tail=TRUE) {
if(lower.tail==FALSE) {
p <- 1-p
}
q <- threshold*((1-p)^(-1/(exponent-1)))
return(q)
}
## [1] 5386087
## [1] 1057162
qpareto.1
in qpareto.2
:# Calculate quantiles of the Pareto distribution
# Inputs: desired quantile (p)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# flag for whether to give lower or upper quantiles (lower.tail)
# Outputs: the pth quantile
qpareto.3 <- function(p, exponent, threshold, lower.tail=TRUE) {
if(lower.tail==FALSE) {
p <- 1-p
}
# Call qpareto.1() to do the actual calculation
q <- qpareto.1(p, exponent, threshold)
return(q)
}
cv.lm()
calls a function, response.name()
, to figure out what variable in a lm
formula is the responseresponse.name()
is defined just after cv.lm()
, but before the code +runs_ cv.lm()
traceback()
: Show all the calls leading to the last error
print()
: Make R send output to the console
browser()
: A convenient way to mess around inside a function
[Dangerous Expression]
and calls browser()
if there is an error.tryCatch({[Dangerous Expression]},warning=function(w){browser()},
error=function(e){browser()})
# Calculate quantiles of the Pareto distribution
# Inputs: desired quantile (p)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# flag for whether to give lower or upper quantiles (lower.tail)
# Outputs: the pth quantile
qpareto.4 <- function(p, exponent, threshold, lower.tail=TRUE) {
stopifnot(p >= 0, p <= 1, exponent > 1, threshold > 0)
q <- qpareto.3(p,exponent,threshold,lower.tail)
return(q)
}
## Error: threshold > 0 is not TRUE
# Generate random numbers from the Pareto distribution
# Inputs: number of random draws (n)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# Outputs: vector of random numbers
rpareto <- function(n,exponent,threshold) {
x <- vector(length=n)
for (i in 1:n) {
x[i] <- qpareto.4(p=rnorm(1),exponent=exponent,threshold=threshold)
}
return(x)
}
## Error in qpareto.4(p = rnorm(1), exponent = exponent, threshold = threshold): argument "exponent" is missing, with no default
## Error: p >= 0 is not TRUE
EXERCISE: Where is the bug? Why doesn’t rpareto(n=10,exponent=2.5,threshold=1)
work?
qpareto.1 <- function(p, exponent, threshold) {
q <- threshold*((1-p)^(-1/(exponent-1)))
return(q)
}
qpareto.3 <- function(p, exponent, threshold, lower.tail=TRUE) {
if(lower.tail==FALSE) {
p <- 1-p
}
q <- qpareto.1(p, exponent, threshold)
return(q)
}
qpareto.4 <- function(p, exponent, threshold, lower.tail=TRUE) {
stopifnot(p >= 0, p <= 1, exponent > 1, threshold > 0)
q <- qpareto.3(p,exponent,threshold,lower.tail)
return(q)
}
rpareto <- function(n,exponent,threshold) {
x <- vector(length=n)
for (i in 1:n) {
x[i] <- qpareto.4(p=rnorm(1),exponent=exponent,threshold=threshold)
}
return(x)
}
qpareto
functionsp >= 0
is not trueqpareto.4()
p
has to be going into qpareto.4()
when it’s called by rpareto()
p=rnorm(1)
, and rnorm()
draws from the standard normal, which is negative half the timep=runif(1)
(uniform distribution on \([0,1]\)), not p=rnorm(1)
rpareto
in 2004# Generate random numbers from the Pareto distribution
# Inputs: number of random draws (n)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# Outputs: vector of random numbers
rpareto.1 <- function(n,exponent,threshold) {
x <- c()
for (i in 1:n) {
x <- c(x,qpareto.4(p=runif(1),exponent=exponent,threshold=threshold))
}
return(x)
}
## user system elapsed
## 0.323 0.018 0.343
# Generate random numbers from the Pareto distribution
# Inputs: number of random draws (n)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# Outputs: vector of random numbers
rpareto.2 <- function(n,exponent,threshold) {
x = replicate(n, qpareto.4(p=runif(1),exponent=exponent,threshold=threshold))
return(x)
}
system.time(rpareto.2(n=1e4,exponent=2.5,threshold=1))
## user system elapsed
## 0.122 0.001 0.123
Lots of R functions are happy with vector arguments, and vectorize along them
Our qpareto()
functions can handle vector p
arguments just fine:
## [1] 1.072766 1.160397 1.268434
# Generate random numbers from the Pareto distribution
# Inputs: number of random draws (n)
# exponent of the distribution (exponent)
# lower threshold of the distribution (threshold)
# Outputs: vector of random numbers
rpareto.3 <- function(n,exponent,threshold) {
x = qpareto.4(p=runif(n), exponent=exponent, threshold=threshold)
return(x)
}
## user system elapsed
## 0.000 0.000 0.001
ifelse
ifelse
to do conditional things to entire vectors# Get some long vectors
n = 1e6
a = rnorm(n)
b = rnorm(n)
# Find the inner product with a for() loop
system.time({
total = 0
for(i in 1:length(a)){total = total + a[i]*b[i]}
})
## user system elapsed
## 0.058 0.000 0.058
## user system elapsed
## 0.005 0.000 0.005
apply
and friendsapply
to apply a function to rows or columns of a matrixsapply
to apply a function to every element of a vector#Make a 100x100 matrix of uniform [0,1] variables
X = matrix(runif(10000),nrow=100,ncol=100)
#Find the maximum of each column
column.maxs = apply(X, MARGIN = 2, FUN = max)
#Plot a histogram. Do you know the distribution?
hist(column.maxs,20)
tidyverse
package defines a lot of new functions to push this idea to logical extremesrpareto()
:rpareto.3 <- function(n,exponent,threshold) {
x = qpareto.4(p=runif(n), exponent=exponent, threshold=threshold)
return(x) #If n>1, this is a vector!
}
cv_bws_npreg()
from lecture 4:attach()
with()
insteaddata
# Function to return the biggest correlation of 5 length-10 Gaussian vectors
# Takes no arguments, so it can be used in a simulation
random.correlation.max = function(){
#Draw a 10x5 matrix of Gaussians
X = matrix(rnorm(50),nrow=10,ncol=5)
#Compute the correlation matrix
cor.matrix = cor(X)
#Find the biggest entry, making sure to avoid the diagonal (which is all 1)
return(max(cor.matrix[upper.tri(cor.matrix)]))
}
add.one()
to do its addition## [1] 4
add.one()
:## [1] 5