## start from an empty workspace: removes every (non-hidden) object
## that ls() reports in the current environment
rm(list = ls())

## --------------------------------------------------
## Random sampling warm-up

## 1. a single binomial draw: 20 trials, success probability 0.5
rbinom(n = 1, size = 20, prob = 0.5)

## 2. five such draws at once
rbinom(n = 5, size = 20, prob = 0.5)

## 3. a single binomial draw: 18 trials, success probability 0.7
rbinom(n = 1, size = 18, prob = 0.7)

## 4. five such draws at once
rbinom(n = 5, size = 18, prob = 0.7)

## 5. thirty draws from a normal distribution with mean 0 and sd 2
rnorm(n = 30, mean = 0, sd = 2)

## 6.
## Draw 'nobs' values from a normal distribution with mean 'mm' and
## standard deviation 'ss', and return the sample mean and sample
## standard deviation as a named vector c(mean=..., sd=...).
get.mean.and.sd <- function(nobs, mm, ss) {
  draws <- rnorm(nobs, mm, ss)
  stats.out <- c(mean(draws), sd(draws))
  names(stats.out) <- c('mean', 'sd')
  stats.out
}
## five calls with identical arguments: every call draws a fresh
## sample, so the estimated mean and sd differ from call to call
get.mean.and.sd(nobs = 30, mm = 0, ss = 2)
get.mean.and.sd(nobs = 30, mm = 0, ss = 2)
get.mean.and.sd(nobs = 30, mm = 0, ss = 2)
get.mean.and.sd(nobs = 30, mm = 0, ss = 2)
get.mean.and.sd(nobs = 30, mm = 0, ss = 2)

## with a much larger sample the estimates land almost exactly on the
## values used to generate the data (mean 0, sd 2)
get.mean.and.sd(nobs = 1e5, mm = 0, ss = 2)

## --------------------------------------------------
## For loops

## 3.
## Collect the squares of 1..10 with a for loop.  The result vector is
## created at its final length up front and filled in by position;
## growing it with c() inside the loop would copy the whole vector on
## every iteration.  (Without a loop this is simply (1:10)^2.)
new_vec <- numeric(10)
for (i in seq_along(new_vec)) {
  new_vec[i] <- i^2
}

## 3.
## A 10 x 2 table: row i holds i squared and i cubed.  Naming the
## columns is optional; it only makes the matrix easier to read and
## lets you pick a column by name instead of by number.
my.mat <- matrix(NA, nrow = 10, ncol = 2,
                 dimnames = list(NULL, c('squared', 'cubed')))
for (i in seq_len(10)) {
  my.mat[i, 'squared'] <- i^2
  my.mat[i, 'cubed'] <- i^3
}

## --------------------------------------------------
## Plan for precision

## 1.
size <- 10
## one simulated experiment: count the successes in 'size' trials when
## each trial succeeds with probability 0.5
nsuccess <- rbinom(1, size, prob = 0.5)

## use the sprintf command to print out the number of successes
cat(sprintf('%d successes out of %d trials\n', nsuccess, size))

## 2.
##
## using the 'binom' package and the Agresti-Coull method to calculate
## a confidence interval.  First you need to install the binom package,
## if you haven't already
## install.packages('binom')
##
## then load it
library(binom)

## calculate confidence interval (default is 95%).  The argument name
## is written out in full as 'methods'; the shorter 'method' only
## works because R partially matches argument names.
myCI <- binom.confint(x = nsuccess, n = size, methods = 'ac') # gets the confidence interval
myCI # shows the results

## could also calculate it manually without a package
## Agresti-Coull confidence interval for a binomial proportion,
## computed by hand.
##
## Arguments:
##   size     - number of trials
##   nsuccess - number of successes observed in those trials
##   z        - critical value of the standard normal distribution
##              that sets the confidence level.  The default, 1.96,
##              gives an (approximately) 95% interval.  For another
##              level use qnorm(1 - alpha/2), e.g. qnorm(0.995) for a
##              99% interval.
##
## Returns, for scalar inputs, the two-element vector c(lower, upper).
## The limits are not truncated to [0, 1].
calc.agresti.coull <- function(size, nsuccess, z=1.96) {
  ## adjusted number of trials: the observed trials plus z^2
  ntild <- size + z^2
  ## adjusted proportion: z^2/2 is added to the number of successes
  ptild <- 1/ntild * (nsuccess + z^2/2)
  ## half-width of the interval around the adjusted proportion
  xx <- z*sqrt(ptild/ntild*(1-ptild))
  c(ptild-xx, ptild+xx)
}
calc.agresti.coull(size = 10, nsuccess = 4)
## and compare with the package result for the same data (the
## argument name 'methods' is written out in full rather than
## abbreviated to 'method')
binom.confint(x = 4, n = 10, methods = 'ac')

## 3. & 4.
## repeat the experiment five times with the current value of 'size'
## and print the Agresti-Coull interval from each run
for (i in seq_len(5)) {
  nsuccess <- rbinom(n = 1, size = size, prob = 0.5)
  ## 'methods' spelled out in full (no partial argument matching)
  print(binom.confint(x = nsuccess, n = size, methods = 'ac'))
}

## 5.
## same five-run experiment, now with 20 trials per run
size <- 20
for (i in seq_len(5)) {
  nsuccess <- rbinom(n = 1, size = size, prob = 0.5)
  ## 'methods' spelled out in full (no partial argument matching)
  print(binom.confint(x = nsuccess, n = size, methods = 'ac'))
}

## 6.
## same five-run experiment, now with 100 trials per run
size <- 100
for (i in seq_len(5)) {
  nsuccess <- rbinom(n = 1, size = size, prob = 0.5)
  ## 'methods' spelled out in full (no partial argument matching)
  print(binom.confint(x = nsuccess, n = size, methods = 'ac'))
}
## it looks like about 100 trials would be sufficient to demonstrate
## that any preference is weak to non-existent.

## 8.

## let's define a variable that says how many replicates we want to
## run and save
nrep <- 1e3

## create an empty structure to hold the intervals: one row per
## replicate, one column for each end of the interval
saved.intervals <- data.frame(lower = rep(NA, times = nrep),
                              upper = rep(NA, times = nrep))
## look at the first few (still empty) rows
head(saved.intervals)
## confirm it has nrep rows and 2 columns
dim(saved.intervals)

## now use a for loop to fill it in (let's start with a sample of 10)
size <- 10
## seq_len(nrep) runs the loop zero times if nrep is 0, whereas 1:nrep
## would iterate over c(1, 0)
for (i in seq_len(nrep)) {
  nsuccess <- rbinom(n = 1, size = size, prob = 0.5)
  ## 'methods' spelled out in full (no partial argument matching)
  ci <- binom.confint(x = nsuccess, n = size, methods = 'ac')
  ## add the lower and upper limits for our confidence interval to our
  ## data-frame (in the i^th row)
  saved.intervals[i, ] <- c(ci$lower, ci$upper)
}
## have a look at the saved.intervals, now that they're filled in
head(saved.intervals)
## calculate the "average" interval by taking column means
colMeans(saved.intervals)
## now try the above, changing 'size' to 100

## --------------------------------------------------
## Plan for power

## 1.
size <- 20
## simulate one experiment in which the true success probability is 0.7
nsuccess <- rbinom(1, size, prob = 0.7)
nsuccess

## 2.
## exact binomial test of that result against the null value p = 0.5;
## then show the p-value
z <- binom.test(nsuccess, size, p = 0.5)
z[['p.value']]

## 3.
size <- 20
nrep <- 100
## one slot per replicate, filled with that replicate's p-value
saved.p.vals <- rep(NA, times = nrep)
for (i in seq_len(nrep)) {
  ## data generated with true success probability 0.7 ...
  nsuccess <- rbinom(1, size, prob = 0.7)
  ## ... and tested against the null value 0.5
  z <- binom.test(nsuccess, size, p = 0.5)
  saved.p.vals[i] <- z[['p.value']]
}
## tabulate how often the null hypothesis was rejected at the 0.05
## level versus not rejected (TRUE=rejected)
table(saved.p.vals < 0.05)

## alternatively, just count the rejections
sum(saved.p.vals < 0.05)
## note that this sums a vector of TRUE and FALSE.  R automatically
## converts TRUE to 1 and FALSE to 0 when summing, so the result is
## the number of TRUE values.