rm(list=ls()) ## -------------------------------------------------- ## Random sampling warm-up ## 1. rbinom(n=1, size=20, prob=0.5) ## 2. rbinom(n=5, size=20, prob=0.5) ## 3. rbinom(n=1, size=18, prob=0.7) ## 4. rbinom(n=5, size=18, prob=0.7) ## 5. rnorm (30 ,0 ,2) ## 6. get.mean.and.sd <- function(nobs, mm, ss) { samples <- rnorm(n=nobs, mean=mm, sd=ss) c(mean=mean(samples), sd=sd(samples)) } get.mean.and.sd(nobs=30,mm=0,ss=2) get.mean.and.sd(nobs=30,mm=0,ss=2) get.mean.and.sd(nobs=30,mm=0,ss=2) get.mean.and.sd(nobs=30,mm=0,ss=2) get.mean.and.sd(nobs=30,mm=0,ss=2) ## note that if we increase nobs to something large, we get almost ## exact values get.mean.and.sd(nobs=1e5,mm=0,ss=2) ## -------------------------------------------------- ## For loops ## 3. new_vec <- c() for(i in 1:10) { new_vec <- c(new_vec, i^2) } ## 3. my.mat <- matrix(NA, nrow=10, ncol=2) ## this next line is not necessary, but just makes the matrix nicer to ## look at (and you can access it using column names instead of ## numbers if you want) colnames(my.mat) <- c('squared', 'cubed') for(i in 1:10) { my.mat[i,] <- c(i^2, i^3) } ## -------------------------------------------------- ## Plan for precision ## 1. size <- 10 nsuccess <- rbinom(n=1, size=size, prob=0.5) ## use sprintf command to print out the number of sucesses cat(sprintf('%d successes out of %d trials\n', nsuccess, size)) ## 2. ## ## using the 'binom' package and the Agresti-Coull method to calculate ## a confidence interval. First you need to install the binom package, ## if you haven't already ## install.packages('binom') ## ## then load it library(binom) ## calculate confidene interval (default is 95%) myCI <- binom.confint(x=nsuccess, n=size, method='ac') # gets the confidence interval myCI # shows the results ## could also calculate it manually without a package calc.agresti.coull <- function(size, nsuccess) { z <- 1.96 ntild <- size + z^2 ptild <- 1/ntild * (nsuccess + z^2/2) xx <- z*sqrt(ptild/ntild*(1-ptild)) c(ptild-xx, ptild+xx) } calc.agresti.coull(size=10, nsuccess=4) ## and compare binom.confint(x=4, n=10, method='ac') ## 3. & 4. for(i in 1:5) { nsuccess <- rbinom(n=1, size=size, prob=0.5) print(binom.confint(x=nsuccess, n=size, method='ac')) } ## 5. size <- 20 for(i in 1:5) { nsuccess <- rbinom(n=1, size=size, prob=0.5) print(binom.confint(x=nsuccess, n=size, method='ac')) } ## 6. size <- 100 for(i in 1:5) { nsuccess <- rbinom(n=1, size=size, prob=0.5) print(binom.confint(x=nsuccess, n=size, method='ac')) } ## it looks like about 100 trials would be sufficient to demonstrate ## that any preference is weak to non-existent. ## 8. ## lets define a variable to specify how many replicates we want to ## run and save nrep <- 1e3 ## create structure to save intervals in saved.intervals <- data.frame(lower=rep(NA,nrep), upper=rep(NA,nrep)) ## have a look at it head(saved.intervals) ## check its dimensions dim(saved.intervals) ## now use a for loop to fill it in (lets start with a sample of 10) size <- 10 for(i in 1:nrep) { nsuccess <- rbinom(n=1, size=size, prob=0.5) ci <- binom.confint(x=nsuccess, n=size, method='ac') ## add the lower and upper limits for our confidence interval to our ## data-frame (in the i^th row) saved.intervals[i,] <- c(ci$lower, ci$upper) } ## have a look at the saved.intervals, now that they're filled in head(saved.intervals) ## calculate the "average" interval by taking column means colMeans(saved.intervals) ## now try the above, changing 'size' to 100 ## -------------------------------------------------- ## Plan for power ## 1. size <- 20 nsuccess <- rbinom(n=1, size=size, prob=0.7) nsuccess ## 2. z <- binom.test(x=nsuccess, n=size, p=0.5) z$p.value ## 3. size <- 20 nrep <- 100 saved.p.vals <- rep(NA, nrep) for(i in 1:nrep) { nsuccess <- rbinom(n=1, size=size, prob=0.7) z <- binom.test(x=nsuccess, n=size, p=0.5) saved.p.vals[i] <- z$p.value } ## create a table showing how many times the null hypothesis was ## rejected vs not-rejected (TRUE=rejected) table(saved.p.vals<0.05) ## alternatively, we can simply calculate the number of times it was ## rejected sum(saved.p.vals<0.05) ## note that we are summing a vector of TRUE and FALSE. When doing ## this, R automatically converts TRUE to 1 and FALSE to 0, so the ## output is the number of TRUE.