## ************************************************** ## EXAMPLE 1: simple linear model with one predictor ## ************************************************** set.seed(1) ## -------------------------------------------------- ## simulate data - we're simulating with very strong trends here so ## that everything is easy to see! ## ## Lets create a simple numeric predictor num <- 100 pred <- rnorm(100, mean=0, sd=1) hist(pred, col='red') ## Now, lets create our response variable. Let's assume a strong ## positive effect of pred. effect.size <- 2 ## sd around this response effect.sd <- 1.5 response <- rnorm(num, mean=effect.size*pred, sd=effect.sd) ## package into a data.frame dd <- data.frame(pred=pred, response=response) head(dd) ## -------------------------------------------------- ## -------------------------------------------------- ## plot the raw data plot(x=dd$pred, y=dd$response, xlab='pred', ylab='response', col='red', pch=16, las=1) ## -------------------------------------------------- ## -------------------------------------------------- ## fit a model out <- lm(response ~ pred, data=dd) summary(out) ## extract model coefficients coeffs <- coef(out) b0 <- coeffs['(Intercept)'] b1 <- coeffs['pred'] ## add the best fit line curve(b0 + b1*x, from=min(dd$pred), to=max(dd$pred), col='blue', add=TRUE) ## -------------------------------------------------- ## ************************************************** ## EXAMPLE 2: multiple regression (with 2-level factor) ## ************************************************** ## -------------------------------------------------- ## simulate data ## ## Lets create a first 'predictor' - individuals are either 'A' or 'B' ## and num.A and num.B indicate how many of each we have num.A <- 50 num.B <- 50 p1 <- factor(c(rep('A', num.A), rep('B', num.B))) p1 ## and let's create a second vector - normally distributed with mean 0 ## and sd 1 p2 <- rnorm(num.A+num.B, mean=0, sd=1) p2 ## Now, lets create our response variable. Let's assume a strong ## positive effect of p2 for 'A' individuals and a weak negative ## effect for 'B' individuals. effect.size.A <- 1 effect.size.B <- -0.2 ## sd around this response sd.A <- 0.25 sd.B <- 0.25 response <- c(rnorm(num.A, mean=effect.size.A*p2[p1=='A'], sd=sd.A), rnorm(num.B, mean=effect.size.B*p2[p1=='B'], sd=sd.B)) ## package all this up into a data.frame dd <- data.frame(p1=p1, p2=p2, response=response) head(dd) ## -------------------------------------------------- ## plot the raw data - I'm going to wrap this up in a function, so I ## can call it again easily ## let's make a colour vector plot.raw.data <- function() { col <- c(rep('red', num.A), rep('blue', num.B)) plot(x=dd$p2, y=dd$response, xlab='p2', ylab='response', col=col, pch=16, las=1) legend('topleft', bty='n', legend=c('A', 'B'), col=c('red', 'blue'), pch=16) } plot.raw.data() ## -------------------------------------------------- ## -------------------------------------------------- ## fit a model without an interaction out <- lm(response ~ p1 + p2, data=dd) summary(out) ## extract model coefficients coeffs <- coef(out) b0 <- coeffs['(Intercept)'] b1 <- coeffs['p1B'] b2 <- coeffs['p2'] ## -------------------------------------------------- ## -------------------------------------------------- ## plot the raw data, with the best fit lines plot.raw.data() ## let's add our first line (for A individuals) curve(b0 + b1*0 + b2*x, from=min(dd$p2), to=max(dd$p2), add=TRUE, col='red') ## not a great fit... but thats ok. ## ## let's add our second line (for B individuals) curve(b0 + b1*1 + b2*x, from=min(dd$p2), to=max(dd$p2), add=TRUE, col='blue') ## -------------------------------------------------- ## -------------------------------------------------- ## let's now fit a model with the interaction out <- lm(response ~ p1 * p2, data=dd) summary(out) ## extract model coefficients coeffs <- coef(out) b0 <- coeffs['(Intercept)'] b1 <- coeffs['p1B'] b2 <- coeffs['p2'] b3 <- coeffs['p1B:p2'] ## -------------------------------------------------- ## -------------------------------------------------- ## plot the raw data, with the best fit lines plot.raw.data() ## let's add our first line curve(b0 + b1*0 + b2*x + b3*0*x, ## b0 + b2*x from=min(dd$p2), to=max(dd$p2), add=TRUE, col='red') ## let's add our second line curve(b0 + b1*1 + b2*x + b3*1*x, ## (b0+b1) + (b2+b3)*x from=min(dd$p2), to=max(dd$p2), add=TRUE, col='blue') ## -------------------------------------------------- ## ************************************************** ## EXAMPLE 3: multiple regression (with 3-level factor) ## ************************************************** ## -------------------------------------------------- ## simulate data ## ## this is basically a repeat of the above example, but with three ## variables in the second category (and we'll use a function to add ## the curves) num.A <- 50 num.B <- 50 num.C <- 50 p1 <- factor(rep(c('A', 'B', 'C'), c(num.A, num.B, num.C))) p1 ## and let's create a second vector - normally distributed with mean 0 ## and sd 1 p2 <- rnorm(num.A+num.B+num.C, mean=0, sd=1) p2 ## Now, lets create our response variable. Let's assume a strong ## positive effect of p2 for 'A' individuals and a weak negative ## effect for 'B' individuals. effect.size.A <- 1 effect.size.B <- -0.2 effect.size.C <- -1.5 ## sd around this response sd.A <- 0.25 sd.B <- 0.25 sd.C <- 0.25 response <- c(rnorm(num.A, mean=effect.size.A*p2[p1=='A'], sd=sd.A), rnorm(num.B, mean=effect.size.B*p2[p1=='B'], sd=sd.B), rnorm(num.C, mean=effect.size.C*p2[p1=='C'], sd=sd.C)) ## package all this up into a data.frame dd <- data.frame(p1=p1, p2=p2, response=response) head(dd) ## -------------------------------------------------- ## plot the raw data - I'm going to wrap this up in a function, so I ## can call it again easily ## let's make a colour vector plot.raw.data <- function() { col <- c(rep('red', num.A), rep('blue', num.B), rep('green4', num.C)) plot(x=dd$p2, y=dd$response, xlab='p2', ylab='response', col=col, pch=16, las=1) legend('topleft', bty='n', legend=c('A', 'B', 'C'), col=c('red', 'blue', 'green4'), pch=16) } plot.raw.data() ## -------------------------------------------------- ## -------------------------------------------------- ## fit the model (with interaction) out <- lm(response ~ p1 * p2, data=dd) summary(out) ## -------------------------------------------------- ## -------------------------------------------------- ## plot the raw data, with the best fit lines plot.raw.data() bb <- coef(out) ## function to add our lines add.curve <- function(B,C,col) { curve(bb['(Intercept)'] + bb['p1B']*B + bb['p1C']*C + bb['p2']*x + bb['p1B:p2']*B*x + bb['p1C:p2']*C*x, from=min(dd$p2), to=max(dd$p2), add=TRUE, col=col) } plot.raw.data() add.curve(B=0,C=0,col='red') add.curve(B=1,C=0,col='blue') add.curve(B=0,C=1,col='green4') ## --------------------------------------------------