## **************************************************
## EXAMPLE 1: simple linear model with one predictor
## **************************************************

set.seed(1)

## --------------------------------------------------
## simulate data - we're simulating with very strong trends here so
## that everything is easy to see!
##
## Lets create a simple numeric predictor
num <- 100
pred <- rnorm(100, mean=0, sd=1)
hist(pred, col='red')

## Now, lets create our response variable.  Let's assume a strong
## positive effect of pred.
effect.size <- 2
## sd around this response
effect.sd <- 1.5

response <- rnorm(num, mean=effect.size*pred, sd=effect.sd)

## package into a data.frame
dd <- data.frame(pred=pred, response=response)
head(dd)
## --------------------------------------------------

## --------------------------------------------------
## plot the raw data
plot(x=dd$pred, y=dd$response, xlab='pred', ylab='response',
     col='red', pch=16, las=1)
## --------------------------------------------------

## --------------------------------------------------
## fit a model
out <- lm(response ~ pred, data=dd)
summary(out)

## extract model coefficients
coeffs <- coef(out)
b0 <- coeffs['(Intercept)']
b1 <- coeffs['pred']

## add the best fit line
curve(b0 + b1*x,
      from=min(dd$pred), to=max(dd$pred), col='blue', add=TRUE)
## --------------------------------------------------


## **************************************************
## EXAMPLE 2: multiple regression (with 2-level factor)
## **************************************************

## --------------------------------------------------
## simulate data
##
## Lets create a first 'predictor' - individuals are either 'A' or 'B'
## and num.A and num.B indicate how many of each we have
num.A <- 50
num.B <- 50
p1 <- factor(c(rep('A', num.A), rep('B', num.B)))
p1
## and let's create a second vector - normally distributed with mean 0
## and sd 1
p2 <- rnorm(num.A+num.B, mean=0, sd=1)
p2

## Now, lets create our response variable.  Let's assume a strong
## positive effect of p2 for 'A' individuals and a weak negative
## effect for 'B' individuals.
effect.size.A <- 1
effect.size.B <- -0.2
## sd around this response
sd.A <- 0.25
sd.B <- 0.25

response <- c(rnorm(num.A, mean=effect.size.A*p2[p1=='A'], sd=sd.A),
              rnorm(num.B, mean=effect.size.B*p2[p1=='B'], sd=sd.B))

## package all this up into a data.frame
dd <- data.frame(p1=p1, p2=p2, response=response)
head(dd)

## --------------------------------------------------
## plot the raw data - I'm going to wrap this up in a function, so I
## can call it again easily

## let's make a colour vector
plot.raw.data <- function() {
  col <- c(rep('red', num.A), rep('blue', num.B))
  plot(x=dd$p2, y=dd$response, xlab='p2', ylab='response',
       col=col, pch=16, las=1)
  legend('topleft',
         bty='n',
         legend=c('A', 'B'),
         col=c('red', 'blue'),
         pch=16)
}
plot.raw.data()
## --------------------------------------------------

## --------------------------------------------------
## fit a model without an interaction
out <- lm(response ~ p1 + p2, data=dd)
summary(out)

## extract model coefficients
coeffs <- coef(out)
b0 <- coeffs['(Intercept)']
b1 <- coeffs['p1B']
b2 <- coeffs['p2']
## --------------------------------------------------

## --------------------------------------------------
## plot the raw data, with the best fit lines
plot.raw.data()

## let's add our first line (for A individuals)
curve(b0 + b1*0 + b2*x,
      from=min(dd$p2), to=max(dd$p2), add=TRUE, col='red')
## not a great fit... but thats ok.
##
## let's add our second line (for B individuals)
curve(b0 + b1*1 + b2*x,
      from=min(dd$p2), to=max(dd$p2), add=TRUE, col='blue')
## --------------------------------------------------

## --------------------------------------------------
## let's now fit a model with the interaction
out <- lm(response ~ p1 * p2, data=dd)
summary(out)

## extract model coefficients
coeffs <- coef(out)
b0 <- coeffs['(Intercept)']
b1 <- coeffs['p1B']
b2 <- coeffs['p2']
b3 <- coeffs['p1B:p2']
## --------------------------------------------------

## --------------------------------------------------
## plot the raw data, with the best fit lines
plot.raw.data()

## let's add our first line
curve(b0 + b1*0 + b2*x + b3*0*x, ## b0 + b2*x
      from=min(dd$p2), to=max(dd$p2), add=TRUE, col='red')
## let's add our second line
curve(b0 + b1*1 + b2*x + b3*1*x, ## (b0+b1) + (b2+b3)*x
      from=min(dd$p2), to=max(dd$p2), add=TRUE, col='blue')
## --------------------------------------------------


## **************************************************
## EXAMPLE 3: multiple regression (with 3-level factor)
## **************************************************

## --------------------------------------------------
## simulate data
##
## this is basically a repeat of the above example, but with three
## variables in the second category (and we'll use a function to add
## the curves)
num.A <- 50
num.B <- 50
num.C <- 50
p1 <- factor(rep(c('A', 'B', 'C'), c(num.A, num.B, num.C)))
p1
## and let's create a second vector - normally distributed with mean 0
## and sd 1
p2 <- rnorm(num.A+num.B+num.C, mean=0, sd=1)
p2

## Now, lets create our response variable.  Let's assume a strong
## positive effect of p2 for 'A' individuals and a weak negative
## effect for 'B' individuals.
effect.size.A <- 1
effect.size.B <- -0.2
effect.size.C <- -1.5
## sd around this response
sd.A <- 0.25
sd.B <- 0.25
sd.C <- 0.25

response <- c(rnorm(num.A, mean=effect.size.A*p2[p1=='A'], sd=sd.A),
              rnorm(num.B, mean=effect.size.B*p2[p1=='B'], sd=sd.B),
              rnorm(num.C, mean=effect.size.C*p2[p1=='C'], sd=sd.C))

## package all this up into a data.frame
dd <- data.frame(p1=p1, p2=p2, response=response)
head(dd)

## --------------------------------------------------
## plot the raw data - I'm going to wrap this up in a function, so I
## can call it again easily

## let's make a colour vector
plot.raw.data <- function() {
  col <- c(rep('red', num.A), rep('blue', num.B), rep('green4', num.C))
  plot(x=dd$p2, y=dd$response, xlab='p2', ylab='response',
       col=col, pch=16, las=1)
  legend('topleft',
         bty='n',
         legend=c('A', 'B', 'C'),
         col=c('red', 'blue', 'green4'),
         pch=16)
}
plot.raw.data()
## --------------------------------------------------

## --------------------------------------------------
## fit the model (with interaction)
out <- lm(response ~ p1 * p2, data=dd)
summary(out)
## --------------------------------------------------

## --------------------------------------------------
## plot the raw data, with the best fit lines
plot.raw.data()

bb <- coef(out)
## function to add our lines
add.curve <- function(B,C,col) {
  curve(bb['(Intercept)'] + bb['p1B']*B + bb['p1C']*C + bb['p2']*x + bb['p1B:p2']*B*x + bb['p1C:p2']*C*x, from=min(dd$p2), to=max(dd$p2), add=TRUE, col=col)
}

plot.raw.data()
add.curve(B=0,C=0,col='red')
add.curve(B=1,C=0,col='blue')
add.curve(B=0,C=1,col='green4')
## --------------------------------------------------