rm(list=ls())

library(emmeans)
library(visreg)

## useful functions
expit <- function(x) 1/(1+exp(-x))
logit <- function(x) log(x/(1-x))

## **************************************************
## Natural selection in song sparrows
## **************************************************

## --------------------------------------------------
## Read and examine the data

## 1.
##
## Read the data
ss <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/songsparrow.csv',
               stringsAsFactors=TRUE,
               strip.white=TRUE)
head(ss)

## 2.
class(ss$year)
## not a factor
ss$year <- as.factor(ss$year)

## 3.
plot(x=ss$tarsus, y=jitter(ss$survival), las=1, pch=16,
     xlab='Tarsus length', ylab='Surviva')

## 4.
out <- lm(survival ~ tarsus, data=ss)
abline(a=coef(out)['(Intercept)'], b=coef(out)['tarsus'], col='red')

## --------------------------------------------------
## Fit a generalized linear model

## 1.
## logistic model (binomial distribution) with logit link

## 2.
out <- glm(survival ~ tarsus, family=binomial(link='logit'), data=ss)

## 3.
visreg(out)

## 4.
summary(out)
## or
coef(summary(out))

## 5.
expit(24.63612 - 1.25779 * 20.5)
## answer: 0.2407495

## 6.
-(24.63612/-1.25779)
## answer: 19.58683

## 7.
ci <- confint(out)
ci
exp(ci)/(1 + exp(ci))

## 8.
drop1(out, test='Chisq')

## 9.
out <- glm(survival ~ tarsus + year, family=binomial(link='logit'),
           data=ss)
summary(out)

## plot via visreg
visreg(out, xvar='year')
emmeans(out, specs='year')


## **************************************************
## Crab satellites
## **************************************************

## --------------------------------------------------
## Read and examine the data

## 1.
##
cc <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/satellites.csv',
               stringsAsFactors=TRUE,
               strip.white=TRUE)
head(cc)

## 2.
plot(x=cc$width.cm, y=cc$nsatellites, las=1, pch=16,
     xlab='Number of satellites', ylab='Width of carapice')
lines(lowess(x=cc$width.cm, y=cc$nsatellites), col='red')

## --------------------------------------------------
## Fit a generalized linear model

## 1.
## count data, Poisson, log-link

## 2.
out <- glm(nsatellites ~ width.cm, family=poisson(link='log'), data=cc)

## 3.
visreg(out)

## 4.
coefs <- coef(out)
## these are the intercept and slope that specify the linear fit in
## log-transformed space

## 5.
plot(x=cc$width.cm, y=cc$nsatellites, las=1, pch=16,
     xlab='Number of satellites', ylab='Width of carapice')
curve(exp(coefs[1] + coefs[2]*x),
      from=min(cc$width.cm), to=max(cc$width.cm),
      col='red', add=TRUE)

## 6.
ci <- confint(out)
exp(ci['width.cm',])

## 7.
drop1(out, test='Chisq')

## 8.
## By using the Poisson distribution to model the residuals, we assume
## that for any given value of the x-variable, the variance of y is
## equal to the mean of y. Typically, however, in real data the
## variance of y is greater than the mean of y at any given x
## (“overdispersion”). One reason is that a variety of factors cause
## variation in y, and most aren’t included in the model being fitted.

## 9.
out.qp <- glm(nsatellites ~ width.cm, family=quasipoisson, data=cc)
summary(out.qp)

## 10.
coefs.qp <- coef(out)
coefs.qp
## The dispersion parameter is 3.182205.  This is greater than 1, so
## the data are over-dispersed.  The quasipoisson will provide a
## better fit.

## 11.
## The regression coefficients are the same as before, but the
## standard errors are now larger, as the variance is no longer
## constrained to equal the mean.

## 12.
## Poisson
layout(matrix(1:2, ncol=2))
visreg(out)
## quasipoisson
visreg(out.qp)

## 13.
drop1(out.qp, test='F')

## **************************************************
## Prion resistance not futile
## **************************************************

## --------------------------------------------------
## Read and examine the data

## 1.
kk <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/kurudata.csv',
               stringsAsFactors=TRUE,
               strip.white=TRUE)
head(kk)

## 2.
counts <- t(table(kk$Genotype, kk$Cohort))
## note, the 't' command is the 'transpose' which switches rows and
## columns

## 3.
barplot(counts, main='',
  xlab='Genotype Gears', col=c('darkblue','red'),
 	legend=rownames(counts), beside=TRUE, las=1)

## --------------------------------------------------
## Fit a generalized linear model

## 1.
kk.2 <- data.frame(table(kk$Genotype, kk$Cohort))
colnames(kk.2)[1] <- 'Genotype'
colnames(kk.2)[2] <- 'Cohort'

## 2.
out <- glm(Freq ~ Genotype + Cohort, family=poisson(link='log'),
           data=kk.2)
summary(out)

## 3.
visreg(out)

## 4.
out.full <- glm(Freq ~ Genotype * Cohort, family=poisson(link='log'),
                data=kk.2)
summary(out.full)
visreg(out.full)

## 5.
drop1(out.full, test='Chisq')