rm(list=ls()) library(emmeans) library(visreg) ## useful functions expit <- function(x) 1/(1+exp(-x)) logit <- function(x) log(x/(1-x)) ## ************************************************** ## Natural selection in song sparrows ## ************************************************** ## -------------------------------------------------- ## Read and examine the data ## 1. ## ## Read the data ss <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/songsparrow.csv', stringsAsFactors=TRUE, strip.white=TRUE) head(ss) ## 2. class(ss$year) ## not a factor ss$year <- as.factor(ss$year) ## 3. plot(x=ss$tarsus, y=jitter(ss$survival), las=1, pch=16, xlab='Tarsus length', ylab='Surviva') ## 4. out <- lm(survival ~ tarsus, data=ss) abline(a=coef(out)['(Intercept)'], b=coef(out)['tarsus'], col='red') ## -------------------------------------------------- ## Fit a generalized linear model ## 1. ## logistic model (binomial distribution) with logit link ## 2. out <- glm(survival ~ tarsus, family=binomial(link='logit'), data=ss) ## 3. visreg(out) ## 4. summary(out) ## or coef(summary(out)) ## 5. expit(24.63612 - 1.25779 * 20.5) ## answer: 0.2407495 ## 6. -(24.63612/-1.25779) ## answer: 19.58683 ## 7. ci <- confint(out) ci exp(ci)/(1 + exp(ci)) ## 8. drop1(out, test='Chisq') ## 9. out <- glm(survival ~ tarsus + year, family=binomial(link='logit'), data=ss) summary(out) ## plot via visreg visreg(out, xvar='year') emmeans(out, specs='year') ## ************************************************** ## Crab satellites ## ************************************************** ## -------------------------------------------------- ## Read and examine the data ## 1. ## cc <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/satellites.csv', stringsAsFactors=TRUE, strip.white=TRUE) head(cc) ## 2. plot(x=cc$width.cm, y=cc$nsatellites, las=1, pch=16, xlab='Number of satellites', ylab='Width of carapice') lines(lowess(x=cc$width.cm, y=cc$nsatellites), col='red') ## -------------------------------------------------- ## Fit a generalized linear model ## 1. ## count data, Poisson, log-link ## 2. out <- glm(nsatellites ~ width.cm, family=poisson(link='log'), data=cc) ## 3. visreg(out) ## 4. coefs <- coef(out) ## these are the intercept and slope that specify the linear fit in ## log-transformed space ## 5. plot(x=cc$width.cm, y=cc$nsatellites, las=1, pch=16, xlab='Number of satellites', ylab='Width of carapice') curve(exp(coefs[1] + coefs[2]*x), from=min(cc$width.cm), to=max(cc$width.cm), col='red', add=TRUE) ## 6. ci <- confint(out) exp(ci['width.cm',]) ## 7. drop1(out, test='Chisq') ## 8. ## By using the Poisson distribution to model the residuals, we assume ## that for any given value of the x-variable, the variance of y is ## equal to the mean of y. Typically, however, in real data the ## variance of y is greater than the mean of y at any given x ## (“overdispersion”). One reason is that a variety of factors cause ## variation in y, and most aren’t included in the model being fitted. ## 9. out.qp <- glm(nsatellites ~ width.cm, family=quasipoisson, data=cc) summary(out.qp) ## 10. coefs.qp <- coef(out) coefs.qp ## The dispersion parameter is 3.182205. This is greater than 1, so ## the data are over-dispersed. The quasipoisson will provide a ## better fit. ## 11. ## The regression coefficients are the same as before, but the ## standard errors are now larger, as the variance is no longer ## constrained to equal the mean. ## 12. ## Poisson layout(matrix(1:2, ncol=2)) visreg(out) ## quasipoisson visreg(out.qp) ## 13. drop1(out.qp, test='F') ## ************************************************** ## Prion resistance not futile ## ************************************************** ## -------------------------------------------------- ## Read and examine the data ## 1. kk <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/kurudata.csv', stringsAsFactors=TRUE, strip.white=TRUE) head(kk) ## 2. counts <- t(table(kk$Genotype, kk$Cohort)) ## note, the 't' command is the 'transpose' which switches rows and ## columns ## 3. barplot(counts, main='', xlab='Genotype Gears', col=c('darkblue','red'), legend=rownames(counts), beside=TRUE, las=1) ## -------------------------------------------------- ## Fit a generalized linear model ## 1. kk.2 <- data.frame(table(kk$Genotype, kk$Cohort)) colnames(kk.2)[1] <- 'Genotype' colnames(kk.2)[2] <- 'Cohort' ## 2. out <- glm(Freq ~ Genotype + Cohort, family=poisson(link='log'), data=kk.2) summary(out) ## 3. visreg(out) ## 4. out.full <- glm(Freq ~ Genotype * Cohort, family=poisson(link='log'), data=kk.2) summary(out.full) visreg(out.full) ## 5. drop1(out.full, test='Chisq')