## **************************************************
## Scaling of basal metabolic rate in mammals
## **************************************************
rm(list=ls())
gc()

## Read and examine the data
bmr <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/bmr.csv',
                stringsAsFactors=TRUE,
                strip.white=TRUE)
head(bmr)

## 1.
plot(x=log(bmr$mass.g), y=log(bmr$bmr.w),
     pch=16, las=1,
     xlab='Mass', ylab='BMR')
## nice linear relationship on a log-log scale

## 2.
out <- lm(log(bmr.w)~log(mass.g), data=bmr)
summary(out)
## slope estimate is 0.73654

## 3.
confint(out)
## the confidence interval does include 3/4

## 4.
abline(out, col='red')

## 5.
## model with a slope forced to 2/3
out.2.3 <- lm(log(bmr.w)~offset(2/3*log(mass.g)), data=bmr)
summary(out.2.3)
## model with a slope forced to 3/4
out.3.4 <- lm(log(bmr.w)~offset(3/4*log(mass.g)), data=bmr)
summary(out.3.4)

## 6.
plot(x=log(bmr$mass.g), y=log(bmr$bmr.w),
     pch=16, las=1,
     xlab='Mass', ylab='BMR')
abline(out, col='red')
abline(a=coef(out.2.3)['(Intercept)'], b=2/3,
       col='blue', lty=2)
abline(a=coef(out.3.4)['(Intercept)'], b=3/4,
       col='green4', lty=2)

## 7.
summary(out.2.3)
## residual standard error 0.408
summary(out.3.4)
## residual standard error 0.3247
##
## latter is a better fit

## 8.
logLik(out.2.3)
logLik(out.3.4)
## latter is higher

## 9.
aic.vals <- c(AIC(out.2.3), AIC(out.3.4))
aic.vals
## latter is smaller and thus better
abs(diff(aic.vals))

## 10.
## log-likelihood, discounted by the number of parameters in the model

## 11.
delta <- aic.vals-min(aic.vals)
L <- exp(-0.5 * delta)
w <- L/sum(L)

## 12.
## The slope 3/4 model has much more support under this framework.

## 13.
## Because they are not nested models.

## 14.
out.quad <- lm(log(bmr.w)~poly(log(mass.g),2), data=bmr)
summary(out.quad)

plot(x=log(bmr$mass.g), y=log(bmr$bmr.w),
     pch=16, las=1,
     xlab='Mass', ylab='BMR')
## add linear regression
lines(x=log(bmr$mass.g), y=predict(out), col='red')
## add quadratic regression
lines(x=log(bmr$mass.g), y=predict(out.quad), col='blue')

## calculate AIC
AIC(out)
AIC(out.quad)
## it would seem that quadratic is a better fit

## **************************************************
## Bird abundance in forest fragments
## **************************************************
rm(list=ls())
gc()

library(MASS)
library(MuMIn)
library(visreg)

## Read and examine the data
ba <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/birdabund.csv',
               stringsAsFactors=TRUE,
               strip.white=TRUE)
head(ba)

## 1.

## area
hist(ba$area, col='red')
## definitely needs to be log-transformed
ba$log.area <- log(ba$area)
hist(ba$log.area, col='red')
## much better!

## dist
hist(ba$dist, col='red')
## same thing here
ba$log.dist <- log(ba$dist)
hist(ba$log.dist, col='red')

## ldist
hist(ba$ldist, col='red')
## same thing here
ba$log.ldist <- log(ba$ldist)
hist(ba$log.ldist, col='red')

## graze
hist(ba$graze, col='red')
## this one looks ok as is

## alt
hist(ba$alt, col='red')
## also fine

## 2.
vars <- c('log.area', 'log.dist', 'log.ldist', 'yr.isol', 'graze', 'alt')
round(cor(ba[,vars]), digits=2)
## strong correlations between;
## graze    / yr.isol
## graze    / log.area
## graze    / alt
## log.dist / log.ldist

## 3.
##
## first create global model
out.full <- lm(abund ~
                 log.area +
                 log.dist +
                 log.ldist +
                 yr.isol +
                 graze +
                 alt,
               data=ba)
summary(out.full)

## Need to set this so dredge analysis works
options(na.action='na.fail')

out.dredge <- dredge(out.full, beta=FALSE, evaluate=TRUE, rank='AIC')

## 4.
## three

## 5.
sum(out.dredge$AIC-min(out.dredge$AIC)<7)

## 6.
out.dredge$weight
## weight for best model is 0.127
##
## best models all have 'graze' and 'log.area'

## 7.
## the 'cumsum' (=cumulative sum) function is useful here
which(cumsum(out.dredge$weight)>0.95)[1]
## 19 models in 95% set

## 8.
out.best <- lm(abund ~ log.area + yr.isol + graze, data=ba)
summary(out.best)
layout(matrix(1:3, nrow=1))
visreg(out.best)
## It should be apparent that the strongest relationship is for
## log.area - this is also consistent with estimated coefficients.
## However, predictors are not on the same scale, so effect sizes are
## not really comparable.

## Part II - stepAIC

## 1.

## first create a data-set that only contains the predictors of
## interest, along with the response
ba.sub <- ba[,c('abund',
                'log.area',
                'log.dist',
                'log.ldist',
                'yr.isol',
                'graze',
                'alt')]

## fit a full model with no interactions
out.full <- lm(abund ~., data=ba.sub)
out.best <- stepAIC(out.full, lower=~1) # out.best will store only the
                                        # best model(s)

## the best model is 'abund ~ log.area + yr.isol + graze' - consistent
## wit what we found before

## 2.
out.best <- lm(abund ~ log.area + yr.isol + graze, data=ba.sub)

## 3.
summary(out.best)
drop1(out.best, test='F')

## 4.
AIC(out.best) ## 371.1092
## note, that stepAIC uses:
extractAIC(out.best) ## 210.1881

## 5.
##
## below includes all two-way interactions
out.full <- lm(abund ~(.)^2, data=ba.sub)
out.best <- stepAIC(out.full, lower=~1) # out.best will store only the
                                        # best model(s)

## 6.
##
## best model is 'abund ~ log.area + yr.isol + graze + log.area:yr.isol + log.area:graze + yr.isol:graze'
##
## not the same as before...

## 7.
AIC(out.best) ## 360.7471
## note, that stepAIC uses:
extractAIC(out.best) ## 199.826

371.1092 - 360.7471 ## 10.3621
## This new model is better.  The 'main effects only' model has
## essentially no support, according to our rule of thumb.