## ************************************************** ## Scaling of basal metabolic rate in mammals ## ************************************************** rm(list=ls()) gc() ## Read and examine the data bmr <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/bmr.csv', stringsAsFactors=TRUE, strip.white=TRUE) head(bmr) ## 1. plot(x=log(bmr$mass.g), y=log(bmr$bmr.w), pch=16, las=1, xlab='Mass', ylab='BMR') ## nice linear relationship on a log-log scale ## 2. out <- lm(log(bmr.w)~log(mass.g), data=bmr) summary(out) ## slope estimate is 0.73654 ## 3. confint(out) ## the confidence interval does include 3/4 ## 4. abline(out, col='red') ## 5. ## model with a slope forced to 2/3 out.2.3 <- lm(log(bmr.w)~offset(2/3*log(mass.g)), data=bmr) summary(out.2.3) ## model with a slope forced to 3/4 out.3.4 <- lm(log(bmr.w)~offset(3/4*log(mass.g)), data=bmr) summary(out.3.4) ## 6. plot(x=log(bmr$mass.g), y=log(bmr$bmr.w), pch=16, las=1, xlab='Mass', ylab='BMR') abline(out, col='red') abline(a=coef(out.2.3)['(Intercept)'], b=2/3, col='blue', lty=2) abline(a=coef(out.3.4)['(Intercept)'], b=3/4, col='green4', lty=2) ## 7. summary(out.2.3) ## residual standard error 0.408 summary(out.3.4) ## residual standard error 0.3247 ## ## latter is a better fit ## 8. logLik(out.2.3) logLik(out.3.4) ## latter is higher ## 9. aic.vals <- c(AIC(out.2.3), AIC(out.3.4)) aic.vals ## latter is smaller and thus better abs(diff(aic.vals)) ## 10. ## log-likelihood, discounted by the number of parameters in the model ## 11. delta <- aic.vals-min(aic.vals) L <- exp(-0.5 * delta) w <- L/sum(L) ## 12. ## The slope 3/4 model has much more support under this framework. ## 13. ## Because they are not nested models. ## 14. out.quad <- lm(log(bmr.w)~poly(log(mass.g),2), data=bmr) summary(out.quad) plot(x=log(bmr$mass.g), y=log(bmr$bmr.w), pch=16, las=1, xlab='Mass', ylab='BMR') ## add linear regression lines(x=log(bmr$mass.g), y=predict(out), col='red') ## add quadratic regression lines(x=log(bmr$mass.g), y=predict(out.quad), col='blue') ## calculate AIC AIC(out) AIC(out.quad) ## it would seem that quadratic is a better fit ## ************************************************** ## Bird abundance in forest fragments ## ************************************************** rm(list=ls()) gc() library(MASS) library(MuMIn) library(visreg) ## Read and examine the data ba <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/birdabund.csv', stringsAsFactors=TRUE, strip.white=TRUE) head(ba) ## 1. ## area hist(ba$area, col='red') ## definitely needs to be log-transformed ba$log.area <- log(ba$area) hist(ba$log.area, col='red') ## much better! ## dist hist(ba$dist, col='red') ## same thing here ba$log.dist <- log(ba$dist) hist(ba$log.dist, col='red') ## ldist hist(ba$ldist, col='red') ## same thing here ba$log.ldist <- log(ba$ldist) hist(ba$log.ldist, col='red') ## graze hist(ba$graze, col='red') ## this one looks ok as is ## alt hist(ba$alt, col='red') ## also fine ## 2. vars <- c('log.area', 'log.dist', 'log.ldist', 'yr.isol', 'graze', 'alt') round(cor(ba[,vars]), digits=2) ## strong correlations between; ## graze / yr.isol ## graze / log.area ## graze / alt ## log.dist / log.ldist ## 3. ## ## first create global model out.full <- lm(abund ~ log.area + log.dist + log.ldist + yr.isol + graze + alt, data=ba) summary(out.full) ## Need to set this so dredge analysis works options(na.action='na.fail') out.dredge <- dredge(out.full, beta=FALSE, evaluate=TRUE, rank='AIC') ## 4. ## three ## 5. sum(out.dredge$AIC-min(out.dredge$AIC)<7) ## 6. out.dredge$weight ## weight for best model is 0.127 ## ## best models all have 'graze' and 'log.area' ## 7. ## the 'cumsum' (=cumulative sum) function is useful here which(cumsum(out.dredge$weight)>0.95)[1] ## 19 models in 95% set ## 8. out.best <- lm(abund ~ log.area + yr.isol + graze, data=ba) summary(out.best) layout(matrix(1:3, nrow=1)) visreg(out.best) ## It should be apparent that the strongest relationship is for ## log.area - this is also consistent with estimated coefficients. ## However, predictors are not on the same scale, so effect sizes are ## not really comparable. ## Part II - stepAIC ## 1. ## first create a data-set that only contains the predictors of ## interest, along with the response ba.sub <- ba[,c('abund', 'log.area', 'log.dist', 'log.ldist', 'yr.isol', 'graze', 'alt')] ## fit a full model with no interactions out.full <- lm(abund ~., data=ba.sub) out.best <- stepAIC(out.full, lower=~1) # out.best will store only the # best model(s) ## the best model is 'abund ~ log.area + yr.isol + graze' - consistent ## wit what we found before ## 2. out.best <- lm(abund ~ log.area + yr.isol + graze, data=ba.sub) ## 3. summary(out.best) drop1(out.best, test='F') ## 4. AIC(out.best) ## 371.1092 ## note, that stepAIC uses: extractAIC(out.best) ## 210.1881 ## 5. ## ## below includes all two-way interactions out.full <- lm(abund ~(.)^2, data=ba.sub) out.best <- stepAIC(out.full, lower=~1) # out.best will store only the # best model(s) ## 6. ## ## best model is 'abund ~ log.area + yr.isol + graze + log.area:yr.isol + log.area:graze + yr.isol:graze' ## ## not the same as before... ## 7. AIC(out.best) ## 360.7471 ## note, that stepAIC uses: extractAIC(out.best) ## 199.826 371.1092 - 360.7471 ## 10.3621 ## This new model is better. The 'main effects only' model has ## essentially no support, according to our rule of thumb.