## clear the workspace rm(list=ls()) mm <- read.csv('https://www.sfu.ca/~lmgonigl/materials-qm/data/mammals.csv', stringsAsFactors=TRUE, na.strings=c(''), strip.white=TRUE) ## -------------------------------------------------- ## Data set 1: Body mass of late Quaternary mammals ## Read and examine the data ## 1. head(mm) ## 2. table(mm$continent) ## 3. mm$continent[mm$continent=='Af'] <- 'AF' mm$continent <- droplevels(mm$continent) ## 4. mm$log.mass <- log10(mm$mass.grams) ## Visualizing frequency distributions ## 1. tab.contintent <- table(mm$continent) barplot(tab.contintent) ## and with roated y-labels barplot(tab.contintent, las=1) ## 2. barplot(tab.contintent, las=1, cex.names=0.7) ## 3. barplot(tab.contintent, las=1, cex.names=0.7, col='red', xlab='Continent', ylab='Number') ## 4. barplot(tab.contintent, las=1, cex.names=0.7, col='red', xlab='Continent', ylab='Number', ylim=c(0,1500)) ## 5. barplot(sort(tab.contintent, decreasing=TRUE), las=1, cex.names=0.7, col='red', xlab='Continent', ylab='Number', ylim=c(0,1500)) ## 10. qqnorm(mm$log.mass[!is.na(mm$log.mass)]) ## 11. qqnorm(mm$log.mass[!is.na(mm$log.mass)], pch=16, cex=0.5) ## 12. hist(mm$log.mass, col='red', xlab='Log(Mass)', freq=FALSE) ## generate normal distribution with same mean and sd x <- seq(0, 8, length=1000) y <- dnorm(x, mean=mean(mm$log.mass, na.rm=TRUE), sd=sd(mm$log.mass, na.rm=TRUE)) lines(x, y, type="l", lwd=2, col='blue') ## Visualizing associations between variables ## 1. boxplot(log.mass ~ status, data=mm, cex.axis=0.7) ## 3. boxplot(log.mass ~ status, data=mm, cex.axis=0.7, width=sqrt(table(mm$status)), main='Mammalian body size') ## 4. tapply(mm$log.mass, mm$status, median, na.rm=TRUE) ## 5. tapply(mm$log.mass, mm$status, mean, na.rm=TRUE) ## 6. tab <- table(mm$status, mm$continent) ## to calculate which continent has the greatest number of ## extinctions relative to the number of extant species, you could do: tab['extinct',]/tab['extant',] ## 7. mosaicplot(table(mm$status, mm$continent), main='Mosaic plot') mosaicplot(table(mm$continent, mm$status), main='Mosaic plot') ## you could also try a stacked barplot (in this case, its not ## particularly helpful) barplot(table(mm$status, mm$continent)) barplot(table(mm$continent, mm$status)) ## taking the log of the values helps a bit but this is barplot(log(table(mm$status, mm$continent)+1)) barplot(log(table(mm$continent, mm$status)+1)) ## -------------------------------------------------- ## Data set 2: Fly sex and longevity rm(list=ls()) ## read directly from URL ff <- read.csv(url("https://www.sfu.ca/~lmgonigl/materials-qm/data/fruitflies.csv"), stringsAsFactors=TRUE, strip.white=TRUE) ## 1. head(ff) ## 2. boxplot(longevity.days ~ treatment, data=ff, xaxt='n', las=1, main='Longevity') ## add fancy x-labels x.labs <- sort(unique(ff$treatment)) text(x=seq_along(x.labs)+0.3, y=par('usr')[3], labels=x.labs, srt=35, adj=c(1.1,1.1), xpd=NA, cex=1) ## 3. stripchart(ff$longevity.days ~ ff$treatment, xaxt='n', xlab='', ylab='', pch=16, cex=1, las=1, vertical=TRUE) ## add x-labels x.labs <- sort(unique(ff$treatment)) text(x=seq_along(x.labs)+0.3, y=par('usr')[3], labels=x.labs, srt=35, adj=c(1.1,1.1), xpd=NA, cex=1) axis(2, las=1) ## now, with some jitter stripchart(ff$longevity.days ~ ff$treatment, method='jitter', xaxt='n', xlab='', ylab='', pch=16, cex=1, jitter=0.1, las=1, vertical=TRUE) ## add x-labels x.labs <- sort(unique(ff$treatment)) text(x=seq_along(x.labs)+0.3, y=par('usr')[3], labels=x.labs, srt=35, adj=c(1.1,1.1), xpd=NA, cex=1) axis(2, las=1) ## ggplot: try using geom_jitter() ## 4. plot(x=ff$thorax, y=ff$longevity, xlab='Thorax length', ylab='Longevity', pch=16, las=1) ## 5. plot(x=ff$thorax, y=ff$longevity, xlab='Thorax length', ylab='Longevity', pch=16, las=1) lines(lowess(x=ff$thorax, y=ff$longevity), col='red') ## 6. cols <- as.numeric(ff$treatment) plot(x=ff$thorax, y=ff$longevity, xlab='Thorax length', ylab='Longevity', pch=16, las=1, col=cols) legend('bottomright', legend=sort(unique(ff$treatment)), col=as.numeric(sort(unique(ff$treatment))), pch=16, bty='n') ## Multipanel plots ## 1. make.hist <- function(case) { ff.sub <- ff[ff$treatment==case,] hist(ff.sub$longevity, xlab='Thorax length', ylab='Longevity', pch=16, las=1, col='red', main=case) lines(lowess(x=ff.sub$longevity, y=ff.sub$thorax), col='red') } layout(matrix(1:5, ncol=5, nrow=1)) lapply(levels(ff$treatment), make.hist) ## 2. layout(matrix(1:5, ncol=1, nrow=5)) lapply(levels(ff$treatment), make.hist) ## 3. make.scatter <- function(case) { ff.sub <- ff[ff$treatment==case,] plot(x=ff.sub$longevity, y=ff.sub$thorax, xlab='Thorax length', ylab='Longevity', pch=16, las=1) lines(lowess(x=ff.sub$longevity, y=ff.sub$thorax), col='red') } layout(matrix(1:5, ncol=1, nrow=5)) lapply(levels(ff$treatment), make.scatter)