# Use plain ASCII quotes so that printed summary output renders correctly
options(useFancyQuotes = FALSE)
# Load the helper functions kept alongside this script
source("schwarz.functions.r")
# Alternative: load the helpers from the course web site
#source('http://www.stat.sfu.ca/~cschwarz/Stat-650/Notes/MyPrograms/schwarz.functions.r')
# Quick demo of using RStudio: build the integers 1 to 10, show them, plot them
x <- seq_len(10)
x
## [1] 1 2 3 4 5 6 7 8 9 10
plot(x, x)

# This script will read in the cereal data set,
# do a simple listing,
# fit a regression line,
# draw a scatter plot and add the line to the plot
# do a single factor crd anova
# get the compact letter display
# make some plots
# load required libraries
# Packages attached for the rest of the script
library(ggplot2)
library(emmeans)

# Cereal data from the local csv file (first row holds the column names)
cereal <- read.csv(
  "../SampleData/cereal.csv",
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)

# Cereal data read directly from the CMU StatLib archive; that file is read
# without a header row, so the column names are assigned by hand
cereal3 <- read.table(
  "http://lib.stat.cmu.edu/datasets/1993.expo/cereal",
  header = FALSE,
  as.is = TRUE,
  strip.white = TRUE
)
names(cereal3) <- c(
  "Name", "mfr", "type", "Calories", "protein", "Fat", "sodium", "fiber",
  "carbo", "sugars", "shelf", "potass", "vitamins", "weight", "cups"
)

# Derived columns: shelf as a factor (for categorical analyses) and
# protein * 4 stored as Calories.fr.Protein. Then check the structure.
cereal$shelfF <- factor(cereal$shelf)
cereal$Calories.fr.Protein <- cereal$protein * 4
str(cereal)
## 'data.frame': 77 obs. of 17 variables:
## $ name : chr "100%_Bran" "100%_Natural_Bran" "All-Bran" "All-Bran_with_Extra_Fiber" ...
## $ mfr : chr "N" "Q" "K" "K" ...
## $ type : chr "C" "C" "C" "C" ...
## $ calories : int 60 110 80 50 110 110 110 140 90 90 ...
## $ protein : int 4 3 4 4 2 2 2 3 2 3 ...
## $ fat : int 1 5 1 0 2 2 0 2 1 0 ...
## $ sodium : int 130 15 260 140 200 180 125 210 200 210 ...
## $ fiber : num 10 2 9 14 1 1.5 1 2 4 5 ...
## $ carbo : num 5 8 7 8 14 10.5 11 18 15 13 ...
## $ sugars : int 6 8 5 0 8 10 14 8 6 5 ...
## $ shelf : int 3 3 3 3 3 1 2 3 1 3 ...
## $ potass : int 280 135 320 330 NA 70 30 100 125 190 ...
## $ vitamins : int 25 0 25 25 25 25 25 25 25 25 ...
## $ weight : num 1 1 1 1 1 1 1 1.33 1 1 ...
## $ cups : num 0.331 NA 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
## $ shelfF : Factor w/ 3 levels "1","2","3": 3 3 3 3 3 1 2 3 1 3 ...
## $ Calories.fr.Protein: num 16 12 16 16 8 8 8 12 8 12 ...
# List the first few records (rows 1 to 5, every column)
cereal[1:5,]
## name mfr type calories protein fat sodium fiber carbo
## 1 100%_Bran N C 60 4 1 130 10 5
## 2 100%_Natural_Bran Q C 110 3 5 15 2 8
## 3 All-Bran K C 80 4 1 260 9 7
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140 14 8
## 5 Almond_Delight R C 110 2 2 200 1 14
## sugars shelf potass vitamins weight cups shelfF Calories.fr.Protein
## 1 6 3 280 25 1 0.331 3 16
## 2 8 3 135 0 1 NA 3 12
## 3 5 3 320 25 1 0.330 3 16
## 4 0 3 330 25 1 0.500 3 16
## 5 8 3 NA 25 1 0.750 3 8
# List some variables
# $ extracts a single column of the data frame as a vector
cereal$calories
## [1] 60 110 80 50 110 110 110 140 90 90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100 90 100 100 110 90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160 90 120 140 90 130 130 90 40 50 100
## [58] 90 120 90 90 110 100 80 80 90 110 100 80 100 150 110 100 110 100 90
## [77] 110
# Same column selected by name; the empty row index means all rows
cereal[,"calories"]
## [1] 60 110 80 50 110 110 110 140 90 90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100 90 100 100 110 90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160 90 120 140 90 130 130 90 40 50 100
## [58] 90 120 90 90 110 100 80 80 90 110 100 80 100 150 110 100 110 100 90
## [77] 110
cereal$fat
## [1] 1 5 1 0 2 2 0 2 1 0 2 2 3 2 1 0 0 0 1 3 0 0 1 0 1 0 0 2 0 1 0 1 1 0 3 2 1 0
## [39] 1 1 1 2 1 1 3 3 2 1 1 2 0 2 1 0 0 0 1 2 1 2 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1
## [77] 1
# Rows 1 to 5 of the three named columns, in the order given
cereal[1:5,c("name","fat","calories")]
## name fat calories
## 1 100%_Bran 1 60
## 2 100%_Natural_Bran 5 110
## 3 All-Bran 1 80
## 4 All-Bran_with_Extra_Fiber 0 50
## 5 Almond_Delight 2 110
# Make a basic scatter plot of calories against grams of fat
plotbasic <- ggplot(data=cereal, aes(x=fat, y=calories))+
  ggtitle("Calories vs Fat in cereals")+
  xlab("Grams of Fat")+ylab("Calories/serving")+
  geom_point()
plotbasic

# ggsave() arguments are spelled out in full rather than relying on
# partial matching of file/h/w
ggsave(filename='cal-vs-fat1.png', plot=plotbasic,
       height=4, width=6, units="in", dpi=300)
# Same plot with jittered points, so that cereals sharing the same
# (fat, calories) values do not hide one another
plotbasic2 <- ggplot(data=cereal, aes(x=fat, y=calories))+
  ggtitle("Calories vs Fat in cereals")+
  xlab("Grams of Fat")+ylab("Calories/serving")+
  geom_jitter()
plotbasic2

# Save the jittered plot. The plot object must be plotbasic2 here; passing
# plotbasic would write a second copy of the un-jittered plot.
ggsave(filename='cal-vs-fat2.png', plot=plotbasic2,
       height=4, width=6, units="in", dpi=300)
# Simple linear regression of calories on grams of fat, then the usual summary
fit.calories.fat <- lm(calories ~ fat, data = cereal)
summary(fit.calories.fat)
##
## Call:
## lm(formula = calories ~ fat, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -55.132 -5.132 4.868 14.868 45.256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.132 3.141 30.285 < 2e-16 ***
## fat 9.806 2.207 4.443 3.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.36 on 75 degrees of freedom
## Multiple R-squared: 0.2084, Adjusted R-squared: 0.1978
## F-statistic: 19.74 on 1 and 75 DF, p-value: 3.009e-05
# ANOVA table for the fit; anova() on an lm reports sequential sums of squares
anova(fit.calories.fat) # careful Type I SS
## Analysis of Variance Table
##
## Response: calories
## Df Sum Sq Mean Sq F value Pr(>F)
## fat 1 7402.9 7402.9 19.743 3.009e-05 ***
## Residuals 75 28121.8 375.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimated intercept and slope as a named vector
coef(fit.calories.fat)
## (Intercept) fat
## 95.131579 9.806005
# Standard errors = square roots of the diagonal of the covariance matrix
sqrt(diag(vcov(fit.calories.fat))) # extract the SE
## (Intercept) fat
## 3.141224 2.206897
confint(fit.calories.fat) # confidence intervals on parameters
## 2.5 % 97.5 %
## (Intercept) 88.873939 101.38922
## fat 5.409642 14.20237
# The summary object is a list; names() shows which pieces can be pulled out
names(summary(fit.calories.fat))
## [1] "call" "terms" "residuals" "coefficients"
## [5] "aliased" "sigma" "df" "r.squared"
## [9] "adj.r.squared" "fstatistic" "cov.unscaled"
summary(fit.calories.fat)$r.squared
## [1] 0.2083875
# sigma is the residual standard error shown in the printed summary
summary(fit.calories.fat)$sigma
## [1] 19.36381
class(fit.calories.fat)
## [1] "lm"
# List every method available for objects of this class
methods(class=class(fit.calories.fat))
## [1] add1 alias anova case.names coerce
## [6] confint cooks.distance deviance dfbeta dfbetas
## [11] drop1 dummy.coef effects emm_basis extractAIC
## [16] family formula fortify hatvalues influence
## [21] initialize kappa labels logLik model.frame
## [26] model.matrix nobs plot predict print
## [31] proj qr recover_data residuals rstandard
## [36] rstudent show simulate slotsFromS3 summary
## [41] variable.names vcov
## see '?methods' for accessing help and source code
# Add the fitted line to the jittered scatter plot, and save it
plotline <- plotbasic2 +
  geom_abline(intercept=coef(fit.calories.fat)[1],
              slope    =coef(fit.calories.fat)[2])
plotline

# ggsave() arguments spelled out in full (no partial matching of file/h/w)
ggsave(filename="cal-vs-fat3.png", plot=plotline,
       height=4, width=6, units="in", dpi=300)
# Or, if you don't want to do the actual fit, use ggplot directly
plot.calories.fat <- ggplot(data=cereal, aes(x=fat, y=calories)) +
  geom_jitter(shape=1) + # Use hollow circles
  geom_smooth(method=lm, # Add linear regression line
              se=FALSE) # Don't add shaded confidence region
plot.calories.fat
## `geom_smooth()` using formula 'y ~ x'

# Simple single-factor ANOVA:
# is the mean amount of sugar the same for all shelves?
# The categorical predictor must be a FACTOR (shelfF), not the integer shelf
fit.sugars.shelf <- lm(sugars ~ shelfF, data = cereal)
anova(fit.sugars.shelf)
## Analysis of Variance Table
##
## Response: sugars
## Df Sum Sq Mean Sq F value Pr(>F)
## shelfF 2 220.23 110.117 6.6013 0.002316 **
## Residuals 73 1217.71 16.681
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimate the marginal means along with confidence limits and Tukey multiple comparison.
# emmeans() gives one estimated mean per level of shelfF
fit.sugars.shelf.emmo <- emmeans::emmeans(fit.sugars.shelf, ~shelfF)
# cld() adds the compact letter display (.group column). multcomp is called
# through its namespace, so it must be installed even though it is not attached.
fit.sugars.shelf.cld <- multcomp::cld(fit.sugars.shelf.emmo, adjust='tukey')
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
fit.sugars.shelf.cld
## shelfF emmean SE df lower.CL upper.CL .group
## 1 5.11 0.937 73 2.82 7.40 1
## 3 6.53 0.681 73 4.86 8.19 1
## 2 9.62 0.891 73 7.44 11.80 2
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
## P value adjustment: tukey method for comparing a family of 3 estimates
## significance level used: alpha = 0.05
## NOTE: Compact letter displays can be misleading
## because they show NON-findings rather than findings.
## Consider using 'pairs()', 'pwpp()', or 'pwpm()' instead.
# Bar plot of the estimated means with the compact letter display
# (sf.cld.plot.bar is presumably defined in schwarz.functions.r -- confirm)
cld.plot <- sf.cld.plot.bar(fit.sugars.shelf.cld, "shelfF", order=FALSE)
cld.plot

# NOTE(review): the file is called "fat-vs-shelf" although the plot shows
# sugars by shelf; the name is kept so existing references to it still work.
# ggsave() arguments spelled out in full (no partial matching of file/h/w).
ggsave(filename="fat-vs-shelf.png", plot=cld.plot,
       height=4, width=6, units="in", dpi=200)
# Estimate the pairwise differences
pairs(fit.sugars.shelf.emmo)
## contrast estimate SE df t.ratio p.value
## 1 - 2 -4.51 1.29 73 -3.490 0.0023
## 1 - 3 -1.42 1.16 73 -1.228 0.4405
## 2 - 3 3.09 1.12 73 2.756 0.0199
##
## P value adjustment: tukey method for comparing a family of 3 estimates
# an alternate way to look at pairwise comparisons
pwpp(fit.sugars.shelf.emmo)

# Fun with vectors
age <- c(56, 56, 28, 23, 22)
height <- c(185, 162, 185, 167, 190)
f.names <- c('Carl', "Lois", 'Matthew', 'Marianne', 'David')
# Spell out TRUE/FALSE; T and F are ordinary variables that can be reassigned
over.30 <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
odd <- c(2.3, 'Carl') # surprising, but look at result!
length(age)
## [1] 5
# length() of the names vector (not of the unrelated stats::family function)
length(f.names) # number of elements not lengths of elements
## [1] 5
str(age) # what is the structure of age?
## num [1:5] 56 56 28 23 22
str(f.names)
## chr [1:5] "Carl" "Lois" "Matthew" "Marianne" "David"
# The c() function is very versatile
ah <- c(age, height)
ah
## [1] 56 56 28 23 22 185 162 185 167 190
age0age <- c(age, 0, age)
age0age
## [1] 56 56 28 23 22 0 56 56 28 23 22
length(age0age)
## [1] 11
# Mixing character and logical values coerces everything to character
odd <- c(f.names, over.30) # ??
odd
## [1] "Carl" "Lois" "Matthew" "Marianne" "David" "TRUE"
## [7] "TRUE" "FALSE" "FALSE" "FALSE"
# Dataframes
# Four vectors of equal length, one per variable
age <- c(56, 56, 28, 23, 22)
height <- c(185, 162, 185, 167, 190)
f.names <- c("Carl", "Lois", "Matthew", "Marianne", "David")
over.30 <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
# Bind them column-wise; the names are kept as character, not as a factor
schwarz <- data.frame(
  f.names = f.names,
  age = age,
  height = height,
  over.30 = over.30,
  stringsAsFactors = FALSE
)
schwarz
## f.names age height over.30
## 1 Carl 56 185 TRUE
## 2 Lois 56 162 TRUE
## 3 Matthew 28 185 FALSE
## 4 Marianne 23 167 FALSE
## 5 David 22 190 FALSE
str(schwarz)
## 'data.frame': 5 obs. of 4 variables:
## $ f.names: chr "Carl" "Lois" "Matthew" "Marianne" ...
## $ age : num 56 56 28 23 22
## $ height : num 185 162 185 167 190
## $ over.30: logi TRUE TRUE FALSE FALSE FALSE
# For a data frame, length() counts the columns, not the rows
length(schwarz)
## [1] 4
dim(schwarz)
## [1] 5 4
nrow(schwarz)
## [1] 5
ncol(schwarz)
## [1] 4
names(schwarz)
## [1] "f.names" "age" "height" "over.30"
# Most commonly created from data.
# Re-reading the file gives a fresh copy without the derived columns added
# earlier. The directory is spelled exactly as in the first read.csv() call
# ('../SampleData/'); a different letter case only resolves on
# case-insensitive file systems.
cereal <- read.csv('../SampleData/cereal.csv',
                   header=TRUE, as.is=TRUE, strip.white=TRUE)
str(cereal) # this function is VERY useful when things seem to go wrong
## 'data.frame': 77 obs. of 15 variables:
## $ name : chr "100%_Bran" "100%_Natural_Bran" "All-Bran" "All-Bran_with_Extra_Fiber" ...
## $ mfr : chr "N" "Q" "K" "K" ...
## $ type : chr "C" "C" "C" "C" ...
## $ calories: int 60 110 80 50 110 110 110 140 90 90 ...
## $ protein : int 4 3 4 4 2 2 2 3 2 3 ...
## $ fat : int 1 5 1 0 2 2 0 2 1 0 ...
## $ sodium : int 130 15 260 140 200 180 125 210 200 210 ...
## $ fiber : num 10 2 9 14 1 1.5 1 2 4 5 ...
## $ carbo : num 5 8 7 8 14 10.5 11 18 15 13 ...
## $ sugars : int 6 8 5 0 8 10 14 8 6 5 ...
## $ shelf : int 3 3 3 3 3 1 2 3 1 3 ...
## $ potass : int 280 135 320 330 NA 70 30 100 125 190 ...
## $ vitamins: int 25 0 25 25 25 25 25 25 25 25 ...
## $ weight : num 1 1 1 1 1 1 1 1.33 1 1 ...
## $ cups : num 0.331 NA 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
# Size of the data frame: length() and ncol() count columns, nrow() counts rows
length(cereal) # number of vectors, not length of vectors
## [1] 15
dim(cereal)
## [1] 77 15
nrow(cereal)
## [1] 77
ncol(cereal)
## [1] 15
names(cereal)
## [1] "name" "mfr" "type" "calories" "protein" "fat"
## [7] "sodium" "fiber" "carbo" "sugars" "shelf" "potass"
## [13] "vitamins" "weight" "cups"
# How to refer to parts of data frame
names(cereal)
## [1] "name" "mfr" "type" "calories" "protein" "fat"
## [7] "sodium" "fiber" "carbo" "sugars" "shelf" "potass"
## [13] "vitamins" "weight" "cups"
# One column, extracted by name with $
cereal$name
## [1] "100%_Bran"
## [2] "100%_Natural_Bran"
## [3] "All-Bran"
## [4] "All-Bran_with_Extra_Fiber"
## [5] "Almond_Delight"
## [6] "Apple_Cinnamon_Cheerios"
## [7] "Apple_Jacks"
## [8] "Basic_4"
## [9] "Bran_Chex"
## [10] "Bran_Flakes"
## [11] "Cap'n'Crunch"
## [12] "Cheerios"
## [13] "Cinnamon_Toast_Crunch"
## [14] "Clusters"
## [15] "Cocoa_Puffs"
## [16] "Corn_Chex"
## [17] "Corn_Flakes"
## [18] "Corn_Pops"
## [19] "Count_Chocula"
## [20] "Crackling_Oat_Bran"
## [21] "Cream_of_Wheat_(Quick)"
## [22] "Crispix"
## [23] "Crispy_Wheat_&_Raisins"
## [24] "Double_Chex"
## [25] "Froot_Loops"
## [26] "Frosted_Flakes"
## [27] "Frosted_Mini-Wheats"
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"
## [30] "Fruity_Pebbles"
## [31] "Golden_Crisp"
## [32] "Golden_Grahams"
## [33] "Grape_Nuts_Flakes"
## [34] "Grape-Nuts"
## [35] "Great_Grains_Pecan"
## [36] "Honey_Graham_Ohs"
## [37] "Honey_Nut_Cheerios"
## [38] "Honey-comb"
## [39] "Just_Right_Crunchy__Nuggets"
## [40] "Just_Right_Fruit_&_Nut"
## [41] "Kix"
## [42] "Life"
## [43] "Lucky_Charms"
## [44] "Maypo"
## [45] "Muesli_Raisins,_Dates,_&_Almonds"
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"
## [47] "Mueslix_Crispy_Blend"
## [48] "Multi-Grain_Cheerios"
## [49] "Nut&Honey_Crunch"
## [50] "Nutri-Grain_Almond-Raisin"
## [51] "Nutri-grain_Wheat"
## [52] "Oatmeal_Raisin_Crisp"
## [53] "Post_Nat._Raisin_Bran"
## [54] "Product_19"
## [55] "Puffed_Rice"
## [56] "Puffed_Wheat"
## [57] "Quaker_Oat_Squares"
## [58] "Quaker_Oatmeal"
## [59] "Raisin_Bran"
## [60] "Raisin_Nut_Bran"
## [61] "Raisin_Squares"
## [62] "Rice_Chex"
## [63] "Rice_Krispies"
## [64] "Shredded_Wheat"
## [65] "Shredded_Wheat_'n'Bran"
## [66] "Shredded_Wheat_spoon_size"
## [67] "Smacks"
## [68] "Special_K"
## [69] "Strawberry_Fruit_Wheats"
## [70] "Total_Corn_Flakes"
## [71] "Total_Raisin_Bran"
## [72] "Total_Whole_Grain"
## [73] "Triples"
## [74] "Trix"
## [75] "Wheat_Chex"
## [76] "Wheaties"
## [77] "Wheaties_Honey_Gold"
# Three equivalent ways to get the calories column as a vector
cereal$calories
## [1] 60 110 80 50 110 110 110 140 90 90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100 90 100 100 110 90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160 90 120 140 90 130 130 90 40 50 100
## [58] 90 120 90 90 110 100 80 80 90 110 100 80 100 150 110 100 110 100 90
## [77] 110
cereal[ , "calories"] # first index missing = ALL rows
## [1] 60 110 80 50 110 110 110 140 90 90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100 90 100 100 110 90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160 90 120 140 90 130 130 90 40 50 100
## [58] 90 120 90 90 110 100 80 80 90 110 100 80 100 150 110 100 110 100 90
## [77] 110
# A bare column name is not visible outside the data frame
#calories # doesn't work because vector is hidden
# with() evaluates the expression using the columns of cereal as variables
with(cereal, calories) # careful of case.
## [1] 60 110 80 50 110 110 110 140 90 90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100 90 100 100 110 90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160 90 120 140 90 130 130 90 40 50 100
## [58] 90 120 90 90 110 100 80 80 90 110 100 80 100 150 110 100 110 100 90
## [77] 110
# Row selection: the second index is left empty, so all columns are returned
cereal[1,]
## name mfr type calories protein fat sodium fiber carbo sugars shelf
## 1 100%_Bran N C 60 4 1 130 10 5 6 3
## potass vitamins weight cups
## 1 280 25 1 0.331
cereal[1:5,]
## name mfr type calories protein fat sodium fiber carbo
## 1 100%_Bran N C 60 4 1 130 10 5
## 2 100%_Natural_Bran Q C 110 3 5 15 2 8
## 3 All-Bran K C 80 4 1 260 9 7
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140 14 8
## 5 Almond_Delight R C 110 2 2 200 1 14
## sugars shelf potass vitamins weight cups
## 1 6 3 280 25 1 0.331
## 2 8 3 135 0 1 NA
## 3 5 3 320 25 1 0.330
## 4 0 3 330 25 1 0.500
## 5 8 3 NA 25 1 0.750
# Column selection by position: column 1, all rows (returned as a vector)
cereal[, 1]
## [1] "100%_Bran"
## [2] "100%_Natural_Bran"
## [3] "All-Bran"
## [4] "All-Bran_with_Extra_Fiber"
## [5] "Almond_Delight"
## [6] "Apple_Cinnamon_Cheerios"
## [7] "Apple_Jacks"
## [8] "Basic_4"
## [9] "Bran_Chex"
## [10] "Bran_Flakes"
## [11] "Cap'n'Crunch"
## [12] "Cheerios"
## [13] "Cinnamon_Toast_Crunch"
## [14] "Clusters"
## [15] "Cocoa_Puffs"
## [16] "Corn_Chex"
## [17] "Corn_Flakes"
## [18] "Corn_Pops"
## [19] "Count_Chocula"
## [20] "Crackling_Oat_Bran"
## [21] "Cream_of_Wheat_(Quick)"
## [22] "Crispix"
## [23] "Crispy_Wheat_&_Raisins"
## [24] "Double_Chex"
## [25] "Froot_Loops"
## [26] "Frosted_Flakes"
## [27] "Frosted_Mini-Wheats"
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"
## [30] "Fruity_Pebbles"
## [31] "Golden_Crisp"
## [32] "Golden_Grahams"
## [33] "Grape_Nuts_Flakes"
## [34] "Grape-Nuts"
## [35] "Great_Grains_Pecan"
## [36] "Honey_Graham_Ohs"
## [37] "Honey_Nut_Cheerios"
## [38] "Honey-comb"
## [39] "Just_Right_Crunchy__Nuggets"
## [40] "Just_Right_Fruit_&_Nut"
## [41] "Kix"
## [42] "Life"
## [43] "Lucky_Charms"
## [44] "Maypo"
## [45] "Muesli_Raisins,_Dates,_&_Almonds"
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"
## [47] "Mueslix_Crispy_Blend"
## [48] "Multi-Grain_Cheerios"
## [49] "Nut&Honey_Crunch"
## [50] "Nutri-Grain_Almond-Raisin"
## [51] "Nutri-grain_Wheat"
## [52] "Oatmeal_Raisin_Crisp"
## [53] "Post_Nat._Raisin_Bran"
## [54] "Product_19"
## [55] "Puffed_Rice"
## [56] "Puffed_Wheat"
## [57] "Quaker_Oat_Squares"
## [58] "Quaker_Oatmeal"
## [59] "Raisin_Bran"
## [60] "Raisin_Nut_Bran"
## [61] "Raisin_Squares"
## [62] "Rice_Chex"
## [63] "Rice_Krispies"
## [64] "Shredded_Wheat"
## [65] "Shredded_Wheat_'n'Bran"
## [66] "Shredded_Wheat_spoon_size"
## [67] "Smacks"
## [68] "Special_K"
## [69] "Strawberry_Fruit_Wheats"
## [70] "Total_Corn_Flakes"
## [71] "Total_Raisin_Bran"
## [72] "Total_Whole_Grain"
## [73] "Triples"
## [74] "Trix"
## [75] "Wheat_Chex"
## [76] "Wheaties"
## [77] "Wheaties_Honey_Gold"
# Empty row index = ALL rows; 1:5 = the first five columns
cereal[, 1:5]
## name mfr type calories protein
## 1 100%_Bran N C 60 4
## 2 100%_Natural_Bran Q C 110 3
## 3 All-Bran K C 80 4
## 4 All-Bran_with_Extra_Fiber K C 50 4
## 5 Almond_Delight R C 110 2
## 6 Apple_Cinnamon_Cheerios G C 110 2
## 7 Apple_Jacks K C 110 2
## 8 Basic_4 G C 140 3
## 9 Bran_Chex R C 90 2
## 10 Bran_Flakes P C 90 3
## 11 Cap'n'Crunch Q C 120 1
## 12 Cheerios G C 110 6
## 13 Cinnamon_Toast_Crunch G C 130 1
## 14 Clusters G C 100 3
## 15 Cocoa_Puffs G C 110 1
## 16 Corn_Chex R C 110 2
## 17 Corn_Flakes K C 110 2
## 18 Corn_Pops K C 100 1
## 19 Count_Chocula G C 110 1
## 20 Crackling_Oat_Bran K C 110 3
## 21 Cream_of_Wheat_(Quick) N H 100 3
## 22 Crispix K C 100 2
## 23 Crispy_Wheat_&_Raisins G C 90 2
## 24 Double_Chex R C 100 2
## 25 Froot_Loops K C 100 2
## 26 Frosted_Flakes K C 110 1
## 27 Frosted_Mini-Wheats K C 90 3
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3
## 29 Fruitful_Bran K C 130 3
## 30 Fruity_Pebbles P C 100 1
## 31 Golden_Crisp P C 100 2
## 32 Golden_Grahams G C 100 1
## 33 Grape_Nuts_Flakes P C 100 3
## 34 Grape-Nuts P C 110 3
## 35 Great_Grains_Pecan P C 110 3
## 36 Honey_Graham_Ohs Q C 130 1
## 37 Honey_Nut_Cheerios G C 110 3
## 38 Honey-comb P C 120 1
## 39 Just_Right_Crunchy__Nuggets K C 100 2
## 40 Just_Right_Fruit_&_Nut K C 140 3
## 41 Kix G C 100 2
## 42 Life Q C 100 4
## 43 Lucky_Charms G C 110 2
## 44 Maypo A H 110 4
## 45 Muesli_Raisins,_Dates,_&_Almonds R C 150 4
## 46 Muesli_Raisins,_Peaches,_&_Pecans R C 150 4
## 47 Mueslix_Crispy_Blend K C 160 3
## 48 Multi-Grain_Cheerios G C 90 2
## 49 Nut&Honey_Crunch K C 120 2
## 50 Nutri-Grain_Almond-Raisin K C 140 3
## 51 Nutri-grain_Wheat K C 90 3
## 52 Oatmeal_Raisin_Crisp G C 130 3
## 53 Post_Nat._Raisin_Bran P C 130 3
## 54 Product_19 K C 90 3
## 55 Puffed_Rice Q C 40 1
## 56 Puffed_Wheat Q C 50 2
## 57 Quaker_Oat_Squares Q C 100 4
## 58 Quaker_Oatmeal Q H 90 5
## 59 Raisin_Bran K C 120 3
## 60 Raisin_Nut_Bran G C 90 3
## 61 Raisin_Squares K C 90 2
## 62 Rice_Chex R C 110 1
## 63 Rice_Krispies K C 100 2
## 64 Shredded_Wheat N C 80 2
## 65 Shredded_Wheat_'n'Bran N C 80 3
## 66 Shredded_Wheat_spoon_size N C 90 3
## 67 Smacks K C 110 2
## 68 Special_K K C 100 6
## 69 Strawberry_Fruit_Wheats N C 80 2
## 70 Total_Corn_Flakes G C 100 2
## 71 Total_Raisin_Bran G C 150 3
## 72 Total_Whole_Grain G C 110 3
## 73 Triples G C 100 2
## 74 Trix G C 110 1
## 75 Wheat_Chex R C 100 3
## 76 Wheaties G C 90 3
## 77 Wheaties_Honey_Gold G C 110 2
# Both indices given: rows 1 to 4 of columns 1 to 5
cereal[1:4, 1:5]
## name mfr type calories protein
## 1 100%_Bran N C 60 4
## 2 100%_Natural_Bran Q C 110 3
## 3 All-Bran K C 80 4
## 4 All-Bran_with_Extra_Fiber K C 50 4
# Adding and removing variables from data frames
# Assigning to a new name with $ creates the column
cereal$CalPerGramFat <- cereal$calories / cereal$fat
# Cereals with 0 grams of fat give a division by zero, which R reports as Inf
cereal$CalPerGramFat # some interesting values!
## [1] 60.00000 22.00000 80.00000 Inf 55.00000 55.00000 Inf
## [8] 70.00000 90.00000 Inf 60.00000 55.00000 43.33333 50.00000
## [15] 110.00000 Inf Inf Inf 110.00000 36.66667 Inf
## [22] Inf 90.00000 Inf 100.00000 Inf Inf 60.00000
## [29] Inf 100.00000 Inf 100.00000 100.00000 Inf 36.66667
## [36] 65.00000 110.00000 Inf 100.00000 140.00000 100.00000 50.00000
## [43] 110.00000 110.00000 50.00000 50.00000 80.00000 90.00000 120.00000
## [50] 70.00000 Inf 65.00000 130.00000 Inf Inf Inf
## [57] 100.00000 45.00000 120.00000 45.00000 Inf Inf Inf
## [64] Inf Inf Inf 110.00000 Inf Inf 100.00000
## [71] 150.00000 110.00000 100.00000 110.00000 100.00000 90.00000 110.00000
cereal$CalPerGramFat <- NULL # removes this variable from df
# More fun with vectors
# Arithmetic on vectors is carried out element by element
age <- c(56, 56, 28, 23, 22)
age.next.year <- age + 1
yob <- 2013 - age # a single number is recycled against every element
# Functions applied to a vector
x <- c(0.5, 1, 1.5, 2, 4, 6, 8, 9, 10, 12)
length(x)
## [1] 10
str(x)
## num [1:10] 0.5 1 1.5 2 4 6 8 9 10 12
sqrt(x) # one result per element
## [1] 0.7071068 1.0000000 1.2247449 1.4142136 2.0000000 2.4494897 2.8284271
## [8] 3.0000000 3.1622777 3.4641016
# Functions that summarise the whole vector
range(x)
## [1] 0.5 12.0
mean(x)
## [1] 5.4
sd(x)
## [1] 4.175324
median(x)
## [1] 5
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.500 1.625 5.000 5.400 8.750 12.000
sum(x)
## [1] 54
# min() gives one overall minimum; pmin() compares element by element
min(x, 3)
## [1] 0.5
pmin(x, 3)
## [1] 0.5 1.0 1.5 2.0 3.0 3.0 3.0 3.0 3.0 3.0
# Exercise: summary statistics on columns of the cereal data frame
mean(cereal$calories)
## [1] 105.0649
max(cereal$fat)
## [1] 5
min(cereal$fat)
## [1] 0
range(cereal$fat)
## [1] 0 5
# weight contains missing values, so the plain mean is NA ...
mean(cereal$weight)
## [1] NA
# ... unless the missing values are dropped first
mean(cereal$weight, na.rm=TRUE)
## [1] 1.0304
# Simple increments
help(":") # be sure to put operators in quotes for the help() function
5:10
## [1] 5 6 7 8 9 10
# The start need not be an integer; the sequence counts down in steps of 1
10.2:3
## [1] 10.2 9.2 8.2 7.2 6.2 5.2 4.2 3.2
# This is (5:10) - 1, not 5:(10-1)
5:10-1 # careful : is evaluated prior to arithmetic
## [1] 4 5 6 7 8 9
# seq(from, to, by); naming the arguments allows any order
seq(1, 100, 10)
## [1] 1 11 21 31 41 51 61 71 81 91
seq(to=100, from=1, by=10)
## [1] 1 11 21 31 41 51 61 71 81 91
# replicate things
x <- c(5, 6, 7)
help(rep)
rep(TRUE, 10)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# times repeats the whole vector; length.out stops at the given length
rep(x, times=2)
## [1] 5 6 7 5 6 7
rep(x, length.out=8)
## [1] 5 6 7 5 6 7 5 6
# each repeats every element before moving on to the next
rep(x, each=2)
## [1] 5 5 6 6 7 7
# Indexing
x <- c(5:9, 12:15, 34:37)
x
## [1] 5 6 7 8 9 12 13 14 15 34 35 36 37
# Picking elements by position
x[2] # indexing uses SQUARE brackets
## [1] 6
x[c(2, 3, 7)] # a vector of positions selects several elements
## [1] 6 7 13
#x[2,3,7] # oops, not a proper index for a vector
n <- 10
x[n] # the position can be held in a variable: this is the 10th element
## [1] 34
inx <- c(3, 5, 7)
#x(inx) # Oops wrong types of brackets
x[inx]
## [1] 7 9 13
# Picking elements with a logical selection vector
select <- (x > 6) & (x < 10)
select
## [1] FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
sum(select) # TRUE counts as 1, so this is the number of selected elements
## [1] 3
x[select]
## [1] 7 8 9
# Indexing
x <- c(5:9, 12:15, 34:37)
x
## [1] 5 6 7 8 9 12 13 14 15 34 35 36 37
# Simple replacement indexing
x
## [1] 5 6 7 8 9 12 13 14 15 34 35 36 37
x[2] <- 100 # note the use of SQUARE brackets for indexing
x
## [1] 5 100 7 8 9 12 13 14 15 34 35 36 37
# A single replacement value is recycled over all selected positions
x[c(2,3,7)] <- 200 # the index can also be a vector
x
## [1] 5 200 200 8 9 12 200 14 15 34 35 36 37
# Deliberate mismatch: 3 positions but 2 values, so R recycles and warns
x[c(2,3,7)] <- c(200,201) # notice the warning message
## Warning in x[c(2, 3, 7)] <- c(200, 201): number of items to replace is not a
## multiple of replacement length
x
## [1] 5 200 201 8 9 12 200 14 15 34 35 36 37
n <- 10
x[n] <- 500 # indices can be variables. What does this mean?
inx <- c(8,9,10)
# Another deliberate length mismatch; this also overwrites the 500 just stored
x[inx] <- c(300, 400)
## Warning in x[inx] <- c(300, 400): number of items to replace is not a multiple
## of replacement length
x
## [1] 5 200 201 8 9 12 200 300 400 300 35 36 37
# using selection vector
select <- x >6 & x < 10
select
## [1] FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
sum(select)
## [1] 2
x[select] <- -1
x
## [1] 5 200 201 -1 -1 12 200 300 400 300 35 36 37
# The same ideas work on a whole data frame: count the cells equal to -1 ...
sum(cereal== -1, na.rm=TRUE)
## [1] 0
# ... and the cells that are missing
sum(is.na(cereal))
## [1] 15
# Dropping elements: a negative index removes that position from the result
x
## [1] 5 200 201 -1 -1 12 200 300 400 300 35 36 37
x[-2] # note the use of SQUARE brackets for indexing
## [1] 5 201 -1 -1 12 200 300 400 300 35 36 37
x[-c(2,3,7)] # the index can also be a vector
## [1] 5 -1 -1 12 300 400 300 35 36 37
n <- -10
x[n] # indices can be variables. What does this mean?
## [1] 5 200 201 -1 -1 12 200 300 400 35 36 37
inx <- c(3, 5, 7)
# Negating a vector of positions drops all of them
x[-inx]
## [1] 5 200 -1 12 300 400 300 35 36 37
# Using logical vectors to select elements
x <- c(5:9, 12:15, 34:37)
x
## [1] 5 6 7 8 9 12 13 14 15 34 35 36 37
# A comparison yields TRUE/FALSE per element; indexing keeps the TRUE ones
x > 10
## [1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE
x[x > 10]
## [1] 12 13 14 15 34 35 36 37
x[(x > 10) & (x < 20)]
## [1] 12 13 14 15
x[x %% 2 == 0] # note the use of == to test for equality
## [1] 6 8 12 14 34 36
# A logical index also works on the left-hand side of an assignment
x[x > 10] <- 500
x
## [1] 5 6 7 8 9 500 500 500 500 500 500 500 500
# We want to rename "fiber" to "fibre"
# Avoid using an explicit index (i.e. names(cereal)[8] <- "fibre") as it is
# not robust to columns being added, removed or reordered
names(cereal)
## [1] "name" "mfr" "type" "calories" "protein" "fat"
## [7] "sodium" "fiber" "carbo" "sugars" "shelf" "potass"
## [13] "vitamins" "weight" "cups"
# Match the name exactly: a pattern match such as grepl("fiber", ...) would
# also rename any other column whose name merely contains "fiber"
names(cereal)[names(cereal) == "fiber"] <- "fibre"
names(cereal)
## [1] "name" "mfr" "type" "calories" "protein" "fat"
## [7] "sodium" "fibre" "carbo" "sugars" "shelf" "potass"
## [13] "vitamins" "weight" "cups"
# Select certain cereal manufacturers
# %in% is TRUE for each row whose mfr is one of the listed codes
cereal[ cereal$mfr %in% c("P","A"),] # don't forget the last comma
## name mfr type calories protein fat sodium
## 10 Bran_Flakes P C 90 3 0 210
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3 2 160
## 30 Fruity_Pebbles P C 100 1 1 135
## 31 Golden_Crisp P C 100 2 0 45
## 33 Grape_Nuts_Flakes P C 100 3 1 140
## 34 Grape-Nuts P C 110 3 0 170
## 35 Great_Grains_Pecan P C 110 3 3 75
## 38 Honey-comb P C 120 1 0 180
## 44 Maypo A H 110 4 1 0
## 53 Post_Nat._Raisin_Bran P C 130 3 1 200
## fibre carbo sugars shelf potass vitamins weight cups
## 10 5 13 5 3 190 25 1.00 0.67
## 28 5 12 10 3 200 25 1.25 0.67
## 30 0 13 12 2 25 25 1.00 0.75
## 31 0 11 15 1 40 25 1.00 0.88
## 33 3 15 5 3 85 25 1.00 0.88
## 34 3 17 3 3 90 25 1.00 0.25
## 35 3 13 4 3 100 25 1.00 0.33
## 38 0 14 11 1 35 25 1.00 1.33
## 44 0 16 3 2 95 25 1.00 NA
## 53 6 11 14 3 260 25 1.33 0.67
# grep() returns the row numbers whose name contains "Bran" (case-sensitive)
cereal[grep("Bran", cereal$name),] # don't forget the last comma
## name mfr type calories protein fat sodium fibre carbo
## 1 100%_Bran N C 60 4 1 130 10.0 5.0
## 2 100%_Natural_Bran Q C 110 3 5 15 2.0 8.0
## 3 All-Bran K C 80 4 1 260 9.0 7.0
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140 14.0 8.0
## 9 Bran_Chex R C 90 2 1 200 4.0 15.0
## 10 Bran_Flakes P C 90 3 0 210 5.0 13.0
## 20 Crackling_Oat_Bran K C 110 3 3 140 4.0 10.0
## 29 Fruitful_Bran K C 130 3 0 240 5.0 14.0
## 53 Post_Nat._Raisin_Bran P C 130 3 1 200 6.0 11.0
## 59 Raisin_Bran K C 120 3 1 210 5.0 14.0
## 60 Raisin_Nut_Bran G C 90 3 2 140 2.5 10.5
## 65 Shredded_Wheat_'n'Bran N C 80 3 0 0 4.0 19.0
## 71 Total_Raisin_Bran G C 150 3 1 190 4.0 15.0
## sugars shelf potass vitamins weight cups
## 1 6 3 280 25 1.00 0.331
## 2 8 3 135 0 1.00 NA
## 3 5 3 320 25 1.00 0.330
## 4 0 3 330 25 1.00 0.500
## 9 6 1 125 25 1.00 0.670
## 10 5 3 190 25 1.00 0.670
## 20 7 3 160 25 1.00 0.500
## 29 12 3 190 25 1.33 0.670
## 53 14 3 260 25 1.33 0.670
## 59 12 2 240 25 1.33 0.750
## 60 8 3 140 25 1.00 0.500
## 65 0 1 140 0 1.00 0.670
## 71 14 3 230 100 1.50 1.000
# Fun with functions
# concatenating objects together, especially to make a vector
limits <- c(0, 100)
# Pass the vector built above to ylim() instead of repeating the literal;
# points whose calories fall outside the limits are dropped with a warning
ggplot(data=cereal, aes(x=fat, y=calories))+
  geom_point()+
  ylim(limits)
## Warning: Removed 38 rows containing missing values (geom_point).

# Generating sequences
# From 1 up to at most 10 in steps of 2
seq(from = 1, to = 10, by = 2)
## [1] 1 3 5 7 9
# Ten values starting at 1 in steps of 2 (no upper limit given)
seq(from = 1, by = 2, length.out = 10)
## [1] 1 3 5 7 9 11 13 15 17 19
# Generating all possible combinations: one row per (sex, age) pair
expand.grid(
  sex = c("m", "f"),
  age = c(10, 20, 30),
  stringsAsFactors = FALSE
)
## sex age
## 1 m 10
## 2 f 10
## 3 m 20
## 4 f 20
## 5 m 30
## 6 f 30
# checking for and counting number of missing values; selecting rows without missing values
# is.na() gives TRUE for each missing weight
is.na(cereal$weight)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE
# Summing a logical vector counts its TRUE values: missing, then non-missing
sum(is.na(cereal$weight))
## [1] 2
sum(!is.na(cereal$weight))
## [1] 75
# Keep only the rows whose weight is not missing
select <- is.na(cereal$weight)
cereal[ !select,]
## name mfr type calories protein fat sodium
## 1 100%_Bran N C 60 4 1 130
## 2 100%_Natural_Bran Q C 110 3 5 15
## 3 All-Bran K C 80 4 1 260
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140
## 5 Almond_Delight R C 110 2 2 200
## 6 Apple_Cinnamon_Cheerios G C 110 2 2 180
## 7 Apple_Jacks K C 110 2 0 125
## 8 Basic_4 G C 140 3 2 210
## 9 Bran_Chex R C 90 2 1 200
## 10 Bran_Flakes P C 90 3 0 210
## 11 Cap'n'Crunch Q C 120 1 2 220
## 12 Cheerios G C 110 6 2 290
## 13 Cinnamon_Toast_Crunch G C 130 1 3 210
## 14 Clusters G C 100 3 2 140
## 15 Cocoa_Puffs G C 110 1 1 180
## 16 Corn_Chex R C 110 2 0 280
## 17 Corn_Flakes K C 110 2 0 290
## 18 Corn_Pops K C 100 1 0 90
## 19 Count_Chocula G C 110 1 1 180
## 20 Crackling_Oat_Bran K C 110 3 3 140
## 21 Cream_of_Wheat_(Quick) N H 100 3 0 80
## 22 Crispix K C 100 2 0 220
## 23 Crispy_Wheat_&_Raisins G C 90 2 1 140
## 24 Double_Chex R C 100 2 0 190
## 25 Froot_Loops K C 100 2 1 125
## 26 Frosted_Flakes K C 110 1 0 200
## 27 Frosted_Mini-Wheats K C 90 3 0 0
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3 2 160
## 29 Fruitful_Bran K C 130 3 0 240
## 30 Fruity_Pebbles P C 100 1 1 135
## 31 Golden_Crisp P C 100 2 0 45
## 32 Golden_Grahams G C 100 1 1 280
## 33 Grape_Nuts_Flakes P C 100 3 1 140
## 34 Grape-Nuts P C 110 3 0 170
## 35 Great_Grains_Pecan P C 110 3 3 75
## 36 Honey_Graham_Ohs Q C 130 1 2 220
## 37 Honey_Nut_Cheerios G C 110 3 1 250
## 38 Honey-comb P C 120 1 0 180
## 39 Just_Right_Crunchy__Nuggets K C 100 2 1 170
## 40 Just_Right_Fruit_&_Nut K C 140 3 1 170
## 41 Kix G C 100 2 1 260
## 42 Life Q C 100 4 2 150
## 43 Lucky_Charms G C 110 2 1 180
## 44 Maypo A H 110 4 1 0
## 47 Mueslix_Crispy_Blend K C 160 3 2 150
## 48 Multi-Grain_Cheerios G C 90 2 1 220
## 49 Nut&Honey_Crunch K C 120 2 1 190
## 50 Nutri-Grain_Almond-Raisin K C 140 3 2 220
## 51 Nutri-grain_Wheat K C 90 3 0 170
## 52 Oatmeal_Raisin_Crisp G C 130 3 2 170
## 53 Post_Nat._Raisin_Bran P C 130 3 1 200
## 54 Product_19 K C 90 3 0 320
## 55 Puffed_Rice Q C 40 1 0 0
## 56 Puffed_Wheat Q C 50 2 0 0
## 57 Quaker_Oat_Squares Q C 100 4 1 135
## 58 Quaker_Oatmeal Q H 90 5 2 0
## 59 Raisin_Bran K C 120 3 1 210
## 60 Raisin_Nut_Bran G C 90 3 2 140
## 61 Raisin_Squares K C 90 2 0 0
## 62 Rice_Chex R C 110 1 0 240
## 63 Rice_Krispies K C 100 2 0 290
## 64 Shredded_Wheat N C 80 2 0 0
## 65 Shredded_Wheat_'n'Bran N C 80 3 0 0
## 66 Shredded_Wheat_spoon_size N C 90 3 0 0
## 67 Smacks K C 110 2 1 70
## 68 Special_K K C 100 6 0 230
## 69 Strawberry_Fruit_Wheats N C 80 2 0 15
## 70 Total_Corn_Flakes G C 100 2 1 200
## 71 Total_Raisin_Bran G C 150 3 1 190
## 72 Total_Whole_Grain G C 110 3 1 200
## 73 Triples G C 100 2 1 250
## 74 Trix G C 110 1 1 140
## 75 Wheat_Chex R C 100 3 1 230
## 76 Wheaties G C 90 3 1 200
## 77 Wheaties_Honey_Gold G C 110 2 1 200
## fibre carbo sugars shelf potass vitamins weight cups
## 1 10.0 5.0 6 3 280 25 1.00 0.331
## 2 2.0 8.0 8 3 135 0 1.00 NA
## 3 9.0 7.0 5 3 320 25 1.00 0.330
## 4 14.0 8.0 0 3 330 25 1.00 0.500
## 5 1.0 14.0 8 3 NA 25 1.00 0.750
## 6 1.5 10.5 10 1 70 25 1.00 0.750
## 7 1.0 11.0 14 2 30 25 1.00 1.000
## 8 2.0 18.0 8 3 100 25 1.33 0.750
## 9 4.0 15.0 6 1 125 25 1.00 0.670
## 10 5.0 13.0 5 3 190 25 1.00 0.670
## 11 0.0 12.0 12 2 35 25 1.00 0.750
## 12 2.0 17.0 1 1 105 25 1.00 1.250
## 13 0.0 13.0 9 2 45 25 1.00 0.750
## 14 2.0 13.0 7 3 105 25 1.00 0.500
## 15 0.0 12.0 13 2 55 25 1.00 1.000
## 16 0.0 22.0 3 1 25 25 1.00 1.000
## 17 1.0 21.0 2 1 35 25 1.00 1.000
## 18 1.0 13.0 12 2 20 25 1.00 1.000
## 19 0.0 12.0 13 2 65 25 1.00 1.000
## 20 4.0 10.0 7 3 160 25 1.00 0.500
## 21 1.0 21.0 0 2 NA 0 1.00 1.000
## 22 1.0 21.0 3 3 30 25 1.00 1.000
## 23 2.0 11.0 10 3 120 25 1.00 0.750
## 24 1.0 18.0 5 3 80 25 1.00 0.750
## 25 1.0 11.0 13 2 30 25 1.00 1.000
## 26 1.0 14.0 11 1 25 25 1.00 0.750
## 27 3.0 14.0 7 2 100 25 1.00 0.800
## 28 5.0 12.0 10 3 200 25 1.25 0.670
## 29 5.0 14.0 12 3 190 25 1.33 0.670
## 30 0.0 13.0 12 2 25 25 1.00 0.750
## 31 0.0 11.0 15 1 40 25 1.00 0.880
## 32 0.0 15.0 9 2 45 25 1.00 0.750
## 33 3.0 15.0 5 3 85 25 1.00 0.880
## 34 3.0 17.0 3 3 90 25 1.00 0.250
## 35 3.0 13.0 4 3 100 25 1.00 0.330
## 36 1.0 12.0 11 2 45 25 1.00 1.000
## 37 1.5 11.5 10 1 90 25 1.00 0.750
## 38 0.0 14.0 11 1 35 25 1.00 1.330
## 39 1.0 17.0 6 3 60 100 1.00 NA
## 40 2.0 20.0 9 3 95 100 1.30 0.750
## 41 0.0 21.0 3 2 40 25 1.00 1.500
## 42 2.0 12.0 6 2 95 25 1.00 0.670
## 43 0.0 12.0 12 2 55 25 1.00 1.000
## 44 0.0 16.0 3 2 95 25 1.00 NA
## 47 3.0 17.0 13 3 160 25 1.50 0.670
## 48 2.0 15.0 6 1 90 25 1.00 1.000
## 49 0.0 15.0 9 2 40 25 1.00 0.670
## 50 3.0 21.0 7 3 130 25 1.33 0.670
## 51 3.0 18.0 2 3 90 25 1.00 NA
## 52 1.5 13.5 10 3 120 25 1.25 0.500
## 53 6.0 11.0 14 3 260 25 1.33 0.670
## 54 1.0 20.0 3 3 45 100 1.00 1.000
## 55 0.0 13.0 0 3 15 0 0.50 1.000
## 56 1.0 10.0 0 3 50 0 0.50 NA
## 57 2.0 14.0 6 3 110 25 1.00 0.500
## 58 2.7 NA NA 1 110 0 1.00 0.670
## 59 5.0 14.0 12 2 240 25 1.33 0.750
## 60 2.5 10.5 8 3 140 25 1.00 0.500
## 61 2.0 15.0 6 3 110 25 1.00 0.500
## 62 0.0 23.0 2 1 30 25 1.00 1.130
## 63 0.0 22.0 3 1 35 25 1.00 1.000
## 64 3.0 16.0 0 1 95 0 0.83 NA
## 65 4.0 19.0 0 1 140 0 1.00 0.670
## 66 3.0 20.0 0 1 120 0 1.00 0.670
## 67 1.0 9.0 15 2 40 25 1.00 0.750
## 68 1.0 16.0 3 1 55 25 1.00 1.000
## 69 3.0 15.0 5 2 90 25 1.00 NA
## 70 0.0 21.0 3 3 35 100 1.00 1.000
## 71 4.0 15.0 14 3 230 100 1.50 1.000
## 72 3.0 16.0 3 3 110 100 1.00 1.000
## 73 0.0 21.0 3 3 60 25 1.00 0.750
## 74 0.0 13.0 12 2 25 25 1.00 1.000
## 75 3.0 17.0 3 1 115 25 1.00 0.670
## 76 3.0 17.0 3 1 110 25 1.00 1.000
## 77 1.0 16.0 8 1 60 25 1.00 0.750
# max() reduces a vector to its single largest element;
# pmax() ("parallel maximum") compares its arguments element by element
x <- c(1, 2, 3, 4, 5, 6)
max(x)
## [1] 6
pmax(3, x)  # the scalar 3 is recycled, so every element is floored at 3
## [1] 3 3 3 4 5 6
# finding the set of unique values
# (unique() keeps one copy of each distinct value, in order of first appearance)
unique(cereal$type)
## [1] "C" "H"
# xtabs - counting and checking
# exclude = NULL together with na.action = na.pass keeps missing values in the
# table, so they are counted under <NA> instead of being silently dropped
xtabs(~ type, data = cereal, exclude = NULL, na.action = na.pass)
## type
## C H
## 74 3
# two-way table: one row per type, one column per distinct value of cups
xtabs(~ type + cups, data = cereal, exclude = NULL, na.action = na.pass)
## cups
## type 0.25 0.33 0.331 0.5 0.67 0.75 0.8 0.88 1 1.13 1.25 1.33 1.5 <NA>
## C 1 2 1 7 12 16 1 2 20 1 1 1 1 8
## H 0 0 0 0 1 0 0 0 1 0 0 0 0 1
# pasting text together
# paste0() concatenates its arguments with no separator; it is the idiomatic
# shorthand for paste(..., sep = "")
paste0("Analysis of ", nrow(cereal), " breakfast cereals")
## [1] "Analysis of 77 breakfast cereals"
# the same pasted string is used as a plot title built from the data
ggplot(data = cereal, aes(x = fat, y = calories)) +
  geom_point() +
  ggtitle(paste0("Analysis of ", nrow(cereal), " breakfast cereals"))

# pattern matching - Google is your friend
# grepl() is case-sensitive by default, so lower-case "bran" finds no rows
select <- grepl("bran", cereal$name)
cereal[select, ]
## [1] name mfr type calories protein fat sodium fibre
## [9] carbo sugars shelf potass vitamins weight cups
## <0 rows> (or 0-length row.names)
# ignore.case = TRUE finds "bran" anywhere in the name, in any letter case
select <- grepl("bran", cereal$name, ignore.case = TRUE)
cereal[select, ]
## name mfr type calories protein fat sodium fibre carbo
## 1 100%_Bran N C 60 4 1 130 10.0 5.0
## 2 100%_Natural_Bran Q C 110 3 5 15 2.0 8.0
## 3 All-Bran K C 80 4 1 260 9.0 7.0
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140 14.0 8.0
## 9 Bran_Chex R C 90 2 1 200 4.0 15.0
## 10 Bran_Flakes P C 90 3 0 210 5.0 13.0
## 20 Crackling_Oat_Bran K C 110 3 3 140 4.0 10.0
## 29 Fruitful_Bran K C 130 3 0 240 5.0 14.0
## 53 Post_Nat._Raisin_Bran P C 130 3 1 200 6.0 11.0
## 59 Raisin_Bran K C 120 3 1 210 5.0 14.0
## 60 Raisin_Nut_Bran G C 90 3 2 140 2.5 10.5
## 65 Shredded_Wheat_'n'Bran N C 80 3 0 0 4.0 19.0
## 71 Total_Raisin_Bran G C 150 3 1 190 4.0 15.0
## sugars shelf potass vitamins weight cups
## 1 6 3 280 25 1.00 0.331
## 2 8 3 135 0 1.00 NA
## 3 5 3 320 25 1.00 0.330
## 4 0 3 330 25 1.00 0.500
## 9 6 1 125 25 1.00 0.670
## 10 5 3 190 25 1.00 0.670
## 20 7 3 160 25 1.00 0.500
## 29 12 3 190 25 1.33 0.670
## 53 14 3 260 25 1.33 0.670
## 59 12 2 240 25 1.33 0.750
## 60 8 3 140 25 1.00 0.500
## 65 0 1 140 0 1.00 0.670
## 71 14 3 230 100 1.50 1.000
# "^" anchors the pattern to the start of the string: names that begin with bran
select <- grepl("^bran", cereal$name, ignore.case = TRUE)
cereal[select, ]
## name mfr type calories protein fat sodium fibre carbo sugars shelf
## 9 Bran_Chex R C 90 2 1 200 4 15 6 1
## 10 Bran_Flakes P C 90 3 0 210 5 13 5 3
## potass vitamins weight cups
## 9 125 25 1 0.67
## 10 190 25 1 0.67
# "$" anchors the pattern to the end of the string: names that end with bran
select <- grepl("bran$", cereal$name, ignore.case = TRUE)
cereal[select, ]
## name mfr type calories protein fat sodium fibre carbo
## 1 100%_Bran N C 60 4 1 130 10.0 5.0
## 2 100%_Natural_Bran Q C 110 3 5 15 2.0 8.0
## 3 All-Bran K C 80 4 1 260 9.0 7.0
## 20 Crackling_Oat_Bran K C 110 3 3 140 4.0 10.0
## 29 Fruitful_Bran K C 130 3 0 240 5.0 14.0
## 53 Post_Nat._Raisin_Bran P C 130 3 1 200 6.0 11.0
## 59 Raisin_Bran K C 120 3 1 210 5.0 14.0
## 60 Raisin_Nut_Bran G C 90 3 2 140 2.5 10.5
## 65 Shredded_Wheat_'n'Bran N C 80 3 0 0 4.0 19.0
## 71 Total_Raisin_Bran G C 150 3 1 190 4.0 15.0
## sugars shelf potass vitamins weight cups
## 1 6 3 280 25 1.00 0.331
## 2 8 3 135 0 1.00 NA
## 3 5 3 320 25 1.00 0.330
## 20 7 3 160 25 1.00 0.500
## 29 12 3 190 25 1.33 0.670
## 53 14 3 260 25 1.33 0.670
## 59 12 2 240 25 1.33 0.750
## 60 8 3 140 25 1.00 0.500
## 65 0 1 140 0 1.00 0.670
## 71 14 3 230 100 1.50 1.000
# dealing with strings
# toupper() converts every letter to upper case; other characters are unchanged
toupper(cereal$name)
## [1] "100%_BRAN"
## [2] "100%_NATURAL_BRAN"
## [3] "ALL-BRAN"
## [4] "ALL-BRAN_WITH_EXTRA_FIBER"
## [5] "ALMOND_DELIGHT"
## [6] "APPLE_CINNAMON_CHEERIOS"
## [7] "APPLE_JACKS"
## [8] "BASIC_4"
## [9] "BRAN_CHEX"
## [10] "BRAN_FLAKES"
## [11] "CAP'N'CRUNCH"
## [12] "CHEERIOS"
## [13] "CINNAMON_TOAST_CRUNCH"
## [14] "CLUSTERS"
## [15] "COCOA_PUFFS"
## [16] "CORN_CHEX"
## [17] "CORN_FLAKES"
## [18] "CORN_POPS"
## [19] "COUNT_CHOCULA"
## [20] "CRACKLING_OAT_BRAN"
## [21] "CREAM_OF_WHEAT_(QUICK)"
## [22] "CRISPIX"
## [23] "CRISPY_WHEAT_&_RAISINS"
## [24] "DOUBLE_CHEX"
## [25] "FROOT_LOOPS"
## [26] "FROSTED_FLAKES"
## [27] "FROSTED_MINI-WHEATS"
## [28] "FRUIT_&_FIBRE_DATES,_WALNUTS,_AND_OATS"
## [29] "FRUITFUL_BRAN"
## [30] "FRUITY_PEBBLES"
## [31] "GOLDEN_CRISP"
## [32] "GOLDEN_GRAHAMS"
## [33] "GRAPE_NUTS_FLAKES"
## [34] "GRAPE-NUTS"
## [35] "GREAT_GRAINS_PECAN"
## [36] "HONEY_GRAHAM_OHS"
## [37] "HONEY_NUT_CHEERIOS"
## [38] "HONEY-COMB"
## [39] "JUST_RIGHT_CRUNCHY__NUGGETS"
## [40] "JUST_RIGHT_FRUIT_&_NUT"
## [41] "KIX"
## [42] "LIFE"
## [43] "LUCKY_CHARMS"
## [44] "MAYPO"
## [45] "MUESLI_RAISINS,_DATES,_&_ALMONDS"
## [46] "MUESLI_RAISINS,_PEACHES,_&_PECANS"
## [47] "MUESLIX_CRISPY_BLEND"
## [48] "MULTI-GRAIN_CHEERIOS"
## [49] "NUT&HONEY_CRUNCH"
## [50] "NUTRI-GRAIN_ALMOND-RAISIN"
## [51] "NUTRI-GRAIN_WHEAT"
## [52] "OATMEAL_RAISIN_CRISP"
## [53] "POST_NAT._RAISIN_BRAN"
## [54] "PRODUCT_19"
## [55] "PUFFED_RICE"
## [56] "PUFFED_WHEAT"
## [57] "QUAKER_OAT_SQUARES"
## [58] "QUAKER_OATMEAL"
## [59] "RAISIN_BRAN"
## [60] "RAISIN_NUT_BRAN"
## [61] "RAISIN_SQUARES"
## [62] "RICE_CHEX"
## [63] "RICE_KRISPIES"
## [64] "SHREDDED_WHEAT"
## [65] "SHREDDED_WHEAT_'N'BRAN"
## [66] "SHREDDED_WHEAT_SPOON_SIZE"
## [67] "SMACKS"
## [68] "SPECIAL_K"
## [69] "STRAWBERRY_FRUIT_WHEATS"
## [70] "TOTAL_CORN_FLAKES"
## [71] "TOTAL_RAISIN_BRAN"
## [72] "TOTAL_WHOLE_GRAIN"
## [73] "TRIPLES"
## [74] "TRIX"
## [75] "WHEAT_CHEX"
## [76] "WHEATIES"
## [77] "WHEATIES_HONEY_GOLD"
# tolower() converts every letter to lower case; other characters are unchanged
tolower(cereal$name)
## [1] "100%_bran"
## [2] "100%_natural_bran"
## [3] "all-bran"
## [4] "all-bran_with_extra_fiber"
## [5] "almond_delight"
## [6] "apple_cinnamon_cheerios"
## [7] "apple_jacks"
## [8] "basic_4"
## [9] "bran_chex"
## [10] "bran_flakes"
## [11] "cap'n'crunch"
## [12] "cheerios"
## [13] "cinnamon_toast_crunch"
## [14] "clusters"
## [15] "cocoa_puffs"
## [16] "corn_chex"
## [17] "corn_flakes"
## [18] "corn_pops"
## [19] "count_chocula"
## [20] "crackling_oat_bran"
## [21] "cream_of_wheat_(quick)"
## [22] "crispix"
## [23] "crispy_wheat_&_raisins"
## [24] "double_chex"
## [25] "froot_loops"
## [26] "frosted_flakes"
## [27] "frosted_mini-wheats"
## [28] "fruit_&_fibre_dates,_walnuts,_and_oats"
## [29] "fruitful_bran"
## [30] "fruity_pebbles"
## [31] "golden_crisp"
## [32] "golden_grahams"
## [33] "grape_nuts_flakes"
## [34] "grape-nuts"
## [35] "great_grains_pecan"
## [36] "honey_graham_ohs"
## [37] "honey_nut_cheerios"
## [38] "honey-comb"
## [39] "just_right_crunchy__nuggets"
## [40] "just_right_fruit_&_nut"
## [41] "kix"
## [42] "life"
## [43] "lucky_charms"
## [44] "maypo"
## [45] "muesli_raisins,_dates,_&_almonds"
## [46] "muesli_raisins,_peaches,_&_pecans"
## [47] "mueslix_crispy_blend"
## [48] "multi-grain_cheerios"
## [49] "nut&honey_crunch"
## [50] "nutri-grain_almond-raisin"
## [51] "nutri-grain_wheat"
## [52] "oatmeal_raisin_crisp"
## [53] "post_nat._raisin_bran"
## [54] "product_19"
## [55] "puffed_rice"
## [56] "puffed_wheat"
## [57] "quaker_oat_squares"
## [58] "quaker_oatmeal"
## [59] "raisin_bran"
## [60] "raisin_nut_bran"
## [61] "raisin_squares"
## [62] "rice_chex"
## [63] "rice_krispies"
## [64] "shredded_wheat"
## [65] "shredded_wheat_'n'bran"
## [66] "shredded_wheat_spoon_size"
## [67] "smacks"
## [68] "special_k"
## [69] "strawberry_fruit_wheats"
## [70] "total_corn_flakes"
## [71] "total_raisin_bran"
## [72] "total_whole_grain"
## [73] "triples"
## [74] "trix"
## [75] "wheat_chex"
## [76] "wheaties"
## [77] "wheaties_honey_gold"
# trimws() strips leading and trailing whitespace; the output below matches
# the stored names, so there was nothing to trim in this column
trimws(cereal$name)
## [1] "100%_Bran"
## [2] "100%_Natural_Bran"
## [3] "All-Bran"
## [4] "All-Bran_with_Extra_Fiber"
## [5] "Almond_Delight"
## [6] "Apple_Cinnamon_Cheerios"
## [7] "Apple_Jacks"
## [8] "Basic_4"
## [9] "Bran_Chex"
## [10] "Bran_Flakes"
## [11] "Cap'n'Crunch"
## [12] "Cheerios"
## [13] "Cinnamon_Toast_Crunch"
## [14] "Clusters"
## [15] "Cocoa_Puffs"
## [16] "Corn_Chex"
## [17] "Corn_Flakes"
## [18] "Corn_Pops"
## [19] "Count_Chocula"
## [20] "Crackling_Oat_Bran"
## [21] "Cream_of_Wheat_(Quick)"
## [22] "Crispix"
## [23] "Crispy_Wheat_&_Raisins"
## [24] "Double_Chex"
## [25] "Froot_Loops"
## [26] "Frosted_Flakes"
## [27] "Frosted_Mini-Wheats"
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"
## [30] "Fruity_Pebbles"
## [31] "Golden_Crisp"
## [32] "Golden_Grahams"
## [33] "Grape_Nuts_Flakes"
## [34] "Grape-Nuts"
## [35] "Great_Grains_Pecan"
## [36] "Honey_Graham_Ohs"
## [37] "Honey_Nut_Cheerios"
## [38] "Honey-comb"
## [39] "Just_Right_Crunchy__Nuggets"
## [40] "Just_Right_Fruit_&_Nut"
## [41] "Kix"
## [42] "Life"
## [43] "Lucky_Charms"
## [44] "Maypo"
## [45] "Muesli_Raisins,_Dates,_&_Almonds"
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"
## [47] "Mueslix_Crispy_Blend"
## [48] "Multi-Grain_Cheerios"
## [49] "Nut&Honey_Crunch"
## [50] "Nutri-Grain_Almond-Raisin"
## [51] "Nutri-grain_Wheat"
## [52] "Oatmeal_Raisin_Crisp"
## [53] "Post_Nat._Raisin_Bran"
## [54] "Product_19"
## [55] "Puffed_Rice"
## [56] "Puffed_Wheat"
## [57] "Quaker_Oat_Squares"
## [58] "Quaker_Oatmeal"
## [59] "Raisin_Bran"
## [60] "Raisin_Nut_Bran"
## [61] "Raisin_Squares"
## [62] "Rice_Chex"
## [63] "Rice_Krispies"
## [64] "Shredded_Wheat"
## [65] "Shredded_Wheat_'n'Bran"
## [66] "Shredded_Wheat_spoon_size"
## [67] "Smacks"
## [68] "Special_K"
## [69] "Strawberry_Fruit_Wheats"
## [70] "Total_Corn_Flakes"
## [71] "Total_Raisin_Bran"
## [72] "Total_Whole_Grain"
## [73] "Triples"
## [74] "Trix"
## [75] "Wheat_Chex"
## [76] "Wheaties"
## [77] "Wheaties_Honey_Gold"
# first four characters of each name (shorter names are returned whole)
substr(cereal$name, start = 1, stop = 4)
## [1] "100%" "100%" "All-" "All-" "Almo" "Appl" "Appl" "Basi" "Bran" "Bran"
## [11] "Cap'" "Chee" "Cinn" "Clus" "Coco" "Corn" "Corn" "Corn" "Coun" "Crac"
## [21] "Crea" "Cris" "Cris" "Doub" "Froo" "Fros" "Fros" "Frui" "Frui" "Frui"
## [31] "Gold" "Gold" "Grap" "Grap" "Grea" "Hone" "Hone" "Hone" "Just" "Just"
## [41] "Kix" "Life" "Luck" "Mayp" "Mues" "Mues" "Mues" "Mult" "Nut&" "Nutr"
## [51] "Nutr" "Oatm" "Post" "Prod" "Puff" "Puff" "Quak" "Quak" "Rais" "Rais"
## [61] "Rais" "Rice" "Rice" "Shre" "Shre" "Shre" "Smac" "Spec" "Stra" "Tota"
## [71] "Tota" "Tota" "Trip" "Trix" "Whea" "Whea" "Whea"
# everything from the 4th character to the end of each name
substring(cereal$name, first = 4)
## [1] "%_Bran" "%_Natural_Bran"
## [3] "-Bran" "-Bran_with_Extra_Fiber"
## [5] "ond_Delight" "le_Cinnamon_Cheerios"
## [7] "le_Jacks" "ic_4"
## [9] "n_Chex" "n_Flakes"
## [11] "'n'Crunch" "erios"
## [13] "namon_Toast_Crunch" "sters"
## [15] "oa_Puffs" "n_Chex"
## [17] "n_Flakes" "n_Pops"
## [19] "nt_Chocula" "ckling_Oat_Bran"
## [21] "am_of_Wheat_(Quick)" "spix"
## [23] "spy_Wheat_&_Raisins" "ble_Chex"
## [25] "ot_Loops" "sted_Flakes"
## [27] "sted_Mini-Wheats" "it_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "itful_Bran" "ity_Pebbles"
## [31] "den_Crisp" "den_Grahams"
## [33] "pe_Nuts_Flakes" "pe-Nuts"
## [35] "at_Grains_Pecan" "ey_Graham_Ohs"
## [37] "ey_Nut_Cheerios" "ey-comb"
## [39] "t_Right_Crunchy__Nuggets" "t_Right_Fruit_&_Nut"
## [41] "" "e"
## [43] "ky_Charms" "po"
## [45] "sli_Raisins,_Dates,_&_Almonds" "sli_Raisins,_Peaches,_&_Pecans"
## [47] "slix_Crispy_Blend" "ti-Grain_Cheerios"
## [49] "&Honey_Crunch" "ri-Grain_Almond-Raisin"
## [51] "ri-grain_Wheat" "meal_Raisin_Crisp"
## [53] "t_Nat._Raisin_Bran" "duct_19"
## [55] "fed_Rice" "fed_Wheat"
## [57] "ker_Oat_Squares" "ker_Oatmeal"
## [59] "sin_Bran" "sin_Nut_Bran"
## [61] "sin_Squares" "e_Chex"
## [63] "e_Krispies" "edded_Wheat"
## [65] "edded_Wheat_'n'Bran" "edded_Wheat_spoon_size"
## [67] "cks" "cial_K"
## [69] "awberry_Fruit_Wheats" "al_Corn_Flakes"
## [71] "al_Raisin_Bran" "al_Whole_Grain"
## [73] "ples" "x"
## [75] "at_Chex" "aties"
## [77] "aties_Honey_Gold"
# text before the first "_": regexpr() gives the position of the first "_",
# or -1 when there is none; the stop position is then negative and substr()
# returns "" (see the blank entries in the output below)
substr(cereal$name, start = 1,
       stop = regexpr("_", cereal$name, fixed = TRUE) - 1)
## [1] "100%" "100%" "" "All-Bran" "Almond"
## [6] "Apple" "Apple" "Basic" "Bran" "Bran"
## [11] "" "" "Cinnamon" "" "Cocoa"
## [16] "Corn" "Corn" "Corn" "Count" "Crackling"
## [21] "Cream" "" "Crispy" "Double" "Froot"
## [26] "Frosted" "Frosted" "Fruit" "Fruitful" "Fruity"
## [31] "Golden" "Golden" "Grape" "" "Great"
## [36] "Honey" "Honey" "" "Just" "Just"
## [41] "" "" "Lucky" "" "Muesli"
## [46] "Muesli" "Mueslix" "Multi-Grain" "Nut&Honey" "Nutri-Grain"
## [51] "Nutri-grain" "Oatmeal" "Post" "Product" "Puffed"
## [56] "Puffed" "Quaker" "Quaker" "Raisin" "Raisin"
## [61] "Raisin" "Rice" "Rice" "Shredded" "Shredded"
## [66] "Shredded" "" "Special" "Strawberry" "Total"
## [71] "Total" "Total" "" "" "Wheat"
## [76] "" "Wheaties"
# same extraction, but pmax() forces the stop position to be at least 5, so
# names without a "_" no longer come back empty
# NOTE(review): the floor of 5 is crude - names with no "_" are cut to five
# characters ("Cheer") and four-letter first words keep the "_" ("Bran_")
substr(cereal$name, start = 1,
       stop = pmax(5, regexpr("_", cereal$name, fixed = TRUE) - 1))
## [1] "100%_" "100%_" "All-B" "All-Bran" "Almond"
## [6] "Apple" "Apple" "Basic" "Bran_" "Bran_"
## [11] "Cap'n" "Cheer" "Cinnamon" "Clust" "Cocoa"
## [16] "Corn_" "Corn_" "Corn_" "Count" "Crackling"
## [21] "Cream" "Crisp" "Crispy" "Double" "Froot"
## [26] "Frosted" "Frosted" "Fruit" "Fruitful" "Fruity"
## [31] "Golden" "Golden" "Grape" "Grape" "Great"
## [36] "Honey" "Honey" "Honey" "Just_" "Just_"
## [41] "Kix" "Life" "Lucky" "Maypo" "Muesli"
## [46] "Muesli" "Mueslix" "Multi-Grain" "Nut&Honey" "Nutri-Grain"
## [51] "Nutri-grain" "Oatmeal" "Post_" "Product" "Puffed"
## [56] "Puffed" "Quaker" "Quaker" "Raisin" "Raisin"
## [61] "Raisin" "Rice_" "Rice_" "Shredded" "Shredded"
## [66] "Shredded" "Smack" "Special" "Strawberry" "Total"
## [71] "Total" "Total" "Tripl" "Trix" "Wheat"
## [76] "Wheat" "Wheaties"
# swap every "-" for "_"; fixed = TRUE treats the pattern as literal text
gsub("-", "_", cereal$name, fixed = TRUE)
## [1] "100%_Bran"
## [2] "100%_Natural_Bran"
## [3] "All_Bran"
## [4] "All_Bran_with_Extra_Fiber"
## [5] "Almond_Delight"
## [6] "Apple_Cinnamon_Cheerios"
## [7] "Apple_Jacks"
## [8] "Basic_4"
## [9] "Bran_Chex"
## [10] "Bran_Flakes"
## [11] "Cap'n'Crunch"
## [12] "Cheerios"
## [13] "Cinnamon_Toast_Crunch"
## [14] "Clusters"
## [15] "Cocoa_Puffs"
## [16] "Corn_Chex"
## [17] "Corn_Flakes"
## [18] "Corn_Pops"
## [19] "Count_Chocula"
## [20] "Crackling_Oat_Bran"
## [21] "Cream_of_Wheat_(Quick)"
## [22] "Crispix"
## [23] "Crispy_Wheat_&_Raisins"
## [24] "Double_Chex"
## [25] "Froot_Loops"
## [26] "Frosted_Flakes"
## [27] "Frosted_Mini_Wheats"
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"
## [30] "Fruity_Pebbles"
## [31] "Golden_Crisp"
## [32] "Golden_Grahams"
## [33] "Grape_Nuts_Flakes"
## [34] "Grape_Nuts"
## [35] "Great_Grains_Pecan"
## [36] "Honey_Graham_Ohs"
## [37] "Honey_Nut_Cheerios"
## [38] "Honey_comb"
## [39] "Just_Right_Crunchy__Nuggets"
## [40] "Just_Right_Fruit_&_Nut"
## [41] "Kix"
## [42] "Life"
## [43] "Lucky_Charms"
## [44] "Maypo"
## [45] "Muesli_Raisins,_Dates,_&_Almonds"
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"
## [47] "Mueslix_Crispy_Blend"
## [48] "Multi_Grain_Cheerios"
## [49] "Nut&Honey_Crunch"
## [50] "Nutri_Grain_Almond_Raisin"
## [51] "Nutri_grain_Wheat"
## [52] "Oatmeal_Raisin_Crisp"
## [53] "Post_Nat._Raisin_Bran"
## [54] "Product_19"
## [55] "Puffed_Rice"
## [56] "Puffed_Wheat"
## [57] "Quaker_Oat_Squares"
## [58] "Quaker_Oatmeal"
## [59] "Raisin_Bran"
## [60] "Raisin_Nut_Bran"
## [61] "Raisin_Squares"
## [62] "Rice_Chex"
## [63] "Rice_Krispies"
## [64] "Shredded_Wheat"
## [65] "Shredded_Wheat_'n'Bran"
## [66] "Shredded_Wheat_spoon_size"
## [67] "Smacks"
## [68] "Special_K"
## [69] "Strawberry_Fruit_Wheats"
## [70] "Total_Corn_Flakes"
## [71] "Total_Raisin_Bran"
## [72] "Total_Whole_Grain"
## [73] "Triples"
## [74] "Trix"
## [75] "Wheat_Chex"
## [76] "Wheaties"
## [77] "Wheaties_Honey_Gold"
# sorting
# sort() returns the sorted values only; the data frame itself is not changed
sort(cereal$name)
## [1] "100%_Bran"
## [2] "100%_Natural_Bran"
## [3] "All-Bran"
## [4] "All-Bran_with_Extra_Fiber"
## [5] "Almond_Delight"
## [6] "Apple_Cinnamon_Cheerios"
## [7] "Apple_Jacks"
## [8] "Basic_4"
## [9] "Bran_Chex"
## [10] "Bran_Flakes"
## [11] "Cap'n'Crunch"
## [12] "Cheerios"
## [13] "Cinnamon_Toast_Crunch"
## [14] "Clusters"
## [15] "Cocoa_Puffs"
## [16] "Corn_Chex"
## [17] "Corn_Flakes"
## [18] "Corn_Pops"
## [19] "Count_Chocula"
## [20] "Crackling_Oat_Bran"
## [21] "Cream_of_Wheat_(Quick)"
## [22] "Crispix"
## [23] "Crispy_Wheat_&_Raisins"
## [24] "Double_Chex"
## [25] "Froot_Loops"
## [26] "Frosted_Flakes"
## [27] "Frosted_Mini-Wheats"
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"
## [30] "Fruity_Pebbles"
## [31] "Golden_Crisp"
## [32] "Golden_Grahams"
## [33] "Grape_Nuts_Flakes"
## [34] "Grape-Nuts"
## [35] "Great_Grains_Pecan"
## [36] "Honey_Graham_Ohs"
## [37] "Honey_Nut_Cheerios"
## [38] "Honey-comb"
## [39] "Just_Right_Crunchy__Nuggets"
## [40] "Just_Right_Fruit_&_Nut"
## [41] "Kix"
## [42] "Life"
## [43] "Lucky_Charms"
## [44] "Maypo"
## [45] "Muesli_Raisins,_Dates,_&_Almonds"
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"
## [47] "Mueslix_Crispy_Blend"
## [48] "Multi-Grain_Cheerios"
## [49] "Nut&Honey_Crunch"
## [50] "Nutri-Grain_Almond-Raisin"
## [51] "Nutri-grain_Wheat"
## [52] "Oatmeal_Raisin_Crisp"
## [53] "Post_Nat._Raisin_Bran"
## [54] "Product_19"
## [55] "Puffed_Rice"
## [56] "Puffed_Wheat"
## [57] "Quaker_Oat_Squares"
## [58] "Quaker_Oatmeal"
## [59] "Raisin_Bran"
## [60] "Raisin_Nut_Bran"
## [61] "Raisin_Squares"
## [62] "Rice_Chex"
## [63] "Rice_Krispies"
## [64] "Shredded_Wheat"
## [65] "Shredded_Wheat_'n'Bran"
## [66] "Shredded_Wheat_spoon_size"
## [67] "Smacks"
## [68] "Special_K"
## [69] "Strawberry_Fruit_Wheats"
## [70] "Total_Corn_Flakes"
## [71] "Total_Raisin_Bran"
## [72] "Total_Whole_Grain"
## [73] "Triples"
## [74] "Trix"
## [75] "Wheat_Chex"
## [76] "Wheaties"
## [77] "Wheaties_Honey_Gold"
# reorder a data frame: order() gives the row permutation that sorts by name
cereal[order(cereal$name), ]
## name mfr type calories protein fat sodium
## 1 100%_Bran N C 60 4 1 130
## 2 100%_Natural_Bran Q C 110 3 5 15
## 3 All-Bran K C 80 4 1 260
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140
## 5 Almond_Delight R C 110 2 2 200
## 6 Apple_Cinnamon_Cheerios G C 110 2 2 180
## 7 Apple_Jacks K C 110 2 0 125
## 8 Basic_4 G C 140 3 2 210
## 9 Bran_Chex R C 90 2 1 200
## 10 Bran_Flakes P C 90 3 0 210
## 11 Cap'n'Crunch Q C 120 1 2 220
## 12 Cheerios G C 110 6 2 290
## 13 Cinnamon_Toast_Crunch G C 130 1 3 210
## 14 Clusters G C 100 3 2 140
## 15 Cocoa_Puffs G C 110 1 1 180
## 16 Corn_Chex R C 110 2 0 280
## 17 Corn_Flakes K C 110 2 0 290
## 18 Corn_Pops K C 100 1 0 90
## 19 Count_Chocula G C 110 1 1 180
## 20 Crackling_Oat_Bran K C 110 3 3 140
## 21 Cream_of_Wheat_(Quick) N H 100 3 0 80
## 22 Crispix K C 100 2 0 220
## 23 Crispy_Wheat_&_Raisins G C 90 2 1 140
## 24 Double_Chex R C 100 2 0 190
## 25 Froot_Loops K C 100 2 1 125
## 26 Frosted_Flakes K C 110 1 0 200
## 27 Frosted_Mini-Wheats K C 90 3 0 0
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3 2 160
## 29 Fruitful_Bran K C 130 3 0 240
## 30 Fruity_Pebbles P C 100 1 1 135
## 31 Golden_Crisp P C 100 2 0 45
## 32 Golden_Grahams G C 100 1 1 280
## 33 Grape_Nuts_Flakes P C 100 3 1 140
## 34 Grape-Nuts P C 110 3 0 170
## 35 Great_Grains_Pecan P C 110 3 3 75
## 36 Honey_Graham_Ohs Q C 130 1 2 220
## 37 Honey_Nut_Cheerios G C 110 3 1 250
## 38 Honey-comb P C 120 1 0 180
## 39 Just_Right_Crunchy__Nuggets K C 100 2 1 170
## 40 Just_Right_Fruit_&_Nut K C 140 3 1 170
## 41 Kix G C 100 2 1 260
## 42 Life Q C 100 4 2 150
## 43 Lucky_Charms G C 110 2 1 180
## 44 Maypo A H 110 4 1 0
## 45 Muesli_Raisins,_Dates,_&_Almonds R C 150 4 3 95
## 46 Muesli_Raisins,_Peaches,_&_Pecans R C 150 4 3 150
## 47 Mueslix_Crispy_Blend K C 160 3 2 150
## 48 Multi-Grain_Cheerios G C 90 2 1 220
## 49 Nut&Honey_Crunch K C 120 2 1 190
## 50 Nutri-Grain_Almond-Raisin K C 140 3 2 220
## 51 Nutri-grain_Wheat K C 90 3 0 170
## 52 Oatmeal_Raisin_Crisp G C 130 3 2 170
## 53 Post_Nat._Raisin_Bran P C 130 3 1 200
## 54 Product_19 K C 90 3 0 320
## 55 Puffed_Rice Q C 40 1 0 0
## 56 Puffed_Wheat Q C 50 2 0 0
## 57 Quaker_Oat_Squares Q C 100 4 1 135
## 58 Quaker_Oatmeal Q H 90 5 2 0
## 59 Raisin_Bran K C 120 3 1 210
## 60 Raisin_Nut_Bran G C 90 3 2 140
## 61 Raisin_Squares K C 90 2 0 0
## 62 Rice_Chex R C 110 1 0 240
## 63 Rice_Krispies K C 100 2 0 290
## 64 Shredded_Wheat N C 80 2 0 0
## 65 Shredded_Wheat_'n'Bran N C 80 3 0 0
## 66 Shredded_Wheat_spoon_size N C 90 3 0 0
## 67 Smacks K C 110 2 1 70
## 68 Special_K K C 100 6 0 230
## 69 Strawberry_Fruit_Wheats N C 80 2 0 15
## 70 Total_Corn_Flakes G C 100 2 1 200
## 71 Total_Raisin_Bran G C 150 3 1 190
## 72 Total_Whole_Grain G C 110 3 1 200
## 73 Triples G C 100 2 1 250
## 74 Trix G C 110 1 1 140
## 75 Wheat_Chex R C 100 3 1 230
## 76 Wheaties G C 90 3 1 200
## 77 Wheaties_Honey_Gold G C 110 2 1 200
## fibre carbo sugars shelf potass vitamins weight cups
## 1 10.0 5.0 6 3 280 25 1.00 0.331
## 2 2.0 8.0 8 3 135 0 1.00 NA
## 3 9.0 7.0 5 3 320 25 1.00 0.330
## 4 14.0 8.0 0 3 330 25 1.00 0.500
## 5 1.0 14.0 8 3 NA 25 1.00 0.750
## 6 1.5 10.5 10 1 70 25 1.00 0.750
## 7 1.0 11.0 14 2 30 25 1.00 1.000
## 8 2.0 18.0 8 3 100 25 1.33 0.750
## 9 4.0 15.0 6 1 125 25 1.00 0.670
## 10 5.0 13.0 5 3 190 25 1.00 0.670
## 11 0.0 12.0 12 2 35 25 1.00 0.750
## 12 2.0 17.0 1 1 105 25 1.00 1.250
## 13 0.0 13.0 9 2 45 25 1.00 0.750
## 14 2.0 13.0 7 3 105 25 1.00 0.500
## 15 0.0 12.0 13 2 55 25 1.00 1.000
## 16 0.0 22.0 3 1 25 25 1.00 1.000
## 17 1.0 21.0 2 1 35 25 1.00 1.000
## 18 1.0 13.0 12 2 20 25 1.00 1.000
## 19 0.0 12.0 13 2 65 25 1.00 1.000
## 20 4.0 10.0 7 3 160 25 1.00 0.500
## 21 1.0 21.0 0 2 NA 0 1.00 1.000
## 22 1.0 21.0 3 3 30 25 1.00 1.000
## 23 2.0 11.0 10 3 120 25 1.00 0.750
## 24 1.0 18.0 5 3 80 25 1.00 0.750
## 25 1.0 11.0 13 2 30 25 1.00 1.000
## 26 1.0 14.0 11 1 25 25 1.00 0.750
## 27 3.0 14.0 7 2 100 25 1.00 0.800
## 28 5.0 12.0 10 3 200 25 1.25 0.670
## 29 5.0 14.0 12 3 190 25 1.33 0.670
## 30 0.0 13.0 12 2 25 25 1.00 0.750
## 31 0.0 11.0 15 1 40 25 1.00 0.880
## 32 0.0 15.0 9 2 45 25 1.00 0.750
## 33 3.0 15.0 5 3 85 25 1.00 0.880
## 34 3.0 17.0 3 3 90 25 1.00 0.250
## 35 3.0 13.0 4 3 100 25 1.00 0.330
## 36 1.0 12.0 11 2 45 25 1.00 1.000
## 37 1.5 11.5 10 1 90 25 1.00 0.750
## 38 0.0 14.0 11 1 35 25 1.00 1.330
## 39 1.0 17.0 6 3 60 100 1.00 NA
## 40 2.0 20.0 9 3 95 100 1.30 0.750
## 41 0.0 21.0 3 2 40 25 1.00 1.500
## 42 2.0 12.0 6 2 95 25 1.00 0.670
## 43 0.0 12.0 12 2 55 25 1.00 1.000
## 44 0.0 16.0 3 2 95 25 1.00 NA
## 45 3.0 16.0 11 3 170 25 NA NA
## 46 3.0 16.0 11 3 170 25 NA NA
## 47 3.0 17.0 13 3 160 25 1.50 0.670
## 48 2.0 15.0 6 1 90 25 1.00 1.000
## 49 0.0 15.0 9 2 40 25 1.00 0.670
## 50 3.0 21.0 7 3 130 25 1.33 0.670
## 51 3.0 18.0 2 3 90 25 1.00 NA
## 52 1.5 13.5 10 3 120 25 1.25 0.500
## 53 6.0 11.0 14 3 260 25 1.33 0.670
## 54 1.0 20.0 3 3 45 100 1.00 1.000
## 55 0.0 13.0 0 3 15 0 0.50 1.000
## 56 1.0 10.0 0 3 50 0 0.50 NA
## 57 2.0 14.0 6 3 110 25 1.00 0.500
## 58 2.7 NA NA 1 110 0 1.00 0.670
## 59 5.0 14.0 12 2 240 25 1.33 0.750
## 60 2.5 10.5 8 3 140 25 1.00 0.500
## 61 2.0 15.0 6 3 110 25 1.00 0.500
## 62 0.0 23.0 2 1 30 25 1.00 1.130
## 63 0.0 22.0 3 1 35 25 1.00 1.000
## 64 3.0 16.0 0 1 95 0 0.83 NA
## 65 4.0 19.0 0 1 140 0 1.00 0.670
## 66 3.0 20.0 0 1 120 0 1.00 0.670
## 67 1.0 9.0 15 2 40 25 1.00 0.750
## 68 1.0 16.0 3 1 55 25 1.00 1.000
## 69 3.0 15.0 5 2 90 25 1.00 NA
## 70 0.0 21.0 3 3 35 100 1.00 1.000
## 71 4.0 15.0 14 3 230 100 1.50 1.000
## 72 3.0 16.0 3 3 110 100 1.00 1.000
## 73 0.0 21.0 3 3 60 25 1.00 0.750
## 74 0.0 13.0 12 2 25 25 1.00 1.000
## 75 3.0 17.0 3 1 115 25 1.00 0.670
## 76 3.0 17.0 3 1 110 25 1.00 1.000
## 77 1.0 16.0 8 1 60 25 1.00 0.750
# sort rows by calories, breaking ties by name
cereal[with(cereal, order(calories, name)), ]
## name mfr type calories protein fat sodium
## 55 Puffed_Rice Q C 40 1 0 0
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140
## 56 Puffed_Wheat Q C 50 2 0 0
## 1 100%_Bran N C 60 4 1 130
## 3 All-Bran K C 80 4 1 260
## 64 Shredded_Wheat N C 80 2 0 0
## 65 Shredded_Wheat_'n'Bran N C 80 3 0 0
## 69 Strawberry_Fruit_Wheats N C 80 2 0 15
## 9 Bran_Chex R C 90 2 1 200
## 10 Bran_Flakes P C 90 3 0 210
## 23 Crispy_Wheat_&_Raisins G C 90 2 1 140
## 27 Frosted_Mini-Wheats K C 90 3 0 0
## 48 Multi-Grain_Cheerios G C 90 2 1 220
## 51 Nutri-grain_Wheat K C 90 3 0 170
## 54 Product_19 K C 90 3 0 320
## 58 Quaker_Oatmeal Q H 90 5 2 0
## 60 Raisin_Nut_Bran G C 90 3 2 140
## 61 Raisin_Squares K C 90 2 0 0
## 66 Shredded_Wheat_spoon_size N C 90 3 0 0
## 76 Wheaties G C 90 3 1 200
## 14 Clusters G C 100 3 2 140
## 18 Corn_Pops K C 100 1 0 90
## 21 Cream_of_Wheat_(Quick) N H 100 3 0 80
## 22 Crispix K C 100 2 0 220
## 24 Double_Chex R C 100 2 0 190
## 25 Froot_Loops K C 100 2 1 125
## 30 Fruity_Pebbles P C 100 1 1 135
## 31 Golden_Crisp P C 100 2 0 45
## 32 Golden_Grahams G C 100 1 1 280
## 33 Grape_Nuts_Flakes P C 100 3 1 140
## 39 Just_Right_Crunchy__Nuggets K C 100 2 1 170
## 41 Kix G C 100 2 1 260
## 42 Life Q C 100 4 2 150
## 57 Quaker_Oat_Squares Q C 100 4 1 135
## 63 Rice_Krispies K C 100 2 0 290
## 68 Special_K K C 100 6 0 230
## 70 Total_Corn_Flakes G C 100 2 1 200
## 73 Triples G C 100 2 1 250
## 75 Wheat_Chex R C 100 3 1 230
## 2 100%_Natural_Bran Q C 110 3 5 15
## 5 Almond_Delight R C 110 2 2 200
## 6 Apple_Cinnamon_Cheerios G C 110 2 2 180
## 7 Apple_Jacks K C 110 2 0 125
## 12 Cheerios G C 110 6 2 290
## 15 Cocoa_Puffs G C 110 1 1 180
## 16 Corn_Chex R C 110 2 0 280
## 17 Corn_Flakes K C 110 2 0 290
## 19 Count_Chocula G C 110 1 1 180
## 20 Crackling_Oat_Bran K C 110 3 3 140
## 26 Frosted_Flakes K C 110 1 0 200
## 34 Grape-Nuts P C 110 3 0 170
## 35 Great_Grains_Pecan P C 110 3 3 75
## 37 Honey_Nut_Cheerios G C 110 3 1 250
## 43 Lucky_Charms G C 110 2 1 180
## 44 Maypo A H 110 4 1 0
## 62 Rice_Chex R C 110 1 0 240
## 67 Smacks K C 110 2 1 70
## 72 Total_Whole_Grain G C 110 3 1 200
## 74 Trix G C 110 1 1 140
## 77 Wheaties_Honey_Gold G C 110 2 1 200
## 11 Cap'n'Crunch Q C 120 1 2 220
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3 2 160
## 38 Honey-comb P C 120 1 0 180
## 49 Nut&Honey_Crunch K C 120 2 1 190
## 59 Raisin_Bran K C 120 3 1 210
## 13 Cinnamon_Toast_Crunch G C 130 1 3 210
## 29 Fruitful_Bran K C 130 3 0 240
## 36 Honey_Graham_Ohs Q C 130 1 2 220
## 52 Oatmeal_Raisin_Crisp G C 130 3 2 170
## 53 Post_Nat._Raisin_Bran P C 130 3 1 200
## 8 Basic_4 G C 140 3 2 210
## 40 Just_Right_Fruit_&_Nut K C 140 3 1 170
## 50 Nutri-Grain_Almond-Raisin K C 140 3 2 220
## 45 Muesli_Raisins,_Dates,_&_Almonds R C 150 4 3 95
## 46 Muesli_Raisins,_Peaches,_&_Pecans R C 150 4 3 150
## 71 Total_Raisin_Bran G C 150 3 1 190
## 47 Mueslix_Crispy_Blend K C 160 3 2 150
## fibre carbo sugars shelf potass vitamins weight cups
## 55 0.0 13.0 0 3 15 0 0.50 1.000
## 4 14.0 8.0 0 3 330 25 1.00 0.500
## 56 1.0 10.0 0 3 50 0 0.50 NA
## 1 10.0 5.0 6 3 280 25 1.00 0.331
## 3 9.0 7.0 5 3 320 25 1.00 0.330
## 64 3.0 16.0 0 1 95 0 0.83 NA
## 65 4.0 19.0 0 1 140 0 1.00 0.670
## 69 3.0 15.0 5 2 90 25 1.00 NA
## 9 4.0 15.0 6 1 125 25 1.00 0.670
## 10 5.0 13.0 5 3 190 25 1.00 0.670
## 23 2.0 11.0 10 3 120 25 1.00 0.750
## 27 3.0 14.0 7 2 100 25 1.00 0.800
## 48 2.0 15.0 6 1 90 25 1.00 1.000
## 51 3.0 18.0 2 3 90 25 1.00 NA
## 54 1.0 20.0 3 3 45 100 1.00 1.000
## 58 2.7 NA NA 1 110 0 1.00 0.670
## 60 2.5 10.5 8 3 140 25 1.00 0.500
## 61 2.0 15.0 6 3 110 25 1.00 0.500
## 66 3.0 20.0 0 1 120 0 1.00 0.670
## 76 3.0 17.0 3 1 110 25 1.00 1.000
## 14 2.0 13.0 7 3 105 25 1.00 0.500
## 18 1.0 13.0 12 2 20 25 1.00 1.000
## 21 1.0 21.0 0 2 NA 0 1.00 1.000
## 22 1.0 21.0 3 3 30 25 1.00 1.000
## 24 1.0 18.0 5 3 80 25 1.00 0.750
## 25 1.0 11.0 13 2 30 25 1.00 1.000
## 30 0.0 13.0 12 2 25 25 1.00 0.750
## 31 0.0 11.0 15 1 40 25 1.00 0.880
## 32 0.0 15.0 9 2 45 25 1.00 0.750
## 33 3.0 15.0 5 3 85 25 1.00 0.880
## 39 1.0 17.0 6 3 60 100 1.00 NA
## 41 0.0 21.0 3 2 40 25 1.00 1.500
## 42 2.0 12.0 6 2 95 25 1.00 0.670
## 57 2.0 14.0 6 3 110 25 1.00 0.500
## 63 0.0 22.0 3 1 35 25 1.00 1.000
## 68 1.0 16.0 3 1 55 25 1.00 1.000
## 70 0.0 21.0 3 3 35 100 1.00 1.000
## 73 0.0 21.0 3 3 60 25 1.00 0.750
## 75 3.0 17.0 3 1 115 25 1.00 0.670
## 2 2.0 8.0 8 3 135 0 1.00 NA
## 5 1.0 14.0 8 3 NA 25 1.00 0.750
## 6 1.5 10.5 10 1 70 25 1.00 0.750
## 7 1.0 11.0 14 2 30 25 1.00 1.000
## 12 2.0 17.0 1 1 105 25 1.00 1.250
## 15 0.0 12.0 13 2 55 25 1.00 1.000
## 16 0.0 22.0 3 1 25 25 1.00 1.000
## 17 1.0 21.0 2 1 35 25 1.00 1.000
## 19 0.0 12.0 13 2 65 25 1.00 1.000
## 20 4.0 10.0 7 3 160 25 1.00 0.500
## 26 1.0 14.0 11 1 25 25 1.00 0.750
## 34 3.0 17.0 3 3 90 25 1.00 0.250
## 35 3.0 13.0 4 3 100 25 1.00 0.330
## 37 1.5 11.5 10 1 90 25 1.00 0.750
## 43 0.0 12.0 12 2 55 25 1.00 1.000
## 44 0.0 16.0 3 2 95 25 1.00 NA
## 62 0.0 23.0 2 1 30 25 1.00 1.130
## 67 1.0 9.0 15 2 40 25 1.00 0.750
## 72 3.0 16.0 3 3 110 100 1.00 1.000
## 74 0.0 13.0 12 2 25 25 1.00 1.000
## 77 1.0 16.0 8 1 60 25 1.00 0.750
## 11 0.0 12.0 12 2 35 25 1.00 0.750
## 28 5.0 12.0 10 3 200 25 1.25 0.670
## 38 0.0 14.0 11 1 35 25 1.00 1.330
## 49 0.0 15.0 9 2 40 25 1.00 0.670
## 59 5.0 14.0 12 2 240 25 1.33 0.750
## 13 0.0 13.0 9 2 45 25 1.00 0.750
## 29 5.0 14.0 12 3 190 25 1.33 0.670
## 36 1.0 12.0 11 2 45 25 1.00 1.000
## 52 1.5 13.5 10 3 120 25 1.25 0.500
## 53 6.0 11.0 14 3 260 25 1.33 0.670
## 8 2.0 18.0 8 3 100 25 1.33 0.750
## 40 2.0 20.0 9 3 95 100 1.30 0.750
## 50 3.0 21.0 7 3 130 25 1.33 0.670
## 45 3.0 16.0 11 3 170 25 NA NA
## 46 3.0 16.0 11 3 170 25 NA NA
## 71 4.0 15.0 14 3 230 100 1.50 1.000
## 47 3.0 17.0 13 3 160 25 1.50 0.670
# build a path from its components; file.path() inserts the separator itself,
# so the same code works on any platform
file.path("..", "sampledata")
## [1] "../sampledata"
# merging and combining data frames
# three small tables that share the key column `name`
byear <- data.frame(
  name  = c("Carl", "Lois", "Matthew", "Marianne", "David"),
  byear = c(1956, 1956, 1986, 1990, 1991)
)
bcity <- data.frame(
  name = c("Carl", "Lois", "Matthew"),
  city = c("Wpg", "Brandon", "Wpg")
)
wcity <- data.frame(
  name = c("Matthew", "Marianne", "David"),
  city = c("Ottawa", "Vancouver", "Victoria")
)
# cbind must be used with caution
# cbind() glues columns side by side by row position only - it does not match
# on `name`, so different people end up on the same row and the result has
# duplicated column names
cbind(bcity, wcity)
## name city name city
## 1 Carl Wpg Matthew Ottawa
## 2 Lois Brandon Marianne Vancouver
## 3 Matthew Wpg David Victoria
# merge - careful of non-matches
# by default merge() joins on the shared column (`name`) and keeps only the
# names present in both tables
merge(byear, bcity)
## name byear city
## 1 Carl 1956 Wpg
## 2 Lois 1956 Brandon
## 3 Matthew 1986 Wpg
# all = TRUE keeps unmatched rows from both tables and fills the gaps with NA
merge(byear, bcity, all = TRUE)
## name byear city
## 1 Carl 1956 Wpg
## 2 David 1991 <NA>
## 3 Lois 1956 Brandon
## 4 Marianne 1990 <NA>
## 5 Matthew 1986 Wpg
# all.y = TRUE keeps every row of the second table (bcity); all of its names
# also appear in byear, so the result equals the default merge above
merge(byear, bcity, all.y = TRUE)
## name byear city
## 1 Carl 1956 Wpg
## 2 Lois 1956 Brandon
## 3 Matthew 1986 Wpg
# multiple merges - Google is your friend
# Reduce() folds merge() across the list, joining one data frame at a time.
# bcity and wcity both have a `city` column, so the key is pinned to `name`;
# without by = "name", merge() joins on every shared column (name AND city)
# and each person is split across several partly-NA rows. With the key pinned,
# the two city columns are kept apart under merge()'s default suffixes:
# city.x comes from bcity and city.y from wcity.
Reduce(function(x, y) merge(x, y, by = "name", all = TRUE),
       list(byear, bcity, wcity))
## name byear city.x city.y
## 1 Carl 1956 Wpg <NA>
## 2 David 1991 <NA> Victoria
## 3 Lois 1956 Brandon <NA>
## 4 Marianne 1990 <NA> Vancouver
## 5 Matthew 1986 Wpg Ottawa