options(useFancyQuotes=FALSE) # plain (non-fancy) quotes so that summary output renders correctly
source("schwarz.functions.r") # local helper file; presumably provides sf.cld.plot.bar() used further down -- confirm
#source('http://www.stat.sfu.ca/~cschwarz/Stat-650/Notes/MyPrograms/schwarz.functions.r')

# Quick RStudio demo: build the integers 1 to 10, display them,
# and plot the vector against itself.
x <- seq_len(10)
x
##  [1]  1  2  3  4  5  6  7  8  9 10
plot(x = x, y = x)

# This script will read in the cereal data set, 
#    do a simple listing,
#    fit a regression line, 
#       draw a scatter plot and add the line to the plot
#    do a single factor crd anova
#       get the compact letter display
#       make some plots


# load required libraries
library(ggplot2)
library(emmeans)

# Load the cereal data from the local csv file.
# as.is=TRUE keeps text columns as character; strip.white=TRUE trims
# blanks around the fields.
cereal <- read.csv(
  file = "../SampleData/cereal.csv",
  header = TRUE,
  as.is = TRUE,
  strip.white = TRUE
)


# A second copy of the data, fetched over the network (needs internet access).
# It is read with header=FALSE, so the column names are attached afterwards.
cereal3 <- read.table(
  file = "http://lib.stat.cmu.edu/datasets/1993.expo/cereal",
  header = FALSE,
  as.is = TRUE,
  strip.white = TRUE
)
names(cereal3) <- c(
  "Name", "mfr", "type", "Calories", "protein", "Fat", "sodium", "fiber",
  "carbo", "sugars", "shelf", "potass", "vitamins", "weight", "cups"
)


# Derive new columns, then check the structure of the data frame.
#   shelfF              - the shelf number stored as a factor (categorical variable)
#   Calories.fr.Protein - 4 times the protein column
cereal[["shelfF"]] <- factor(cereal[["shelf"]])
cereal[["Calories.fr.Protein"]] <- 4 * cereal[["protein"]]

str(cereal)
## 'data.frame':    77 obs. of  17 variables:
##  $ name               : chr  "100%_Bran" "100%_Natural_Bran" "All-Bran" "All-Bran_with_Extra_Fiber" ...
##  $ mfr                : chr  "N" "Q" "K" "K" ...
##  $ type               : chr  "C" "C" "C" "C" ...
##  $ calories           : int  60 110 80 50 110 110 110 140 90 90 ...
##  $ protein            : int  4 3 4 4 2 2 2 3 2 3 ...
##  $ fat                : int  1 5 1 0 2 2 0 2 1 0 ...
##  $ sodium             : int  130 15 260 140 200 180 125 210 200 210 ...
##  $ fiber              : num  10 2 9 14 1 1.5 1 2 4 5 ...
##  $ carbo              : num  5 8 7 8 14 10.5 11 18 15 13 ...
##  $ sugars             : int  6 8 5 0 8 10 14 8 6 5 ...
##  $ shelf              : int  3 3 3 3 3 1 2 3 1 3 ...
##  $ potass             : int  280 135 320 330 NA 70 30 100 125 190 ...
##  $ vitamins           : int  25 0 25 25 25 25 25 25 25 25 ...
##  $ weight             : num  1 1 1 1 1 1 1 1.33 1 1 ...
##  $ cups               : num  0.331 NA 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
##  $ shelfF             : Factor w/ 3 levels "1","2","3": 3 3 3 3 3 1 2 3 1 3 ...
##  $ Calories.fr.Protein: num  16 12 16 16 8 8 8 12 8 12 ...
# List the first few records: rows 1 to 5, all columns (empty column index)
cereal[1:5,]
##                        name mfr type calories protein fat sodium fiber carbo
## 1                 100%_Bran   N    C       60       4   1    130    10     5
## 2         100%_Natural_Bran   Q    C      110       3   5     15     2     8
## 3                  All-Bran   K    C       80       4   1    260     9     7
## 4 All-Bran_with_Extra_Fiber   K    C       50       4   0    140    14     8
## 5            Almond_Delight   R    C      110       2   2    200     1    14
##   sugars shelf potass vitamins weight  cups shelfF Calories.fr.Protein
## 1      6     3    280       25      1 0.331      3                  16
## 2      8     3    135        0      1    NA      3                  12
## 3      5     3    320       25      1 0.330      3                  16
## 4      0     3    330       25      1 0.500      3                  16
## 5      8     3     NA       25      1 0.750      3                   8
# List some variables
# A single column can be extracted with $ ...
cereal$calories
##  [1]  60 110  80  50 110 110 110 140  90  90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100  90 100 100 110  90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160  90 120 140  90 130 130  90  40  50 100
## [58]  90 120  90  90 110 100  80  80  90 110 100  80 100 150 110 100 110 100  90
## [77] 110
# ... or with [row, column] indexing using the column name (same result as above)
cereal[,"calories"]
##  [1]  60 110  80  50 110 110 110 140  90  90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100  90 100 100 110  90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160  90 120 140  90 130 130  90  40  50 100
## [58]  90 120  90  90 110 100  80  80  90 110 100  80 100 150 110 100 110 100  90
## [77] 110
cereal$fat
##  [1] 1 5 1 0 2 2 0 2 1 0 2 2 3 2 1 0 0 0 1 3 0 0 1 0 1 0 0 2 0 1 0 1 1 0 3 2 1 0
## [39] 1 1 1 2 1 1 3 3 2 1 1 2 0 2 1 0 0 0 1 2 1 2 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1
## [77] 1
# Rows 1 to 5 of three named columns; the result is a (smaller) data frame
cereal[1:5,c("name","fat","calories")]
##                        name fat calories
## 1                 100%_Bran   1       60
## 2         100%_Natural_Bran   5      110
## 3                  All-Bran   1       80
## 4 All-Bran_with_Extra_Fiber   0       50
## 5            Almond_Delight   2      110
# Make a basic scatter plot of calories against grams of fat
plotbasic <- ggplot(data=cereal, aes(x=fat, y=calories))+
      ggtitle("Calories vs Fat in cereals")+
      xlab("Grams of Fat")+ylab("Calories/serving")+
      geom_point()
plotbasic

# Save the plot; argument names are spelled out in full rather than relying
# on partial matching (file=, h=, w=).
ggsave(filename='cal-vs-fat1.png', plot=plotbasic,
       height=4, width=6, units="in", dpi=300)

# Same plot, but with jittered points so that coincident observations
# (fat and calories are recorded as whole numbers) do not hide one another.
plotbasic2 <- ggplot(data=cereal, aes(x=fat, y=calories))+
      ggtitle("Calories vs Fat in cereals")+
      xlab("Grams of Fat")+ylab("Calories/serving")+
      geom_jitter()
plotbasic2

# Save the JITTERED plot (previously plotbasic was saved a second time here,
# so cal-vs-fat2.png was just a copy of cal-vs-fat1.png).
ggsave(filename='cal-vs-fat2.png', plot=plotbasic2,
       height=4, width=6, units="in", dpi=300)


# Straight-line regression of calories on grams of fat, followed by the
# usual summary of the fitted model.
fit.calories.fat <- lm(formula = calories ~ fat, data = cereal)
summary(fit.calories.fat)
## 
## Call:
## lm(formula = calories ~ fat, data = cereal)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.132  -5.132   4.868  14.868  45.256 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   95.132      3.141  30.285  < 2e-16 ***
## fat            9.806      2.207   4.443 3.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.36 on 75 degrees of freedom
## Multiple R-squared:  0.2084, Adjusted R-squared:  0.1978 
## F-statistic: 19.74 on 1 and 75 DF,  p-value: 3.009e-05
# Extractor functions applied to the fitted lm object
anova(fit.calories.fat) # careful: anova() on an lm fit reports Type I (sequential) SS
## Analysis of Variance Table
## 
## Response: calories
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## fat        1  7402.9  7402.9  19.743 3.009e-05 ***
## Residuals 75 28121.8   375.0                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(fit.calories.fat)
## (Intercept)         fat 
##   95.131579    9.806005
sqrt(diag(vcov(fit.calories.fat))) # extract the SE: square roots of the diagonal of the coefficient covariance matrix
## (Intercept)         fat 
##    3.141224    2.206897
confint(fit.calories.fat) # confidence intervals on parameters (95% by default, see the column headings)
##                 2.5 %    97.5 %
## (Intercept) 88.873939 101.38922
## fat          5.409642  14.20237
# The summary object is a list; names() shows which components can be pulled out with $
names(summary(fit.calories.fat))
##  [1] "call"          "terms"         "residuals"     "coefficients" 
##  [5] "aliased"       "sigma"         "df"            "r.squared"    
##  [9] "adj.r.squared" "fstatistic"    "cov.unscaled"
summary(fit.calories.fat)$r.squared
## [1] 0.2083875
# sigma is the residual standard error reported by summary()
summary(fit.calories.fat)$sigma
## [1] 19.36381
class(fit.calories.fat)
## [1] "lm"
# List the methods available for objects of this class
methods(class=class(fit.calories.fat))
##  [1] add1           alias          anova          case.names     coerce        
##  [6] confint        cooks.distance deviance       dfbeta         dfbetas       
## [11] drop1          dummy.coef     effects        emm_basis      extractAIC    
## [16] family         formula        fortify        hatvalues      influence     
## [21] initialize     kappa          labels         logLik         model.frame   
## [26] model.matrix   nobs           plot           predict        print         
## [31] proj           qr             recover_data   residuals      rstandard     
## [36] rstudent       show           simulate       slotsFromS3    summary       
## [41] variable.names vcov          
## see '?methods' for accessing help and source code
# Add the fitted line to the (jittered) scatter plot; and save.
# The coefficients are picked out by NAME rather than by position, so the
# line stays correct even if the model formula is later extended.
plotline <- plotbasic2 +
  geom_abline(intercept=coef(fit.calories.fat)[["(Intercept)"]],
              slope    =coef(fit.calories.fat)[["fat"]])
plotline

# Argument names are spelled out in full rather than relying on partial
# matching (file=, h=, w=).
ggsave(filename="cal-vs-fat3.png", plot=plotline,
       height=4, width=6, units="in", dpi=300)


# Or, if you don't want to do the actual fit yourself, let ggplot add the
# regression line directly.
plot.calories.fat <- ggplot(data = cereal, mapping = aes(x = fat, y = calories)) +
  geom_jitter(shape = 1) +  # shape 1 = hollow circles
  geom_smooth(
    method = lm,            # linear regression line
    se = FALSE              # no shaded confidence region
  )
plot.calories.fat
## `geom_smooth()` using formula 'y ~ x'

# Do a simple single factor ANOVA
# Is the mean amount of sugars the same for all shelves?
# (the response in the model below is sugars, not calories)
# Need to use a FACTOR variable for the categorical variable
fit.sugars.shelf <- lm( sugars ~ shelfF, data=cereal)
anova(fit.sugars.shelf)
## Analysis of Variance Table
## 
## Response: sugars
##           Df  Sum Sq Mean Sq F value   Pr(>F)   
## shelfF     2  220.23 110.117  6.6013 0.002316 **
## Residuals 73 1217.71  16.681                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimate the marginal means along with confidence limits and Tukey multiple comparison.
# emmeans() gives the estimated mean for each level of shelfF.
fit.sugars.shelf.emmo <- emmeans::emmeans(fit.sugars.shelf, ~shelfF)
# cld() builds the compact letter display; it is called with multcomp::, so the
# multcomp package must be installed even though it is not attached with library().
# As the recorded note below shows, adjust='tukey' is switched to 'sidak' for the
# confidence limits; the pairwise p-values still use the Tukey adjustment.
fit.sugars.shelf.cld <- multcomp::cld(fit.sugars.shelf.emmo, adjust='tukey')
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
fit.sugars.shelf.cld
##  shelfF emmean    SE df lower.CL upper.CL .group
##  1        5.11 0.937 73     2.82     7.40  1    
##  3        6.53 0.681 73     4.86     8.19  1    
##  2        9.62 0.891 73     7.44    11.80   2   
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## significance level used: alpha = 0.05 
## NOTE: Compact letter displays can be misleading
##       because they show NON-findings rather than findings.
##       Consider using 'pairs()', 'pwpp()', or 'pwpm()' instead.
# Plot the compact letter display using sf.cld.plot.bar(), a helper that is
# not defined in this script (presumably from schwarz.functions.r -- confirm).
cld.plot <- sf.cld.plot.bar(fit.sugars.shelf.cld, "shelfF", order=FALSE)
cld.plot

# Save the plot; argument names are spelled out in full rather than relying
# on partial matching (file=, h=, w=).
# NOTE(review): the file name says "fat" but the display comes from the
# sugars ~ shelfF model; the name is left unchanged in case other material
# refers to it -- consider renaming.
ggsave(filename="fat-vs-shelf.png", plot=cld.plot,
       height=4, width=6, units="in", dpi=200)
# Estimate the pairwise differences between the shelf means
# (p-values are Tukey-adjusted, see the note at the foot of the output)
pairs(fit.sugars.shelf.emmo)
##  contrast estimate   SE df t.ratio p.value
##  1 - 2       -4.51 1.29 73  -3.490  0.0023
##  1 - 3       -1.42 1.16 73  -1.228  0.4405
##  2 - 3        3.09 1.12 73   2.756  0.0199
## 
## P value adjustment: tukey method for comparing a family of 3 estimates
# an alternate way to look at pairwise comparisons (a plot of the pairwise p-values)
pwpp(fit.sugars.shelf.emmo)

# Fun with vectors
# A vector holds elements of ONE type; c() combines values into a vector.

age <- c(56, 56, 28, 23, 22)
height <- c(185, 162, 185, 167, 190)
f.names <- c('Carl', "Lois", 'Matthew', 'Marianne', 'David')
over.30 <- c(TRUE, TRUE, FALSE, FALSE, FALSE) # AVOID the abbreviations T/F: they are ordinary variables that can be reassigned

odd <- c(2.3, 'Carl')  # surprising, but look at result! (the number is coerced to character)

length(age)
## [1] 5
# (this line used to read length(family); family is a function from the
#  stats package, which is why the recorded answer was 1 rather than 5)
length(f.names) # number of elements not lengths of elements
## [1] 5
str(age)    # what is the structure of age?
##  num [1:5] 56 56 28 23 22
str(f.names)
##  chr [1:5] "Carl" "Lois" "Matthew" "Marianne" "David"
# The c() function is very versatile
ah <- c(age, height)
ah
##  [1]  56  56  28  23  22 185 162 185 167 190
age0age <- c(age, 0, age)
age0age
##  [1] 56 56 28 23 22  0 56 56 28 23 22
length(age0age)
## [1] 11
odd <- c(f.names, over.30) # ?? mixing character and logical: everything becomes character
odd
# Data frames: a set of equal-length vectors (columns) kept together
age <- c(56, 56, 28, 23, 22)
height <- c(185, 162, 185, 167, 190)
f.names <- c("Carl", "Lois", "Matthew", "Marianne", "David")
over.30 <- age > 30

# Columns are named explicitly; names stay character (not factor)
schwarz <- data.frame(
  f.names = f.names,
  age = age,
  height = height,
  over.30 = over.30,
  stringsAsFactors = FALSE
)
schwarz
##    f.names age height over.30
## 1     Carl  56    185    TRUE
## 2     Lois  56    162    TRUE
## 3  Matthew  28    185   FALSE
## 4 Marianne  23    167   FALSE
## 5    David  22    190   FALSE
str(schwarz)
## 'data.frame':    5 obs. of  4 variables:
##  $ f.names: chr  "Carl" "Lois" "Matthew" "Marianne" ...
##  $ age    : num  56 56 28 23 22
##  $ height : num  185 162 185 167 190
##  $ over.30: logi  TRUE TRUE FALSE FALSE FALSE
# length() of a data frame counts its columns, not its rows
length(schwarz)
## [1] 4
dim(schwarz)
## [1] 5 4
nrow(schwarz)
## [1] 5
ncol(schwarz)
## [1] 4
names(schwarz)
## [1] "f.names" "age"     "height"  "over.30"
# Most commonly created from data.
# Re-reading the file gives a fresh data frame containing only the columns in
# the csv file. The directory name is spelled exactly as in the first read of
# this file near the top of the script ('SampleData'); the lower-case spelling
# used here before fails on case-sensitive file systems.
# NOTE(review): confirm 'SampleData' matches the directory name on disk.
cereal <- read.csv('../SampleData/cereal.csv', 
                   header=TRUE, as.is=TRUE, strip.white=TRUE)

str(cereal)  # this function is VERY useful when things seem to go wrong
## 'data.frame':    77 obs. of  15 variables:
##  $ name    : chr  "100%_Bran" "100%_Natural_Bran" "All-Bran" "All-Bran_with_Extra_Fiber" ...
##  $ mfr     : chr  "N" "Q" "K" "K" ...
##  $ type    : chr  "C" "C" "C" "C" ...
##  $ calories: int  60 110 80 50 110 110 110 140 90 90 ...
##  $ protein : int  4 3 4 4 2 2 2 3 2 3 ...
##  $ fat     : int  1 5 1 0 2 2 0 2 1 0 ...
##  $ sodium  : int  130 15 260 140 200 180 125 210 200 210 ...
##  $ fiber   : num  10 2 9 14 1 1.5 1 2 4 5 ...
##  $ carbo   : num  5 8 7 8 14 10.5 11 18 15 13 ...
##  $ sugars  : int  6 8 5 0 8 10 14 8 6 5 ...
##  $ shelf   : int  3 3 3 3 3 1 2 3 1 3 ...
##  $ potass  : int  280 135 320 330 NA 70 30 100 125 190 ...
##  $ vitamins: int  25 0 25 25 25 25 25 25 25 25 ...
##  $ weight  : num  1 1 1 1 1 1 1 1.33 1 1 ...
##  $ cups    : num  0.331 NA 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
# Size and column names of the data frame
length(cereal) # number of vectors (columns), not length of vectors
## [1] 15
dim(cereal)    # rows, then columns
## [1] 77 15
nrow(cereal)
## [1] 77
ncol(cereal)
## [1] 15
names(cereal)
##  [1] "name"     "mfr"      "type"     "calories" "protein"  "fat"     
##  [7] "sodium"   "fiber"    "carbo"    "sugars"   "shelf"    "potass"  
## [13] "vitamins" "weight"   "cups"
# How to refer to parts of data frame
names(cereal)
##  [1] "name"     "mfr"      "type"     "calories" "protein"  "fat"     
##  [7] "sodium"   "fiber"    "carbo"    "sugars"   "shelf"    "potass"  
## [13] "vitamins" "weight"   "cups"
# dataframe$column extracts one column as a vector
cereal$name
##  [1] "100%_Bran"                             
##  [2] "100%_Natural_Bran"                     
##  [3] "All-Bran"                              
##  [4] "All-Bran_with_Extra_Fiber"             
##  [5] "Almond_Delight"                        
##  [6] "Apple_Cinnamon_Cheerios"               
##  [7] "Apple_Jacks"                           
##  [8] "Basic_4"                               
##  [9] "Bran_Chex"                             
## [10] "Bran_Flakes"                           
## [11] "Cap'n'Crunch"                          
## [12] "Cheerios"                              
## [13] "Cinnamon_Toast_Crunch"                 
## [14] "Clusters"                              
## [15] "Cocoa_Puffs"                           
## [16] "Corn_Chex"                             
## [17] "Corn_Flakes"                           
## [18] "Corn_Pops"                             
## [19] "Count_Chocula"                         
## [20] "Crackling_Oat_Bran"                    
## [21] "Cream_of_Wheat_(Quick)"                
## [22] "Crispix"                               
## [23] "Crispy_Wheat_&_Raisins"                
## [24] "Double_Chex"                           
## [25] "Froot_Loops"                           
## [26] "Frosted_Flakes"                        
## [27] "Frosted_Mini-Wheats"                   
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"                         
## [30] "Fruity_Pebbles"                        
## [31] "Golden_Crisp"                          
## [32] "Golden_Grahams"                        
## [33] "Grape_Nuts_Flakes"                     
## [34] "Grape-Nuts"                            
## [35] "Great_Grains_Pecan"                    
## [36] "Honey_Graham_Ohs"                      
## [37] "Honey_Nut_Cheerios"                    
## [38] "Honey-comb"                            
## [39] "Just_Right_Crunchy__Nuggets"           
## [40] "Just_Right_Fruit_&_Nut"                
## [41] "Kix"                                   
## [42] "Life"                                  
## [43] "Lucky_Charms"                          
## [44] "Maypo"                                 
## [45] "Muesli_Raisins,_Dates,_&_Almonds"      
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"     
## [47] "Mueslix_Crispy_Blend"                  
## [48] "Multi-Grain_Cheerios"                  
## [49] "Nut&Honey_Crunch"                      
## [50] "Nutri-Grain_Almond-Raisin"             
## [51] "Nutri-grain_Wheat"                     
## [52] "Oatmeal_Raisin_Crisp"                  
## [53] "Post_Nat._Raisin_Bran"                 
## [54] "Product_19"                            
## [55] "Puffed_Rice"                           
## [56] "Puffed_Wheat"                          
## [57] "Quaker_Oat_Squares"                    
## [58] "Quaker_Oatmeal"                        
## [59] "Raisin_Bran"                           
## [60] "Raisin_Nut_Bran"                       
## [61] "Raisin_Squares"                        
## [62] "Rice_Chex"                             
## [63] "Rice_Krispies"                         
## [64] "Shredded_Wheat"                        
## [65] "Shredded_Wheat_'n'Bran"                
## [66] "Shredded_Wheat_spoon_size"             
## [67] "Smacks"                                
## [68] "Special_K"                             
## [69] "Strawberry_Fruit_Wheats"               
## [70] "Total_Corn_Flakes"                     
## [71] "Total_Raisin_Bran"                     
## [72] "Total_Whole_Grain"                     
## [73] "Triples"                               
## [74] "Trix"                                  
## [75] "Wheat_Chex"                            
## [76] "Wheaties"                              
## [77] "Wheaties_Honey_Gold"
# Three equivalent ways to get the calories column as a vector
cereal$calories
##  [1]  60 110  80  50 110 110 110 140  90  90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100  90 100 100 110  90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160  90 120 140  90 130 130  90  40  50 100
## [58]  90 120  90  90 110 100  80  80  90 110 100  80 100 150 110 100 110 100  90
## [77] 110
cereal[ , "calories"] # first index missing = ALL rows
##  [1]  60 110  80  50 110 110 110 140  90  90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100  90 100 100 110  90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160  90 120 140  90 130 130  90  40  50 100
## [58]  90 120  90  90 110 100  80  80  90 110 100  80 100 150 110 100 110 100  90
## [77] 110
#calories  # doesn't work because the vector is hidden inside the data frame
with(cereal, calories) # with() looks the name up inside the data frame; careful of case (names are case sensitive)
##  [1]  60 110  80  50 110 110 110 140  90  90 120 110 130 100 110 110 110 100 110
## [20] 110 100 100  90 100 100 110  90 120 130 100 100 100 100 110 110 130 110 120
## [39] 100 140 100 100 110 110 150 150 160  90 120 140  90 130 130  90  40  50 100
## [58]  90 120  90  90 110 100  80  80  90 110 100  80 100 150 110 100 110 100  90
## [77] 110
# Second index missing = ALL columns: first row only
cereal[1,]
##        name mfr type calories protein fat sodium fiber carbo sugars shelf
## 1 100%_Bran   N    C       60       4   1    130    10     5      6     3
##   potass vitamins weight  cups
## 1    280       25      1 0.331
# Rows 1 to 5, all columns
cereal[1:5,]
##                        name mfr type calories protein fat sodium fiber carbo
## 1                 100%_Bran   N    C       60       4   1    130    10     5
## 2         100%_Natural_Bran   Q    C      110       3   5     15     2     8
## 3                  All-Bran   K    C       80       4   1    260     9     7
## 4 All-Bran_with_Extra_Fiber   K    C       50       4   0    140    14     8
## 5            Almond_Delight   R    C      110       2   2    200     1    14
##   sugars shelf potass vitamins weight  cups
## 1      6     3    280       25      1 0.331
## 2      8     3    135        0      1    NA
## 3      5     3    320       25      1 0.330
## 4      0     3    330       25      1 0.500
## 5      8     3     NA       25      1 0.750
# All rows of the first column, selected by position (the name column, returned as a vector)
cereal[, 1]
##  [1] "100%_Bran"                             
##  [2] "100%_Natural_Bran"                     
##  [3] "All-Bran"                              
##  [4] "All-Bran_with_Extra_Fiber"             
##  [5] "Almond_Delight"                        
##  [6] "Apple_Cinnamon_Cheerios"               
##  [7] "Apple_Jacks"                           
##  [8] "Basic_4"                               
##  [9] "Bran_Chex"                             
## [10] "Bran_Flakes"                           
## [11] "Cap'n'Crunch"                          
## [12] "Cheerios"                              
## [13] "Cinnamon_Toast_Crunch"                 
## [14] "Clusters"                              
## [15] "Cocoa_Puffs"                           
## [16] "Corn_Chex"                             
## [17] "Corn_Flakes"                           
## [18] "Corn_Pops"                             
## [19] "Count_Chocula"                         
## [20] "Crackling_Oat_Bran"                    
## [21] "Cream_of_Wheat_(Quick)"                
## [22] "Crispix"                               
## [23] "Crispy_Wheat_&_Raisins"                
## [24] "Double_Chex"                           
## [25] "Froot_Loops"                           
## [26] "Frosted_Flakes"                        
## [27] "Frosted_Mini-Wheats"                   
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"                         
## [30] "Fruity_Pebbles"                        
## [31] "Golden_Crisp"                          
## [32] "Golden_Grahams"                        
## [33] "Grape_Nuts_Flakes"                     
## [34] "Grape-Nuts"                            
## [35] "Great_Grains_Pecan"                    
## [36] "Honey_Graham_Ohs"                      
## [37] "Honey_Nut_Cheerios"                    
## [38] "Honey-comb"                            
## [39] "Just_Right_Crunchy__Nuggets"           
## [40] "Just_Right_Fruit_&_Nut"                
## [41] "Kix"                                   
## [42] "Life"                                  
## [43] "Lucky_Charms"                          
## [44] "Maypo"                                 
## [45] "Muesli_Raisins,_Dates,_&_Almonds"      
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"     
## [47] "Mueslix_Crispy_Blend"                  
## [48] "Multi-Grain_Cheerios"                  
## [49] "Nut&Honey_Crunch"                      
## [50] "Nutri-Grain_Almond-Raisin"             
## [51] "Nutri-grain_Wheat"                     
## [52] "Oatmeal_Raisin_Crisp"                  
## [53] "Post_Nat._Raisin_Bran"                 
## [54] "Product_19"                            
## [55] "Puffed_Rice"                           
## [56] "Puffed_Wheat"                          
## [57] "Quaker_Oat_Squares"                    
## [58] "Quaker_Oatmeal"                        
## [59] "Raisin_Bran"                           
## [60] "Raisin_Nut_Bran"                       
## [61] "Raisin_Squares"                        
## [62] "Rice_Chex"                             
## [63] "Rice_Krispies"                         
## [64] "Shredded_Wheat"                        
## [65] "Shredded_Wheat_'n'Bran"                
## [66] "Shredded_Wheat_spoon_size"             
## [67] "Smacks"                                
## [68] "Special_K"                             
## [69] "Strawberry_Fruit_Wheats"               
## [70] "Total_Corn_Flakes"                     
## [71] "Total_Raisin_Bran"                     
## [72] "Total_Whole_Grain"                     
## [73] "Triples"                               
## [74] "Trix"                                  
## [75] "Wheat_Chex"                            
## [76] "Wheaties"                              
## [77] "Wheaties_Honey_Gold"
# All rows of columns 1 to 5; several columns give back a data frame
cereal[, 1:5]
##                                      name mfr type calories protein
## 1                               100%_Bran   N    C       60       4
## 2                       100%_Natural_Bran   Q    C      110       3
## 3                                All-Bran   K    C       80       4
## 4               All-Bran_with_Extra_Fiber   K    C       50       4
## 5                          Almond_Delight   R    C      110       2
## 6                 Apple_Cinnamon_Cheerios   G    C      110       2
## 7                             Apple_Jacks   K    C      110       2
## 8                                 Basic_4   G    C      140       3
## 9                               Bran_Chex   R    C       90       2
## 10                            Bran_Flakes   P    C       90       3
## 11                           Cap'n'Crunch   Q    C      120       1
## 12                               Cheerios   G    C      110       6
## 13                  Cinnamon_Toast_Crunch   G    C      130       1
## 14                               Clusters   G    C      100       3
## 15                            Cocoa_Puffs   G    C      110       1
## 16                              Corn_Chex   R    C      110       2
## 17                            Corn_Flakes   K    C      110       2
## 18                              Corn_Pops   K    C      100       1
## 19                          Count_Chocula   G    C      110       1
## 20                     Crackling_Oat_Bran   K    C      110       3
## 21                 Cream_of_Wheat_(Quick)   N    H      100       3
## 22                                Crispix   K    C      100       2
## 23                 Crispy_Wheat_&_Raisins   G    C       90       2
## 24                            Double_Chex   R    C      100       2
## 25                            Froot_Loops   K    C      100       2
## 26                         Frosted_Flakes   K    C      110       1
## 27                    Frosted_Mini-Wheats   K    C       90       3
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats   P    C      120       3
## 29                          Fruitful_Bran   K    C      130       3
## 30                         Fruity_Pebbles   P    C      100       1
## 31                           Golden_Crisp   P    C      100       2
## 32                         Golden_Grahams   G    C      100       1
## 33                      Grape_Nuts_Flakes   P    C      100       3
## 34                             Grape-Nuts   P    C      110       3
## 35                     Great_Grains_Pecan   P    C      110       3
## 36                       Honey_Graham_Ohs   Q    C      130       1
## 37                     Honey_Nut_Cheerios   G    C      110       3
## 38                             Honey-comb   P    C      120       1
## 39            Just_Right_Crunchy__Nuggets   K    C      100       2
## 40                 Just_Right_Fruit_&_Nut   K    C      140       3
## 41                                    Kix   G    C      100       2
## 42                                   Life   Q    C      100       4
## 43                           Lucky_Charms   G    C      110       2
## 44                                  Maypo   A    H      110       4
## 45       Muesli_Raisins,_Dates,_&_Almonds   R    C      150       4
## 46      Muesli_Raisins,_Peaches,_&_Pecans   R    C      150       4
## 47                   Mueslix_Crispy_Blend   K    C      160       3
## 48                   Multi-Grain_Cheerios   G    C       90       2
## 49                       Nut&Honey_Crunch   K    C      120       2
## 50              Nutri-Grain_Almond-Raisin   K    C      140       3
## 51                      Nutri-grain_Wheat   K    C       90       3
## 52                   Oatmeal_Raisin_Crisp   G    C      130       3
## 53                  Post_Nat._Raisin_Bran   P    C      130       3
## 54                             Product_19   K    C       90       3
## 55                            Puffed_Rice   Q    C       40       1
## 56                           Puffed_Wheat   Q    C       50       2
## 57                     Quaker_Oat_Squares   Q    C      100       4
## 58                         Quaker_Oatmeal   Q    H       90       5
## 59                            Raisin_Bran   K    C      120       3
## 60                        Raisin_Nut_Bran   G    C       90       3
## 61                         Raisin_Squares   K    C       90       2
## 62                              Rice_Chex   R    C      110       1
## 63                          Rice_Krispies   K    C      100       2
## 64                         Shredded_Wheat   N    C       80       2
## 65                 Shredded_Wheat_'n'Bran   N    C       80       3
## 66              Shredded_Wheat_spoon_size   N    C       90       3
## 67                                 Smacks   K    C      110       2
## 68                              Special_K   K    C      100       6
## 69                Strawberry_Fruit_Wheats   N    C       80       2
## 70                      Total_Corn_Flakes   G    C      100       2
## 71                      Total_Raisin_Bran   G    C      150       3
## 72                      Total_Whole_Grain   G    C      110       3
## 73                                Triples   G    C      100       2
## 74                                   Trix   G    C      110       1
## 75                             Wheat_Chex   R    C      100       3
## 76                               Wheaties   G    C       90       3
## 77                    Wheaties_Honey_Gold   G    C      110       2
# Rows 1 to 4 of columns 1 to 5
cereal[1:4, 1:5]
##                        name mfr type calories protein
## 1                 100%_Bran   N    C       60       4
## 2         100%_Natural_Bran   Q    C      110       3
## 3                  All-Bran   K    C       80       4
## 4 All-Bran_with_Extra_Fiber   K    C       50       4
# Adding and removing variables from data frames
# Assigning to a new name with $ creates the column (element-by-element division)
cereal$CalPerGramFat <- cereal$calories / cereal$fat
cereal$CalPerGramFat  # some interesting values! (Inf wherever fat is 0)
##  [1]  60.00000  22.00000  80.00000       Inf  55.00000  55.00000       Inf
##  [8]  70.00000  90.00000       Inf  60.00000  55.00000  43.33333  50.00000
## [15] 110.00000       Inf       Inf       Inf 110.00000  36.66667       Inf
## [22]       Inf  90.00000       Inf 100.00000       Inf       Inf  60.00000
## [29]       Inf 100.00000       Inf 100.00000 100.00000       Inf  36.66667
## [36]  65.00000 110.00000       Inf 100.00000 140.00000 100.00000  50.00000
## [43] 110.00000 110.00000  50.00000  50.00000  80.00000  90.00000 120.00000
## [50]  70.00000       Inf  65.00000 130.00000       Inf       Inf       Inf
## [57] 100.00000  45.00000 120.00000  45.00000       Inf       Inf       Inf
## [64]       Inf       Inf       Inf 110.00000       Inf       Inf 100.00000
## [71] 150.00000 110.00000 100.00000 110.00000 100.00000  90.00000 110.00000
cereal$CalPerGramFat <- NULL # removes this variable from df
# More fun with vectors
# Operations with vectors

age <- c(56, 56, 28, 23, 22)
age.next.year <- age + 1   # the single value 1 is added to every element
yob <- 2013- age  # element by element operations if same length; 2013 is hard-coded as the reference year

# Operations on vectors
x <- c(0.5, 1, 1.5, 2, 4, 6, 8, 9, 10, 12)
length(x)
## [1] 10
str(x)
##  num [1:10] 0.5 1 1.5 2 4 6 8 9 10 12
sqrt(x)  # function applied to EACH element
##  [1] 0.7071068 1.0000000 1.2247449 1.4142136 2.0000000 2.4494897 2.8284271
##  [8] 3.0000000 3.1622777 3.4641016
# Other useful functions: these summarise the whole vector
range(x)
## [1]  0.5 12.0
mean(x)
## [1] 5.4
sd(x)
## [1] 4.175324
median(x)
## [1] 5
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.500   1.625   5.000   5.400   8.750  12.000
sum(x)
## [1] 54
# Compare the min() and pmin() functions
# min() returns ONE value: the smallest over all of its arguments
min(x, 3)
## [1] 0.5
# pmin() works element by element: each element of x is compared with 3
pmin(x, 3)
##  [1] 0.5 1.0 1.5 2.0 3.0 3.0 3.0 3.0 3.0 3.0
# Exercise: summary statistics on columns of the cereal data frame
mean(cereal$calories)
## [1] 105.0649
max(cereal$fat)
## [1] 5
min(cereal$fat)
## [1] 0
range(cereal$fat)
## [1] 0 5
# weight contains missing values, so the plain mean is NA ...
mean(cereal$weight)
## [1] NA
# ... unless the missing values are dropped first with na.rm=TRUE
mean(cereal$weight, na.rm=TRUE)
## [1] 1.0304
# Simple increments
help(":")  # be sure to put operators in quotes for the help() function
5:10
## [1]  5  6  7  8  9 10
# The start need not be an integer, and the sequence counts down when from > to
10.2:3
## [1] 10.2  9.2  8.2  7.2  6.2  5.2  4.2  3.2
5:10-1  # careful : is evaluated prior to arithmetic, so this is (5:10)-1 and not 5:(10-1)
## [1] 4 5 6 7 8 9
# seq() takes from, to, by (in that order when given by position)
seq(1, 100, 10)
##  [1]  1 11 21 31 41 51 61 71 81 91
# Named arguments can be given in any order
seq(to=100, from=1, by=10)
##  [1]  1 11 21 31 41 51 61 71 81 91
# replicate things
x <- c(5, 6, 7)
help(rep)
rep(TRUE, 10)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# times= repeats the whole vector
rep(x, times=2)
## [1] 5 6 7 5 6 7
# length.out= repeats (and truncates) until the requested length is reached
rep(x, length.out=8)
## [1] 5 6 7 5 6 7 5 6
# each= repeats every element in turn
rep(x, each=2)
## [1] 5 5 6 6 7 7
# Indexing: extracting elements of a vector
x <- c(5:9, 12:15, 34:37)
x
##  [1]  5  6  7  8  9 12 13 14 15 34 35 36 37
# Simple indexing
x[2]  # note the use of SQUARE brackets for indexing
## [1] 6
x[c(2,3,7)]  # the index can also be a vector
## [1]  6  7 13
#x[2,3,7]     # oops, not a proper index for a vector

n <- 10
x[n]   # indices can be variables. What does this mean? (here: the 10th element)
## [1] 34
inx <- c(3, 5, 7)
#x(inx)   # Oops wrong types of brackets
x[inx]
## [1]  7  9 13
# using selection vector: a logical vector, TRUE where the condition holds
select <- x >6  & x < 10
select
##  [1] FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
# TRUE counts as 1, so sum() gives the number of selected elements
sum(select)
## [1] 3
# Indexing with the logical vector keeps the elements where it is TRUE
x[select]
## [1] 7 8 9
# Indexing
x <- c(5:9, 12:15, 34:37)
x
##  [1]  5  6  7  8  9 12 13 14 15 34 35 36 37
# Simple replacement indexing: an index on the left of <- overwrites elements
x
##  [1]  5  6  7  8  9 12 13 14 15 34 35 36 37
x[2] <- 100  # note the use of SQUARE brackets for indexing
x
##  [1]   5 100   7   8   9  12  13  14  15  34  35  36  37
x[c(2, 3, 7)] <- 200  # the index can also be a vector
x
##  [1]   5 200 200   8   9  12 200  14  15  34  35  36  37
x[c(2, 3, 7)] <- c(200, 201)  # notice the warning message
## Warning in x[c(2, 3, 7)] <- c(200, 201): number of items to replace is not a
## multiple of replacement length
x
##  [1]   5 200 201   8   9  12 200  14  15  34  35  36  37
n <- 10
x[n] <- 500  # indices can be variables. What does this mean?
inx <- c(8, 9, 10)
x[inx] <- c(300, 400)  # the two values are recycled over three positions
## Warning in x[inx] <- c(300, 400): number of items to replace is not a multiple
## of replacement length
x
##  [1]   5 200 201   8   9  12 200 300 400 300  35  36  37
# Using a logical selection vector to choose what gets replaced
select <- x > 6 & x < 10
select
##  [1] FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
sum(select)
## [1] 2
x[select] <- -1
x
##  [1]   5 200 201  -1  -1  12 200 300 400 300  35  36  37
# Count the cells of the whole data frame that equal -1 (ignoring missing cells)
sum(cereal == -1, na.rm = TRUE)
## [1] 0
# Count the missing cells across the whole data frame
sum(is.na(cereal))
## [1] 15
# Dropping elements: a negative index removes those positions from the result
x
##  [1]   5 200 201  -1  -1  12 200 300 400 300  35  36  37
x[-2]  # note the use of SQUARE brackets for indexing
##  [1]   5 201  -1  -1  12 200 300 400 300  35  36  37
x[-c(2, 3, 7)]  # the index can also be a vector
##  [1]   5  -1  -1  12 300 400 300  35  36  37
n <- -10
x[n]   # indices can be variables. What does this mean?
##  [1]   5 200 201  -1  -1  12 200 300 400  35  36  37
inx <- c(3, 5, 7)
x[-inx]  # negate a vector of positions to drop all of them
##  [1]   5 200  -1  12 300 400 300  35  36  37
# Using logical vectors to select elements
x <- c(5:9, 12:15, 34:37)
x
##  [1]  5  6  7  8  9 12 13 14 15 34 35 36 37
# Selecting elements where entry is TRUE
x > 10
##  [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [13]  TRUE
x[x > 10]
## [1] 12 13 14 15 34 35 36 37
x[x > 10 & x < 20]  # & combines the two conditions element by element
## [1] 12 13 14 15
x[x %% 2 == 0]  # note the use of == to test for equality
## [1]  6  8 12 14 34 36
x[x > 10] <- 500  # the same logical index can be used for replacement
x
##  [1]   5   6   7   8   9 500 500 500 500 500 500 500 500
# We want to rename the "fiber" column to "fibre"
# Avoid an explicit position (i.e. names(cereal)[8] <- "fibre"): it is not
# robust, because it renames the wrong column if the column order changes.
names(cereal)
##  [1] "name"     "mfr"      "type"     "calories" "protein"  "fat"     
##  [7] "sodium"   "fiber"    "carbo"    "sugars"   "shelf"    "potass"  
## [13] "vitamins" "weight"   "cups"
# Select the column by an exact name comparison. A substring search such as
# grepl("fiber", names(cereal)) would also rename every other column whose
# name merely contains "fiber", producing duplicate column names.
names(cereal)[names(cereal) == "fiber"] <- "fibre"
names(cereal)
##  [1] "name"     "mfr"      "type"     "calories" "protein"  "fat"     
##  [7] "sodium"   "fibre"    "carbo"    "sugars"   "shelf"    "potass"  
## [13] "vitamins" "weight"   "cups"
# Select the rows for certain cereal manufacturers; %in% tests membership in a set
cereal[cereal$mfr %in% c("P", "A"), ] # don't forget the last comma
##                                      name mfr type calories protein fat sodium
## 10                            Bran_Flakes   P    C       90       3   0    210
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats   P    C      120       3   2    160
## 30                         Fruity_Pebbles   P    C      100       1   1    135
## 31                           Golden_Crisp   P    C      100       2   0     45
## 33                      Grape_Nuts_Flakes   P    C      100       3   1    140
## 34                             Grape-Nuts   P    C      110       3   0    170
## 35                     Great_Grains_Pecan   P    C      110       3   3     75
## 38                             Honey-comb   P    C      120       1   0    180
## 44                                  Maypo   A    H      110       4   1      0
## 53                  Post_Nat._Raisin_Bran   P    C      130       3   1    200
##    fibre carbo sugars shelf potass vitamins weight cups
## 10     5    13      5     3    190       25   1.00 0.67
## 28     5    12     10     3    200       25   1.25 0.67
## 30     0    13     12     2     25       25   1.00 0.75
## 31     0    11     15     1     40       25   1.00 0.88
## 33     3    15      5     3     85       25   1.00 0.88
## 34     3    17      3     3     90       25   1.00 0.25
## 35     3    13      4     3    100       25   1.00 0.33
## 38     0    14     11     1     35       25   1.00 1.33
## 44     0    16      3     2     95       25   1.00   NA
## 53     6    11     14     3    260       25   1.33 0.67
# grep() returns the positions of the names containing "Bran" (case-sensitive)
cereal[grep("Bran", cereal$name), ] # don't forget the last comma
##                         name mfr type calories protein fat sodium fibre carbo
## 1                  100%_Bran   N    C       60       4   1    130  10.0   5.0
## 2          100%_Natural_Bran   Q    C      110       3   5     15   2.0   8.0
## 3                   All-Bran   K    C       80       4   1    260   9.0   7.0
## 4  All-Bran_with_Extra_Fiber   K    C       50       4   0    140  14.0   8.0
## 9                  Bran_Chex   R    C       90       2   1    200   4.0  15.0
## 10               Bran_Flakes   P    C       90       3   0    210   5.0  13.0
## 20        Crackling_Oat_Bran   K    C      110       3   3    140   4.0  10.0
## 29             Fruitful_Bran   K    C      130       3   0    240   5.0  14.0
## 53     Post_Nat._Raisin_Bran   P    C      130       3   1    200   6.0  11.0
## 59               Raisin_Bran   K    C      120       3   1    210   5.0  14.0
## 60           Raisin_Nut_Bran   G    C       90       3   2    140   2.5  10.5
## 65    Shredded_Wheat_'n'Bran   N    C       80       3   0      0   4.0  19.0
## 71         Total_Raisin_Bran   G    C      150       3   1    190   4.0  15.0
##    sugars shelf potass vitamins weight  cups
## 1       6     3    280       25   1.00 0.331
## 2       8     3    135        0   1.00    NA
## 3       5     3    320       25   1.00 0.330
## 4       0     3    330       25   1.00 0.500
## 9       6     1    125       25   1.00 0.670
## 10      5     3    190       25   1.00 0.670
## 20      7     3    160       25   1.00 0.500
## 29     12     3    190       25   1.33 0.670
## 53     14     3    260       25   1.33 0.670
## 59     12     2    240       25   1.33 0.750
## 60      8     3    140       25   1.00 0.500
## 65      0     1    140        0   1.00 0.670
## 71     14     3    230      100   1.50 1.000
# Fun with functions

# c() concatenates objects together, most often to build a vector
limits <- c(0, 100)

# Scatter plot of calories against fat. The y axis is restricted using the
# vector built above rather than a repeated literal, so the limits are set in
# one place; points falling outside the limits are not drawn.
ggplot(data = cereal, aes(x = fat, y = calories)) +
    geom_point() +
    ylim(limits)
## Warning: Removed 38 rows containing missing values (geom_point).

# Generating sequences
seq(1, 10, 2)  # from 1 up to at most 10, in steps of 2
## [1] 1 3 5 7 9
seq(1, by = 2, length.out = 10)  # fix the number of values instead of the end point
##  [1]  1  3  5  7  9 11 13 15 17 19
# Generating all possible combinations of the supplied values, one per row
expand.grid(sex = c("m", "f"), age = c(10, 20, 30), stringsAsFactors = FALSE)
##   sex age
## 1   m  10
## 2   f  10
## 3   m  20
## 4   f  20
## 5   m  30
## 6   f  30
# Checking for and counting missing values; selecting rows without missing values
is.na(cereal$weight)  # TRUE wherever weight is missing
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE
sum(is.na(cereal$weight))   # number of missing weights
## [1] 2
sum(!is.na(cereal$weight))  # number of non-missing weights
## [1] 75
select <- is.na(cereal$weight)
cereal[!select, ]  # keep only the rows whose weight is present
##                                      name mfr type calories protein fat sodium
## 1                               100%_Bran   N    C       60       4   1    130
## 2                       100%_Natural_Bran   Q    C      110       3   5     15
## 3                                All-Bran   K    C       80       4   1    260
## 4               All-Bran_with_Extra_Fiber   K    C       50       4   0    140
## 5                          Almond_Delight   R    C      110       2   2    200
## 6                 Apple_Cinnamon_Cheerios   G    C      110       2   2    180
## 7                             Apple_Jacks   K    C      110       2   0    125
## 8                                 Basic_4   G    C      140       3   2    210
## 9                               Bran_Chex   R    C       90       2   1    200
## 10                            Bran_Flakes   P    C       90       3   0    210
## 11                           Cap'n'Crunch   Q    C      120       1   2    220
## 12                               Cheerios   G    C      110       6   2    290
## 13                  Cinnamon_Toast_Crunch   G    C      130       1   3    210
## 14                               Clusters   G    C      100       3   2    140
## 15                            Cocoa_Puffs   G    C      110       1   1    180
## 16                              Corn_Chex   R    C      110       2   0    280
## 17                            Corn_Flakes   K    C      110       2   0    290
## 18                              Corn_Pops   K    C      100       1   0     90
## 19                          Count_Chocula   G    C      110       1   1    180
## 20                     Crackling_Oat_Bran   K    C      110       3   3    140
## 21                 Cream_of_Wheat_(Quick)   N    H      100       3   0     80
## 22                                Crispix   K    C      100       2   0    220
## 23                 Crispy_Wheat_&_Raisins   G    C       90       2   1    140
## 24                            Double_Chex   R    C      100       2   0    190
## 25                            Froot_Loops   K    C      100       2   1    125
## 26                         Frosted_Flakes   K    C      110       1   0    200
## 27                    Frosted_Mini-Wheats   K    C       90       3   0      0
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats   P    C      120       3   2    160
## 29                          Fruitful_Bran   K    C      130       3   0    240
## 30                         Fruity_Pebbles   P    C      100       1   1    135
## 31                           Golden_Crisp   P    C      100       2   0     45
## 32                         Golden_Grahams   G    C      100       1   1    280
## 33                      Grape_Nuts_Flakes   P    C      100       3   1    140
## 34                             Grape-Nuts   P    C      110       3   0    170
## 35                     Great_Grains_Pecan   P    C      110       3   3     75
## 36                       Honey_Graham_Ohs   Q    C      130       1   2    220
## 37                     Honey_Nut_Cheerios   G    C      110       3   1    250
## 38                             Honey-comb   P    C      120       1   0    180
## 39            Just_Right_Crunchy__Nuggets   K    C      100       2   1    170
## 40                 Just_Right_Fruit_&_Nut   K    C      140       3   1    170
## 41                                    Kix   G    C      100       2   1    260
## 42                                   Life   Q    C      100       4   2    150
## 43                           Lucky_Charms   G    C      110       2   1    180
## 44                                  Maypo   A    H      110       4   1      0
## 47                   Mueslix_Crispy_Blend   K    C      160       3   2    150
## 48                   Multi-Grain_Cheerios   G    C       90       2   1    220
## 49                       Nut&Honey_Crunch   K    C      120       2   1    190
## 50              Nutri-Grain_Almond-Raisin   K    C      140       3   2    220
## 51                      Nutri-grain_Wheat   K    C       90       3   0    170
## 52                   Oatmeal_Raisin_Crisp   G    C      130       3   2    170
## 53                  Post_Nat._Raisin_Bran   P    C      130       3   1    200
## 54                             Product_19   K    C       90       3   0    320
## 55                            Puffed_Rice   Q    C       40       1   0      0
## 56                           Puffed_Wheat   Q    C       50       2   0      0
## 57                     Quaker_Oat_Squares   Q    C      100       4   1    135
## 58                         Quaker_Oatmeal   Q    H       90       5   2      0
## 59                            Raisin_Bran   K    C      120       3   1    210
## 60                        Raisin_Nut_Bran   G    C       90       3   2    140
## 61                         Raisin_Squares   K    C       90       2   0      0
## 62                              Rice_Chex   R    C      110       1   0    240
## 63                          Rice_Krispies   K    C      100       2   0    290
## 64                         Shredded_Wheat   N    C       80       2   0      0
## 65                 Shredded_Wheat_'n'Bran   N    C       80       3   0      0
## 66              Shredded_Wheat_spoon_size   N    C       90       3   0      0
## 67                                 Smacks   K    C      110       2   1     70
## 68                              Special_K   K    C      100       6   0    230
## 69                Strawberry_Fruit_Wheats   N    C       80       2   0     15
## 70                      Total_Corn_Flakes   G    C      100       2   1    200
## 71                      Total_Raisin_Bran   G    C      150       3   1    190
## 72                      Total_Whole_Grain   G    C      110       3   1    200
## 73                                Triples   G    C      100       2   1    250
## 74                                   Trix   G    C      110       1   1    140
## 75                             Wheat_Chex   R    C      100       3   1    230
## 76                               Wheaties   G    C       90       3   1    200
## 77                    Wheaties_Honey_Gold   G    C      110       2   1    200
##    fibre carbo sugars shelf potass vitamins weight  cups
## 1   10.0   5.0      6     3    280       25   1.00 0.331
## 2    2.0   8.0      8     3    135        0   1.00    NA
## 3    9.0   7.0      5     3    320       25   1.00 0.330
## 4   14.0   8.0      0     3    330       25   1.00 0.500
## 5    1.0  14.0      8     3     NA       25   1.00 0.750
## 6    1.5  10.5     10     1     70       25   1.00 0.750
## 7    1.0  11.0     14     2     30       25   1.00 1.000
## 8    2.0  18.0      8     3    100       25   1.33 0.750
## 9    4.0  15.0      6     1    125       25   1.00 0.670
## 10   5.0  13.0      5     3    190       25   1.00 0.670
## 11   0.0  12.0     12     2     35       25   1.00 0.750
## 12   2.0  17.0      1     1    105       25   1.00 1.250
## 13   0.0  13.0      9     2     45       25   1.00 0.750
## 14   2.0  13.0      7     3    105       25   1.00 0.500
## 15   0.0  12.0     13     2     55       25   1.00 1.000
## 16   0.0  22.0      3     1     25       25   1.00 1.000
## 17   1.0  21.0      2     1     35       25   1.00 1.000
## 18   1.0  13.0     12     2     20       25   1.00 1.000
## 19   0.0  12.0     13     2     65       25   1.00 1.000
## 20   4.0  10.0      7     3    160       25   1.00 0.500
## 21   1.0  21.0      0     2     NA        0   1.00 1.000
## 22   1.0  21.0      3     3     30       25   1.00 1.000
## 23   2.0  11.0     10     3    120       25   1.00 0.750
## 24   1.0  18.0      5     3     80       25   1.00 0.750
## 25   1.0  11.0     13     2     30       25   1.00 1.000
## 26   1.0  14.0     11     1     25       25   1.00 0.750
## 27   3.0  14.0      7     2    100       25   1.00 0.800
## 28   5.0  12.0     10     3    200       25   1.25 0.670
## 29   5.0  14.0     12     3    190       25   1.33 0.670
## 30   0.0  13.0     12     2     25       25   1.00 0.750
## 31   0.0  11.0     15     1     40       25   1.00 0.880
## 32   0.0  15.0      9     2     45       25   1.00 0.750
## 33   3.0  15.0      5     3     85       25   1.00 0.880
## 34   3.0  17.0      3     3     90       25   1.00 0.250
## 35   3.0  13.0      4     3    100       25   1.00 0.330
## 36   1.0  12.0     11     2     45       25   1.00 1.000
## 37   1.5  11.5     10     1     90       25   1.00 0.750
## 38   0.0  14.0     11     1     35       25   1.00 1.330
## 39   1.0  17.0      6     3     60      100   1.00    NA
## 40   2.0  20.0      9     3     95      100   1.30 0.750
## 41   0.0  21.0      3     2     40       25   1.00 1.500
## 42   2.0  12.0      6     2     95       25   1.00 0.670
## 43   0.0  12.0     12     2     55       25   1.00 1.000
## 44   0.0  16.0      3     2     95       25   1.00    NA
## 47   3.0  17.0     13     3    160       25   1.50 0.670
## 48   2.0  15.0      6     1     90       25   1.00 1.000
## 49   0.0  15.0      9     2     40       25   1.00 0.670
## 50   3.0  21.0      7     3    130       25   1.33 0.670
## 51   3.0  18.0      2     3     90       25   1.00    NA
## 52   1.5  13.5     10     3    120       25   1.25 0.500
## 53   6.0  11.0     14     3    260       25   1.33 0.670
## 54   1.0  20.0      3     3     45      100   1.00 1.000
## 55   0.0  13.0      0     3     15        0   0.50 1.000
## 56   1.0  10.0      0     3     50        0   0.50    NA
## 57   2.0  14.0      6     3    110       25   1.00 0.500
## 58   2.7    NA     NA     1    110        0   1.00 0.670
## 59   5.0  14.0     12     2    240       25   1.33 0.750
## 60   2.5  10.5      8     3    140       25   1.00 0.500
## 61   2.0  15.0      6     3    110       25   1.00 0.500
## 62   0.0  23.0      2     1     30       25   1.00 1.130
## 63   0.0  22.0      3     1     35       25   1.00 1.000
## 64   3.0  16.0      0     1     95        0   0.83    NA
## 65   4.0  19.0      0     1    140        0   1.00 0.670
## 66   3.0  20.0      0     1    120        0   1.00 0.670
## 67   1.0   9.0     15     2     40       25   1.00 0.750
## 68   1.0  16.0      3     1     55       25   1.00 1.000
## 69   3.0  15.0      5     2     90       25   1.00    NA
## 70   0.0  21.0      3     3     35      100   1.00 1.000
## 71   4.0  15.0     14     3    230      100   1.50 1.000
## 72   3.0  16.0      3     3    110      100   1.00 1.000
## 73   0.0  21.0      3     3     60       25   1.00 0.750
## 74   0.0  13.0     12     2     25       25   1.00 1.000
## 75   3.0  17.0      3     1    115       25   1.00 0.670
## 76   3.0  17.0      3     1    110       25   1.00 1.000
## 77   1.0  16.0      8     1     60       25   1.00 0.750
# max and parallel maximum
x <- c(1, 2, 3, 4, 5, 6)
max(x)      # a single overall maximum
## [1] 6
pmax(3, x)  # element by element: each value of x is compared with 3
## [1] 3 3 3 4 5 6
# finding the set of unique values
unique(cereal$type)
## [1] "C" "H"
# xtabs - counting and checking
# exclude = NULL together with na.action = na.pass keeps missing values in the
# table; they show up under <NA> in the second table
xtabs(~ type, data = cereal, exclude = NULL, na.action = na.pass)
## type
##  C  H 
## 74  3
xtabs(~ type + cups, data = cereal, exclude = NULL, na.action = na.pass)
##     cups
## type 0.25 0.33 0.331 0.5 0.67 0.75 0.8 0.88  1 1.13 1.25 1.33 1.5 <NA>
##    C    1    2     1   7   12   16   1    2 20    1    1    1   1    8
##    H    0    0     0   0    1    0   0    0  1    0    0    0   0    1
# pasting text together
# paste0() joins its arguments with no separator, i.e. paste(..., sep = "")
paste0("Analysis of ", nrow(cereal), " breakfast cereals")
## [1] "Analysis of 77 breakfast cereals"
# The pasted text can be used directly as a plot title
ggplot(data = cereal, aes(x = fat, y = calories)) +
    geom_point() +
    ggtitle(paste0("Analysis of ", nrow(cereal), " breakfast cereals"))

# pattern matching - Google is your friend
# By default the match is case-sensitive, so "bran" does not find "Bran"
select <- grepl("bran", cereal$name)
cereal[select, ]
##  [1] name     mfr      type     calories protein  fat      sodium   fibre   
##  [9] carbo    sugars   shelf    potass   vitamins weight   cups    
## <0 rows> (or 0-length row.names)
select <- grepl("bran", cereal$name, ignore.case = TRUE)  # match regardless of case
cereal[select, ]
##                         name mfr type calories protein fat sodium fibre carbo
## 1                  100%_Bran   N    C       60       4   1    130  10.0   5.0
## 2          100%_Natural_Bran   Q    C      110       3   5     15   2.0   8.0
## 3                   All-Bran   K    C       80       4   1    260   9.0   7.0
## 4  All-Bran_with_Extra_Fiber   K    C       50       4   0    140  14.0   8.0
## 9                  Bran_Chex   R    C       90       2   1    200   4.0  15.0
## 10               Bran_Flakes   P    C       90       3   0    210   5.0  13.0
## 20        Crackling_Oat_Bran   K    C      110       3   3    140   4.0  10.0
## 29             Fruitful_Bran   K    C      130       3   0    240   5.0  14.0
## 53     Post_Nat._Raisin_Bran   P    C      130       3   1    200   6.0  11.0
## 59               Raisin_Bran   K    C      120       3   1    210   5.0  14.0
## 60           Raisin_Nut_Bran   G    C       90       3   2    140   2.5  10.5
## 65    Shredded_Wheat_'n'Bran   N    C       80       3   0      0   4.0  19.0
## 71         Total_Raisin_Bran   G    C      150       3   1    190   4.0  15.0
##    sugars shelf potass vitamins weight  cups
## 1       6     3    280       25   1.00 0.331
## 2       8     3    135        0   1.00    NA
## 3       5     3    320       25   1.00 0.330
## 4       0     3    330       25   1.00 0.500
## 9       6     1    125       25   1.00 0.670
## 10      5     3    190       25   1.00 0.670
## 20      7     3    160       25   1.00 0.500
## 29     12     3    190       25   1.33 0.670
## 53     14     3    260       25   1.33 0.670
## 59     12     2    240       25   1.33 0.750
## 60      8     3    140       25   1.00 0.500
## 65      0     1    140        0   1.00 0.670
## 71     14     3    230      100   1.50 1.000
# ^ anchors the pattern to the beginning of the name
select <- grepl("^bran", cereal$name, ignore.case = TRUE)
cereal[select, ] # start with bran
##           name mfr type calories protein fat sodium fibre carbo sugars shelf
## 9    Bran_Chex   R    C       90       2   1    200     4    15      6     1
## 10 Bran_Flakes   P    C       90       3   0    210     5    13      5     3
##    potass vitamins weight cups
## 9     125       25      1 0.67
## 10    190       25      1 0.67
# $ anchors the pattern to the end of the name
select <- grepl("bran$", cereal$name, ignore.case = TRUE)
cereal[select, ] # end with bran
##                      name mfr type calories protein fat sodium fibre carbo
## 1               100%_Bran   N    C       60       4   1    130  10.0   5.0
## 2       100%_Natural_Bran   Q    C      110       3   5     15   2.0   8.0
## 3                All-Bran   K    C       80       4   1    260   9.0   7.0
## 20     Crackling_Oat_Bran   K    C      110       3   3    140   4.0  10.0
## 29          Fruitful_Bran   K    C      130       3   0    240   5.0  14.0
## 53  Post_Nat._Raisin_Bran   P    C      130       3   1    200   6.0  11.0
## 59            Raisin_Bran   K    C      120       3   1    210   5.0  14.0
## 60        Raisin_Nut_Bran   G    C       90       3   2    140   2.5  10.5
## 65 Shredded_Wheat_'n'Bran   N    C       80       3   0      0   4.0  19.0
## 71      Total_Raisin_Bran   G    C      150       3   1    190   4.0  15.0
##    sugars shelf potass vitamins weight  cups
## 1       6     3    280       25   1.00 0.331
## 2       8     3    135        0   1.00    NA
## 3       5     3    320       25   1.00 0.330
## 20      7     3    160       25   1.00 0.500
## 29     12     3    190       25   1.33 0.670
## 53     14     3    260       25   1.33 0.670
## 59     12     2    240       25   1.33 0.750
## 60      8     3    140       25   1.00 0.500
## 65      0     1    140        0   1.00 0.670
## 71     14     3    230      100   1.50 1.000
# dealing with strings
# toupper() converts every letter of each name to upper case; the result is
# only printed here, cereal$name itself is not modified
toupper(cereal$name)
##  [1] "100%_BRAN"                             
##  [2] "100%_NATURAL_BRAN"                     
##  [3] "ALL-BRAN"                              
##  [4] "ALL-BRAN_WITH_EXTRA_FIBER"             
##  [5] "ALMOND_DELIGHT"                        
##  [6] "APPLE_CINNAMON_CHEERIOS"               
##  [7] "APPLE_JACKS"                           
##  [8] "BASIC_4"                               
##  [9] "BRAN_CHEX"                             
## [10] "BRAN_FLAKES"                           
## [11] "CAP'N'CRUNCH"                          
## [12] "CHEERIOS"                              
## [13] "CINNAMON_TOAST_CRUNCH"                 
## [14] "CLUSTERS"                              
## [15] "COCOA_PUFFS"                           
## [16] "CORN_CHEX"                             
## [17] "CORN_FLAKES"                           
## [18] "CORN_POPS"                             
## [19] "COUNT_CHOCULA"                         
## [20] "CRACKLING_OAT_BRAN"                    
## [21] "CREAM_OF_WHEAT_(QUICK)"                
## [22] "CRISPIX"                               
## [23] "CRISPY_WHEAT_&_RAISINS"                
## [24] "DOUBLE_CHEX"                           
## [25] "FROOT_LOOPS"                           
## [26] "FROSTED_FLAKES"                        
## [27] "FROSTED_MINI-WHEATS"                   
## [28] "FRUIT_&_FIBRE_DATES,_WALNUTS,_AND_OATS"
## [29] "FRUITFUL_BRAN"                         
## [30] "FRUITY_PEBBLES"                        
## [31] "GOLDEN_CRISP"                          
## [32] "GOLDEN_GRAHAMS"                        
## [33] "GRAPE_NUTS_FLAKES"                     
## [34] "GRAPE-NUTS"                            
## [35] "GREAT_GRAINS_PECAN"                    
## [36] "HONEY_GRAHAM_OHS"                      
## [37] "HONEY_NUT_CHEERIOS"                    
## [38] "HONEY-COMB"                            
## [39] "JUST_RIGHT_CRUNCHY__NUGGETS"           
## [40] "JUST_RIGHT_FRUIT_&_NUT"                
## [41] "KIX"                                   
## [42] "LIFE"                                  
## [43] "LUCKY_CHARMS"                          
## [44] "MAYPO"                                 
## [45] "MUESLI_RAISINS,_DATES,_&_ALMONDS"      
## [46] "MUESLI_RAISINS,_PEACHES,_&_PECANS"     
## [47] "MUESLIX_CRISPY_BLEND"                  
## [48] "MULTI-GRAIN_CHEERIOS"                  
## [49] "NUT&HONEY_CRUNCH"                      
## [50] "NUTRI-GRAIN_ALMOND-RAISIN"             
## [51] "NUTRI-GRAIN_WHEAT"                     
## [52] "OATMEAL_RAISIN_CRISP"                  
## [53] "POST_NAT._RAISIN_BRAN"                 
## [54] "PRODUCT_19"                            
## [55] "PUFFED_RICE"                           
## [56] "PUFFED_WHEAT"                          
## [57] "QUAKER_OAT_SQUARES"                    
## [58] "QUAKER_OATMEAL"                        
## [59] "RAISIN_BRAN"                           
## [60] "RAISIN_NUT_BRAN"                       
## [61] "RAISIN_SQUARES"                        
## [62] "RICE_CHEX"                             
## [63] "RICE_KRISPIES"                         
## [64] "SHREDDED_WHEAT"                        
## [65] "SHREDDED_WHEAT_'N'BRAN"                
## [66] "SHREDDED_WHEAT_SPOON_SIZE"             
## [67] "SMACKS"                                
## [68] "SPECIAL_K"                             
## [69] "STRAWBERRY_FRUIT_WHEATS"               
## [70] "TOTAL_CORN_FLAKES"                     
## [71] "TOTAL_RAISIN_BRAN"                     
## [72] "TOTAL_WHOLE_GRAIN"                     
## [73] "TRIPLES"                               
## [74] "TRIX"                                  
## [75] "WHEAT_CHEX"                            
## [76] "WHEATIES"                              
## [77] "WHEATIES_HONEY_GOLD"
# tolower() converts every letter of each name to lower case (printed only)
tolower(cereal$name)
##  [1] "100%_bran"                             
##  [2] "100%_natural_bran"                     
##  [3] "all-bran"                              
##  [4] "all-bran_with_extra_fiber"             
##  [5] "almond_delight"                        
##  [6] "apple_cinnamon_cheerios"               
##  [7] "apple_jacks"                           
##  [8] "basic_4"                               
##  [9] "bran_chex"                             
## [10] "bran_flakes"                           
## [11] "cap'n'crunch"                          
## [12] "cheerios"                              
## [13] "cinnamon_toast_crunch"                 
## [14] "clusters"                              
## [15] "cocoa_puffs"                           
## [16] "corn_chex"                             
## [17] "corn_flakes"                           
## [18] "corn_pops"                             
## [19] "count_chocula"                         
## [20] "crackling_oat_bran"                    
## [21] "cream_of_wheat_(quick)"                
## [22] "crispix"                               
## [23] "crispy_wheat_&_raisins"                
## [24] "double_chex"                           
## [25] "froot_loops"                           
## [26] "frosted_flakes"                        
## [27] "frosted_mini-wheats"                   
## [28] "fruit_&_fibre_dates,_walnuts,_and_oats"
## [29] "fruitful_bran"                         
## [30] "fruity_pebbles"                        
## [31] "golden_crisp"                          
## [32] "golden_grahams"                        
## [33] "grape_nuts_flakes"                     
## [34] "grape-nuts"                            
## [35] "great_grains_pecan"                    
## [36] "honey_graham_ohs"                      
## [37] "honey_nut_cheerios"                    
## [38] "honey-comb"                            
## [39] "just_right_crunchy__nuggets"           
## [40] "just_right_fruit_&_nut"                
## [41] "kix"                                   
## [42] "life"                                  
## [43] "lucky_charms"                          
## [44] "maypo"                                 
## [45] "muesli_raisins,_dates,_&_almonds"      
## [46] "muesli_raisins,_peaches,_&_pecans"     
## [47] "mueslix_crispy_blend"                  
## [48] "multi-grain_cheerios"                  
## [49] "nut&honey_crunch"                      
## [50] "nutri-grain_almond-raisin"             
## [51] "nutri-grain_wheat"                     
## [52] "oatmeal_raisin_crisp"                  
## [53] "post_nat._raisin_bran"                 
## [54] "product_19"                            
## [55] "puffed_rice"                           
## [56] "puffed_wheat"                          
## [57] "quaker_oat_squares"                    
## [58] "quaker_oatmeal"                        
## [59] "raisin_bran"                           
## [60] "raisin_nut_bran"                       
## [61] "raisin_squares"                        
## [62] "rice_chex"                             
## [63] "rice_krispies"                         
## [64] "shredded_wheat"                        
## [65] "shredded_wheat_'n'bran"                
## [66] "shredded_wheat_spoon_size"             
## [67] "smacks"                                
## [68] "special_k"                             
## [69] "strawberry_fruit_wheats"               
## [70] "total_corn_flakes"                     
## [71] "total_raisin_bran"                     
## [72] "total_whole_grain"                     
## [73] "triples"                               
## [74] "trix"                                  
## [75] "wheat_chex"                            
## [76] "wheaties"                              
## [77] "wheaties_honey_gold"
# trimws() strips leading and trailing whitespace from each string (printed only)
trimws(cereal$name)
##  [1] "100%_Bran"                             
##  [2] "100%_Natural_Bran"                     
##  [3] "All-Bran"                              
##  [4] "All-Bran_with_Extra_Fiber"             
##  [5] "Almond_Delight"                        
##  [6] "Apple_Cinnamon_Cheerios"               
##  [7] "Apple_Jacks"                           
##  [8] "Basic_4"                               
##  [9] "Bran_Chex"                             
## [10] "Bran_Flakes"                           
## [11] "Cap'n'Crunch"                          
## [12] "Cheerios"                              
## [13] "Cinnamon_Toast_Crunch"                 
## [14] "Clusters"                              
## [15] "Cocoa_Puffs"                           
## [16] "Corn_Chex"                             
## [17] "Corn_Flakes"                           
## [18] "Corn_Pops"                             
## [19] "Count_Chocula"                         
## [20] "Crackling_Oat_Bran"                    
## [21] "Cream_of_Wheat_(Quick)"                
## [22] "Crispix"                               
## [23] "Crispy_Wheat_&_Raisins"                
## [24] "Double_Chex"                           
## [25] "Froot_Loops"                           
## [26] "Frosted_Flakes"                        
## [27] "Frosted_Mini-Wheats"                   
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"                         
## [30] "Fruity_Pebbles"                        
## [31] "Golden_Crisp"                          
## [32] "Golden_Grahams"                        
## [33] "Grape_Nuts_Flakes"                     
## [34] "Grape-Nuts"                            
## [35] "Great_Grains_Pecan"                    
## [36] "Honey_Graham_Ohs"                      
## [37] "Honey_Nut_Cheerios"                    
## [38] "Honey-comb"                            
## [39] "Just_Right_Crunchy__Nuggets"           
## [40] "Just_Right_Fruit_&_Nut"                
## [41] "Kix"                                   
## [42] "Life"                                  
## [43] "Lucky_Charms"                          
## [44] "Maypo"                                 
## [45] "Muesli_Raisins,_Dates,_&_Almonds"      
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"     
## [47] "Mueslix_Crispy_Blend"                  
## [48] "Multi-Grain_Cheerios"                  
## [49] "Nut&Honey_Crunch"                      
## [50] "Nutri-Grain_Almond-Raisin"             
## [51] "Nutri-grain_Wheat"                     
## [52] "Oatmeal_Raisin_Crisp"                  
## [53] "Post_Nat._Raisin_Bran"                 
## [54] "Product_19"                            
## [55] "Puffed_Rice"                           
## [56] "Puffed_Wheat"                          
## [57] "Quaker_Oat_Squares"                    
## [58] "Quaker_Oatmeal"                        
## [59] "Raisin_Bran"                           
## [60] "Raisin_Nut_Bran"                       
## [61] "Raisin_Squares"                        
## [62] "Rice_Chex"                             
## [63] "Rice_Krispies"                         
## [64] "Shredded_Wheat"                        
## [65] "Shredded_Wheat_'n'Bran"                
## [66] "Shredded_Wheat_spoon_size"             
## [67] "Smacks"                                
## [68] "Special_K"                             
## [69] "Strawberry_Fruit_Wheats"               
## [70] "Total_Corn_Flakes"                     
## [71] "Total_Raisin_Bran"                     
## [72] "Total_Whole_Grain"                     
## [73] "Triples"                               
## [74] "Trix"                                  
## [75] "Wheat_Chex"                            
## [76] "Wheaties"                              
## [77] "Wheaties_Honey_Gold"
# First four characters of each name (shorter names come back whole).
substr(cereal[["name"]], start = 1, stop = 4)
##  [1] "100%" "100%" "All-" "All-" "Almo" "Appl" "Appl" "Basi" "Bran" "Bran"
## [11] "Cap'" "Chee" "Cinn" "Clus" "Coco" "Corn" "Corn" "Corn" "Coun" "Crac"
## [21] "Crea" "Cris" "Cris" "Doub" "Froo" "Fros" "Fros" "Frui" "Frui" "Frui"
## [31] "Gold" "Gold" "Grap" "Grap" "Grea" "Hone" "Hone" "Hone" "Just" "Just"
## [41] "Kix"  "Life" "Luck" "Mayp" "Mues" "Mues" "Mues" "Mult" "Nut&" "Nutr"
## [51] "Nutr" "Oatm" "Post" "Prod" "Puff" "Puff" "Quak" "Quak" "Rais" "Rais"
## [61] "Rais" "Rice" "Rice" "Shre" "Shre" "Shre" "Smac" "Spec" "Stra" "Tota"
## [71] "Tota" "Tota" "Trip" "Trix" "Whea" "Whea" "Whea"
# Everything from the fourth character to the end of each name.
substring(cereal[["name"]], first = 4)
##  [1] "%_Bran"                              "%_Natural_Bran"                     
##  [3] "-Bran"                               "-Bran_with_Extra_Fiber"             
##  [5] "ond_Delight"                         "le_Cinnamon_Cheerios"               
##  [7] "le_Jacks"                            "ic_4"                               
##  [9] "n_Chex"                              "n_Flakes"                           
## [11] "'n'Crunch"                           "erios"                              
## [13] "namon_Toast_Crunch"                  "sters"                              
## [15] "oa_Puffs"                            "n_Chex"                             
## [17] "n_Flakes"                            "n_Pops"                             
## [19] "nt_Chocula"                          "ckling_Oat_Bran"                    
## [21] "am_of_Wheat_(Quick)"                 "spix"                               
## [23] "spy_Wheat_&_Raisins"                 "ble_Chex"                           
## [25] "ot_Loops"                            "sted_Flakes"                        
## [27] "sted_Mini-Wheats"                    "it_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "itful_Bran"                          "ity_Pebbles"                        
## [31] "den_Crisp"                           "den_Grahams"                        
## [33] "pe_Nuts_Flakes"                      "pe-Nuts"                            
## [35] "at_Grains_Pecan"                     "ey_Graham_Ohs"                      
## [37] "ey_Nut_Cheerios"                     "ey-comb"                            
## [39] "t_Right_Crunchy__Nuggets"            "t_Right_Fruit_&_Nut"                
## [41] ""                                    "e"                                  
## [43] "ky_Charms"                           "po"                                 
## [45] "sli_Raisins,_Dates,_&_Almonds"       "sli_Raisins,_Peaches,_&_Pecans"     
## [47] "slix_Crispy_Blend"                   "ti-Grain_Cheerios"                  
## [49] "&Honey_Crunch"                       "ri-Grain_Almond-Raisin"             
## [51] "ri-grain_Wheat"                      "meal_Raisin_Crisp"                  
## [53] "t_Nat._Raisin_Bran"                  "duct_19"                            
## [55] "fed_Rice"                            "fed_Wheat"                          
## [57] "ker_Oat_Squares"                     "ker_Oatmeal"                        
## [59] "sin_Bran"                            "sin_Nut_Bran"                       
## [61] "sin_Squares"                         "e_Chex"                             
## [63] "e_Krispies"                          "edded_Wheat"                        
## [65] "edded_Wheat_'n'Bran"                 "edded_Wheat_spoon_size"             
## [67] "cks"                                 "cial_K"                             
## [69] "awberry_Fruit_Wheats"                "al_Corn_Flakes"                     
## [71] "al_Raisin_Bran"                      "al_Whole_Grain"                     
## [73] "ples"                                "x"                                  
## [75] "at_Chex"                             "aties"                              
## [77] "aties_Honey_Gold"
# Naive "first word": cut just before the first "_".  regexpr() returns -1
# for names without a "_", so their stop position is -2 and substr() gives "".
substr(cereal[["name"]], start = 1,
       stop = regexpr("_", cereal[["name"]], fixed = TRUE) - 1)
##  [1] "100%"        "100%"        ""            "All-Bran"    "Almond"     
##  [6] "Apple"       "Apple"       "Basic"       "Bran"        "Bran"       
## [11] ""            ""            "Cinnamon"    ""            "Cocoa"      
## [16] "Corn"        "Corn"        "Corn"        "Count"       "Crackling"  
## [21] "Cream"       ""            "Crispy"      "Double"      "Froot"      
## [26] "Frosted"     "Frosted"     "Fruit"       "Fruitful"    "Fruity"     
## [31] "Golden"      "Golden"      "Grape"       ""            "Great"      
## [36] "Honey"       "Honey"       ""            "Just"        "Just"       
## [41] ""            ""            "Lucky"       ""            "Muesli"     
## [46] "Muesli"      "Mueslix"     "Multi-Grain" "Nut&Honey"   "Nutri-Grain"
## [51] "Nutri-grain" "Oatmeal"     "Post"        "Product"     "Puffed"     
## [56] "Puffed"      "Quaker"      "Quaker"      "Raisin"      "Raisin"     
## [61] "Raisin"      "Rice"        "Rice"        "Shredded"    "Shredded"   
## [66] "Shredded"    ""            "Special"     "Strawberry"  "Total"      
## [71] "Total"       "Total"       ""            ""            "Wheat"      
## [76] ""            "Wheaties"
# First word of each cereal name: everything before the first "_", or the
# whole name when it contains no "_".
# regexpr() returns -1 when there is no match, so the stop position falls back
# to nchar() for one-word names.  (Clamping the stop with pmax(5, ...) instead
# cuts one-word names to five characters and lets the "_" through whenever the
# first word has only four letters.)
substr(cereal$name, 1,
       ifelse(grepl("_", cereal$name, fixed = TRUE),
              regexpr("_", cereal$name, fixed = TRUE) - 1,
              nchar(cereal$name)))
##  [1] "100%"         "100%"         "All-Bran"     "All-Bran"     "Almond"      
##  [6] "Apple"        "Apple"        "Basic"        "Bran"         "Bran"        
## [11] "Cap'n'Crunch" "Cheerios"     "Cinnamon"     "Clusters"     "Cocoa"       
## [16] "Corn"         "Corn"         "Corn"         "Count"        "Crackling"   
## [21] "Cream"        "Crispix"      "Crispy"       "Double"       "Froot"       
## [26] "Frosted"      "Frosted"      "Fruit"        "Fruitful"     "Fruity"      
## [31] "Golden"       "Golden"       "Grape"        "Grape-Nuts"   "Great"       
## [36] "Honey"        "Honey"        "Honey-comb"   "Just"         "Just"        
## [41] "Kix"          "Life"         "Lucky"        "Maypo"        "Muesli"      
## [46] "Muesli"       "Mueslix"      "Multi-Grain"  "Nut&Honey"    "Nutri-Grain" 
## [51] "Nutri-grain"  "Oatmeal"      "Post"         "Product"      "Puffed"      
## [56] "Puffed"       "Quaker"       "Quaker"       "Raisin"       "Raisin"      
## [61] "Raisin"       "Rice"         "Rice"         "Shredded"     "Shredded"    
## [66] "Shredded"     "Smacks"       "Special"      "Strawberry"   "Total"       
## [71] "Total"        "Total"        "Triples"      "Trix"         "Wheat"       
## [76] "Wheaties"     "Wheaties"
# Replace every hyphen in the names with an underscore.
gsub(pattern = "-", replacement = "_", x = cereal[["name"]], fixed = TRUE)
##  [1] "100%_Bran"                             
##  [2] "100%_Natural_Bran"                     
##  [3] "All_Bran"                              
##  [4] "All_Bran_with_Extra_Fiber"             
##  [5] "Almond_Delight"                        
##  [6] "Apple_Cinnamon_Cheerios"               
##  [7] "Apple_Jacks"                           
##  [8] "Basic_4"                               
##  [9] "Bran_Chex"                             
## [10] "Bran_Flakes"                           
## [11] "Cap'n'Crunch"                          
## [12] "Cheerios"                              
## [13] "Cinnamon_Toast_Crunch"                 
## [14] "Clusters"                              
## [15] "Cocoa_Puffs"                           
## [16] "Corn_Chex"                             
## [17] "Corn_Flakes"                           
## [18] "Corn_Pops"                             
## [19] "Count_Chocula"                         
## [20] "Crackling_Oat_Bran"                    
## [21] "Cream_of_Wheat_(Quick)"                
## [22] "Crispix"                               
## [23] "Crispy_Wheat_&_Raisins"                
## [24] "Double_Chex"                           
## [25] "Froot_Loops"                           
## [26] "Frosted_Flakes"                        
## [27] "Frosted_Mini_Wheats"                   
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"                         
## [30] "Fruity_Pebbles"                        
## [31] "Golden_Crisp"                          
## [32] "Golden_Grahams"                        
## [33] "Grape_Nuts_Flakes"                     
## [34] "Grape_Nuts"                            
## [35] "Great_Grains_Pecan"                    
## [36] "Honey_Graham_Ohs"                      
## [37] "Honey_Nut_Cheerios"                    
## [38] "Honey_comb"                            
## [39] "Just_Right_Crunchy__Nuggets"           
## [40] "Just_Right_Fruit_&_Nut"                
## [41] "Kix"                                   
## [42] "Life"                                  
## [43] "Lucky_Charms"                          
## [44] "Maypo"                                 
## [45] "Muesli_Raisins,_Dates,_&_Almonds"      
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"     
## [47] "Mueslix_Crispy_Blend"                  
## [48] "Multi_Grain_Cheerios"                  
## [49] "Nut&Honey_Crunch"                      
## [50] "Nutri_Grain_Almond_Raisin"             
## [51] "Nutri_grain_Wheat"                     
## [52] "Oatmeal_Raisin_Crisp"                  
## [53] "Post_Nat._Raisin_Bran"                 
## [54] "Product_19"                            
## [55] "Puffed_Rice"                           
## [56] "Puffed_Wheat"                          
## [57] "Quaker_Oat_Squares"                    
## [58] "Quaker_Oatmeal"                        
## [59] "Raisin_Bran"                           
## [60] "Raisin_Nut_Bran"                       
## [61] "Raisin_Squares"                        
## [62] "Rice_Chex"                             
## [63] "Rice_Krispies"                         
## [64] "Shredded_Wheat"                        
## [65] "Shredded_Wheat_'n'Bran"                
## [66] "Shredded_Wheat_spoon_size"             
## [67] "Smacks"                                
## [68] "Special_K"                             
## [69] "Strawberry_Fruit_Wheats"               
## [70] "Total_Corn_Flakes"                     
## [71] "Total_Raisin_Bran"                     
## [72] "Total_Whole_Grain"                     
## [73] "Triples"                               
## [74] "Trix"                                  
## [75] "Wheat_Chex"                            
## [76] "Wheaties"                              
## [77] "Wheaties_Honey_Gold"
# Sorting
# The cereal names as a character vector in ascending order.
sort(cereal[["name"]], decreasing = FALSE)
##  [1] "100%_Bran"                             
##  [2] "100%_Natural_Bran"                     
##  [3] "All-Bran"                              
##  [4] "All-Bran_with_Extra_Fiber"             
##  [5] "Almond_Delight"                        
##  [6] "Apple_Cinnamon_Cheerios"               
##  [7] "Apple_Jacks"                           
##  [8] "Basic_4"                               
##  [9] "Bran_Chex"                             
## [10] "Bran_Flakes"                           
## [11] "Cap'n'Crunch"                          
## [12] "Cheerios"                              
## [13] "Cinnamon_Toast_Crunch"                 
## [14] "Clusters"                              
## [15] "Cocoa_Puffs"                           
## [16] "Corn_Chex"                             
## [17] "Corn_Flakes"                           
## [18] "Corn_Pops"                             
## [19] "Count_Chocula"                         
## [20] "Crackling_Oat_Bran"                    
## [21] "Cream_of_Wheat_(Quick)"                
## [22] "Crispix"                               
## [23] "Crispy_Wheat_&_Raisins"                
## [24] "Double_Chex"                           
## [25] "Froot_Loops"                           
## [26] "Frosted_Flakes"                        
## [27] "Frosted_Mini-Wheats"                   
## [28] "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats"
## [29] "Fruitful_Bran"                         
## [30] "Fruity_Pebbles"                        
## [31] "Golden_Crisp"                          
## [32] "Golden_Grahams"                        
## [33] "Grape_Nuts_Flakes"                     
## [34] "Grape-Nuts"                            
## [35] "Great_Grains_Pecan"                    
## [36] "Honey_Graham_Ohs"                      
## [37] "Honey_Nut_Cheerios"                    
## [38] "Honey-comb"                            
## [39] "Just_Right_Crunchy__Nuggets"           
## [40] "Just_Right_Fruit_&_Nut"                
## [41] "Kix"                                   
## [42] "Life"                                  
## [43] "Lucky_Charms"                          
## [44] "Maypo"                                 
## [45] "Muesli_Raisins,_Dates,_&_Almonds"      
## [46] "Muesli_Raisins,_Peaches,_&_Pecans"     
## [47] "Mueslix_Crispy_Blend"                  
## [48] "Multi-Grain_Cheerios"                  
## [49] "Nut&Honey_Crunch"                      
## [50] "Nutri-Grain_Almond-Raisin"             
## [51] "Nutri-grain_Wheat"                     
## [52] "Oatmeal_Raisin_Crisp"                  
## [53] "Post_Nat._Raisin_Bran"                 
## [54] "Product_19"                            
## [55] "Puffed_Rice"                           
## [56] "Puffed_Wheat"                          
## [57] "Quaker_Oat_Squares"                    
## [58] "Quaker_Oatmeal"                        
## [59] "Raisin_Bran"                           
## [60] "Raisin_Nut_Bran"                       
## [61] "Raisin_Squares"                        
## [62] "Rice_Chex"                             
## [63] "Rice_Krispies"                         
## [64] "Shredded_Wheat"                        
## [65] "Shredded_Wheat_'n'Bran"                
## [66] "Shredded_Wheat_spoon_size"             
## [67] "Smacks"                                
## [68] "Special_K"                             
## [69] "Strawberry_Fruit_Wheats"               
## [70] "Total_Corn_Flakes"                     
## [71] "Total_Raisin_Bran"                     
## [72] "Total_Whole_Grain"                     
## [73] "Triples"                               
## [74] "Trix"                                  
## [75] "Wheat_Chex"                            
## [76] "Wheaties"                              
## [77] "Wheaties_Honey_Gold"
# Reorder the rows of the data frame by cereal name.
cereal[order(cereal[["name"]]), ]
##                                      name mfr type calories protein fat sodium
## 1                               100%_Bran   N    C       60       4   1    130
## 2                       100%_Natural_Bran   Q    C      110       3   5     15
## 3                                All-Bran   K    C       80       4   1    260
## 4               All-Bran_with_Extra_Fiber   K    C       50       4   0    140
## 5                          Almond_Delight   R    C      110       2   2    200
## 6                 Apple_Cinnamon_Cheerios   G    C      110       2   2    180
## 7                             Apple_Jacks   K    C      110       2   0    125
## 8                                 Basic_4   G    C      140       3   2    210
## 9                               Bran_Chex   R    C       90       2   1    200
## 10                            Bran_Flakes   P    C       90       3   0    210
## 11                           Cap'n'Crunch   Q    C      120       1   2    220
## 12                               Cheerios   G    C      110       6   2    290
## 13                  Cinnamon_Toast_Crunch   G    C      130       1   3    210
## 14                               Clusters   G    C      100       3   2    140
## 15                            Cocoa_Puffs   G    C      110       1   1    180
## 16                              Corn_Chex   R    C      110       2   0    280
## 17                            Corn_Flakes   K    C      110       2   0    290
## 18                              Corn_Pops   K    C      100       1   0     90
## 19                          Count_Chocula   G    C      110       1   1    180
## 20                     Crackling_Oat_Bran   K    C      110       3   3    140
## 21                 Cream_of_Wheat_(Quick)   N    H      100       3   0     80
## 22                                Crispix   K    C      100       2   0    220
## 23                 Crispy_Wheat_&_Raisins   G    C       90       2   1    140
## 24                            Double_Chex   R    C      100       2   0    190
## 25                            Froot_Loops   K    C      100       2   1    125
## 26                         Frosted_Flakes   K    C      110       1   0    200
## 27                    Frosted_Mini-Wheats   K    C       90       3   0      0
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats   P    C      120       3   2    160
## 29                          Fruitful_Bran   K    C      130       3   0    240
## 30                         Fruity_Pebbles   P    C      100       1   1    135
## 31                           Golden_Crisp   P    C      100       2   0     45
## 32                         Golden_Grahams   G    C      100       1   1    280
## 33                      Grape_Nuts_Flakes   P    C      100       3   1    140
## 34                             Grape-Nuts   P    C      110       3   0    170
## 35                     Great_Grains_Pecan   P    C      110       3   3     75
## 36                       Honey_Graham_Ohs   Q    C      130       1   2    220
## 37                     Honey_Nut_Cheerios   G    C      110       3   1    250
## 38                             Honey-comb   P    C      120       1   0    180
## 39            Just_Right_Crunchy__Nuggets   K    C      100       2   1    170
## 40                 Just_Right_Fruit_&_Nut   K    C      140       3   1    170
## 41                                    Kix   G    C      100       2   1    260
## 42                                   Life   Q    C      100       4   2    150
## 43                           Lucky_Charms   G    C      110       2   1    180
## 44                                  Maypo   A    H      110       4   1      0
## 45       Muesli_Raisins,_Dates,_&_Almonds   R    C      150       4   3     95
## 46      Muesli_Raisins,_Peaches,_&_Pecans   R    C      150       4   3    150
## 47                   Mueslix_Crispy_Blend   K    C      160       3   2    150
## 48                   Multi-Grain_Cheerios   G    C       90       2   1    220
## 49                       Nut&Honey_Crunch   K    C      120       2   1    190
## 50              Nutri-Grain_Almond-Raisin   K    C      140       3   2    220
## 51                      Nutri-grain_Wheat   K    C       90       3   0    170
## 52                   Oatmeal_Raisin_Crisp   G    C      130       3   2    170
## 53                  Post_Nat._Raisin_Bran   P    C      130       3   1    200
## 54                             Product_19   K    C       90       3   0    320
## 55                            Puffed_Rice   Q    C       40       1   0      0
## 56                           Puffed_Wheat   Q    C       50       2   0      0
## 57                     Quaker_Oat_Squares   Q    C      100       4   1    135
## 58                         Quaker_Oatmeal   Q    H       90       5   2      0
## 59                            Raisin_Bran   K    C      120       3   1    210
## 60                        Raisin_Nut_Bran   G    C       90       3   2    140
## 61                         Raisin_Squares   K    C       90       2   0      0
## 62                              Rice_Chex   R    C      110       1   0    240
## 63                          Rice_Krispies   K    C      100       2   0    290
## 64                         Shredded_Wheat   N    C       80       2   0      0
## 65                 Shredded_Wheat_'n'Bran   N    C       80       3   0      0
## 66              Shredded_Wheat_spoon_size   N    C       90       3   0      0
## 67                                 Smacks   K    C      110       2   1     70
## 68                              Special_K   K    C      100       6   0    230
## 69                Strawberry_Fruit_Wheats   N    C       80       2   0     15
## 70                      Total_Corn_Flakes   G    C      100       2   1    200
## 71                      Total_Raisin_Bran   G    C      150       3   1    190
## 72                      Total_Whole_Grain   G    C      110       3   1    200
## 73                                Triples   G    C      100       2   1    250
## 74                                   Trix   G    C      110       1   1    140
## 75                             Wheat_Chex   R    C      100       3   1    230
## 76                               Wheaties   G    C       90       3   1    200
## 77                    Wheaties_Honey_Gold   G    C      110       2   1    200
##    fibre carbo sugars shelf potass vitamins weight  cups
## 1   10.0   5.0      6     3    280       25   1.00 0.331
## 2    2.0   8.0      8     3    135        0   1.00    NA
## 3    9.0   7.0      5     3    320       25   1.00 0.330
## 4   14.0   8.0      0     3    330       25   1.00 0.500
## 5    1.0  14.0      8     3     NA       25   1.00 0.750
## 6    1.5  10.5     10     1     70       25   1.00 0.750
## 7    1.0  11.0     14     2     30       25   1.00 1.000
## 8    2.0  18.0      8     3    100       25   1.33 0.750
## 9    4.0  15.0      6     1    125       25   1.00 0.670
## 10   5.0  13.0      5     3    190       25   1.00 0.670
## 11   0.0  12.0     12     2     35       25   1.00 0.750
## 12   2.0  17.0      1     1    105       25   1.00 1.250
## 13   0.0  13.0      9     2     45       25   1.00 0.750
## 14   2.0  13.0      7     3    105       25   1.00 0.500
## 15   0.0  12.0     13     2     55       25   1.00 1.000
## 16   0.0  22.0      3     1     25       25   1.00 1.000
## 17   1.0  21.0      2     1     35       25   1.00 1.000
## 18   1.0  13.0     12     2     20       25   1.00 1.000
## 19   0.0  12.0     13     2     65       25   1.00 1.000
## 20   4.0  10.0      7     3    160       25   1.00 0.500
## 21   1.0  21.0      0     2     NA        0   1.00 1.000
## 22   1.0  21.0      3     3     30       25   1.00 1.000
## 23   2.0  11.0     10     3    120       25   1.00 0.750
## 24   1.0  18.0      5     3     80       25   1.00 0.750
## 25   1.0  11.0     13     2     30       25   1.00 1.000
## 26   1.0  14.0     11     1     25       25   1.00 0.750
## 27   3.0  14.0      7     2    100       25   1.00 0.800
## 28   5.0  12.0     10     3    200       25   1.25 0.670
## 29   5.0  14.0     12     3    190       25   1.33 0.670
## 30   0.0  13.0     12     2     25       25   1.00 0.750
## 31   0.0  11.0     15     1     40       25   1.00 0.880
## 32   0.0  15.0      9     2     45       25   1.00 0.750
## 33   3.0  15.0      5     3     85       25   1.00 0.880
## 34   3.0  17.0      3     3     90       25   1.00 0.250
## 35   3.0  13.0      4     3    100       25   1.00 0.330
## 36   1.0  12.0     11     2     45       25   1.00 1.000
## 37   1.5  11.5     10     1     90       25   1.00 0.750
## 38   0.0  14.0     11     1     35       25   1.00 1.330
## 39   1.0  17.0      6     3     60      100   1.00    NA
## 40   2.0  20.0      9     3     95      100   1.30 0.750
## 41   0.0  21.0      3     2     40       25   1.00 1.500
## 42   2.0  12.0      6     2     95       25   1.00 0.670
## 43   0.0  12.0     12     2     55       25   1.00 1.000
## 44   0.0  16.0      3     2     95       25   1.00    NA
## 45   3.0  16.0     11     3    170       25     NA    NA
## 46   3.0  16.0     11     3    170       25     NA    NA
## 47   3.0  17.0     13     3    160       25   1.50 0.670
## 48   2.0  15.0      6     1     90       25   1.00 1.000
## 49   0.0  15.0      9     2     40       25   1.00 0.670
## 50   3.0  21.0      7     3    130       25   1.33 0.670
## 51   3.0  18.0      2     3     90       25   1.00    NA
## 52   1.5  13.5     10     3    120       25   1.25 0.500
## 53   6.0  11.0     14     3    260       25   1.33 0.670
## 54   1.0  20.0      3     3     45      100   1.00 1.000
## 55   0.0  13.0      0     3     15        0   0.50 1.000
## 56   1.0  10.0      0     3     50        0   0.50    NA
## 57   2.0  14.0      6     3    110       25   1.00 0.500
## 58   2.7    NA     NA     1    110        0   1.00 0.670
## 59   5.0  14.0     12     2    240       25   1.33 0.750
## 60   2.5  10.5      8     3    140       25   1.00 0.500
## 61   2.0  15.0      6     3    110       25   1.00 0.500
## 62   0.0  23.0      2     1     30       25   1.00 1.130
## 63   0.0  22.0      3     1     35       25   1.00 1.000
## 64   3.0  16.0      0     1     95        0   0.83    NA
## 65   4.0  19.0      0     1    140        0   1.00 0.670
## 66   3.0  20.0      0     1    120        0   1.00 0.670
## 67   1.0   9.0     15     2     40       25   1.00 0.750
## 68   1.0  16.0      3     1     55       25   1.00 1.000
## 69   3.0  15.0      5     2     90       25   1.00    NA
## 70   0.0  21.0      3     3     35      100   1.00 1.000
## 71   4.0  15.0     14     3    230      100   1.50 1.000
## 72   3.0  16.0      3     3    110      100   1.00 1.000
## 73   0.0  21.0      3     3     60       25   1.00 0.750
## 74   0.0  13.0     12     2     25       25   1.00 1.000
## 75   3.0  17.0      3     1    115       25   1.00 0.670
## 76   3.0  17.0      3     1    110       25   1.00 1.000
## 77   1.0  16.0      8     1     60       25   1.00 0.750
# Rows ordered by calories, with ties broken by cereal name.
cereal[with(cereal, order(calories, name)), ]
##                                      name mfr type calories protein fat sodium
## 55                            Puffed_Rice   Q    C       40       1   0      0
## 4               All-Bran_with_Extra_Fiber   K    C       50       4   0    140
## 56                           Puffed_Wheat   Q    C       50       2   0      0
## 1                               100%_Bran   N    C       60       4   1    130
## 3                                All-Bran   K    C       80       4   1    260
## 64                         Shredded_Wheat   N    C       80       2   0      0
## 65                 Shredded_Wheat_'n'Bran   N    C       80       3   0      0
## 69                Strawberry_Fruit_Wheats   N    C       80       2   0     15
## 9                               Bran_Chex   R    C       90       2   1    200
## 10                            Bran_Flakes   P    C       90       3   0    210
## 23                 Crispy_Wheat_&_Raisins   G    C       90       2   1    140
## 27                    Frosted_Mini-Wheats   K    C       90       3   0      0
## 48                   Multi-Grain_Cheerios   G    C       90       2   1    220
## 51                      Nutri-grain_Wheat   K    C       90       3   0    170
## 54                             Product_19   K    C       90       3   0    320
## 58                         Quaker_Oatmeal   Q    H       90       5   2      0
## 60                        Raisin_Nut_Bran   G    C       90       3   2    140
## 61                         Raisin_Squares   K    C       90       2   0      0
## 66              Shredded_Wheat_spoon_size   N    C       90       3   0      0
## 76                               Wheaties   G    C       90       3   1    200
## 14                               Clusters   G    C      100       3   2    140
## 18                              Corn_Pops   K    C      100       1   0     90
## 21                 Cream_of_Wheat_(Quick)   N    H      100       3   0     80
## 22                                Crispix   K    C      100       2   0    220
## 24                            Double_Chex   R    C      100       2   0    190
## 25                            Froot_Loops   K    C      100       2   1    125
## 30                         Fruity_Pebbles   P    C      100       1   1    135
## 31                           Golden_Crisp   P    C      100       2   0     45
## 32                         Golden_Grahams   G    C      100       1   1    280
## 33                      Grape_Nuts_Flakes   P    C      100       3   1    140
## 39            Just_Right_Crunchy__Nuggets   K    C      100       2   1    170
## 41                                    Kix   G    C      100       2   1    260
## 42                                   Life   Q    C      100       4   2    150
## 57                     Quaker_Oat_Squares   Q    C      100       4   1    135
## 63                          Rice_Krispies   K    C      100       2   0    290
## 68                              Special_K   K    C      100       6   0    230
## 70                      Total_Corn_Flakes   G    C      100       2   1    200
## 73                                Triples   G    C      100       2   1    250
## 75                             Wheat_Chex   R    C      100       3   1    230
## 2                       100%_Natural_Bran   Q    C      110       3   5     15
## 5                          Almond_Delight   R    C      110       2   2    200
## 6                 Apple_Cinnamon_Cheerios   G    C      110       2   2    180
## 7                             Apple_Jacks   K    C      110       2   0    125
## 12                               Cheerios   G    C      110       6   2    290
## 15                            Cocoa_Puffs   G    C      110       1   1    180
## 16                              Corn_Chex   R    C      110       2   0    280
## 17                            Corn_Flakes   K    C      110       2   0    290
## 19                          Count_Chocula   G    C      110       1   1    180
## 20                     Crackling_Oat_Bran   K    C      110       3   3    140
## 26                         Frosted_Flakes   K    C      110       1   0    200
## 34                             Grape-Nuts   P    C      110       3   0    170
## 35                     Great_Grains_Pecan   P    C      110       3   3     75
## 37                     Honey_Nut_Cheerios   G    C      110       3   1    250
## 43                           Lucky_Charms   G    C      110       2   1    180
## 44                                  Maypo   A    H      110       4   1      0
## 62                              Rice_Chex   R    C      110       1   0    240
## 67                                 Smacks   K    C      110       2   1     70
## 72                      Total_Whole_Grain   G    C      110       3   1    200
## 74                                   Trix   G    C      110       1   1    140
## 77                    Wheaties_Honey_Gold   G    C      110       2   1    200
## 11                           Cap'n'Crunch   Q    C      120       1   2    220
## 28 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats   P    C      120       3   2    160
## 38                             Honey-comb   P    C      120       1   0    180
## 49                       Nut&Honey_Crunch   K    C      120       2   1    190
## 59                            Raisin_Bran   K    C      120       3   1    210
## 13                  Cinnamon_Toast_Crunch   G    C      130       1   3    210
## 29                          Fruitful_Bran   K    C      130       3   0    240
## 36                       Honey_Graham_Ohs   Q    C      130       1   2    220
## 52                   Oatmeal_Raisin_Crisp   G    C      130       3   2    170
## 53                  Post_Nat._Raisin_Bran   P    C      130       3   1    200
## 8                                 Basic_4   G    C      140       3   2    210
## 40                 Just_Right_Fruit_&_Nut   K    C      140       3   1    170
## 50              Nutri-Grain_Almond-Raisin   K    C      140       3   2    220
## 45       Muesli_Raisins,_Dates,_&_Almonds   R    C      150       4   3     95
## 46      Muesli_Raisins,_Peaches,_&_Pecans   R    C      150       4   3    150
## 71                      Total_Raisin_Bran   G    C      150       3   1    190
## 47                   Mueslix_Crispy_Blend   K    C      160       3   2    150
##    fibre carbo sugars shelf potass vitamins weight  cups
## 55   0.0  13.0      0     3     15        0   0.50 1.000
## 4   14.0   8.0      0     3    330       25   1.00 0.500
## 56   1.0  10.0      0     3     50        0   0.50    NA
## 1   10.0   5.0      6     3    280       25   1.00 0.331
## 3    9.0   7.0      5     3    320       25   1.00 0.330
## 64   3.0  16.0      0     1     95        0   0.83    NA
## 65   4.0  19.0      0     1    140        0   1.00 0.670
## 69   3.0  15.0      5     2     90       25   1.00    NA
## 9    4.0  15.0      6     1    125       25   1.00 0.670
## 10   5.0  13.0      5     3    190       25   1.00 0.670
## 23   2.0  11.0     10     3    120       25   1.00 0.750
## 27   3.0  14.0      7     2    100       25   1.00 0.800
## 48   2.0  15.0      6     1     90       25   1.00 1.000
## 51   3.0  18.0      2     3     90       25   1.00    NA
## 54   1.0  20.0      3     3     45      100   1.00 1.000
## 58   2.7    NA     NA     1    110        0   1.00 0.670
## 60   2.5  10.5      8     3    140       25   1.00 0.500
## 61   2.0  15.0      6     3    110       25   1.00 0.500
## 66   3.0  20.0      0     1    120        0   1.00 0.670
## 76   3.0  17.0      3     1    110       25   1.00 1.000
## 14   2.0  13.0      7     3    105       25   1.00 0.500
## 18   1.0  13.0     12     2     20       25   1.00 1.000
## 21   1.0  21.0      0     2     NA        0   1.00 1.000
## 22   1.0  21.0      3     3     30       25   1.00 1.000
## 24   1.0  18.0      5     3     80       25   1.00 0.750
## 25   1.0  11.0     13     2     30       25   1.00 1.000
## 30   0.0  13.0     12     2     25       25   1.00 0.750
## 31   0.0  11.0     15     1     40       25   1.00 0.880
## 32   0.0  15.0      9     2     45       25   1.00 0.750
## 33   3.0  15.0      5     3     85       25   1.00 0.880
## 39   1.0  17.0      6     3     60      100   1.00    NA
## 41   0.0  21.0      3     2     40       25   1.00 1.500
## 42   2.0  12.0      6     2     95       25   1.00 0.670
## 57   2.0  14.0      6     3    110       25   1.00 0.500
## 63   0.0  22.0      3     1     35       25   1.00 1.000
## 68   1.0  16.0      3     1     55       25   1.00 1.000
## 70   0.0  21.0      3     3     35      100   1.00 1.000
## 73   0.0  21.0      3     3     60       25   1.00 0.750
## 75   3.0  17.0      3     1    115       25   1.00 0.670
## 2    2.0   8.0      8     3    135        0   1.00    NA
## 5    1.0  14.0      8     3     NA       25   1.00 0.750
## 6    1.5  10.5     10     1     70       25   1.00 0.750
## 7    1.0  11.0     14     2     30       25   1.00 1.000
## 12   2.0  17.0      1     1    105       25   1.00 1.250
## 15   0.0  12.0     13     2     55       25   1.00 1.000
## 16   0.0  22.0      3     1     25       25   1.00 1.000
## 17   1.0  21.0      2     1     35       25   1.00 1.000
## 19   0.0  12.0     13     2     65       25   1.00 1.000
## 20   4.0  10.0      7     3    160       25   1.00 0.500
## 26   1.0  14.0     11     1     25       25   1.00 0.750
## 34   3.0  17.0      3     3     90       25   1.00 0.250
## 35   3.0  13.0      4     3    100       25   1.00 0.330
## 37   1.5  11.5     10     1     90       25   1.00 0.750
## 43   0.0  12.0     12     2     55       25   1.00 1.000
## 44   0.0  16.0      3     2     95       25   1.00    NA
## 62   0.0  23.0      2     1     30       25   1.00 1.130
## 67   1.0   9.0     15     2     40       25   1.00 0.750
## 72   3.0  16.0      3     3    110      100   1.00 1.000
## 74   0.0  13.0     12     2     25       25   1.00 1.000
## 77   1.0  16.0      8     1     60       25   1.00 0.750
## 11   0.0  12.0     12     2     35       25   1.00 0.750
## 28   5.0  12.0     10     3    200       25   1.25 0.670
## 38   0.0  14.0     11     1     35       25   1.00 1.330
## 49   0.0  15.0      9     2     40       25   1.00 0.670
## 59   5.0  14.0     12     2    240       25   1.33 0.750
## 13   0.0  13.0      9     2     45       25   1.00 0.750
## 29   5.0  14.0     12     3    190       25   1.33 0.670
## 36   1.0  12.0     11     2     45       25   1.00 1.000
## 52   1.5  13.5     10     3    120       25   1.25 0.500
## 53   6.0  11.0     14     3    260       25   1.33 0.670
## 8    2.0  18.0      8     3    100       25   1.33 0.750
## 40   2.0  20.0      9     3     95      100   1.30 0.750
## 50   3.0  21.0      7     3    130       25   1.33 0.670
## 45   3.0  16.0     11     3    170       25     NA    NA
## 46   3.0  16.0     11     3    170       25     NA    NA
## 71   4.0  15.0     14     3    230      100   1.50 1.000
## 47   3.0  17.0     13     3    160       25   1.50 0.670
# Build a path from its components rather than pasting the separator in by
# hand; file.path() inserts the separator between the pieces for you.
file.path("..", "sampledata")
## [1] "../sampledata"
# Merging and combining data frames.
# Three small tables keyed by `name`. They do not all cover the same people,
# and bcity and wcity both call their second column `city`.
# NOTE(review): the names suggest birth year, birth city and (presumably)
# work city -- confirm.
byear <- data.frame(
  name  = c("Carl", "Lois", "Matthew", "Marianne", "David"),
  byear = c(1956, 1956, 1986, 1990, 1991)
)

bcity <- data.frame(
  name = c("Carl", "Lois", "Matthew"),
  city = c("Wpg", "Brandon", "Wpg")
)

wcity <- data.frame(
  name = c("Matthew", "Marianne", "David"),
  city = c("Ottawa", "Vancouver", "Victoria")
)

# cbind must be used with caution: it glues the data frames together side by
# side by row position and never looks at the `name` values. Below, row 1
# pairs Carl with Matthew, and the result carries two `name` and two `city`
# columns.
cbind(bcity, wcity)
##      name    city     name      city
## 1    Carl     Wpg  Matthew    Ottawa
## 2    Lois Brandon Marianne Vancouver
## 3 Matthew     Wpg    David  Victoria
# merge - careful of non-matches.
# byear and bcity share only the `name` column, so that is the join key.
# Default merge keeps only the names present in both tables.
merge(x = byear, y = bcity, by = "name")
##      name byear    city
## 1    Carl  1956     Wpg
## 2    Lois  1956 Brandon
## 3 Matthew  1986     Wpg
# all = TRUE keeps every name from either table; missing values become NA.
merge(x = byear, y = bcity, by = "name", all = TRUE)
##       name byear    city
## 1     Carl  1956     Wpg
## 2    David  1991    <NA>
## 3     Lois  1956 Brandon
## 4 Marianne  1990    <NA>
## 5  Matthew  1986     Wpg
# all.y = TRUE keeps every row of the second table (bcity). All of its names
# also occur in byear, so here the result equals the default merge.
merge(x = byear, y = bcity, by = "name", all.y = TRUE)
##      name byear    city
## 1    Carl  1956     Wpg
## 2    Lois  1956 Brandon
## 3 Matthew  1986     Wpg
# multiple merges - fold merge() over a list of data frames with Reduce().
#
# Pitfall: without `by`, merge() joins on every column name the two inputs
# have in common. After the first merge the running result has a `city`
# column, and so does wcity, so the second merge matches on name AND city.
# No (name, city) pair occurs on both sides, so David, Marianne and Matthew
# each end up split over two rows.
Reduce(function(x, y) merge(x, y, all = TRUE), list(byear, bcity, wcity))
##       name      city byear
## 1     Carl       Wpg  1956
## 2    David  Victoria    NA
## 3    David      <NA>  1991
## 4     Lois   Brandon  1956
## 5 Marianne Vancouver    NA
## 6 Marianne      <NA>  1990
## 7  Matthew    Ottawa    NA
## 8  Matthew       Wpg  1986
# Naming the key explicitly gives one row per person. The two clashing `city`
# columns come back with merge()'s default suffixes: city.x is the column
# from bcity and city.y the column from wcity.
Reduce(function(x, y) merge(x, y, by = "name", all = TRUE),
       list(byear, bcity, wcity))
##       name byear  city.x    city.y
## 1     Carl  1956     Wpg      <NA>
## 2    David  1991    <NA>  Victoria
## 3     Lois  1956 Brandon      <NA>
## 4 Marianne  1990    <NA> Vancouver
## 5  Matthew  1986     Wpg    Ottawa