library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the built-in iris data set into the workspace and preview its first six rows.
data("iris")
head(iris, n = 6)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Show the compact structure of iris: dimensions, column types, leading values.
str(object = iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
There are 150 observations and 5 variables in the iris dataset.
Create a data frame iris1 that contains only the species virginica and versicolor with sepal lengths longer than 6 cm and sepal widths longer than 2.5 cm. How many observations and variables are in the dataset?
iris1 <- filter(iris, Species %in% c('virginica', 'versicolor'), Sepal.Length > 6, Sepal.Width > 2.5)
There are 56 observations and 5 variables after filtering by these criteria.
# Keep only the species label and the two sepal measurements.
iris2 <- iris1 %>%
  select(Species, Sepal.Length, Sepal.Width)
There are 56 observations and 3 variables in this new dataset.
# Order rows from the longest sepal to the shortest, then show the top six.
iris3 <- iris2 %>%
  arrange(desc(Sepal.Length))
head(iris3, n = 6)
## Species Sepal.Length Sepal.Width
## 1 virginica 7.9 3.8
## 2 virginica 7.7 3.8
## 3 virginica 7.7 2.6
## 4 virginica 7.7 2.8
## 5 virginica 7.7 3.0
## 6 virginica 7.6 3.0
# Add the sepal area (width times length) as a new column.
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Width * Sepal.Length)
There are 56 observations and 4 variables in the iris4 dataset
# Overall summary of iris4: mean sepal length, mean sepal width, and row count.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the meaning of `na.rm`.
iris5 <- summarize(
  iris4,
  meanSepL = mean(Sepal.Length, na.rm = TRUE),
  meanSepW = mean(Sepal.Width, na.rm = TRUE),
  TotalN = n()
)
iris5
## meanSepL meanSepW TotalN
## 1 6.698214 3.041071 56
# Per-species summary of iris4: mean sepal length, mean sepal width, and the
# number of rows in each species group.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the meaning of `na.rm`.
iris6 <- iris4 %>%
  group_by(Species) %>%
  summarize(
    meanSepL = mean(Sepal.Length, na.rm = TRUE),
    meanSepW = mean(Sepal.Width, na.rm = TRUE),
    TotalN = n()
  )
iris6
## # A tibble: 2 x 4
## Species meanSepL meanSepW TotalN
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
# Single pipeline, starting from the raw iris data, that chains the filter,
# select, arrange, mutate, and grouped summarize steps into one expression.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the meaning of `na.rm`.
iris7 <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  ) %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  arrange(desc(Sepal.Length)) %>%
  mutate(Sepal.Area = Sepal.Width * Sepal.Length) %>%
  group_by(Species) %>%
  summarize(
    meanSepL = mean(Sepal.Length, na.rm = TRUE),
    meanSepW = mean(Sepal.Width, na.rm = TRUE),
    TotalN = n()
  )
# Display the step-by-step per-species summary.
print(iris6)
## # A tibble: 2 x 4
## Species meanSepL meanSepW TotalN
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
# Display the per-species summary produced by the single pipeline.
print(iris7)
## # A tibble: 2 x 4
## Species meanSepL meanSepW TotalN
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
# Reshape iris from wide to long: the first four columns are stacked so that
# each row holds one measurement name ("Measure") and its value ("Value").
# Rows whose value is NA are dropped.
long_iris <- pivot_longer(
  iris,
  cols = 1:4,
  names_to = "Measure",
  values_to = "Value",
  values_drop_na = TRUE
)
long_iris
## # A tibble: 600 x 3
## Species Measure Value
## <fct> <chr> <dbl>
## 1 setosa Sepal.Length 5.1
## 2 setosa Sepal.Width 3.5
## 3 setosa Petal.Length 1.4
## 4 setosa Petal.Width 0.2
## 5 setosa Sepal.Length 4.9
## 6 setosa Sepal.Width 3
## 7 setosa Petal.Length 1.4
## 8 setosa Petal.Width 0.2
## 9 setosa Sepal.Length 4.7
## 10 setosa Sepal.Width 3.2
## # ... with 590 more rows