library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Load the built-in iris measurements and preview their structure.
data(iris)
iris %>%
  glimpse()
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
iris has 5 variables, and 150 observations.
Create a new data frame iris1 that contains only the species virginica
and versicolor with sepal lengths
longer than 6 cm and sepal widths longer than 2.5 cm. How many
observations and variables are in the data set?
# Keep only the virginica and versicolor flowers whose sepals are longer than
# 6 cm and wider than 2.5 cm.
iris1 <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  )
Create an iris2 data frame from iris1 that contains only the columns for
Species, Sepal.Length, and Sepal.Width. How many
observations and variables are in the data set?
# Narrow iris1 down to the species label and the two sepal measurements,
# then inspect the result.
iris2 <- iris1 %>%
  select(Species, Sepal.Length, Sepal.Width)
iris2 %>%
  glimpse()
## Rows: 56
## Columns: 3
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, versicolo…
## $ Sepal.Length <dbl> 7.0, 6.4, 6.9, 6.5, 6.3, 6.6, 6.1, 6.7, 6.1, 6.1, 6.4, 6.…
## $ Sepal.Width <dbl> 3.2, 3.2, 3.1, 2.8, 3.3, 2.9, 2.9, 3.1, 2.8, 2.8, 2.9, 3.…
It has 56 observations and 3 variables.
Create an iris3 data frame from iris2 that
orders the observations from largest to smallest sepal length. Show the
first 6 rows of this data set.
# Order the observations from largest to smallest sepal length.
# arrange() sorts ascending by default and has no `by` argument, so the
# previous `by = Sepal.Length` produced a smallest-first ordering; desc() is
# what requests the descending order the exercise asks for.
iris3 <- arrange(iris2, desc(Sepal.Length))
# Show the six longest-sepal observations.
head(iris3)
## Species Sepal.Length Sepal.Width
## 1 virginica 7.9 3.8
## 2 virginica 7.7 3.8
## 3 virginica 7.7 2.6
## 4 virginica 7.7 2.8
## 5 virginica 7.7 3.0
## 6 virginica 7.6 3.0
Create an iris4 data frame from iris3 that
creates a column with a sepal area (length * width) value
for each observation. How many observations and variables are in the
data set?
# Append the sepal area (length times width) as a new column, then inspect
# the result.
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
glimpse(iris4)
## Rows: 56
## Columns: 4
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, virginica…
## $ Sepal.Length <dbl> 6.1, 6.1, 6.1, 6.1, 6.1, 6.1, 6.2, 6.2, 6.2, 6.3, 6.3, 6.…
## $ Sepal.Width <dbl> 2.9, 2.8, 2.8, 3.0, 3.0, 2.6, 2.9, 2.8, 3.4, 3.3, 3.3, 2.…
## $ Sepal.Area <dbl> 17.69, 17.08, 17.08, 18.30, 18.30, 15.86, 17.98, 17.36, 2…
It has 4 variables, 56 observations.
Create iris5 that calculates the average sepal length,
the average sepal width, and the sample size of the entire
iris4 data frame and print iris5
.
# Collapse iris4 into a single row holding the mean sepal length, the mean
# sepal width, and the number of observations.
iris5 <- iris4 %>%
  summarise(
    meanLenght = mean(Sepal.Length),
    meanWidth = mean(Sepal.Width),
    sample.Size = n()
  )
print(iris5)
## meanLenght meanWidth sample.Size
## 1 6.698214 3.041071 56
# Same summary as iris5, but computed separately for each species.
iris_species <- iris4 %>%
  group_by(Species)
iris6 <- iris_species %>%
  summarise(
    meanLenght = mean(Sepal.Length),
    meanWidth = mean(Sepal.Width),
    sample.Size = n()
  )
print(iris6)
## # A tibble: 2 × 4
## Species meanLenght meanWidth sample.Size
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
In these exercises, you have successively modified different versions of
the data frame: iris1, iris2, iris3, iris4, iris5, iris6. At each stage, the
output data frame from one operation serves as the input for the next. A
more efficient way to do this is to use the pipe operator %>% from
the magrittr package (re-exported by dplyr and tidyr). See if you can rework
all of your previous statements
(except for iris5) into an extended piping operation that uses iris as
the input and generates irisFinal as the output.
# One pipeline that reproduces the iris1 -> iris2 -> iris3 -> iris4 -> iris6
# steps without storing the intermediate data frames.
irisFinal <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  ) %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  # Largest to smallest: arrange() is ascending unless the column is wrapped
  # in desc(), and it has no `by` argument.
  arrange(desc(Sepal.Length)) %>%
  # The iris4 step (sepal area), which the chain previously skipped.
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  group_by(Species) %>%
  # Per-species means and counts; these do not depend on row order or on the
  # Sepal.Area column, so the printed summary is the same as before.
  summarize(
    meanLenght = mean(Sepal.Length),
    meanWidth = mean(Sepal.Width),
    sample.Size = n()
  )
print(irisFinal)
## # A tibble: 2 × 4
## Species meanLenght meanWidth sample.Size
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
# Reshape iris to long format: the four measurement columns are stacked into
# a "Measure" name column and a "Values" column, keeping Species alongside.
iris_long <- iris %>%
  pivot_longer(
    cols = 1:4,
    names_to = "Measure",
    values_to = "Values"
  )
glimpse(iris_long)
## Rows: 600
## Columns: 3
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, setosa…
## $ Measure <chr> "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", …
## $ Values <dbl> 5.1, 3.5, 1.4, 0.2, 4.9, 3.0, 1.4, 0.2, 4.7, 3.2, 1.3, 0.2, 4.…
Now it has 3 variables and 600 observations.