|
|
|
|
|
|
|
|
## Mutating Joins |
|
|
## Mutating Joins |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
x <- data_frame( |
|
|
|
|
|
|
|
|
x <- dplyr::data_frame( |
|
|
id = 1:3, |
|
|
id = 1:3, |
|
|
x = paste0("x", 1:3) |
|
|
x = paste0("x", 1:3) |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
y <- data_frame( |
|
|
|
|
|
|
|
|
y <- dplyr::data_frame( |
|
|
id = (1:4)[-3], |
|
|
id = (1:4)[-3], |
|
|
y = paste0("y", (1:4)[-3]) |
|
|
y = paste0("y", (1:4)[-3]) |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
inner_join(x, y, by = "id") |
|
|
|
|
|
|
|
|
dplyr::inner_join(x, y, by = "id") |
|
|
#> # A tibble: 2 x 3 |
|
|
#> # A tibble: 2 x 3 |
|
|
#> id x y |
|
|
#> id x y |
|
|
#> <int> <chr> <chr> |
|
|
#> <int> <chr> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
left_join(x, y, by = "id") |
|
|
|
|
|
|
|
|
dplyr::left_join(x, y, by = "id") |
|
|
#> # A tibble: 3 x 3 |
|
|
#> # A tibble: 3 x 3 |
|
|
#> id x y |
|
|
#> id x y |
|
|
#> <int> <chr> <chr> |
|
|
#> <int> <chr> <chr> |
|
|
|
|
|
|
|
|
> of the matches are returned. |
|
|
> of the matches are returned. |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
y_extra <- bind_rows(y, data_frame(id = 2, y = "y5")) |
|
|
|
|
|
|
|
|
y_extra <- dplyr::bind_rows(y, dplyr::data_frame(id = 2, y = "y5")) |
|
|
y_extra # has multiple rows with the key from `x` |
|
|
y_extra # has multiple rows with the key from `x` |
|
|
#> # A tibble: 4 x 2 |
|
|
#> # A tibble: 4 x 2 |
|
|
#> id y |
|
|
#> id y |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
left_join(x, y_extra, by = "id") |
|
|
|
|
|
|
|
|
dplyr::left_join(x, y_extra, by = "id") |
|
|
#> # A tibble: 4 x 3 |
|
|
#> # A tibble: 4 x 3 |
|
|
#> id x y |
|
|
#> id x y |
|
|
#> <dbl> <chr> <chr> |
|
|
#> <dbl> <chr> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
right_join(x, y, by = "id") |
|
|
|
|
|
|
|
|
dplyr::right_join(x, y, by = "id") |
|
|
#> # A tibble: 3 x 3 |
|
|
#> # A tibble: 3 x 3 |
|
|
#> id x y |
|
|
#> id x y |
|
|
#> <int> <chr> <chr> |
|
|
#> <int> <chr> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
full_join(x, y, by = "id") |
|
|
|
|
|
|
|
|
dplyr::full_join(x, y, by = "id") |
|
|
#> # A tibble: 4 x 3 |
|
|
#> # A tibble: 4 x 3 |
|
|
#> id x y |
|
|
#> id x y |
|
|
#> <int> <chr> <chr> |
|
|
#> <int> <chr> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
semi_join(x, y, by = "id") |
|
|
|
|
|
|
|
|
dplyr::semi_join(x, y, by = "id") |
|
|
#> # A tibble: 2 x 2 |
|
|
#> # A tibble: 2 x 2 |
|
|
#> id x |
|
|
#> id x |
|
|
#> <int> <chr> |
|
|
#> <int> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
anti_join(x, y, by = "id") |
|
|
|
|
|
|
|
|
dplyr::anti_join(x, y, by = "id") |
|
|
#> # A tibble: 1 x 2 |
|
|
#> # A tibble: 1 x 2 |
|
|
#> id x |
|
|
#> id x |
|
|
#> <int> <chr> |
|
|
#> <int> <chr> |
|
|
|
|
|
|
|
|
## Set Operations |
|
|
## Set Operations |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
x <- data_frame( |
|
|
|
|
|
|
|
|
x <- dplyr::data_frame( |
|
|
x = c(1, 1, 2), |
|
|
x = c(1, 1, 2), |
|
|
y = c("a", "b", "a") |
|
|
y = c("a", "b", "a") |
|
|
) |
|
|
) |
|
|
y <- data_frame( |
|
|
|
|
|
|
|
|
y <- dplyr::data_frame( |
|
|
x = c(1, 2), |
|
|
x = c(1, 2), |
|
|
y = c("a", "b") |
|
|
y = c("a", "b") |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
union(x, y) |
|
|
|
|
|
|
|
|
dplyr::union(x, y) |
|
|
#> # A tibble: 4 x 2 |
|
|
#> # A tibble: 4 x 2 |
|
|
#> x y |
|
|
#> x y |
|
|
#> <dbl> <chr> |
|
|
#> <dbl> <chr> |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
|
|
|
|
|
|
union(y, x) |
|
|
|
|
|
|
|
|
dplyr::union(y, x) |
|
|
#> # A tibble: 4 x 2 |
|
|
#> # A tibble: 4 x 2 |
|
|
#> x y |
|
|
#> x y |
|
|
#> <dbl> <chr> |
|
|
#> <dbl> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
union_all(x, y) |
|
|
|
|
|
|
|
|
dplyr::union_all(x, y) |
|
|
#> # A tibble: 5 x 2 |
|
|
#> # A tibble: 5 x 2 |
|
|
#> x y |
|
|
#> x y |
|
|
#> <dbl> <chr> |
|
|
#> <dbl> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
intersect(x, y) |
|
|
|
|
|
|
|
|
dplyr::intersect(x, y) |
|
|
#> # A tibble: 1 x 2 |
|
|
#> # A tibble: 1 x 2 |
|
|
#> x y |
|
|
#> x y |
|
|
#> <dbl> <chr> |
|
|
#> <dbl> <chr> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
setdiff(x, y) |
|
|
|
|
|
|
|
|
dplyr::setdiff(x, y) |
|
|
#> # A tibble: 2 x 2 |
|
|
#> # A tibble: 2 x 2 |
|
|
#> x y |
|
|
#> x y |
|
|
#> <dbl> <chr> |
|
|
#> <dbl> <chr> |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
|
|
|
|
|
|
setdiff(y, x) |
|
|
|
|
|
|
|
|
dplyr::setdiff(y, x) |
|
|
#> # A tibble: 1 x 2 |
|
|
#> # A tibble: 1 x 2 |
|
|
#> x y |
|
|
#> x y |
|
|
#> <dbl> <chr> |
|
|
#> <dbl> <chr> |
|
|
|
|
|
|
|
|
you organize your data into tidy data. |
|
|
you organize your data into tidy data. |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
library(tidyr) |
|
|
|
|
|
|
|
|
|
|
|
long <- data_frame( |
|
|
|
|
|
|
|
|
long <- dplyr::data_frame( |
|
|
year = c(2010, 2011, 2010, 2011, 2010, 2011), |
|
|
year = c(2010, 2011, 2010, 2011, 2010, 2011), |
|
|
person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), |
|
|
person = c("Alice", "Alice", "Bob", "Bob", "Charlie", "Charlie"), |
|
|
sales = c(105, 110, 100, 97, 90, 95) |
|
|
sales = c(105, 110, 100, 97, 90, 95) |
|
|
) |
|
|
) |
|
|
wide <- data_frame( |
|
|
|
|
|
|
|
|
wide <- dplyr::data_frame( |
|
|
year = 2010:2011, |
|
|
year = 2010:2011, |
|
|
Alice = c(105, 110), |
|
|
Alice = c(105, 110), |
|
|
Bob = c(100, 97), |
|
|
Bob = c(100, 97), |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
gather(wide, key = "person", value = "sales", -year) |
|
|
|
|
|
|
|
|
tidyr::gather(wide, key = "person", value = "sales", -year) |
|
|
#> # A tibble: 6 x 3 |
|
|
#> # A tibble: 6 x 3 |
|
|
#> year person sales |
|
|
#> year person sales |
|
|
#> <int> <chr> <dbl> |
|
|
#> <int> <chr> <dbl> |
|
|
|
|
|
|
|
|
<!-- --> |
|
|
<!-- --> |
|
|
|
|
|
|
|
|
``` r |
|
|
``` r |
|
|
spread(long, key = "person", value = "sales") |
|
|
|
|
|
|
|
|
tidyr::spread(long, key = "person", value = "sales") |
|
|
#> # A tibble: 2 x 4 |
|
|
#> # A tibble: 2 x 4 |
|
|
#> year Alice Bob Charlie |
|
|
#> year Alice Bob Charlie |
|
|
#> <dbl> <dbl> <dbl> <dbl> |
|
|
#> <dbl> <dbl> <dbl> <dbl> |