Skip to main content

dplyr::select

dplyr::select は、tidyverse コレクションの dplyr パッケージに含まれる関数で、データフレームから指定した列を選択するために使用される。

列を選択する様々な方法を利用できるため、柔軟に列の選択をすることができる。

クイックリファレンス

library(tidyverse)

df %>%
select(col1, col2, ...)
penguins %>%
select(species, starts_with("bill"))

# # A tibble: 344 × 3
# species bill_length_mm bill_depth_mm
# <fct> <dbl> <dbl>
# 1 Adelie 39.1 18.7
# 2 Adelie 39.5 17.4
# 3 Adelie 40.3 18
# 4 Adelie NA NA
# 5 Adelie 36.7 19.3
# 6 Adelie 39.3 20.6
# 7 Adelie 38.9 17.8
# 8 Adelie 39.2 19.6
# 9 Adelie 34.1 18.1
# 10 Adelie 42 20.2
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

基本構文

select(.data, ...)
引数 | 説明
.data | データフレーム(または tibble)。
... | 選択する列名や列番号。<tidy-select>も指定可能。

使用例

1. 指定した列を選択

penguins %>%
select(species, island)

# # A tibble: 344 × 2
# species island
# <fct> <fct>
# 1 Adelie Torgersen
# 2 Adelie Torgersen
# 3 Adelie Torgersen
# 4 Adelie Torgersen
# 5 Adelie Torgersen
# 6 Adelie Torgersen
# 7 Adelie Torgersen
# 8 Adelie Torgersen
# 9 Adelie Torgersen
# 10 Adelie Torgersen
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

列名を変更して選択することも出来る。

penguins %>%
select(SPECIES = species, ISLAND = island)

# # A tibble: 344 × 2
# SPECIES ISLAND
# <fct> <fct>
# 1 Adelie Torgersen
# 2 Adelie Torgersen
# 3 Adelie Torgersen
# 4 Adelie Torgersen
# 5 Adelie Torgersen
# 6 Adelie Torgersen
# 7 Adelie Torgersen
# 8 Adelie Torgersen
# 9 Adelie Torgersen
# 10 Adelie Torgersen
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

2. 指定した列以外を選択

penguins %>%
select(!c(species, island))

# # A tibble: 344 × 5
# bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
# <dbl> <dbl> <int> <int> <fct>
# 1 39.1 18.7 181 3750 male
# 2 39.5 17.4 186 3800 female
# 3 40.3 18 195 3250 female
# 4 NA NA NA NA NA
# 5 36.7 19.3 193 3450 female
# 6 39.3 20.6 190 3650 male
# 7 38.9 17.8 181 3625 female
# 8 39.2 19.6 195 4675 male
# 9 34.1 18.1 193 3475 NA
# 10 42 20.2 190 4250 NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

3. 連続する列を範囲選択

penguins %>%
select(1:3)

# # A tibble: 344 × 3
# species island bill_length_mm
# <fct> <fct> <dbl>
# 1 Adelie Torgersen 39.1
# 2 Adelie Torgersen 39.5
# 3 Adelie Torgersen 40.3
# 4 Adelie Torgersen NA
# 5 Adelie Torgersen 36.7
# 6 Adelie Torgersen 39.3
# 7 Adelie Torgersen 38.9
# 8 Adelie Torgersen 39.2
# 9 Adelie Torgersen 34.1
# 10 Adelie Torgersen 42
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(species:bill_length_mm)

# # A tibble: 344 × 3
# species island bill_length_mm
# <fct> <fct> <dbl>
# 1 Adelie Torgersen 39.1
# 2 Adelie Torgersen 39.5
# 3 Adelie Torgersen 40.3
# 4 Adelie Torgersen NA
# 5 Adelie Torgersen 36.7
# 6 Adelie Torgersen 39.3
# 7 Adelie Torgersen 38.9
# 8 Adelie Torgersen 39.2
# 9 Adelie Torgersen 34.1
# 10 Adelie Torgersen 42
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

4. 全ての列を選択

tidyselect::everything を使用すると、全ての列を選択できる。

penguins %>%
select(everything())

# # A tibble: 344 × 7
# species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
# <fct> <fct> <dbl> <dbl> <int> <int> <fct>
# 1 Adelie Torgersen 39.1 18.7 181 3750 male
# 2 Adelie Torgersen 39.5 17.4 186 3800 female
# 3 Adelie Torgersen 40.3 18 195 3250 female
# 4 Adelie Torgersen NA NA NA NA NA
# 5 Adelie Torgersen 36.7 19.3 193 3450 female
# 6 Adelie Torgersen 39.3 20.6 190 3650 male
# 7 Adelie Torgersen 38.9 17.8 181 3625 female
# 8 Adelie Torgersen 39.2 19.6 195 4675 male
# 9 Adelie Torgersen 34.1 18.1 193 3475 NA
# 10 Adelie Torgersen 42 20.2 190 4250 NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

5. 最後の列を選択

tidyselect::last_col を使用すると、最後の列を選択する。

penguins %>%
select(last_col())

# # A tibble: 344 × 1
# sex
# <fct>
# 1 male
# 2 female
# 3 female
# 4 NA
# 5 female
# 6 male
# 7 female
# 8 male
# 9 NA
# 10 NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

offset パラメータに数値 n を指定した場合、最後の列から n 列分さかのぼった列(最後から n + 1 番目の列)が選択される。

penguins %>%
select(last_col(offset = 2))

# # A tibble: 344 × 1
# flipper_length_mm
# <int>
# 1 181
# 2 186
# 3 195
# 4 NA
# 5 193
# 6 190
# 7 181
# 8 195
# 9 193
# 10 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

6. グループ化した列を選択

dplyr::group_cols を使用すると、グループ化に使用されている列を選択する。

penguins %>%
group_by(species, island) %>%
select(group_cols())

# # A tibble: 344 × 2
# # Groups: species, island [5]
# species island
# <fct> <fct>
# 1 Adelie Torgersen
# 2 Adelie Torgersen
# 3 Adelie Torgersen
# 4 Adelie Torgersen
# 5 Adelie Torgersen
# 6 Adelie Torgersen
# 7 Adelie Torgersen
# 8 Adelie Torgersen
# 9 Adelie Torgersen
# 10 Adelie Torgersen
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

7. パターンに一致する列を選択

ヘルパー関数 | パターン
tidyselect::starts_with | 前方一致
tidyselect::ends_with | 後方一致
tidyselect::contains | 部分一致
tidyselect::matches | 正規表現と一致(貪欲マッチ)
tidyselect::num_range | 数値範囲と一致
penguins %>%
select(starts_with("bill"))

# # A tibble: 344 × 2
# bill_length_mm bill_depth_mm
# <dbl> <dbl>
# 1 39.1 18.7
# 2 39.5 17.4
# 3 40.3 18
# 4 NA NA
# 5 36.7 19.3
# 6 39.3 20.6
# 7 38.9 17.8
# 8 39.2 19.6
# 9 34.1 18.1
# 10 42 20.2
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(ends_with("_mm"))

# # A tibble: 344 × 3
# bill_length_mm bill_depth_mm flipper_length_mm
# <dbl> <dbl> <int>
# 1 39.1 18.7 181
# 2 39.5 17.4 186
# 3 40.3 18 195
# 4 NA NA NA
# 5 36.7 19.3 193
# 6 39.3 20.6 190
# 7 38.9 17.8 181
# 8 39.2 19.6 195
# 9 34.1 18.1 193
# 10 42 20.2 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(contains("length"))

# # A tibble: 344 × 2
# bill_length_mm flipper_length_mm
# <dbl> <int>
# 1 39.1 181
# 2 39.5 186
# 3 40.3 195
# 4 NA NA
# 5 36.7 193
# 6 39.3 190
# 7 38.9 181
# 8 39.2 195
# 9 34.1 193
# 10 42 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(matches(r"(^\w{3,7}$)"))

# # A tibble: 344 × 3
# species island sex
# <fct> <fct> <fct>
# 1 Adelie Torgersen male
# 2 Adelie Torgersen female
# 3 Adelie Torgersen female
# 4 Adelie Torgersen NA
# 5 Adelie Torgersen female
# 6 Adelie Torgersen male
# 7 Adelie Torgersen female
# 8 Adelie Torgersen male
# 9 Adelie Torgersen NA
# 10 Adelie Torgersen NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
billboard %>%
select(num_range("wk", 10:15))

# # A tibble: 317 × 6
# wk10 wk11 wk12 wk13 wk14 wk15
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 NA NA NA NA NA NA
# 2 NA NA NA NA NA NA
# 3 51 51 51 47 44 38
# 4 61 61 59 61 66 72
# 5 57 64 70 75 76 78
# 6 6 7 22 29 36 47
# 7 NA NA NA NA NA NA
# 8 36 37 37 38 49 61
# 9 10 9 8 6 1 2
# 10 59 66 68 61 67 59
# # ℹ 307 more rows
# # ℹ Use `print(n = ...)` to see more rows

8. 文字ベクトルから列を選択

ヘルパー関数 | 説明
tidyselect::all_of | 文字ベクトル内の列の欠落を許可しない。
tidyselect::any_of | 文字ベクトル内の列の欠落を許可する。
vars <- c("flipper_length_mm", "flipper_depth_mm")
penguins %>%
select(all_of(vars))

# Error in `all_of()`:
# ! Can't subset columns that don't exist.
# ✖ Column `flipper_depth_mm` doesn't exist.
# Run `rlang::last_trace()` to see where the error occurred.
vars <- c("flipper_length_mm", "flipper_depth_mm")
penguins %>%
select(any_of(vars))

# # A tibble: 344 × 1
# flipper_length_mm
# <int>
# 1 181
# 2 186
# 3 195
# 4 NA
# 5 193
# 6 190
# 7 181
# 8 195
# 9 193
# 10 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows

9. 関数で列を選択

tidyselect::where を使用すると、関数(または、purrr スタイルのラムダ式)が TRUE を返す列を選択する。

penguins %>%
select(where(is.numeric))

# # A tibble: 344 × 4
# bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
# <dbl> <dbl> <int> <int>
# 1 39.1 18.7 181 3750
# 2 39.5 17.4 186 3800
# 3 40.3 18 195 3250
# 4 NA NA NA NA
# 5 36.7 19.3 193 3450
# 6 39.3 20.6 190 3650
# 7 38.9 17.8 181 3625
# 8 39.2 19.6 195 4675
# 9 34.1 18.1 193 3475
# 10 42 20.2 190 4250
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(where(~ is.numeric(.x) && mean(.x, na.rm = TRUE) > 100))

# # A tibble: 344 × 2
# flipper_length_mm body_mass_g
# <int> <int>
# 1 181 3750
# 2 186 3800
# 3 195 3250
# 4 NA NA
# 5 193 3450
# 6 190 3650
# 7 181 3625
# 8 195 4675
# 9 193 3475
# 10 190 4250
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows