R: tidy text dataset - tibble: Difference between revisions
From OnnoCenterWiki
Jump to navigation | Jump to search
Onnowpurbo (talk | contribs) No edit summary |
Onnowpurbo (talk | contribs) |
||
| Line 29: | Line 29: | ||
original_books | original_books | ||
Buat menjadi one-token-per-row | |||
library(tidytext) | |||
tidy_books <- original_books %>% | |||
unnest_tokens(word, text) | |||
tidy_books | |||
==Pranala Menarik== | ==Pranala Menarik== | ||
* [[R]] | * [[R]] | ||
Revision as of 02:17, 31 October 2018
Text Vector
# Sample input: a character vector holding four short lines of text,
# one string per element.
text <- c(
  "Because I could not stop for Death -",
  "He kindly stopped for me -",
  "The Carriage held but just Ourselves -",
  "and Immortality"
)

# Auto-print the vector to inspect it.
text
Tidy Text Dataset
# Install dplyr only when it is missing, so that re-running this script
# does not reinstall the package on every execution.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Put the text vector into a data frame with one row per element.
# `tibble()` replaces the deprecated `data_frame()`; `seq_along(text)`
# numbers the rows 1..length(text) instead of hard-coding 1:4.
text_df <- tibble(line = seq_along(text), text = text)

# Auto-print the result to inspect it.
text_df
Tidy Text Novel
library(janeaustenr)
library(dplyr)
library(stringr)

# Annotate every row returned by austen_books() with its position inside
# its book and with a running chapter counter, computed per `book` group.
original_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    # Row index within the current book group.
    linenumber = row_number(),
    # Cumulative count of rows whose text starts with "chapter " followed
    # by a digit or one of the letters i, v, x, l, c (case-insensitive).
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  # Drop the grouping so later steps operate on the whole table.
  ungroup()

# Auto-print the result to inspect it.
original_books
Buat menjadi one-token-per-row
library(tidytext)

# Tokenize the `text` column of original_books into a new `word` column
# (the one-token-per-row form named in the heading above).
tidy_books <- unnest_tokens(original_books, output = word, input = text)

# Auto-print the result to inspect it.
tidy_books