R: tidytext NASA data: Difference between revisions
From OnnoCenterWiki
Jump to navigation | Jump to search
Onnowpurbo (talk | contribs) No edit summary |
Onnowpurbo (talk | contribs) No edit summary |
||
| Line 30: | Line 30: | ||
nasa_keyword | nasa_keyword | ||
library(tidytext) | |||
nasa_title <- nasa_title %>% | |||
unnest_tokens(word, title) %>% | |||
anti_join(stop_words) | |||
nasa_desc <- nasa_desc %>% | |||
unnest_tokens(word, desc) %>% | |||
anti_join(stop_words) | |||
nasa_title | |||
nasa_desc | |||
Revision as of 01:13, 8 November 2018
# Fetch and parse the JSON document at the NASA data.json URL.
# This requires network access; fromJSON() will error if the download fails.
library(jsonlite)
metadata <- fromJSON("https://data.nasa.gov/data.json")

# List the fields available under the `dataset` element.
names(metadata$dataset)

# Inspect the types of the three fields used later in this script.
# Each call is on its own line: R cannot parse several expressions on a
# single line unless they are separated by `;`.
class(metadata$dataset$title)
class(metadata$dataset$description)
class(metadata$dataset$keyword)
# Extract the titles: build a two-column table of dataset id and title.
# tibble() (re-exported by dplyr) replaces the deprecated data_frame().
# NOTE(review): assumes each record exposes its id at `_id`$`$oid`; if that
# field is absent the `id` column is silently dropped -- confirm against the
# current feed.
library(dplyr)
nasa_title <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  title = metadata$dataset$title
)
nasa_title
# Build a two-column table of dataset id and description.
# tibble() (re-exported by dplyr) replaces the deprecated data_frame().
# NOTE(review): assumes each record exposes its id at `_id`$`$oid` -- confirm
# against the current feed.
nasa_desc <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  desc = metadata$dataset$description
)

# Print a random sample of 5 descriptions (the result is not stored).
nasa_desc %>%
  select(desc) %>%
  sample_n(5)
# Extract the keywords: pair each dataset id with its `keyword` entry, then
# unnest so that every keyword ends up on its own row.
# tibble() (re-exported by dplyr) replaces the deprecated data_frame().
# NOTE(review): presumably `keyword` is a list column holding one character
# vector per dataset -- verify with class(metadata$dataset$keyword).
library(tidyr)
nasa_keyword <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  keyword = metadata$dataset$keyword
) %>%
  unnest(keyword)
nasa_keyword
# Tokenise the title and description text into a `word` column and drop the
# rows whose word appears in tidytext's `stop_words` table.
# Both tables are overwritten in place, so the original `title` / `desc`
# columns are no longer available after this step.
library(tidytext)

nasa_title <- nasa_title %>%
  unnest_tokens(word, title) %>%
  # Join explicitly on `word` instead of relying on an implicit natural join.
  anti_join(stop_words, by = "word")

nasa_desc <- nasa_desc %>%
  unnest_tokens(word, desc) %>%
  anti_join(stop_words, by = "word")

# Print each result separately; two bare names on one line is a parse error.
nasa_title
nasa_desc