<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=R%3A_tidytext%3A_compare_text</id>
	<title>R: tidytext: compare text - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=R%3A_tidytext%3A_compare_text"/>
	<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=R:_tidytext:_compare_text&amp;action=history"/>
	<updated>2026-04-21T04:38:25Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.45.1</generator>
	<entry>
		<id>https://lms.onnocenter.or.id/wiki/index.php?title=R:_tidytext:_compare_text&amp;diff=57714&amp;oldid=prev</id>
		<title>Onnowpurbo: Created page with &quot; # Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/01-tidy-text.Rmd   library(knitr)  opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE)  options(width...&quot;</title>
		<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=R:_tidytext:_compare_text&amp;diff=57714&amp;oldid=prev"/>
		<updated>2019-12-03T00:16:39Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot; # Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/01-tidy-text.Rmd   library(knitr)  opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE)  options(width...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt; # Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/01-tidy-text.Rmd&lt;br /&gt;
&lt;br /&gt;
 library(knitr)&lt;br /&gt;
 opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE)&lt;br /&gt;
 options(width = 100, dplyr.width = 100)&lt;br /&gt;
 library(ggplot2)&lt;br /&gt;
 theme_set(theme_light())&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 # Jane Austen&lt;br /&gt;
 library(janeaustenr)&lt;br /&gt;
 library(dplyr)&lt;br /&gt;
 library(stringr)&lt;br /&gt;
 original_books &amp;lt;- austen_books() %&amp;gt;%&lt;br /&gt;
   group_by(book) %&amp;gt;%&lt;br /&gt;
   mutate(linenumber = row_number(),&lt;br /&gt;
          chapter = cumsum(str_detect(text, regex(&amp;quot;^chapter [\\divxlc]&amp;quot;,&lt;br /&gt;
                                                  ignore_case = TRUE)))) %&amp;gt;%&lt;br /&gt;
   ungroup()&lt;br /&gt;
 original_books&lt;br /&gt;
 library(tidytext)&lt;br /&gt;
 tidy_books &amp;lt;- original_books %&amp;gt;%&lt;br /&gt;
   unnest_tokens(word, text)&lt;br /&gt;
 tidy_books&lt;br /&gt;
 data(stop_words)&lt;br /&gt;
 tidy_books &amp;lt;- tidy_books %&amp;gt;%&lt;br /&gt;
   anti_join(stop_words)&lt;br /&gt;
 tidy_books %&amp;gt;%&lt;br /&gt;
   count(word, sort = TRUE) &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 # H.G. Wells&lt;br /&gt;
 library(gutenbergr)&lt;br /&gt;
 hgwells &amp;lt;- gutenberg_download(c(35, 36, 5230, 159))&lt;br /&gt;
 tidy_hgwells &amp;lt;- hgwells %&amp;gt;%&lt;br /&gt;
   unnest_tokens(word, text) %&amp;gt;%&lt;br /&gt;
   anti_join(stop_words)&lt;br /&gt;
 tidy_hgwells %&amp;gt;%&lt;br /&gt;
   count(word, sort = TRUE)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 # Brontë sisters&lt;br /&gt;
 bronte &amp;lt;- gutenberg_download(c(1260, 768, 969, 9182, 767))&lt;br /&gt;
 tidy_bronte &amp;lt;- bronte %&amp;gt;%&lt;br /&gt;
   unnest_tokens(word, text) %&amp;gt;%&lt;br /&gt;
   anti_join(stop_words)&lt;br /&gt;
 tidy_bronte %&amp;gt;%&lt;br /&gt;
   count(word, sort = TRUE)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 # calculate the frequency for each word for the works of Jane Austen, the Brontë sisters, and H.G. Wells by binding the data frames together.&lt;br /&gt;
 # We can use `spread` and `gather` from tidyr to reshape our dataframe&lt;br /&gt;
 library(tidyr)&lt;br /&gt;
 frequency &amp;lt;- bind_rows(mutate(tidy_bronte, author = &amp;quot;Brontë Sisters&amp;quot;),&lt;br /&gt;
                        mutate(tidy_hgwells, author = &amp;quot;H.G. Wells&amp;quot;), &lt;br /&gt;
                        mutate(tidy_books, author = &amp;quot;Jane Austen&amp;quot;)) %&amp;gt;% &lt;br /&gt;
   mutate(word = str_extract(word, &amp;quot;[a-z&amp;#039;]+&amp;quot;)) %&amp;gt;%&lt;br /&gt;
   count(author, word) %&amp;gt;%&lt;br /&gt;
   group_by(author) %&amp;gt;%&lt;br /&gt;
   mutate(proportion = n / sum(n)) %&amp;gt;% &lt;br /&gt;
   select(-n) %&amp;gt;% &lt;br /&gt;
   spread(author, proportion) %&amp;gt;% &lt;br /&gt;
   gather(author, proportion, `Brontë Sisters`:`H.G. Wells`)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 # let&amp;#039;s plot each author&amp;#039;s word frequencies against Jane Austen&amp;#039;s&lt;br /&gt;
 library(scales)&lt;br /&gt;
 # expect a warning about rows with missing values being removed&lt;br /&gt;
 ggplot(frequency, aes(x = proportion, y = `Jane Austen`, color = abs(`Jane Austen` - proportion))) +&lt;br /&gt;
   geom_abline(color = &amp;quot;gray40&amp;quot;, lty = 2) +&lt;br /&gt;
   geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +&lt;br /&gt;
   geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +&lt;br /&gt;
   scale_x_log10(labels = percent_format()) +&lt;br /&gt;
   scale_y_log10(labels = percent_format()) +&lt;br /&gt;
   scale_color_gradient(limits = c(0, 0.001), low = &amp;quot;darkslategray4&amp;quot;, high = &amp;quot;gray75&amp;quot;) +&lt;br /&gt;
   facet_wrap(~author, ncol = 2) +&lt;br /&gt;
   theme(legend.position=&amp;quot;none&amp;quot;) +&lt;br /&gt;
   labs(y = &amp;quot;Jane Austen&amp;quot;, x = NULL)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 # quantify how similar or different these sets of word frequencies are using a correlation test&lt;br /&gt;
 cor.test(data = frequency[frequency$author == &amp;quot;Brontë Sisters&amp;quot;,],&lt;br /&gt;
          ~ proportion + `Jane Austen`)&lt;br /&gt;
 cor.test(data = frequency[frequency$author == &amp;quot;H.G. Wells&amp;quot;,], &lt;br /&gt;
          ~ proportion + `Jane Austen`) &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Referensi==&lt;br /&gt;
&lt;br /&gt;
* https://github.com/dgrtwo/tidy-text-mining/blob/master/01-tidy-text.Rmd&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Pranala Menarik==&lt;br /&gt;
&lt;br /&gt;
* [[R]]&lt;/div&gt;</summary>
		<author><name>Onnowpurbo</name></author>
	</entry>
</feed>