<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<!-- Feed-level metadata for the revision-history feed of a single wiki page. -->
	<!-- Query-string ampersands are written as &amp; because a bare ampersand is not allowed in XML text or attribute values. -->
	<id>https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=R%3A_tidytext%3A_tf-idf_Jane_Austen_novels</id>
	<title>R: tidytext: tf-idf Jane Austen novels - Revision history</title>
	<!-- rel="self" is the URL of this Atom document; rel="alternate" is the HTML history page for the same title. -->
	<link rel="self" type="application/atom+xml" href="https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=R%3A_tidytext%3A_tf-idf_Jane_Austen_novels"/>
	<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=R:_tidytext:_tf-idf_Jane_Austen_novels&amp;action=history"/>
	<!-- NOTE(review): this timestamp is presumably when the feed was generated, not the last page edit; confirm against the generator. -->
	<updated>2026-04-21T04:41:36Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.45.1</generator>
	<entry>
		<!-- One revision of the page: the id and the alternate link both address diff=57734 compared with oldid=prev. -->
		<!-- The title is the author name followed by the edit summary; the quoted text ends in "..." where it is cut off. -->
		<id>https://lms.onnocenter.or.id/wiki/index.php?title=R:_tidytext:_tf-idf_Jane_Austen_novels&amp;diff=57734&amp;oldid=prev</id>
		<title>Onnowpurbo: Created page with &quot; # Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/03-tf-idf.Rmd   library(knitr)  opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE)  options(width =...&quot;</title>
		<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=R:_tidytext:_tf-idf_Jane_Austen_novels&amp;diff=57734&amp;oldid=prev"/>
		<updated>2019-12-03T21:42:36Z</updated>

		<!-- type="html": the element text is entity-escaped HTML. After XML parsing, &lt; and &gt; become HTML tag brackets, while &amp;lt;, &amp;gt; and &amp;quot; remain HTML entities so the R assignment arrow, the pipe operator and string quotes render as literal characters. -->
		<!-- Line breaks and leading spaces inside the summary are part of the payload; do not re-indent or pretty-print it. -->
		<summary type="html">&lt;p&gt;Created page with &amp;quot; # Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/03-tf-idf.Rmd   library(knitr)  opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE)  options(width =...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt; # Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/03-tf-idf.Rmd&lt;br /&gt;
&lt;br /&gt;
 library(knitr)&lt;br /&gt;
 opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE)&lt;br /&gt;
 options(width = 100, dplyr.width = 100)&lt;br /&gt;
 library(ggplot2)&lt;br /&gt;
 theme_set(theme_light())&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 library(dplyr)&lt;br /&gt;
 library(janeaustenr)&lt;br /&gt;
 library(tidytext)&lt;br /&gt;
 book_words &amp;lt;- austen_books() %&amp;gt;%&lt;br /&gt;
   unnest_tokens(word, text) %&amp;gt;%&lt;br /&gt;
   count(book, word, sort = TRUE)&lt;br /&gt;
 total_words &amp;lt;- book_words %&amp;gt;% &lt;br /&gt;
   group_by(book) %&amp;gt;% &lt;br /&gt;
   summarize(total = sum(n))&lt;br /&gt;
 book_words &amp;lt;- left_join(book_words, total_words)&lt;br /&gt;
 book_words&lt;br /&gt;
&lt;br /&gt;
 # plot&lt;br /&gt;
 library(ggplot2)&lt;br /&gt;
 ggplot(book_words, aes(n/total, fill = book)) +&lt;br /&gt;
   geom_histogram(show.legend = FALSE) +&lt;br /&gt;
   xlim(NA, 0.0009) +&lt;br /&gt;
   facet_wrap(~book, ncol = 2, scales = &amp;quot;free_y&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 ## Zipf&amp;#039;s law&lt;br /&gt;
 freq_by_rank &amp;lt;- book_words %&amp;gt;% &lt;br /&gt;
   group_by(book) %&amp;gt;% &lt;br /&gt;
   mutate(rank = row_number(), &lt;br /&gt;
          `term frequency` = n/total)&lt;br /&gt;
 freq_by_rank&lt;br /&gt;
&lt;br /&gt;
 # plot&lt;br /&gt;
 freq_by_rank %&amp;gt;% &lt;br /&gt;
   ggplot(aes(rank, `term frequency`, color = book)) + &lt;br /&gt;
   geom_line(size = 1.1, alpha = 0.8, show.legend = FALSE) + &lt;br /&gt;
   scale_x_log10() +&lt;br /&gt;
   scale_y_log10()&lt;br /&gt;
&lt;br /&gt;
 # rank subset&lt;br /&gt;
 rank_subset &amp;lt;- freq_by_rank %&amp;gt;% &lt;br /&gt;
   filter(rank &amp;lt; 500,&lt;br /&gt;
          rank &amp;gt; 10)&lt;br /&gt;
 lm(log10(`term frequency`) ~ log10(rank), data = rank_subset)&lt;br /&gt;
&lt;br /&gt;
 # plot&lt;br /&gt;
 freq_by_rank %&amp;gt;% &lt;br /&gt;
   ggplot(aes(rank, `term frequency`, color = book)) + &lt;br /&gt;
   geom_abline(intercept = -0.62, slope = -1.1, color = &amp;quot;gray50&amp;quot;, linetype = 2) +&lt;br /&gt;
   geom_line(size = 1.1, alpha = 0.8, show.legend = FALSE) + &lt;br /&gt;
   scale_x_log10() +&lt;br /&gt;
   scale_y_log10()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 ## The `bind_tf_idf` function&lt;br /&gt;
 book_words &amp;lt;- book_words %&amp;gt;%&lt;br /&gt;
   bind_tf_idf(word, book, n)&lt;br /&gt;
 book_words&lt;br /&gt;
&lt;br /&gt;
 #  terms with high tf-idf in Jane Austen&amp;#039;s works.&lt;br /&gt;
 book_words %&amp;gt;%&lt;br /&gt;
   select(-total) %&amp;gt;%&lt;br /&gt;
   arrange(desc(tf_idf))&lt;br /&gt;
&lt;br /&gt;
 # plot&lt;br /&gt;
 book_words %&amp;gt;%&lt;br /&gt;
   arrange(desc(tf_idf)) %&amp;gt;%&lt;br /&gt;
   mutate(word = factor(word, levels = rev(unique(word)))) %&amp;gt;% &lt;br /&gt;
   group_by(book) %&amp;gt;% &lt;br /&gt;
   top_n(15) %&amp;gt;% &lt;br /&gt;
   ungroup() %&amp;gt;%&lt;br /&gt;
   ggplot(aes(word, tf_idf, fill = book)) +&lt;br /&gt;
   geom_col(show.legend = FALSE) +&lt;br /&gt;
   labs(x = NULL, y = &amp;quot;tf-idf&amp;quot;) +&lt;br /&gt;
   facet_wrap(~book, ncol = 2, scales = &amp;quot;free&amp;quot;) +&lt;br /&gt;
   coord_flip()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Referensi==&lt;br /&gt;
&lt;br /&gt;
* https://github.com/dgrtwo/tidy-text-mining/blob/master/03-tf-idf.Rmd&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Pranala Menarik==&lt;br /&gt;
&lt;br /&gt;
* [[R]]&lt;/div&gt;</summary>
		<author><name>Onnowpurbo</name></author>
	</entry>
</feed>