R: ngram dan frekuensi-nya: Difference between revisions
From OnnoCenterWiki
Jump to navigation | Jump to search
Onnowpurbo (talk | contribs) Created page with " library(tau) library(data.table) # given a string vector and size of ngrams this function returns word ngrams with corresponding frequencies createNgram <-function..." |
Onnowpurbo (talk | contribs) No edit summary |
||
| (2 intermediate revisions by the same user not shown) | |||
| Line 3: | Line 3: | ||
library(tau) | library(tau) | ||
library(data.table) | library(data.table) | ||
createNgram <-function(stringVector, ngramSize){ | createNgram <-function(stringVector, ngramSize){ | ||
| Line 16: | Line 16: | ||
return(ngram) | return(ngram) | ||
} | } | ||
text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter." | text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter." | ||
# | |||
text <- readtext("out.txt") | |||
res <- createNgram(text, 2) | res <- createNgram(text, 2) | ||
res | res | ||
# sort | |||
res_sort <- res[order(-freq)] | |||
res_sort | |||
head(res_sort,n=50) | |||
==Pranala Menarik== | |||
* [[R]] | |||
Latest revision as of 01:22, 5 November 2018
library(tau)
library(data.table)
#' Count word n-grams in a string vector
#'
#' Given a string vector and an n-gram size, returns the word n-grams found by
#' `textcnt()` together with their frequencies.
#'
#' @param stringVector Text to analyse; passed unchanged to `textcnt()`.
#' @param ngramSize N-gram size, forwarded to `textcnt()` as `n`.
#' @return A data.table with one row per n-gram and three columns: the n-gram
#'   text (named `w1` when `ngramSize` is 1, `w1w2` for every other size),
#'   `freq` (its count) and `length` (number of characters in the n-gram).
createNgram <- function(stringVector, ngramSize) {
  # method = "string" counts word sequences; tolower = FALSE keeps the
  # original letter case.
  counts <- textcnt(stringVector, method = "string", n = ngramSize, tolower = FALSE)
  terms <- names(counts)
  freqs <- unclass(counts)
  term_lengths <- nchar(terms)

  # Only the name of the first column depends on the requested size.
  if (ngramSize == 1) {
    return(data.table(w1 = terms, freq = freqs, length = term_lengths))
  }
  data.table(w1w2 = terms, freq = freqs, length = term_lengths)
}
# Demo: count word bigrams with createNgram() and list them by frequency.
# An inline sample string is defined first, then replaced by the contents of
# "out.txt".
text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter."
# Replace the sample with the contents of out.txt. The call is
# namespace-qualified because this script never attaches the readtext package.
# readtext() returns a data frame (doc_id, text); `$text` passes only the
# document text on, so the file name is not tokenised along with it.
text <- readtext::readtext("out.txt")$text
res <- createNgram(text, 2)
res
# sort: most frequent n-grams first
res_sort <- res[order(-freq)]
res_sort
head(res_sort, n = 50)