R: read multi PDF ke tidytext
From OnnoCenterWiki
library(tidyverse)
library(tidytext)
library(tm)

# Folder containing the PDF files to read (relative to the working directory).
directory <- "data-pdf"

# Create a corpus from the PDFs in `directory`, reading each file with tm's
# PDF reader, then convert the corpus to a document-term matrix.
# NOTE(review): readPDF's default engine presumably requires the pdftools
# package to be installed -- confirm on the target machine.
converted <- VCorpus(
  DirSource(directory),
  readerControl = list(reader = readPDF)
) %>%
  DocumentTermMatrix()

# Turn the document-term matrix into a tidy table and drop every term that
# contains at least one digit.
converted %>%
  tidy() %>%
  filter(!grepl("[0-9]+", term))