R: tidytext RPJP BAPPENAS
From OnnoCenterWiki
# Install pdftools only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("pdftools", quietly = TRUE)) {
  install.packages("pdftools")
}

library(pdftools)
library(dplyr)    # %>%, tibble(), group_by(), mutate(), row_number(), ungroup()
library(stringr)  # str_detect(), regex()

# pdf_text() returns one string per page; split every page into its lines.
# The result is a list with one character vector of lines per page.
rpjp2005 <- pdf_text("RPJP_2005-2025.pdf") %>%
  strsplit(split = "\n")

# Flatten the per-page list into a one-row-per-line table. The pipeline below
# needs `text` and `book` columns, which the raw list does not have; the whole
# PDF is treated as a single "book".
rpjp2005_lines <- tibble(
  text = as.character(unlist(rpjp2005, use.names = FALSE)),
  book = "RPJP 2005-2025"
)

# Annotate every line with its position in the document and a running chapter
# counter that increases each time a chapter heading is detected.
# NOTE(review): the pattern only matches lines that START with "chapter"
# followed by a digit or roman-numeral character. If the headings in this PDF
# use another word (e.g. "BAB") or are indented, `chapter` will stay 0 --
# confirm against the extracted text.
original_rpjp2005 <- rpjp2005_lines %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup()

# Print the annotated table (the original referenced the undefined object
# `original_books` here).
original_rpjp2005