Python: Read PDF: Difference between revisions
From OnnoCenterWiki
Jump to navigation | Jump to search
Onnowpurbo (talk | contribs) No edit summary |
Onnowpurbo (talk | contribs) No edit summary |
||
| Line 1: | Line 1: | ||
Install | |||
pip install PyPDF2 | |||
==Script Cek Jumlah Halaman== | |||
# importing all the required modules | # importing all the required modules | ||
| Line 17: | Line 17: | ||
print(fileReader.numPages) | print(fileReader.numPages) | ||
==Script Print Satu Page== | |||
# extracting_text.py | |||
from PyPDF2 import PdfFileReader | |||
def text_extractor(path): | |||
with open(path, 'rb') as f: | |||
pdf = PdfFileReader(f) | |||
# get the first page | |||
page = pdf.getPage(1) | |||
print(page) | |||
print('Page type: {}'.format(str(type(page)))) | |||
text = page.extractText() | |||
print(text) | |||
if __name__ == '__main__': | |||
path = 'reportlab-sample.pdf' | |||
text_extractor(path) | |||
Latest revision as of 09:22, 28 October 2018
Install
pip install PyPDF2
Script Cek Jumlah Halaman
# Page-count demo: open a PDF and print how many pages it has.
# NOTE(review): PdfFileReader / numPages are the legacy PyPDF2 names; newer
# releases rename them (PdfReader, len(reader.pages)) - confirm against the
# installed version.

# importing all the required modules
import PyPDF2

# opening the PDF in binary mode; the with-block closes it when done
with open('example.pdf', 'rb') as file:
    # creating a pdf reader object
    fileReader = PyPDF2.PdfFileReader(file)

    # print the number of pages in pdf file
    print(fileReader.numPages)
Script Print Satu Page
# extracting_text.py
from PyPDF2 import PdfFileReader
def text_extractor(path):
    """Print the first page of a PDF and the text extracted from it.

    Opens the file at ``path`` in binary mode, wraps it in a
    ``PdfFileReader``, then prints the page object, its type, and the
    result of ``extractText()``. Nothing is returned.
    """
    with open(path, 'rb') as f:
        pdf = PdfFileReader(f)

        # get the first page; getPage() counts from zero, so the first
        # page is index 0 (index 1 would be the second page)
        page = pdf.getPage(0)
        print(page)
        print('Page type: {}'.format(str(type(page))))

        text = page.extractText()
        print(text)
if __name__ == '__main__':
    # When run as a script, extract text from the sample PDF.
    path = 'reportlab-sample.pdf'
    text_extractor(path)