drmattmedical

Medical Apps & Things


Extract PDF text

#Python
# How to extract text from a PDF that contains no tables
import ghostscript
import PyPDF2
# Extract the text of the first page of a PDF and write it to a text file.
#
# NOTE(review): PdfFileReader / getPage / extractText are the names this
# script was written against; newer PyPDF2 releases rename them (PdfReader,
# reader.pages[i], extract_text) -- confirm against the installed version.
with open('PathToPDFFile', 'rb') as pdfFileObj:
    pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
    pageObj = pdfReader.getPage(0)  # page indices are zero-based
    # Extract inside the ``with`` so the reader's underlying file is still
    # open, and only once: the extraction result is reused below.
    pageText = pageObj.extractText()

# Mode 'w' overwrites any existing output; use 'a' to append instead.
# UTF-8 is set explicitly so non-ASCII characters in the PDF text do not
# fail on platforms whose default encoding cannot represent them.
with open('PathToOutputFile.txt', 'w', encoding='utf-8') as outputFile:
    print(pageText, file=outputFile)