drmattmedical

Medical Apps & Things


How to get table data (sometimes) from a PDF

#Python
#How to (try to) extract PDF tables
import camelot
import ghostscript
# Parse the PDF. The second positional parameter of camelot.read_pdf is
# `pages` (a string such as '1', '1,3-5' or 'all'), not a file-open mode, so
# passing 'rb' there makes camelot try to parse 'rb' as a page number.
# Only the path is passed here; add pages='all' to scan every page.
tables = camelot.read_pdf('PathToPDFFile')
tables  # In a REPL/notebook, the repr gives the number of tables
tables.export('foo.csv', f='csv', compress=True)  # f can also be json, excel, html
tables[0]  # In a REPL/notebook, the repr gives the shape of the table
tables[0].parsing_report  # Gives data regarding the table
tables[0].to_csv('foo.csv')  # Also available: to_json, to_excel, to_html
tables[0].df  # The table as a pandas DataFrame