Skip to content

Instantly share code, notes, and snippets.

View documentprocessing's full-sized avatar

Document Processing documentprocessing

View GitHub Profile
import extract_msg
# Specify the path to your .msg file
msg_file_path = 'path/to/your/file.msg'
# Specify the directory where attachments should be saved
save_directory = 'path/to/save/attachments/'
# Open the MSG file
with extract_msg.openMsg(msg_file_path) as msg:
# Loop through attachments
import extract_msg
# open message file
msg = extract_msg.openMsg("input.msg")
# print sender name
print('Sender: {}'.format(msg.sender))
# print date
print('Sent On: {}'.format(msg.date))
# print subject
print('Subject: {}'.format(msg.subject))
# print body
@documentprocessing
documentprocessing / extract-tables-from-pdf-pdf2docx.py
Last active April 27, 2024 09:38
Extract tables from PDF file using pdf2docx API
from pdf2docx import Converter
pdf_file = 'sample_pdf.pdf'
converter = Converter(pdf_file)
tables = converter.extract_tables(start=0, end=1)
converter.close()
#loop through the tables to print these
for table in tables:
@documentprocessing
documentprocessing / convert-specific-pages-of-pdf-to-docx-pdf2docx.py
Created April 27, 2024 07:01
Convert specific PDF pages to DOCX using pdf2docx API in Python
from pdf2docx import Converter
# PDF file to be converted
pdf_file = 'example.pdf'
# Output DOCX file
docx_file = 'output.docx'
# Create a PDF to DOCX converter object
cv = Converter(pdf_file)
@documentprocessing
documentprocessing / convert-pdf-to-docx-using-pdf2docx-api.py
Created April 27, 2024 06:53
Convert PDF files to DOCX with pdf2docx
from pdf2docx import Converter
# Source PDF and destination DOCX paths.
pdf_file = 'example.pdf'
docx_file = 'output.docx'
# Create a Converter object for the source PDF.
cv = Converter(pdf_file)
try:
    # Convert starting from page index 0; end=None leaves the end page unbounded.
    cv.convert(docx_file, start=0, end=None)
finally:
    # Always release the converter, even when the conversion raises.
    cv.close()
from pptx import Presentation
prs = Presentation()
slide_with_bullet_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(slide_with_bullet_layout)
shapes = slide.shapes
title_shape = shapes.title
body_shape = shapes.placeholders[1]
@documentprocessing
documentprocessing / add-textbox-to-presentation-with-python-ptpx.py
Created April 21, 2024 18:08
Add Textbox to Presentation with python-pptx
from pptx import Presentation
from pptx.util import Inches, Pt
presentation = Presentation()
blank_slide_layout = presentation.slide_layouts[6]
slide = presentation.slides.add_slide(blank_slide_layout)
#set the dimensions of the text box
left = top = width = height = Inches(1)
txBox = slide.shapes.add_textbox(left, top, width, height)
@documentprocessing
documentprocessing / read-pptx.py
Created April 21, 2024 18:01
Read PPTX with python-pptx
from pptx import Presentation
# Load the existing presentation from disk.
presentation = Presentation('path_to_presentation_file.pptx')
# Save the presentation under a new file name.
presentation.save('path_to_updated_pptx_file.pptx')
@documentprocessing
documentprocessing / create-pptx-with-python-pptx.py
Last active April 21, 2024 18:02
Create PowerPoint PPTX with python-pptx
# import Presentation class from python-pptx library
from pptx import Presentation
# Creating presentation object
root_Presentation = Presentation()
# Creating slide layout
layout_slide = root_Presentation.slide_layouts[0]
#Create a slide object and attach it to the Presentation
@documentprocessing
documentprocessing / udpate-existing-column-in-XLS-with-Pyexcel-XLS.py
Created March 26, 2024 19:11
Update existing Column in XLS file with Pyexcel-XLS
import pyexcel as p
# Load the sheet from the sample Excel file.
sheet = p.get_sheet(file_name="example.xls")
# Overwrite the column at index 2 with the new values.
replacement_column = ["Column 3", 100, 200, 300]
sheet.column[2] = replacement_column
# Write the modified sheet to disk as a new XLS file.
sheet.save_as("updateexisting.xls")