Created
June 18, 2019 15:28
-
-
Save vbernardes/529679286441f5982da7122f875e8277 to your computer and use it in GitHub Desktop.
Function that returns the lists of email addresses found in the From/To/Cc/Bcc fields of a raw email message
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Loose pattern for a single email address (local-part@domain.tld).
email_format_pattern = re.compile(r'([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+)')

# Header names scanned for addresses. These exact strings (including the
# trailing colon) are also the keys of the dict returned to the caller.
_ADDRESS_FIELDS = ('From:', 'To:', 'Cc:', 'Bcc:')

# One precompiled pattern per field, built once at import time.
#   * '^' + MULTILINE anchors the name to the start of a line, so 'To:' does
#     not match inside 'Reply-To:' or 'X-To:'.
#   * IGNORECASE because header field names are case-insensitive ('CC:').
#   * The group captures the rest of the header line plus any folded
#     continuation lines (lines that begin with a space or tab).
_FIELD_PATTERNS = {
    field: re.compile(
        r'^' + re.escape(field) + r'(.*(?:\r?\n[ \t]+.*)*)',
        flags=re.MULTILINE | re.IGNORECASE,
    )
    for field in _ADDRESS_FIELDS
}


def get_email_addresses(message):
    '''
    Given a string containing raw email text, returns a dict with lists of
    email addresses contained in the address fields.

    Only the first occurrence of each field is used. A field that does not
    appear in the message maps to an empty list instead of raising.

    Args:
        message: str containing raw email content

    Returns:
        dict: dictionary with the email fields 'From:', 'To:', 'Cc:' and
              'Bcc:' as keys and the list of addresses found in each field
              as the value
    '''
    addresses = {}
    for field, field_pattern in _FIELD_PATTERNS.items():
        field_match = field_pattern.search(message)
        if field_match is None:
            # Header absent (Cc/Bcc are often missing): report no addresses.
            addresses[field] = []
            continue
        # Extract only the email addresses from the header's value.
        addresses[field] = email_format_pattern.findall(field_match.group(1))
    return addresses
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment