从消息文件中提取电子邮件地址 -Python

问题描述 投票:0回答:2

我正在尝试编写一个脚本,从 Outlook .msg 文件中提取电子邮件地址。但是,某些 .msg 文件可能附加有另一个 .msg 文件。

def extract_emails_from_folder(folder_path, excluded_email):
    """Collect the e-mail addresses found in every ``.msg`` file of a folder.

    Each entry of ``folder_path`` whose name ends with ``.msg`` is handed to
    ``extract_emails_from_msg`` together with ``excluded_email``. The results
    are merged into one set, printed as a one-column DataFrame and returned.

    Args:
        folder_path: Directory to scan (sub-directories are not visited).
        excluded_email: Passed through unchanged to
            ``extract_emails_from_msg``.

    Returns:
        A DataFrame with a single ``'Email Addresses'`` column.
    """
    # A set keeps each address only once across all files.
    email_addresses = set()

    for filename in os.listdir(folder_path):
        # Only Outlook message files are of interest.
        if filename.endswith('.msg'):
            file_path = os.path.join(folder_path, filename)
            email_addresses.update(extract_emails_from_msg(file_path, excluded_email))

    df = pd.DataFrame({'Email Addresses': list(email_addresses)})
    print(df)
    return df

有些电子邮件地址并不在 .msg 文件本身,而是位于作为附件嵌入其中的另一个 .msg 文件里。请问怎样才能把这些电子邮件地址也一并提取出来?

python email-attachments
2个回答
0
投票

您可以使用 extract-msg 库:

import os
import re
import extract_msg
# Loose pattern for anything that looks like an e-mail address.
_EMAIL_PATTERN = re.compile(r'[\w\.-]+@[\w\.-]+')


def _addresses_in_message(msg):
    """Return the addresses found in ``msg`` and in messages attached to it."""
    import tempfile

    found = set()
    # The header fields are single strings (or None), so they are scanned
    # with the pattern. Passing a string straight to set.update() would add
    # its individual characters, and None would raise TypeError.
    for text in (msg.sender, msg.to, msg.cc, msg.bcc, msg.body):
        if isinstance(text, str):
            found.update(_EMAIL_PATTERN.findall(text))

    for att in msg.attachments:
        payload = getattr(att, 'data', None)
        name = getattr(att, 'longFilename', None) or ''
        if hasattr(payload, 'attachments') and hasattr(payload, 'body'):
            # NOTE(review): assumes extract-msg exposes an embedded message
            # as an already-parsed message object on ``att.data`` - confirm
            # against the installed version. Duck-typed so that other
            # payloads are simply skipped.
            found.update(_addresses_in_message(payload))
        elif isinstance(payload, bytes) and name.lower().endswith('.msg'):
            # A .msg attached as raw bytes: write it to a private temporary
            # directory under a fixed name (the attachment's own file name is
            # untrusted input) and parse it like any other file. The
            # directory is removed even if parsing fails.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = os.path.join(tmp_dir, 'attachment.msg')
                with open(tmp_path, 'wb') as handle:
                    handle.write(payload)
                with extract_msg.Message(tmp_path) as nested:
                    found.update(_addresses_in_message(nested))
    return found


def extract_emails_from_msg(file_path, excluded_email):
    """Extract e-mail addresses from an Outlook ``.msg`` file.

    Addresses are taken from the sender, to, cc and bcc headers and from the
    body, and the same is done for every ``.msg`` message attached to the
    file, at any nesting depth.

    Args:
        file_path: Path of the ``.msg`` file to read.
        excluded_email: Address to leave out of the result. May be a single
            string, an iterable of strings, or None to exclude nothing.

    Returns:
        A set of address strings.
    """
    with extract_msg.Message(file_path) as msg:
        email_addresses = _addresses_in_message(msg)

    if isinstance(excluded_email, str):
        email_addresses.discard(excluded_email)
    elif excluded_email is not None:
        # An iterable of addresses (e.g. a list) is removed in one go;
        # set.discard() would raise TypeError on an unhashable list.
        email_addresses.difference_update(excluded_email)
    return email_addresses
def extract_emails_from_folder(folder_path, excluded_email):
    """Gather the addresses of all ``.msg`` files directly inside a folder.

    Args:
        folder_path: Directory whose entries are scanned.
        excluded_email: Forwarded as-is to ``extract_emails_from_msg``.

    Returns:
        The union, as a set, of what ``extract_emails_from_msg`` yields for
        each entry whose name ends with ``.msg``.
    """
    per_file_results = [
        extract_emails_from_msg(os.path.join(folder_path, entry), excluded_email)
        for entry in os.listdir(folder_path)
        if entry.endswith('.msg')
    ]
    # union() with no arguments gives an empty set, matching an empty folder.
    return set().union(*per_file_results)
# Example usage: replace both placeholders before running.
folder_path = '/path/to/your/folder'
# Address to leave out of the results. It has to be a real address string,
# otherwise the exclusion can never match anything.
excluded_email = 'excluded@example.com'
emails = extract_emails_from_folder(folder_path, excluded_email)
print(emails)

不要忘记安装库🤠:

pip install extract-msg

0
投票

您可以使用 extract-msg 库(在代码中以 extract_msg 导入):

pip install extract-msg

from extract_msg import Message

def extract_emails_from_folder(folder_path, excluded_email):
    """Return the sender, to and cc header values of one ``.msg`` file.

    Despite the parameter name, ``folder_path`` is opened directly as a
    single message file. Reading is best-effort: any failure is reported on
    stdout and yields an empty list.

    Args:
        folder_path: Path of the ``.msg`` file to read.
        excluded_email: Value(s) to leave out of the result. May be a single
            string, an iterable of strings, or None to exclude nothing.

    Returns:
        A list of the non-empty header values that are not excluded, or an
        empty list if the file could not be read.
    """
    if excluded_email is None:
        excluded = set()
    elif isinstance(excluded_email, str):
        # Wrap a lone string so ``in`` is an equality test, not a substring test.
        excluded = {excluded_email}
    else:
        excluded = set(excluded_email)

    try:
        # The context manager closes the file even if reading a field fails.
        # NOTE(review): ``sender``/``to``/``cc`` are the header attributes
        # assumed for extract_msg's Message - confirm against the installed
        # version.
        with Message(folder_path) as msg:
            fields = [msg.sender, msg.to, msg.cc]
    except Exception as e:
        print(f"Error extracting email addresses from {folder_path}: {e}")
        return []

    # Missing headers come back empty/None and are dropped here.
    return [value for value in fields if value and value not in excluded]

import os

import pandas as pd

# Placeholder: point this at the folder that contains the .msg files.
folder_path = '/path/to/your/folder'
excluded_email = ["Your Excluded Mails here"]
email_addresses = set()
# Iterate through each file in the folder
for filename in os.listdir(folder_path):
    # Check if the file is an MSG file
    if filename.endswith('.msg'):
        file_path = os.path.join(folder_path, filename)
        # Despite its name, the helper defined above reads ONE message file
        # and returns a (possibly empty) list, so it can be merged directly.
        email_addresses.update(extract_emails_from_folder(file_path, excluded_email))

# Create a DataFrame to store the email addresses
df = pd.DataFrame({'Email Addresses': list(email_addresses)})
print(df)
© www.soinside.com 2019 - 2024. All rights reserved.