我需要为我的一个项目创建一个 CSV 文件,但所需的邮件数据分散在许多小文件中,这些文件的名称形如 0001.ea7e79d3153e7469e7a9c3e0af6a357e(序号加一串哈希样式的扩展名)。我该如何编写脚本,让它遍历所有此类文件、读取数据、提取所需的邮件信息,并将其写入另一个 CSV 文件?我需要这几列:邮件的收件人(To)和发件人(From)、主题(Subject)以及内容(Content)。我已有一段代码,它能进入指定的文件夹,但不会读取其中的文件:
import os
import email
import csv
# Raw string literal: in a normal literal the sequence '\2' is an octal escape
# (chr(2)), which silently corrupts the Windows path so that os.walk() finds
# no files at all.
rootdir = r'E:\Masters\2nd Sem\Advance data mining\spamassasin\Dataset file\easy_ham'

# Column names of the generated CSV, in output order.
fieldnames = ['To', 'From', 'Subject', 'Content']


def decode_part(part):
    """Return the text of a single (non-multipart) message part.

    The payload is transfer-decoded (base64 / quoted-printable) and then
    decoded to ``str`` using the charset declared on the part, falling back
    to UTF-8 when no charset is declared or the declared name is unknown.
    Undecodable bytes are replaced rather than raising.  Returns ``''`` when
    the part has no decodable payload (e.g. a multipart container).
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        return ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        # The mail declared a charset Python does not know.
        return payload.decode('utf-8', errors='replace')


def extract_plain_text(msg):
    """Return the readable body of *msg* as one string.

    For a multipart message, every ``text/plain`` part that is not marked as
    an attachment is decoded and concatenated in document order.  For a
    single-part message the sole payload is decoded, whatever its type.
    """
    if not msg.is_multipart():
        return decode_part(msg)
    return ''.join(
        decode_part(part)
        for part in msg.walk()
        if part.get_content_type() == 'text/plain'
        and part.get_content_disposition() != 'attachment'
    )


def build_dataset(root=rootdir, output_path='dataset.csv'):
    """Walk *root* recursively and write one CSV row per e-mail file.

    Every file below *root* is parsed as an RFC 822 message; its ``To``,
    ``From`` and ``Subject`` headers and its plain-text body are written to
    *output_path* (UTF-8, with a header row).  A file that cannot be read or
    parsed is reported on stdout and skipped.

    Returns the number of rows written.  If *root* is not an existing
    directory, an error is printed, *output_path* is left untouched and 0 is
    returned.
    """
    if not os.path.isdir(root):
        # os.walk() swallows this error by default and would just yield
        # nothing, which looks like "the script does not enter the files".
        print('Error: {0} is not an existing directory'.format(root))
        return 0

    output_abs = os.path.abspath(output_path)
    written = 0
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for subdir, _dirs, files in os.walk(root):
            for file in files:
                filepath = os.path.join(subdir, file)
                if os.path.abspath(filepath) == output_abs:
                    # Do not parse the CSV being written as if it were a mail.
                    continue
                try:
                    # Binary mode lets the email package see the original
                    # bytes, so each part can be decoded with its own charset.
                    with open(filepath, 'rb') as f:
                        msg = email.message_from_binary_file(f)
                    writer.writerow({
                        'To': msg['To'],
                        'From': msg['From'],
                        'Subject': msg['Subject'],
                        'Content': extract_plain_text(msg),
                    })
                    written += 1
                except Exception as e:
                    # Per-file boundary: one bad mail must not abort the run.
                    print('Error: {0} - {1}'.format(file, e))
    return written


def main():
    """Build ``dataset.csv`` in the current directory from ``rootdir``."""
    build_dataset()


if __name__ == '__main__':
    main()