CoCalc -- email-read-alternative.py

GitHub Repository: allendowney/cpython
Path: blob/main/Doc/includes/email-read-alternative.py
¹² views
1
import os
2
import sys
3
import tempfile
4
import mimetypes
5
import webbrowser
6

7
# Import the email modules we'll need
8
from email import policy
9
from email.parser import BytesParser
10

11

12
def magic_html_parser(html_text, partfiles):
    """Return safety-sanitized html linked to partfiles.

    Rewrite the href="cid:...." attributes to point to the filenames in
    partfiles.  Though not trivial, this should be possible using html.parser.

    This is a placeholder: the sanitizing/rewriting logic is deliberately
    left for the reader to supply.

    Parameters:
        html_text: the HTML source of the message body, as a string.
        partfiles: mapping of content-id (without the surrounding ``<>``)
            to the name of the local file holding that part's content.

    Raises:
        NotImplementedError: always, until a real implementation is added.
    """
    raise NotImplementedError("Add the magic needed")
19

20

21
# In a real program you'd get the filename from the arguments.
with open('outgoing.msg', 'rb') as fp:
    msg = BytesParser(policy=policy.default).parse(fp)

# Now the header items can be accessed as a dictionary, and any non-ASCII will
# be converted to unicode:
print('To:', msg['to'])
print('From:', msg['from'])
print('Subject:', msg['subject'])

# If we want to print a preview of the message content, we can extract whatever
# the least formatted payload is and print the first three lines.  Of course,
# if the message has no plain text part printing the first three lines of html
# is probably useless, but this is just a conceptual example.
simplest = msg.get_body(preferencelist=('plain', 'html'))
print()
# get_body() returns None when the message has no plain or html part, in which
# case there is simply nothing to preview.
if simplest is not None:
    print(''.join(simplest.get_content().splitlines(keepends=True)[:3]))

# Only an answer starting with "n" declines.  startswith() is used rather than
# indexing the first character so that an empty answer (just pressing Enter)
# does not raise IndexError.
ans = input("View full message?")
if ans.strip().lower().startswith('n'):
    sys.exit()
42

43
# We can extract the richest alternative in order to display it:
richest = msg.get_body()
if richest is None:
    # No text or multipart/related body candidate at all (for example a
    # message that consists only of an attachment).
    print("Don't know how to display {}".format(msg.get_content_type()))
    sys.exit()

# Maps content-id (without the surrounding <>) to the temporary file that
# holds the corresponding related part.
partfiles = {}
html_path = None
try:
    # The get_content_*type() accessors are used instead of the raw
    # Content-Type header so a part without that header still works.
    if richest.get_content_maintype() == 'text':
        if richest.get_content_subtype() == 'plain':
            for line in richest.get_content().splitlines():
                print(line)
            sys.exit()
        elif richest.get_content_subtype() == 'html':
            body = richest
        else:
            print("Don't know how to display {}".format(richest.get_content_type()))
            sys.exit()
    elif richest.get_content_type() == 'multipart/related':
        # preferencelist must be a sequence of subtypes; a bare string would
        # be matched by substring instead.
        body = richest.get_body(preferencelist=('html',))
        if body is None:
            print("Don't know how to display {}".format(richest.get_content_type()))
            sys.exit()
        for part in richest.iter_attachments():
            cid = part['content-id']
            if cid is None:
                # Without a content-id the html cannot refer to this part.
                continue
            fn = part.get_filename()
            if fn:
                extension = os.path.splitext(fn)[1]
            else:
                extension = mimetypes.guess_extension(part.get_content_type())
            content = part.get_content()
            if isinstance(content, str):
                # Text parts come back as str; the temp file is binary.
                content = content.encode('utf-8')
            with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as f:
                # Strip the <> to go from email form of cid to html form.
                # Record the name before writing so the file is cleaned up
                # even if the write fails.
                partfiles[cid[1:-1]] = f.name
                f.write(content)
    else:
        print("Don't know how to display {}".format(richest.get_content_type()))
        sys.exit()
    # The .html suffix lets the browser recognise the file type, and an
    # explicit encoding avoids depending on the locale for non-ASCII text.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.html',
                                     encoding='utf-8', delete=False) as f:
        html_path = f.name
        f.write(magic_html_parser(body.get_content(), partfiles))
    webbrowser.open(html_path)
    # webbrowser.open() does not wait for the page to load, so keep the
    # temporary files around until the user has seen the message.
    input("Press Enter when you have finished viewing the message.")
finally:
    # Always remove the temporary files, including when an error (or
    # sys.exit()) interrupts the display above.
    for path in (html_path, *partfiles.values()):
        if path is None:
            continue
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

# Of course, there are lots of email messages that could break this simple
# minded program, but it will handle the most common ones.
80

81
Product

Resources

Company