Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Wrestle with some horrible edge cases from eBay |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | master | trunk |
Files: | files | file ages | folders |
SHA3-256: |
b9f983566b6aa31f7d6d73059022d90b |
User & Date: | ajv-899-334-8894@vsta.org 2016-11-30 22:50:53 |
Context
2016-12-04
| ||
15:54 | Start reading a folder at *newest*, not oldest (check-in: b797c891d4 user: ajv-899-334-8894@vsta.org tags: master, trunk) | |
2016-11-30
| ||
22:50 | Wrestle with some horrible edge cases from eBay (check-in: b9f983566b user: ajv-899-334-8894@vsta.org tags: master, trunk) | |
2016-11-28
| ||
14:44 | Deal with quoting -> blockquote. Factor all HTML-ification into its own module (check-in: 0d75320618 user: ajv-899-334-8894@vsta.org tags: master, trunk) | |
Changes
Changes to imap.py.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
...
382
383
384
385
386
387
388
389
390
391
392
393
394
395
|
#
# imap.py
# imaplib services
#
import imaplib, time, sys, html2text, quopri
from email.header import decode_header
import chore
# Turn an imap BODYSTRUCTURE into a Python list representation
class BodyParser(object):
def __init__(self, s):
self.s = s
self.peeked = None
................................................................................
# Try for textual, then HTML (plus render to text)
t = self._get_type(bods, msgidx, '"text"', '"plain"')
if t is None:
bodhtml = self._get_type(bods, msgidx, '"text"', '"html"')
if bodhtml is None:
return None
t = html2text.html2text(bodhtml)
# Message header fields
tup = self.srv.fetch(msgidx,
'(BODY[HEADER.FIELDS (FROM TO DATE STATUS SUBJECT)])')
if tup[0] != "OK":
sys.stderr.write("%s msg %d failed: %s\n" %
(self.user, msgidx, tup[1]))
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
...
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
|
# # imap.py # imaplib services # import imaplib, time, sys, html2text, quopri, base64, cgi import string from email.header import decode_header import chore # Legal characters in URL URLchars = set() URLchars.update(string.ascii_letters) URLchars.update(string.digits) URLchars.update(".&?=/_,%+-") # html2text will permit broken URL's across lines URLchars.add('\n') # Rewrite without anything which will cause browser actions def _html_sanitize(s): global URLchars for targ in ("http:", "https:"): while targ in s: idx = s.index(targ) contin = idx+len(targ) sl = len(s) while contin < sl: c = s[contin] contin += 1 if c not in URLchars: break s = s[:idx] + '(url)' + s[contin:] return s # Turn an imap BODYSTRUCTURE into a Python list representation class BodyParser(object): def __init__(self, s): self.s = s self.peeked = None ................................................................................ # Try for textual, then HTML (plus render to text) t = self._get_type(bods, msgidx, '"text"', '"plain"') if t is None: bodhtml = self._get_type(bods, msgidx, '"text"', '"html"') if bodhtml is None: return None t = html2text.html2text(bodhtml) # Shed URL's. Some browsers will go out and touch # them, violating your privacy and burning data. t = _html_sanitize(t) # They could embed HTML into the text stream, # naughty. t = cgi.escape(t) # Message header fields tup = self.srv.fetch(msgidx, '(BODY[HEADER.FIELDS (FROM TO DATE STATUS SUBJECT)])') if tup[0] != "OK": sys.stderr.write("%s msg %d failed: %s\n" % (self.user, msgidx, tup[1])) |