asd
This commit is contained in:
		| @ -0,0 +1,26 @@ | ||||
| """ | ||||
| future.backports package | ||||
| """ | ||||
|  | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| import sys | ||||
|  | ||||
| __future_module__ = True | ||||
| from future.standard_library import import_top_level_modules | ||||
|  | ||||
|  | ||||
| if sys.version_info[0] >= 3: | ||||
|     import_top_level_modules() | ||||
|  | ||||
|  | ||||
| from .misc import (ceil, | ||||
|                    OrderedDict, | ||||
|                    Counter, | ||||
|                    ChainMap, | ||||
|                    check_output, | ||||
|                    count, | ||||
|                    recursive_repr, | ||||
|                    _count_elements, | ||||
|                    cmp_to_key | ||||
|                   ) | ||||
| @ -0,0 +1,422 @@ | ||||
| """Shared support for scanning document type declarations in HTML and XHTML. | ||||
|  | ||||
| Backported for python-future from Python 3.3. Reason: ParserBase is an | ||||
| old-style class in the Python 2.7 source of markupbase.py, which I suspect | ||||
| might be the cause of sporadic unit-test failures on travis-ci.org with | ||||
| test_htmlparser.py.  The test failures look like this: | ||||
|  | ||||
|     ====================================================================== | ||||
|  | ||||
| ERROR: test_attr_entity_replacement (future.tests.test_htmlparser.AttributesStrictTestCase) | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| Traceback (most recent call last): | ||||
|   File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 661, in test_attr_entity_replacement | ||||
|     [("starttag", "a", [("b", "&><\"'")])]) | ||||
|   File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 93, in _run_check | ||||
|     collector = self.get_collector() | ||||
|   File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 617, in get_collector | ||||
|     return EventCollector(strict=True) | ||||
|   File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 27, in __init__ | ||||
|     html.parser.HTMLParser.__init__(self, *args, **kw) | ||||
|   File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 135, in __init__ | ||||
|     self.reset() | ||||
|   File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 143, in reset | ||||
|     _markupbase.ParserBase.reset(self) | ||||
|  | ||||
| TypeError: unbound method reset() must be called with ParserBase instance as first argument (got EventCollector instance instead) | ||||
|  | ||||
| This module is used as a foundation for the html.parser module.  It has no | ||||
| documented public API and should not be used directly. | ||||
|  | ||||
| """ | ||||
|  | ||||
import re

# Pre-bound matchers/patterns used by ParserBase below.  They are bound at
# import time so the ``re`` module itself can be dropped from the namespace.
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
_commentclose = re.compile(r'--\s*>')
_markedsectionclose = re.compile(r']\s*]\s*>')

# An analysis of the MS-Word extensions is available at
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf

_msmarkedsectionclose = re.compile(r']\s*>')

del re


class ParserBase(object):
    """Parser base class which provides some common support methods used
    by the SGML/HTML and XHTML parsers.

    Subclasses are expected to provide ``self.rawdata`` (the input buffer),
    an ``error()`` override, and the ``handle_decl`` / ``handle_comment``
    callbacks that the scanning methods invoke.

    Every ``parse_*`` / ``_parse_*`` method returns the index just past what
    it consumed, or a negative number when the buffer ends before the
    construct is complete.  The code after each ``self.error(...)`` call
    relies on ``error()`` raising.
    """

    # Keyword tables, built once instead of on every call.
    _BRACKET_UNSUPPORTED_DECLS = frozenset(
        ["attlist", "linktype", "link", "element"])
    _STANDARD_SECTION_KEYWORDS = frozenset(
        ["temp", "cdata", "ignore", "include", "rcdata"])
    _MSOFFICE_SECTION_KEYWORDS = frozenset(["if", "else", "endif"])
    _SUBSET_DECL_NAMES = frozenset(
        ["attlist", "element", "entity", "notation"])

    def __init__(self):
        """Refuse direct instantiation; only subclasses may be created."""
        if self.__class__ is ParserBase:
            raise RuntimeError(
                "_markupbase.ParserBase must be subclassed")

    def error(self, message):
        """Report a parse error; subclasses must override this."""
        raise NotImplementedError(
            "subclasses of ParserBase must override error()")

    def reset(self):
        """Reset position tracking to line 1, offset 0."""
        self.lineno = 1
        self.offset = 0

    def getpos(self):
        """Return current line number and offset."""
        return self.lineno, self.offset

    # Internal -- update line number and offset.  This should be
    # called for each piece of data exactly once, in order -- in other
    # words the concatenation of all the input strings to this
    # function should be exactly the entire input.
    def updatepos(self, i, j):
        """Account for rawdata[i:j] in lineno/offset and return j."""
        if i >= j:
            return j
        rawdata = self.rawdata
        nlines = rawdata.count("\n", i, j)
        if nlines:
            self.lineno = self.lineno + nlines
            pos = rawdata.rindex("\n", i, j)  # Should not fail
            # Offset restarts after the last newline in the span.
            self.offset = j-(pos+1)
        else:
            self.offset = self.offset + j-i
        return j

    _decl_otherchars = ''

    # Internal -- parse declaration (for use by subclasses).
    def parse_declaration(self, i):
        """Parse the ``<!...>`` declaration starting at index i.

        Calls ``handle_decl`` for a doctype declaration and ``unknown_decl``
        for any other named declaration.  Returns the index after the
        closing '>' or a negative number if the declaration is incomplete.
        """
        # This is some sort of declaration; in "HTML as
        # deployed," this should only be the document type
        # declaration ("<!DOCTYPE html...>").
        # ISO 8879:1986, however, has more complex
        # declaration syntax for elements in <!...>, including:
        # --comment--
        # [marked section]
        # name in the following list: ENTITY, DOCTYPE, ELEMENT,
        # ATTLIST, NOTATION, SHORTREF, USEMAP,
        # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
        rawdata = self.rawdata
        j = i + 2
        assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
        if rawdata[j:j+1] == ">":
            # the empty comment <!>
            return j + 1
        if rawdata[j:j+1] in ("-", ""):
            # Start of comment followed by buffer boundary,
            # or just a buffer boundary.
            return -1
        # A simple, practical version could look like: ((name|stringlit) S*) + '>'
        n = len(rawdata)
        if rawdata[j:j+2] == '--':  # comment
            # Locate --.*-- as the body of the comment
            return self.parse_comment(i)
        elif rawdata[j] == '[':  # marked section
            # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
            # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
            # Note that this is extended by Microsoft Office "Save as Web" function
            # to include [if...] and [endif].
            return self.parse_marked_section(i)
        else:  # all other declaration elements
            decltype, j = self._scan_name(j, i)
        if j < 0:
            return j
        if decltype == "doctype":
            self._decl_otherchars = ''
        while j < n:
            c = rawdata[j]
            if c == ">":
                # end of declaration syntax
                data = rawdata[i+2:j]
                if decltype == "doctype":
                    self.handle_decl(data)
                else:
                    # According to the HTML5 specs sections "8.2.4.44 Bogus
                    # comment state" and "8.2.4.45 Markup declaration open
                    # state", a comment token should be emitted.
                    # Calling unknown_decl provides more flexibility though.
                    self.unknown_decl(data)
                return j + 1
            if c in "\"'":
                m = _declstringlit_match(rawdata, j)
                if not m:
                    return -1  # incomplete
                j = m.end()
            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
                name, j = self._scan_name(j, i)
            elif c in self._decl_otherchars:
                j = j + 1
            elif c == "[":
                # this could be handled in a separate doctype parser
                if decltype == "doctype":
                    j = self._parse_doctype_subset(j + 1, i)
                elif decltype in self._BRACKET_UNSUPPORTED_DECLS:
                    # must tolerate []'d groups in a content model in an element declaration
                    # also in data attribute specifications of attlist declaration
                    # also link type declaration subsets in linktype declarations
                    # also link attribute specification lists in link declarations
                    self.error("unsupported '[' char in %s declaration" % decltype)
                else:
                    self.error("unexpected '[' char in declaration")
            else:
                self.error(
                    "unexpected %r char in declaration" % rawdata[j])
            if j < 0:
                return j
        return -1  # incomplete

    # Internal -- parse a marked section
    # Override this to handle MS-word extension syntax <![if word]>content<![endif]>
    def parse_marked_section(self, i, report=1):
        """Parse the ``<![keyword ...`` marked section starting at index i.

        When *report* is true the section text (without its terminator) is
        passed to ``unknown_decl``.  Returns the index after the terminator,
        or a negative number if the section is not yet complete.
        """
        rawdata = self.rawdata
        assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
        sectName, j = self._scan_name(i+3, i)
        if j < 0:
            return j
        if sectName in self._STANDARD_SECTION_KEYWORDS:
            # look for standard ]]> ending
            match = _markedsectionclose.search(rawdata, i+3)
        elif sectName in self._MSOFFICE_SECTION_KEYWORDS:
            # look for MS Office ]> ending
            match = _msmarkedsectionclose.search(rawdata, i+3)
        else:
            self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
        if not match:
            return -1
        if report:
            j = match.start(0)
            self.unknown_decl(rawdata[i+3: j])
        return match.end(0)

    # Internal -- parse comment, return length or -1 if not terminated
    def parse_comment(self, i, report=1):
        """Parse the ``<!--`` comment starting at index i.

        When *report* is true the comment body is passed to
        ``handle_comment``.  Returns the index after the closing ``-->`` or
        -1 if the comment is not terminated in the buffer.
        """
        rawdata = self.rawdata
        if rawdata[i:i+4] != '<!--':
            self.error('unexpected call to parse_comment()')
        match = _commentclose.search(rawdata, i+4)
        if not match:
            return -1
        if report:
            j = match.start(0)
            self.handle_comment(rawdata[i+4: j])
        return match.end(0)

    # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
    # returning the index just past any whitespace following the trailing ']'.
    def _parse_doctype_subset(self, i, declstartpos):
        rawdata = self.rawdata
        n = len(rawdata)
        j = i
        while j < n:
            c = rawdata[j]
            if c == "<":
                s = rawdata[j:j+2]
                if s == "<":
                    # end of buffer; incomplete
                    return -1
                if s != "<!":
                    self.updatepos(declstartpos, j + 1)
                    self.error("unexpected char in internal subset (in %r)" % s)
                if (j + 4) > n:
                    # end of buffer; incomplete (this also covers the case
                    # where the buffer ends right after "<!")
                    return -1
                if rawdata[j:j+4] == "<!--":
                    j = self.parse_comment(j, report=0)
                    if j < 0:
                        return j
                    continue
                name, j = self._scan_name(j + 2, declstartpos)
                if j == -1:
                    return -1
                if name not in self._SUBSET_DECL_NAMES:
                    self.updatepos(declstartpos, j + 2)
                    self.error(
                        "unknown declaration %r in internal subset" % name)
                # handle the individual names
                meth = getattr(self, "_parse_doctype_" + name)
                j = meth(j, declstartpos)
                if j < 0:
                    return j
            elif c == "%":
                # parameter entity reference
                if (j + 1) == n:
                    # end of buffer; incomplete
                    return -1
                s, j = self._scan_name(j + 1, declstartpos)
                if j < 0:
                    return j
                if rawdata[j] == ";":
                    j = j + 1
            elif c == "]":
                j = j + 1
                while j < n and rawdata[j].isspace():
                    j = j + 1
                if j < n:
                    if rawdata[j] == ">":
                        return j
                    self.updatepos(declstartpos, j)
                    self.error("unexpected char after internal subset")
                else:
                    return -1
            elif c.isspace():
                j = j + 1
            else:
                self.updatepos(declstartpos, j)
                self.error("unexpected char %r in internal subset" % c)
        # end of buffer reached
        return -1

    # Internal -- scan past <!ELEMENT declarations
    def _parse_doctype_element(self, i, declstartpos):
        name, j = self._scan_name(i, declstartpos)
        if j == -1:
            return -1
        # style content model; just skip until '>'
        # A single find() replaces the former "slice, test, then find".
        end = self.rawdata.find(">", j)
        if end < 0:
            return -1
        return end + 1

    # Internal -- scan past <!ATTLIST declarations
    def _parse_doctype_attlist(self, i, declstartpos):
        rawdata = self.rawdata
        n = len(rawdata)
        name, j = self._scan_name(i, declstartpos)
        if j < 0:
            # Buffer ended inside the element name.
            return j
        c = rawdata[j:j+1]
        if c == "":
            return -1
        if c == ">":
            return j + 1
        while 1:
            # scan a series of attribute descriptions; simplified:
            #   name type [value] [#constraint]
            name, j = self._scan_name(j, declstartpos)
            if j < 0:
                return j
            c = rawdata[j:j+1]
            if c == "":
                return -1
            if c == "(":
                # an enumerated type; look for ')'
                close = rawdata.find(")", j)
                if close < 0:
                    return -1
                j = close + 1
                while rawdata[j:j+1].isspace():
                    j = j + 1
                if j >= n:
                    # end of buffer, incomplete
                    return -1
            else:
                name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    # Buffer ended inside the attribute type name.
                    return j
            c = rawdata[j:j+1]
            if not c:
                return -1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if m:
                    j = m.end()
                else:
                    return -1
                c = rawdata[j:j+1]
                if not c:
                    return -1
            if c == "#":
                if j + 1 == n:
                    # '#' is the last char in the buffer
                    return -1
                name, j = self._scan_name(j + 1, declstartpos)
                if j < 0:
                    return j
                c = rawdata[j:j+1]
                if not c:
                    return -1
            if c == '>':
                # all done
                return j + 1

    # Internal -- scan past <!NOTATION declarations
    def _parse_doctype_notation(self, i, declstartpos):
        name, j = self._scan_name(i, declstartpos)
        if j < 0:
            return j
        rawdata = self.rawdata
        while 1:
            c = rawdata[j:j+1]
            if not c:
                # end of buffer; incomplete
                return -1
            if c == '>':
                return j + 1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if not m:
                    return -1
                j = m.end()
            else:
                name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    return j

    # Internal -- scan past <!ENTITY declarations
    def _parse_doctype_entity(self, i, declstartpos):
        rawdata = self.rawdata
        if rawdata[i:i+1] == "%":
            # Parameter entity: skip the whitespace after the '%'.
            j = i + 1
            while 1:
                c = rawdata[j:j+1]
                if not c:
                    return -1
                if c.isspace():
                    j = j + 1
                else:
                    break
        else:
            j = i
        name, j = self._scan_name(j, declstartpos)
        if j < 0:
            return j
        while 1:
            c = rawdata[j:j+1]
            if not c:
                return -1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if m:
                    j = m.end()
                else:
                    return -1    # incomplete
            elif c == ">":
                return j + 1
            else:
                name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    return j

    # Internal -- scan a name token; return (lowercased name, new position),
    # or (None, -1) if we've reached the end of the buffer.
    def _scan_name(self, i, declstartpos):
        rawdata = self.rawdata
        n = len(rawdata)
        if i == n:
            return None, -1
        m = _declname_match(rawdata, i)
        if m:
            s = m.group()
            name = s.strip()
            if (i + len(s)) == n:
                # The name (plus trailing whitespace) runs to the end of the
                # buffer, so more of it may still arrive.
                return None, -1  # end of buffer
            return name.lower(), m.end()
        else:
            self.updatepos(declstartpos, i)
            self.error("expected name token at %r"
                       % rawdata[declstartpos:declstartpos+20])

    # To be overridden -- handlers for unknown objects
    def unknown_decl(self, data):
        """Called with the text of a non-doctype declaration; no-op here."""
        pass
							
								
								
									
										2152
									
								
								venv/lib/python3.12/site-packages/future/backports/datetime.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2152
									
								
								venv/lib/python3.12/site-packages/future/backports/datetime.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,78 @@ | ||||
| # Copyright (C) 2001-2007 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """ | ||||
| Backport of the Python 3.3 email package for Python-Future. | ||||
|  | ||||
| A package for parsing, handling, and generating email messages. | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| # Install the surrogate escape handler here because this is used by many | ||||
| # modules in the email package. | ||||
| from future.utils import surrogateescape | ||||
| surrogateescape.register_surrogateescape() | ||||
| # (Should this be done globally by ``future``?) | ||||
|  | ||||
|  | ||||
| __version__ = '5.1.0' | ||||
|  | ||||
| __all__ = [ | ||||
|     'base64mime', | ||||
|     'charset', | ||||
|     'encoders', | ||||
|     'errors', | ||||
|     'feedparser', | ||||
|     'generator', | ||||
|     'header', | ||||
|     'iterators', | ||||
|     'message', | ||||
|     'message_from_file', | ||||
|     'message_from_binary_file', | ||||
|     'message_from_string', | ||||
|     'message_from_bytes', | ||||
|     'mime', | ||||
|     'parser', | ||||
|     'quoprimime', | ||||
|     'utils', | ||||
|     ] | ||||
|  | ||||
|  | ||||
|  | ||||
| # Some convenience routines.  Don't import Parser and Message as side-effects | ||||
| # of importing email since those cascadingly import most of the rest of the | ||||
| # email package. | ||||
def message_from_string(s, *args, **kws):
    """Build a Message object model from the text in *s*.

    Any extra positional and keyword arguments are forwarded unchanged to
    the Parser constructor; the string is then parsed with ``parsestr``.
    """
    # Imported here rather than at module level so that importing the email
    # package does not pull in the parser (and most of the package) eagerly.
    from future.backports.email.parser import Parser
    parser = Parser(*args, **kws)
    return parser.parsestr(s)
|  | ||||
def message_from_bytes(s, *args, **kws):
    """Build a Message object model from the bytes string *s*.

    Any extra positional and keyword arguments are forwarded unchanged to
    the BytesParser constructor; the data is then parsed with ``parsebytes``.
    """
    # Imported here rather than at module level so that importing the email
    # package does not pull in the parser (and most of the package) eagerly.
    from future.backports.email.parser import BytesParser
    parser = BytesParser(*args, **kws)
    return parser.parsebytes(s)
|  | ||||
def message_from_file(fp, *args, **kws):
    """Read the file object *fp* and build a Message object model from it.

    Any extra positional and keyword arguments are forwarded unchanged to
    the Parser constructor; the file is then handed to its ``parse`` method.
    """
    # Imported here rather than at module level so that importing the email
    # package does not pull in the parser (and most of the package) eagerly.
    from future.backports.email.parser import Parser
    parser = Parser(*args, **kws)
    return parser.parse(fp)
|  | ||||
def message_from_binary_file(fp, *args, **kws):
    """Read the binary file object *fp* and build a Message object model.

    Any extra positional and keyword arguments are forwarded unchanged to
    the BytesParser constructor; the file is then handed to its ``parse``
    method.
    """
    # Imported here rather than at module level so that importing the email
    # package does not pull in the parser (and most of the package) eagerly.
    from future.backports.email.parser import BytesParser
    parser = BytesParser(*args, **kws)
    return parser.parse(fp)
| @ -0,0 +1,232 @@ | ||||
| """ Routines for manipulating RFC2047 encoded words. | ||||
|  | ||||
| This is currently a package-private API, but will be considered for promotion | ||||
| to a public API if there is demand. | ||||
|  | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import bytes | ||||
| from future.builtins import chr | ||||
| from future.builtins import int | ||||
| from future.builtins import str | ||||
|  | ||||
| # An encoded word looks like this: | ||||
| # | ||||
| #        =?charset[*lang]?cte?encoded_string?= | ||||
| # | ||||
| # for more information about charset see the charset module.  Here it is one | ||||
| # of the preferred MIME charset names (hopefully; you never know when parsing). | ||||
| # cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case).  In | ||||
| # theory other letters could be used for other encodings, but in practice this | ||||
| # (almost?) never happens.  There could be a public API for adding entries | ||||
| # to the CTE tables, but YAGNI for now.  'q' is Quoted Printable, 'b' is | ||||
| # Base64.  The meaning of encoded_string should be obvious.  'lang' is optional | ||||
| # as indicated by the brackets (they are not part of the syntax) but is almost | ||||
| # never encountered in practice. | ||||
| # | ||||
| # The general interface for a CTE decoder is that it takes the encoded_string | ||||
| # as its argument, and returns a tuple (cte_decoded_string, defects).  The | ||||
| # cte_decoded_string is the original binary that was encoded using the | ||||
| # specified cte.  'defects' is a list of MessageDefect instances indicating any | ||||
| # problems encountered during conversion.  'charset' and 'lang' are the | ||||
| # corresponding strings extracted from the EW, case preserved. | ||||
| # | ||||
| # The general interface for a CTE encoder is that it takes a binary sequence | ||||
| # as input and returns the cte_encoded_string, which is an ascii-only string. | ||||
| # | ||||
| # Each decoder must also supply a length function that takes the binary | ||||
| # sequence as its argument and returns the length of the resulting encoded | ||||
| # string. | ||||
| # | ||||
| # The main API functions for the module are decode, which calls the decoder | ||||
| # referenced by the cte specifier, and encode, which adds the appropriate | ||||
| # RFC 2047 "chrome" to the encoded string, and can optionally automatically | ||||
| # select the shortest possible encoding.  See their docstrings below for | ||||
| # details. | ||||
|  | ||||
| import re | ||||
| import base64 | ||||
| import binascii | ||||
| import functools | ||||
| from string import ascii_letters, digits | ||||
| from future.backports.email import errors | ||||
|  | ||||
| __all__ = ['decode_q', | ||||
|            'encode_q', | ||||
|            'decode_b', | ||||
|            'encode_b', | ||||
|            'len_q', | ||||
|            'len_b', | ||||
|            'decode', | ||||
|            'encode', | ||||
|            ] | ||||
|  | ||||
| # | ||||
| # Quoted Printable | ||||
| # | ||||
|  | ||||
| # regex based decoder. | ||||
# Replaces every "=XX" hex escape in a bytes string with the single byte
# whose value is XX.
_q_byte_subber = functools.partial(
    re.compile(br'=([a-fA-F0-9]{2})').sub,
    lambda hex_escape: bytes([int(hex_escape.group(1), 16)]))


def decode_q(encoded):
    """Decode a Q-encoded bytes string.

    Underscores become spaces and "=XX" hex escapes become the byte they
    name.  Returns a ``(decoded_bytes, defects)`` tuple; the defects list is
    always empty for this decoder.
    """
    with_spaces = bytes(encoded.replace(b'_', b' '))
    decoded = _q_byte_subber(with_spaces)
    return decoded, []
|  | ||||
|  | ||||
| # dict mapping bytes to their encoded form | ||||
class _QByteMap(dict):
    """Lazily filled table from a byte value to its Q-encoded text.

    Entries are computed on first lookup and then cached in the dict.
    """

    # Byte values that are emitted literally rather than as "=XX".
    safe = bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'))

    def __missing__(self, key):
        if key in self.safe:
            encoded = chr(key)
        else:
            encoded = "={:02X}".format(key)
        self[key] = encoded
        return self[key]


_q_byte_map = _QByteMap()

# In headers spaces are mapped to '_'.
_q_byte_map[ord(' ')] = '_'


def encode_q(bstring):
    """Return the Q encoding of the bytes in *bstring* as a text string."""
    pieces = [_q_byte_map[octet] for octet in bytes(bstring)]
    return str(''.join(pieces))


def len_q(bstring):
    """Return the length *bstring* would have once Q-encoded."""
    total = 0
    for octet in bytes(bstring):
        total += len(_q_byte_map[octet])
    return total
|  | ||||
|  | ||||
| # | ||||
| # Base64 | ||||
| # | ||||
|  | ||||
def decode_b(encoded):
    """Decode a base64-encoded bytes string, tolerating malformed input.

    Returns a ``(decoded_bytes, defects)`` tuple.  ``defects`` lists the
    problems found: missing padding and/or characters outside the base64
    alphabet.  If no amount of padding makes the input decodable (its
    length is one more than a multiple of four), the input is returned
    undecoded together with the defects instead of raising.
    """
    defects = []
    pad_err = len(encoded) % 4
    if pad_err:
        defects.append(errors.InvalidBase64PaddingDefect())
        padded_encoded = encoded + b'==='[:4-pad_err]
    else:
        padded_encoded = encoded
    try:
        # The validate kwarg to b64decode is not supported in Py2.x, so
        # validate by hand.  \Z (not $) is used so that a trailing newline
        # counts as an invalid character instead of slipping through to
        # b64decode.
        if not re.match(br'^[A-Za-z0-9+/]*={0,2}\Z', padded_encoded):
            raise binascii.Error('Non-base64 digit found')
        return base64.b64decode(padded_encoded), defects
    except binascii.Error:
        # Since we had correct padding, this must be an invalid char error.
        defects = [errors.InvalidBase64CharactersDefect()]
        # The non-alphabet characters are ignored as far as padding
        # goes, but we don't know how many there are.  So we'll just
        # try various padding lengths until something works.
        for i in 0, 1, 2, 3:
            try:
                return base64.b64decode(encoded+b'='*i), defects
            except (binascii.Error, TypeError):    # Py2 raises a TypeError
                if i==0:
                    defects.append(errors.InvalidBase64PaddingDefect())
        # No padding length works.  Hand back the undecoded bytes with the
        # defects rather than crashing the caller.
        # NOTE(review): newer CPython reports this case with
        # errors.InvalidBase64LengthDefect; use it if this errors module
        # provides it -- confirm against the package's errors.py.
        length_defect = getattr(errors, 'InvalidBase64LengthDefect', None)
        if length_defect is not None:
            defects = [length_defect()]
        return encoded, defects
|  | ||||
def encode_b(bstring):
    """Return the base64 encoding of *bstring* as an ASCII text string."""
    encoded_bytes = base64.b64encode(bstring)
    return encoded_bytes.decode('ascii')
|  | ||||
def len_b(bstring):
    """Return the length *bstring* would have once base64-encoded."""
    # Every started group of 3 input bytes produces 4 output characters.
    quads, remainder = divmod(len(bstring), 3)
    if remainder:
        quads += 1
    return quads * 4
|  | ||||
|  | ||||
# Maps the lower-cased cte letter of an encoded word to its decoder.
_cte_decoders = {
    'q': decode_q,
    'b': decode_b,
    }

def decode(ew):
    """Decode encoded word and return (string, charset, lang, defects) tuple.

    An encoded word (RFC 2047, with the RFC 2231 language extension) has
    the form:

        =?charset*lang?cte?encoded_string?=

    where '*lang' may be omitted but the other parts may not be.

    This function expects exactly such a string: the syntax is not checked,
    and a malformed word may raise.  The encoded_string is first decoded
    from its Content Transfer Encoding and the resulting bytes are then
    decoded to unicode with the given charset.  If that fails, a defect is
    appended to the defects list and the undecodable bytes are carried
    through with the 'surrogateescape' error handler.  If the charset is
    unknown, the bytes are decoded as ascii with 'surrogateescape', and a
    defect is recorded unless the charset is 'unknown-8bit'.

    The charset and language are returned as found in the word; the
    language is the empty string when absent.

    """
    _, charset_spec, cte, payload, _ = str(ew).split('?')
    charset, _, lang = charset_spec.partition('*')
    cte = cte.lower()
    # Recover the original bytes and do CTE decoding.
    raw = payload.encode('ascii', 'surrogateescape')
    cte_decoder = _cte_decoders[cte]
    raw, defects = cte_decoder(raw)
    # Turn the CTE decoded bytes into unicode.
    try:
        text = raw.decode(charset)
    except UnicodeError:
        problem = errors.UndecodableBytesDefect("Encoded word "
            "contains bytes not decodable using {} charset".format(charset))
        defects.append(problem)
        text = raw.decode(charset, 'surrogateescape')
    except LookupError:
        text = raw.decode('ascii', 'surrogateescape')
        if charset.lower() != 'unknown-8bit':
            problem = errors.CharsetError("Unknown charset {} "
                "in encoded word; decoded as unknown bytes".format(charset))
            defects.append(problem)
    return text, charset, lang, defects
|  | ||||
|  | ||||
# Dispatch table used by encode(): maps a content transfer encoding letter to
# the function that encodes a bytes payload for that encoding.
_cte_encoders = {
    'q': encode_q,
    'b': encode_b,
    }

# Companion table used by encode() when no encoding is requested: maps the
# same letters to functions returning the length the encoded payload would
# have, so the two encodings can be compared without actually encoding.
_cte_encode_length = {
    'q': len_q,
    'b': len_b,
    }
|  | ||||
def encode(string, charset='utf-8', encoding=None, lang=''):
    """Encode string as an encoded word, picking the shorter CTE by default.

    The result is an RFC 2047 encoded word, optionally carrying a language
    suffix on the charset (the extension described in RFC 2231):

        =?charset*lang?cte?encoded_string?=

    where '*lang' only appears when the 'lang' argument is given a value.

    Arguments:

    string   -- the text to encode; it is converted with str() first.
    charset  -- charset used to turn the text into bytes before CTE encoding
                (default 'utf-8').  The special value 'unknown-8bit' encodes
                the text as ASCII with the 'surrogateescape' error handler.
    encoding -- the cte specifier to use, 'q' or 'b'.  If None (the default),
                both encoded lengths are computed and 'q' is chosen unless it
                would be five or more characters longer than 'b'.
    lang     -- language string to place after the charset (default '').

    """
    text = str(string)
    if charset != 'unknown-8bit':
        payload = text.encode(charset)
    else:
        payload = text.encode('ascii', 'surrogateescape')
    if encoding is None:
        q_size = _cte_encode_length['q'](payload)
        b_size = _cte_encode_length['b'](payload)
        # Bias toward q.  5 is arbitrary.
        encoding = 'b' if q_size - b_size >= 5 else 'q'
    encoded = _cte_encoders[encoding](payload)
    lang_suffix = '*' + lang if lang else lang
    return "=?{0}{1}?{2}?{3}?=".format(charset, lang_suffix, encoding, encoded)
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,546 @@ | ||||
| # Copyright (C) 2002-2007 Python Software Foundation | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Email address parsing code. | ||||
|  | ||||
| Lifted directly from rfc822.py.  This should eventually be rewritten. | ||||
| """ | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import print_function | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import int | ||||
|  | ||||
| __all__ = [ | ||||
|     'mktime_tz', | ||||
|     'parsedate', | ||||
|     'parsedate_tz', | ||||
|     'quote', | ||||
|     ] | ||||
|  | ||||
| import time, calendar | ||||
|  | ||||
# Separator strings used when joining parsed pieces back together.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
# Lower-case month names accepted by the date parser.  The twelve abbreviated
# names come first, followed by the twelve full names; the parser takes the
# list index + 1 as the month number and subtracts 12 when the match was one
# of the full names.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

# Lower-case day-of-week abbreviations; a leading token matching one of these
# is skipped by the date parser.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Values are written in the same signed HHMM form as numeric zone indicators
# (e.g. -500 means five hours behind UTC); the date parser converts them to
# seconds.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }
|  | ||||
|  | ||||
def parsedate_tz(data):
    """Convert a date string to a 10-element time tuple.

    This is a thin wrapper around _parsedate_tz(): the result is returned as
    a tuple, and a time zone offset of None (element 9) is replaced by 0.
    Returns None when the string could not be parsed.
    """
    parsed = _parsedate_tz(data)
    if parsed:
        if parsed[9] is None:
            # Callers of this function always get a numeric offset.
            parsed[9] = 0
        return tuple(parsed)
    return None
|  | ||||
def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns a 10-element list

        [year, month, day, hour, minute, second, 0, 1, -1, tzoffset]

    or None if the string cannot be parsed.  The weekday and day-of-year
    slots are filled with the placeholders 0 and 1, and the DST flag is -1
    (unknown).

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.  The offset is also None when the zone
    is missing or is neither a known zone name nor an integer.

    Malformed input (whitespace only, empty date fields, a time with the
    wrong number of components) yields None rather than an exception.

    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: there is no first token to inspect.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy):
        # Splitting an RFC 850 date on '-' (or stripping a day name) can
        # leave empty fields; indexing them below would raise IndexError.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Accept "month day" order as well as "day month".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Matched one of the full month names in the second half of the list.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The year and time fields are in the opposite order.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            return None
    if not yy[0].isdigit():
        # The year and zone fields are in the opposite order.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Wrong number of components; without this the hour/minute
            # names would be unbound below.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        if tzoffset==0 and tz.startswith('-'):
            # "-0000": UTC time with no information about the local zone.
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
|  | ||||
|  | ||||
def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    The string is parsed with parsedate_tz() and the trailing time zone
    offset is dropped.  A non-tuple result (None for unparseable input) is
    passed through unchanged.
    """
    parsed = parsedate_tz(data)
    return parsed[:9] if isinstance(parsed, tuple) else parsed
|  | ||||
|  | ||||
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    Element 9 of *data* is the zone offset in seconds.  When it is None the
    fields are interpreted as local time (with the DST flag forced to -1);
    otherwise they are interpreted as UTC and the offset is subtracted.
    """
    offset = data[9]
    if offset is not None:
        return calendar.timegm(data) - offset
    # No zone info, so localtime is better assumption than GMT
    return time.mktime(data[:8] + (-1,))
|  | ||||
|  | ||||
def quote(str):
    """Escape a string for use inside a quoted string.

    Backslashes and double quotes are each preceded by a backslash (turned
    into quoted pairs).  The surrounding double quotes are not added.
    """
    # Backslashes must be doubled first so the ones added for the double
    # quotes are not escaped again.
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
|  | ||||
|  | ||||
class AddrlistClass(object):
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (self.pos) into the header text (self.field);
    every get*() method consumes text starting at the cursor and advances it.
    Comments encountered along the way are collected in self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped spaces and tabs joined into one string (CR and LF
        are skipped but not included).  Comments are appended to
        self.commentlist.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return EMPTYSTRING.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (name, address) pair.  A stretch of the field that yields no address
        contributes a ('', '') placeholder.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: one pair for a single
        mailbox, several for a group, and an empty list when nothing could
        be parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, the full
        local@domain string otherwise, and the empty string when an `@' is
        present but the domain is missing or invalid.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address instead of a bare
            # local part, so a failed parse cannot be mistaken for a valid
            # address.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is found inside the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't accept domains containing a second `@', such as
                # `a@malicious.org@important.com': stopping silently here
                # would report `a@malicious.org' as a valid address.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Whitespace between words is skipped, and
        the words are returned as a list.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
|  | ||||
class AddressList(AddrlistClass):
    """A parsed list of RFC 2822 addresses with set-like operators.

    The parsed (name, address) pairs are kept in self.addresslist.  `+' and
    `-' (and their in-place forms) behave like set union and difference on
    that list.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # An empty or None field is not parsed at all.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        for entry in other.addresslist:
            if entry not in self.addresslist:
                union.addresslist.append(entry)
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference
        difference = AddressList(None)
        for entry in self.addresslist:
            if entry not in other.addresslist:
                difference.addresslist.append(entry)
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
| @ -0,0 +1,365 @@ | ||||
| """Policy framework for the email package. | ||||
|  | ||||
| Allows fine grained feature control of how the package parses and emits data. | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import print_function | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import super | ||||
| from future.builtins import str | ||||
| from future.utils import with_metaclass | ||||
|  | ||||
| import abc | ||||
| from future.backports.email import header | ||||
| from future.backports.email import charset as _charset | ||||
| from future.backports.email.utils import _has_surrogates | ||||
|  | ||||
| __all__ = [ | ||||
|     'Policy', | ||||
|     'Compat32', | ||||
|     'compat32', | ||||
|     ] | ||||
|  | ||||
|  | ||||
class _PolicyBase(object):

    """Basic machinery shared by all policy objects.

    This class does nothing useful on its own.  A subclass declares the
    values it manages as class attributes holding their defaults.  The
    constructor then accepts keyword arguments that override those defaults
    for one instance; any keyword that does not name an existing attribute
    is rejected with TypeError.

    Instances are read-only: assigning to an attribute raises
    AttributeError.  Modified copies are produced with clone(), and two
    instances can be added, giving a new instance in which the values
    explicitly set on the right hand operand override those of the left
    hand operand.  That is,

        A + B == A.clone(<non-default values of B>)

    The repr of an instance can be used to reconstruct the object
    if and only if the repr of the values can be used to reconstruct
    those values.

    """

    def __init__(self, **kw):
        """Create new Policy, possibly overriding some defaults.

        Each keyword must name an attribute the object already has (normally
        a class-level default); otherwise TypeError is raised.

        """
        for name, value in kw.items():
            if not hasattr(self, name):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        name, self.__class__.__name__))
            # Go around our own __setattr__, which refuses every assignment.
            super(_PolicyBase, self).__setattr__(name, value)

    def __repr__(self):
        # Only values stored on the instance (the overrides) are shown.
        rendered = ', '.join(
            "{}={!r}".format(key, val) for key, val in self.__dict__.items())
        return "{}({})".format(self.__class__.__name__, rendered)

    def clone(self, **kw):
        """Return a copy of this object with the given attributes replaced.

        The copy carries over every value stored on this instance; keyword
        arguments are then applied on top.  A keyword that does not name an
        existing attribute raises TypeError.

        """
        klass = self.__class__
        duplicate = klass.__new__(klass)
        for attr, value in self.__dict__.items():
            object.__setattr__(duplicate, attr, value)
        for attr, value in kw.items():
            if not hasattr(self, attr):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        attr, self.__class__.__name__))
            object.__setattr__(duplicate, attr, value)
        return duplicate

    def __setattr__(self, name, value):
        # Instances are immutable; the message only distinguishes a known
        # attribute from an unknown one.
        template = ("{!r} object attribute {!r} is read-only"
                    if hasattr(self, name)
                    else "{!r} object has no attribute {!r}")
        raise AttributeError(template.format(self.__class__.__name__, name))

    def __add__(self, other):
        """Combine two policies; the right operand's overrides win.

        Returns a new instance of this object's class.

        """
        return self.clone(**other.__dict__)
|  | ||||
|  | ||||
def _append_doc(doc, added_doc):
    """Return *doc* extended with the body of *added_doc*.

    Everything after the last newline of *doc* is dropped (typically the
    closing indentation of a docstring), and the first line of *added_doc*
    (the '+' marker line) is dropped; the two remainders are joined with a
    newline.
    """
    doc = doc.rsplit('\n', 1)[0]
    added_doc = added_doc.split('\n', 1)[1]
    return doc + '\n' + added_doc

def _extend_docstrings(cls):
    """Class decorator that expands docstrings starting with '+'.

    If the class docstring starts with '+', it is replaced by the docstring
    of the first base class with the remainder appended.  Likewise, for every
    attribute in the class body whose docstring starts with '+', the first
    non-empty docstring of the same-named attribute found in the bases' MROs
    is used as the prefix.  Classes that do not define the attribute are
    skipped.  Returns *cls*.
    """
    if cls.__doc__ and cls.__doc__.startswith('+'):
        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
    for name, attr in cls.__dict__.items():
        if attr.__doc__ and attr.__doc__.startswith('+'):
            for c in (c for base in cls.__bases__ for c in base.mro()):
                # An ancestor may not define this attribute at all (e.g. a
                # mixin listed before the documented base, or `object');
                # keep searching instead of failing with AttributeError.
                inherited = getattr(c, name, None)
                if inherited is None:
                    continue
                doc = getattr(inherited, '__doc__', None)
                if doc:
                    attr.__doc__ = _append_doc(doc, attr.__doc__)
                    break
    return cls
|  | ||||
|  | ||||
# NOTE(review): with_metaclass is defined outside this excerpt; presumably it
# applies abc.ABCMeta as the metaclass on top of _PolicyBase -- confirm.
class Policy(with_metaclass(abc.ABCMeta, _PolicyBase)):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters.  A Policy object contains a set of values and
    functions that control how input is interpreted and how output is rendered.
    For example, the parameter 'raise_on_defect' controls whether or not an RFC
    violation results in an error being raised or not, while 'max_line_length'
    controls the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by passing
    it as a keyword argument to the constructor.  Policy objects are immutable,
    but a new Policy object can be created with only certain values changed by
    calling the Policy instance with keyword arguments.  Policy objects can
    also be added, producing a new Policy object in which the non-default
    attributes set in the right hand operand overwrite those specified in the
    left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit.  Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    """

    # Class-level defaults for the settable attributes described in the
    # class docstring.
    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

            handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect should be
        registered if it is not raised.  If the raise_on_defect is True, the
        defect is raised as an error, otherwise the object and the defect are
        passed to register_defect.

        This method is intended to be called by parsers that discover defects.
        The email package parsers always call it with Defect instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False.  This method is
        part of the Policy API so that Policy subclasses can implement custom
        defect handling.  The default implementation calls the append method of
        the defects attribute of obj.  The objects used by the email package by
        default that get passed to this method will always have a defects
        attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object.  If the returned
        value is not 0 or None, and there are already a number of headers with
        the name 'name' equal to the value returned, a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to append the
        value to the list of headers, it is easy to create duplicate headers
        without realizing it.  This method allows certain headers to be limited
        in the number of instances of that header that may be added to a
        Message programmatically.  (The limit is not observed by the parser,
        which will faithfully produce as many headers as exist in the message
        being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    # The remaining methods are abstract: each one is decorated with
    # abc.abstractmethod and its body only raises NotImplementedError.

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines of
        a single header, return the (name, value) tuple that should be stored
        in the model.  The input lines should retain their terminating linesep
        characters.  The lines passed in by the email package may contain
        surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the value
        to be returned to the application program that is requesting that
        header.  The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a BytesParser.
        The returned value should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a string
        containing linesep characters that implement the folding of the header
        according to the policy controls.  The value passed in by the email
        package may contain surrogateescaped binary data if the lines were
        parsed by a BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return binary
        data containing linesep characters that implement the folding of the
        header according to the policy controls.  The value passed in by the
        email package may contain surrogateescaped binary data.

        """
        raise NotImplementedError
|  | ||||
|  | ||||
@_extend_docstrings
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy.  It
    replicates the behavior of the email package version 5.1.
    """

    def _sanitize_header(self, name, value):
        # Anything that is not a string is assumed to be a header object
        # already and is handed back untouched.
        if not isinstance(value, str):
            return value
        # Plain strings without surrogates need no special treatment.
        if not _has_surrogates(value):
            return value
        # Surrogates present: wrap the value in a Header that uses the
        # unknown-8bit charset so the bytes end up as encoded words.
        return header.Header(value, charset=_charset.UNKNOWN8BIT,
                             header_name=name)

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.

        """
        first_line = sourcelines[0]
        continuation = ''.join(sourcelines[1:])
        name, remainder = first_line.split(':', 1)
        value = remainder.lstrip(' \t') + continuation
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name and value are returned unmodified.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value contains binary data, it is converted into a Header object
        using the unknown-8bit charset.  Otherwise it is returned unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  Non-ASCII binary data are CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  If cte_type is 7bit, non-ascii binary data is CTE
        encoded using the unknown-8bit charset.  Otherwise the original source
        header is used, with its existing line breaks and/or binary data.

        """
        seven_bit_only = self.cte_type == '7bit'
        folded = self._fold(name, value, sanitize=seven_bit_only)
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        # Assemble "<name>: <encoded value><linesep>" piece by piece.
        pieces = ['%s: ' % name]
        if not isinstance(value, str):
            # Assume it is a Header-like object.
            hdr = value
        elif not _has_surrogates(value):
            hdr = header.Header(value, header_name=name)
        elif sanitize:
            hdr = header.Header(value,
                                charset=_charset.UNKNOWN8BIT,
                                header_name=name)
        else:
            # Raw 8bit data of unknown encoding cannot be split safely: an
            # ascii-style split could cut a multibyte sequence in half.  The
            # least harmful choice is to emit the value unsplit and accept
            # that the line may be too long.
            pieces.append(value)
            hdr = None
        if hdr is not None:
            pieces.append(hdr.encode(linesep=self.linesep,
                                     maxlinelen=self.max_line_length))
        pieces.append(self.linesep)
        return ''.join(pieces)


# Module-level instance of the backward compatibility policy.
compat32 = Compat32()
| @ -0,0 +1,121 @@ | ||||
| # Copyright (C) 2002-2007 Python Software Foundation | ||||
| # Author: Ben Gertzfield | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Base64 content transfer encoding per RFCs 2045-2047. | ||||
|  | ||||
| This module handles the content transfer encoding method defined in RFC 2045 | ||||
| to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit | ||||
| characters encoding known as Base64. | ||||
|  | ||||
| It is used in the MIME standards for email to attach images, audio, and text | ||||
| using some 8-bit character sets to messages. | ||||
|  | ||||
| This module provides an interface to encode and decode both headers and bodies | ||||
| with Base64 encoding. | ||||
|  | ||||
| RFC 2045 defines a method for including character set information in an | ||||
| `encoded-word' in a header.  This method is commonly used for 8-bit real names | ||||
| in To:, From:, Cc:, etc. fields, as well as Subject: lines. | ||||
|  | ||||
| This module does not do the line wrapping or end-of-line character conversion | ||||
| necessary for proper internationalized headers; it only does dumb encoding and | ||||
| decoding.  To deal with the various line wrapping issues, use the email.header | ||||
| module. | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import range | ||||
| from future.builtins import bytes | ||||
| from future.builtins import str | ||||
|  | ||||
| __all__ = [ | ||||
|     'body_decode', | ||||
|     'body_encode', | ||||
|     'decode', | ||||
|     'decodestring', | ||||
|     'header_encode', | ||||
|     'header_length', | ||||
|     ] | ||||
|  | ||||
|  | ||||
| from base64 import b64encode | ||||
| from binascii import b2a_base64, a2b_base64 | ||||
|  | ||||
# Line-ending and joiner constants.  NL is the default eol of body_encode()
# and EMPTYSTRING is used there to join the encoded chunks.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): MISC_LEN is not referenced in the visible part of this
# module; confirm whether anything else still relies on it.
MISC_LEN = 7
|  | ||||
|  | ||||
| # Helpers | ||||
def header_length(bytearray):
    """Return the length of the given byte sequence once base64-encoded.

    Every group of three input bytes, including a trailing partial group,
    is represented by four output characters.
    """
    whole_groups, remainder = divmod(len(bytearray), 3)
    group_count = whole_groups + (1 if remainder else 0)
    return 4 * group_count
|  | ||||
|  | ||||
def header_encode(header_bytes, charset='iso-8859-1'):
    """Return header_bytes as a base64 RFC 2047 encoded word.

    header_bytes may be bytes or a text string; a text string is first
    encoded to bytes with charset.  charset (default iso-8859-1) is also
    the name placed in the encoded word.  An empty value yields an empty
    string.
    """
    if not header_bytes:
        return ""
    raw = header_bytes
    if isinstance(raw, str):
        raw = raw.encode(charset)
    payload = b64encode(raw).decode("ascii")
    return '=?%s?b?%s?=' % (charset, payload)
|  | ||||
|  | ||||
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode a bytes-like object with base64, one chunk per output line.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  A base64 group is four characters wide, so a maxlinelen
    below 4 cannot be honoured; one group per line is emitted in that case.

    Each line of encoded text will end with eol, which defaults to "\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    Returns a text string.  A false input (empty bytes, None, ...) yields the
    empty text string.
    """
    if not s:
        # Return text here as well, so the result type does not depend on
        # whether the input happened to be empty.
        return EMPTYSTRING

    # Split only on whole 3-byte groups: each group becomes exactly four
    # characters, so no line exceeds maxlinelen and '=' padding can only
    # appear on the last line.  The max() keeps the step from being 0,
    # which would make range() raise ValueError.
    max_unencoded = max(maxlinelen // 4, 1) * 3
    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        if enc.endswith(NL) and eol != NL:
            # Swap the "\n" appended by b2a_base64() for the requested eol.
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
|  | ||||
|  | ||||
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    A text string is converted to bytes with the raw-unicode-escape codec
    before decoding; any other non-empty value is passed to the decoder
    as is.  A false value yields empty bytes.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return bytes()
    payload = string
    if isinstance(payload, str):
        payload = payload.encode('raw-unicode-escape')
    return a2b_base64(payload)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
| @ -0,0 +1,409 @@ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import str | ||||
| from future.builtins import next | ||||
|  | ||||
| # Copyright (C) 2001-2007 Python Software Foundation | ||||
| # Author: Ben Gertzfield, Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| __all__ = [ | ||||
|     'Charset', | ||||
|     'add_alias', | ||||
|     'add_charset', | ||||
|     'add_codec', | ||||
|     ] | ||||
|  | ||||
| from functools import partial | ||||
|  | ||||
| from future.backports import email | ||||
| from future.backports.email import errors | ||||
| from future.backports.email.encoders import encode_7or8bit | ||||
|  | ||||
|  | ||||
# Flags for types of header encodings
QP          = 1 # Quoted-Printable
BASE64      = 2 # Base64
SHORTEST    = 3 # the shorter of QP and base64, but only for headers

# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
RFC2047_CHROME_LEN = 7

# Default value of Charset.__init__'s input_charset argument.
DEFAULT_CHARSET = 'us-ascii'
# Codec name that _encode() special-cases: text is encoded as ascii with the
# surrogateescape error handler instead of using the name as a codec.
UNKNOWN8BIT = 'unknown-8bit'
# Joiner used when assembling strings from lists of characters.
EMPTYSTRING = ''
|  | ||||
|  | ||||
# Defaults
# Maps a canonical charset name to a 3-tuple
# (header encoding, body encoding, output charset).  Charset.__init__ looks
# the input charset up here and falls back to (SHORTEST, BASE64, None) for
# names that are not listed; add_charset() adds or replaces entries.
CHARSETS = {
    # input        header enc  body enc output conv
    'iso-8859-1':  (QP,        QP,      None),
    'iso-8859-2':  (QP,        QP,      None),
    'iso-8859-3':  (QP,        QP,      None),
    'iso-8859-4':  (QP,        QP,      None),
    # iso-8859-5 is Cyrillic, and not especially used
    # iso-8859-6 is Arabic, also not particularly used
    # iso-8859-7 is Greek, QP will not make it readable
    # iso-8859-8 is Hebrew, QP will not make it readable
    'iso-8859-9':  (QP,        QP,      None),
    'iso-8859-10': (QP,        QP,      None),
    # iso-8859-11 is Thai, QP will not make it readable
    'iso-8859-13': (QP,        QP,      None),
    'iso-8859-14': (QP,        QP,      None),
    'iso-8859-15': (QP,        QP,      None),
    'iso-8859-16': (QP,        QP,      None),
    'windows-1252':(QP,        QP,      None),
    'viscii':      (QP,        QP,      None),
    'us-ascii':    (None,      None,    None),
    'big5':        (BASE64,    BASE64,  None),
    'gb2312':      (BASE64,    BASE64,  None),
    'euc-jp':      (BASE64,    None,    'iso-2022-jp'),
    'shift_jis':   (BASE64,    None,    'iso-2022-jp'),
    'iso-2022-jp': (BASE64,    None,    None),
    'koi8-r':      (BASE64,    BASE64,  None),
    'utf-8':       (SHORTEST,  BASE64, 'utf-8'),
    }
|  | ||||
# Aliases for other commonly-used names for character sets.  Map
# them to the real ones used in email.
# Charset.__init__ consults this table with the lower-cased input charset
# name and again for the output charset; names without an entry are used
# unchanged.  add_alias() adds or replaces entries.
ALIASES = {
    'latin_1': 'iso-8859-1',
    'latin-1': 'iso-8859-1',
    'latin_2': 'iso-8859-2',
    'latin-2': 'iso-8859-2',
    'latin_3': 'iso-8859-3',
    'latin-3': 'iso-8859-3',
    'latin_4': 'iso-8859-4',
    'latin-4': 'iso-8859-4',
    'latin_5': 'iso-8859-9',
    'latin-5': 'iso-8859-9',
    'latin_6': 'iso-8859-10',
    'latin-6': 'iso-8859-10',
    'latin_7': 'iso-8859-13',
    'latin-7': 'iso-8859-13',
    'latin_8': 'iso-8859-14',
    'latin-8': 'iso-8859-14',
    'latin_9': 'iso-8859-15',
    'latin-9': 'iso-8859-15',
    'latin_10':'iso-8859-16',
    'latin-10':'iso-8859-16',
    'cp949':   'ks_c_5601-1987',
    'euc_jp':  'euc-jp',
    'euc_kr':  'euc-kr',
    'ascii':   'us-ascii',
    }
|  | ||||
|  | ||||
# Map charsets to their Unicode codec strings.
# Charset.__init__ uses this for both the input and the output charset and
# falls back to the charset name itself when there is no entry.
# add_codec() adds or replaces entries.
CODEC_MAP = {
    'gb2312':      'eucgb2312_cn',
    'big5':        'big5_tw',
    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
    # Let that stuff pass through without conversion to/from Unicode.
    'us-ascii':    None,
    }
|  | ||||
|  | ||||
| # Convenience functions for extending the above mappings | ||||
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register the email properties of a character set globally.

    charset is the input character set and must be its canonical name.

    header_enc and body_enc say how message headers and bodies in that
    charset are to be encoded: Charset.QP for quoted-printable,
    Charset.BASE64 for base64, Charset.SHORTEST for whichever of the two is
    shorter, or None (the default) for no encoding.  SHORTEST is only valid
    for header_enc; passing it as body_enc raises ValueError.

    output_charset is the character set the output should be in.
    Conversions go from the input charset, to Unicode, to the output charset
    when the method Charset.convert() is called.  The default is to output
    in the same character set as the input.

    Both the input charset and output_charset must have Unicode codec
    entries in the module's charset-to-codec mapping; use
    add_codec(charset, codecname) to add codecs the module does not know
    about.  See the codecs module's documentation for more information.
    """
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    properties = (header_enc, body_enc, output_charset)
    CHARSETS[charset] = properties
|  | ||||
|  | ||||
def add_alias(alias, canonical):
    """Register alias as another name for the charset canonical.

    alias is the alias name, e.g. latin-1
    canonical is the character set's canonical name, e.g. iso-8859-1
    """
    ALIASES.update({alias: canonical})
|  | ||||
|  | ||||
def add_codec(charset, codecname):
    """Register the codec that maps the given charset to/from Unicode.

    charset is the canonical name of a character set.  codecname is the name
    of a Python codec, as appropriate for the second argument to the unicode()
    built-in, or to the encode() method of a Unicode string.
    """
    CODEC_MAP.update({charset: codecname})
|  | ||||
|  | ||||
| # Convenience function for encoding strings, taking into account | ||||
| # that they might be unknown-8bit (ie: have surrogate-escaped bytes) | ||||
def _encode(string, codec):
    """Return string (coerced to text) encoded to bytes with codec.

    The UNKNOWN8BIT pseudo-codec is handled by encoding as ascii with the
    surrogateescape error handler.
    """
    text = str(string)
    if codec != UNKNOWN8BIT:
        return text.encode(codec)
    return text.encode('ascii', 'surrogateescape')
|  | ||||
|  | ||||
| class Charset(object): | ||||
|     """Map character sets to their email properties. | ||||
|  | ||||
|     This class provides information about the requirements imposed on email | ||||
|     for a specific character set.  It also provides convenience routines for | ||||
|     converting between character sets, given the availability of the | ||||
|     applicable codecs.  Given a character set, it will do its best to provide | ||||
|     information on how to use that character set in an email in an | ||||
|     RFC-compliant way. | ||||
|  | ||||
|     Certain character sets must be encoded with quoted-printable or base64 | ||||
|     when used in email headers or bodies.  Certain character sets must be | ||||
|     converted outright, and are not allowed in email.  Instances of this | ||||
|     module expose the following information about a character set: | ||||
|  | ||||
|     input_charset: The initial character set specified.  Common aliases | ||||
|                    are converted to their `official' email names (e.g. latin_1 | ||||
|                    is converted to iso-8859-1).  Defaults to 7-bit us-ascii. | ||||
|  | ||||
|     header_encoding: If the character set must be encoded before it can be | ||||
|                      used in an email header, this attribute will be set to | ||||
|                      Charset.QP (for quoted-printable), Charset.BASE64 (for | ||||
|                      base64 encoding), or Charset.SHORTEST for the shortest of | ||||
|                      QP or BASE64 encoding.  Otherwise, it will be None. | ||||
|  | ||||
|     body_encoding: Same as header_encoding, but describes the encoding for the | ||||
|                    mail message's body, which indeed may be different than the | ||||
|                    header encoding.  Charset.SHORTEST is not allowed for | ||||
|                    body_encoding. | ||||
|  | ||||
|     output_charset: Some character sets must be converted before they can be | ||||
|                     used in email headers or bodies.  If the input_charset is | ||||
|                     one of them, this attribute will contain the name of the | ||||
|                     charset output will be converted to.  Otherwise, it will | ||||
|                     be None. | ||||
|  | ||||
|     input_codec: The name of the Python codec used to convert the | ||||
|                  input_charset to Unicode.  If no conversion codec is | ||||
|                  necessary, this attribute will be None. | ||||
|  | ||||
|     output_codec: The name of the Python codec used to convert Unicode | ||||
|                   to the output_charset.  If no conversion codec is necessary, | ||||
|                   this attribute will have the same value as the input_codec. | ||||
|     """ | ||||
|     def __init__(self, input_charset=DEFAULT_CHARSET): | ||||
|         # RFC 2046, $4.1.2 says charsets are not case sensitive.  We coerce to | ||||
|         # unicode because its .lower() is locale insensitive.  If the argument | ||||
|         # is already a unicode, we leave it at that, but ensure that the | ||||
|         # charset is ASCII, as the standard (RFC XXX) requires. | ||||
|         try: | ||||
|             if isinstance(input_charset, str): | ||||
|                 input_charset.encode('ascii') | ||||
|             else: | ||||
|                 input_charset = str(input_charset, 'ascii') | ||||
|         except UnicodeError: | ||||
|             raise errors.CharsetError(input_charset) | ||||
|         input_charset = input_charset.lower() | ||||
|         # Set the input charset after filtering through the aliases | ||||
|         self.input_charset = ALIASES.get(input_charset, input_charset) | ||||
|         # We can try to guess which encoding and conversion to use by the | ||||
|         # charset_map dictionary.  Try that first, but let the user override | ||||
|         # it. | ||||
|         henc, benc, conv = CHARSETS.get(self.input_charset, | ||||
|                                         (SHORTEST, BASE64, None)) | ||||
|         if not conv: | ||||
|             conv = self.input_charset | ||||
|         # Set the attributes, allowing the arguments to override the default. | ||||
|         self.header_encoding = henc | ||||
|         self.body_encoding = benc | ||||
|         self.output_charset = ALIASES.get(conv, conv) | ||||
|         # Now set the codecs.  If one isn't defined for input_charset, | ||||
|         # guess and try a Unicode codec with the same name as input_codec. | ||||
|         self.input_codec = CODEC_MAP.get(self.input_charset, | ||||
|                                          self.input_charset) | ||||
|         self.output_codec = CODEC_MAP.get(self.output_charset, | ||||
|                                           self.output_charset) | ||||
|  | ||||
|     def __str__(self): | ||||
|         return self.input_charset.lower() | ||||
|  | ||||
|     __repr__ = __str__ | ||||
|  | ||||
|     def __eq__(self, other): | ||||
|         return str(self) == str(other).lower() | ||||
|  | ||||
|     def __ne__(self, other): | ||||
|         return not self.__eq__(other) | ||||
|  | ||||
|     def get_body_encoding(self): | ||||
|         """Return the content-transfer-encoding used for body encoding. | ||||
|  | ||||
|         This is either the string `quoted-printable' or `base64' depending on | ||||
|         the encoding used, or it is a function in which case you should call | ||||
|         the function with a single argument, the Message object being | ||||
|         encoded.  The function should then set the Content-Transfer-Encoding | ||||
|         header itself to whatever is appropriate. | ||||
|  | ||||
|         Returns "quoted-printable" if self.body_encoding is QP. | ||||
|         Returns "base64" if self.body_encoding is BASE64. | ||||
|         Returns conversion function otherwise. | ||||
|         """ | ||||
|         assert self.body_encoding != SHORTEST | ||||
|         if self.body_encoding == QP: | ||||
|             return 'quoted-printable' | ||||
|         elif self.body_encoding == BASE64: | ||||
|             return 'base64' | ||||
|         else: | ||||
|             return encode_7or8bit | ||||
|  | ||||
|     def get_output_charset(self): | ||||
|         """Return the output character set. | ||||
|  | ||||
|         This is self.output_charset if that is not None, otherwise it is | ||||
|         self.input_charset. | ||||
|         """ | ||||
|         return self.output_charset or self.input_charset | ||||
|  | ||||
|     def header_encode(self, string): | ||||
|         """Header-encode a string by converting it first to bytes. | ||||
|  | ||||
|         The type of encoding (base64 or quoted-printable) will be based on | ||||
|         this charset's `header_encoding`. | ||||
|  | ||||
|         :param string: A unicode string for the header.  It must be possible | ||||
|             to encode this string to bytes using the character set's | ||||
|             output codec. | ||||
|         :return: The encoded string, with RFC 2047 chrome. | ||||
|         """ | ||||
|         codec = self.output_codec or 'us-ascii' | ||||
|         header_bytes = _encode(string, codec) | ||||
|         # 7bit/8bit encodings return the string unchanged (modulo conversions) | ||||
|         encoder_module = self._get_encoder(header_bytes) | ||||
|         if encoder_module is None: | ||||
|             return string | ||||
|         return encoder_module.header_encode(header_bytes, codec) | ||||
|  | ||||
|     def header_encode_lines(self, string, maxlengths): | ||||
|         """Header-encode a string by converting it first to bytes. | ||||
|  | ||||
|         This is similar to `header_encode()` except that the string is fit | ||||
|         into maximum line lengths as given by the argument. | ||||
|  | ||||
|         :param string: A unicode string for the header.  It must be possible | ||||
|             to encode this string to bytes using the character set's | ||||
|             output codec. | ||||
|         :param maxlengths: Maximum line length iterator.  Each element | ||||
|             returned from this iterator will provide the next maximum line | ||||
|             length.  This parameter is used as an argument to built-in next() | ||||
|             and should never be exhausted.  The maximum line lengths should | ||||
|             not count the RFC 2047 chrome.  These line lengths are only a | ||||
|             hint; the splitter does the best it can. | ||||
|         :return: Lines of encoded strings, each with RFC 2047 chrome. | ||||
|         """ | ||||
|         # See which encoding we should use. | ||||
|         codec = self.output_codec or 'us-ascii' | ||||
|         header_bytes = _encode(string, codec) | ||||
|         encoder_module = self._get_encoder(header_bytes) | ||||
|         encoder = partial(encoder_module.header_encode, charset=codec) | ||||
|         # Calculate the number of characters that the RFC 2047 chrome will | ||||
|         # contribute to each line. | ||||
|         charset = self.get_output_charset() | ||||
|         extra = len(charset) + RFC2047_CHROME_LEN | ||||
|         # Now comes the hard part.  We must encode bytes but we can't split on | ||||
|         # bytes because some character sets are variable length and each | ||||
|         # encoded word must stand on its own.  So the problem is you have to | ||||
|         # encode to bytes to figure out this word's length, but you must split | ||||
|         # on characters.  This causes two problems: first, we don't know how | ||||
|         # many octets a specific substring of unicode characters will get | ||||
|         # encoded to, and second, we don't know how many ASCII characters | ||||
|         # those octets will get encoded to.  Unless we try it.  Which seems | ||||
|         # inefficient.  In the interest of being correct rather than fast (and | ||||
|         # in the hope that there will be few encoded headers in any such | ||||
|         # message), brute force it. :( | ||||
|         lines = [] | ||||
|         current_line = [] | ||||
|         maxlen = next(maxlengths) - extra | ||||
|         for character in string: | ||||
|             current_line.append(character) | ||||
|             this_line = EMPTYSTRING.join(current_line) | ||||
|             length = encoder_module.header_length(_encode(this_line, charset)) | ||||
|             if length > maxlen: | ||||
|                 # This last character doesn't fit so pop it off. | ||||
|                 current_line.pop() | ||||
|                 # Does nothing fit on the first line? | ||||
|                 if not lines and not current_line: | ||||
|                     lines.append(None) | ||||
|                 else: | ||||
|                     separator = (' ' if lines else '') | ||||
|                     joined_line = EMPTYSTRING.join(current_line) | ||||
|                     header_bytes = _encode(joined_line, codec) | ||||
|                     lines.append(encoder(header_bytes)) | ||||
|                 current_line = [character] | ||||
|                 maxlen = next(maxlengths) - extra | ||||
|         joined_line = EMPTYSTRING.join(current_line) | ||||
|         header_bytes = _encode(joined_line, codec) | ||||
|         lines.append(encoder(header_bytes)) | ||||
|         return lines | ||||
|  | ||||
|     def _get_encoder(self, header_bytes): | ||||
|         if self.header_encoding == BASE64: | ||||
|             return email.base64mime | ||||
|         elif self.header_encoding == QP: | ||||
|             return email.quoprimime | ||||
|         elif self.header_encoding == SHORTEST: | ||||
|             len64 = email.base64mime.header_length(header_bytes) | ||||
|             lenqp = email.quoprimime.header_length(header_bytes) | ||||
|             if len64 < lenqp: | ||||
|                 return email.base64mime | ||||
|             else: | ||||
|                 return email.quoprimime | ||||
|         else: | ||||
|             return None | ||||
|  | ||||
|     def body_encode(self, string): | ||||
|         """Body-encode a string by converting it first to bytes. | ||||
|  | ||||
|         The type of encoding (base64 or quoted-printable) will be based on | ||||
|         self.body_encoding.  If body_encoding is None, we assume the | ||||
|         output charset is a 7bit encoding, so re-encoding the decoded | ||||
|         string using the ascii codec produces the correct string version | ||||
|         of the content. | ||||
|         """ | ||||
|         if not string: | ||||
|             return string | ||||
|         if self.body_encoding is BASE64: | ||||
|             if isinstance(string, str): | ||||
|                 string = string.encode(self.output_charset) | ||||
|             return email.base64mime.body_encode(string) | ||||
|         elif self.body_encoding is QP: | ||||
|             # quopromime.body_encode takes a string, but operates on it as if | ||||
|             # it were a list of byte codes.  For a (minimal) history on why | ||||
|             # this is so, see changeset 0cf700464177.  To correctly encode a | ||||
|             # character set, then, we must turn it into pseudo bytes via the | ||||
|             # latin1 charset, which will encode any byte as a single code point | ||||
|             # between 0 and 255, which is what body_encode is expecting. | ||||
|             if isinstance(string, str): | ||||
|                 string = string.encode(self.output_charset) | ||||
|             string = string.decode('latin1') | ||||
|             return email.quoprimime.body_encode(string) | ||||
|         else: | ||||
|             if isinstance(string, str): | ||||
|                 string = string.encode(self.output_charset).decode('ascii') | ||||
|             return string | ||||
| @ -0,0 +1,90 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Encodings and related functions.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import str | ||||
|  | ||||
| __all__ = [ | ||||
|     'encode_7or8bit', | ||||
|     'encode_base64', | ||||
|     'encode_noop', | ||||
|     'encode_quopri', | ||||
|     ] | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from base64 import encodebytes as _bencode | ||||
| except ImportError: | ||||
|     # Py2 compatibility. TODO: test this! | ||||
|     from base64 import encodestring as _bencode | ||||
| from quopri import encodestring as _encodestring | ||||
|  | ||||
|  | ||||
def _qencode(s):
    """Return the bytes *s* quoted-printable encoded, with no literal spaces.

    Tabs and spaces are quoted via ``quotetabs=True``; any space still left
    in the encoded output is replaced by ``=20``.
    """
    enc = _encodestring(s, quotetabs=True)
    # The encoder hands back bytes, so the replacement operands must be bytes
    # too: with unicode_literals in effect a plain ' ' is text, and
    # bytes.replace() rejects text arguments on Python 3.
    return enc.replace(b' ', b'=20')
|  | ||||
|  | ||||
def encode_base64(msg):
    """Replace the payload of *msg* with its Base64 encoding.

    The encoded payload is stored as ASCII text and the message's
    Content-Transfer-Encoding header is set to ``base64``.
    """
    payload = msg.get_payload()
    encoded = _bencode(payload)
    msg.set_payload(str(encoded, 'ascii'))
    msg['Content-Transfer-Encoding'] = 'base64'
|  | ||||
|  | ||||
def encode_quopri(msg):
    """Replace the payload of *msg* with its quoted-printable encoding.

    The message's Content-Transfer-Encoding header is set to
    ``quoted-printable``.
    """
    msg.set_payload(_qencode(msg.get_payload()))
    msg['Content-Transfer-Encoding'] = 'quoted-printable'
|  | ||||
|  | ||||
def encode_7or8bit(msg):
    """Set the Content-Transfer-Encoding header of *msg* to 7bit or 8bit.

    A missing payload, or one that round-trips through ASCII, is 7bit.
    A non-ASCII payload is 8bit unless the message's output charset starts
    with ``iso-2022-``, which is 7bit.  A bytes payload is afterwards stored
    back as text decoded with the ``surrogateescape`` error handler.
    """
    payload = msg.get_payload()
    if payload is None:
        # There's no payload.  For backwards compatibility we use 7bit
        msg['Content-Transfer-Encoding'] = '7bit'
        return
    is_text = isinstance(payload, str)
    # Cheap 7bit test: if the ASCII conversion succeeds the data must be
    # 7bit, otherwise treat it as 8bit.
    try:
        if is_text:
            payload.encode('ascii')
        else:
            payload.decode('ascii')
    except UnicodeError:
        charset = msg.get_charset()
        output_cset = charset and charset.output_charset
        # iso-2022-* is non-ASCII but encodes to a 7-bit representation
        if output_cset and output_cset.lower().startswith('iso-2022-'):
            cte = '7bit'
        else:
            cte = '8bit'
    else:
        cte = '7bit'
    msg['Content-Transfer-Encoding'] = cte
    if not is_text:
        msg.set_payload(payload.decode('ascii', 'surrogateescape'))
|  | ||||
|  | ||||
def encode_noop(msg):
    """Leave the payload of *msg* unencoded.

    No header is added.  The one thing that does happen: a bytes payload is
    stored back as text in the surrogateescaped form, so the message model
    stays consistent on Python 3.
    """
    payload = msg.get_payload()
    if isinstance(payload, str):
        return
    msg.set_payload(payload.decode('ascii', 'surrogateescape'))
| @ -0,0 +1,111 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """email package exception classes.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import super | ||||
|  | ||||
|  | ||||
class MessageError(Exception):
    """Base class for errors in the email package."""


class MessageParseError(MessageError):
    """Base class for message parsing errors."""


class HeaderParseError(MessageParseError):
    """Error while parsing headers."""


class BoundaryError(MessageParseError):
    """Couldn't find terminating boundary."""


class MultipartConversionError(MessageError, TypeError):
    """Conversion to a multipart is prohibited.

    Also derives from TypeError, so handlers written for TypeError catch it.
    """


class CharsetError(MessageError):
    """An illegal charset was given."""
|  | ||||
|  | ||||
| # These are parsing defects which the parser was able to work around. | ||||
class MessageDefect(ValueError):
    """Base class for a message defect.

    The optional *line* is kept on the instance as ``self.line``; it is
    forwarded to the ValueError initialiser only when it is not None.
    """

    def __init__(self, line=None):
        self.line = line
        if line is None:
            return
        super().__init__(line)
|  | ||||
class NoBoundaryInMultipartDefect(MessageDefect):
    """A message claimed to be a multipart but had no boundary parameter."""

class StartBoundaryNotFoundDefect(MessageDefect):
    """The claimed start boundary was never found."""

class CloseBoundaryNotFoundDefect(MessageDefect):
    """A start boundary was found, but not the corresponding close boundary."""

class FirstHeaderLineIsContinuationDefect(MessageDefect):
    """A message had a continuation line as its first header line."""

class MisplacedEnvelopeHeaderDefect(MessageDefect):
    """A 'Unix-from' header was found in the middle of a header block."""

class MissingHeaderBodySeparatorDefect(MessageDefect):
    """Found line with no leading whitespace and no colon before blank line."""
# XXX: backward compatibility, just in case (it was never emitted).
# MalformedHeaderDefect is kept as an alias for the class above.
MalformedHeaderDefect = MissingHeaderBodySeparatorDefect

class MultipartInvariantViolationDefect(MessageDefect):
    """A message claimed to be a multipart but no subparts were found."""

class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
    """An invalid content transfer encoding was set on the multipart itself."""

class UndecodableBytesDefect(MessageDefect):
    """Header contained bytes that could not be decoded."""

class InvalidBase64PaddingDefect(MessageDefect):
    """A base64 encoded sequence had an incorrect length."""

class InvalidBase64CharactersDefect(MessageDefect):
    """A base64 encoded sequence had characters not in the base64 alphabet."""
|  | ||||
| # These errors are specific to header parsing. | ||||
|  | ||||
class HeaderDefect(MessageDefect):
    """Base class for a header defect."""

    def __init__(self, *args, **kw):
        # Pure pass-through: every argument goes to MessageDefect.__init__.
        super().__init__(*args, **kw)

class InvalidHeaderDefect(HeaderDefect):
    """Header is not valid, message gives details."""

class HeaderMissingRequiredValue(HeaderDefect):
    """A header that must have a value had none."""
|  | ||||
class NonPrintableDefect(HeaderDefect):
    """ASCII characters outside the ascii-printable range found.

    The offending characters are passed to the base initialiser and are
    also kept on the instance as ``non_printables``.
    """

    def __init__(self, non_printables):
        super().__init__(non_printables)
        self.non_printables = non_printables

    def __str__(self):
        template = "the following ASCII non-printables found in header: {}"
        return template.format(self.non_printables)
|  | ||||
class ObsoleteHeaderDefect(HeaderDefect):
    """Header uses syntax declared obsolete by RFC 5322."""

class NonASCIILocalPartDefect(HeaderDefect):
    """The local_part contains non-ASCII characters."""
    # This defect only occurs during unicode parsing, not when
    # parsing messages decoded from binary.
| @ -0,0 +1,525 @@ | ||||
| # Copyright (C) 2004-2006 Python Software Foundation | ||||
| # Authors: Baxter, Wouters and Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """FeedParser - An email feed parser. | ||||
|  | ||||
| The feed parser implements an interface for incrementally parsing an email | ||||
| message, line by line.  This has advantages for certain applications, such as | ||||
| those reading email messages off a socket. | ||||
|  | ||||
| FeedParser.feed() is the primary interface for pushing new data into the | ||||
| parser.  It returns when there's nothing more it can do with the available | ||||
| data.  When you have no more data to push into the parser, call .close(). | ||||
| This completes the parsing and returns the root message object. | ||||
|  | ||||
| The other advantage of this parser is that it will never raise a parsing | ||||
| exception.  Instead, when it finds something unexpected, it adds a 'defect' to | ||||
| the current message.  Defects are just instances that live on the message | ||||
| object's .defects attribute. | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import object, range, super | ||||
| from future.utils import implements_iterator, PY3 | ||||
|  | ||||
| __all__ = ['FeedParser', 'BytesFeedParser'] | ||||
|  | ||||
| import re | ||||
|  | ||||
| from future.backports.email import errors | ||||
| from future.backports.email import message | ||||
| from future.backports.email._policybase import compat32 | ||||
|  | ||||
# Line-ending patterns.  They are written as raw strings so that the regex
# engine itself interprets \r, \n and \Z: '\Z' inside a plain string literal
# is an invalid string escape.  The patterns match exactly the same text as
# their non-raw spellings.
NLCRE = re.compile(r'\r\n|\r|\n')
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
# RFC 2822 section 3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, any
# character except controls, SP, and ":".  A header line is a Unix-From
# line, a field name followed by a colon, or a continuation line that starts
# with a tab or a space.
headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
EMPTYSTRING = ''
NL = '\n'

# Unique sentinel: handed out by the line buffer and yielded by the parser
# generator when no complete line is available yet.
NeedMoreData = object()
|  | ||||
|  | ||||
| # @implements_iterator | ||||
class BufferedSubFile(object):
    """A file-like line buffer that is filled incrementally.

    Data arrives through push(); complete lines are handed out by
    readline() or by iterating.  A stack of line predicates can be
    maintained as well: when any predicate on the stack matches the next
    line, readline() reports a false EOF (the empty string) and leaves that
    line in the buffer.  This lets the parser work with a simple
    abstraction -- it parses until EOF closes the current message.
    """

    def __init__(self):
        # Complete lines, stored in reverse: the next line to read is last.
        self._lines = []
        # Tail of the pushed data that has no line ending yet.
        self._partial = ''
        # Predicates that signal a false EOF; the innermost one is last.
        self._eofstack = []
        # True once close() has been called.
        self._closed = False

    def push_eof_matcher(self, pred):
        """Add *pred* as the innermost false-EOF predicate."""
        self._eofstack.append(pred)

    def pop_eof_matcher(self):
        """Remove and return the innermost false-EOF predicate."""
        return self._eofstack.pop()

    def close(self):
        """Mark the buffer closed, queueing any trailing partial line."""
        leftover, self._partial = self._partial, ''
        self._lines.append(leftover)
        self._closed = True

    def readline(self):
        """Return the next line, '' at (false) EOF, or NeedMoreData."""
        if not self._lines:
            return '' if self._closed else NeedMoreData
        candidate = self._lines.pop()
        # RFC 2046, section 5.1.2 requires us to recognize outer level
        # boundaries at any level of inner nesting, so every predicate is
        # consulted, from the most to the least nested.
        if any(at_eof(candidate) for at_eof in self._eofstack[::-1]):
            # False EOF: the matching line stays available for later.
            self._lines.append(candidate)
            return ''
        return candidate

    def unreadline(self, line):
        """Let the consumer put *line* back so that it is read next."""
        assert line is not NeedMoreData
        self._lines.append(line)

    def push(self, data):
        """Push some new data into this object."""
        # Prepend whatever was left over from the previous push.
        buffered = self._partial + data
        self._partial = ''
        # Splitting on a grouped pattern yields content and line-ending
        # pieces alternately; the final piece is the text after the last
        # line ending (empty when the data ends with one).
        pieces = NLCRE_crack.split(buffered)
        self._partial = pieces.pop()
        # Data ending in a bare \r may be the first half of \r\n, so that
        # line is held back until more data shows whether a \n follows
        # (bugs 1555570, 1721862).
        if not self._partial and pieces and pieces[-1].endswith('\r'):
            line_end = pieces.pop()
            content = pieces.pop()
            self._partial = content + line_end
        # Re-attach each line ending to the content it terminates.
        complete = [text + ending
                    for text, ending in zip(pieces[0::2], pieces[1::2])]
        self.pushlines(complete)

    def pushlines(self, lines):
        """Queue *lines* behind the lines that are already buffered."""
        # Storage is reversed, so new lines go to the front, reversed too.
        self._lines[:0] = list(reversed(lines))

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line != '':
            return line
        raise StopIteration
|  | ||||
|  | ||||
| class FeedParser(object): | ||||
|     """A feed-style parser of email.""" | ||||
|  | ||||
|     def __init__(self, _factory=message.Message, **_3to2kwargs): | ||||
|         if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] | ||||
|         else: policy = compat32 | ||||
|         """_factory is called with no arguments to create a new message obj | ||||
|  | ||||
|         The policy keyword specifies a policy object that controls a number of | ||||
|         aspects of the parser's operation.  The default policy maintains | ||||
|         backward compatibility. | ||||
|  | ||||
|         """ | ||||
|         self._factory = _factory | ||||
|         self.policy = policy | ||||
|         try: | ||||
|             _factory(policy=self.policy) | ||||
|             self._factory_kwds = lambda: {'policy': self.policy} | ||||
|         except TypeError: | ||||
|             # Assume this is an old-style factory | ||||
|             self._factory_kwds = lambda: {} | ||||
|         self._input = BufferedSubFile() | ||||
|         self._msgstack = [] | ||||
|         if PY3: | ||||
|             self._parse = self._parsegen().__next__ | ||||
|         else: | ||||
|             self._parse = self._parsegen().next | ||||
|         self._cur = None | ||||
|         self._last = None | ||||
|         self._headersonly = False | ||||
|  | ||||
|     # Non-public interface for supporting Parser's headersonly flag | ||||
    def _set_headersonly(self):
        """Switch the parser to headers-only mode.

        In that mode _parsegen() stores every line after the headers as the
        message payload without parsing it further.
        """
        self._headersonly = True
|  | ||||
    def feed(self, data):
        """Push more data into the parser."""
        # Buffer the data, then let the parser generator advance one step.
        self._input.push(data)
        self._call_parse()
|  | ||||
    def _call_parse(self):
        """Advance the parser generator by one step.

        A StopIteration from an already finished generator is swallowed.
        """
        try:
            self._parse()
        except StopIteration:
            pass
|  | ||||
|     def close(self): | ||||
|         """Parse all remaining data and return the root message object.""" | ||||
|         self._input.close() | ||||
|         self._call_parse() | ||||
|         root = self._pop_message() | ||||
|         assert not self._msgstack | ||||
|         # Look for final set of defects | ||||
|         if root.get_content_maintype() == 'multipart' \ | ||||
|                and not root.is_multipart(): | ||||
|             defect = errors.MultipartInvariantViolationDefect() | ||||
|             self.policy.handle_defect(root, defect) | ||||
|         return root | ||||
|  | ||||
|     def _new_message(self): | ||||
|         msg = self._factory(**self._factory_kwds()) | ||||
|         if self._cur and self._cur.get_content_type() == 'multipart/digest': | ||||
|             msg.set_default_type('message/rfc822') | ||||
|         if self._msgstack: | ||||
|             self._msgstack[-1].attach(msg) | ||||
|         self._msgstack.append(msg) | ||||
|         self._cur = msg | ||||
|         self._last = msg | ||||
|  | ||||
|     def _pop_message(self): | ||||
|         retval = self._msgstack.pop() | ||||
|         if self._msgstack: | ||||
|             self._cur = self._msgstack[-1] | ||||
|         else: | ||||
|             self._cur = None | ||||
|         return retval | ||||
|  | ||||
|     def _parsegen(self): | ||||
|         # Create a new message and start by parsing headers. | ||||
|         self._new_message() | ||||
|         headers = [] | ||||
|         # Collect the headers, searching for a line that doesn't match the RFC | ||||
|         # 2822 header or continuation pattern (including an empty line). | ||||
|         for line in self._input: | ||||
|             if line is NeedMoreData: | ||||
|                 yield NeedMoreData | ||||
|                 continue | ||||
|             if not headerRE.match(line): | ||||
|                 # If we saw the RFC defined header/body separator | ||||
|                 # (i.e. newline), just throw it away. Otherwise the line is | ||||
|                 # part of the body so push it back. | ||||
|                 if not NLCRE.match(line): | ||||
|                     defect = errors.MissingHeaderBodySeparatorDefect() | ||||
|                     self.policy.handle_defect(self._cur, defect) | ||||
|                     self._input.unreadline(line) | ||||
|                 break | ||||
|             headers.append(line) | ||||
|         # Done with the headers, so parse them and figure out what we're | ||||
|         # supposed to see in the body of the message. | ||||
|         self._parse_headers(headers) | ||||
|         # Headers-only parsing is a backwards compatibility hack, which was | ||||
|         # necessary in the older parser, which could raise errors.  All | ||||
|         # remaining lines in the input are thrown into the message body. | ||||
|         if self._headersonly: | ||||
|             lines = [] | ||||
|             while True: | ||||
|                 line = self._input.readline() | ||||
|                 if line is NeedMoreData: | ||||
|                     yield NeedMoreData | ||||
|                     continue | ||||
|                 if line == '': | ||||
|                     break | ||||
|                 lines.append(line) | ||||
|             self._cur.set_payload(EMPTYSTRING.join(lines)) | ||||
|             return | ||||
|         if self._cur.get_content_type() == 'message/delivery-status': | ||||
|             # message/delivery-status contains blocks of headers separated by | ||||
|             # a blank line.  We'll represent each header block as a separate | ||||
|             # nested message object, but the processing is a bit different | ||||
|             # than standard message/* types because there is no body for the | ||||
|             # nested messages.  A blank line separates the subparts. | ||||
|             while True: | ||||
|                 self._input.push_eof_matcher(NLCRE.match) | ||||
|                 for retval in self._parsegen(): | ||||
|                     if retval is NeedMoreData: | ||||
|                         yield NeedMoreData | ||||
|                         continue | ||||
|                     break | ||||
|                 msg = self._pop_message() | ||||
|                 # We need to pop the EOF matcher in order to tell if we're at | ||||
|                 # the end of the current file, not the end of the last block | ||||
|                 # of message headers. | ||||
|                 self._input.pop_eof_matcher() | ||||
|                 # The input stream must be sitting at the newline or at the | ||||
|                 # EOF.  We want to see if we're at the end of this subpart, so | ||||
|                 # first consume the blank line, then test the next line to see | ||||
|                 # if we're at this subpart's EOF. | ||||
|                 while True: | ||||
|                     line = self._input.readline() | ||||
|                     if line is NeedMoreData: | ||||
|                         yield NeedMoreData | ||||
|                         continue | ||||
|                     break | ||||
|                 while True: | ||||
|                     line = self._input.readline() | ||||
|                     if line is NeedMoreData: | ||||
|                         yield NeedMoreData | ||||
|                         continue | ||||
|                     break | ||||
|                 if line == '': | ||||
|                     break | ||||
|                 # Not at EOF so this is a line we're going to need. | ||||
|                 self._input.unreadline(line) | ||||
|             return | ||||
|         if self._cur.get_content_maintype() == 'message': | ||||
|             # The message claims to be a message/* type, then what follows is | ||||
|             # another RFC 2822 message. | ||||
|             for retval in self._parsegen(): | ||||
|                 if retval is NeedMoreData: | ||||
|                     yield NeedMoreData | ||||
|                     continue | ||||
|                 break | ||||
|             self._pop_message() | ||||
|             return | ||||
|         if self._cur.get_content_maintype() == 'multipart': | ||||
|             boundary = self._cur.get_boundary() | ||||
|             if boundary is None: | ||||
|                 # The message /claims/ to be a multipart but it has not | ||||
|                 # defined a boundary.  That's a problem which we'll handle by | ||||
|                 # reading everything until the EOF and marking the message as | ||||
|                 # defective. | ||||
|                 defect = errors.NoBoundaryInMultipartDefect() | ||||
|                 self.policy.handle_defect(self._cur, defect) | ||||
|                 lines = [] | ||||
|                 for line in self._input: | ||||
|                     if line is NeedMoreData: | ||||
|                         yield NeedMoreData | ||||
|                         continue | ||||
|                     lines.append(line) | ||||
|                 self._cur.set_payload(EMPTYSTRING.join(lines)) | ||||
|                 return | ||||
|             # Make sure a valid content type was specified per RFC 2045:6.4. | ||||
|             if (self._cur.get('content-transfer-encoding', '8bit').lower() | ||||
|                     not in ('7bit', '8bit', 'binary')): | ||||
|                 defect = errors.InvalidMultipartContentTransferEncodingDefect() | ||||
|                 self.policy.handle_defect(self._cur, defect) | ||||
|             # Create a line match predicate which matches the inter-part | ||||
|             # boundary as well as the end-of-multipart boundary.  Don't push | ||||
|             # this onto the input stream until we've scanned past the | ||||
|             # preamble. | ||||
|             separator = '--' + boundary | ||||
|             boundaryre = re.compile( | ||||
|                 '(?P<sep>' + re.escape(separator) + | ||||
|                 r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$') | ||||
|             capturing_preamble = True | ||||
|             preamble = [] | ||||
|             linesep = False | ||||
|             close_boundary_seen = False | ||||
|             while True: | ||||
|                 line = self._input.readline() | ||||
|                 if line is NeedMoreData: | ||||
|                     yield NeedMoreData | ||||
|                     continue | ||||
|                 if line == '': | ||||
|                     break | ||||
|                 mo = boundaryre.match(line) | ||||
|                 if mo: | ||||
|                     # If we're looking at the end boundary, we're done with | ||||
|                     # this multipart.  If there was a newline at the end of | ||||
|                     # the closing boundary, then we need to initialize the | ||||
|                     # epilogue with the empty string (see below). | ||||
|                     if mo.group('end'): | ||||
|                         close_boundary_seen = True | ||||
|                         linesep = mo.group('linesep') | ||||
|                         break | ||||
|                     # We saw an inter-part boundary.  Were we in the preamble? | ||||
|                     if capturing_preamble: | ||||
|                         if preamble: | ||||
|                             # According to RFC 2046, the last newline belongs | ||||
|                             # to the boundary. | ||||
|                             lastline = preamble[-1] | ||||
|                             eolmo = NLCRE_eol.search(lastline) | ||||
|                             if eolmo: | ||||
|                                 preamble[-1] = lastline[:-len(eolmo.group(0))] | ||||
|                             self._cur.preamble = EMPTYSTRING.join(preamble) | ||||
|                         capturing_preamble = False | ||||
|                         self._input.unreadline(line) | ||||
|                         continue | ||||
|                     # We saw a boundary separating two parts.  Consume any | ||||
|                     # multiple boundary lines that may be following.  Our | ||||
|                     # interpretation of RFC 2046 BNF grammar does not produce | ||||
|                     # body parts within such double boundaries. | ||||
|                     while True: | ||||
|                         line = self._input.readline() | ||||
|                         if line is NeedMoreData: | ||||
|                             yield NeedMoreData | ||||
|                             continue | ||||
|                         mo = boundaryre.match(line) | ||||
|                         if not mo: | ||||
|                             self._input.unreadline(line) | ||||
|                             break | ||||
|                     # Recurse to parse this subpart; the input stream points | ||||
|                     # at the subpart's first line. | ||||
|                     self._input.push_eof_matcher(boundaryre.match) | ||||
|                     for retval in self._parsegen(): | ||||
|                         if retval is NeedMoreData: | ||||
|                             yield NeedMoreData | ||||
|                             continue | ||||
|                         break | ||||
|                     # Because of RFC 2046, the newline preceding the boundary | ||||
|                     # separator actually belongs to the boundary, not the | ||||
|                     # previous subpart's payload (or epilogue if the previous | ||||
|                     # part is a multipart). | ||||
|                     if self._last.get_content_maintype() == 'multipart': | ||||
|                         epilogue = self._last.epilogue | ||||
|                         if epilogue == '': | ||||
|                             self._last.epilogue = None | ||||
|                         elif epilogue is not None: | ||||
|                             mo = NLCRE_eol.search(epilogue) | ||||
|                             if mo: | ||||
|                                 end = len(mo.group(0)) | ||||
|                                 self._last.epilogue = epilogue[:-end] | ||||
|                     else: | ||||
|                         payload = self._last._payload | ||||
|                         if isinstance(payload, str): | ||||
|                             mo = NLCRE_eol.search(payload) | ||||
|                             if mo: | ||||
|                                 payload = payload[:-len(mo.group(0))] | ||||
|                                 self._last._payload = payload | ||||
|                     self._input.pop_eof_matcher() | ||||
|                     self._pop_message() | ||||
|                     # Set the multipart up for newline cleansing, which will | ||||
|                     # happen if we're in a nested multipart. | ||||
|                     self._last = self._cur | ||||
|                 else: | ||||
|                     # I think we must be in the preamble | ||||
|                     assert capturing_preamble | ||||
|                     preamble.append(line) | ||||
|             # We've seen either the EOF or the end boundary.  If we're still | ||||
|             # capturing the preamble, we never saw the start boundary.  Note | ||||
|             # that as a defect and store the captured text as the payload. | ||||
|             if capturing_preamble: | ||||
|                 defect = errors.StartBoundaryNotFoundDefect() | ||||
|                 self.policy.handle_defect(self._cur, defect) | ||||
|                 self._cur.set_payload(EMPTYSTRING.join(preamble)) | ||||
|                 epilogue = [] | ||||
|                 for line in self._input: | ||||
|                     if line is NeedMoreData: | ||||
|                         yield NeedMoreData | ||||
|                         continue | ||||
|                 self._cur.epilogue = EMPTYSTRING.join(epilogue) | ||||
|                 return | ||||
|             # If we're not processing the preamble, then we might have seen | ||||
|             # EOF without seeing that end boundary...that is also a defect. | ||||
|             if not close_boundary_seen: | ||||
|                 defect = errors.CloseBoundaryNotFoundDefect() | ||||
|                 self.policy.handle_defect(self._cur, defect) | ||||
|                 return | ||||
|             # Everything from here to the EOF is epilogue.  If the end boundary | ||||
|             # ended in a newline, we'll need to make sure the epilogue isn't | ||||
|             # None | ||||
|             if linesep: | ||||
|                 epilogue = [''] | ||||
|             else: | ||||
|                 epilogue = [] | ||||
|             for line in self._input: | ||||
|                 if line is NeedMoreData: | ||||
|                     yield NeedMoreData | ||||
|                     continue | ||||
|                 epilogue.append(line) | ||||
|             # Any CRLF at the front of the epilogue is not technically part of | ||||
|             # the epilogue.  Also, watch out for an empty string epilogue, | ||||
|             # which means a single newline. | ||||
|             if epilogue: | ||||
|                 firstline = epilogue[0] | ||||
|                 bolmo = NLCRE_bol.match(firstline) | ||||
|                 if bolmo: | ||||
|                     epilogue[0] = firstline[len(bolmo.group(0)):] | ||||
|             self._cur.epilogue = EMPTYSTRING.join(epilogue) | ||||
|             return | ||||
|         # Otherwise, it's some non-multipart type, so the entire rest of the | ||||
|         # file contents becomes the payload. | ||||
|         lines = [] | ||||
|         for line in self._input: | ||||
|             if line is NeedMoreData: | ||||
|                 yield NeedMoreData | ||||
|                 continue | ||||
|             lines.append(line) | ||||
|         self._cur.set_payload(EMPTYSTRING.join(lines)) | ||||
|  | ||||
|     def _parse_headers(self, lines): | ||||
|         # Passed a list of lines that make up the headers for the current msg | ||||
|         lastheader = '' | ||||
|         lastvalue = [] | ||||
|         for lineno, line in enumerate(lines): | ||||
|             # Check for continuation | ||||
|             if line[0] in ' \t': | ||||
|                 if not lastheader: | ||||
|                     # The first line of the headers was a continuation.  This | ||||
|                     # is illegal, so let's note the defect, store the illegal | ||||
|                     # line, and ignore it for purposes of headers. | ||||
|                     defect = errors.FirstHeaderLineIsContinuationDefect(line) | ||||
|                     self.policy.handle_defect(self._cur, defect) | ||||
|                     continue | ||||
|                 lastvalue.append(line) | ||||
|                 continue | ||||
|             if lastheader: | ||||
|                 self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) | ||||
|                 lastheader, lastvalue = '', [] | ||||
|             # Check for envelope header, i.e. unix-from | ||||
|             if line.startswith('From '): | ||||
|                 if lineno == 0: | ||||
|                     # Strip off the trailing newline | ||||
|                     mo = NLCRE_eol.search(line) | ||||
|                     if mo: | ||||
|                         line = line[:-len(mo.group(0))] | ||||
|                     self._cur.set_unixfrom(line) | ||||
|                     continue | ||||
|                 elif lineno == len(lines) - 1: | ||||
|                     # Something looking like a unix-from at the end - it's | ||||
|                     # probably the first line of the body, so push back the | ||||
|                     # line and stop. | ||||
|                     self._input.unreadline(line) | ||||
|                     return | ||||
|                 else: | ||||
|                     # Weirdly placed unix-from line.  Note this as a defect | ||||
|                     # and ignore it. | ||||
|                     defect = errors.MisplacedEnvelopeHeaderDefect(line) | ||||
|                     self._cur.defects.append(defect) | ||||
|                     continue | ||||
|             # Split the line on the colon separating field name from value. | ||||
|             # There will always be a colon, because if there wasn't the part of | ||||
|             # the parser that calls us would have started parsing the body. | ||||
|             i = line.find(':') | ||||
|             assert i>0, "_parse_headers fed line with no : and no leading WS" | ||||
|             lastheader = line[:i] | ||||
|             lastvalue = [line] | ||||
|         # Done with all the lines, so handle the last header. | ||||
|         if lastheader: | ||||
|             self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) | ||||
|  | ||||
|  | ||||
| class BytesFeedParser(FeedParser): | ||||
|     """Like FeedParser, but feed accepts bytes.""" | ||||
|  | ||||
|     def feed(self, data): | ||||
|         super().feed(data.decode('ascii', 'surrogateescape')) | ||||
| @ -0,0 +1,498 @@ | ||||
| # Copyright (C) 2001-2010 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Classes to generate plain text from a message object tree.""" | ||||
| from __future__ import print_function | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import super | ||||
| from future.builtins import str | ||||
|  | ||||
| __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] | ||||
|  | ||||
| import re | ||||
| import sys | ||||
| import time | ||||
| import random | ||||
| import warnings | ||||
|  | ||||
| from io import StringIO, BytesIO | ||||
| from future.backports.email._policybase import compat32 | ||||
| from future.backports.email.header import Header | ||||
| from future.backports.email.utils import _has_surrogates | ||||
| import future.backports.email.charset as _charset | ||||
|  | ||||
| # Joins the main type and subtype when Generator._dispatch() builds the name | ||||
| # of a _handle_<maintype>_<subtype>() method. | ||||
| UNDERSCORE = '_' | ||||
| NL = '\n'  # XXX: no longer used by the code below. | ||||
|  | ||||
| # Matches 'From ' at the start of any line; the generators use it to put a | ||||
| # '>' in front of such lines when mangle_from_ is set. | ||||
| fcre = re.compile(r'^From ', re.MULTILINE) | ||||
|  | ||||
|  | ||||
| class Generator(object): | ||||
|     """Generates output from a Message object tree. | ||||
|  | ||||
|     This basic generator writes the message to the given file object as plain | ||||
|     text. | ||||
|     """ | ||||
|     # | ||||
|     # Public interface | ||||
|     # | ||||
|  | ||||
|     def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, **_3to2kwargs): | ||||
|         if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] | ||||
|         else: policy = None | ||||
|         """Create the generator for message flattening. | ||||
|  | ||||
|         outfp is the output file-like object for writing the message to.  It | ||||
|         must have a write() method. | ||||
|  | ||||
|         Optional mangle_from_ is a flag that, when True (the default), escapes | ||||
|         From_ lines in the body of the message by putting a `>' in front of | ||||
|         them. | ||||
|  | ||||
|         Optional maxheaderlen specifies the longest length for a non-continued | ||||
|         header.  When a header line is longer (in characters, with tabs | ||||
|         expanded to 8 spaces) than maxheaderlen, the header will split as | ||||
|         defined in the Header class.  Set maxheaderlen to zero to disable | ||||
|         header wrapping.  The default is 78, as recommended (but not required) | ||||
|         by RFC 2822. | ||||
|  | ||||
|         The policy keyword specifies a policy object that controls a number of | ||||
|         aspects of the generator's operation.  The default policy maintains | ||||
|         backward compatibility. | ||||
|  | ||||
|         """ | ||||
|         self._fp = outfp | ||||
|         self._mangle_from_ = mangle_from_ | ||||
|         self.maxheaderlen = maxheaderlen | ||||
|         self.policy = policy | ||||
|  | ||||
|     def write(self, s): | ||||
|         # Just delegate to the file object | ||||
|         self._fp.write(s) | ||||
|  | ||||
|     def flatten(self, msg, unixfrom=False, linesep=None): | ||||
|         r"""Print the message object tree rooted at msg to the output file | ||||
|         specified when the Generator instance was created. | ||||
|  | ||||
|         unixfrom is a flag that forces the printing of a Unix From_ delimiter | ||||
|         before the first object in the message tree.  If the original message | ||||
|         has no From_ delimiter, a `standard' one is crafted.  By default, this | ||||
|         is False to inhibit the printing of any From_ delimiter. | ||||
|  | ||||
|         Note that for subobjects, no From_ line is printed. | ||||
|  | ||||
|         linesep specifies the characters used to indicate a new line in | ||||
|         the output.  The default value is determined by the policy. | ||||
|  | ||||
|         """ | ||||
|         # We use the _XXX constants for operating on data that comes directly | ||||
|         # from the msg, and _encoded_XXX constants for operating on data that | ||||
|         # has already been converted (to bytes in the BytesGenerator) and | ||||
|         # inserted into a temporary buffer. | ||||
|         policy = msg.policy if self.policy is None else self.policy | ||||
|         if linesep is not None: | ||||
|             policy = policy.clone(linesep=linesep) | ||||
|         if self.maxheaderlen is not None: | ||||
|             policy = policy.clone(max_line_length=self.maxheaderlen) | ||||
|         self._NL = policy.linesep | ||||
|         self._encoded_NL = self._encode(self._NL) | ||||
|         self._EMPTY = '' | ||||
|         self._encoded_EMTPY = self._encode('') | ||||
|         # Because we use clone (below) when we recursively process message | ||||
|         # subparts, and because clone uses the computed policy (not None), | ||||
|         # submessages will automatically get set to the computed policy when | ||||
|         # they are processed by this code. | ||||
|         old_gen_policy = self.policy | ||||
|         old_msg_policy = msg.policy | ||||
|         try: | ||||
|             self.policy = policy | ||||
|             msg.policy = policy | ||||
|             if unixfrom: | ||||
|                 ufrom = msg.get_unixfrom() | ||||
|                 if not ufrom: | ||||
|                     ufrom = 'From nobody ' + time.ctime(time.time()) | ||||
|                 self.write(ufrom + self._NL) | ||||
|             self._write(msg) | ||||
|         finally: | ||||
|             self.policy = old_gen_policy | ||||
|             msg.policy = old_msg_policy | ||||
|  | ||||
|     def clone(self, fp): | ||||
|         """Clone this generator with the exact same options.""" | ||||
|         return self.__class__(fp, | ||||
|                               self._mangle_from_, | ||||
|                               None, # Use policy setting, which we've adjusted | ||||
|                               policy=self.policy) | ||||
|  | ||||
|     # | ||||
|     # Protected interface - undocumented ;/ | ||||
|     # | ||||
|  | ||||
|     # Note that we use 'self.write' when what we are writing is coming from | ||||
|     # the source, and self._fp.write when what we are writing is coming from a | ||||
|     # buffer (because the Bytes subclass has already had a chance to transform | ||||
|     # the data in its write method in that case).  This is an entirely | ||||
|     # pragmatic split determined by experiment; we could be more general by | ||||
|     # always using write and having the Bytes subclass write method detect when | ||||
|     # it has already transformed the input; but, since this whole thing is a | ||||
|     # hack anyway this seems good enough. | ||||
|  | ||||
|     # Similarly, we have _XXX and _encoded_XXX attributes that are used on | ||||
|     # source and buffer data, respectively. | ||||
|     _encoded_EMPTY = '' | ||||
|  | ||||
|     def _new_buffer(self): | ||||
|         # BytesGenerator overrides this to return BytesIO. | ||||
|         return StringIO() | ||||
|  | ||||
|     def _encode(self, s): | ||||
|         """Return s unchanged; the text generator needs no conversion.""" | ||||
|         # BytesGenerator overrides this to encode strings to bytes. | ||||
|         return s | ||||
|  | ||||
|     def _write_lines(self, lines): | ||||
|         # We have to transform the line endings. | ||||
|         if not lines: | ||||
|             return | ||||
|         lines = lines.splitlines(True) | ||||
|         for line in lines[:-1]: | ||||
|             self.write(line.rstrip('\r\n')) | ||||
|             self.write(self._NL) | ||||
|         laststripped = lines[-1].rstrip('\r\n') | ||||
|         self.write(laststripped) | ||||
|         if len(lines[-1]) != len(laststripped): | ||||
|             self.write(self._NL) | ||||
|  | ||||
|     def _write(self, msg): | ||||
|         # We can't write the headers yet because of the following scenario: | ||||
|         # say a multipart message includes the boundary string somewhere in | ||||
|         # its body.  We'd have to calculate the new boundary /before/ we write | ||||
|         # the headers so that we can write the correct Content-Type: | ||||
|         # parameter. | ||||
|         # | ||||
|         # The way we do this, so as to make the _handle_*() methods simpler, | ||||
|         # is to cache any subpart writes into a buffer.  The we write the | ||||
|         # headers and the buffer contents.  That way, subpart handlers can | ||||
|         # Do The Right Thing, and can still modify the Content-Type: header if | ||||
|         # necessary. | ||||
|         oldfp = self._fp | ||||
|         try: | ||||
|             self._fp = sfp = self._new_buffer() | ||||
|             self._dispatch(msg) | ||||
|         finally: | ||||
|             self._fp = oldfp | ||||
|         # Write the headers.  First we see if the message object wants to | ||||
|         # handle that itself.  If not, we'll do it generically. | ||||
|         meth = getattr(msg, '_write_headers', None) | ||||
|         if meth is None: | ||||
|             self._write_headers(msg) | ||||
|         else: | ||||
|             meth(self) | ||||
|         self._fp.write(sfp.getvalue()) | ||||
|  | ||||
|     def _dispatch(self, msg): | ||||
|         # Get the Content-Type: for the message, then try to dispatch to | ||||
|         # self._handle_<maintype>_<subtype>().  If there's no handler for the | ||||
|         # full MIME type, then dispatch to self._handle_<maintype>().  If | ||||
|         # that's missing too, then dispatch to self._writeBody(). | ||||
|         main = msg.get_content_maintype() | ||||
|         sub = msg.get_content_subtype() | ||||
|         specific = UNDERSCORE.join((main, sub)).replace('-', '_') | ||||
|         meth = getattr(self, '_handle_' + specific, None) | ||||
|         if meth is None: | ||||
|             generic = main.replace('-', '_') | ||||
|             meth = getattr(self, '_handle_' + generic, None) | ||||
|             if meth is None: | ||||
|                 meth = self._writeBody | ||||
|         meth(msg) | ||||
|  | ||||
|     # | ||||
|     # Default handlers | ||||
|     # | ||||
|  | ||||
|     def _write_headers(self, msg): | ||||
|         for h, v in msg.raw_items(): | ||||
|             self.write(self.policy.fold(h, v)) | ||||
|         # A blank line always separates headers from body | ||||
|         self.write(self._NL) | ||||
|  | ||||
|     # | ||||
|     # Handlers for writing types and subtypes | ||||
|     # | ||||
|  | ||||
|     def _handle_text(self, msg): | ||||
|         payload = msg.get_payload() | ||||
|         if payload is None: | ||||
|             return | ||||
|         if not isinstance(payload, str): | ||||
|             raise TypeError('string payload expected: %s' % type(payload)) | ||||
|         if _has_surrogates(msg._payload): | ||||
|             charset = msg.get_param('charset') | ||||
|             if charset is not None: | ||||
|                 del msg['content-transfer-encoding'] | ||||
|                 msg.set_payload(payload, charset) | ||||
|                 payload = msg.get_payload() | ||||
|         if self._mangle_from_: | ||||
|             payload = fcre.sub('>From ', payload) | ||||
|         self._write_lines(payload) | ||||
|  | ||||
|     # Default body handler | ||||
|     _writeBody = _handle_text | ||||
|  | ||||
|     def _handle_multipart(self, msg): | ||||
|         # The trick here is to write out each part separately, merge them all | ||||
|         # together, and then make sure that the boundary we've chosen isn't | ||||
|         # present in the payload. | ||||
|         msgtexts = [] | ||||
|         subparts = msg.get_payload() | ||||
|         if subparts is None: | ||||
|             subparts = [] | ||||
|         elif isinstance(subparts, str): | ||||
|             # e.g. a non-strict parse of a message with no starting boundary. | ||||
|             self.write(subparts) | ||||
|             return | ||||
|         elif not isinstance(subparts, list): | ||||
|             # Scalar payload | ||||
|             subparts = [subparts] | ||||
|         for part in subparts: | ||||
|             s = self._new_buffer() | ||||
|             g = self.clone(s) | ||||
|             g.flatten(part, unixfrom=False, linesep=self._NL) | ||||
|             msgtexts.append(s.getvalue()) | ||||
|         # BAW: What about boundaries that are wrapped in double-quotes? | ||||
|         boundary = msg.get_boundary() | ||||
|         if not boundary: | ||||
|             # Create a boundary that doesn't appear in any of the | ||||
|             # message texts. | ||||
|             alltext = self._encoded_NL.join(msgtexts) | ||||
|             boundary = self._make_boundary(alltext) | ||||
|             msg.set_boundary(boundary) | ||||
|         # If there's a preamble, write it out, with a trailing CRLF | ||||
|         if msg.preamble is not None: | ||||
|             if self._mangle_from_: | ||||
|                 preamble = fcre.sub('>From ', msg.preamble) | ||||
|             else: | ||||
|                 preamble = msg.preamble | ||||
|             self._write_lines(preamble) | ||||
|             self.write(self._NL) | ||||
|         # dash-boundary transport-padding CRLF | ||||
|         self.write('--' + boundary + self._NL) | ||||
|         # body-part | ||||
|         if msgtexts: | ||||
|             self._fp.write(msgtexts.pop(0)) | ||||
|         # *encapsulation | ||||
|         # --> delimiter transport-padding | ||||
|         # --> CRLF body-part | ||||
|         for body_part in msgtexts: | ||||
|             # delimiter transport-padding CRLF | ||||
|             self.write(self._NL + '--' + boundary + self._NL) | ||||
|             # body-part | ||||
|             self._fp.write(body_part) | ||||
|         # close-delimiter transport-padding | ||||
|         self.write(self._NL + '--' + boundary + '--') | ||||
|         if msg.epilogue is not None: | ||||
|             self.write(self._NL) | ||||
|             if self._mangle_from_: | ||||
|                 epilogue = fcre.sub('>From ', msg.epilogue) | ||||
|             else: | ||||
|                 epilogue = msg.epilogue | ||||
|             self._write_lines(epilogue) | ||||
|  | ||||
|     def _handle_multipart_signed(self, msg): | ||||
|         # The contents of signed parts has to stay unmodified in order to keep | ||||
|         # the signature intact per RFC1847 2.1, so we disable header wrapping. | ||||
|         # RDM: This isn't enough to completely preserve the part, but it helps. | ||||
|         p = self.policy | ||||
|         self.policy = p.clone(max_line_length=0) | ||||
|         try: | ||||
|             self._handle_multipart(msg) | ||||
|         finally: | ||||
|             self.policy = p | ||||
|  | ||||
|     def _handle_message_delivery_status(self, msg): | ||||
|         # We can't just write the headers directly to self's file object | ||||
|         # because this will leave an extra newline between the last header | ||||
|         # block and the boundary.  Sigh. | ||||
|         blocks = [] | ||||
|         for part in msg.get_payload(): | ||||
|             s = self._new_buffer() | ||||
|             g = self.clone(s) | ||||
|             g.flatten(part, unixfrom=False, linesep=self._NL) | ||||
|             text = s.getvalue() | ||||
|             lines = text.split(self._encoded_NL) | ||||
|             # Strip off the unnecessary trailing empty line | ||||
|             if lines and lines[-1] == self._encoded_EMPTY: | ||||
|                 blocks.append(self._encoded_NL.join(lines[:-1])) | ||||
|             else: | ||||
|                 blocks.append(text) | ||||
|         # Now join all the blocks with an empty line.  This has the lovely | ||||
|         # effect of separating each block with an empty line, but not adding | ||||
|         # an extra one after the last one. | ||||
|         self._fp.write(self._encoded_NL.join(blocks)) | ||||
|  | ||||
|     def _handle_message(self, msg): | ||||
|         s = self._new_buffer() | ||||
|         g = self.clone(s) | ||||
|         # The payload of a message/rfc822 part should be a multipart sequence | ||||
|         # of length 1.  The zeroth element of the list should be the Message | ||||
|         # object for the subpart.  Extract that object, stringify it, and | ||||
|         # write it out. | ||||
|         # Except, it turns out, when it's a string instead, which happens when | ||||
|         # and only when HeaderParser is used on a message of mime type | ||||
|         # message/rfc822.  Such messages are generated by, for example, | ||||
|         # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So | ||||
|         # in that case we just emit the string body. | ||||
|         payload = msg._payload | ||||
|         if isinstance(payload, list): | ||||
|             g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL) | ||||
|             payload = s.getvalue() | ||||
|         else: | ||||
|             payload = self._encode(payload) | ||||
|         self._fp.write(payload) | ||||
|  | ||||
|     # This used to be a module level function; we use a classmethod for this | ||||
|     # and _compile_re so we can continue to provide the module level function | ||||
|     # for backward compatibility by doing | ||||
|     #   _make_boundary = Generator._make_boundary | ||||
|     # at the end of the module.  It *is* internal, so we could drop that... | ||||
|     @classmethod | ||||
|     def _make_boundary(cls, text=None): | ||||
|         # Craft a random boundary.  If text is given, ensure that the chosen | ||||
|         # boundary doesn't appear in the text. | ||||
|         token = random.randrange(sys.maxsize) | ||||
|         boundary = ('=' * 15) + (_fmt % token) + '==' | ||||
|         if text is None: | ||||
|             return boundary | ||||
|         b = boundary | ||||
|         counter = 0 | ||||
|         while True: | ||||
|             cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE) | ||||
|             if not cre.search(text): | ||||
|                 break | ||||
|             b = boundary + '.' + str(counter) | ||||
|             counter += 1 | ||||
|         return b | ||||
|  | ||||
|     @classmethod | ||||
|     def _compile_re(cls, s, flags): | ||||
|         return re.compile(s, flags) | ||||
|  | ||||
| class BytesGenerator(Generator): | ||||
|     """Generates a bytes version of a Message object tree. | ||||
|  | ||||
|     Functionally identical to the base Generator except that the output is | ||||
|     bytes and not string.  When surrogates were used in the input to encode | ||||
|     bytes, these are decoded back to bytes for output.  If the policy has | ||||
|     cte_type set to 7bit, then the message is transformed such that the | ||||
|     non-ASCII bytes are properly content transfer encoded, using the charset | ||||
|     unknown-8bit. | ||||
|  | ||||
|     The outfp object must accept bytes in its write method. | ||||
|     """ | ||||
|  | ||||
|     # Bytes versions of this constant for use in manipulating data from | ||||
|     # the BytesIO buffer. | ||||
|     _encoded_EMPTY = b'' | ||||
|  | ||||
|     def write(self, s): | ||||
|         self._fp.write(str(s).encode('ascii', 'surrogateescape')) | ||||
|  | ||||
|     def _new_buffer(self): | ||||
|         return BytesIO() | ||||
|  | ||||
|     def _encode(self, s): | ||||
|         return s.encode('ascii') | ||||
|  | ||||
|     def _write_headers(self, msg): | ||||
|         # This is almost the same as the string version, except for handling | ||||
|         # strings with 8bit bytes. | ||||
|         for h, v in msg.raw_items(): | ||||
|             self._fp.write(self.policy.fold_binary(h, v)) | ||||
|         # A blank line always separates headers from body | ||||
|         self.write(self._NL) | ||||
|  | ||||
|     def _handle_text(self, msg): | ||||
|         # If the string has surrogates the original source was bytes, so | ||||
|         # just write it back out. | ||||
|         if msg._payload is None: | ||||
|             return | ||||
|         if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': | ||||
|             if self._mangle_from_: | ||||
|                 msg._payload = fcre.sub(">From ", msg._payload) | ||||
|             self._write_lines(msg._payload) | ||||
|         else: | ||||
|             super(BytesGenerator,self)._handle_text(msg) | ||||
|  | ||||
|     # Default body handler | ||||
|     _writeBody = _handle_text | ||||
|  | ||||
|     @classmethod | ||||
|     def _compile_re(cls, s, flags): | ||||
|         return re.compile(s.encode('ascii'), flags) | ||||
|  | ||||
|  | ||||
| # Format string that DecodedGenerator uses for non-text parts when no fmt | ||||
| # argument is given. | ||||
| _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' | ||||
|  | ||||
class DecodedGenerator(Generator):
    """Generator variant that renders a message as readable text.

    Behaves like the Generator base class, except that every part which is
    neither text nor a multipart container is replaced by a one-line
    placeholder produced from a format string.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Initialise like Generator.__init__(), plus an optional template.

        outfp, mangle_from_ and maxheaderlen are forwarded unchanged to
        Generator.__init__().

        fmt is the %-style template written in place of each non-text part.
        It is expanded with a mapping that provides these keys (use them as
        %(keyword)s):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) the module-level template is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Write every part found by msg.walk() to this generator.

        Text parts are printed using get_payload(decode=False); multipart
        containers emit nothing themselves; anything else is printed as the
        expanded placeholder template.
        """
        for subpart in msg.walk():
            main = subpart.get_content_maintype()
            if main == 'text':
                print(subpart.get_payload(decode=False), file=self)
                continue
            if main == 'multipart':
                # Containers have no output of their own; walk() yields
                # their children separately.
                continue
            # Values offered to the template; missing filename, description
            # and encoding fall back to bracketed markers.
            fields = {
                'type'       : subpart.get_content_type(),
                'maintype'   : subpart.get_content_maintype(),
                'subtype'    : subpart.get_content_subtype(),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
            print(self._fmt % fields, file=self)
|  | ||||
|  | ||||
# Helper used by Generator._make_boundary
# _width is the length of repr(sys.maxsize - 1); _fmt is the matching
# zero-padded integer format string, i.e. '%0<_width>d'.
_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width

# Backward compatibility: keep a module-level alias for the class attribute
# Generator._make_boundary.
_make_boundary = Generator._make_boundary
| @ -0,0 +1,581 @@ | ||||
| # Copyright (C) 2002-2007 Python Software Foundation | ||||
| # Author: Ben Gertzfield, Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Header encoding and decoding functionality.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import bytes, range, str, super, zip | ||||
|  | ||||
| __all__ = [ | ||||
|     'Header', | ||||
|     'decode_header', | ||||
|     'make_header', | ||||
|     ] | ||||
|  | ||||
| import re | ||||
| import binascii | ||||
|  | ||||
| from future.backports import email | ||||
| from future.backports.email import base64mime | ||||
| from future.backports.email.errors import HeaderParseError | ||||
| import future.backports.email.charset as _charset | ||||
|  | ||||
| # Helpers | ||||
| from future.backports.email.quoprimime import _max_append, header_decode | ||||
|  | ||||
# Module-level alias so Charset can be referenced unqualified below.
Charset = _charset.Charset

# String constants shared by the functions and classes in this module.
NL = '\n'           # default line separator for _ValueFormatter.__str__()
SPACE = ' '
BSPACE = b' '       # bytes separator used when decode_header() joins runs
SPACE8 = ' ' * 8
EMPTYSTRING = ''
MAXLINELEN = 78     # default maxlinelen for Header
FWS = ' \t'         # characters _ValueFormatter._ascii_split() splits on

# Charset instances used by Header: USASCII is the default charset, UTF8 is
# the fallback chosen by Header.append() when text cannot be encoded as
# us-ascii.
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')
|  | ||||
# Match encoded-word strings in the form =?charset?q?Hello_World?=
# The pattern exposes three named groups -- charset, encoding and encoded --
# which decode_header() relies on when it splits a header line with it.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (octal 041)
# to tilde (octal 176).
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack: it matches a newline immediately followed by a run
# of non-space, non-tab characters and a colon.
_embeded_header = re.compile(r'\n[^ \t]+:')
|  | ||||
|  | ||||
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    The type of the returned strings depends on the input:

    * For a Header object (anything with a `_chunks` attribute) each chunk is
      passed through _charset._encode() and paired with str(charset).
    * For a string without any encoded word the result is the single pair
      (header, None), with header returned unchanged.
    * Otherwise every fragment that is a str is converted to bytes with the
      'raw-unicode-escape' codec before being returned.  Consecutive
      fragments with the same charset are merged; unencoded runs are joined
      with a single space, encoded runs are concatenated directly.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                    for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        # ecre has three groups, so split() yields
        # [text, charset, encoding, encoded, text, ...].
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                # Leading whitespace of each physical line is dropped.
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    droplist = []
    for n, w in enumerate(words):
        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    # Delete from the end so that the remaining indices stay valid.
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            # ecre only admits 'q' or 'b', so this should be unreachable.
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            # Adjacent unencoded fragments are separated by one space.
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed
|  | ||||
|  | ||||
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header from (string, charset) pairs such as decode_header()'s.

    decoded_seq is an iterable of pairs of the format
    (decoded_string, charset).  A charset may be None, a Charset instance,
    or anything else accepted by the Charset constructor (typically the
    character set's name).

    maxlinelen, header_name and continuation_ws are passed straight through
    to the Header constructor.  The populated Header instance is returned.
    """
    header = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for text, cs in decoded_seq:
        # A charset of None is handed to append() untouched (it stands for
        # us-ascii); only non-Charset values need wrapping.
        if not (cs is None or isinstance(cs, Charset)):
            cs = Charset(cs)
        header.append(text, cs)
    return header
|  | ||||
|  | ||||
class Header(object):
    """A header value stored as a list of (string, Charset) chunks.

    Chunks are added with append().  str() returns the unencoded text of the
    header, while encode() feeds the chunks through _ValueFormatter to
    produce the wrapped form and rejects values that look like they contain
    an embedded header.
    """

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen. For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name.  The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        lastspace = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            # Don't add a space if the None/us-ascii string already has
            # a space (trailing or leading depending on transition)
            nextcs = charset
            if nextcs == _charset.UNKNOWN8BIT:
                # Round-trip through ASCII so that surrogate-escaped bytes
                # are shown as replacement characters.
                original_bytes = string.encode('ascii', 'surrogateescape')
                string = original_bytes.decode('ascii', 'replace')
            if uchunks:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii') and not hasspace:
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii') and not lastspace:
                    uchunks.append(SPACE)
            lastspace = string and self._nonctext(string[-1])
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        """Compare the unencoded string value of this header with other."""
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def __ne__(self, other):
        """Return the negation of __eq__()."""
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset.  If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            if input_charset == _charset.UNKNOWN8BIT:
                # Keep undecodable bytes as surrogate escapes.
                s = s.decode('us-ascii', 'surrogateescape')
            else:
                s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is raised.
        output_charset = charset.output_codec or 'us-ascii'
        if output_charset != _charset.UNKNOWN8BIT:
            try:
                s.encode(output_charset, errors)
            except UnicodeEncodeError:
                # Only text that fails as us-ascii is tolerated; it is stored
                # with the UTF8 charset instead.
                if output_charset!='us-ascii':
                    raise
                charset = UTF8
        self._chunks.append((s, charset))

    def _nonctext(self, s):
        """True if string s is not a ctext character of RFC822.
        """
        return s.isspace() or s in ('(', ')', '\\')

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be longer
        than maxlinelen if a folding point cannot be found.  The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time.  The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping.  This is in very rough support of RFC 2822's `higher level
        syntactic breaks':  split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string.  Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split.  Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.

        Raises HeaderParseError if the encoded value appears to contain an
        embedded header.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        # hasspace stays None until the first chunk has been processed, so
        # no transition is inserted before the first chunk.
        hasspace = lastspace = None
        for string, charset in self._chunks:
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if not hasspace or charset not in (None, 'us-ascii'):
                        formatter.add_transition()
                elif charset not in (None, 'us-ascii') and not lastspace:
                    formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    # Preserve the line's own leading whitespace as its
                    # folding whitespace.
                    sline = line.lstrip()
                    fws = line[:len(line)-len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        """Merge adjacent chunks that share a charset, joining with a space."""
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks
|  | ||||
|  | ||||
class _ValueFormatter(object):
    """Accumulate header text into wrapped output lines.

    Finished lines are collected in self._lines; the line currently being
    built is an _Accumulator of (fws, string) pairs in self._current_line.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        """Store the wrapping parameters and start an empty first line.

        headerlen is used as the initial size of the first line, so that it
        counts towards that line's length.
        """
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        """Flush the current line and return all lines joined by linesep."""
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        """Return the formatted value using NL as the line separator."""
        return self._str(NL)

    def newline(self):
        """Finish the current line and start a new, empty one.

        A trailing (' ', '') transition marker is discarded.  A line made
        only of whitespace is glued onto the previous line; if there is no
        previous line yet it is emitted as a line of its own instead of
        raising IndexError.
        """
        end_of_line = self._current_line.pop()
        if end_of_line != (' ', ''):
            self._current_line.push(*end_of_line)
        if len(self._current_line) > 0:
            # Only append to the previous line when there is one.
            if self._current_line.is_onlyws() and self._lines:
                self._lines[-1] += str(self._current_line)
            else:
                self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        """Push a (' ', '') marker onto the current line."""
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        """Add string, preceded by fws, to the output using charset's rules."""
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible. Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks.  We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome.  What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._append_chunk(fws, first_line)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self.newline()
        self._current_line.push(self._continuation_ws, last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        """Yield the room left on the current line, then on every later line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        """Split fws+string on runs of FWS and append the pieces as chunks."""
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice.  Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of the
        # FWS.  The complication is that not all spaces or tabs qualify as FWS,
        # and we are also supposed to prefer to break at "higher level
        # syntactic breaks".  We can't do either of these without intimate
        # knowledge of the structure of structured headers, which we don't have
        # here.  So the best we can do here is prefer to break at the specified
        # splitchars, and hope that we don't choose any spaces or tabs that
        # aren't legal FWS.  (This is at least better than the old algorithm,
        # where we would sometimes *introduce* FWS after a splitchar, or the
        # algorithm before that, where we would turn all white space runs into
        # single spaces or tabs.)
        parts = re.split("(["+FWS+"]+)", fws+string)
        # The capturing group makes split() alternate text and whitespace,
        # starting with text.  Rearrange so the list starts with whitespace:
        # prepend an empty fws if there is leading text, otherwise drop the
        # empty leading text element.
        if parts[0]:
            parts[:0] = ['']
        else:
            parts.pop(0)
        # Walk the list two items at a time as (fws, part) pairs.
        for fws, part in zip(*[iter(parts)]*2):
            self._append_chunk(fws, part)

    def _append_chunk(self, fws, string):
        """Push (fws, string) and wrap the current line if it is too long."""
        self._current_line.push(fws, string)
        if len(self._current_line) > self._maxlen:
            # Find the best split point, working backward from the end.
            # There might be none, on a long first line.
            for ch in self._splitchars:
                for i in range(self._current_line.part_count()-1, 0, -1):
                    if ch.isspace():
                        fws = self._current_line[i][0]
                        if fws and fws[0]==ch:
                            break
                    prevpart = self._current_line[i-1][1]
                    if prevpart and prevpart[-1]==ch:
                        break
                else:
                    # No split point for this splitchar; try the next one.
                    continue
                # A split point was found at index i.
                break
            else:
                # No split point at all: the chunk just pushed cannot be
                # separated from what precedes it.
                fws, part = self._current_line.pop()
                if self._current_line._initial_size > 0:
                    # There will be a header, so leave it on a line by itself.
                    self.newline()
                    if not fws:
                        # We don't use continuation_ws here because the whitespace
                        # after a header should always be a space.
                        fws = ' '
                self._current_line.push(fws, part)
                return
            # Everything from the split point onward moves to the next line.
            remainder = self._current_line.pop_from(i)
            self._lines.append(str(self._current_line))
            self._current_line.reset(remainder)
|  | ||||
|  | ||||
| class _Accumulator(list): | ||||
|  | ||||
|     def __init__(self, initial_size=0): | ||||
|         self._initial_size = initial_size | ||||
|         super().__init__() | ||||
|  | ||||
|     def push(self, fws, string): | ||||
|         self.append((fws, string)) | ||||
|  | ||||
|     def pop_from(self, i=0): | ||||
|         popped = self[i:] | ||||
|         self[i:] = [] | ||||
|         return popped | ||||
|  | ||||
|     def pop(self): | ||||
|         if self.part_count()==0: | ||||
|             return ('', '') | ||||
|         return super().pop() | ||||
|  | ||||
|     def __len__(self): | ||||
|         return sum((len(fws)+len(part) for fws, part in self), | ||||
|                    self._initial_size) | ||||
|  | ||||
|     def __str__(self): | ||||
|         return EMPTYSTRING.join((EMPTYSTRING.join((fws, part)) | ||||
|                                 for fws, part in self)) | ||||
|  | ||||
|     def reset(self, startval=None): | ||||
|         if startval is None: | ||||
|             startval = [] | ||||
|         self[:] = startval | ||||
|         self._initial_size = 0 | ||||
|  | ||||
|     def is_onlyws(self): | ||||
|         return self._initial_size==0 and (not self or str(self).isspace()) | ||||
|  | ||||
|     def part_count(self): | ||||
|         return super().__len__() | ||||
| @ -0,0 +1,592 @@ | ||||
| """Representing and manipulating email headers via custom objects. | ||||
|  | ||||
| This module provides an implementation of the HeaderRegistry API. | ||||
| The implementation is designed to flexibly follow RFC5322 rules. | ||||
|  | ||||
| Eventually HeaderRegistry will be a public API, but it isn't yet, | ||||
| and will probably change some before that happens. | ||||
|  | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| from future.builtins import super | ||||
| from future.builtins import str | ||||
| from future.utils import text_to_native_str | ||||
| from future.backports.email import utils | ||||
| from future.backports.email import errors | ||||
| from future.backports.email import _header_value_parser as parser | ||||
|  | ||||
class Address(object):

    """An email address made of a display name, a username and a domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError when addr_spec is combined with username or domain,
        ValueError when only part of addr_spec could be parsed, and the first
        recorded defect when the parsed addr_spec has defects.

        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction (read-only)."""
        return self._display_name

    @property
    def username(self):
        """The username (local part) of the address (read-only)."""
        return self._username

    @property
    def domain(self):
        """The domain of the address (read-only)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both the username and the domain are empty.
        """
        nameset = set(self.username)
        # The set difference shrinks only if the username contains at least
        # one character from DOT_ATOM_ENDS; in that case it must be quoted.
        if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
            lp = parser.quote_string(self.username)
        else:
            lp = self.username
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display name <addr_spec>', or the bare addr_spec when
        there is no display name.
        """
        nameset = set(self.display_name)
        # Quote the display name when it contains any character in SPECIALS.
        if len(nameset) > len(nameset-parser.SPECIALS):
            disp = parser.quote_string(self.display_name)
        else:
            disp = self.display_name
        if disp:
            # An empty address is rendered as 'name <>' rather than
            # 'name <<>>'.
            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        """Equal only to objects of exactly the same type whose display
        name, username and domain all match.
        """
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this method two
        # equal Address objects would still compare as "not equal" there.
        return not self.__eq__(other)
|  | ||||
|  | ||||
class Group(object):

    """A named group of Address objects, or a wrapper for a lone address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by a colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the public attribute is immutable.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None (read-only)."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the addresses in the group (read-only)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from this format string.
        return "Group(display_name={!r}, addresses={!r})".format(
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'display name: addr, addr;', or just the address when the
        display name is None and there is exactly one address.
        """
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None:
            nameset = set(disp)
            # Quote the display name when it contains any character in
            # SPECIALS.
            if len(nameset) > len(nameset-parser.SPECIALS):
                disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        """Equal only to objects of exactly the same type with the same
        display name and addresses.
        """
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this method two
        # equal Group objects would still compare as "not equal" there.
        return not self.__eq__(other)
|  | ||||
|  | ||||
| # Header Classes # | ||||
|  | ||||
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse *value* via cls.parse and build the header from the result.

        The string value of the new object is the 'decoded' entry produced by
        the parse; everything in the parse dictionary is then handed to
        init() as keyword arguments.
        """
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        # 'decoded' stays in kwds: BaseHeader.init() accepts arbitrary
        # keywords and ignores the ones it does not consume.
        self.init(name, **kwds)
        return self

    def init(self, name, **_3to2kwargs):
        """Record the header name, parse tree and defects on the instance.

        'defects' and 'parse_tree' are required keywords (KeyError if either
        is missing); any other keyword is ignored.
        """
        defects = _3to2kwargs.pop('defects')
        parse_tree = _3to2kwargs.pop('parse_tree')
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """Tuple snapshot of the defects recorded for this header."""
        return tuple(self._defects)

    def __reduce__(self):
        """Support pickling.

        The header class is identified by its name and bases and rebuilt by
        _reconstruct_header; the instance __dict__ is passed as the state.
        """
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance holding *value* without parsing or init()."""
        return str.__new__(cls, value)

    def fold(self, **_3to2kwargs):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        'policy' is a required keyword argument; KeyError is raised when it
        is not supplied.

        """
        # The docstring must be the first statement of the function body,
        # otherwise fold.__doc__ is None; the keyword extraction follows it.
        policy = _3to2kwargs.pop('policy')
        # At some point we need to only put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
                             self._parse_tree])
        return header.fold(policy=policy)
|  | ||||
|  | ||||
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header object from the data BaseHeader.__reduce__ emits.

    A fresh class named *cls_name* with the given *bases* is created and its
    _reconstruct classmethod is asked to wrap *value*.
    """
    native_name = text_to_native_str(cls_name)
    header_cls = type(native_name, bases, {})
    return header_cls._reconstruct(value)
|  | ||||
|  | ||||
class UnstructuredHeader(object):

    """Header whose value is parsed with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of *value* and its string form in *kwds*."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
|  | ||||
|  | ||||
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant whose max_count is 1."""

    # NOTE(review): presumably the number of times the header may occur in
    # one message -- confirm against the code that reads max_count.
    max_count = 1
|  | ||||
|  | ||||
class DateHeader(object):

    """Header whose value consists of a single timestamp.

    Exposes an extra 'datetime' attribute holding the parsed timestamp: an
    aware datetime, or a naive one if the timezone in the input string is
    -0000.  A datetime object may be supplied as the value instead of a
    string.  The string value of the header is the normalized timestamp,
    i.e. the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with 'datetime', 'decoded' and 'parse_tree'.

        An empty value registers a HeaderMissingRequiredValue defect and
        yields a None datetime, an empty string and an empty TokenList.
        """
        if value:
            if isinstance(value, str):
                value = utils.parsedate_to_datetime(value)
            kwds['datetime'] = value
            normalized = utils.format_datetime(value)
            kwds['decoded'] = normalized
            kwds['parse_tree'] = cls.value_parser(normalized)
        else:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword, then defer to the next init()."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The datetime stored by parse(), or None for an empty value."""
        return self._datetime
|  | ||||
|  | ||||
class UniqueDateHeader(DateHeader):

    """DateHeader variant whose max_count is 1."""

    # NOTE(review): presumably the number of times the header may occur in
    # one message -- confirm against the code that reads max_count.
    max_count = 1
|  | ||||
|  | ||||
class AddressHeader(object):

    """Header whose value is a list of addresses and/or address groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Return the address-list parse tree for *value*."""
        tree, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return tree

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with 'groups', 'defects', 'decoded' and 'parse_tree'.

        *value* is either a string to parse or Address/Group object(s); a
        single non-iterable object is treated as a one-item list.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in tree.addresses]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            groups = []
            for item in value:
                if hasattr(item, 'addresses'):
                    # Already group-like; use it as is.
                    groups.append(item)
                else:
                    groups.append(Group(None, [item]))
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        if 'parse_tree' not in kwds:
            # Objects were supplied, so the tree comes from their string form.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword, then defer to the next init()."""
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups stored by init()."""
        return self._groups

    @property
    def addresses(self):
        """Tuple of every address of every group, computed once and cached."""
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
|  | ||||
|  | ||||
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant whose max_count is 1."""

    # NOTE(review): presumably the number of times the header may occur in
    # one message -- confirm against the code that reads max_count.
    max_count = 1
|  | ||||
|  | ||||
class SingleAddressHeader(AddressHeader):

    """AddressHeader that additionally exposes a single 'address'."""

    @property
    def address(self):
        """The one address in the header.

        Raises ValueError unless the header holds exactly one address.
        """
        if len(self.addresses) == 1:
            return self.addresses[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
|  | ||||
|  | ||||
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant whose max_count is 1."""

    # NOTE(review): presumably the number of times the header may occur in
    # one message -- confirm against the code that reads max_count.
    max_count = 1
|  | ||||
|  | ||||
class MIMEVersionHeader(object):

    """Header parsed with parser.parse_mime_version.

    Exposes 'major', 'minor' and 'version' attributes.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with the parse results and the version components.

        When the parse tree has no minor number, 'major', 'minor' and
        'version' are all None; otherwise 'version' is 'major.minor'.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        if tree.minor is None:
            kwds['major'] = None
            kwds['minor'] = tree.minor
            kwds['version'] = None
        else:
            kwds['major'] = tree.major
            kwds['minor'] = tree.minor
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])

    def init(self, *args, **kw):
        """Consume the version keywords, then defer to the next init()."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version component, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version component, or None."""
        return self._minor

    @property
    def version(self):
        """The 'major.minor' string, or None."""
        return self._version
|  | ||||
|  | ||||
class ParameterizedMIMEHeader(object):

    """Mixin that manages the header's dictionary of parameters.

    It must be subclassed, and the subclass must supply the value_parser
    for its specific header.
    """

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with the parse results and a 'params' dictionary.

        Parameter names are lower-cased; names and values are both passed
        through utils._sanitize.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        if tree.params is None:
            kwds['params'] = {}
            return
        # The MIME RFCs specify that parameter ordering is arbitrary.
        kwds['params'] = {
            utils._sanitize(pname).lower(): utils._sanitize(pvalue)
            for pname, pvalue in tree.params}

    def init(self, *args, **kw):
        """Consume the 'params' keyword, then defer to the next init()."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """A copy of the parameter dictionary."""
        snapshot = self._params.copy()
        return snapshot
|  | ||||
|  | ||||
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Parameterized header parsed with parser.parse_content_type_header."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the inherited init(), then capture maintype and subtype."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The maintype and subtype joined by a slash."""
        main = self.maintype
        return main + '/' + self.subtype
|  | ||||
|  | ||||
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Parameterized header parsed with
    parser.parse_content_disposition_header.
    """

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the inherited init(), then capture the disposition value."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """Disposition taken from the parse tree, or None if it had none."""
        return self._content_disposition
|  | ||||
|  | ||||
class ContentTransferEncodingHeader(object):

    """Header parsed with parser.parse_content_transfer_encoding_header."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in *kwds*."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the next init(), then capture the parse tree's cte value."""
        super().init(*args, **kw)
        raw_cte = self._parse_tree.cte
        self._cte = utils._sanitize(raw_cte)

    @property
    def cte(self):
        """Content transfer encoding taken from the parse tree."""
        return self._cte
|  | ||||
|  | ||||
| # The header factory # | ||||
|  | ||||
# Lower-cased header name -> specialized header class.  HeaderRegistry copies
# this mapping into its registry when use_default_map is true.
_default_header_map = {
    # Unstructured and date headers
    'subject': UniqueUnstructuredHeader,
    'date': UniqueDateHeader,
    'resent-date': DateHeader,
    'orig-date': UniqueDateHeader,
    # Address headers
    'sender': UniqueSingleAddressHeader,
    'resent-sender': SingleAddressHeader,
    'to': UniqueAddressHeader,
    'resent-to': AddressHeader,
    'cc': UniqueAddressHeader,
    'resent-cc': AddressHeader,
    'bcc': UniqueAddressHeader,
    'resent-bcc': AddressHeader,
    'from': UniqueAddressHeader,
    'resent-from': AddressHeader,
    'reply-to': UniqueAddressHeader,
    # MIME headers
    'mime-version': MIMEVersionHeader,
    'content-type': ContentTypeHeader,
    'content-disposition': ContentDispositionHeader,
    'content-transfer-encoding': ContentTransferEncodingHeader,
    }
|  | ||||
class HeaderRegistry(object):

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class becomes the last entry in the __bases__ of every header
        class this factory creates.  default_class is used for any "name"
        (see __call__) that has no entry in the registry.  When
        use_default_map is true (the default), the default mapping of names
        to specialized classes is copied into the registry at creation time.

        """
        self.registry = {}
        self.base_class = base_class
        self.default_class = default_class
        if not use_default_map:
            return
        self.registry.update(_default_header_map)

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.
        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        """Return a new class combining the class registered for *name*
        (or default_class) with base_class.
        """
        specialized = self.registry.get(name.lower(), self.default_class)
        class_name = text_to_native_str('_'+specialized.__name__)
        bases = (specialized, self.base_class)
        return type(class_name, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        A class specialized for the named header is built by combining the
        registry entry for the name (or default_class) with the factory
        base_class; the name and value are then passed to that class's
        constructor.

        """
        header_class = self[name]
        return header_class(name, value)
| @ -0,0 +1,74 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Various types of useful iterators and generators.""" | ||||
| from __future__ import print_function | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = [ | ||||
|     'body_line_iterator', | ||||
|     'typed_subpart_iterator', | ||||
|     'walk', | ||||
|     # Do not include _structure() since it's part of the debugging API. | ||||
|     ] | ||||
|  | ||||
| import sys | ||||
| from io import StringIO | ||||
|  | ||||
|  | ||||
| # This function will become a method of the Message class | ||||
def walk(self):
    """Yield this message, then every part beneath it.

    Parts are produced depth-first.  This is a generator.
    """
    yield self
    if not self.is_multipart():
        return
    for child in self.get_payload():
        for descendant in child.walk():
            yield descendant
|  | ||||
|  | ||||
| # These two functions are imported into the Iterators.py interface module. | ||||
def body_line_iterator(msg, decode=False):
    """Yield the lines of every string payload found while walking *msg*.

    Optional decode (default False) is handed on to .get_payload().  Parts
    whose payload is not a str are skipped.
    """
    # NOTE(review): `str` here is whatever this module has in scope; on
    # Python 2 that may be the native byte string -- confirm it is intended.
    for part in msg.walk():
        content = part.get_payload(decode=decode)
        if not isinstance(content, str):
            continue
        for text_line in StringIO(content):
            yield text_line
|  | ||||
|  | ||||
def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Yield the parts of *msg* that have the requested MIME type.

    `maintype' is the main MIME type to look for and defaults to "text".
    `subtype', when given, must match the part's MIME subtype as well; when
    it is omitted only the main type is compared.
    """
    for part in msg.walk():
        if part.get_content_maintype() != maintype:
            continue
        if subtype is None or part.get_content_subtype() == subtype:
            yield part
|  | ||||
|  | ||||
| def _structure(msg, fp=None, level=0, include_default=False): | ||||
|     """A handy debugging aid""" | ||||
|     if fp is None: | ||||
|         fp = sys.stdout | ||||
|     tab = ' ' * (level * 4) | ||||
|     print(tab + msg.get_content_type(), end='', file=fp) | ||||
|     if include_default: | ||||
|         print(' [%s]' % msg.get_default_type(), file=fp) | ||||
|     else: | ||||
|         print(file=fp) | ||||
|     if msg.is_multipart(): | ||||
|         for subpart in msg.get_payload(): | ||||
|             _structure(subpart, fp, level+1, include_default) | ||||
| @ -0,0 +1,882 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| # Copyright (C) 2001-2007 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Basic message object for the email package object model.""" | ||||
| from __future__ import absolute_import, division, unicode_literals | ||||
| from future.builtins import list, range, str, zip | ||||
|  | ||||
| __all__ = ['Message'] | ||||
|  | ||||
| import re | ||||
| import uu | ||||
| import base64 | ||||
| import binascii | ||||
| from io import BytesIO, StringIO | ||||
|  | ||||
| # Intrapackage imports | ||||
| from future.utils import as_native_str | ||||
| from future.backports.email import utils | ||||
| from future.backports.email import errors | ||||
| from future.backports.email._policybase import compat32 | ||||
| from future.backports.email import charset as _charset | ||||
| from future.backports.email._encoded_words import decode_b | ||||
| Charset = _charset.Charset | ||||
|  | ||||
SEMISPACE = '; '

# Pattern matching any one `special' character: space, parentheses, angle
# brackets, and @ , ; : \ " / [ ] ? =.  A parameter value containing one of
# these has to be quoted.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
|  | ||||
|  | ||||
| def _splitparam(param): | ||||
|     # Split header parameters.  BAW: this may be too simple.  It isn't | ||||
|     # strictly RFC 2045 (section 5.1) compliant, but it catches most headers | ||||
|     # found in the wild.  We may eventually need a full fledged parser. | ||||
|     # RDM: we might have a Header here; for now just stringify it. | ||||
|     a, sep, b = str(param).partition(';') | ||||
|     if not sep: | ||||
|         return a.strip(), None | ||||
|     return a.strip(), b.strip() | ||||
|  | ||||
| def _formatparam(param, value=None, quote=True): | ||||
|     """Convenience function to format and return a key=value pair. | ||||
|  | ||||
|     This will quote the value if needed or if quote is true.  If value is a | ||||
|     three tuple (charset, language, value), it will be encoded according | ||||
|     to RFC2231 rules.  If it contains non-ascii characters it will likewise | ||||
|     be encoded according to RFC2231 rules, using the utf-8 charset and | ||||
|     a null language. | ||||
|     """ | ||||
|     if value is not None and len(value) > 0: | ||||
|         # A tuple is used for RFC 2231 encoded parameter values where items | ||||
|         # are (charset, language, value).  charset is a string, not a Charset | ||||
|         # instance.  RFC 2231 encoded values are never quoted, per RFC. | ||||
|         if isinstance(value, tuple): | ||||
|             # Encode as per RFC 2231 | ||||
|             param += '*' | ||||
|             value = utils.encode_rfc2231(value[2], value[0], value[1]) | ||||
|             return '%s=%s' % (param, value) | ||||
|         else: | ||||
|             try: | ||||
|                 value.encode('ascii') | ||||
|             except UnicodeEncodeError: | ||||
|                 param += '*' | ||||
|                 value = utils.encode_rfc2231(value, 'utf-8', '') | ||||
|                 return '%s=%s' % (param, value) | ||||
|         # BAW: Please check this.  I think that if quote is set it should | ||||
|         # force quoting even if not necessary. | ||||
|         if quote or tspecials.search(value): | ||||
|             return '%s="%s"' % (param, utils.quote(value)) | ||||
|         else: | ||||
|             return '%s=%s' % (param, value) | ||||
|     else: | ||||
|         return param | ||||
|  | ||||
| def _parseparam(s): | ||||
|     # RDM This might be a Header, so for now stringify it. | ||||
|     s = ';' + str(s) | ||||
|     plist = [] | ||||
|     while s[:1] == ';': | ||||
|         s = s[1:] | ||||
|         end = s.find(';') | ||||
|         while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: | ||||
|             end = s.find(';', end + 1) | ||||
|         if end < 0: | ||||
|             end = len(s) | ||||
|         f = s[:end] | ||||
|         if '=' in f: | ||||
|             i = f.index('=') | ||||
|             f = f[:i].strip().lower() + '=' + f[i+1:].strip() | ||||
|         plist.append(f.strip()) | ||||
|         s = s[end:] | ||||
|     return plist | ||||
|  | ||||
|  | ||||
| def _unquotevalue(value): | ||||
|     # This is different than utils.collapse_rfc2231_value() because it doesn't | ||||
|     # try to convert the value to a unicode.  Message.get_param() and | ||||
|     # Message.get_params() are both currently defined to return the tuple in | ||||
|     # the face of RFC 2231 parameters. | ||||
|     if isinstance(value, tuple): | ||||
|         return value[0], value[1], utils.unquote(value[2]) | ||||
|     else: | ||||
|         return utils.unquote(value) | ||||
|  | ||||
|  | ||||
| class Message(object): | ||||
|     """Basic message object. | ||||
|  | ||||
|     A message object is defined as something that has a bunch of RFC 2822 | ||||
|     headers and a payload.  It may optionally have an envelope header | ||||
|     (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a | ||||
|     multipart or a message/rfc822), then the payload is a list of Message | ||||
|     objects, otherwise it is a string. | ||||
|  | ||||
|     Message objects implement part of the `mapping' interface, which assumes | ||||
|     there is exactly one occurrence of the header per message.  Some headers | ||||
|     do in fact appear multiple times (e.g. Received) and for those headers, | ||||
|     you must use the explicit API to set or get all the headers.  Not all of | ||||
|     the mapping methods are implemented. | ||||
|     """ | ||||
|     def __init__(self, policy=compat32): | ||||
|         self.policy = policy | ||||
|         self._headers = list() | ||||
|         self._unixfrom = None | ||||
|         self._payload = None | ||||
|         self._charset = None | ||||
|         # Defaults for multipart messages | ||||
|         self.preamble = self.epilogue = None | ||||
|         self.defects = [] | ||||
|         # Default content type | ||||
|         self._default_type = 'text/plain' | ||||
|  | ||||
|     @as_native_str(encoding='utf-8') | ||||
|     def __str__(self): | ||||
|         """Return the entire formatted message as a string. | ||||
|         This includes the headers, body, and envelope header. | ||||
|         """ | ||||
|         return self.as_string() | ||||
|  | ||||
|     def as_string(self, unixfrom=False, maxheaderlen=0): | ||||
|         """Return the entire formatted message as a (unicode) string. | ||||
|         Optional `unixfrom' when True, means include the Unix From_ envelope | ||||
|         header. | ||||
|  | ||||
|         This is a convenience method and may not generate the message exactly | ||||
|         as you intend.  For more flexibility, use the flatten() method of a | ||||
|         Generator instance. | ||||
|         """ | ||||
|         from future.backports.email.generator import Generator | ||||
|         fp = StringIO() | ||||
|         g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen) | ||||
|         g.flatten(self, unixfrom=unixfrom) | ||||
|         return fp.getvalue() | ||||
|  | ||||
|     def is_multipart(self): | ||||
|         """Return True if the message consists of multiple parts.""" | ||||
|         return isinstance(self._payload, list) | ||||
|  | ||||
|     # | ||||
|     # Unix From_ line | ||||
|     # | ||||
|     def set_unixfrom(self, unixfrom): | ||||
|         """Store *unixfrom* as the message's Unix From_ envelope line.""" | ||||
|         self._unixfrom = unixfrom | ||||
|  | ||||
|     def get_unixfrom(self): | ||||
|         """Return the stored Unix From_ envelope line (None until set).""" | ||||
|         return self._unixfrom | ||||
|  | ||||
|     # | ||||
|     # Payload manipulation. | ||||
|     # | ||||
|     def attach(self, payload): | ||||
|         """Add the given payload to the current payload. | ||||
|  | ||||
|         The current payload will always be a list of objects after this method | ||||
|         is called.  If you want to set the payload to a scalar object, use | ||||
|         set_payload() instead. | ||||
|         """ | ||||
|         if self._payload is None: | ||||
|             self._payload = [payload] | ||||
|         else: | ||||
|             self._payload.append(payload) | ||||
|  | ||||
|     def get_payload(self, i=None, decode=False): | ||||
|         """Return a reference to the payload. | ||||
|  | ||||
|         The payload will either be a list object or a string.  If you mutate | ||||
|         the list object, you modify the message's payload in place.  Optional | ||||
|         i returns that index into the payload. | ||||
|  | ||||
|         Optional decode is a flag indicating whether the payload should be | ||||
|         decoded or not, according to the Content-Transfer-Encoding header | ||||
|         (default is False). | ||||
|  | ||||
|         When True and the message is not a multipart, the payload will be | ||||
|         decoded if this header's value is `quoted-printable' or `base64'.  If | ||||
|         some other encoding is used, or the header is missing, or if the | ||||
|         payload has bogus data (i.e. bogus base64 or uuencoded data), the | ||||
|         payload is returned as-is. | ||||
|  | ||||
|         If the message is a multipart and the decode flag is True, then None | ||||
|         is returned. | ||||
|  | ||||
|         Raises TypeError when i is given but the payload is not a list. | ||||
|         """ | ||||
|         # Here is the logic table for this code, based on the email5.0.0 code: | ||||
|         #   i     decode  is_multipart  result | ||||
|         # ------  ------  ------------  ------------------------------ | ||||
|         #  None   True    True          None | ||||
|         #   i     True    True          None | ||||
|         #  None   False   True          _payload (a list) | ||||
|         #   i     False   True          _payload element i (a Message) | ||||
|         #   i     False   False         error (not a list) | ||||
|         #   i     True    False         error (not a list) | ||||
|         #  None   False   False         _payload | ||||
|         #  None   True    False         _payload decoded (bytes) | ||||
|         # Note that Barry planned to factor out the 'decode' case, but that | ||||
|         # isn't so easy now that we handle the 8 bit data, which needs to be | ||||
|         # converted in both the decode and non-decode path. | ||||
|         if self.is_multipart(): | ||||
|             if decode: | ||||
|                 return None | ||||
|             if i is None: | ||||
|                 return self._payload | ||||
|             else: | ||||
|                 return self._payload[i] | ||||
|         # For backward compatibility, Use isinstance and this error message | ||||
|         # instead of the more logical is_multipart test. | ||||
|         if i is not None and not isinstance(self._payload, list): | ||||
|             raise TypeError('Expected list, got %s' % type(self._payload)) | ||||
|         payload = self._payload | ||||
|         # cte might be a Header, so for now stringify it. | ||||
|         cte = str(self.get('content-transfer-encoding', '')).lower() | ||||
|         # payload may be bytes here. | ||||
|         if isinstance(payload, str): | ||||
|             payload = str(payload)    # for Python-Future, so surrogateescape works | ||||
|             if utils._has_surrogates(payload): | ||||
|                 # Text carrying surrogate escapes: turn it back into bytes. | ||||
|                 bpayload = payload.encode('ascii', 'surrogateescape') | ||||
|                 if not decode: | ||||
|                     try: | ||||
|                         payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') | ||||
|                     except LookupError: | ||||
|                         # The declared charset is not a known codec. | ||||
|                         payload = bpayload.decode('ascii', 'replace') | ||||
|             elif decode: | ||||
|                 try: | ||||
|                     bpayload = payload.encode('ascii') | ||||
|                 except UnicodeError: | ||||
|                     # This won't happen for RFC compliant messages (messages | ||||
|                     # containing only ASCII codepoints in the unicode input). | ||||
|                     # If it does happen, turn the string into bytes in a way | ||||
|                     # guaranteed not to fail. | ||||
|                     bpayload = payload.encode('raw-unicode-escape') | ||||
|         if not decode: | ||||
|             return payload | ||||
|         # NOTE(review): bpayload is only assigned in the str branch above, so | ||||
|         # a non-str payload combined with decode=True and one of the transfer | ||||
|         # encodings below would reference an unbound name -- confirm whether | ||||
|         # callers can reach that combination. | ||||
|         if cte == 'quoted-printable': | ||||
|             return utils._qdecode(bpayload) | ||||
|         elif cte == 'base64': | ||||
|             # XXX: this is a bit of a hack; decode_b should probably be factored | ||||
|             # out somewhere, but I haven't figured out where yet. | ||||
|             value, defects = decode_b(b''.join(bpayload.splitlines())) | ||||
|             for defect in defects: | ||||
|                 self.policy.handle_defect(self, defect) | ||||
|             return value | ||||
|         elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): | ||||
|             in_file = BytesIO(bpayload) | ||||
|             out_file = BytesIO() | ||||
|             try: | ||||
|                 uu.decode(in_file, out_file, quiet=True) | ||||
|                 return out_file.getvalue() | ||||
|             except uu.Error: | ||||
|                 # Some decoding problem | ||||
|                 return bpayload | ||||
|         # No recognised transfer encoding: a text payload comes back as its | ||||
|         # bytes form, anything else is returned unchanged. | ||||
|         if isinstance(payload, str): | ||||
|             return bpayload | ||||
|         return payload | ||||
|  | ||||
|     def set_payload(self, payload, charset=None): | ||||
|         """Set the payload to the given value. | ||||
|  | ||||
|         Optional charset sets the message's default character set.  See | ||||
|         set_charset() for details. | ||||
|         """ | ||||
|         self._payload = payload | ||||
|         if charset is not None: | ||||
|             self.set_charset(charset) | ||||
|  | ||||
|     def set_charset(self, charset): | ||||
|         """Set the charset of the payload to a given character set. | ||||
|  | ||||
|         charset can be a Charset instance, a string naming a character set, or | ||||
|         None.  If it is a string it will be converted to a Charset instance. | ||||
|         If charset is None, the charset parameter will be removed from the | ||||
|         Content-Type field.  Anything else will generate a TypeError. | ||||
|  | ||||
|         The message will be assumed to be of type text/* encoded with | ||||
|         charset.input_charset.  It will be converted to charset.output_charset | ||||
|         and encoded properly, if needed, when generating the plain text | ||||
|         representation of the message.  MIME headers (MIME-Version, | ||||
|         Content-Type, Content-Transfer-Encoding) will be added as needed. | ||||
|         """ | ||||
|         if charset is None: | ||||
|             # Drop the charset parameter and forget any stored Charset. | ||||
|             self.del_param('charset') | ||||
|             self._charset = None | ||||
|             return | ||||
|         if not isinstance(charset, Charset): | ||||
|             charset = Charset(charset) | ||||
|         self._charset = charset | ||||
|         if 'MIME-Version' not in self: | ||||
|             self.add_header('MIME-Version', '1.0') | ||||
|         if 'Content-Type' not in self: | ||||
|             self.add_header('Content-Type', 'text/plain', | ||||
|                             charset=charset.get_output_charset()) | ||||
|         else: | ||||
|             self.set_param('charset', charset.get_output_charset()) | ||||
|         # NOTE(review): this compares the Charset object with its output | ||||
|         # charset name; presumably true when input and output charsets | ||||
|         # differ -- depends on Charset's comparison, confirm there. | ||||
|         if charset != charset.get_output_charset(): | ||||
|             self._payload = charset.body_encode(self._payload) | ||||
|         if 'Content-Transfer-Encoding' not in self: | ||||
|             cte = charset.get_body_encoding() | ||||
|             # Try cte as an encoder called on this message.  A TypeError | ||||
|             # (presumably because cte is a plain encoding name rather than a | ||||
|             # callable -- confirm against Charset.get_body_encoding()) falls | ||||
|             # back to encoding the body here and recording cte in the header. | ||||
|             try: | ||||
|                 cte(self) | ||||
|             except TypeError: | ||||
|                 self._payload = charset.body_encode(self._payload) | ||||
|                 self.add_header('Content-Transfer-Encoding', cte) | ||||
|  | ||||
|     def get_charset(self): | ||||
|         """Return the Charset instance associated with the message's payload. | ||||
|  | ||||
|         This is whatever set_charset() last stored, or None if it was never | ||||
|         called or was called with None. | ||||
|         """ | ||||
|         return self._charset | ||||
|  | ||||
|     # | ||||
|     # MAPPING INTERFACE (partial) | ||||
|     # | ||||
|     def __len__(self): | ||||
|         """Return the total number of headers, including duplicates. | ||||
|  | ||||
|         Every stored header entry counts once, so a field name that appears | ||||
|         several times contributes several to the total. | ||||
|         """ | ||||
|         return len(self._headers) | ||||
|  | ||||
|     def __getitem__(self, name): | ||||
|         """Get a header value. | ||||
|  | ||||
|         Return None if the header is missing instead of raising an exception. | ||||
|  | ||||
|         Note that if the header appeared multiple times, exactly which | ||||
|         occurrence gets returned is undefined.  Use get_all() to get all | ||||
|         the values matching a header field name. | ||||
|  | ||||
|         This simply calls get(name) with its default failobj. | ||||
|         """ | ||||
|         return self.get(name) | ||||
|  | ||||
|     def __setitem__(self, name, val): | ||||
|         """Set the value of a header. | ||||
|  | ||||
|         Note: this does not overwrite an existing header with the same field | ||||
|         name.  Use __delitem__() first to delete any existing headers. | ||||
|         """ | ||||
|         max_count = self.policy.header_max_count(name) | ||||
|         if max_count: | ||||
|             lname = name.lower() | ||||
|             found = 0 | ||||
|             for k, v in self._headers: | ||||
|                 if k.lower() == lname: | ||||
|                     found += 1 | ||||
|                     if found >= max_count: | ||||
|                         raise ValueError("There may be at most {} {} headers " | ||||
|                                          "in a message".format(max_count, name)) | ||||
|         self._headers.append(self.policy.header_store_parse(name, val)) | ||||
|  | ||||
|     def __delitem__(self, name): | ||||
|         """Delete all occurrences of a header, if present. | ||||
|  | ||||
|         Does not raise an exception if the header is missing. | ||||
|         """ | ||||
|         name = name.lower() | ||||
|         newheaders = list() | ||||
|         for k, v in self._headers: | ||||
|             if k.lower() != name: | ||||
|                 newheaders.append((k, v)) | ||||
|         self._headers = newheaders | ||||
|  | ||||
|     def __contains__(self, name): | ||||
|         return name.lower() in [k.lower() for k, v in self._headers] | ||||
|  | ||||
|     def __iter__(self): | ||||
|         for field, value in self._headers: | ||||
|             yield field | ||||
|  | ||||
|     def keys(self): | ||||
|         """Return a list of all the message's header field names. | ||||
|  | ||||
|         These will be sorted in the order they appeared in the original | ||||
|         message, or were added to the message, and may contain duplicates. | ||||
|         Any fields deleted and re-inserted are always appended to the header | ||||
|         list. | ||||
|         """ | ||||
|         return [k for k, v in self._headers] | ||||
|  | ||||
|     def values(self): | ||||
|         """Return a list of all the message's header values. | ||||
|  | ||||
|         These will be sorted in the order they appeared in the original | ||||
|         message, or were added to the message, and may contain duplicates. | ||||
|         Any fields deleted and re-inserted are always appended to the header | ||||
|         list. | ||||
|         """ | ||||
|         return [self.policy.header_fetch_parse(k, v) | ||||
|                 for k, v in self._headers] | ||||
|  | ||||
|     def items(self): | ||||
|         """Get all the message's header fields and values. | ||||
|  | ||||
|         These will be sorted in the order they appeared in the original | ||||
|         message, or were added to the message, and may contain duplicates. | ||||
|         Any fields deleted and re-inserted are always appended to the header | ||||
|         list. | ||||
|         """ | ||||
|         return [(k, self.policy.header_fetch_parse(k, v)) | ||||
|                 for k, v in self._headers] | ||||
|  | ||||
|     def get(self, name, failobj=None): | ||||
|         """Get a header value. | ||||
|  | ||||
|         Like __getitem__() but return failobj instead of None when the field | ||||
|         is missing. | ||||
|         """ | ||||
|         name = name.lower() | ||||
|         for k, v in self._headers: | ||||
|             if k.lower() == name: | ||||
|                 return self.policy.header_fetch_parse(k, v) | ||||
|         return failobj | ||||
|  | ||||
|     # | ||||
|     # "Internal" methods (public API, but only intended for use by a parser | ||||
|     # or generator, not normal application code). | ||||
|     # | ||||
|  | ||||
|     def set_raw(self, name, value): | ||||
|         """Store name and value in the model without modification. | ||||
|  | ||||
|         This is an "internal" API, intended only for use by a parser. | ||||
|         The pair is appended to the header list directly; no policy hook is | ||||
|         called and no maximum-count check is made. | ||||
|         """ | ||||
|         self._headers.append((name, value)) | ||||
|  | ||||
|     def raw_items(self): | ||||
|         """Return the (name, value) header pairs without modification. | ||||
|  | ||||
|         This is an "internal" API, intended only for use by a generator. | ||||
|         """ | ||||
|         return iter(self._headers.copy()) | ||||
|  | ||||
|     # | ||||
|     # Additional useful stuff | ||||
|     # | ||||
|  | ||||
|     def get_all(self, name, failobj=None): | ||||
|         """Return a list of all the values for the named field. | ||||
|  | ||||
|         These will be sorted in the order they appeared in the original | ||||
|         message, and may contain duplicates.  Any fields deleted and | ||||
|         re-inserted are always appended to the header list. | ||||
|  | ||||
|         If no such fields exist, failobj is returned (defaults to None). | ||||
|         """ | ||||
|         values = [] | ||||
|         name = name.lower() | ||||
|         for k, v in self._headers: | ||||
|             if k.lower() == name: | ||||
|                 values.append(self.policy.header_fetch_parse(k, v)) | ||||
|         if not values: | ||||
|             return failobj | ||||
|         return values | ||||
|  | ||||
|     def add_header(self, _name, _value, **_params): | ||||
|         """Extended header setting. | ||||
|  | ||||
|         name is the header field to add.  keyword arguments can be used to set | ||||
|         additional parameters for the header field, with underscores converted | ||||
|         to dashes.  Normally the parameter will be added as key="value" unless | ||||
|         value is None, in which case only the key will be added.  If a | ||||
|         parameter value contains non-ASCII characters it can be specified as a | ||||
|         three-tuple of (charset, language, value), in which case it will be | ||||
|         encoded according to RFC2231 rules.  Otherwise it will be encoded using | ||||
|         the utf-8 charset and a language of ''. | ||||
|  | ||||
|         Examples: | ||||
|  | ||||
|         msg.add_header('content-disposition', 'attachment', filename='bud.gif') | ||||
|         msg.add_header('content-disposition', 'attachment', | ||||
|                        filename=('utf-8', '', 'Fußballer.ppt')) | ||||
|         msg.add_header('content-disposition', 'attachment', | ||||
|                        filename='Fußballer.ppt')) | ||||
|         """ | ||||
|         parts = [] | ||||
|         for k, v in _params.items(): | ||||
|             if v is None: | ||||
|                 parts.append(k.replace('_', '-')) | ||||
|             else: | ||||
|                 parts.append(_formatparam(k.replace('_', '-'), v)) | ||||
|         if _value is not None: | ||||
|             parts.insert(0, _value) | ||||
|         self[_name] = SEMISPACE.join(parts) | ||||
|  | ||||
|     def replace_header(self, _name, _value): | ||||
|         """Replace a header. | ||||
|  | ||||
|         Replace the first matching header found in the message, retaining | ||||
|         header order and case.  If no matching header was found, a KeyError is | ||||
|         raised. | ||||
|         """ | ||||
|         _name = _name.lower() | ||||
|         for i, (k, v) in zip(range(len(self._headers)), self._headers): | ||||
|             if k.lower() == _name: | ||||
|                 self._headers[i] = self.policy.header_store_parse(k, _value) | ||||
|                 break | ||||
|         else: | ||||
|             raise KeyError(_name) | ||||
|  | ||||
|     # | ||||
|     # Content type accessors: get_content_type() and its maintype/subtype | ||||
|     # companions, plus the default-type getter and setter. | ||||
|     # | ||||
|  | ||||
|     def get_content_type(self): | ||||
|         """Return the message's content type. | ||||
|  | ||||
|         The returned string is coerced to lower case of the form | ||||
|         `maintype/subtype'.  If there was no Content-Type header in the | ||||
|         message, the default type as given by get_default_type() will be | ||||
|         returned.  Since according to RFC 2045, messages always have a default | ||||
|         type this will always return a value. | ||||
|  | ||||
|         RFC 2045 defines a message's default type to be text/plain unless it | ||||
|         appears inside a multipart/digest container, in which case it would be | ||||
|         message/rfc822. | ||||
|         """ | ||||
|         missing = object() | ||||
|         value = self.get('content-type', missing) | ||||
|         if value is missing: | ||||
|             # This should have no parameters | ||||
|             return self.get_default_type() | ||||
|         ctype = _splitparam(value)[0].lower() | ||||
|         # RFC 2045, section 5.2 says if its invalid, use text/plain | ||||
|         if ctype.count('/') != 1: | ||||
|             return 'text/plain' | ||||
|         return ctype | ||||
|  | ||||
|     def get_content_maintype(self): | ||||
|         """Return the message's main content type. | ||||
|  | ||||
|         This is the `maintype' part of the string returned by | ||||
|         get_content_type(). | ||||
|         """ | ||||
|         ctype = self.get_content_type() | ||||
|         return ctype.split('/')[0] | ||||
|  | ||||
|     def get_content_subtype(self): | ||||
|         """Returns the message's sub-content type. | ||||
|  | ||||
|         This is the `subtype' part of the string returned by | ||||
|         get_content_type(). | ||||
|         """ | ||||
|         ctype = self.get_content_type() | ||||
|         return ctype.split('/')[1] | ||||
|  | ||||
|     def get_default_type(self): | ||||
|         """Return the `default' content type. | ||||
|  | ||||
|         Most messages have a default content type of text/plain, except for | ||||
|         messages that are subparts of multipart/digest containers.  Such | ||||
|         subparts have a default content type of message/rfc822. | ||||
|  | ||||
|         The value is whatever set_default_type() last stored; a new message | ||||
|         starts out with 'text/plain'. | ||||
|         """ | ||||
|         return self._default_type | ||||
|  | ||||
|     def set_default_type(self, ctype): | ||||
|         """Set the `default' content type. | ||||
|  | ||||
|         ctype should be either "text/plain" or "message/rfc822", although this | ||||
|         is not enforced.  The default content type is not stored in the | ||||
|         Content-Type header; it is only kept on the message object. | ||||
|         """ | ||||
|         self._default_type = ctype | ||||
|  | ||||
|     def _get_params_preserve(self, failobj, header): | ||||
|         # Like get_params() but preserves the quoting of values.  BAW: | ||||
|         # should this be part of the public interface? | ||||
|         missing = object() | ||||
|         value = self.get(header, missing) | ||||
|         if value is missing: | ||||
|             return failobj | ||||
|         params = [] | ||||
|         for p in _parseparam(value): | ||||
|             try: | ||||
|                 name, val = p.split('=', 1) | ||||
|                 name = name.strip() | ||||
|                 val = val.strip() | ||||
|             except ValueError: | ||||
|                 # Must have been a bare attribute | ||||
|                 name = p.strip() | ||||
|                 val = '' | ||||
|             params.append((name, val)) | ||||
|         params = utils.decode_params(params) | ||||
|         return params | ||||
|  | ||||
|     def get_params(self, failobj=None, header='content-type', unquote=True): | ||||
|         """Return the message's Content-Type parameters, as a list. | ||||
|  | ||||
|         The elements of the returned list are 2-tuples of key/value pairs, as | ||||
|         split on the `=' sign.  The left hand side of the `=' is the key, | ||||
|         while the right hand side is the value.  If there is no `=' sign in | ||||
|         the parameter the value is the empty string.  The value is as | ||||
|         described in the get_param() method. | ||||
|  | ||||
|         Optional failobj is the object to return if there is no Content-Type | ||||
|         header.  Optional header is the header to search instead of | ||||
|         Content-Type.  If unquote is True, the value is unquoted. | ||||
|         """ | ||||
|         missing = object() | ||||
|         params = self._get_params_preserve(missing, header) | ||||
|         if params is missing: | ||||
|             return failobj | ||||
|         if unquote: | ||||
|             return [(k, _unquotevalue(v)) for k, v in params] | ||||
|         else: | ||||
|             return params | ||||
|  | ||||
|     def get_param(self, param, failobj=None, header='content-type', | ||||
|                   unquote=True): | ||||
|         """Return the parameter value if found in the Content-Type header. | ||||
|  | ||||
|         Optional failobj is the object to return if there is no Content-Type | ||||
|         header, or the Content-Type header has no such parameter.  Optional | ||||
|         header is the header to search instead of Content-Type. | ||||
|  | ||||
|         Parameter keys are always compared case insensitively.  The return | ||||
|         value can either be a string, or a 3-tuple if the parameter was RFC | ||||
|         2231 encoded.  When it's a 3-tuple, the elements of the value are of | ||||
|         the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and | ||||
|         LANGUAGE can be None, in which case you should consider VALUE to be | ||||
|         encoded in the us-ascii charset.  You can usually ignore LANGUAGE. | ||||
|         The parameter value (either the returned string, or the VALUE item in | ||||
|         the 3-tuple) is always unquoted, unless unquote is set to False. | ||||
|  | ||||
|         If your application doesn't care whether the parameter was RFC 2231 | ||||
|         encoded, it can turn the return value into a string as follows: | ||||
|  | ||||
|             param = msg.get_param('foo') | ||||
|             param = email.utils.collapse_rfc2231_value(rawparam) | ||||
|  | ||||
|         """ | ||||
|         if header not in self: | ||||
|             return failobj | ||||
|         for k, v in self._get_params_preserve(failobj, header): | ||||
|             if k.lower() == param.lower(): | ||||
|                 if unquote: | ||||
|                     return _unquotevalue(v) | ||||
|                 else: | ||||
|                     return v | ||||
|         return failobj | ||||
|  | ||||
|     def set_param(self, param, value, header='Content-Type', requote=True, | ||||
|                   charset=None, language=''): | ||||
|         """Set a parameter in the Content-Type header. | ||||
|  | ||||
|         If the parameter already exists in the header, its value will be | ||||
|         replaced with the new value. | ||||
|  | ||||
|         If header is Content-Type and has not yet been defined for this | ||||
|         message, it will be set to "text/plain" and the new parameter and | ||||
|         value will be appended as per RFC 2045. | ||||
|  | ||||
|         An alternate header can be specified in the header argument, and all | ||||
|         parameters will be quoted as necessary unless requote is False. | ||||
|  | ||||
|         If charset is specified, the parameter will be encoded according to RFC | ||||
|         2231.  Optional language specifies the RFC 2231 language, defaulting | ||||
|         to the empty string.  Both charset and language should be strings. | ||||
|         """ | ||||
|         # A plain value plus a charset becomes the RFC 2231 three-tuple form. | ||||
|         if not isinstance(value, tuple) and charset: | ||||
|             value = (charset, language, value) | ||||
|  | ||||
|         if header not in self and header.lower() == 'content-type': | ||||
|             ctype = 'text/plain' | ||||
|         else: | ||||
|             ctype = self.get(header) | ||||
|         if not self.get_param(param, header=header): | ||||
|             # Parameter absent (or its current value is falsy): append it to | ||||
|             # whatever the header holds so far. | ||||
|             if not ctype: | ||||
|                 ctype = _formatparam(param, value, requote) | ||||
|             else: | ||||
|                 ctype = SEMISPACE.join( | ||||
|                     [ctype, _formatparam(param, value, requote)]) | ||||
|         else: | ||||
|             # Parameter present: rebuild the header value piece by piece, | ||||
|             # substituting the new value where the name matches. | ||||
|             ctype = '' | ||||
|             for old_param, old_value in self.get_params(header=header, | ||||
|                                                         unquote=requote): | ||||
|                 append_param = '' | ||||
|                 if old_param.lower() == param.lower(): | ||||
|                     append_param = _formatparam(param, value, requote) | ||||
|                 else: | ||||
|                     append_param = _formatparam(old_param, old_value, requote) | ||||
|                 if not ctype: | ||||
|                     ctype = append_param | ||||
|                 else: | ||||
|                     ctype = SEMISPACE.join([ctype, append_param]) | ||||
|         # Only touch the header list when the value actually changed; the | ||||
|         # header is deleted and re-added, so it moves to the end. | ||||
|         if ctype != self.get(header): | ||||
|             del self[header] | ||||
|             self[header] = ctype | ||||
|  | ||||
|     def del_param(self, param, header='content-type', requote=True): | ||||
|         """Remove the given parameter completely from the Content-Type header. | ||||
|  | ||||
|         The header will be re-written in place without the parameter or its | ||||
|         value. All values will be quoted as necessary unless requote is | ||||
|         False.  Optional header specifies an alternative to the Content-Type | ||||
|         header. | ||||
|         """ | ||||
|         if header not in self: | ||||
|             return | ||||
|         new_ctype = '' | ||||
|         for p, v in self.get_params(header=header, unquote=requote): | ||||
|             if p.lower() != param.lower(): | ||||
|                 if not new_ctype: | ||||
|                     new_ctype = _formatparam(p, v, requote) | ||||
|                 else: | ||||
|                     new_ctype = SEMISPACE.join([new_ctype, | ||||
|                                                 _formatparam(p, v, requote)]) | ||||
|         if new_ctype != self.get(header): | ||||
|             del self[header] | ||||
|             self[header] = new_ctype | ||||
|  | ||||
|     def set_type(self, type, header='Content-Type', requote=True): | ||||
|         """Set the main type and subtype for the Content-Type header. | ||||
|  | ||||
|         type must be a string in the form "maintype/subtype", otherwise a | ||||
|         ValueError is raised. | ||||
|  | ||||
|         This method replaces the Content-Type header, keeping all the | ||||
|         parameters in place.  If requote is False, this leaves the existing | ||||
|         header's quoting as is.  Otherwise, the parameters will be quoted (the | ||||
|         default). | ||||
|  | ||||
|         An alternative header can be specified in the header argument.  When | ||||
|         the Content-Type header is set, we'll always also add a MIME-Version | ||||
|         header. | ||||
|         """ | ||||
|         # BAW: should we be strict? | ||||
|         if not type.count('/') == 1: | ||||
|             raise ValueError | ||||
|         # Set the Content-Type, you get a MIME-Version | ||||
|         if header.lower() == 'content-type': | ||||
|             del self['mime-version'] | ||||
|             self['MIME-Version'] = '1.0' | ||||
|         if header not in self: | ||||
|             self[header] = type | ||||
|             return | ||||
|         params = self.get_params(header=header, unquote=requote) | ||||
|         del self[header] | ||||
|         self[header] = type | ||||
|         # Skip the first param; it's the old type. | ||||
|         for p, v in params[1:]: | ||||
|             self.set_param(p, v, header, requote) | ||||
|  | ||||
|     def get_filename(self, failobj=None): | ||||
|         """Return the filename associated with the payload if present. | ||||
|  | ||||
|         The filename is extracted from the Content-Disposition header's | ||||
|         `filename' parameter, and it is unquoted.  If that header is missing | ||||
|         the `filename' parameter, this method falls back to looking for the | ||||
|         `name' parameter. | ||||
|         """ | ||||
|         missing = object() | ||||
|         filename = self.get_param('filename', missing, 'content-disposition') | ||||
|         if filename is missing: | ||||
|             filename = self.get_param('name', missing, 'content-type') | ||||
|         if filename is missing: | ||||
|             return failobj | ||||
|         return utils.collapse_rfc2231_value(filename).strip() | ||||
|  | ||||
|     def get_boundary(self, failobj=None): | ||||
|         """Return the boundary associated with the payload if present. | ||||
|  | ||||
|         The boundary is extracted from the Content-Type header's `boundary' | ||||
|         parameter, and it is unquoted. | ||||
|         """ | ||||
|         missing = object() | ||||
|         boundary = self.get_param('boundary', missing) | ||||
|         if boundary is missing: | ||||
|             return failobj | ||||
|         # RFC 2046 says that boundaries may begin but not end in w/s | ||||
|         return utils.collapse_rfc2231_value(boundary).rstrip() | ||||
|  | ||||
|     def set_boundary(self, boundary): | ||||
|         """Set the boundary parameter in Content-Type to 'boundary'. | ||||
|  | ||||
|         This is subtly different than deleting the Content-Type header and | ||||
|         adding a new one with a new boundary parameter via add_header().  The | ||||
|         main difference is that using the set_boundary() method preserves the | ||||
|         order of the Content-Type header in the original message. | ||||
|  | ||||
|         HeaderParseError is raised if the message has no Content-Type header. | ||||
|         """ | ||||
|         missing = object() | ||||
|         params = self._get_params_preserve(missing, 'content-type') | ||||
|         if params is missing: | ||||
|             # There was no Content-Type header, and we don't know what type | ||||
|             # to set it to, so raise an exception. | ||||
|             raise errors.HeaderParseError('No Content-Type header found') | ||||
|         newparams = list() | ||||
|         foundp = False | ||||
|         for pk, pv in params: | ||||
|             if pk.lower() == 'boundary': | ||||
|                 newparams.append(('boundary', '"%s"' % boundary)) | ||||
|                 foundp = True | ||||
|             else: | ||||
|                 newparams.append((pk, pv)) | ||||
|         if not foundp: | ||||
|             # The original Content-Type header had no boundary attribute. | ||||
|             # Tack one on the end.  BAW: should we raise an exception | ||||
|             # instead??? | ||||
|             newparams.append(('boundary', '"%s"' % boundary)) | ||||
|         # Replace the existing Content-Type header with the new value | ||||
|         newheaders = list() | ||||
|         for h, v in self._headers: | ||||
|             if h.lower() == 'content-type': | ||||
|                 parts = list() | ||||
|                 for k, v in newparams: | ||||
|                     if v == '': | ||||
|                         parts.append(k) | ||||
|                     else: | ||||
|                         parts.append('%s=%s' % (k, v)) | ||||
|                 val = SEMISPACE.join(parts) | ||||
|                 newheaders.append(self.policy.header_store_parse(h, val)) | ||||
|  | ||||
|             else: | ||||
|                 newheaders.append((h, v)) | ||||
|         self._headers = newheaders | ||||
|  | ||||
|     def get_content_charset(self, failobj=None): | ||||
|         """Return the charset parameter of the Content-Type header. | ||||
|  | ||||
|         The returned string is always coerced to lower case.  If there is no | ||||
|         Content-Type header, or if that header has no charset parameter, | ||||
|         failobj is returned. | ||||
|         """ | ||||
|         missing = object() | ||||
|         charset = self.get_param('charset', missing) | ||||
|         if charset is missing: | ||||
|             return failobj | ||||
|         if isinstance(charset, tuple): | ||||
|             # RFC 2231 encoded, so decode it, and it better end up as ascii. | ||||
|             pcharset = charset[0] or 'us-ascii' | ||||
|             try: | ||||
|                 # LookupError will be raised if the charset isn't known to | ||||
|                 # Python.  UnicodeError will be raised if the encoded text | ||||
|                 # contains a character not in the charset. | ||||
|                 as_bytes = charset[2].encode('raw-unicode-escape') | ||||
|                 charset = str(as_bytes, pcharset) | ||||
|             except (LookupError, UnicodeError): | ||||
|                 charset = charset[2] | ||||
|         # charset characters must be in us-ascii range | ||||
|         try: | ||||
|             charset.encode('us-ascii') | ||||
|         except UnicodeError: | ||||
|             return failobj | ||||
|         # RFC 2046, $4.1.2 says charsets are not case sensitive | ||||
|         return charset.lower() | ||||
|  | ||||
|     def get_charsets(self, failobj=None): | ||||
|         """Return a list containing the charset(s) used in this message. | ||||
|  | ||||
|         The returned list of items describes the Content-Type headers' | ||||
|         charset parameter for this message and all the subparts in its | ||||
|         payload. | ||||
|  | ||||
|         Each item will either be a string (the value of the charset parameter | ||||
|         in the Content-Type header of that part) or the value of the | ||||
|         'failobj' parameter (defaults to None), if the part does not have a | ||||
|         main MIME type of "text", or the charset is not defined. | ||||
|  | ||||
|         The list will contain one string for each part of the message, plus | ||||
|         one for the container message (i.e. self), so that a non-multipart | ||||
|         message will still return a list of length 1. | ||||
|         """ | ||||
|         return [part.get_content_charset(failobj) for part in self.walk()] | ||||
|  | ||||
|     # I.e. def walk(self): ... | ||||
|     from future.backports.email.iterators import walk | ||||
| @ -0,0 +1,39 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Keith Dart | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Class representing application/* type MIME documents.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| from future.backports.email import encoders | ||||
| from future.backports.email.mime.nonmultipart import MIMENonMultipart | ||||
|  | ||||
| __all__ = ["MIMEApplication"] | ||||
|  | ||||
|  | ||||
class MIMEApplication(MIMENonMultipart):
    """MIME document whose main type is application."""

    def __init__(self, _data, _subtype='octet-stream',
                 _encoder=encoders.encode_base64, **_params):
        """Build an application/<_subtype> message around _data.

        _data is the raw application data; it is stored with set_payload().

        _subtype is the MIME subtype and defaults to 'octet-stream'.
        Passing None raises TypeError.

        _encoder is called with the new message once the payload has been
        stored, so it can encode the payload for transport.  The default is
        encoders.encode_base64.

        Any further keyword arguments are handed to the base class
        constructor as Content-Type parameters.
        """
        if _subtype is None:
            raise TypeError('Invalid application MIME subtype')
        MIMENonMultipart.__init__(self, 'application', _subtype, **_params)
        # Store first, then encode: the encoder works on the stored payload.
        self.set_payload(_data)
        _encoder(self)
| @ -0,0 +1,74 @@ | ||||
| # Copyright (C) 2001-2007 Python Software Foundation | ||||
| # Author: Anthony Baxter | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Class representing audio/* type MIME documents.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = ['MIMEAudio'] | ||||
|  | ||||
| import sndhdr | ||||
|  | ||||
| from io import BytesIO | ||||
| from future.backports.email import encoders | ||||
| from future.backports.email.mime.nonmultipart import MIMENonMultipart | ||||
|  | ||||
|  | ||||
# Maps a sound type name to the audio/* subtype used for it.  _whatsnd()
# looks up the first item of a sndhdr test result in this table; types with
# no entry here yield None.
_sndhdr_MIMEmap = {'au'  : 'basic',
                   'wav' :'x-wav',
                   'aiff':'x-aiff',
                   'aifc':'x-aiff',
                   }
|  | ||||
| # There are others in sndhdr that don't have MIME types. :( | ||||
| # Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? | ||||
def _whatsnd(data):
    """Guess the audio/* subtype of some raw sound data.

    Only the first 512 bytes of data are examined.  Each function in
    sndhdr.tests is called with that header and a BytesIO wrapped around it;
    the first one that returns a result decides the outcome.  The first item
    of that result is looked up in _sndhdr_MIMEmap, so the return value is
    None both when nothing matches and when the matched type has no entry in
    the table.

    sndhdr.what() itself only accepts a filename, which is why the tests are
    driven by hand here.
    """
    header = data[:512]
    stream = BytesIO(header)
    for probe in sndhdr.tests:
        match = probe(header, stream)
        if match is None:
            continue
        return _sndhdr_MIMEmap.get(match[0])
    return None
|  | ||||
|  | ||||
class MIMEAudio(MIMENonMultipart):
    """MIME document whose main type is audio."""

    def __init__(self, _audiodata, _subtype=None,
                 _encoder=encoders.encode_base64, **_params):
        """Build an audio/<_subtype> message around _audiodata.

        _audiodata is the raw audio data; it is stored with set_payload().

        _subtype is the MIME subtype.  When it is None the subtype is
        guessed from the data with _whatsnd(); if that also yields None a
        TypeError is raised.

        _encoder is called with this MIMEAudio instance once the payload has
        been stored.  It should use get_payload() and set_payload() to
        replace the payload with its encoded form, and add any
        Content-Transfer-Encoding or other headers that are needed.  The
        default is encoders.encode_base64.

        Any further keyword arguments are handed to the base class
        constructor as Content-Type parameters.
        """
        subtype = _subtype
        if subtype is None:
            subtype = _whatsnd(_audiodata)
            if subtype is None:
                raise TypeError('Could not find audio MIME subtype')
        MIMENonMultipart.__init__(self, 'audio', subtype, **_params)
        self.set_payload(_audiodata)
        _encoder(self)
| @ -0,0 +1,25 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Base class for MIME specializations.""" | ||||
| from __future__ import absolute_import, division, unicode_literals | ||||
| from future.backports.email import message | ||||
|  | ||||
| __all__ = ['MIMEBase'] | ||||
|  | ||||
|  | ||||
class MIMEBase(message.Message):
    """Common base for the MIME message classes."""

    def __init__(self, _maintype, _subtype, **_params):
        """Initialise the message with Content-Type and MIME-Version headers.

        The Content-Type value is "_maintype/_subtype"; any keyword
        arguments are passed to add_header() along with it as parameters for
        that header.  MIME-Version is set to '1.0'.
        """
        message.Message.__init__(self)
        self.add_header('Content-Type',
                        '%s/%s' % (_maintype, _subtype),
                        **_params)
        self['MIME-Version'] = '1.0'
| @ -0,0 +1,48 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Class representing image/* type MIME documents.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = ['MIMEImage'] | ||||
|  | ||||
| import imghdr | ||||
|  | ||||
| from future.backports.email import encoders | ||||
| from future.backports.email.mime.nonmultipart import MIMENonMultipart | ||||
|  | ||||
|  | ||||
class MIMEImage(MIMENonMultipart):
    """MIME document whose main type is image."""

    def __init__(self, _imagedata, _subtype=None,
                 _encoder=encoders.encode_base64, **_params):
        """Build an image/<_subtype> message around _imagedata.

        _imagedata is the raw image data; it is stored with set_payload().

        _subtype is the MIME subtype.  When it is None the subtype is
        guessed from the data with imghdr.what(); if that also yields None a
        TypeError is raised.

        _encoder is called with this MIMEImage instance once the payload has
        been stored.  It should use get_payload() and set_payload() to
        replace the payload with its encoded form, and add any
        Content-Transfer-Encoding or other headers that are needed.  The
        default is encoders.encode_base64.

        Any further keyword arguments are handed to the base class
        constructor as Content-Type parameters.
        """
        subtype = _subtype
        if subtype is None:
            # No filename is involved; imghdr inspects the data directly.
            subtype = imghdr.what(None, _imagedata)
            if subtype is None:
                raise TypeError('Could not guess image MIME subtype')
        MIMENonMultipart.__init__(self, 'image', subtype, **_params)
        self.set_payload(_imagedata)
        _encoder(self)
| @ -0,0 +1,36 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Class representing message/* MIME documents.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = ['MIMEMessage'] | ||||
|  | ||||
| from future.backports.email import message | ||||
| from future.backports.email.mime.nonmultipart import MIMENonMultipart | ||||
|  | ||||
|  | ||||
class MIMEMessage(MIMENonMultipart):
    """MIME document whose main type is message."""

    def __init__(self, _msg, _subtype='rfc822'):
        """Build a message/<_subtype> document wrapping _msg.

        _msg must be an instance of Message (or of a class derived from it);
        anything else raises TypeError.

        _subtype names the subtype of the wrapped message.  The default,
        "rfc822", is the one defined by the MIME standard, even though the
        term is technically outdated by RFC 2822.
        """
        MIMENonMultipart.__init__(self, 'message', _subtype)
        is_message = isinstance(_msg, message.Message)
        if not is_message:
            raise TypeError('Argument is not an instance of Message')
        # Go straight to Message.attach(): the attach() inherited from
        # MIMENonMultipart would raise instead of storing the message.
        message.Message.attach(self, _msg)
        # And be sure our default type is set correctly
        self.set_default_type('message/rfc822')
| @ -0,0 +1,49 @@ | ||||
| # Copyright (C) 2002-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Base class for MIME multipart/* type messages.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = ['MIMEMultipart'] | ||||
|  | ||||
| from future.backports.email.mime.base import MIMEBase | ||||
|  | ||||
|  | ||||
class MIMEMultipart(MIMEBase):
    """MIME message whose main type is multipart."""

    def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
                 **_params):
        """Build a multipart/<_subtype> message.

        _subtype is the multipart subtype and defaults to `mixed'.

        boundary is the multipart boundary string.  When it is given (and
        not empty) it is installed with set_boundary(); otherwise no
        boundary is set here.

        _subparts is an optional iterable of initial subparts; each one is
        added with attach().  More can be attached later the same way.

        Any further keyword arguments become parameters of the Content-Type
        header.
        """
        MIMEBase.__init__(self, 'multipart', _subtype, **_params)

        # Initialise _payload to an empty list as the Message superclass's
        # implementation of is_multipart assumes that _payload is a list for
        # multipart messages.
        self._payload = []

        for part in _subparts or ():
            self.attach(part)
        if boundary:
            self.set_boundary(boundary)
| @ -0,0 +1,24 @@ | ||||
| # Copyright (C) 2002-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Base class for MIME type messages that are not multipart.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = ['MIMENonMultipart'] | ||||
|  | ||||
| from future.backports.email import errors | ||||
| from future.backports.email.mime.base import MIMEBase | ||||
|  | ||||
|  | ||||
class MIMENonMultipart(MIMEBase):
    """Base class for MIME type messages that are not multipart."""

    def attach(self, payload):
        """Always raise MultipartConversionError; subparts are not allowed."""
        # The public API prohibits attaching multiple subparts to MIMEBase
        # derived subtypes since none of them are, by definition, of content
        # type multipart/*
        raise errors.MultipartConversionError(
            'Cannot attach additional subparts to non-multipart/*')
| @ -0,0 +1,44 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Class representing text/* type MIME documents.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = ['MIMEText'] | ||||
|  | ||||
| from future.backports.email.encoders import encode_7or8bit | ||||
| from future.backports.email.mime.nonmultipart import MIMENonMultipart | ||||
|  | ||||
|  | ||||
class MIMEText(MIMENonMultipart):
    """MIME document whose main type is text."""

    def __init__(self, _text, _subtype='plain', _charset=None):
        """Build a text/<_subtype> message around _text.

        _text is the string that becomes the payload.

        _subtype is the MIME subtype and defaults to "plain".

        _charset is added to the Content-Type header as its charset
        parameter and is also passed to set_payload().  When it is None,
        'us-ascii' is used if _text can be encoded as us-ascii and 'utf-8'
        otherwise.
        """
        # If no _charset was specified, check to see if there are non-ascii
        # characters present. If not, use 'us-ascii', otherwise use utf-8.
        # XXX: This can be removed once #7304 is fixed.
        if _charset is None:
            try:
                _text.encode('us-ascii')
            except UnicodeEncodeError:
                _charset = 'utf-8'
            else:
                _charset = 'us-ascii'

        ctype_params = {'charset': _charset}
        MIMENonMultipart.__init__(self, 'text', _subtype, **ctype_params)

        self.set_payload(_text, _charset)
| @ -0,0 +1,135 @@ | ||||
| # Copyright (C) 2001-2007 Python Software Foundation | ||||
| # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """A parser of RFC 2822 and MIME email messages.""" | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] | ||||
|  | ||||
| import warnings | ||||
| from io import StringIO, TextIOWrapper | ||||
|  | ||||
| from future.backports.email.feedparser import FeedParser, BytesFeedParser | ||||
| from future.backports.email.message import Message | ||||
| from future.backports.email._policybase import compat32 | ||||
|  | ||||
|  | ||||
class Parser(object):
    """Parser that builds a message object tree from text input."""

    def __init__(self, _class=Message, **_3to2kwargs):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message,
        which can then be manipulated and turned over to a Generator to
        return the textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block is terminated either by the end of the
        input or by a blank line.

        _class is the class to instantiate for new message objects when
        they must be created.  It must have a constructor that can take
        zero arguments.  The default is Message.

        The policy keyword specifies a policy object that controls a number
        of aspects of the parser's operation.  It defaults to compat32,
        which maintains backward compatibility.  Other keyword arguments
        are accepted and ignored.
        """
        # Stands in for a keyword-only "policy" argument.
        policy = _3to2kwargs.pop('policy', compat32)
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        fp is read in chunks of 8192 until a read returns nothing, and each
        chunk is fed to a FeedParser.  The root of the resulting message
        structure is returned.

        When headersonly is true the parser is told to stop parsing after
        the headers; the default, False, parses the entire contents.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        chunk = fp.read(8192)
        while chunk:
            feedparser.feed(chunk)
            chunk = fp.read(8192)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        text is wrapped in a StringIO and handed to parse(); headersonly
        has the same meaning as there.  Returns the root of the message
        structure.
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
|  | ||||
|  | ||||
|  | ||||
class HeaderParser(Parser):
    """Parser variant that always stops after the headers."""

    def parse(self, fp, headersonly=True):
        """Parse fp in headers-only mode; the headersonly argument is ignored."""
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse text in headers-only mode; the headersonly argument is ignored."""
        return Parser.parsestr(self, text, headersonly=True)
|  | ||||
|  | ||||
class BytesParser(object):
    """Parser that builds a message object tree from binary input."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message,
        which can then be manipulated and turned over to a Generator to
        return the textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block is terminated either by the end of the
        input or by a blank line.

        All arguments are passed unchanged to Parser, which does the actual
        work; see Parser for their meaning.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to
        stop parsing after reading the headers or not.  The default is
        False, meaning it parses the entire contents of the file.

        fp is decoded as ASCII with the surrogateescape error handler.  The
        file is left open: closing it remains the caller's responsibility.
        """
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Closing the wrapper (as "with fp:" would) also closes the
            # caller's binary file.  Detaching releases the underlying
            # stream without closing it.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        text is decoded as ASCII with the surrogateescape error handler and
        handed to the wrapped parser's parsestr().  Returns the root of the
        message structure.  Optional headersonly is a flag specifying
        whether to stop parsing after reading the headers or not; the
        default is False.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
|  | ||||
|  | ||||
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always stops after the headers."""

    def parse(self, fp, headersonly=True):
        """Parse fp in headers-only mode; the headersonly argument is ignored."""
        return BytesParser.parse(self, fp, True)

    def parsebytes(self, text, headersonly=True):
        """Parse text in headers-only mode; the headersonly argument is ignored."""
        return BytesParser.parsebytes(self, text, True)
| @ -0,0 +1,193 @@ | ||||
| """This will be the home for the policy that hooks in the new | ||||
| code that adds all the email6 features. | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import super | ||||
|  | ||||
| from future.standard_library.email._policybase import (Policy, Compat32, | ||||
|                                                   compat32, _extend_docstrings) | ||||
| from future.standard_library.email.utils import _has_surrogates | ||||
| from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry | ||||
|  | ||||
| __all__ = [ | ||||
|     'Compat32', | ||||
|     'compat32', | ||||
|     'Policy', | ||||
|     'EmailPolicy', | ||||
|     'default', | ||||
|     'strict', | ||||
|     'SMTP', | ||||
|     'HTTP', | ||||
|     ] | ||||
|  | ||||
# NOTE(review): the docstrings below start with '+'; presumably
# _extend_docstrings uses that marker to append them to the inherited
# docstring -- confirm against its definition before editing the prefix.
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)
    """

    # Class-level defaults; __init__ below gives every directly constructed
    # instance its own header_factory unless the caller supplies one.
    refold_source = 'long'
    header_factory = HeaderRegistry()

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # NOTE(review): object.__setattr__ presumably bypasses a
            # restrictive __setattr__ on the Policy base class -- confirm.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        # Only the first ':' separates name from value; later colons belong
        # to the value.
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to the
        header_factory method, and the resulting custom header object is
        returned as the value.  In this case a ValueError is raised if the
        input value contains CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        # More than one line after splitlines() means the string embeds a
        # line break, which is rejected.
        if isinstance(value, str) and len(value.splitlines())>1:
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # NOTE(review): no explicit surrogate replacement happens in this
        # method; presumably the header_factory takes care of it -- confirm.
        return self.header_factory(name, ''.join(value.splitlines()))

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        """
        # Binary (surrogate-escaped) data is only refolded for 7bit output.
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Shared implementation behind fold() and fold_binary(); returns the
        # folded header as text.
        if hasattr(value, 'name'):
            # Header objects know how to fold themselves.
            return value.fold(policy=self)
        # A falsy max_line_length (e.g. None) means "no length limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        lines = value.splitlines()
        # Operator precedence makes this:
        #   'all'  or  ('long' and (first line too long or a later line too
        #   long)).
        # The first line is measured with the "name: " prefix, hence the
        # len(name)+2.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        # Parsed as: refold or (refold_binary and _has_surrogates(value)).
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        # No refolding: keep the source line structure, normalising only the
        # line separator.
        return name + ': ' + self.linesep.join(lines) + self.linesep
|  | ||||
|  | ||||
# Ready-made module-level policy instances.
default = EmailPolicy()
# Make the default policy use the class default header_factory
# (drops the per-instance factory that EmailPolicy.__init__ just assigned).
del default.header_factory
# The remaining policies are clones of ``default`` with individual
# settings overridden.
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
| @ -0,0 +1,326 @@ | ||||
| # Copyright (C) 2001-2006 Python Software Foundation | ||||
| # Author: Ben Gertzfield | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Quoted-printable content transfer encoding per RFCs 2045-2047. | ||||
|  | ||||
| This module handles the content transfer encoding method defined in RFC 2045 | ||||
| to encode US ASCII-like 8-bit data called `quoted-printable'.  It is used to | ||||
| safely encode text that is in a character set similar to the 7-bit US ASCII | ||||
| character set, but that includes some 8-bit characters that are normally not | ||||
| allowed in email bodies or headers. | ||||
|  | ||||
| Quoted-printable is very space-inefficient for encoding binary files; use the | ||||
| email.base64mime module for that instead. | ||||
|  | ||||
| This module provides an interface to encode and decode both headers and bodies | ||||
| with quoted-printable encoding. | ||||
|  | ||||
| RFC 2047 defines a method for including character set information in an | ||||
| `encoded-word' in a header.  This method is commonly used for 8-bit real names | ||||
| in To:/From:/Cc: etc. fields, as well as Subject: lines. | ||||
|  | ||||
| This module does not do the line wrapping or end-of-line character | ||||
| conversion necessary for proper internationalized headers; it only | ||||
| does dumb encoding and decoding.  To deal with the various line | ||||
| wrapping issues, use the email.header module. | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import bytes, chr, dict, int, range, super | ||||
|  | ||||
# Names exported by ``from <this module> import *``.
__all__ = [
    'body_decode',
    'body_encode',
    'body_length',
    'decode',
    'decodestring',
    'header_decode',
    'header_encode',
    'header_length',
    'quote',
    'unquote',
    ]
|  | ||||
| import re | ||||
| import io | ||||
|  | ||||
| from string import ascii_letters, digits, hexdigits | ||||
|  | ||||
# Frequently used string constants.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Build a mapping of octets to the expansion of that octet.  Since we're only
# going to have 256 of these things, this isn't terribly inefficient
# space-wise.  Remember that headers and bodies have different sets of safe
# characters.  Initialize both maps with the full expansion, and then override
# the safe bytes with the more compact form.
# Keys are integer octet values 0..255; values are the text to emit for them.
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()

# Safe header bytes which need no encoding.
# Iterating the bytes(...) object is expected to yield integer octets, which
# are used directly as map keys.
for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')):
    _QUOPRI_HEADER_MAP[c] = chr(c)
# Headers have one other special encoding; spaces become underscores.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'

# Safe body bytes which need no encoding.
# Note that '=' is absent from this list, so it keeps its '=3D' expansion.
for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'
               b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
               b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
    _QUOPRI_BODY_MAP[c] = chr(c)
|  | ||||
|  | ||||
|  | ||||
| # Helpers | ||||
def header_check(octet):
    """Tell whether *octet* needs escaping under header quoted-printable.

    An octet is considered safe only when the header map stores it as its
    own single character; any other expansion (an ``=XX`` triplet, or the
    underscore used for a space) counts as escaped.
    """
    expansion = _QUOPRI_HEADER_MAP[octet]
    return expansion != chr(octet)
|  | ||||
|  | ||||
def body_check(octet):
    """Tell whether *octet* needs escaping under body quoted-printable.

    An octet is considered safe only when the body map stores it as its own
    single character.
    """
    expansion = _QUOPRI_BODY_MAP[octet]
    return expansion != chr(octet)
|  | ||||
|  | ||||
def header_length(bytearray):
    """Compute the size of a header once quoted-printable encoded.

    The RFC 2047 chrome that `header_encode()` wraps around the payload is
    not counted.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The number of characters the octets occupy after header
        quoted-printable encoding.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_HEADER_MAP[octet])
    return total
|  | ||||
|  | ||||
def body_length(bytearray):
    """Compute the size of a body once quoted-printable encoded.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The number of characters the octets occupy after body
        quoted-printable encoding.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_BODY_MAP[octet])
    return total
|  | ||||
|  | ||||
def _max_append(L, s, maxlen, extra=''):
    """Append *s* to the last chunk of list *L*, or start a new chunk.

    *s* may be a string or an integer code point (converted with chr()).
    It is glued onto the last element, separated by *extra*, when the
    combined length of that element and *s* does not exceed *maxlen*
    (the length of *extra* is not counted).  Otherwise, or when *L* is
    empty, *s* becomes a new element with its leading whitespace removed.
    *L* is modified in place; nothing is returned.
    """
    if not isinstance(s, str):
        s = chr(s)
    fits_on_last = bool(L) and len(L[-1]) + len(s) <= maxlen
    if fits_on_last:
        L[-1] += extra + s
    else:
        L.append(s.lstrip())
|  | ||||
|  | ||||
def unquote(s):
    """Convert an ``=AB`` escape into the character with code point 0xAB.

    Only the two characters following the first one are examined; anything
    after them is ignored.
    """
    hex_pair = s[1:3]
    code_point = int(hex_pair, 16)
    return chr(code_point)
|  | ||||
|  | ||||
def quote(c):
    """Return the ``=XX`` escape (two upper-case hex digits) for character *c*."""
    return '={0:02X}'.format(ord(c))
|  | ||||
|  | ||||
|  | ||||
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode one header line with the quoted-printable-like `Q' encoding.

    The `Q' encoding resembles quoted-printable but is meant for email
    header fields, so that charsets consisting mostly of 7-bit characters
    stay more or less readable in mail clients that do not decode them.

    header_bytes is an iterable of octets (integers).  charset names the
    character set placed in the encoded word; it defaults to iso-8859-1.

    Returns the text ``=?charset?q?payload?=``, or an empty string when
    header_bytes is empty.
    """
    # Nothing to encode: no chrome either.
    if not header_bytes:
        return ''
    # Look up every octet's expansion and glue the pieces together.
    payload = EMPTYSTRING.join(
        _QUOPRI_HEADER_MAP[octet] for octet in header_bytes)
    return '=?%s?q?%s?=' % (charset, payload)
|  | ||||
|  | ||||
class _body_accumulator(io.StringIO):
    """Text buffer that tracks how much room remains on the current line.

    ``room`` starts at ``maxlinelen`` and shrinks with each string written
    through write_str(); newline() resets it.
    """

    def __init__(self, maxlinelen, eol, *args, **kw):
        super().__init__(*args, **kw)
        self.eol = eol
        self.maxlinelen = maxlinelen
        self.room = maxlinelen

    def write_str(self, s):
        """Append s to the body and charge its length to the current line."""
        self.write(s)
        self.room -= len(s)

    def newline(self):
        """Emit the line ending and begin a fresh line."""
        self.write_str(self.eol)
        self.room = self.maxlinelen

    def write_soft_break(self):
        """Emit a soft line break ('=' plus line ending)."""
        self.write_str('=')
        self.newline()

    def write_wrapped(self, s, extra_room=0):
        """Write s, preceded by a soft break if the line cannot hold it.

        extra_room is additional space that must remain free after s.
        """
        required = len(s) + extra_room
        if required > self.room:
            self.write_soft_break()
        self.write_str(s)

    def write_char(self, c, is_last_char):
        """Write one (possibly already quoted) character of an input line.

        is_last_char tells whether c is the final character of its line, in
        which case trailing whitespace has to be protected.
        """
        if not is_last_char:
            # More input follows on this line: keep one column free for it
            # (or for a soft break).  Whitespace needs no quoting here.
            self.write_wrapped(c, extra_room=1)
            return
        if c not in ' \t':
            # Last character and not whitespace: a hard break follows
            # immediately, so no spare column is required.
            self.write_wrapped(c)
            return
        # From here on c is whitespace ending the line.
        if self.room >= 3:
            # The three-character quoted form fits on this line.
            self.write(quote(c))
        elif self.room == 2:
            # Exactly enough space for the raw character plus a soft break.
            self.write(c)
            self.write_soft_break()
        else:
            # Only a soft break fits; the quoted whitespace ends up alone
            # on the following line.
            self.write_soft_break()
            self.write(quote(c))
|  | ||||
|  | ||||
def body_encode(body, maxlinelen=76, eol=NL):
    """Quoted-printable encode body, wrapping lines at maxlinelen characters.

    Every encoded line is terminated with eol, "\\n" by default.  Pass
    "\\r\\n" when the result goes straight into an email.

    No encoded line exceeds maxlinelen characters before its eol (76 by
    default, the maximum RFC 2045 allows).  Lines that would be longer are
    split with the quoted-printable soft line break "=", so decoding yields
    the original text.

    maxlinelen must be at least 4, which leaves room for one quoted
    character ("=XX") and a soft line break; smaller values raise
    ValueError.  An empty body is returned unchanged.

    """
    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # Collects the output while keeping track of the space left per line.
    accumulator = _body_accumulator(maxlinelen, eol)

    # Every input line ends in a line break except, possibly, the last one.
    ends_with_eol = body[-1] in '\r\n'
    source_lines = body.splitlines()
    final_line_no = len(source_lines) - 1

    for line_no, line in enumerate(source_lines):
        final_char_pos = len(line) - 1
        for pos, char in enumerate(line):
            token = quote(char) if body_check(ord(char)) else char
            accumulator.write_char(token, pos == final_char_pos)
        # Reproduce the input's line break, if this line had one.
        if ends_with_eol or line_no < final_line_no:
            accumulator.newline()

    return accumulator.getvalue()
|  | ||||
|  | ||||
|  | ||||
| # BAW: I'm not sure if the intent was for the signature of this function to be | ||||
| # the same as base64MIME.decode() or not... | ||||
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Trailing whitespace on every encoded line is discarded.  A '=' at the
    end of a line is a soft line break and joins the line with the next
    one; '=XX' (two hex digits) is replaced by the corresponding character;
    any other '=' is passed through literally.

    An empty (or otherwise false) input is returned unchanged.  If the
    input does not end in a line break, the decoded text does not end in
    eol either.
    """
    if not encoded:
        return encoded
    # Collect the decoded fragments and join them once at the end instead
    # of growing a string piece by piece.
    pieces = []

    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            pieces.append(eol)
            continue

        i = 0
        n = len(line)
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # it is a soft line break: skip it and do not emit eol.
            elif i+1 == n:
                i += 1
                continue
            # Decode if in form =AB
            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
                pieces.append(unquote(line[i:i+3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                pieces.append(c)
                i += 1

            if i == n:
                pieces.append(eol)

    decoded = EMPTYSTRING.join(pieces)
    # Special case if original string did not end with eol: drop the eol
    # added after the last line.  Strip the whole eol (it may be longer than
    # one character, e.g. '\r\n'), and nothing at all when eol is empty.
    if eol and encoded[-1] not in '\r\n' and decoded.endswith(eol):
        decoded = decoded[:-len(eol)]
    return decoded
|  | ||||
|  | ||||
# For convenience and backwards compatibility w/ standard base64 module.
# Both names are plain aliases of decode().
body_decode = decode
decodestring = decode
|  | ||||
|  | ||||
|  | ||||
def _unquote_match(match):
    """Regex substitution callback: decode the matched ``=AB`` escape.

    The whole matched text is handed to unquote() and the resulting
    character is returned.
    """
    return unquote(match.group(0))
|  | ||||
|  | ||||
| # Header decoding is done a bit differently | ||||
def header_decode(s):
    """Decode a string encoded with the RFC 2047 MIME header `Q' encoding.

    Underscores are turned into spaces and every ``=XX`` escape (two hex
    digits) is replaced by the corresponding character.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.
    """
    s = s.replace('_', ' ')
    # The flags must be given to compile(): the fourth positional argument
    # of re.sub() is ``count``, so passing re.ASCII there silently limited
    # decoding to the first 256 escapes.  getattr() keeps this working on
    # interpreters whose ``re`` module has no ASCII flag.
    escape = re.compile(r'=[a-fA-F0-9]{2}', getattr(re, 'ASCII', 0))
    return escape.sub(_unquote_match, s)
| @ -0,0 +1,400 @@ | ||||
| # Copyright (C) 2001-2010 Python Software Foundation | ||||
| # Author: Barry Warsaw | ||||
| # Contact: email-sig@python.org | ||||
|  | ||||
| """Miscellaneous utilities.""" | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future import utils | ||||
| from future.builtins import bytes, int, str | ||||
|  | ||||
# Names exported by ``from <this module> import *``.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'format_datetime',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'parsedate_to_datetime',
    'unquote',
    ]
|  | ||||
| import os | ||||
| import re | ||||
| if utils.PY2: | ||||
|     re.ASCII = 0 | ||||
| import time | ||||
| import base64 | ||||
| import random | ||||
| import socket | ||||
| from future.backports import datetime | ||||
| from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote | ||||
| import warnings | ||||
| from io import StringIO | ||||
|  | ||||
| from future.backports.email._parseaddr import quote | ||||
| from future.backports.email._parseaddr import AddressList as _AddressList | ||||
| from future.backports.email._parseaddr import mktime_tz | ||||
|  | ||||
| from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz | ||||
|  | ||||
| from quopri import decodestring as _qdecode | ||||
|  | ||||
| # Intrapackage imports | ||||
| from future.backports.email.encoders import _bencode, _qencode | ||||
| from future.backports.email.charset import Charset | ||||
|  | ||||
# Frequently used string constants.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"

# Matches any single one of: ] [ \ ( ) < > @ , : ; " .
# formataddr() wraps a display name in double quotes when this matches.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Matches a backslash or a double quote; formataddr() prefixes each match
# with a backslash.
escapesre = re.compile(r'[\\"]')
|  | ||||
# How to figure out if we are processing strings that come from a byte
# source with undecodable characters.
# The callable returns a match object when the string contains a lone low
# surrogate (U+DC00..U+DFFF not preceded by a high surrogate and not
# followed by another low surrogate), and None otherwise.
# The literal cannot be a raw string because the \uXXXX escapes must be
# expanded by Python itself, so the regex anchors \A and \Z are written with
# doubled backslashes; the resulting pattern text is unchanged, but the
# source no longer relies on invalid string escape sequences.
_has_surrogates = re.compile(
    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search
|  | ||||
| # How to deal with a string containing bytes before handing it to the | ||||
| # application through the 'normal' interface. | ||||
def _sanitize(string):
    """Replace surrogate-escaped bytes in *string* with the 'unknown' char.

    The text is turned back into bytes (undoing surrogateescape) and then
    decoded as ASCII, substituting the replacement character for every byte
    that is not ASCII.
    """
    return string.encode('ascii', 'surrogateescape').decode('ascii', 'replace')
|  | ||||
|  | ||||
| # Helpers | ||||
|  | ||||
def formataddr(pair, charset='utf-8'):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.

    When realname is false, email_address alone is returned.

    charset selects the character set used to encode a realname that is
    not pure ASCII.  It may be a str or a Charset-like object providing a
    header_encode method, and defaults to 'utf-8'.

    A non-ASCII email_address raises a UnicodeError.
    """
    name, address = pair
    # The address MUST (per RFC) be ascii; let the encode error propagate.
    address.encode('ascii')
    if not name:
        return address

    try:
        name.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII display name: emit it as an encoded word.
        if isinstance(charset, str):
            charset = Charset(charset)
        return "%s <%s>" % (charset.header_encode(name), address)

    # ASCII display name: quote it if it contains special characters and
    # backslash-escape any backslashes or double quotes.
    quotes = '"' if specialsre.search(name) else ''
    escaped_name = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, escaped_name, quotes, address)
|  | ||||
|  | ||||
|  | ||||
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
|  | ||||
|  | ||||
|  | ||||
# Pattern for an encoded word of the form =?charset?encoding?atom?=, with
# named groups 'charset', 'encoding' ("q" or "b", either case) and 'atom'.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
|  | ||||
|  | ||||
def _format_timetuple_and_zone(timetuple, zone):
    """Render a time tuple plus zone text as 'Fri, 09 Nov 2001 01:08:47 zone'.

    timetuple is indexed like time.struct_time (year, month, day, hour,
    minute, second, weekday with Monday == 0).  Day and month names are
    always the English abbreviations.
    """
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    weekday = day_names[timetuple[6]]
    day_of_month = timetuple[2]
    month = month_names[timetuple[1] - 1]
    year = timetuple[0]
    hour = timetuple[3]
    minute = timetuple[4]
    second = timetuple[5]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        weekday, day_of_month, month, year, hour, minute, second, zone)
|  | ||||
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    gmtime() and localtime(); otherwise the current time is used.

    localtime, when true, interprets timeval in the local timezone instead
    of UTC and renders the matching numeric offset, taking daylight savings
    time into account.

    usegmt writes the timezone as the ascii string "GMT" rather than a
    numeric offset.  This is needed for HTTP, and is only used when
    localtime is false.
    """
    # Note: strftime() is not usable here because it honors the locale,
    # while RFC 2822 requires the English day and month abbreviations.
    if timeval is None:
        timeval = time.time()

    if not localtime:
        # UTC: the zone is either the literal GMT or -0000.
        zone = 'GMT' if usegmt else '-0000'
        return _format_timetuple_and_zone(time.gmtime(timeval), zone)

    now = time.localtime(timeval)
    # Pick the offset according to whether the local zone has daylight
    # savings time and whether it is in effect (last field of the tuple).
    dst_in_effect = time.daylight and now[-1]
    offset = time.altzone if dst_in_effect else time.timezone
    hours, leftover_seconds = divmod(abs(offset), 3600)
    # offset counts seconds west of UTC, whereas the rendered zone counts
    # east of UTC, hence the inverted sign.
    sign = '-' if offset > 0 else '+'
    zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    return _format_timetuple_and_zone(now, zone)
|  | ||||
def format_datetime(dt, usegmt=False):
    """Turn a datetime into a date string as specified in RFC 2822.

    With usegmt true, dt must be an aware datetime whose tzinfo equals
    timezone.utc; 'GMT' is then rendered in place of a numeric offset (this
    supports HTTP date stamps) and any other datetime raises ValueError.
    Otherwise a naive datetime is rendered with '-0000' and an aware one
    with its "%z" offset.
    """
    timetuple = dt.timetuple()
    tz = dt.tzinfo
    if usegmt:
        if tz is None or tz != datetime.timezone.utc:
            raise ValueError("usegmt option requires a UTC datetime")
        return _format_timetuple_and_zone(timetuple, 'GMT')
    zone = '-0000' if tz is None else dt.strftime("%z")
    return _format_timetuple_and_zone(timetuple, zone)
|  | ||||
|  | ||||
def make_msgid(idstring=None, domain=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC time, the process id and a random
    number.  idstring, if given, is appended (after a dot) to strengthen
    uniqueness.  domain, if given, supplies the part after the '@';
    otherwise the locally defined hostname is used.
    """
    now = time.time()
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(now))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    suffix = '' if idstring is None else '.' + idstring
    host = socket.getfqdn() if domain is None else domain
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, host)
|  | ||||
|  | ||||
def parsedate_to_datetime(data):
    """Parse a date string into a datetime object.

    The string is parsed with _parsedate_tz(); the last item of its result
    is taken as the UTC offset in seconds.  When that offset is None a
    naive datetime is returned, otherwise an aware one carrying a fixed
    offset timezone.
    """
    fields = list(_parsedate_tz(data))
    dtuple = fields[:-1]
    (tz,) = fields[-1:]
    date_parts = dtuple[:6]
    if tz is None:
        return datetime.datetime(*date_parts)
    offset = datetime.timedelta(seconds=tz)
    return datetime.datetime(*date_parts,
            tzinfo=datetime.timezone(offset))
|  | ||||
|  | ||||
def parseaddr(addr):
    """Parse addr and return its first address entry.

    Returns the first item of the parsed address list, or ('', '') when
    nothing could be parsed.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
|  | ||||
|  | ||||
| # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. | ||||
def unquote(str):
    """Strip one layer of surrounding quotes or angle brackets.

    A value wrapped in double quotes loses them, and the backslash escapes
    for backslash and double quote inside are undone.  A value wrapped in
    <...> just loses the brackets.  Anything else (including strings of
    length 0 or 1) is returned unchanged.
    """
    if len(str) <= 1:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
|  | ||||
|  | ||||
|  | ||||
| # RFC2231-related functions - parameter encoding and decoding | ||||
def decode_rfc2231(s):
    """Split an RFC 2231 value into its charset, language and text parts.

    *s* is split on TICK at most twice.  When that yields three pieces they
    are returned (as a list); otherwise the value carries no
    charset/language prefix and (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
|  | ||||
|  | ||||
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    *s* is always percent-quoted (no characters are treated as safe), using
    *charset* for the byte encoding or 'ascii' when no charset is given.
    If neither charset nor language is given, the quoted string is returned
    without any prefix.  Otherwise the result is charset'language'quoted,
    with the empty string standing in for a missing language.
    """
    quoted = url_quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return quoted
    lang = '' if language is None else language
    return "%s'%s'%s" % (charset, lang, quoted)
|  | ||||
|  | ||||
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).

    The first entry is copied to the result unchanged.  Every later entry
    has its value unquoted; names matching the RFC 2231 continuation
    pattern (``name*``, ``name*N`` or ``name*N*``) are collected per base
    name and merged, everything else is re-quoted and passed through.

    Returns a new list.  Plain parameters appear as (name, '"value"');
    merged parameters that had at least one %-encoded segment appear as
    (name, (charset, language, '"value"')).  The input sequence is not
    modified.  Raises IndexError if *params* is empty.
    """
    # Read by index/slice instead of copying and repeatedly pop(0)-ing:
    # that avoids quadratic behaviour and works for any sequence (e.g. a
    # tuple), not just lists.
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        value = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                # Decode as "latin-1", so the characters in s directly
                # represent the percent-encoded octet values.
                # collapse_rfc2231_value treats this as an octet sequence.
                s = url_unquote(s, encoding="latin-1")
                extended = True
            value.append(s)
        value = quote(EMPTYSTRING.join(value))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
|  | ||||
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a decoded RFC 2231 parameter value into a plain string.

    *value* is either a string, which is simply unquoted, or a 3-tuple
    (charset, language, text) as produced for extended parameters.  For a
    tuple, *text* is reinterpreted as raw bytes and decoded with *charset*
    using the *errors* policy.  If *charset* is None (the value had no
    charset'language' prefix), *fallback_charset* is used instead.  If the
    charset names an unknown codec, the unquoted text is returned.
    """
    if not isinstance(value, tuple) or len(value) != 3:
        return unquote(value)
    # While value comes to us as a unicode string, we need it to be a bytes
    # object.  We do not want bytes() normal utf-8 decoder, we want a straight
    # interpretation of the string as character bytes.
    charset, language, text = value
    if charset is None:
        # A missing charset would make str(rawbytes, None, errors) raise
        # TypeError; decode with the documented fallback instead.
        charset = fallback_charset
    rawbytes = bytes(text, 'raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)
|  | ||||
|  | ||||
| # | ||||
| # datetime doesn't provide a localtime function yet, so provide one.  Code | ||||
| # adapted from the patch in issue 9527.  This may not be perfect, but it is | ||||
| # better than not having it. | ||||
| # | ||||
|  | ||||
def localtime(dt=None, isdst=-1):
    """Return local time as an aware datetime object.

    With no *dt*, the current time is returned.  An aware *dt* is converted
    with astimezone().  A naive *dt* is taken to be local time: its time
    tuple, with *isdst* substituted as the DST flag, is passed to
    time.mktime(), and the resulting local struct_time supplies the UTC
    offset and zone name attached to *dt*.  Per mktime() conventions, a
    positive or zero *isdst* states that summer time is / is not in effect
    and a negative value asks the system to work it out.
    """
    if dt is None:
        return datetime.datetime.now(datetime.timezone.utc).astimezone()
    if dt.tzinfo is not None:
        return dt.astimezone()
    # Naive datetime: round-trip through mktime()/localtime() with the
    # caller's isdst hint to learn the applicable offset.
    fields = dt.timetuple()[:-1] + (isdst,)
    epoch_secs = time.mktime(fields)
    local_struct = time.localtime(epoch_secs)
    try:
        offset = datetime.timedelta(seconds=local_struct.tm_gmtoff)
        zone = datetime.timezone(offset, local_struct.tm_zone)
    except AttributeError:
        # No tm_gmtoff/tm_zone available: derive the offset from the
        # difference to UTC and only attach a zone name if that offset
        # agrees with what tm_isdst implies.
        offset = dt - datetime.datetime(*time.gmtime(epoch_secs)[:6])
        in_dst = time.daylight and local_struct.tm_isdst > 0
        if in_dst:
            implied = -time.altzone
        else:
            implied = -time.timezone
        if offset == datetime.timedelta(seconds=implied):
            zone = datetime.timezone(offset, time.tzname[in_dst])
        else:
            zone = datetime.timezone(offset)
    return dt.replace(tzinfo=zone)
| @ -0,0 +1,27 @@ | ||||
| """ | ||||
| General functions for HTML manipulation, backported from Py3. | ||||
|  | ||||
| Note that this uses Python 2.7 code with the corresponding Python 3 | ||||
| module names and locations. | ||||
| """ | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
|  | ||||
# Translation tables for str.translate(): code point -> HTML entity text.
# The replacement values must be the entity references themselves; mapping a
# character to itself would make escape() a no-op.
_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
                    ord('"'): '&quot;', ord('\''): '&#x27;'}

# NB: this is a candidate for a bytes/string polymorphic interface

def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote (') characters are also
    translated.

    *s* must be a text (unicode) string; passing bytes trips the assertion
    below.  Returns the escaped string.
    """
    assert not isinstance(s, bytes), 'Pass a unicode string'
    if quote:
        return s.translate(_escape_map_full)
    return s.translate(_escape_map)
							
								
								
									
										2514
									
								
								venv/lib/python3.12/site-packages/future/backports/html/entities.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2514
									
								
								venv/lib/python3.12/site-packages/future/backports/html/entities.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,536 @@ | ||||
| """A parser for HTML and XHTML. | ||||
|  | ||||
| Backported for python-future from Python 3.3. | ||||
| """ | ||||
|  | ||||
| # This file is based on sgmllib.py, but the API is slightly different. | ||||
|  | ||||
| # XXX There should be a way to distinguish between PCDATA (parsed | ||||
| # character data -- the normal case), RCDATA (replaceable character | ||||
| # data -- only char and entity references and end tags are special) | ||||
| # and CDATA (character data -- only end tags are special). | ||||
|  | ||||
| from __future__ import (absolute_import, division, | ||||
|                         print_function, unicode_literals) | ||||
| from future.builtins import * | ||||
| from future.backports import _markupbase | ||||
| import re | ||||
| import warnings | ||||
|  | ||||
# Regular expressions used for parsing

interesting_normal = re.compile('[&<]')
incomplete = re.compile('&[a-zA-Z#]')

entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

starttagopen = re.compile('<[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Raw string: '\s' in a plain literal is an invalid escape sequence.
tagfind = re.compile(r'([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
# Note:
#  1) the strict attrfind isn't really strict, but we can't make it
#     correctly strict without breaking backward compatibility;
#  2) if you change attrfind remember to update locatestarttagend too;
#  3) if you change attrfind and/or locatestarttagend the parser will
#     explode, so don't do it.
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
attrfind_tolerant = re.compile(
    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
""", re.VERBOSE)
locatestarttagend_tolerant = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile('>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
# Raw string for the same reason as tagfind above.
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
|  | ||||
|  | ||||
class HTMLParseError(Exception):
    """Exception raised for all parse errors.

    Carries the message in ``msg`` and the (line, column offset) pair given
    as *position* in ``lineno`` and ``offset``; either may be None.
    """

    def __init__(self, msg, position=(None, None)):
        assert msg
        self.msg = msg
        self.lineno, self.offset = position[0], position[1]

    def __str__(self):
        # Message first, then whichever location parts are known.  The
        # stored offset is zero-based; it is shown as a one-based column.
        pieces = [self.msg]
        if self.lineno is not None:
            pieces.append(", at line %d" % self.lineno)
        if self.offset is not None:
            pieces.append(", column %d" % (self.offset + 1))
        return "".join(pieces)
|  | ||||
|  | ||||
| class HTMLParser(_markupbase.ParserBase): | ||||
|     """Find tags and other markup and call handler functions. | ||||
|  | ||||
|     Usage: | ||||
|         p = HTMLParser() | ||||
|         p.feed(data) | ||||
|         ... | ||||
|         p.close() | ||||
|  | ||||
|     Start tags are handled by calling self.handle_starttag() or | ||||
|     self.handle_startendtag(); end tags by self.handle_endtag().  The | ||||
|     data between tags is passed from the parser to the derived class | ||||
|     by calling self.handle_data() with the data as argument (the data | ||||
|     may be split up in arbitrary chunks).  Entity references are | ||||
|     passed by calling self.handle_entityref() with the entity | ||||
|     reference as the argument.  Numeric character references are | ||||
|     passed to self.handle_charref() with the string containing the | ||||
|     reference as the argument. | ||||
|     """ | ||||
|  | ||||
|     CDATA_CONTENT_ELEMENTS = ("script", "style") | ||||
|  | ||||
|     def __init__(self, strict=False): | ||||
|         """Initialize and reset this instance. | ||||
|  | ||||
|         If strict is set to False (the default) the parser will parse invalid | ||||
|         markup, otherwise it will raise an error.  Note that the strict mode | ||||
|         is deprecated. | ||||
|         """ | ||||
|         if strict: | ||||
|             warnings.warn("The strict mode is deprecated.", | ||||
|                           DeprecationWarning, stacklevel=2) | ||||
|         self.strict = strict | ||||
|         self.reset() | ||||
|  | ||||
|     def reset(self): | ||||
|         """Reset this instance.  Loses all unprocessed data.""" | ||||
|         self.rawdata = '' | ||||
|         self.lasttag = '???' | ||||
|         self.interesting = interesting_normal | ||||
|         self.cdata_elem = None | ||||
|         _markupbase.ParserBase.reset(self) | ||||
|  | ||||
|     def feed(self, data): | ||||
|         r"""Feed data to the parser. | ||||
|  | ||||
|         Call this as often as you want, with as little or as much text | ||||
|         as you want (may include '\n'). | ||||
|         """ | ||||
|         self.rawdata = self.rawdata + data | ||||
|         self.goahead(0) | ||||
|  | ||||
|     def close(self): | ||||
|         """Handle any buffered data.""" | ||||
|         self.goahead(1) | ||||
|  | ||||
|     def error(self, message): | ||||
|         raise HTMLParseError(message, self.getpos()) | ||||
|  | ||||
|     __starttag_text = None | ||||
|  | ||||
|     def get_starttag_text(self): | ||||
|         """Return full source of start tag: '<...>'.""" | ||||
|         return self.__starttag_text | ||||
|  | ||||
|     def set_cdata_mode(self, elem): | ||||
|         self.cdata_elem = elem.lower() | ||||
|         self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I) | ||||
|  | ||||
|     def clear_cdata_mode(self): | ||||
|         self.interesting = interesting_normal | ||||
|         self.cdata_elem = None | ||||
|  | ||||
|     # Internal -- handle data as far as reasonable.  May leave state | ||||
|     # and data to be processed by a subsequent call.  If 'end' is | ||||
|     # true, force handling all data as if followed by EOF marker. | ||||
|     def goahead(self, end): | ||||
|         rawdata = self.rawdata | ||||
|         i = 0 | ||||
|         n = len(rawdata) | ||||
|         while i < n: | ||||
|             match = self.interesting.search(rawdata, i) # < or & | ||||
|             if match: | ||||
|                 j = match.start() | ||||
|             else: | ||||
|                 if self.cdata_elem: | ||||
|                     break | ||||
|                 j = n | ||||
|             if i < j: self.handle_data(rawdata[i:j]) | ||||
|             i = self.updatepos(i, j) | ||||
|             if i == n: break | ||||
|             startswith = rawdata.startswith | ||||
|             if startswith('<', i): | ||||
|                 if starttagopen.match(rawdata, i): # < + letter | ||||
|                     k = self.parse_starttag(i) | ||||
|                 elif startswith("</", i): | ||||
|                     k = self.parse_endtag(i) | ||||
|                 elif startswith("<!--", i): | ||||
|                     k = self.parse_comment(i) | ||||
|                 elif startswith("<?", i): | ||||
|                     k = self.parse_pi(i) | ||||
|                 elif startswith("<!", i): | ||||
|                     if self.strict: | ||||
|                         k = self.parse_declaration(i) | ||||
|                     else: | ||||
|                         k = self.parse_html_declaration(i) | ||||
|                 elif (i + 1) < n: | ||||
|                     self.handle_data("<") | ||||
|                     k = i + 1 | ||||
|                 else: | ||||
|                     break | ||||
|                 if k < 0: | ||||
|                     if not end: | ||||
|                         break | ||||
|                     if self.strict: | ||||
|                         self.error("EOF in middle of construct") | ||||
|                     k = rawdata.find('>', i + 1) | ||||
|                     if k < 0: | ||||
|                         k = rawdata.find('<', i + 1) | ||||
|                         if k < 0: | ||||
|                             k = i + 1 | ||||
|                     else: | ||||
|                         k += 1 | ||||
|                     self.handle_data(rawdata[i:k]) | ||||
|                 i = self.updatepos(i, k) | ||||
|             elif startswith("&#", i): | ||||
|                 match = charref.match(rawdata, i) | ||||
|                 if match: | ||||
|                     name = match.group()[2:-1] | ||||
|                     self.handle_charref(name) | ||||
|                     k = match.end() | ||||
|                     if not startswith(';', k-1): | ||||
|                         k = k - 1 | ||||
|                     i = self.updatepos(i, k) | ||||
|                     continue | ||||
|                 else: | ||||
|                     if ";" in rawdata[i:]: #bail by consuming &# | ||||
|                         self.handle_data(rawdata[0:2]) | ||||
|                         i = self.updatepos(i, 2) | ||||
|                     break | ||||
|             elif startswith('&', i): | ||||
|                 match = entityref.match(rawdata, i) | ||||
|                 if match: | ||||
|                     name = match.group(1) | ||||
|                     self.handle_entityref(name) | ||||
|                     k = match.end() | ||||
|                     if not startswith(';', k-1): | ||||
|                         k = k - 1 | ||||
|                     i = self.updatepos(i, k) | ||||
|                     continue | ||||
|                 match = incomplete.match(rawdata, i) | ||||
|                 if match: | ||||
|                     # match.group() will contain at least 2 chars | ||||
|                     if end and match.group() == rawdata[i:]: | ||||
|                         if self.strict: | ||||
|                             self.error("EOF in middle of entity or char ref") | ||||
|                         else: | ||||
|                             if k <= i: | ||||
|                                 k = n | ||||
|                             i = self.updatepos(i, i + 1) | ||||
|                     # incomplete | ||||
|                     break | ||||
|                 elif (i + 1) < n: | ||||
|                     # not the end of the buffer, and can't be confused | ||||
|                     # with some other construct | ||||
|                     self.handle_data("&") | ||||
|                     i = self.updatepos(i, i + 1) | ||||
|                 else: | ||||
|                     break | ||||
|             else: | ||||
|                 assert 0, "interesting.search() lied" | ||||
|         # end while | ||||
|         if end and i < n and not self.cdata_elem: | ||||
|             self.handle_data(rawdata[i:n]) | ||||
|             i = self.updatepos(i, n) | ||||
|         self.rawdata = rawdata[i:] | ||||
|  | ||||
|     # Internal -- parse html declarations, return length or -1 if not terminated | ||||
|     # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state | ||||
|     # See also parse_declaration in _markupbase | ||||
|     def parse_html_declaration(self, i): | ||||
|         rawdata = self.rawdata | ||||
|         assert rawdata[i:i+2] == '<!', ('unexpected call to ' | ||||
|                                         'parse_html_declaration()') | ||||
|         if rawdata[i:i+4] == '<!--': | ||||
|             # this case is actually already handled in goahead() | ||||
|             return self.parse_comment(i) | ||||
|         elif rawdata[i:i+3] == '<![': | ||||
|             return self.parse_marked_section(i) | ||||
|         elif rawdata[i:i+9].lower() == '<!doctype': | ||||
|             # find the closing > | ||||
|             gtpos = rawdata.find('>', i+9) | ||||
|             if gtpos == -1: | ||||
|                 return -1 | ||||
|             self.handle_decl(rawdata[i+2:gtpos]) | ||||
|             return gtpos+1 | ||||
|         else: | ||||
|             return self.parse_bogus_comment(i) | ||||
|  | ||||
|     # Internal -- parse bogus comment, return length or -1 if not terminated | ||||
|     # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state | ||||
|     def parse_bogus_comment(self, i, report=1): | ||||
|         rawdata = self.rawdata | ||||
|         assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to ' | ||||
|                                                 'parse_comment()') | ||||
|         pos = rawdata.find('>', i+2) | ||||
|         if pos == -1: | ||||
|             return -1 | ||||
|         if report: | ||||
|             self.handle_comment(rawdata[i+2:pos]) | ||||
|         return pos + 1 | ||||
|  | ||||
|     # Internal -- parse processing instr, return end or -1 if not terminated | ||||
|     def parse_pi(self, i): | ||||
|         rawdata = self.rawdata | ||||
|         assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()' | ||||
|         match = piclose.search(rawdata, i+2) # > | ||||
|         if not match: | ||||
|             return -1 | ||||
|         j = match.start() | ||||
|         self.handle_pi(rawdata[i+2: j]) | ||||
|         j = match.end() | ||||
|         return j | ||||
|  | ||||
|     # Internal -- handle starttag, return end or -1 if not terminated | ||||
    def parse_starttag(self, i):
        """Parse the start tag beginning at index *i* of self.rawdata.

        Returns the end index of the tag, or a negative value (passed on
        from check_for_whole_start_tag()) if the tag is not complete yet.
        On success the tag's source text is remembered for
        get_starttag_text(), self.lasttag is set to the lower-cased tag
        name, and handle_starttag() or handle_startendtag() is called with
        the name and a list of (lower-cased name, value) attribute pairs.
        Tags listed in CDATA_CONTENT_ELEMENTS switch the parser into CDATA
        mode.  If what follows the attributes is not '>' or '/>', strict
        mode reports an error; otherwise the whole tag is passed to
        handle_data().
        """
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = match.group(1).lower()
        while k < endpos:
            if self.strict:
                m = attrfind.match(rawdata, k)
            else:
                m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Attribute given without '=value'.
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Strip the matching pair of surrounding quotes.
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # NOTE(review): lineno/offset are computed here but not used
            # again in this method.
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            if self.strict:
                self.error("junk characters in start tag: %r"
                           % (rawdata[k:endpos][:20],))
            # Tolerant mode: hand the malformed tag through as plain data.
            self.handle_data(rawdata[i:endpos])
            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos
|  | ||||
|     # Internal -- check to see if we have a complete starttag; return end | ||||
|     # or -1 if incomplete. | ||||
    def check_for_whole_start_tag(self, i):
        """Check whether a complete start tag begins at index *i*.

        Matches the strict or tolerant start-tag pattern (depending on
        self.strict) and inspects the character that follows the match.
        Returns the index just past the closing '>' or '/>' when the tag
        is complete, and -1 when more input is needed.  For malformed
        input, strict mode calls error(); tolerant mode returns the end of
        the matched text (or i + 1 if the match did not advance past i).
        Raises AssertionError if the pattern does not match at all.
        """
        rawdata = self.rawdata
        if self.strict:
            m = locatestarttagend.match(rawdata, i)
        else:
            m = locatestarttagend_tolerant.match(rawdata, i)
        if m:
            j = m.end()
            # One-character lookahead; '' when the buffer ends at j.
            next = rawdata[j:j+1]
            if next == ">":
                return j + 1
            if next == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                # next == "/" already implies this test is true, so the
                # statements after this 'if' in the branch are never
                # reached.
                if rawdata.startswith("/", j):
                    # buffer boundary
                    return -1
                # else bogus input
                if self.strict:
                    self.updatepos(i, j + 1)
                    self.error("malformed empty start tag")
                if j > i:
                    return j
                else:
                    return i + 1
            if next == "":
                # end of input
                return -1
            if next in ("abcdefghijklmnopqrstuvwxyz=/"
                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            if self.strict:
                self.updatepos(i, j)
                self.error("malformed start tag")
            if j > i:
                return j
            else:
                return i + 1
        raise AssertionError("we should not get here!")
|  | ||||
|     # Internal -- parse endtag, return end or -1 if incomplete | ||||
    def parse_endtag(self, i):
        """Parse the end tag beginning at index *i* of self.rawdata.

        Returns the index just past the tag, or -1 if no '>' has been
        buffered yet.  A well-formed end tag is reported through
        handle_endtag() with the lower-cased name and CDATA mode is
        cleared.  While in CDATA mode, any end tag other than the one for
        the current CDATA element is passed to handle_data() unchanged.
        A malformed end tag raises via error() in strict mode; in tolerant
        mode the name is recovered where possible, '</>' is skipped, and
        anything else is handled as a bogus comment.
        """
        rawdata = self.rawdata
        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
        match = endendtag.search(rawdata, i+1) # >
        if not match:
            return -1
        gtpos = match.end()
        match = endtagfind.match(rawdata, i) # </ + tag + >
        if not match:
            if self.cdata_elem is not None:
                # Inside a CDATA element everything but its own end tag is
                # data.
                self.handle_data(rawdata[i:gtpos])
                return gtpos
            if self.strict:
                self.error("bad end tag: %r" % (rawdata[i:gtpos],))
            # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
            namematch = tagfind_tolerant.match(rawdata, i+2)
            if not namematch:
                # w3.org/TR/html5/tokenization.html#end-tag-open-state
                if rawdata[i:i+3] == '</>':
                    return i+3
                else:
                    return self.parse_bogus_comment(i)
            tagname = namematch.group().lower()
            # consume and ignore other stuff between the name and the >
            # Note: this is not 100% correct, since we might have things like
            # </tag attr=">">, but looking for > after the name should cover
            # most of the cases and is much simpler
            gtpos = rawdata.find('>', namematch.end())
            self.handle_endtag(tagname)
            return gtpos+1

        elem = match.group(1).lower() # script or style
        if self.cdata_elem is not None:
            if elem != self.cdata_elem:
                # End tag of some other element inside CDATA content.
                self.handle_data(rawdata[i:gtpos])
                return gtpos

        self.handle_endtag(elem.lower())
        self.clear_cdata_mode()
        return gtpos
|  | ||||
|     # Overridable -- finish processing of start+end tag: <tag.../> | ||||
|     def handle_startendtag(self, tag, attrs): | ||||
|         self.handle_starttag(tag, attrs) | ||||
|         self.handle_endtag(tag) | ||||
|  | ||||
|     # Overridable -- handle start tag | ||||
|     def handle_starttag(self, tag, attrs): | ||||
|         pass | ||||
|  | ||||
|     # Overridable -- handle end tag | ||||
|     def handle_endtag(self, tag): | ||||
|         pass | ||||
|  | ||||
|     # Overridable -- handle character reference | ||||
|     def handle_charref(self, name): | ||||
|         pass | ||||
|  | ||||
|     # Overridable -- handle entity reference | ||||
|     def handle_entityref(self, name): | ||||
|         pass | ||||
|  | ||||
|     # Overridable -- handle data | ||||
|     def handle_data(self, data): | ||||
|         pass | ||||
|  | ||||
|     # Overridable -- handle comment | ||||
|     def handle_comment(self, data): | ||||
|         pass | ||||
|  | ||||
|     # Overridable -- handle declaration | ||||
|     def handle_decl(self, decl): | ||||
|         pass | ||||
|  | ||||
|     # Overridable -- handle processing instruction | ||||
|     def handle_pi(self, data): | ||||
|         pass | ||||
|  | ||||
|     def unknown_decl(self, data): | ||||
|         if self.strict: | ||||
|             self.error("unknown declaration: %r" % (data,)) | ||||
|  | ||||
|     # Internal -- helper to remove special character quoting | ||||
|     def unescape(self, s): | ||||
|         if '&' not in s: | ||||
|             return s | ||||
|         def replaceEntities(s): | ||||
|             s = s.groups()[0] | ||||
|             try: | ||||
|                 if s[0] == "#": | ||||
|                     s = s[1:] | ||||
|                     if s[0] in ['x','X']: | ||||
|                         c = int(s[1:].rstrip(';'), 16) | ||||
|                     else: | ||||
|                         c = int(s.rstrip(';')) | ||||
|                     return chr(c) | ||||
|             except ValueError: | ||||
|                 return '&#' + s | ||||
|             else: | ||||
|                 from future.backports.html.entities import html5 | ||||
|                 if s in html5: | ||||
|                     return html5[s] | ||||
|                 elif s.endswith(';'): | ||||
|                     return '&' + s | ||||
|                 for x in range(2, len(s)): | ||||
|                     if s[:x] in html5: | ||||
|                         return html5[s[:x]] + s[x:] | ||||
|                 else: | ||||
|                     return '&' + s | ||||
|  | ||||
|         return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))", | ||||
|                       replaceEntities, s) | ||||
							
								
								
									
										1346
									
								
								venv/lib/python3.12/site-packages/future/backports/http/client.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1346
									
								
								venv/lib/python3.12/site-packages/future/backports/http/client.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2116
									
								
								venv/lib/python3.12/site-packages/future/backports/http/cookiejar.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2116
									
								
								venv/lib/python3.12/site-packages/future/backports/http/cookiejar.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,598 @@ | ||||
| #### | ||||
| # Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu> | ||||
| # | ||||
| #                All Rights Reserved | ||||
| # | ||||
| # Permission to use, copy, modify, and distribute this software | ||||
| # and its documentation for any purpose and without fee is hereby | ||||
| # granted, provided that the above copyright notice appear in all | ||||
| # copies and that both that copyright notice and this permission | ||||
| # notice appear in supporting documentation, and that the name of | ||||
| # Timothy O'Malley  not be used in advertising or publicity | ||||
| # pertaining to distribution of the software without specific, written | ||||
| # prior permission. | ||||
| # | ||||
| # Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS | ||||
| # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | ||||
| # AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR | ||||
| # ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||||
| # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | ||||
| # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | ||||
| # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||||
| # PERFORMANCE OF THIS SOFTWARE. | ||||
| # | ||||
| #### | ||||
| # | ||||
| # Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp | ||||
| #   by Timothy O'Malley <timo@alum.mit.edu> | ||||
| # | ||||
| #  Cookie.py is a Python module for the handling of HTTP | ||||
| #  cookies as a Python dictionary.  See RFC 2109 for more | ||||
| #  information on cookies. | ||||
| # | ||||
| #  The original idea to treat Cookies as a dictionary came from | ||||
| #  Dave Mitchell (davem@magnet.com) in 1995, when he released the | ||||
| #  first version of nscookie.py. | ||||
| # | ||||
| #### | ||||
|  | ||||
| r""" | ||||
| http.cookies module ported to python-future from Py3.3 | ||||
|  | ||||
| Here's a sample session to show how to use this module. | ||||
| At the moment, this is the only documentation. | ||||
|  | ||||
| The Basics | ||||
| ---------- | ||||
|  | ||||
| Importing is easy... | ||||
|  | ||||
|    >>> from http import cookies | ||||
|  | ||||
| Most of the time you start by creating a cookie. | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|  | ||||
| Once you've created your Cookie, you can add values just as if it were | ||||
| a dictionary. | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|    >>> C["fig"] = "newton" | ||||
|    >>> C["sugar"] = "wafer" | ||||
|    >>> C.output() | ||||
|    'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' | ||||
|  | ||||
| Notice that the printable representation of a Cookie is the | ||||
| appropriate format for a Set-Cookie: header.  This is the | ||||
| default behavior.  You can change the header and printed | ||||
| attributes by using the .output() function | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|    >>> C["rocky"] = "road" | ||||
|    >>> C["rocky"]["path"] = "/cookie" | ||||
|    >>> print(C.output(header="Cookie:")) | ||||
|    Cookie: rocky=road; Path=/cookie | ||||
|    >>> print(C.output(attrs=[], header="Cookie:")) | ||||
|    Cookie: rocky=road | ||||
|  | ||||
| The load() method of a Cookie extracts cookies from a string.  In a | ||||
| CGI script, you would use this method to extract the cookies from the | ||||
| HTTP_COOKIE environment variable. | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|    >>> C.load("chips=ahoy; vienna=finger") | ||||
|    >>> C.output() | ||||
|    'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' | ||||
|  | ||||
| The load() method is darn-tootin smart about identifying cookies | ||||
| within a string.  Escaped quotation marks, nested semicolons, and other | ||||
| such trickeries do not confuse it. | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|    >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') | ||||
|    >>> print(C) | ||||
|    Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" | ||||
|  | ||||
| Each element of the Cookie also supports all of the RFC 2109 | ||||
| Cookie attributes.  Here's an example which sets the Path | ||||
| attribute. | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|    >>> C["oreo"] = "doublestuff" | ||||
|    >>> C["oreo"]["path"] = "/" | ||||
|    >>> print(C) | ||||
|    Set-Cookie: oreo=doublestuff; Path=/ | ||||
|  | ||||
| Each dictionary element has a 'value' attribute, which gives you | ||||
| back the value associated with the key. | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|    >>> C["twix"] = "none for you" | ||||
|    >>> C["twix"].value | ||||
|    'none for you' | ||||
|  | ||||
| The SimpleCookie expects that all values should be standard strings. | ||||
| Just to be sure, SimpleCookie invokes the str() builtin to convert | ||||
| the value to a string, when the values are set dictionary-style. | ||||
|  | ||||
|    >>> C = cookies.SimpleCookie() | ||||
|    >>> C["number"] = 7 | ||||
|    >>> C["string"] = "seven" | ||||
|    >>> C["number"].value | ||||
|    '7' | ||||
|    >>> C["string"].value | ||||
|    'seven' | ||||
|    >>> C.output() | ||||
|    'Set-Cookie: number=7\r\nSet-Cookie: string=seven' | ||||
|  | ||||
| Finis. | ||||
| """ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import print_function | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import chr, dict, int, str | ||||
| from future.utils import PY2, as_native_str | ||||
|  | ||||
| # | ||||
| # Import our required modules | ||||
| # | ||||
| import re | ||||
| if PY2: | ||||
|     re.ASCII = 0    # for py2 compatibility | ||||
| import string | ||||
|  | ||||
| __all__ = ["CookieError", "BaseCookie", "SimpleCookie"] | ||||
|  | ||||
# Bound join methods used as shorthand by the rest of this module.
_nulljoin = "".join          # concatenate with no separator
_semispacejoin = "; ".join   # separate items with "; "
_spacejoin = " ".join        # separate items with a single space
|  | ||||
| # | ||||
| # Define an exception visible to External modules | ||||
| # | ||||
class CookieError(Exception):
    """Raised by this module for invalid cookie keys or attributes."""
|  | ||||
|  | ||||
| # These quoting routines conform to the RFC2109 specification, which in | ||||
| # turn references the character definitions from RFC2068.  They provide | ||||
| # a two-way quoting algorithm.  Any non-text character is translated | ||||
| # into a 4 character sequence: a backslash followed by the | ||||
| # three-digit octal equivalent of the character.  Any '\' or '"' is | ||||
| # quoted with a preceding '\' slash. | ||||
| # | ||||
| # These are taken from RFC2068 and RFC2109. | ||||
| #       _LegalChars       is the list of chars which don't require "'s | ||||
| #       _Translator       hash-table for fast quoting | ||||
| # | ||||
# Characters that may appear in a cookie key or value without quoting.
_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"

# Quoting table: control characters (octal 000-037) map to a backslash
# followed by their three-digit octal code ...
_Translator = dict((chr(n), '\\%03o' % n) for n in range(0o40))

# ... because of the way browsers really handle cookies (as opposed to
# what the RFC says) ',' and ';' are octal-escaped too, while '"' and
# '\' are escaped with a leading backslash ...
_Translator.update([
    (',', '\\054'), (';', '\\073'),
    ('"', '\\"'), ('\\', '\\\\'),
])

# ... and DEL plus every character from octal 200 to 377 is octal-escaped.
_Translator.update((chr(n), '\\%03o' % n) for n in range(0o177, 0o400))
|  | ||||
def _quote(str, LegalChars=_LegalChars):
    r"""Return *str* in a form suitable for a cookie header.

    A string made up only of characters from *LegalChars* is returned
    unchanged.  Any other string is wrapped in double quotes, with each
    character that has an entry in the quoting table replaced by its
    escaped form (a \ followed by the character or its octal code).
    """
    for ch in str:
        if ch not in LegalChars:
            break
    else:
        # Nothing needs quoting.
        return str

    escaped = [_Translator.get(ch, ch) for ch in str]
    return '"' + _nulljoin(escaped) + '"'
|  | ||||
|  | ||||
# Kept for backward compatibility with code that imports these names;
# _unquote() itself no longer uses them.
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")

# One escape sequence: a backslash followed either by a three-digit octal
# code (group 1) or by any single character except newline (group 2).
# The octal alternative is tried first, so "\012" decodes to a newline.
_UnquotePatt = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))")


def _unquote_replace(match):
    """Return the character that one matched escape sequence stands for."""
    octal = match.group(1)
    if octal:
        return chr(int(octal, 8))
    return match.group(2)


def _unquote(mystr):
    r"""Undo the quoting applied to a cookie value.

    A string that is not wrapped in double quotes (or is shorter than two
    characters) is returned unchanged; per RFC 2109 it cannot contain
    special characters.  Otherwise the surrounding quotes are removed and
    escape sequences are decoded, for example:

        \012 --> \n
        \"   --> "

    A backslash that is not followed by a decodable character (at the very
    end, or before a newline) is kept as is.

    Decoding is done in a single regular-expression pass.  The earlier
    implementation searched the remainder of the string again after every
    escape, which took quadratic time on values with many backslashes.
    """
    if len(mystr) < 2:
        return mystr
    if mystr[0] != '"' or mystr[-1] != '"':
        return mystr

    # Strip the enclosing quotes, then decode every escape sequence.
    return _UnquotePatt.sub(_unquote_replace, mystr[1:-1])
|  | ||||
| # The _getdate() routine is used to set the expiration time in the cookie's HTTP | ||||
| # header.  By default, _getdate() returns the current time in the appropriate | ||||
| # "expires" format for a Set-Cookie header.  The one optional argument is an | ||||
| # offset from now, in seconds.  For example, an offset of -3600 means "one hour | ||||
| # ago".  The offset may be a floating point number. | ||||
| # | ||||
|  | ||||
# Day and month abbreviations.  _monthname starts with a None placeholder
# so that it can be indexed with month numbers 1-12.
_weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()

_monthname = [None] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()


def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
    """Return the time *future* seconds from now as an "expires" date string.

    The time is taken in GMT and formatted like
    "Wdy, DD Mon YYYY HH:MM:SS GMT".  *weekdayname* and *monthname* supply
    the abbreviations, indexed by weekday (0 is the first entry) and by
    month number respectively.
    """
    from time import gmtime, time
    stamp = gmtime(time() + future)
    fields = (weekdayname[stamp.tm_wday], stamp.tm_mday,
              monthname[stamp.tm_mon], stamp.tm_year,
              stamp.tm_hour, stamp.tm_min, stamp.tm_sec)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % fields
|  | ||||
|  | ||||
class Morsel(dict):
    """Hold a single cookie (key, value) pair together with its attributes.

    The mapping part of a Morsel stores the reserved cookie attributes
    under their lowercase names.  The cookie itself lives in the ``key``,
    ``value`` and ``coded_value`` instance attributes; ``coded_value`` is
    the network representation written out by OutputString().
    """
    # Reserved attribute names (lowercase) mapped to the traditional
    # spelling used when they are written out.  RFC 2109 reserves path,
    # comment, domain, max-age, secure and version; expires is reserved
    # for historical reasons and httponly is an extension from Microsoft.
    _reserved = {
        "expires"  : "expires",
        "path"     : "Path",
        "comment"  : "Comment",
        "domain"   : "Domain",
        "max-age"  : "Max-Age",
        "secure"   : "secure",
        "httponly" : "httponly",
        "version"  : "Version",
    }

    # Attributes that BaseCookie accepts without a value when parsing.
    _flags = set(['secure', 'httponly'])

    def __init__(self):
        # No cookie has been stored yet.
        self.key = None
        self.value = None
        self.coded_value = None

        # Every reserved attribute starts out as the empty string.
        for attr in self._reserved:
            dict.__setitem__(self, attr, "")

    def __setitem__(self, K, V):
        """Set reserved attribute *K* (case-insensitive) to *V*.

        Raises CookieError when *K* is not a reserved attribute name.
        """
        attr = K.lower()
        if attr in self._reserved:
            dict.__setitem__(self, attr, V)
            return
        raise CookieError("Invalid Attribute %s" % attr)

    def isReservedKey(self, K):
        """Return True if *K* is a reserved attribute name (any case)."""
        lowered = K.lower()
        return lowered in self._reserved

    def set(self, key, val, coded_val, LegalChars=_LegalChars):
        """Store the cookie's key, real value and coded value.

        Raises CookieError when *key* is a reserved attribute name or
        contains a character that is not in *LegalChars*.
        """
        if key.lower() in self._reserved:
            raise CookieError("Attempt to set a reserved key: %s" % key)
        if not all(c in LegalChars for c in key):
            raise CookieError("Illegal key value: %s" % key)

        # The key is acceptable, so record everything.
        self.key, self.value, self.coded_value = key, val, coded_val

    def output(self, attrs=None, header="Set-Cookie:"):
        """Return *header*, a space, and the OutputString() for *attrs*."""
        body = self.OutputString(attrs)
        return "%s %s" % (header, body)

    __str__ = output

    @as_native_str()
    def __repr__(self):
        shown = self.value
        if PY2 and isinstance(shown, unicode):
            # Convert to a newstr so the repr carries no u prefix.
            shown = str(shown)
        return '<%s: %s=%s>' % (self.__class__.__name__,
                                str(self.key), repr(shown))

    def js_output(self, attrs=None):
        """Return a <script> snippet that assigns this cookie."""
        escaped = self.OutputString(attrs).replace('"', r'\"')
        return """
        <script type="text/javascript">
        <!-- begin hiding
        document.cookie = \"%s\";
        // end hiding -->
        </script>
        """ % (escaped,)

    def OutputString(self, attrs=None):
        """Return "key=coded_value" followed by the attributes that are set.

        Only attributes named in *attrs* (default: all reserved ones) whose
        value is not the empty string are written, in sorted order and
        separated by "; ".  An integer "expires" is turned into a date
        that many seconds from now, and "secure"/"httponly" are written
        as bare words.
        """
        if attrs is None:
            attrs = self._reserved

        # The key=value pair always comes first.
        parts = ["%s=%s" % (self.key, self.coded_value)]

        for name, setting in sorted(self.items()):
            if setting == "" or name not in attrs:
                continue
            label = self._reserved[name]
            if name == "expires" and isinstance(setting, int):
                parts.append("%s=%s" % (label, _getdate(setting)))
            elif name == "max-age" and isinstance(setting, int):
                parts.append("%s=%d" % (label, setting))
            elif name in ("secure", "httponly"):
                parts.append(str(label))
            else:
                parts.append("%s=%s" % (label, setting))

        return _semispacejoin(parts)
|  | ||||
|  | ||||
| # | ||||
| # Pattern for finding cookie | ||||
| # | ||||
| # This used to be strict parsing based on the RFC2109 and RFC2068 | ||||
| # specifications.  I have since discovered that MSIE 3.0x doesn't | ||||
| # follow the character rules outlined in those specs.  As a | ||||
| # result, the parsing rules here are less strict. | ||||
| # | ||||
|  | ||||
_LegalCharsPatt  = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"

# The verbose flag is passed to re.compile() rather than written inline as
# "(?x)".  The inline form came after the pattern's leading newline and
# indentation, and Python 3.11+ rejects a global flag that is not at the
# very start of the expression, so this module failed to import there.
# Under Python 2 this module sets re.ASCII to 0, so the flag expression
# reduces to re.VERBOSE.
_CookiePattern = re.compile(r"""
    (?P<key>                       # Start of group 'key'
    """ + _LegalCharsPatt + r"""+?   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
    \s*=\s*                          # Equal Sign
    (?P<val>                         # Start of group 'val'
    "(?:[^\\"]|\\.)*"                  # Any doublequoted string
    |                                  # or
    \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT  # Special case for "expires" attr
    |                                  # or
    """ + _LegalCharsPatt + r"""*      # Any word or empty string
    )                                # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """, re.ASCII | re.VERBOSE)    # re.ASCII may be removed if safe.
|  | ||||
|  | ||||
| # At long last, here is the cookie class.  Using this class is almost just like | ||||
| # using a dictionary.  See this module's docstring for example usage. | ||||
| # | ||||
class BaseCookie(dict):
    """A dictionary of Morsels, keyed by cookie name."""

    def value_decode(self, val):
        """Return (real_value, coded_value) for a value read from a header.

        Called before a cookie's value is set from its network
        representation.  The base class returns *val* for both; override
        to change how cookies are decoded.
        """
        return val, val

    def value_encode(self, val):
        """Return (real_value, coded_value) for a value being assigned.

        Called before a cookie's value is set by dictionary-style
        assignment.  The base class converts *val* with str() and uses the
        result for both; override to change how cookies are encoded.
        """
        text = str(val)
        return text, text

    def __init__(self, input=None):
        # Anything truthy is handed to load(); see there for accepted types.
        if input:
            self.load(input)

    def __set(self, key, real_value, coded_value):
        """Store a cookie value, reusing the Morsel already kept for *key*."""
        morsel = self.get(key, Morsel())
        morsel.set(key, real_value, coded_value)
        dict.__setitem__(self, key, morsel)

    def __setitem__(self, key, value):
        """Dictionary-style assignment; *value* goes through value_encode()."""
        real_value, coded_value = self.value_encode(value)
        self.__set(key, real_value, coded_value)

    def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
        """Return the cookies as header lines, sorted by key, joined by *sep*."""
        lines = [morsel.output(attrs, header)
                 for _, morsel in sorted(self.items())]
        return sep.join(lines)

    __str__ = output

    @as_native_str()
    def __repr__(self):
        pairs = []
        for key, morsel in sorted(self.items()):
            shown = morsel.value
            if PY2 and isinstance(shown, unicode):
                # Convert to a newstr so the repr carries no u prefix.
                shown = str(shown)
            pairs.append('%s=%s' % (str(key), repr(shown)))
        return '<%s: %s>' % (self.__class__.__name__, _spacejoin(pairs))

    def js_output(self, attrs=None):
        """Return the concatenated JavaScript snippets of all cookies."""
        snippets = [morsel.js_output(attrs)
                    for _, morsel in sorted(self.items())]
        return _nulljoin(snippets)

    def load(self, rawdata):
        """Load cookies from a string (presumably HTTP_COOKIE) or a mapping.

        A string is parsed for cookies.  Anything else must provide
        items(); each pair is assigned through __setitem__, so values
        pass through value_encode().
        """
        if not isinstance(rawdata, str):
            # self.update() wouldn't call our custom __setitem__
            for key, value in rawdata.items():
                self[key] = value
            return
        self.__parse_string(rawdata)

    def __parse_string(self, mystr, patt=_CookiePattern):
        """Scan *mystr* with *patt* and store every cookie found in it."""
        pos = 0              # where the next search starts
        end = len(mystr)
        current = None       # morsel that following attributes belong to

        while 0 <= pos < end:
            found = patt.search(mystr, pos)
            if not found:
                # No more cookies
                break

            key = found.group("key")
            value = found.group("val")
            pos = found.end(0)

            if key[0] == "$":
                # "$name" items are stored as attributes of the current
                # morsel, if there is one; otherwise they are dropped.
                # See RFC 2109.
                if current:
                    current[key[1:]] = value
                continue

            if key.lower() in Morsel._reserved:
                # A reserved attribute: applies to the current morsel only.
                if not current:
                    continue
                if value is not None:
                    current[key] = _unquote(value)
                elif key.lower() in Morsel._flags:
                    # Flag attributes may appear without a value.
                    current[key] = True
                continue

            if value is not None:
                # An ordinary key=value pair starts a new cookie.
                real_value, coded_value = self.value_decode(value)
                self.__set(key, real_value, coded_value)
                current = self[key]
|  | ||||
|  | ||||
class SimpleCookie(BaseCookie):
    """A cookie container whose values are plain strings.

    Values assigned dictionary-style are converted with the builtin str()
    and quoted for the wire; values received from HTTP are unquoted and
    kept as strings.
    """

    def value_decode(self, val):
        # Real value is the unquoted text; coded value is what was received.
        real_value = _unquote(val)
        return real_value, val

    def value_encode(self, val):
        # Real value is str(val); coded value is its quoted form.
        text = str(val)
        coded = _quote(text)
        return text, coded
							
								
								
									
										1226
									
								
								venv/lib/python3.12/site-packages/future/backports/http/server.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1226
									
								
								venv/lib/python3.12/site-packages/future/backports/http/server.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										958
									
								
								venv/lib/python3.12/site-packages/future/backports/misc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										958
									
								
								venv/lib/python3.12/site-packages/future/backports/misc.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,958 @@ | ||||
| """ | ||||
| Miscellaneous function (re)definitions from the Py3.4+ standard library | ||||
| for Python 2.6/2.7. | ||||
|  | ||||
| - math.ceil                (for Python 2.7) | ||||
| - collections.OrderedDict  (for Python 2.6) | ||||
| - collections.Counter      (for Python 2.6) | ||||
| - collections.ChainMap     (for all versions prior to Python 3.3) | ||||
| - itertools.count          (for Python 2.6, with step parameter) | ||||
| - subprocess.check_output  (for Python 2.6) | ||||
| - reprlib.recursive_repr   (for Python 2.6+) | ||||
| - functools.cmp_to_key     (for Python 2.6) | ||||
| """ | ||||
|  | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| import subprocess | ||||
| from math import ceil as oldceil | ||||
|  | ||||
| from operator import itemgetter as _itemgetter, eq as _eq | ||||
| import sys | ||||
| import heapq as _heapq | ||||
| from _weakref import proxy as _proxy | ||||
| from itertools import repeat as _repeat, chain as _chain, starmap as _starmap | ||||
| from socket import getaddrinfo, SOCK_STREAM, error, socket | ||||
|  | ||||
| from future.utils import iteritems, itervalues, PY2, PY26, PY3 | ||||
|  | ||||
| if PY2: | ||||
|     from collections import Mapping, MutableMapping | ||||
| else: | ||||
|     from collections.abc import Mapping, MutableMapping | ||||
|  | ||||
|  | ||||
def ceil(x):
    """
    Integer-valued ceiling of ``x``.

    Delegates to ``math.ceil`` (imported in this module as ``oldceil``) and
    coerces the result with ``int()``, so the smallest integral value >= x
    is always handed back as an integer.
    """
    ceiling = oldceil(x)
    return int(ceiling)
|  | ||||
|  | ||||
| ######################################################################## | ||||
| ###  reprlib.recursive_repr decorator from Py3.4 | ||||
| ######################################################################## | ||||
|  | ||||
| from itertools import islice | ||||
|  | ||||
if not PY26:
    # Anything newer than Python 2.6: itertools.count already takes ``step``.
    from itertools import count
else:
    # itertools.count in Py 2.6 doesn't accept a step parameter, so provide
    # a generator with the same calling convention.
    def count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        current = start
        while True:
            yield current
            current += step
|  | ||||
|  | ||||
| if PY3: | ||||
|     try: | ||||
|         from _thread import get_ident | ||||
|     except ImportError: | ||||
|         from _dummy_thread import get_ident | ||||
| else: | ||||
|     try: | ||||
|         from thread import get_ident | ||||
|     except ImportError: | ||||
|         from dummy_thread import get_ident | ||||
|  | ||||
|  | ||||
def recursive_repr(fillvalue='...'):
    '''Decorator to make a repr function return fillvalue for a recursive call.

    ``recursive_repr(fillvalue)`` returns a decorator for a one-argument
    function (normally a ``__repr__`` method).  While the decorated function
    is running for a given object on a given thread, any nested call for the
    same object on the same thread returns ``fillvalue`` immediately instead
    of recursing.

    The wrapper carries over the wrapped function's ``__module__``,
    ``__doc__``, ``__name__``, ``__annotations__`` and, where the
    interpreter provides one, ``__qualname__``.
    '''

    def decorating_function(user_function):
        # (id(object), thread id) pairs whose repr is being computed right now.
        repr_running = set()

        def wrapper(self):
            key = id(self), get_ident()
            if key in repr_running:
                # Re-entered for the same object on this thread: cut it short.
                return fillvalue
            repr_running.add(key)
            try:
                result = user_function(self)
            finally:
                # Always unregister, even if the user function raised.
                repr_running.discard(key)
            return result

        # Can't use functools.wraps() here because of bootstrap issues
        wrapper.__module__ = getattr(user_function, '__module__')
        wrapper.__doc__ = getattr(user_function, '__doc__')
        wrapper.__name__ = getattr(user_function, '__name__')
        wrapper.__annotations__ = getattr(user_function, '__annotations__', {})
        # Functions only have __qualname__ on Python 3.3+; copy it when it
        # exists so the wrapper does not advertise itself as
        # "recursive_repr.<locals>.decorating_function.<locals>.wrapper".
        qualname = getattr(user_function, '__qualname__', None)
        if qualname is not None:
            wrapper.__qualname__ = qualname
        return wrapper

    return decorating_function
|  | ||||
|  | ||||
| # OrderedDict Shim from  Raymond Hettinger, python core dev | ||||
| # http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ | ||||
| # here to support version 2.6. | ||||
|  | ||||
| ################################################################################ | ||||
| ### OrderedDict | ||||
| ################################################################################ | ||||
|  | ||||
class _Link(object):
    'One node of the doubly linked list that records key order.'
    __slots__ = ('prev', 'next', 'key', '__weakref__')


class OrderedDict(dict):
    'dict subclass that iterates over its keys in insertion order'
    # Design notes
    # ------------
    # * Values live in the inherited dict, which also supplies __getitem__,
    #   __len__, __contains__ and get unchanged.  Every other method is
    #   order-aware and has the same big-O cost as on a regular dictionary.
    # * Key order is recorded in a circular doubly linked list of _Link
    #   nodes; self.__map maps each key to its node.
    # * The list starts and ends at a sentinel node that is never deleted,
    #   which keeps the insert/remove code free of special cases.  The
    #   sentinel itself is held in self.__hardroot and a weakref proxy to it
    #   in self.__root.
    # * __setitem__ stores ``prev`` links as weakref proxies (to prevent
    #   circular references).  Each node is kept alive by the hard reference
    #   in self.__map, which goes away when its key is deleted.

    def __init__(*args, **kwds):
        '''Initialize an ordered dictionary.

        Accepts the same arguments as dict().  Keyword arguments are not
        recommended because their insertion order is arbitrary.

        '''
        if not args:
            raise TypeError("descriptor '__init__' of 'OrderedDict' object "
                            "needs an argument")
        self, positional = args[0], args[1:]
        if len(positional) > 1:
            raise TypeError('expected at most 1 arguments, got %d'
                            % len(positional))
        try:
            self.__root
        except AttributeError:
            # First initialisation of this instance: build the empty list,
            # i.e. a sentinel whose neighbours are both itself.
            self.__hardroot = _Link()
            self.__root = sentinel = _proxy(self.__hardroot)
            sentinel.prev = sentinel.next = sentinel
            self.__map = {}
        self.__update(*positional, **kwds)

    def __setitem__(self, key, value,
                    dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
        '''Implement od[key] = value.

        A key that is not present yet gets a new node appended at the end of
        the linked list; in every case the inherited dict stores the value.
        '''
        if key not in self:
            node = Link()
            self.__map[key] = node
            sentinel = self.__root
            tail = sentinel.prev
            node.prev = tail
            node.next = sentinel
            node.key = key
            tail.next = node
            sentinel.prev = proxy(node)
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        '''Implement del od[key].

        The inherited dict is updated first (so a missing key raises
        KeyError before the list is touched); then the key's node is
        unlinked by joining its predecessor and successor.
        '''
        dict_delitem(self, key)
        node = self.__map.pop(key)
        before, after = node.prev, node.next
        before.next = after
        after.prev = before

    def __iter__(self):
        'Yield the keys in insertion order.'
        sentinel = self.__root
        node = sentinel.next
        while node is not sentinel:
            yield node.key
            node = node.next

    def __reversed__(self):
        'Yield the keys in reverse insertion order.'
        sentinel = self.__root
        node = sentinel.prev
        while node is not sentinel:
            yield node.key
            node = node.prev

    def clear(self):
        'Remove every item from the dictionary.  Returns None.'
        sentinel = self.__root
        sentinel.prev = sentinel.next = sentinel
        self.__map.clear()
        dict.clear(self)

    def popitem(self, last=True):
        '''Remove one (key, value) pair and return it.

        The newest pair is taken when last is true (LIFO), the oldest one
        when it is false (FIFO).  Raises KeyError on an empty dictionary.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        sentinel = self.__root
        if last:
            victim = sentinel.prev
            neighbour = victim.prev
            neighbour.next = sentinel
            sentinel.prev = neighbour
        else:
            victim = sentinel.next
            neighbour = victim.next
            sentinel.next = neighbour
            neighbour.prev = sentinel
        key = victim.key
        del self.__map[key]
        return key, dict.pop(self, key)

    def move_to_end(self, key, last=True):
        '''Move an existing key to the end, or to the front if last is false.

        Raises KeyError if the key does not exist.  With last=True this is a
        fast equivalent of self[key] = self.pop(key).

        '''
        node = self.__map[key]
        # Unlink the node from its current position ...
        before, after = node.prev, node.next
        before.next = after
        after.prev = before
        # ... and splice it back in next to the sentinel.
        # NOTE(review): the ``prev`` links written below refer to the node
        # itself rather than to a weakref proxy as __setitem__ does.
        sentinel = self.__root
        if last:
            tail = sentinel.prev
            node.prev = tail
            node.next = sentinel
            tail.next = sentinel.prev = node
        else:
            head = sentinel.next
            node.prev = sentinel
            node.next = head
            sentinel.next = head.prev = node

    def __sizeof__(self):
        'Estimate the memory footprint of the dictionary and its bookkeeping.'
        getsize = sys.getsizeof
        link_count = len(self) + 1                  # one per key, plus root
        total = getsize(self.__dict__)              # instance dictionary
        total += getsize(self.__map) * 2            # internal + inherited dict
        total += getsize(self.__hardroot) * link_count      # link objects
        total += getsize(self.__root) * link_count          # proxy objects
        return total

    # Borrow the generic mapping implementations; they are written in terms
    # of __iter__/__getitem__/__setitem__ and therefore honour the ordering.
    update = __update = MutableMapping.update
    keys = MutableMapping.keys
    values = MutableMapping.values
    items = MutableMapping.items
    __ne__ = MutableMapping.__ne__

    # Private sentinel so that pop() can tell "no default given" from None.
    __marker = object()

    def pop(self, key, default=__marker):
        '''Remove key and return its value.

        When the key is missing, default is returned if one was supplied;
        otherwise KeyError is raised.

        '''
        if key not in self:
            if default is self.__marker:
                raise KeyError(key)
            return default
        value = self[key]
        del self[key]
        return value

    def setdefault(self, key, default=None):
        'Return od[key], storing default under key first if key is missing.'
        if key not in self:
            self[key] = default
            return default
        return self[key]

    @recursive_repr()
    def __repr__(self):
        'Return ClassName() when empty, else ClassName([(key, value), ...]).'
        cls_name = self.__class__.__name__
        if not self:
            return '%s()' % (cls_name,)
        return '%s(%r)' % (cls_name, list(self.items()))

    def __reduce__(self):
        'Return state information for pickling'
        state = vars(self).copy()
        # Strip every attribute a freshly created OrderedDict has as well
        # (the linked-list bookkeeping); only user-added attributes remain.
        for name in vars(OrderedDict()):
            state.pop(name, None)
        return self.__class__, (), state or None, None, iter(self.items())

    def copy(self):
        'Return a shallow copy, built by calling the instance class on self.'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''Build a new ordered dictionary whose keys come from iterable.

        Every key is mapped to value, which defaults to None.

        '''
        new = cls()
        for key in iterable:
            new[key] = value
        return new

    def __eq__(self, other):
        '''Compare for equality.

        Against another OrderedDict the key order must match as well;
        against any other mapping the comparison ignores order.

        '''
        same_items = dict.__eq__(self, other)
        if not isinstance(other, OrderedDict):
            return same_items
        return same_items and all(map(_eq, self, other))
|  | ||||
|  | ||||
| # {{{ http://code.activestate.com/recipes/576611/ (r11) | ||||
|  | ||||
| try: | ||||
|     from operator import itemgetter | ||||
|     from heapq import nlargest | ||||
| except ImportError: | ||||
|     pass | ||||
|  | ||||
| ######################################################################## | ||||
| ###  Counter | ||||
| ######################################################################## | ||||
|  | ||||
def _count_elements(mapping, iterable):
    """Add one to ``mapping[item]`` for every item produced by ``iterable``.

    Items that are not yet present in ``mapping`` start from a count of
    zero.  ``mapping`` is modified in place; nothing is returned.
    """
    lookup = mapping.get
    for item in iterable:
        previous = lookup(item, 0)
        mapping[item] = previous + 1
|  | ||||
class Counter(dict):
    '''Multiset (bag) built on top of dict.

    The dictionary keys are the hashable items being counted and the
    dictionary values are their counts.  Looking up an item that was never
    counted yields 0 instead of raising KeyError.

    >>> c = Counter('abcdeabcdabcaba')  # tally the letters of a string
    >>> c.most_common(3)                # the three most frequent letters
    [('a', 5), ('b', 4), ('c', 3)]
    >>> sorted(c)                       # the distinct letters
    ['a', 'b', 'c', 'd', 'e']
    >>> ''.join(sorted(c.elements()))   # every letter, with repetitions
    'aaaaabbbbcccdde'
    >>> sum(c.values())                 # grand total
    15
    >>> c['a']
    5
    >>> del c['b']                      # forget every 'b'
    >>> c['b']
    0
    >>> c.clear()
    >>> c
    Counter()

    A count that is set or reduced to zero keeps its entry until the key is
    deleted or the counter is cleared:

    >>> c = Counter('aaabbc')
    >>> c['b'] -= 2
    >>> c.most_common()
    [('a', 3), ('c', 1), ('b', 0)]

    '''
    # References:
    #   http://en.wikipedia.org/wiki/Multiset
    #   http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
    #   http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
    #   http://code.activestate.com/recipes/259174/
    #   Knuth, TAOCP Vol. II section 4.6.3

    def __init__(*args, **kwds):
        '''Create a counter, optionally pre-loaded with counts.

        The single optional positional argument is either an iterable of
        items to tally or a mapping of items to counts; keyword arguments
        are treated as item=count pairs.

        >>> c = Counter()                           # empty
        >>> c = Counter('gallahad')                 # from an iterable
        >>> c = Counter({'a': 4, 'b': 2})           # from a mapping
        >>> c = Counter(a=4, b=2)                   # from keyword args

        '''
        if not args:
            raise TypeError("descriptor '__init__' of 'Counter' object "
                            "needs an argument")
        self, sources = args[0], args[1:]
        if len(sources) > 1:
            raise TypeError('expected at most 1 arguments, got %d'
                            % len(sources))
        super(Counter, self).__init__()
        self.update(*sources, **kwds)

    def __missing__(self, key):
        'Report a count of zero for items that are not in the counter.'
        # dict.__getitem__ calls this hook instead of raising KeyError.
        return 0

    def most_common(self, n=None):
        '''Return (item, count) pairs ordered from most to least common.

        Only the first n pairs are returned; with n=None all of them are.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        by_count = _itemgetter(1)
        if n is not None:
            return _heapq.nlargest(n, self.items(), key=by_count)
        return sorted(self.items(), key=by_count, reverse=True)

    def elements(self):
        '''Return an iterator that repeats each item as often as its count.

        >>> c = Counter('ABCABC')
        >>> sorted(c.elements())
        ['A', 'A', 'B', 'B', 'C', 'C']

        Items whose count is zero or negative are skipped.

        '''
        # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
        repeated = _starmap(_repeat, self.items())
        return _chain.from_iterable(repeated)

    # Override dict methods where necessary

    @classmethod
    def fromkeys(cls, iterable, v=None):
        'Not supported for counters; always raises NotImplementedError.'
        # A shared value v would cap every count at that single number, so
        # there is no meaningful counter equivalent of dict.fromkeys().
        raise NotImplementedError(
            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')

    def update(*args, **kwds):
        '''Add counts from the source instead of replacing them.

        The source may be an iterable of items, a mapping of items to
        counts (including another Counter), and/or keyword arguments.

        >>> c = Counter('which')
        >>> c.update('witch')
        >>> c.update(Counter('watch'))
        >>> c['h']                      # four 'h' in which, witch, and watch
        4

        '''
        # Plain dict.update() would overwrite some counts and leave others
        # untouched, a mishmash with no sensible meaning when counting.
        # Straight addition is used instead; zero and negative counts are
        # allowed both in the inputs and in the result.

        if not args:
            raise TypeError("descriptor 'update' of 'Counter' object "
                            "needs an argument")
        self, sources = args[0], args[1:]
        if len(sources) > 1:
            raise TypeError('expected at most 1 arguments, got %d'
                            % len(sources))
        iterable = sources[0] if sources else None
        if iterable is not None:
            if not isinstance(iterable, Mapping):
                _count_elements(self, iterable)
            elif self:
                lookup = self.get
                for elem, count in iterable.items():
                    self[elem] = count + lookup(elem, 0)
            else:
                # fast path when counter is empty
                super(Counter, self).update(iterable)
        if kwds:
            self.update(kwds)

    def subtract(*args, **kwds):
        '''Subtract counts taken from the source.

        Accepts the same kinds of source as update().  Counts may drop to
        zero or below; zero and negative counts are allowed in the inputs
        as well.

        >>> c = Counter('which')
        >>> c.subtract('witch')
        >>> c.subtract(Counter('watch'))
        >>> c['h']                          # 2 in which, minus 1 in witch, minus 1 in watch
        0
        >>> c['w']                          # 1 in which, minus 1 in witch, minus 1 in watch
        -1

        '''
        if not args:
            raise TypeError("descriptor 'subtract' of 'Counter' object "
                            "needs an argument")
        self, sources = args[0], args[1:]
        if len(sources) > 1:
            raise TypeError('expected at most 1 arguments, got %d'
                            % len(sources))
        iterable = sources[0] if sources else None
        if iterable is not None:
            lookup = self.get
            if isinstance(iterable, Mapping):
                for elem, count in iterable.items():
                    self[elem] = lookup(elem, 0) - count
            else:
                for elem in iterable:
                    self[elem] = lookup(elem, 0) - 1
        if kwds:
            self.subtract(kwds)

    def copy(self):
        'Return a shallow copy, built by calling the instance class on self.'
        return self.__class__(self)

    def __reduce__(self):
        'Pickle support: rebuild by calling the class on a plain dict copy.'
        return self.__class__, (dict(self),)

    def __delitem__(self, elem):
        'Delete elem if present; a missing elem is silently ignored.'
        if elem in self:
            super(Counter, self).__delitem__(elem)

    def __repr__(self):
        'Show the class name and the counts, most common first if possible.'
        cls_name = self.__class__.__name__
        if not self:
            return '%s()' % cls_name
        try:
            pairs = ', '.join(map('%r: %r'.__mod__, self.most_common()))
            return '%s({%s})' % (cls_name, pairs)
        except TypeError:
            # handle case where values are not orderable
            return '{0}({1!r})'.format(cls_name, dict(self))

    # Multiset-style mathematical operations discussed in:
    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
    #       and at http://en.wikipedia.org/wiki/Multiset
    #
    # The binary operators return counters holding positive counts only.
    #
    # To strip negative and zero counts, add-in an empty counter:
    #       c += Counter()

    def __add__(self, other):
        '''Return a new counter with the counts of both operands summed.

        >>> Counter('abbb') + Counter('bcc')
        Counter({'b': 4, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            total = count + other[elem]
            if total > 0:
                result[elem] = total
        for elem, count in other.items():
            if elem in self:
                continue                # already handled by the first loop
            if count > 0:
                result[elem] = count
        return result

    def __sub__(self, other):
        '''Return a new counter of self minus other, positive counts only.

        >>> Counter('abbbc') - Counter('bccd')
        Counter({'b': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            remaining = count - other[elem]
            if remaining > 0:
                result[elem] = remaining
        for elem, count in other.items():
            if elem in self:
                continue
            if count < 0:
                # Subtracting a negative count yields a positive one.
                result[elem] = 0 - count
        return result

    def __or__(self, other):
        '''Union: keep, per item, the larger of the two counts.

        >>> Counter('abbb') | Counter('bcc')
        Counter({'b': 3, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            other_count = other[elem]
            larger = count
            if count < other_count:
                larger = other_count
            if larger > 0:
                result[elem] = larger
        for elem, count in other.items():
            if elem in self:
                continue
            if count > 0:
                result[elem] = count
        return result

    def __and__(self, other):
        '''Intersection: keep, per item, the smaller of the two counts.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            other_count = other[elem]
            smaller = other_count
            if count < other_count:
                smaller = count
            if smaller > 0:
                result[elem] = smaller
        return result

    def __pos__(self):
        'Return self + Counter(): a copy without zero and negative counts.'
        return self + Counter()

    def __neg__(self):
        '''Return Counter() - self.

        Positive and zero counts are dropped and the sign of each negative
        count is flipped.

        '''
        return Counter() - self

    def _keep_positive(self):
        '''Internal method to strip elements with a negative or zero count'''
        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        doomed = []
        for elem, count in self.items():
            if not count > 0:
                doomed.append(elem)
        for elem in doomed:
            del self[elem]
        return self

    def __iadd__(self, other):
        '''In-place addition; afterwards only positive counts remain.

        >>> c = Counter('abbb')
        >>> c += Counter('bcc')
        >>> c
        Counter({'b': 4, 'c': 2, 'a': 1})

        '''
        for elem, amount in other.items():
            self[elem] += amount
        return self._keep_positive()

    def __isub__(self, other):
        '''In-place subtraction; afterwards only positive counts remain.

        >>> c = Counter('abbbc')
        >>> c -= Counter('bccd')
        >>> c
        Counter({'b': 2, 'a': 1})

        '''
        for elem, amount in other.items():
            self[elem] -= amount
        return self._keep_positive()

    def __ior__(self, other):
        '''In-place union: raise each count to the larger of the two.

        >>> c = Counter('abbb')
        >>> c |= Counter('bcc')
        >>> c
        Counter({'b': 3, 'c': 2, 'a': 1})

        '''
        for elem, other_count in other.items():
            current = self[elem]
            if other_count > current:
                self[elem] = other_count
        return self._keep_positive()

    def __iand__(self, other):
        '''In-place intersection: lower each count to the smaller of the two.

        >>> c = Counter('abbb')
        >>> c &= Counter('bcc')
        >>> c
        Counter({'b': 1})

        '''
        # Only existing keys are reassigned, so the dict keeps its size
        # during the iteration.
        for elem, current in self.items():
            other_count = other[elem]
            if other_count < current:
                self[elem] = other_count
        return self._keep_positive()
|  | ||||
|  | ||||
def check_output(*popenargs, **kwargs):
    """
    Run a command and return everything it wrote to standard output.

    For Python 2.6 compatibility: see
    http://stackoverflow.com/questions/4814970/

    All positional and keyword arguments are forwarded to
    ``subprocess.Popen``; ``stdout`` is always set to a pipe and therefore
    may not be passed by the caller.

    Returns the captured standard output.

    Raises ``ValueError`` if a ``stdout`` keyword argument is supplied, and
    ``subprocess.CalledProcessError`` if the command exits with a non-zero
    status.  The raised error carries the exit status, the command, and --
    in its ``output`` attribute -- whatever the command wrote to standard
    output before it failed.
    """

    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
    # Only stdout is piped here; the second item returned by communicate()
    # (stderr) is not used.
    output = process.communicate()[0]
    retcode = process.poll()
    if retcode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        failure = subprocess.CalledProcessError(retcode, cmd)
        # Attach the captured output as an attribute rather than passing it
        # to the constructor, which only takes (returncode, cmd) on 2.6.
        failure.output = output
        raise failure
    return output
|  | ||||
|  | ||||
def count(start=0, step=1):
    """
    Endless arithmetic progression: ``start``, ``start + step``, ...

    ``itertools.count`` in Py 2.6 doesn't accept a step parameter; this
    generator offers the calling convention of ``itertools.count`` from
    Python 2.7+.  Being defined unconditionally, it rebinds the name
    ``count`` that the version check earlier in this module set up.
    """
    value = start
    while True:
        yield value
        value += step
|  | ||||
|  | ||||
| ######################################################################## | ||||
| ###  ChainMap (helper for configparser and string.Template) | ||||
| ###  From the Py3.4 source code. See also: | ||||
| ###    https://github.com/kkxue/Py2ChainMap/blob/master/py2chainmap.py | ||||
| ######################################################################## | ||||
|  | ||||
| class ChainMap(MutableMapping): | ||||
|     ''' A ChainMap groups multiple dicts (or other mappings) together | ||||
|     to create a single, updateable view. | ||||
|  | ||||
|     The underlying mappings are stored in a list.  That list is public and can | ||||
|     be accessed or updated using the *maps* attribute.  There is no other state. | ||||
|  | ||||
|     Lookups search the underlying mappings successively until a key is found. | ||||
|     In contrast, writes, updates, and deletions only operate on the first | ||||
|     mapping. | ||||
|  | ||||
|     ''' | ||||
|  | ||||
|     def __init__(self, *maps): | ||||
|         '''Initialize a ChainMap by setting *maps* to the given mappings. | ||||
|         If no mappings are provided, a single empty dictionary is used. | ||||
|  | ||||
|         ''' | ||||
|         self.maps = list(maps) or [{}]          # always at least one map | ||||
|  | ||||
|     def __missing__(self, key): | ||||
|         raise KeyError(key) | ||||
|  | ||||
|     def __getitem__(self, key): | ||||
|         for mapping in self.maps: | ||||
|             try: | ||||
|                 return mapping[key]             # can't use 'key in mapping' with defaultdict | ||||
|             except KeyError: | ||||
|                 pass | ||||
|         return self.__missing__(key)            # support subclasses that define __missing__ | ||||
|  | ||||
|     def get(self, key, default=None): | ||||
|         return self[key] if key in self else default | ||||
|  | ||||
|     def __len__(self): | ||||
|         return len(set().union(*self.maps))     # reuses stored hash values if possible | ||||
|  | ||||
|     def __iter__(self): | ||||
|         return iter(set().union(*self.maps)) | ||||
|  | ||||
|     def __contains__(self, key): | ||||
|         return any(key in m for m in self.maps) | ||||
|  | ||||
|     def __bool__(self): | ||||
|         return any(self.maps) | ||||
|  | ||||
|     # Py2 compatibility: | ||||
|     __nonzero__ = __bool__ | ||||
|  | ||||
|     @recursive_repr() | ||||
|     def __repr__(self): | ||||
|         return '{0.__class__.__name__}({1})'.format( | ||||
|             self, ', '.join(map(repr, self.maps))) | ||||
|  | ||||
|     @classmethod | ||||
|     def fromkeys(cls, iterable, *args): | ||||
|         'Create a ChainMap with a single dict created from the iterable.' | ||||
|         return cls(dict.fromkeys(iterable, *args)) | ||||
|  | ||||
|     def copy(self): | ||||
|         'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' | ||||
|         return self.__class__(self.maps[0].copy(), *self.maps[1:]) | ||||
|  | ||||
|     __copy__ = copy | ||||
|  | ||||
|     def new_child(self, m=None):                # like Django's Context.push() | ||||
|         ''' | ||||
|         New ChainMap with a new map followed by all previous maps. If no | ||||
|         map is provided, an empty dict is used. | ||||
|         ''' | ||||
|         if m is None: | ||||
|             m = {} | ||||
|         return self.__class__(m, *self.maps) | ||||
|  | ||||
|     @property | ||||
|     def parents(self):                          # like Django's Context.pop() | ||||
|         'New ChainMap from maps[1:].' | ||||
|         return self.__class__(*self.maps[1:]) | ||||
|  | ||||
|     def __setitem__(self, key, value): | ||||
|         self.maps[0][key] = value | ||||
|  | ||||
|     def __delitem__(self, key): | ||||
|         try: | ||||
|             del self.maps[0][key] | ||||
|         except KeyError: | ||||
|             raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) | ||||
|  | ||||
|     def popitem(self): | ||||
|         'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.' | ||||
|         try: | ||||
|             return self.maps[0].popitem() | ||||
|         except KeyError: | ||||
|             raise KeyError('No keys found in the first mapping.') | ||||
|  | ||||
|     def pop(self, key, *args): | ||||
|         'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' | ||||
|         try: | ||||
|             return self.maps[0].pop(key, *args) | ||||
|         except KeyError: | ||||
|             raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) | ||||
|  | ||||
|     def clear(self): | ||||
|         'Clear maps[0], leaving maps[1:] intact.' | ||||
|         self.maps[0].clear() | ||||
|  | ||||
|  | ||||
| # Re-use the same sentinel as in the Python stdlib socket module: | ||||
| from socket import _GLOBAL_DEFAULT_TIMEOUT | ||||
| # Was: _GLOBAL_DEFAULT_TIMEOUT = object() | ||||
|  | ||||
|  | ||||
| def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, | ||||
|                       source_address=None): | ||||
|     """Backport of 3-argument create_connection() for Py2.6. | ||||
|  | ||||
|     Connect to *address* and return the socket object. | ||||
|  | ||||
|     Convenience function.  Connect to *address* (a 2-tuple ``(host, | ||||
|     port)``) and return the socket object.  Passing the optional | ||||
|     *timeout* parameter will set the timeout on the socket instance | ||||
|     before attempting to connect.  If no *timeout* is supplied, the | ||||
|     global default timeout setting returned by :func:`getdefaulttimeout` | ||||
|     is used.  If *source_address* is set it must be a tuple of (host, port) | ||||
|     for the socket to bind as a source address before making the connection. | ||||
|     A host of '' or port 0 tells the OS to use the default. | ||||
|     """ | ||||
|  | ||||
|     host, port = address | ||||
|     err = None | ||||
|     for res in getaddrinfo(host, port, 0, SOCK_STREAM): | ||||
|         af, socktype, proto, canonname, sa = res | ||||
|         sock = None | ||||
|         try: | ||||
|             sock = socket(af, socktype, proto) | ||||
|             if timeout is not _GLOBAL_DEFAULT_TIMEOUT: | ||||
|                 sock.settimeout(timeout) | ||||
|             if source_address: | ||||
|                 sock.bind(source_address) | ||||
|             sock.connect(sa) | ||||
|             return sock | ||||
|  | ||||
|         except error as _: | ||||
|             err = _ | ||||
|             if sock is not None: | ||||
|                 sock.close() | ||||
|  | ||||
|     if err is not None: | ||||
|         raise err | ||||
|     else: | ||||
|         raise error("getaddrinfo returns an empty list") | ||||
|  | ||||
| # Backport from Py2.7 for Py2.6: | ||||
| def cmp_to_key(mycmp): | ||||
|     """Convert a cmp= function into a key= function""" | ||||
|     class K(object): | ||||
|         __slots__ = ['obj'] | ||||
|         def __init__(self, obj, *args): | ||||
|             self.obj = obj | ||||
|         def __lt__(self, other): | ||||
|             return mycmp(self.obj, other.obj) < 0 | ||||
|         def __gt__(self, other): | ||||
|             return mycmp(self.obj, other.obj) > 0 | ||||
|         def __eq__(self, other): | ||||
|             return mycmp(self.obj, other.obj) == 0 | ||||
|         def __le__(self, other): | ||||
|             return mycmp(self.obj, other.obj) <= 0 | ||||
|         def __ge__(self, other): | ||||
|             return mycmp(self.obj, other.obj) >= 0 | ||||
|         def __ne__(self, other): | ||||
|             return mycmp(self.obj, other.obj) != 0 | ||||
|         def __hash__(self): | ||||
|             raise TypeError('hash not implemented') | ||||
|     return K | ||||
|  | ||||
| # Back up our definitions above in case they're useful | ||||
| _OrderedDict = OrderedDict | ||||
| _Counter = Counter | ||||
| _check_output = check_output | ||||
| _count = count | ||||
| _ceil = ceil | ||||
| __count_elements = _count_elements | ||||
| _recursive_repr = recursive_repr | ||||
| _ChainMap = ChainMap | ||||
| _create_connection = create_connection | ||||
| _cmp_to_key = cmp_to_key | ||||
|  | ||||
| # Overwrite the definitions above with the usual ones | ||||
| # from the standard library: | ||||
| if sys.version_info >= (2, 7): | ||||
|     from collections import OrderedDict, Counter | ||||
|     from itertools import count | ||||
|     from functools import cmp_to_key | ||||
|     try: | ||||
|         from subprocess import check_output | ||||
|     except ImportError: | ||||
|         # Not available. This happens with Google App Engine: see issue #231 | ||||
|         pass | ||||
|     from socket import create_connection | ||||
|  | ||||
| if sys.version_info >= (3, 0): | ||||
|     from math import ceil | ||||
|     from collections import _count_elements | ||||
|  | ||||
| if sys.version_info >= (3, 3): | ||||
|     from reprlib import recursive_repr | ||||
|     from collections import ChainMap | ||||
							
								
								
									
										454
									
								
								venv/lib/python3.12/site-packages/future/backports/socket.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										454
									
								
								venv/lib/python3.12/site-packages/future/backports/socket.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,454 @@ | ||||
| # Wrapper module for _socket, providing some additional facilities | ||||
| # implemented in Python. | ||||
|  | ||||
| """\ | ||||
| This module provides socket operations and some related functions. | ||||
| On Unix, it supports IP (Internet Protocol) and Unix domain sockets. | ||||
| On other systems, it only supports IP. Functions specific for a | ||||
| socket are available as methods of the socket object. | ||||
|  | ||||
| Functions: | ||||
|  | ||||
| socket() -- create a new socket object | ||||
| socketpair() -- create a pair of new socket objects [*] | ||||
| fromfd() -- create a socket object from an open file descriptor [*] | ||||
| fromshare() -- create a socket object from data received from socket.share() [*] | ||||
| gethostname() -- return the current hostname | ||||
| gethostbyname() -- map a hostname to its IP number | ||||
| gethostbyaddr() -- map an IP number or hostname to DNS info | ||||
| getservbyname() -- map a service name and a protocol name to a port number | ||||
| getprotobyname() -- map a protocol name (e.g. 'tcp') to a number | ||||
| ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order | ||||
| htons(), htonl() -- convert 16, 32 bit int from host to network byte order | ||||
| inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format | ||||
| inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89) | ||||
| socket.getdefaulttimeout() -- get the default timeout value | ||||
| socket.setdefaulttimeout() -- set the default timeout value | ||||
| create_connection() -- connects to an address, with an optional timeout and | ||||
|                        optional source address. | ||||
|  | ||||
|  [*] not available on all platforms! | ||||
|  | ||||
| Special objects: | ||||
|  | ||||
| SocketType -- type object for socket objects | ||||
| error -- exception raised for I/O errors | ||||
| has_ipv6 -- boolean value indicating if IPv6 is supported | ||||
|  | ||||
| Integer constants: | ||||
|  | ||||
| AF_INET, AF_UNIX -- socket domains (first argument to socket() call) | ||||
| SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument) | ||||
|  | ||||
| Many other constants may be defined; these may be used in calls to | ||||
| the setsockopt() and getsockopt() methods. | ||||
| """ | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import print_function | ||||
| from __future__ import division | ||||
| from __future__ import absolute_import | ||||
| from future.builtins import super | ||||
|  | ||||
| import _socket | ||||
| from _socket import * | ||||
|  | ||||
| import os, sys, io | ||||
|  | ||||
| try: | ||||
|     import errno | ||||
| except ImportError: | ||||
|     errno = None | ||||
| EBADF = getattr(errno, 'EBADF', 9) | ||||
| EAGAIN = getattr(errno, 'EAGAIN', 11) | ||||
| EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11) | ||||
|  | ||||
| __all__ = ["getfqdn", "create_connection"] | ||||
| __all__.extend(os._get_exports_list(_socket)) | ||||
|  | ||||
|  | ||||
| _realsocket = socket | ||||
|  | ||||
| # WSA error codes | ||||
| if sys.platform.lower().startswith("win"): | ||||
|     errorTab = {} | ||||
|     errorTab[10004] = "The operation was interrupted." | ||||
|     errorTab[10009] = "A bad file handle was passed." | ||||
|     errorTab[10013] = "Permission denied." | ||||
|     errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT | ||||
|     errorTab[10022] = "An invalid operation was attempted." | ||||
|     errorTab[10035] = "The socket operation would block" | ||||
|     errorTab[10036] = "A blocking operation is already in progress." | ||||
|     errorTab[10048] = "The network address is in use." | ||||
|     errorTab[10054] = "The connection has been reset." | ||||
|     errorTab[10058] = "The network has been shut down." | ||||
|     errorTab[10060] = "The operation timed out." | ||||
|     errorTab[10061] = "Connection refused." | ||||
|     errorTab[10063] = "The name is too long." | ||||
|     errorTab[10064] = "The host is down." | ||||
|     errorTab[10065] = "The host is unreachable." | ||||
|     __all__.append("errorTab") | ||||
|  | ||||
|  | ||||
| class socket(_socket.socket): | ||||
|  | ||||
|     """A subclass of _socket.socket adding the makefile() method.""" | ||||
|  | ||||
|     __slots__ = ["__weakref__", "_io_refs", "_closed"] | ||||
|  | ||||
|     def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None): | ||||
|         if fileno is None: | ||||
|             _socket.socket.__init__(self, family, type, proto) | ||||
|         else: | ||||
|             _socket.socket.__init__(self, family, type, proto, fileno) | ||||
|         self._io_refs = 0 | ||||
|         self._closed = False | ||||
|  | ||||
|     def __enter__(self): | ||||
|         return self | ||||
|  | ||||
|     def __exit__(self, *args): | ||||
|         if not self._closed: | ||||
|             self.close() | ||||
|  | ||||
|     def __repr__(self): | ||||
|         """Wrap __repr__() to reveal the real class name.""" | ||||
|         s = _socket.socket.__repr__(self) | ||||
|         if s.startswith("<socket object"): | ||||
|             s = "<%s.%s%s%s" % (self.__class__.__module__, | ||||
|                                 self.__class__.__name__, | ||||
|                                 getattr(self, '_closed', False) and " [closed] " or "", | ||||
|                                 s[7:]) | ||||
|         return s | ||||
|  | ||||
|     def __getstate__(self): | ||||
|         raise TypeError("Cannot serialize socket object") | ||||
|  | ||||
|     def dup(self): | ||||
|         """dup() -> socket object | ||||
|  | ||||
|         Return a new socket object connected to the same system resource. | ||||
|         """ | ||||
|         fd = dup(self.fileno()) | ||||
|         sock = self.__class__(self.family, self.type, self.proto, fileno=fd) | ||||
|         sock.settimeout(self.gettimeout()) | ||||
|         return sock | ||||
|  | ||||
|     def accept(self): | ||||
|         """accept() -> (socket object, address info) | ||||
|  | ||||
|         Wait for an incoming connection.  Return a new socket | ||||
|         representing the connection, and the address of the client. | ||||
|         For IP sockets, the address info is a pair (hostaddr, port). | ||||
|         """ | ||||
|         fd, addr = self._accept() | ||||
|         sock = socket(self.family, self.type, self.proto, fileno=fd) | ||||
|         # Issue #7995: if no default timeout is set and the listening | ||||
|         # socket had a (non-zero) timeout, force the new socket in blocking | ||||
|         # mode to override platform-specific socket flags inheritance. | ||||
|         if getdefaulttimeout() is None and self.gettimeout(): | ||||
|             sock.setblocking(True) | ||||
|         return sock, addr | ||||
|  | ||||
|     def makefile(self, mode="r", buffering=None, **_3to2kwargs): | ||||
|         """makefile(...) -> an I/O stream connected to the socket | ||||
|  | ||||
|         The arguments are as for io.open() after the filename, | ||||
|         except the only mode characters supported are 'r', 'w' and 'b'. | ||||
|         The semantics are similar too.  (XXX refactor to share code?) | ||||
|         """ | ||||
|         if 'newline' in _3to2kwargs: newline = _3to2kwargs['newline']; del _3to2kwargs['newline'] | ||||
|         else: newline = None | ||||
|         if 'errors' in _3to2kwargs: errors = _3to2kwargs['errors']; del _3to2kwargs['errors'] | ||||
|         else: errors = None | ||||
|         if 'encoding' in _3to2kwargs: encoding = _3to2kwargs['encoding']; del _3to2kwargs['encoding'] | ||||
|         else: encoding = None | ||||
|         for c in mode: | ||||
|             if c not in ("r", "w", "b"): | ||||
|                 raise ValueError("invalid mode %r (only r, w, b allowed)") | ||||
|         writing = "w" in mode | ||||
|         reading = "r" in mode or not writing | ||||
|         assert reading or writing | ||||
|         binary = "b" in mode | ||||
|         rawmode = "" | ||||
|         if reading: | ||||
|             rawmode += "r" | ||||
|         if writing: | ||||
|             rawmode += "w" | ||||
|         raw = SocketIO(self, rawmode) | ||||
|         self._io_refs += 1 | ||||
|         if buffering is None: | ||||
|             buffering = -1 | ||||
|         if buffering < 0: | ||||
|             buffering = io.DEFAULT_BUFFER_SIZE | ||||
|         if buffering == 0: | ||||
|             if not binary: | ||||
|                 raise ValueError("unbuffered streams must be binary") | ||||
|             return raw | ||||
|         if reading and writing: | ||||
|             buffer = io.BufferedRWPair(raw, raw, buffering) | ||||
|         elif reading: | ||||
|             buffer = io.BufferedReader(raw, buffering) | ||||
|         else: | ||||
|             assert writing | ||||
|             buffer = io.BufferedWriter(raw, buffering) | ||||
|         if binary: | ||||
|             return buffer | ||||
|         text = io.TextIOWrapper(buffer, encoding, errors, newline) | ||||
|         text.mode = mode | ||||
|         return text | ||||
|  | ||||
|     def _decref_socketios(self): | ||||
|         if self._io_refs > 0: | ||||
|             self._io_refs -= 1 | ||||
|         if self._closed: | ||||
|             self.close() | ||||
|  | ||||
|     def _real_close(self, _ss=_socket.socket): | ||||
|         # This function should not reference any globals. See issue #808164. | ||||
|         _ss.close(self) | ||||
|  | ||||
|     def close(self): | ||||
|         # This function should not reference any globals. See issue #808164. | ||||
|         self._closed = True | ||||
|         if self._io_refs <= 0: | ||||
|             self._real_close() | ||||
|  | ||||
|     def detach(self): | ||||
|         """detach() -> file descriptor | ||||
|  | ||||
|         Close the socket object without closing the underlying file descriptor. | ||||
|         The object cannot be used after this call, but the file descriptor | ||||
|         can be reused for other purposes.  The file descriptor is returned. | ||||
|         """ | ||||
|         self._closed = True | ||||
|         return super().detach() | ||||
|  | ||||
| def fromfd(fd, family, type, proto=0): | ||||
|     """ fromfd(fd, family, type[, proto]) -> socket object | ||||
|  | ||||
|     Create a socket object from a duplicate of the given file | ||||
|     descriptor.  The remaining arguments are the same as for socket(). | ||||
|     """ | ||||
|     nfd = dup(fd) | ||||
|     return socket(family, type, proto, nfd) | ||||
|  | ||||
| if hasattr(_socket.socket, "share"): | ||||
|     def fromshare(info): | ||||
|         """ fromshare(info) -> socket object | ||||
|  | ||||
|         Create a socket object from the bytes object returned by | ||||
|         socket.share(pid). | ||||
|         """ | ||||
|         return socket(0, 0, 0, info) | ||||
|  | ||||
| if hasattr(_socket, "socketpair"): | ||||
|  | ||||
|     def socketpair(family=None, type=SOCK_STREAM, proto=0): | ||||
|         """socketpair([family[, type[, proto]]]) -> (socket object, socket object) | ||||
|  | ||||
|         Create a pair of socket objects from the sockets returned by the platform | ||||
|         socketpair() function. | ||||
|         The arguments are the same as for socket() except the default family is | ||||
|         AF_UNIX if defined on the platform; otherwise, the default is AF_INET. | ||||
|         """ | ||||
|         if family is None: | ||||
|             try: | ||||
|                 family = AF_UNIX | ||||
|             except NameError: | ||||
|                 family = AF_INET | ||||
|         a, b = _socket.socketpair(family, type, proto) | ||||
|         a = socket(family, type, proto, a.detach()) | ||||
|         b = socket(family, type, proto, b.detach()) | ||||
|         return a, b | ||||
|  | ||||
|  | ||||
| _blocking_errnos = set([EAGAIN, EWOULDBLOCK]) | ||||
|  | ||||
| class SocketIO(io.RawIOBase): | ||||
|  | ||||
|     """Raw I/O implementation for stream sockets. | ||||
|  | ||||
|     This class supports the makefile() method on sockets.  It provides | ||||
|     the raw I/O interface on top of a socket object. | ||||
|     """ | ||||
|  | ||||
|     # One might wonder why not let FileIO do the job instead.  There are two | ||||
|     # main reasons why FileIO is not adapted: | ||||
|     # - it wouldn't work under Windows (where you can't use read() and | ||||
|     #   write() on a socket handle) | ||||
|     # - it wouldn't work with socket timeouts (FileIO would ignore the | ||||
|     #   timeout and consider the socket non-blocking) | ||||
|  | ||||
|     # XXX More docs | ||||
|  | ||||
|     def __init__(self, sock, mode): | ||||
|         if mode not in ("r", "w", "rw", "rb", "wb", "rwb"): | ||||
|             raise ValueError("invalid mode: %r" % mode) | ||||
|         io.RawIOBase.__init__(self) | ||||
|         self._sock = sock | ||||
|         if "b" not in mode: | ||||
|             mode += "b" | ||||
|         self._mode = mode | ||||
|         self._reading = "r" in mode | ||||
|         self._writing = "w" in mode | ||||
|         self._timeout_occurred = False | ||||
|  | ||||
|     def readinto(self, b): | ||||
|         """Read up to len(b) bytes into the writable buffer *b* and return | ||||
|         the number of bytes read.  If the socket is non-blocking and no bytes | ||||
|         are available, None is returned. | ||||
|  | ||||
|         If *b* is non-empty, a 0 return value indicates that the connection | ||||
|         was shutdown at the other end. | ||||
|         """ | ||||
|         self._checkClosed() | ||||
|         self._checkReadable() | ||||
|         if self._timeout_occurred: | ||||
|             raise IOError("cannot read from timed out object") | ||||
|         while True: | ||||
|             try: | ||||
|                 return self._sock.recv_into(b) | ||||
|             except timeout: | ||||
|                 self._timeout_occurred = True | ||||
|                 raise | ||||
|             # except InterruptedError: | ||||
|             #     continue | ||||
|             except error as e: | ||||
|                 if e.args[0] in _blocking_errnos: | ||||
|                     return None | ||||
|                 raise | ||||
|  | ||||
|     def write(self, b): | ||||
|         """Write the given bytes or bytearray object *b* to the socket | ||||
|         and return the number of bytes written.  This can be less than | ||||
|         len(b) if not all data could be written.  If the socket is | ||||
|         non-blocking and no bytes could be written None is returned. | ||||
|         """ | ||||
|         self._checkClosed() | ||||
|         self._checkWritable() | ||||
|         try: | ||||
|             return self._sock.send(b) | ||||
|         except error as e: | ||||
|             # XXX what about EINTR? | ||||
|             if e.args[0] in _blocking_errnos: | ||||
|                 return None | ||||
|             raise | ||||
|  | ||||
|     def readable(self): | ||||
|         """True if the SocketIO is open for reading. | ||||
|         """ | ||||
|         if self.closed: | ||||
|             raise ValueError("I/O operation on closed socket.") | ||||
|         return self._reading | ||||
|  | ||||
|     def writable(self): | ||||
|         """True if the SocketIO is open for writing. | ||||
|         """ | ||||
|         if self.closed: | ||||
|             raise ValueError("I/O operation on closed socket.") | ||||
|         return self._writing | ||||
|  | ||||
|     def seekable(self): | ||||
|         """True if the SocketIO is open for seeking. | ||||
|         """ | ||||
|         if self.closed: | ||||
|             raise ValueError("I/O operation on closed socket.") | ||||
|         return super().seekable() | ||||
|  | ||||
|     def fileno(self): | ||||
|         """Return the file descriptor of the underlying socket. | ||||
|         """ | ||||
|         self._checkClosed() | ||||
|         return self._sock.fileno() | ||||
|  | ||||
|     @property | ||||
|     def name(self): | ||||
|         if not self.closed: | ||||
|             return self.fileno() | ||||
|         else: | ||||
|             return -1 | ||||
|  | ||||
|     @property | ||||
|     def mode(self): | ||||
|         return self._mode | ||||
|  | ||||
|     def close(self): | ||||
|         """Close the SocketIO object.  This doesn't close the underlying | ||||
|         socket, except if all references to it have disappeared. | ||||
|         """ | ||||
|         if self.closed: | ||||
|             return | ||||
|         io.RawIOBase.close(self) | ||||
|         self._sock._decref_socketios() | ||||
|         self._sock = None | ||||
|  | ||||
|  | ||||
| def getfqdn(name=''): | ||||
|     """Get fully qualified domain name from name. | ||||
|  | ||||
|     An empty argument is interpreted as meaning the local host. | ||||
|  | ||||
|     First the hostname returned by gethostbyaddr() is checked, then | ||||
|     possibly existing aliases. In case no FQDN is available, hostname | ||||
|     from gethostname() is returned. | ||||
|     """ | ||||
|     name = name.strip() | ||||
|     if not name or name == '0.0.0.0': | ||||
|         name = gethostname() | ||||
|     try: | ||||
|         hostname, aliases, ipaddrs = gethostbyaddr(name) | ||||
|     except error: | ||||
|         pass | ||||
|     else: | ||||
|         aliases.insert(0, hostname) | ||||
|         for name in aliases: | ||||
|             if '.' in name: | ||||
|                 break | ||||
|         else: | ||||
|             name = hostname | ||||
|     return name | ||||
|  | ||||
|  | ||||
| # Re-use the same sentinel as in the Python stdlib socket module: | ||||
| from socket import _GLOBAL_DEFAULT_TIMEOUT | ||||
| # Was: _GLOBAL_DEFAULT_TIMEOUT = object() | ||||
|  | ||||
|  | ||||
| def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, | ||||
|                       source_address=None): | ||||
|     """Connect to *address* and return the socket object. | ||||
|  | ||||
|     Convenience function.  Connect to *address* (a 2-tuple ``(host, | ||||
|     port)``) and return the socket object.  Passing the optional | ||||
|     *timeout* parameter will set the timeout on the socket instance | ||||
|     before attempting to connect.  If no *timeout* is supplied, the | ||||
|     global default timeout setting returned by :func:`getdefaulttimeout` | ||||
|     is used.  If *source_address* is set it must be a tuple of (host, port) | ||||
|     for the socket to bind as a source address before making the connection. | ||||
|     A host of '' or port 0 tells the OS to use the default. | ||||
|     """ | ||||
|  | ||||
|     host, port = address | ||||
|     err = None | ||||
|     for res in getaddrinfo(host, port, 0, SOCK_STREAM): | ||||
|         af, socktype, proto, canonname, sa = res | ||||
|         sock = None | ||||
|         try: | ||||
|             sock = socket(af, socktype, proto) | ||||
|             if timeout is not _GLOBAL_DEFAULT_TIMEOUT: | ||||
|                 sock.settimeout(timeout) | ||||
|             if source_address: | ||||
|                 sock.bind(source_address) | ||||
|             sock.connect(sa) | ||||
|             return sock | ||||
|  | ||||
|         except error as _: | ||||
|             err = _ | ||||
|             if sock is not None: | ||||
|                 sock.close() | ||||
|  | ||||
|     if err is not None: | ||||
|         raise err | ||||
|     else: | ||||
|         raise error("getaddrinfo returns an empty list") | ||||
| @ -0,0 +1,747 @@ | ||||
| """Generic socket server classes. | ||||
|  | ||||
| This module tries to capture the various aspects of defining a server: | ||||
|  | ||||
| For socket-based servers: | ||||
|  | ||||
| - address family: | ||||
|         - AF_INET{,6}: IP (Internet Protocol) sockets (default) | ||||
|         - AF_UNIX: Unix domain sockets | ||||
|         - others, e.g. AF_DECNET are conceivable (see <socket.h>) | ||||
| - socket type: | ||||
|         - SOCK_STREAM (reliable stream, e.g. TCP) | ||||
|         - SOCK_DGRAM (datagrams, e.g. UDP) | ||||
|  | ||||
| For request-based servers (including socket-based): | ||||
|  | ||||
| - client address verification before further looking at the request | ||||
|         (This is actually a hook for any processing that needs to look | ||||
|          at the request before anything else, e.g. logging) | ||||
| - how to handle multiple requests: | ||||
|         - synchronous (one request is handled at a time) | ||||
|         - forking (each request is handled by a new process) | ||||
|         - threading (each request is handled by a new thread) | ||||
|  | ||||
| The classes in this module favor the server type that is simplest to | ||||
| write: a synchronous TCP/IP server.  This is bad class design, but | ||||
| saves some typing.  (There's also the issue that a deep class hierarchy | ||||
| slows down method lookups.) | ||||
|  | ||||
| There are five classes in an inheritance diagram, four of which represent | ||||
| synchronous servers of four types: | ||||
|  | ||||
|         +------------+ | ||||
|         | BaseServer | | ||||
|         +------------+ | ||||
|               | | ||||
|               v | ||||
|         +-----------+        +------------------+ | ||||
|         | TCPServer |------->| UnixStreamServer | | ||||
|         +-----------+        +------------------+ | ||||
|               | | ||||
|               v | ||||
|         +-----------+        +--------------------+ | ||||
|         | UDPServer |------->| UnixDatagramServer | | ||||
|         +-----------+        +--------------------+ | ||||
|  | ||||
| Note that UnixDatagramServer derives from UDPServer, not from | ||||
| UnixStreamServer -- the only difference between an IP and a Unix | ||||
| stream server is the address family, which is simply repeated in both | ||||
| unix server classes. | ||||
|  | ||||
| Forking and threading versions of each type of server can be created | ||||
| using the ForkingMixIn and ThreadingMixIn mix-in classes.  For | ||||
| instance, a threading UDP server class is created as follows: | ||||
|  | ||||
|         class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass | ||||
|  | ||||
| The Mix-in class must come first, since it overrides a method defined | ||||
| in UDPServer! Setting the various member variables also changes | ||||
| the behavior of the underlying server mechanism. | ||||
|  | ||||
| To implement a service, you must derive a class from | ||||
| BaseRequestHandler and redefine its handle() method.  You can then run | ||||
| various versions of the service by combining one of the server classes | ||||
| with your request handler class. | ||||
|  | ||||
| The request handler class must be different for datagram or stream | ||||
| services.  This can be hidden by using the request handler | ||||
| subclasses StreamRequestHandler or DatagramRequestHandler. | ||||
|  | ||||
| Of course, you still have to use your head! | ||||
|  | ||||
| For instance, it makes no sense to use a forking server if the service | ||||
| contains state in memory that can be modified by requests (since the | ||||
| modifications in the child process would never reach the initial state | ||||
| kept in the parent process and passed to each child).  In this case, | ||||
| you can use a threading server, but you will probably have to use | ||||
| locks to prevent two requests that arrive nearly simultaneously from | ||||
| applying conflicting changes to the server state. | ||||
|  | ||||
| On the other hand, if you are building e.g. an HTTP server, where all | ||||
| data is stored externally (e.g. in the file system), a synchronous | ||||
| class will essentially render the service "deaf" while one request is | ||||
| being handled -- which may be for a very long time if a client is slow | ||||
| to read all the data it has requested.  Here a threading or forking | ||||
| server is appropriate. | ||||
|  | ||||
| In some cases, it may be appropriate to process part of a request | ||||
| synchronously, but to finish processing in a forked child depending on | ||||
| the request data.  This can be implemented by using a synchronous | ||||
| server and doing an explicit fork in the request handler class | ||||
| handle() method. | ||||
|  | ||||
| Another approach to handling multiple simultaneous requests in an | ||||
| environment that supports neither threads nor fork (or where these are | ||||
| too expensive or inappropriate for the service) is to maintain an | ||||
| explicit table of partially finished requests and to use select() to | ||||
| decide which request to work on next (or whether to handle a new | ||||
| incoming request).  This is particularly important for stream services | ||||
| where each client can potentially be connected for a long time (if | ||||
| threads or subprocesses cannot be used). | ||||
|  | ||||
| Future work: | ||||
| - Standard classes for Sun RPC (which uses either UDP or TCP) | ||||
| - Standard mix-in classes to implement various authentication | ||||
|   and encryption schemes | ||||
| - Standard framework for select-based multiplexing | ||||
|  | ||||
| XXX Open problems: | ||||
| - What to do with out-of-band data? | ||||
|  | ||||
| BaseServer: | ||||
| - split generic "request" functionality out into BaseServer class. | ||||
|   Copyright (C) 2000  Luke Kenneth Casson Leighton <lkcl@samba.org> | ||||
|  | ||||
|   example: read entries from a SQL database (requires overriding | ||||
|   get_request() to return a table entry from the database). | ||||
|   entry is processed by a RequestHandlerClass. | ||||
|  | ||||
| """ | ||||
|  | ||||
| # Author of the BaseServer patch: Luke Kenneth Casson Leighton | ||||
|  | ||||
| # XXX Warning! | ||||
| # There is a test suite for this module, but it cannot be run by the | ||||
| # standard regression test. | ||||
| # To run it manually, run Lib/test/test_socketserver.py. | ||||
|  | ||||
| from __future__ import (absolute_import, print_function) | ||||
|  | ||||
| __version__ = "0.4" | ||||
|  | ||||
|  | ||||
| import socket | ||||
| import select | ||||
| import sys | ||||
| import os | ||||
| import errno | ||||
| try: | ||||
|     import threading | ||||
| except ImportError: | ||||
|     import dummy_threading as threading | ||||
|  | ||||
# Public API of this module.  BaseServer is exported as well: it is the
# documented base class that custom (non-socket) servers derive from, so
# ``from socketserver import *`` should make it available.
__all__ = ["BaseServer", "TCPServer", "UDPServer",
           "ForkingUDPServer", "ForkingTCPServer",
           "ThreadingUDPServer", "ThreadingTCPServer",
           "BaseRequestHandler", "StreamRequestHandler",
           "DatagramRequestHandler",
           "ThreadingMixIn", "ForkingMixIn"]
# The Unix-domain variants only exist on platforms that provide AF_UNIX.
if hasattr(socket, "AF_UNIX"):
    __all__.extend(["UnixStreamServer", "UnixDatagramServer",
                    "ThreadingUnixStreamServer",
                    "ThreadingUnixDatagramServer"])
|  | ||||
| def _eintr_retry(func, *args): | ||||
|     """restart a system call interrupted by EINTR""" | ||||
|     while True: | ||||
|         try: | ||||
|             return func(*args) | ||||
|         except OSError as e: | ||||
|             if e.errno != errno.EINTR: | ||||
|                 raise | ||||
|  | ||||
class BaseServer(object):

    """Transport-independent core shared by every server class.

    Entry points for the caller:

    - __init__(server_address, RequestHandlerClass)
    - serve_forever(poll_interval=0.5)
    - shutdown()
    - handle_request()  # if you do not use serve_forever()
    - fileno() -> int   # for select()

    Hooks that subclasses may override:

    - server_bind()
    - server_activate()
    - get_request() -> request, client_address
    - handle_timeout()
    - verify_request(request, client_address)
    - server_close()
    - process_request(request, client_address)
    - shutdown_request(request)
    - close_request(request)
    - service_actions()
    - handle_error()

    Helper intended for derived classes:

    - finish_request(request, client_address)

    Class variables that derived classes or instances may override:

    - timeout
    - address_family
    - socket_type
    - allow_reuse_address

    Instance variables:

    - RequestHandlerClass
    - socket

    """

    timeout = None

    def __init__(self, server_address, RequestHandlerClass):
        """Constructor.  May be extended, do not override."""
        self.server_address = server_address
        self.RequestHandlerClass = RequestHandlerClass
        # Set once serve_forever() has left its loop; shutdown() waits on it.
        self.__is_shut_down = threading.Event()
        # Flag polled by serve_forever(); raised by shutdown().
        self.__shutdown_request = False

    def server_activate(self):
        """Hook called by the constructor to activate the server.

        Does nothing here; may be overridden.

        """
        pass

    def serve_forever(self, poll_interval=0.5):
        """Serve requests one at a time until shutdown() is called.

        The shutdown flag is checked every poll_interval seconds.
        self.timeout is ignored.  Periodic work belongs in another
        thread (or in service_actions()).
        """
        self.__is_shut_down.clear()
        try:
            while not self.__shutdown_request:
                # XXX: Consider using another file descriptor or
                # connecting to the socket to wake this up instead of
                # polling. Polling reduces our responsiveness to a
                # shutdown request and wastes cpu at all other times.
                readable = _eintr_retry(select.select, [self], [], [],
                                        poll_interval)[0]
                if self in readable:
                    self._handle_request_noblock()

                self.service_actions()
        finally:
            # Re-arm the flag so the server can be started again, then
            # release anyone blocked in shutdown().
            self.__shutdown_request = False
            self.__is_shut_down.set()

    def shutdown(self):
        """Ask the serve_forever() loop to stop and wait until it has.

        Must be called while serve_forever() is running in a different
        thread; otherwise it deadlocks.
        """
        self.__shutdown_request = True
        self.__is_shut_down.wait()

    def service_actions(self):
        """Hook run on every iteration of the serve_forever() loop.

        A subclass or mix-in may override it to run code during the loop.
        """
        pass

    # The distinction between handling, getting, processing and
    # finishing a request is fairly arbitrary.  Remember:
    #
    # - handle_request() is the top-level call.  It calls
    #   select, get_request(), verify_request() and process_request()
    # - get_request() is different for stream or datagram sockets
    # - process_request() is the place that may fork a new process
    #   or create a new thread to finish the request
    # - finish_request() instantiates the request handler class;
    #   this constructor will handle the request all by itself

    def handle_request(self):
        """Handle a single request, blocking for at most the timeout.

        Respects self.timeout.
        """
        # Support people who used socket.settimeout() to escape
        # handle_request before self.timeout was available: the effective
        # timeout is the smaller of the two that are actually set.
        limits = [limit
                  for limit in (self.socket.gettimeout(), self.timeout)
                  if limit is not None]
        timeout = min(limits) if limits else None
        ready = _eintr_retry(select.select, [self], [], [], timeout)[0]
        if ready:
            self._handle_request_noblock()
        else:
            self.handle_timeout()

    def _handle_request_noblock(self):
        """Handle a single request that is assumed to be ready.

        The caller is expected to have seen the socket reported readable
        by select.select, so get_request() should not block.
        """
        try:
            request, client_address = self.get_request()
        except socket.error:
            return
        if not self.verify_request(request, client_address):
            return
        try:
            self.process_request(request, client_address)
        except:
            self.handle_error(request, client_address)
            self.shutdown_request(request)

    def handle_timeout(self):
        """Hook called when no request arrived within the timeout.

        Overridden by ForkingMixIn.
        """
        pass

    def verify_request(self, request, client_address):
        """Decide whether the request should be processed.

        Always returns True here; may be overridden.

        """
        return True

    def process_request(self, request, client_address):
        """Run finish_request() and then shut the request down.

        Overridden by ForkingMixIn and ThreadingMixIn.

        """
        self.finish_request(request, client_address)
        self.shutdown_request(request)

    def server_close(self):
        """Hook for cleaning up the server.

        Does nothing here; may be overridden.

        """
        pass

    def finish_request(self, request, client_address):
        """Handle one request by instantiating RequestHandlerClass."""
        self.RequestHandlerClass(request, client_address, self)

    def shutdown_request(self, request):
        """Shut down and close a single request."""
        self.close_request(request)

    def close_request(self, request):
        """Hook for cleaning up a single request."""
        pass

    def handle_error(self, request, client_address):
        """Report an error raised while processing a request.

        The default prints the client address and a traceback, then
        lets the server carry on.  May be overridden.

        """
        import traceback
        rule = '-'*40
        print(rule)
        print('Exception happened during processing of request from', end=' ')
        print(client_address)
        traceback.print_exc() # XXX But this goes to stderr!
        print(rule)
|  | ||||
|  | ||||
class TCPServer(BaseServer):

    """Base class for various socket-based server classes.

    Defaults to synchronous IP stream (i.e., TCP).

    Methods for the caller:

    - __init__(server_address, RequestHandlerClass, bind_and_activate=True)
    - serve_forever(poll_interval=0.5)
    - shutdown()
    - handle_request()  # if you don't use serve_forever()
    - fileno() -> int   # for select()

    Methods that may be overridden:

    - server_bind()
    - server_activate()
    - get_request() -> request, client_address
    - handle_timeout()
    - verify_request(request, client_address)
    - process_request(request, client_address)
    - shutdown_request(request)
    - close_request(request)
    - handle_error()

    Methods for derived classes:

    - finish_request(request, client_address)

    Class variables that may be overridden by derived classes or
    instances:

    - timeout
    - address_family
    - socket_type
    - request_queue_size (only for stream sockets)
    - allow_reuse_address

    Instance variables:

    - server_address
    - RequestHandlerClass
    - socket

    """

    address_family = socket.AF_INET

    socket_type = socket.SOCK_STREAM

    # Backlog passed to listen() in server_activate().
    request_queue_size = 5

    # When true, server_bind() sets SO_REUSEADDR before binding.
    allow_reuse_address = False

    def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
        """Constructor.  May be extended, do not override.

        Creates the listening socket and, unless bind_and_activate is
        false, binds and activates it.  If binding or activation fails,
        the socket is closed via server_close() before the error is
        re-raised, so a failed construction does not leak a descriptor.
        """
        BaseServer.__init__(self, server_address, RequestHandlerClass)
        self.socket = socket.socket(self.address_family,
                                    self.socket_type)
        if bind_and_activate:
            try:
                self.server_bind()
                self.server_activate()
            except:
                # Deliberately catch everything: the exception is always
                # re-raised, we only make sure the socket is released.
                self.server_close()
                raise

    def server_bind(self):
        """Called by constructor to bind the socket.

        May be overridden.

        """
        if self.allow_reuse_address:
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.socket.bind(self.server_address)
        # Record the address actually bound (e.g. the port chosen for 0).
        self.server_address = self.socket.getsockname()

    def server_activate(self):
        """Called by constructor to activate the server.

        May be overridden.

        """
        self.socket.listen(self.request_queue_size)

    def server_close(self):
        """Called to clean-up the server.

        May be overridden.

        """
        self.socket.close()

    def fileno(self):
        """Return socket file number.

        Interface required by select().

        """
        return self.socket.fileno()

    def get_request(self):
        """Get the request and client address from the socket.

        May be overridden.

        """
        return self.socket.accept()

    def shutdown_request(self, request):
        """Called to shutdown and close an individual request."""
        try:
            #explicitly shutdown.  socket.close() merely releases
            #the socket and waits for GC to perform the actual close.
            request.shutdown(socket.SHUT_WR)
        except socket.error:
            pass #some platforms may raise ENOTCONN here
        self.close_request(request)

    def close_request(self, request):
        """Called to clean up an individual request."""
        request.close()
|  | ||||
|  | ||||
class UDPServer(TCPServer):

    """Datagram (UDP) flavour of the socket server."""

    socket_type = socket.SOCK_DGRAM

    allow_reuse_address = False

    # Largest datagram read by a single recvfrom() call in get_request().
    max_packet_size = 8192

    def get_request(self):
        """Receive one datagram.

        Returns ((data, socket), client_address): the "request" is the
        pair of received bytes and the server socket.
        """
        payload, sender = self.socket.recvfrom(self.max_packet_size)
        request = (payload, self.socket)
        return request, sender

    def server_activate(self):
        """Do nothing: UDP sockets need no listen() call."""
        pass

    def shutdown_request(self, request):
        """Skip the socket shutdown step and go straight to close_request()."""
        self.close_request(request)

    def close_request(self, request):
        """Do nothing: there is no per-request resource to close."""
        pass
|  | ||||
class ForkingMixIn(object):

    """Mix-in class to handle each request in a new process."""

    # Seconds of inactivity after which handle_timeout() reaps children.
    timeout = 300
    # List of pids of forked children; None until the first fork.
    active_children = None
    # Once this many children are tracked, collect_children() blocks
    # until at least one of them has been reaped.
    max_children = 40

    def collect_children(self):
        """Internal routine to wait for children that have exited.

        Blocks while max_children or more children are tracked, then polls
        every remaining child without blocking and forgets those that have
        exited.  Pids the OS no longer knows as our children are dropped.
        """
        if self.active_children is None:
            return
        while len(self.active_children) >= self.max_children:
            # XXX: This will wait for any child process, not just ones
            # spawned by this library. This could confuse other
            # libraries that expect to be able to wait for their own
            # children.
            try:
                pid, status = os.waitpid(0, 0)
            except os.error as e:
                if e.errno == errno.EINTR:
                    # Interrupted by a signal: simply wait again.
                    continue
                if e.errno == errno.ECHILD:
                    # There is no child left to wait for, so every tracked
                    # pid is stale.  Without this the loop never ends.
                    del self.active_children[:]
                # Any other error will not go away by retrying.
                break
            if pid in self.active_children:
                self.active_children.remove(pid)

        # XXX: This loop runs more system calls than it ought
        # to. There should be a way to put the active_children into a
        # process group and then use os.waitpid(-pgid) to wait for any
        # of that set, but I couldn't find a way to allocate pgids
        # that couldn't collide.
        # Iterate over a snapshot: the list is modified inside the loop.
        for child in list(self.active_children):
            try:
                pid, status = os.waitpid(child, os.WNOHANG)
            except os.error as e:
                if e.errno != errno.ECHILD:
                    continue
                # Not (or no longer) our child: somebody else reaped it.
                pid = child
            if not pid:
                # waitpid() returned 0: the child is still running.
                continue
            try:
                self.active_children.remove(pid)
            except ValueError as e:
                # Format the exception itself; ``e.message`` does not
                # exist on Python 3.
                raise ValueError('%s. x=%d and list=%r' % (e, pid,
                                                           self.active_children))

    def handle_timeout(self):
        """Wait for zombies after self.timeout seconds of inactivity.

        May be extended, do not override.
        """
        self.collect_children()

    def service_actions(self):
        """Collect the zombie child processes regularly in the ForkingMixIn.

        service_actions is called in the BaseServer's serve_forever loop.
        """
        self.collect_children()

    def process_request(self, request, client_address):
        """Fork a new subprocess to process the request."""
        pid = os.fork()
        if pid:
            # Parent process: remember the child and release our copy of
            # the request; the child owns it from here on.
            if self.active_children is None:
                self.active_children = []
            self.active_children.append(pid)
            self.close_request(request)
            return
        else:
            # Child process.
            # This must never return, hence os._exit()!
            try:
                self.finish_request(request, client_address)
                self.shutdown_request(request)
                os._exit(0)
            except:
                try:
                    self.handle_error(request, client_address)
                    self.shutdown_request(request)
                finally:
                    os._exit(1)
|  | ||||
|  | ||||
class ThreadingMixIn(object):
    """Mix-in that hands every request to a freshly started thread."""

    # Copied onto each worker thread's ``daemon`` attribute, i.e. decides
    # how the threads behave when the main process terminates.
    daemon_threads = False

    def process_request_thread(self, request, client_address):
        """Body of the worker thread.

        Does what BaseServer.process_request() does, and additionally
        routes any exception to handle_error() before shutting the
        request down.

        """
        try:
            self.finish_request(request, client_address)
            self.shutdown_request(request)
        except:
            self.handle_error(request, client_address)
            self.shutdown_request(request)

    def process_request(self, request, client_address):
        """Spawn a thread running process_request_thread() for the request."""
        worker = threading.Thread(
            target=self.process_request_thread,
            args=(request, client_address),
        )
        worker.daemon = self.daemon_threads
        worker.start()
|  | ||||
|  | ||||
class ForkingUDPServer(ForkingMixIn, UDPServer):
    """UDPServer combined with ForkingMixIn."""


class ForkingTCPServer(ForkingMixIn, TCPServer):
    """TCPServer combined with ForkingMixIn."""


class ThreadingUDPServer(ThreadingMixIn, UDPServer):
    """UDPServer combined with ThreadingMixIn."""


class ThreadingTCPServer(ThreadingMixIn, TCPServer):
    """TCPServer combined with ThreadingMixIn."""
|  | ||||
# The Unix-domain server classes are only defined where the platform's
# socket module provides AF_UNIX.
if hasattr(socket, 'AF_UNIX'):

    class UnixStreamServer(TCPServer):
        """TCPServer using the AF_UNIX address family."""
        address_family = socket.AF_UNIX

    class UnixDatagramServer(UDPServer):
        """UDPServer using the AF_UNIX address family."""
        address_family = socket.AF_UNIX

    class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer):
        """UnixStreamServer combined with ThreadingMixIn."""

    class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer):
        """UnixDatagramServer combined with ThreadingMixIn."""
|  | ||||
class BaseRequestHandler(object):

    """Base class for request handler classes.

    One instance is created per request.  The constructor stores the
    request, the client address and the server on the instance and then
    runs setup(), handle() and finish() in that order; finish() runs
    even when handle() raises.  A concrete service normally only needs
    to derive a class that defines handle().

    Inside handle() the request is available as self.request, the
    client address as self.client_address and the server (for access
    to per-server information) as self.server.  Because every request
    gets its own instance, handle() is free to define any other
    instance variables it needs.

    """

    def __init__(self, request, client_address, server):
        self.server = server
        self.client_address = client_address
        self.request = request
        self.setup()
        try:
            self.handle()
        finally:
            self.finish()

    def setup(self):
        """Hook run before handle(); does nothing by default."""
        pass

    def handle(self):
        """Hook that services the request; does nothing by default."""
        pass

    def finish(self):
        """Hook run after handle(), even on error; does nothing by default."""
        pass
|  | ||||
|  | ||||
| # The following two classes make it possible to use the same service | ||||
| # class for stream or datagram servers. | ||||
| # Each class sets up these instance variables: | ||||
| # - rfile: a file object from which the request is read | ||||
| # - wfile: a file object to which the reply is written | ||||
| # When the handle() method returns, wfile is flushed properly | ||||
|  | ||||
|  | ||||
class StreamRequestHandler(BaseRequestHandler):

    """Provide self.rfile and self.wfile on top of a stream socket."""

    # Default buffer sizes for rfile, wfile.
    # rfile is buffered by default because unbuffered reads could be
    # really slow for large data (a getc() call per byte).  wfile is
    # unbuffered because (a) a write() is often followed by a read, which
    # needs the line flushed; (b) big writes to unbuffered files are
    # typically optimized by stdio even when big reads aren't.
    rbufsize = -1
    wbufsize = 0

    # Timeout applied to the request socket in setup(), unless None.
    timeout = None

    # When true, setup() sets TCP_NODELAY (disables the Nagle algorithm).
    # Use only when wbufsize != 0, to avoid small packets.
    disable_nagle_algorithm = False

    def setup(self):
        """Configure the connection and wrap it in rfile/wfile objects."""
        conn = self.connection = self.request
        if self.timeout is not None:
            conn.settimeout(self.timeout)
        if self.disable_nagle_algorithm:
            conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, True)
        self.rfile = conn.makefile('rb', self.rbufsize)
        self.wfile = conn.makefile('wb', self.wbufsize)

    def finish(self):
        """Flush wfile if it is still open, then close both file objects."""
        if not self.wfile.closed:
            try:
                self.wfile.flush()
            except socket.error:
                # A final socket error may have occurred here, such as
                # the local error ECONNABORTED.
                pass
        self.wfile.close()
        self.rfile.close()
|  | ||||
|  | ||||
class DatagramRequestHandler(BaseRequestHandler):

    """Provide self.rfile and self.wfile on top of a datagram request."""

    # XXX Regrettably, I cannot get this working on Linux;
    # s.recvfrom() doesn't return a meaningful client address.

    def setup(self):
        """Split the request into packet and socket; create in-memory files.

        rfile reads from the received packet; wfile collects the reply
        that finish() sends.
        """
        from io import BytesIO
        packet, sock = self.request
        self.packet = packet
        self.socket = sock
        self.rfile = BytesIO(packet)
        self.wfile = BytesIO()

    def finish(self):
        """Send everything written to wfile back to the client address."""
        reply = self.wfile.getvalue()
        self.socket.sendto(reply, self.client_address)
| @ -0,0 +1,9 @@ | ||||
| """ | ||||
| test package backported for python-future. | ||||
|  | ||||
| Its primary purpose is to allow use of "import test.support" for running | ||||
| the Python standard library unit tests using the new Python 3 stdlib | ||||
| import location. | ||||
|  | ||||
| Python 3 renamed test.test_support to test.support. | ||||
| """ | ||||
| @ -0,0 +1,272 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| """ | ||||
| "PYSTONE" Benchmark Program | ||||
|  | ||||
| Version:        Python/1.1 (corresponds to C/1.1 plus 2 Pystone fixes) | ||||
|  | ||||
| Author:         Reinhold P. Weicker,  CACM Vol 27, No 10, 10/84 pg. 1013. | ||||
|  | ||||
|                 Translated from ADA to C by Rick Richardson. | ||||
|                 Every method to preserve ADA-likeness has been used, | ||||
|                 at the expense of C-ness. | ||||
|  | ||||
|                 Translated from C to Python by Guido van Rossum. | ||||
|  | ||||
| Version History: | ||||
|  | ||||
|                 Version 1.1 corrects two bugs in version 1.0: | ||||
|  | ||||
|                 First, it leaked memory: in Proc1(), NextRecord ends | ||||
|                 up having a pointer to itself.  I have corrected this | ||||
|                 by zapping NextRecord.PtrComp at the end of Proc1(). | ||||
|  | ||||
|                 Second, Proc3() used the operator != to compare a | ||||
|                 record to None.  This is rather inefficient and not | ||||
|                 true to the intention of the original benchmark (where | ||||
|                 a pointer comparison to None is intended; the != | ||||
|                 operator attempts to find a method __cmp__ to do value | ||||
|                 comparison of the record).  Version 1.1 runs 5-10 | ||||
|                 percent faster than version 1.0, so benchmark figures | ||||
|                 of different versions can't be compared directly. | ||||
|  | ||||
| """ | ||||
|  | ||||
| from __future__ import print_function | ||||
|  | ||||
| from time import clock | ||||
|  | ||||
# Default number of benchmark passes (default argument of main(),
# pystones() and Proc0()).
LOOPS = 50000

# Benchmark version, printed by main().
__version__ = "1.1"

# Enumeration constants Ident1..Ident5 with the values 1..5.
[Ident1, Ident2, Ident3, Ident4, Ident5] = range(1, 6)
|  | ||||
class Record(object):
    """Plain record with five fields, used as the benchmark's data node."""

    def __init__(self, PtrComp = None, Discr = 0, EnumComp = 0,
                       IntComp = 0, StringComp = 0):
        """Store the five components on the instance unchanged."""
        self.PtrComp, self.Discr = PtrComp, Discr
        self.EnumComp, self.IntComp = EnumComp, IntComp
        self.StringComp = StringComp

    def copy(self):
        """Return a new Record holding the same five component values.

        The copy is shallow: PtrComp refers to the same object.
        """
        return Record(PtrComp=self.PtrComp,
                      Discr=self.Discr,
                      EnumComp=self.EnumComp,
                      IntComp=self.IntComp,
                      StringComp=self.StringComp)
|  | ||||
# Integer stand-ins for boolean values.
TRUE = 1
FALSE = 0
|  | ||||
def main(loops=LOOPS):
    """Run the benchmark for `loops` passes and print the two result lines."""
    elapsed, rate = pystones(loops)
    summary = "Pystone(%s) time for %d passes = %g" % (
        __version__, loops, elapsed)
    print(summary)
    print("This machine benchmarks at %g pystones/second" % rate)
|  | ||||
|  | ||||
def pystones(loops=LOOPS):
    """Run the benchmark and return Proc0's (benchtime, rate) result."""
    result = Proc0(loops)
    return result
|  | ||||
# Module-level state of the benchmark; Proc0 declares all of these global.
IntGlob = 0
BoolGlob = FALSE
Char1Glob = '\0'
Char2Glob = '\0'
# A list of 51 zeros.
Array1Glob = [0]*51
# 51 independent copies of Array1Glob (x[:] copies each row), i.e. a
# 51 x 51 table of zeros whose rows do not alias each other.
Array2Glob = [x[:] for x in [Array1Glob]*51]
# Both are set to Record instances at the start of Proc0.
PtrGlb = None
PtrGlbNext = None
|  | ||||
def Proc0(loops=LOOPS):
    """Run the Pystone main loop *loops* times and time it.

    The time of an empty ``for`` loop of the same length is measured first
    and subtracted, so the result reflects only the benchmark body.

    Returns:
        A ``(benchtime, loops_per_second)`` tuple.  ``loops_per_second`` is
        0.0 when the measured time is exactly zero, to avoid a division by
        zero.
    """
    global IntGlob
    global BoolGlob
    global Char1Glob
    global Char2Glob
    global Array1Glob
    global Array2Glob
    global PtrGlb
    global PtrGlbNext

    # Cost of the bare loop, subtracted from the measurement below.
    starttime = clock()
    for i in range(loops):
        pass
    nulltime = clock() - starttime

    # Build the two linked global records the Proc* functions operate on.
    PtrGlbNext = Record()
    PtrGlb = Record()
    PtrGlb.PtrComp = PtrGlbNext
    PtrGlb.Discr = Ident1
    PtrGlb.EnumComp = Ident3
    PtrGlb.IntComp = 40
    PtrGlb.StringComp = "DHRYSTONE PROGRAM, SOME STRING"
    String1Loc = "DHRYSTONE PROGRAM, 1'ST STRING"
    Array2Glob[8][7] = 10

    starttime = clock()

    for i in range(loops):
        Proc5()
        Proc4()
        IntLoc1 = 2
        IntLoc2 = 3
        String2Loc = "DHRYSTONE PROGRAM, 2'ND STRING"
        EnumLoc = Ident2
        BoolGlob = not Func2(String1Loc, String2Loc)
        while IntLoc1 < IntLoc2:
            IntLoc3 = 5 * IntLoc1 - IntLoc2
            IntLoc3 = Proc7(IntLoc1, IntLoc2)
            IntLoc1 = IntLoc1 + 1
        Proc8(Array1Glob, Array2Glob, IntLoc1, IntLoc3)
        PtrGlb = Proc1(PtrGlb)
        CharIndex = 'A'
        while CharIndex <= Char2Glob:
            if EnumLoc == Func1(CharIndex, 'C'):
                EnumLoc = Proc6(Ident1)
            CharIndex = chr(ord(CharIndex)+1)
        IntLoc3 = IntLoc2 * IntLoc1
        # Floor division keeps this an integer benchmark on Python 3, where
        # "/" would produce floats; on Python 2 ints the result is unchanged.
        IntLoc2 = IntLoc3 // IntLoc1
        IntLoc2 = 7 * (IntLoc3 - IntLoc2) - IntLoc1
        IntLoc1 = Proc2(IntLoc1)

    benchtime = clock() - starttime - nulltime
    if benchtime == 0.0:
        loopsPerBenchtime = 0.0
    else:
        loopsPerBenchtime = (loops / benchtime)
    return benchtime, loopsPerBenchtime
|  | ||||
def Proc1(PtrParIn):
    """Link a copy of the global record behind *PtrParIn* and update it.

    Returns *PtrParIn*, or a copy of the new record when that record's
    ``Discr`` is not ``Ident1``.
    """
    successor = PtrGlb.copy()
    PtrParIn.PtrComp = successor
    PtrParIn.IntComp = 5
    successor.IntComp = PtrParIn.IntComp
    successor.PtrComp = PtrParIn.PtrComp
    successor.PtrComp = Proc3(successor.PtrComp)
    if successor.Discr != Ident1:
        PtrParIn = successor.copy()
    else:
        successor.IntComp = 6
        successor.EnumComp = Proc6(PtrParIn.EnumComp)
        successor.PtrComp = PtrGlb.PtrComp
        successor.IntComp = Proc7(successor.IntComp, 10)
    successor.PtrComp = None
    return PtrParIn
|  | ||||
def Proc2(IntParIO):
    """Return an adjusted value derived from *IntParIO* and ``IntGlob``.

    The loop exits once the local state equals ``Ident1``, which is only
    set when the global ``Char1Glob`` is ``'A'``.
    """
    counter = IntParIO + 10
    while True:
        if Char1Glob == 'A':
            counter -= 1
            IntParIO = counter - IntGlob
            state = Ident1
        if state == Ident1:
            break
    return IntParIO
|  | ||||
def Proc3(PtrParOut):
    """Return the record linked from the global ``PtrGlb``.

    When ``PtrGlb`` is None, ``IntGlob`` is set to 100 and *PtrParOut* is
    left unchanged.  ``PtrGlb.IntComp`` is then updated unconditionally.
    """
    global IntGlob

    if PtrGlb is None:
        IntGlob = 100
    else:
        PtrParOut = PtrGlb.PtrComp
    PtrGlb.IntComp = Proc7(10, IntGlob)
    return PtrParOut
|  | ||||
def Proc4():
    """Compute a throw-away boolean and set the global ``Char2Glob`` to 'B'."""
    global Char2Glob

    flag = Char1Glob == 'A'
    if not flag:
        flag = BoolGlob
    Char2Glob = 'B'
|  | ||||
def Proc5():
    """Reset the globals ``Char1Glob`` to 'A' and ``BoolGlob`` to FALSE."""
    global Char1Glob
    global BoolGlob

    Char1Glob, BoolGlob = 'A', FALSE
|  | ||||
def Proc6(EnumParIn):
    """Map one ``Ident*`` value to another.

    ``Ident1`` maps to ``Ident1``; ``Ident2`` maps to ``Ident1`` when
    ``IntGlob`` exceeds 100, otherwise to ``Ident4``; ``Ident3`` maps to
    ``Ident2``; ``Ident5`` maps to ``Ident3``.  Any other input yields
    ``Ident4``, because Func3() is only true for ``Ident3``.
    """
    fallback = EnumParIn if Func3(EnumParIn) else Ident4
    if EnumParIn == Ident1:
        return Ident1
    if EnumParIn == Ident2:
        return Ident1 if IntGlob > 100 else Ident4
    if EnumParIn == Ident3:
        return Ident2
    if EnumParIn == Ident5:
        return Ident3
    return fallback
|  | ||||
def Proc7(IntParI1, IntParI2):
    """Return ``IntParI2 + (IntParI1 + 2)``."""
    return IntParI2 + (IntParI1 + 2)
|  | ||||
def Proc8(Array1Par, Array2Par, IntParI1, IntParI2):
    """Write a fixed pattern into the two arrays and set ``IntGlob`` to 5.

    All indices are derived from ``IntParI1 + 5``.  *Array1Par* is indexed
    with one subscript and *Array2Par* with two.
    """
    global IntGlob

    base = IntParI1 + 5
    Array1Par[base] = IntParI2
    Array1Par[base + 1] = Array1Par[base]
    Array1Par[base + 30] = base
    row = Array2Par[base]
    for column in (base, base + 1):
        row[column] = base
    row[base - 1] = row[base - 1] + 1
    Array2Par[base + 20][base] = Array1Par[base]
    IntGlob = 5
|  | ||||
def Func1(CharPar1, CharPar2):
    """Return ``Ident1`` when the two arguments differ, else ``Ident2``."""
    return Ident1 if CharPar1 != CharPar2 else Ident2
|  | ||||
def Func2(StrParI1, StrParI2):
    """Compare two strings the Dhrystone way and return TRUE or FALSE.

    The loop advances only when ``StrParI1[1]`` differs from
    ``StrParI2[2]``.  The result is then TRUE exactly when *StrParI1*
    compares greater than *StrParI2*, since the marker character is only
    ever set to 'A'.
    """
    position = 1
    while position <= 1:
        if Func1(StrParI1[position], StrParI2[position + 1]) == Ident1:
            marker = 'A'
            position = position + 1
    if 'W' <= marker <= 'Z':
        position = 7
    if marker == 'X':
        return TRUE
    if StrParI1 > StrParI2:
        position = position + 7
        return TRUE
    return FALSE
|  | ||||
def Func3(EnumParIn):
    """Return TRUE when *EnumParIn* equals ``Ident3``, otherwise FALSE."""
    return TRUE if EnumParIn == Ident3 else FALSE
|  | ||||
if __name__ == '__main__':
    import sys

    def error(msg):
        """Print *msg* plus a usage line to stderr and exit with status 100."""
        print(msg, end=' ', file=sys.stderr)
        print("usage: %s [number_of_loops]" % sys.argv[0], file=sys.stderr)
        sys.exit(100)

    # The only optional argument is the number of benchmark passes.
    nargs = len(sys.argv) - 1
    if nargs == 0:
        loops = LOOPS
    elif nargs == 1:
        try:
            loops = int(sys.argv[1])
        except ValueError:
            error("Invalid argument %r;" % sys.argv[1])
    else:
        error("%d arguments are too many;" % nargs)
    main(loops)
| @ -0,0 +1,207 @@ | ||||
| from __future__ import absolute_import, division, print_function, unicode_literals | ||||
| from future.builtins import filter, str | ||||
| from future import utils | ||||
| import os | ||||
| import sys | ||||
| import ssl | ||||
| import pprint | ||||
| import socket | ||||
| from future.backports.urllib import parse as urllib_parse | ||||
| from future.backports.http.server import (HTTPServer as _HTTPServer, | ||||
|     SimpleHTTPRequestHandler, BaseHTTPRequestHandler) | ||||
| from future.backports.test import support | ||||
| threading = support.import_module("threading") | ||||
|  | ||||
| here = os.path.dirname(__file__) | ||||
|  | ||||
| HOST = support.HOST | ||||
| CERTFILE = os.path.join(here, 'keycert.pem') | ||||
|  | ||||
| # This one's based on HTTPServer, which is based on SocketServer | ||||
|  | ||||
class HTTPSServer(_HTTPServer):
    """HTTP server that wraps every accepted connection with an SSL context."""

    def __init__(self, server_address, handler_class, context):
        _HTTPServer.__init__(self, server_address, handler_class)
        # Context used by get_request() to wrap accepted sockets.
        self.context = context

    def __str__(self):
        fields = (self.__class__.__name__, self.server_name, self.server_port)
        return '<%s %s:%s>' % fields

    def get_request(self):
        """Accept a connection and return ``(ssl_socket, address)``."""
        # override this to wrap socket with SSL
        try:
            raw_sock, peer = self.socket.accept()
            secure_sock = self.context.wrap_socket(raw_sock, server_side=True)
        except socket.error as exc:
            # socket errors are silenced by the caller, print them here
            if support.verbose:
                sys.stderr.write("Got an error:\n%s\n" % exc)
            raise
        else:
            return secure_sock, peer
|  | ||||
class RootedHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler that serves files below a fixed root directory.

    translate_path() is overridden to resolve paths against ``root``
    instead of os.curdir, since the test could be run from anywhere.
    """

    server_version = "TestHTTPS/1.0"
    root = here
    # Avoid hanging when a request gets interrupted by the client
    timeout = 5

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        Returns the resulting path below ``self.root``.
        """
        # abandon query parameters
        # This module imports the parser as ``urllib_parse``; the bare name
        # ``urllib`` is not defined here.
        path = urllib_parse.urlparse(path)[2]
        path = os.path.normpath(urllib_parse.unquote(path))
        words = filter(None, path.split('/'))
        path = self.root
        for word in words:
            # Keep only the final component: drop any drive or directory part.
            word = os.path.splitdrive(word)[1]
            word = os.path.split(word)[1]
            path = os.path.join(path, word)
        return path

    def log_message(self, format, *args):
        """Write a log line to stdout, but only when ``support.verbose`` is set."""
        # we override this to suppress logging unless "verbose"
        if support.verbose:
            sys.stdout.write(" server (%s:%d %s):\n   [%s] %s\n" %
                             (self.server.server_address,
                              self.server.server_port,
                              self.request.cipher(),
                              self.log_date_time_string(),
                              format%args))
|  | ||||
|  | ||||
class StatsRequestHandler(BaseHTTPRequestHandler):
    """Example HTTP request handler which returns SSL statistics on GET
    requests.
    """

    server_version = "StatsHTTPS/1.0"

    def do_GET(self, send_body=True):
        """Serve a GET request."""
        ssl_sock = self.rfile.raw._sock
        ssl_context = ssl_sock.context
        stats = {
            'session_cache': ssl_context.session_stats(),
            'cipher': ssl_sock.cipher(),
            'compression': ssl_sock.compression(),
            }
        payload = pprint.pformat(stats).encode('utf-8')
        self.send_response(200)
        self.send_header("Content-type", "text/plain; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        if not send_body:
            return
        self.wfile.write(payload)

    def do_HEAD(self):
        """Serve a HEAD request."""
        self.do_GET(send_body=False)

    def log_request(self, format, *args):
        """Log the request only when ``support.verbose`` is set."""
        if not support.verbose:
            return
        BaseHTTPRequestHandler.log_request(self, format, *args)
|  | ||||
|  | ||||
class HTTPSServerThread(threading.Thread):
    """Daemon thread that runs an HTTPSServer bound to a free port."""

    def __init__(self, context, host=HOST, handler_class=None):
        self.flag = None
        if not handler_class:
            handler_class = RootedHTTPRequestHandler
        # Port 0 lets the OS choose a free port; it is exposed as self.port.
        self.server = HTTPSServer((host, 0), handler_class, context)
        self.port = self.server.server_port
        threading.Thread.__init__(self)
        self.daemon = True

    def __str__(self):
        return "<%s %s>" % (self.__class__.__name__, self.server)

    def start(self, flag=None):
        """Start the thread; *flag*, if given, is set once run() begins."""
        self.flag = flag
        threading.Thread.start(self)

    def run(self):
        """Signal readiness, then serve until shutdown and close the server."""
        ready = self.flag
        if ready:
            ready.set()
        try:
            self.server.serve_forever(0.05)
        finally:
            self.server.server_close()

    def stop(self):
        """Ask the serving loop to exit."""
        self.server.shutdown()
|  | ||||
|  | ||||
def make_https_server(case, certfile=CERTFILE, host=HOST, handler_class=None):
    """Start an HTTPS server thread for a test case and return it.

    A cleanup that stops and joins the thread is registered on *case*
    through ``case.addCleanup``.
    """
    # we assume the certfile contains both private key and certificate
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    ssl_context.load_cert_chain(certfile)
    server = HTTPSServerThread(ssl_context, host, handler_class)
    started = threading.Event()
    server.start(started)
    # Block until the server thread's run() has begun.
    started.wait()

    def _say(text):
        if support.verbose:
            sys.stdout.write(text)

    def cleanup():
        _say('stopping HTTPS server\n')
        server.stop()
        _say('joining HTTPS thread\n')
        server.join()

    case.addCleanup(cleanup)
    return server
|  | ||||
|  | ||||
if __name__ == "__main__":
    # Command-line entry point: run a stand-alone test HTTPS server.
    import argparse
    parser = argparse.ArgumentParser(
        description='Run a test HTTPS server. '
                    'By default, the current directory is served.')
    parser.add_argument('-p', '--port', type=int, default=4433,
                        help='port to listen on (default: %(default)s)')
    parser.add_argument('-q', '--quiet', dest='verbose', default=True,
                        action='store_false', help='be less verbose')
    parser.add_argument('-s', '--stats', dest='use_stats_handler', default=False,
                        action='store_true', help='always return stats page')
    parser.add_argument('--curve-name', dest='curve_name', type=str,
                        action='store',
                        help='curve name for EC-based Diffie-Hellman')
    parser.add_argument('--dh', dest='dh_file', type=str, action='store',
                        help='PEM file containing DH parameters')
    args = parser.parse_args()

    support.verbose = args.verbose
    if not args.use_stats_handler:
        # Serve files from the directory the script was started in.
        handler_class = RootedHTTPRequestHandler
        handler_class.root = os.getcwdu() if utils.PY2 else os.getcwd()
    else:
        handler_class = StatsRequestHandler
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    context.load_cert_chain(CERTFILE)
    if args.curve_name:
        context.set_ecdh_curve(args.curve_name)
    if args.dh_file:
        context.load_dh_params(args.dh_file)

    server = HTTPSServer(("", args.port), handler_class, context)
    if args.verbose:
        print("Listening on https://localhost:{0.port}".format(args))
    server.serve_forever(0.1)
							
								
								
									
										2016
									
								
								venv/lib/python3.12/site-packages/future/backports/test/support.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2016
									
								
								venv/lib/python3.12/site-packages/future/backports/test/support.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,38 @@ | ||||
| """ | ||||
| For Python < 2.7.2. total_ordering in versions prior to 2.7.2 is buggy. | ||||
| See http://bugs.python.org/issue10042 for details. For these versions use | ||||
| code borrowed from Python 2.7.3. | ||||
|  | ||||
| From django.utils. | ||||
| """ | ||||
|  | ||||
| import sys | ||||
if sys.version_info < (2, 7, 2):
    def total_ordering(cls):
        """Class decorator that fills in missing ordering methods"""
        # For each root operation defined by the class, the (name, function)
        # pairs that derive the other three comparisons from it.
        derived = {
            '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)),
                       ('__le__', lambda self, other: self < other or self == other),
                       ('__ge__', lambda self, other: not self < other)],
            '__le__': [('__ge__', lambda self, other: not self <= other or self == other),
                       ('__lt__', lambda self, other: self <= other and not self == other),
                       ('__gt__', lambda self, other: not self <= other)],
            '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)),
                       ('__ge__', lambda self, other: self > other or self == other),
                       ('__le__', lambda self, other: not self > other)],
            '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other),
                       ('__gt__', lambda self, other: self >= other and not self == other),
                       ('__lt__', lambda self, other: not self >= other)]
        }
        defined = set(dir(cls)) & set(derived)
        if not defined:
            raise ValueError('must define at least one ordering operation: < > <= >=')
        base_op = max(defined)  # prefer __lt__ to __le__ to __gt__ to __ge__
        for name, func in derived[base_op]:
            if name in defined:
                continue
            func.__name__ = name
            func.__doc__ = getattr(int, name).__doc__
            setattr(cls, name, func)
        return cls
else:
    # From 2.7.2 on the standard-library implementation is used.
    from functools import total_ordering
| @ -0,0 +1,75 @@ | ||||
| """Exception classes raised by urllib. | ||||
|  | ||||
| The base exception class is URLError, which inherits from IOError.  It | ||||
| doesn't define any behavior of its own, but is the base class for all | ||||
| exceptions defined in this package. | ||||
|  | ||||
| HTTPError is an exception class that is also a valid HTTP response | ||||
| instance.  It behaves this way because HTTP protocol errors are valid | ||||
| responses, with a status code, headers, and a body.  In some contexts, | ||||
| an application may want to handle an exception like a regular | ||||
| response. | ||||
| """ | ||||
| from __future__ import absolute_import, division, unicode_literals | ||||
| from future import standard_library | ||||
|  | ||||
| from future.backports.urllib import response as urllib_response | ||||
|  | ||||
|  | ||||
| __all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] | ||||
|  | ||||
|  | ||||
| # do these error classes make sense? | ||||
| # make sure all of the IOError stuff is overridden.  we just want to be | ||||
| # subtypes. | ||||
|  | ||||
class URLError(IOError):
    """Base class for the exceptions defined in this module.

    URLError is a sub-type of IOError but shares none of its
    implementation, so ``__init__`` and ``__str__`` are overridden.
    ``args`` is set for compatibility with other EnvironmentError
    subclasses, although it does not follow the usual
    ``(errno, strerror)`` layout.
    """

    def __init__(self, reason, filename=None):
        self.reason = reason
        self.args = (reason,)
        if filename is None:
            return
        self.filename = filename

    def __str__(self):
        return '<urlopen error %s>' % self.reason
|  | ||||
class HTTPError(URLError, urllib_response.addinfourl):
    """Raised when HTTP error occurs, but also acts like non-error return"""
    __super_init = urllib_response.addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.code, self.msg, self.hdrs = code, msg, hdrs
        self.fp = fp
        self.filename = url
        # The addinfourl classes depend on fp being a valid file
        # object.  In some cases, the HTTPError may not have a valid
        # file object.  If this happens, the simplest workaround is to
        # not initialize the base classes.
        if fp is None:
            return
        self.__super_init(fp, hdrs, url, code)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)

    # since URLError specifies a .reason attribute, HTTPError should also
    #  provide this attribute. See issue13211 for discussion.
    @property
    def reason(self):
        """The HTTP status message (same value as ``msg``)."""
        return self.msg

    def info(self):
        """Return the response headers given to the constructor."""
        return self.hdrs
|  | ||||
|  | ||||
| # exception raised when downloaded size does not match content-length | ||||
class ContentTooShortError(URLError):
    """URLError that also carries the content passed to the constructor."""

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
| @ -0,0 +1,991 @@ | ||||
| """ | ||||
| Ported using Python-Future from the Python 3.3 standard library. | ||||
|  | ||||
| Parse (absolute and relative) URLs. | ||||
|  | ||||
| urlparse module is based upon the following RFC specifications. | ||||
|  | ||||
| RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding | ||||
| and L.  Masinter, January 2005. | ||||
|  | ||||
| RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden, B. Carpenter | ||||
| and L. Masinter, December 1999. | ||||
|  | ||||
| RFC 2396:  "Uniform Resource Identifiers (URI)": Generic Syntax by T. | ||||
| Berners-Lee, R. Fielding, and L. Masinter, August 1998. | ||||
|  | ||||
| RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. | ||||
|  | ||||
| RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June | ||||
| 1995. | ||||
|  | ||||
| RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. | ||||
| McCahill, December 1994 | ||||
|  | ||||
| RFC 3986 is considered the current standard and any future changes to | ||||
| the urlparse module should conform with it.  The urlparse module is | ||||
| currently not entirely compliant with this RFC due to de facto | ||||
| scenarios for parsing, and for backward compatibility purposes, some | ||||
| parsing quirks from older RFCs are retained. The test cases in | ||||
| test_urlparse.py provide a good indicator of parsing behavior. | ||||
| """ | ||||
| from __future__ import absolute_import, division, unicode_literals | ||||
| from future.builtins import bytes, chr, dict, int, range, str | ||||
| from future.utils import raise_with_traceback | ||||
|  | ||||
| import re | ||||
| import sys | ||||
| import collections | ||||
|  | ||||
| __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", | ||||
|            "urlsplit", "urlunsplit", "urlencode", "parse_qs", | ||||
|            "parse_qsl", "quote", "quote_plus", "quote_from_bytes", | ||||
|            "unquote", "unquote_plus", "unquote_to_bytes"] | ||||
|  | ||||
| # A classification of schemes ('' means apply by default) | ||||
| uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', | ||||
|                  'wais', 'file', 'https', 'shttp', 'mms', | ||||
|                  'prospero', 'rtsp', 'rtspu', '', 'sftp', | ||||
|                  'svn', 'svn+ssh'] | ||||
| uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', | ||||
|                'imap', 'wais', 'file', 'mms', 'https', 'shttp', | ||||
|                'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', | ||||
|                'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] | ||||
| uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', | ||||
|                'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', | ||||
|                'mms', '', 'sftp', 'tel'] | ||||
|  | ||||
| # These are not actually used anymore, but should stay for backwards | ||||
| # compatibility.  (They are undocumented, but have a public-looking name.) | ||||
| non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', | ||||
|                     'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] | ||||
| uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', | ||||
|               'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] | ||||
| uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', | ||||
|                  'nntp', 'wais', 'https', 'shttp', 'snews', | ||||
|                  'file', 'prospero', ''] | ||||
|  | ||||
| # Characters valid in scheme names | ||||
| scheme_chars = ('abcdefghijklmnopqrstuvwxyz' | ||||
|                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' | ||||
|                 '0123456789' | ||||
|                 '+-.') | ||||
|  | ||||
| # XXX: Consider replacing with functools.lru_cache | ||||
| MAX_CACHE_SIZE = 20 | ||||
| _parse_cache = {} | ||||
|  | ||||
def clear_cache():
    """Clear the parse cache and the quoters cache."""
    for cache in (_parse_cache, _safe_quoters):
        cache.clear()
|  | ||||
|  | ||||
| # Helpers for bytes handling | ||||
| # For 3.2, we deliberately require applications that | ||||
| # handle improperly quoted URLs to do their own | ||||
| # decoding and encoding. If valid use cases are | ||||
| # presented, we may relax this by using latin-1 | ||||
| # decoding internally for 3.3 | ||||
| _implicit_encoding = 'ascii' | ||||
| _implicit_errors = 'strict' | ||||
|  | ||||
| def _noop(obj): | ||||
|     return obj | ||||
|  | ||||
def _encode_result(obj, encoding=_implicit_encoding,
                        errors=_implicit_errors):
    """Return ``obj.encode(encoding, errors)``."""
    encoded = obj.encode(encoding, errors)
    return encoded
|  | ||||
def _decode_args(args, encoding=_implicit_encoding,
                       errors=_implicit_errors):
    """Return a tuple with every item of *args* decoded.

    Falsy items (empty values or None) become the empty string.
    """
    decoded = []
    for item in args:
        decoded.append(item.decode(encoding, errors) if item else '')
    return tuple(decoded)
|  | ||||
def _coerce_args(*args):
    """Coerce *args* to str and append a matching result-coercion function.

    For str input the arguments are returned as given, followed by
    ``_noop``.  Otherwise they are decoded and followed by
    ``_encode_result``.  Raises TypeError when str and non-str arguments
    are mixed.
    """
    # The first argument decides whether the call is in "str mode".
    expect_str = isinstance(args[0], str)
    # We special-case the empty string to support the
    # "scheme=''" default argument to some functions
    if any(extra and isinstance(extra, str) != expect_str
           for extra in args[1:]):
        raise TypeError("Cannot mix str and non-str arguments")
    if not expect_str:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)
|  | ||||
| # Result objects are more helpful than simple tuples | ||||
| class _ResultMixinStr(object): | ||||
|     """Standard approach to encoding parsed results from str to bytes""" | ||||
|     __slots__ = () | ||||
|  | ||||
|     def encode(self, encoding='ascii', errors='strict'): | ||||
|         return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) | ||||
|  | ||||
|  | ||||
| class _ResultMixinBytes(object): | ||||
|     """Standard approach to decoding parsed results from bytes to str""" | ||||
|     __slots__ = () | ||||
|  | ||||
|     def decode(self, encoding='ascii', errors='strict'): | ||||
|         return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) | ||||
|  | ||||
|  | ||||
| class _NetlocResultMixinBase(object): | ||||
|     """Shared methods for the parsed result objects containing a netloc element""" | ||||
|     __slots__ = () | ||||
|  | ||||
|     @property | ||||
|     def username(self): | ||||
|         return self._userinfo[0] | ||||
|  | ||||
|     @property | ||||
|     def password(self): | ||||
|         return self._userinfo[1] | ||||
|  | ||||
|     @property | ||||
|     def hostname(self): | ||||
|         hostname = self._hostinfo[0] | ||||
|         if not hostname: | ||||
|             hostname = None | ||||
|         elif hostname is not None: | ||||
|             hostname = hostname.lower() | ||||
|         return hostname | ||||
|  | ||||
|     @property | ||||
|     def port(self): | ||||
|         port = self._hostinfo[1] | ||||
|         if port is not None: | ||||
|             port = int(port, 10) | ||||
|             # Return None on an illegal port | ||||
|             if not ( 0 <= port <= 65535): | ||||
|                 return None | ||||
|         return port | ||||
|  | ||||
|  | ||||
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """Netloc parsing helpers for results holding str fields."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return ``(username, password)`` parsed from ``self.netloc``.

        Both are None without an '@'; password is None without a ':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        username, colon, password = credentials.partition(':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return ``(hostname, port)`` parsed from ``self.netloc``.

        port is the text after the ':' separator, or None without one.
        """
        host_part = self.netloc.rpartition('@')[2]
        _, open_bracket, bracketed = host_part.partition('[')
        if open_bracket:
            # Bracketed host: the port, if any, follows the closing ']'.
            hostname, _, trailer = bracketed.partition(']')
            _, colon, port = trailer.partition(':')
        else:
            hostname, colon, port = host_part.partition(':')
        return hostname, (port if colon else None)
|  | ||||
|  | ||||
class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """Netloc parsing helpers for results holding bytes fields."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return ``(username, password)`` parsed from ``self.netloc``.

        Both are None without a b'@'; password is None without a b':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        username, colon, password = credentials.partition(b':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return ``(hostname, port)`` parsed from ``self.netloc``.

        port is the bytes after the b':' separator, or None without one.
        """
        host_part = self.netloc.rpartition(b'@')[2]
        _, open_bracket, bracketed = host_part.partition(b'[')
        if open_bracket:
            # Bracketed host: the port, if any, follows the closing b']'.
            hostname, _, trailer = bracketed.partition(b']')
            _, colon, port = trailer.partition(b':')
        else:
            hostname, colon, port = host_part.partition(b':')
        return hostname, (port if colon else None)
|  | ||||
|  | ||||
from collections import namedtuple

# Plain tuple layouts shared by the str and bytes result classes below.
_DefragResultBase = namedtuple('DefragResult', ['url', 'fragment'])
_SplitResultBase = namedtuple(
    'SplitResult', ['scheme', 'netloc', 'path', 'query', 'fragment'])
_ParseResultBase = namedtuple(
    'ParseResult',
    ['scheme', 'netloc', 'path', 'params', 'query', 'fragment'])

# ResultBase is kept purely as a backwards-compatible alias of
# _NetlocResultMixinStr.  It is no longer part of the documented API, but
# deprecating it isn't worth the hassle.
ResultBase = _NetlocResultMixinStr
|  | ||||
| # Structured result objects for string data | ||||
class DefragResult(_DefragResultBase, _ResultMixinStr):
    """``(url, fragment)`` result holding string data."""
    __slots__ = ()
    def geturl(self):
        """Return the URL, with '#' + fragment appended when the fragment
        is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + '#' + self.fragment
|  | ||||
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """Five-field split result holding string data."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the fields into a URL via urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt
|  | ||||
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """Six-field parse result holding string data."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the fields into a URL via urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt
|  | ||||
| # Structured result objects for bytes data | ||||
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """``(url, fragment)`` result holding bytes data."""
    __slots__ = ()
    def geturl(self):
        """Return the URL, with b'#' + fragment appended when the fragment
        is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + b'#' + self.fragment
|  | ||||
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """Five-field split result holding bytes data."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the fields into a URL via urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt
|  | ||||
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """Six-field parse result holding bytes data."""
    __slots__ = ()
    def geturl(self):
        """Reassemble the fields into a URL via urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt
|  | ||||
| # Set up the encode/decode result pairs | ||||
def _fix_result_transcoding():
    """Cross-link every str result class with its bytes twin.

    Each str class gets ``_encoded_counterpart`` pointing at the bytes
    class, and each bytes class gets ``_decoded_counterpart`` pointing back.
    """
    for _decoded, _encoded in (
            (DefragResult, DefragResultBytes),
            (SplitResult, SplitResultBytes),
            (ParseResult, ParseResultBytes)):
        _decoded._encoded_counterpart = _encoded
        _encoded._decoded_counterpart = _decoded

# Run the one-off wiring, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding
|  | ||||
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Returns a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Components are not broken into smaller pieces (netloc stays a single
    string) and % escapes are not expanded.  Params are only split off the
    path when the scheme is listed in ``uses_params``.
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(url, scheme,
                                                     allow_fragments)
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    parsed = ParseResult(scheme, netloc, path, params, query, fragment)
    return _coerce_result(parsed)
|  | ||||
def _splitparams(url):
    """Split ``url`` at a ';' into ``(path, params)``.

    When the text contains a '/', only a ';' at or after the last '/' is
    considered, and ``(url, '')`` is returned if there is none.  Without a
    '/', the first ';' is used.
    """
    last_slash = url.rfind('/')
    if last_slash >= 0:
        semi = url.find(';', last_slash)
        if semi < 0:
            return url, ''
    else:
        semi = url.find(';')
    return url[:semi], url[semi+1:]
|  | ||||
def _splitnetloc(url, start=0):
    """Return ``(domain, rest)``: ``url[start:]`` cut at the earliest of
    '/', '?' or '#' found at or after ``start`` (or at the end of ``url``
    when none of them occurs)."""
    found = [pos for pos in (url.find(c, start) for c in '/?#') if pos >= 0]
    # Default to the end of the string when no delimiter is present.
    end = min(found) if found else len(url)
    return url[start:end], url[end:]
|  | ||||
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>

    Returns a 5-tuple: (scheme, netloc, path, query, fragment).
    Components are not broken into smaller pieces (netloc stays a single
    string) and % escapes are not expanded.  Results are memoised in
    ``_parse_cache``.  Raises ValueError("Invalid IPv6 URL") when the netloc
    contains only one of '[' and ']'.
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''

    colon = url.find(':')
    if colon > 0:
        prefix, rest = url[:colon], url[colon+1:]
        if prefix == 'http':
            # Common case: accepted as a scheme without further checks.
            scheme, url = prefix.lower(), rest
        elif all(c in scheme_chars for c in prefix):
            # Make sure "rest" is not actually a port number (in which case
            # "prefix" is really part of the path).
            if not rest or any(c not in '0123456789' for c in rest):
                scheme, url = prefix.lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        has_open, has_close = '[' in netloc, ']' in netloc
        if (has_open and not has_close) or (has_close and not has_open):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return _coerce_result(result)
|  | ||||
def urlunparse(components):
    """Reassemble a parsed URL from its six components.

    The result may differ slightly from the URL originally parsed, while
    remaining equivalent, if that URL had redundant delimiters, e.g. a ?
    with an empty query (the draft states that these are equivalent).
    """
    coerced = _coerce_args(*components)
    scheme, netloc, path, params, query, fragment, _coerce_result = coerced
    if params:
        path = "%s;%s" % (path, params)
    joined = urlunsplit((scheme, netloc, path, query, fragment))
    return _coerce_result(joined)
|  | ||||
def urlunsplit(components):
    """Combine the five elements returned by urlsplit() into a complete URL
    string.  ``components`` can be any five-item iterable.

    The result may differ slightly from the URL originally parsed, while
    remaining equivalent, if that URL had unnecessary delimiters (for
    example, a ? with an empty query; the RFC states that these are
    equivalent).
    """
    coerced = _coerce_args(*components)
    scheme, netloc, url, query, fragment, _coerce_result = coerced
    wants_authority = netloc or (scheme and scheme in uses_netloc
                                 and url[:2] != '//')
    if wants_authority:
        # A non-empty path after an authority must start with '/'.
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return _coerce_result(url)
|  | ||||
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    An empty ``base`` returns ``url`` unchanged and an empty ``url`` returns
    ``base`` unchanged.  ``url`` is also returned as-is when its scheme
    differs from the base's or is not listed in ``uses_relative``.
    """
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)

    def _assemble(final_path):
        # Uses whatever netloc/params/query are current at call time.
        return _coerce_result(urlunparse((scheme, netloc, final_path,
                                          params, query, fragment)))

    if scheme in uses_netloc:
        if netloc:
            return _assemble(path)
        netloc = bnetloc
    if path[:1] == '/':
        return _assemble(path)
    if not path and not params:
        # Nothing path-like given: inherit path and params from the base,
        # and the query as well unless one was supplied.
        params = bparams
        if not query:
            query = bquery
        return _assemble(bpath)

    # Merge: base directory segments followed by the relative segments.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    segments = [seg for seg in segments if seg != '.']
    # Repeatedly collapse the first interior "<name>/.." pair until no
    # such pair remains.
    while True:
        for i in range(1, len(segments) - 1):
            if segments[i] == '..' and segments[i-1] not in ('', '..'):
                del segments[i-1:i+1]
                break
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return _assemble('/'.join(segments))
|  | ||||
def urldefrag(url):
    """Strip any fragment from ``url``.

    Returns a DefragResult (coerced back to the input's type) holding the
    defragmented URL and the fragment.  When the URL has no '#', the URL is
    returned untouched and the fragment is the empty string.
    """
    url, _coerce_result = _coerce_args(url)
    frag = ''
    defrag = url
    if '#' in url:
        s, n, p, a, q, frag = urlparse(url)
        defrag = urlunparse((s, n, p, a, q, ''))
    return _coerce_result(DefragResult(defrag, frag))
|  | ||||
_hexdig = '0123456789ABCDEFabcdef'
# Lookup table: every two-hex-digit bytes pair -> the single byte it encodes.
_hextobyte = dict(((hi + lo).encode(), bytes([int(hi + lo, 16)]))
                  for hi in _hexdig for lo in _hexdig)

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    Accepts str or bytes.  A '%' that is not followed by two hex digits is
    left in the output unchanged.
    """
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return bytes(b'')
    if isinstance(string, str):
        string = string.encode('utf-8')
    ### For Python-Future:
    # It is already a byte-string object, but force it to be newbytes here on
    # Py2:
    string = bytes(string)
    ###
    pieces = string.split(b'%')
    if len(pieces) == 1:
        return string
    out = [pieces[0]]
    for piece in pieces[1:]:
        decoded = _hextobyte.get(piece[:2])
        if decoded is None:
            # Not a valid escape: put the '%' back and keep the text as-is.
            out.append(b'%')
            out.append(piece)
        else:
            out.append(decoded)
            out.append(piece[2:])
    return bytes(b'').join(out)
|  | ||||
# Captures runs of ASCII characters; re.split() with this pattern therefore
# alternates non-ASCII text and ASCII runs.
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent.

    ``encoding`` and ``errors`` say how percent-encoded byte sequences are
    decoded into Unicode characters, as accepted by the bytes.decode()
    method; passing None for either selects 'utf-8' / 'replace'.
    By default, invalid sequences are replaced by a placeholder character.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        string.split
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'replace' if errors is None else errors
    chunks = _asciire.split(string)
    out = [chunks[0]]
    # After the first element, chunks come in (ASCII run, other text) pairs;
    # only the ASCII runs can contain %xx escapes.
    for ascii_run, other in zip(chunks[1::2], chunks[2::2]):
        out.append(unquote_to_bytes(ascii_run).decode(encoding, errors))
        out.append(other)
    return ''.join(out)
|  | ||||
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace'):
    """Parse a query string into a dict mapping each name to a list of values.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: when true, blank values in percent-encoded
            queries are kept as blank strings.  When false (the default),
            blank values are ignored as if they had not been included.

        strict_parsing: what to do with parsing errors.  When false (the
            default), errors are silently ignored; when true, they raise
            a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

    Values for a repeated name are collected in the order parse_qsl()
    yields them.
    """
    grouped = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, errors=errors):
        grouped.setdefault(name, []).append(value)
    return grouped
|  | ||||
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace'):
    """Parse a query string into a list of (name, value) pairs.

    Arguments:

    qs: percent-encoded query string to be parsed; fields are separated by
        '&' or ';'

    keep_blank_values: when true, blank values in percent-encoded queries
        are kept as blank strings.  When false (the default), blank values
        are ignored as if they had not been included.

    strict_parsing: what to do with parsing errors.  When false (the
        default), errors are silently ignored; when true, they raise a
        ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list of pairs in the order the fields appear.
    """
    qs, _coerce_result = _coerce_args(qs)
    result = []
    for chunk in qs.split('&'):
        for field in chunk.split(';'):
            if not field and not strict_parsing:
                continue
            name, has_equals, value = field.partition('=')
            if not has_equals:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # A control-name with no equal sign only survives as a
                # blank value.
                if not keep_blank_values:
                    continue
            if value or keep_blank_values:
                name = unquote(name.replace('+', ' '),
                               encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = unquote(value.replace('+', ' '),
                                encoding=encoding, errors=errors)
                value = _coerce_result(value)
                result.append((name, value))
    return result
|  | ||||
def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but plus signs are turned into spaces first, as
    required for unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    spaced = string.replace('+', ' ')
    return unquote(spaced, encoding, errors)
|  | ||||
# Byte values that are never percent-encoded, whatever 'safe' says.
_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                               b'abcdefghijklmnopqrstuvwxyz'
                               b'0123456789'
                               b'_.-'))
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
# Cache of quoting callables, filled in by quote_from_bytes().
_safe_quoters = {}

class Quoter(collections.defaultdict):
    """A mapping from byte values to their quoted string form.

    A key found in the safe set (the given ``safe`` bytes plus the
    always-safe set) maps to its own character; any other key maps to its
    '%XX' percent-encoding.
    """
    # Results are stored in the dict itself, so a repeated lookup of the
    # same key never reaches __missing__.
    def __init__(self, safe):
        """safe: bytes object."""
        extra = bytes(safe)
        self.safe = _ALWAYS_SAFE.union(extra)

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<Quoter %r>" % dict(self)

    def __missing__(self, b):
        # Cache miss: compute the quoted form, remember it, and return it.
        if b in self.safe:
            quoted = chr(b)
        else:
            quoted = '%{0:02X}'.format(b)
        self[b] = quoted
        return quoted
|  | ||||
def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these is reserved in some component of a URL, but not
    necessarily in all of them.  The default ``safe='/'`` suits quoting
    the path section of a URL, where existing slashes act as separators
    and so are left unencoded.

    string and safe may be either str or bytes objects.  encoding and
    errors may only be given when string is a str; supplying either one
    for a non-str string raises TypeError.

    For a str, encoding and errors are passed to str.encode and default to
    'utf-8' and 'strict' (unsupported characters raise a
    UnicodeEncodeError).  An empty str is returned unchanged.
    """
    if not isinstance(string, str):
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
        return quote_from_bytes(string, safe)
    if not string:
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'strict'
    return quote_from_bytes(string.encode(encoding, errors), safe)
|  | ||||
def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values.  Plus signs in the original string are escaped unless
    they are included in safe.  Unlike quote(), safe does not default to '/'.
    """
    # string may be str or bytes; when it holds no space at all, plain
    # quote() already gives the right answer.
    lacks_space = ((isinstance(string, str) and ' ' not in string) or
                   (isinstance(string, bytes) and b' ' not in string))
    if lacks_space:
        return quote(string, safe, encoding, errors)
    # Keep spaces unquoted for now (matching the type of 'safe'), then turn
    # them into '+' afterwards.
    space = str(' ') if isinstance(safe, str) else bytes(b' ')
    quoted = quote(string, safe + space, encoding, errors)
    return quoted.replace(' ', '+')
|  | ||||
def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns a str.
    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3F'

    Raises TypeError when ``bs`` is neither bytes nor bytearray.
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return str('')
    ### For Python-Future:
    bs = bytes(bs)
    ###
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = str(safe).encode('ascii', 'ignore')
    else:
        ### For Python-Future:
        safe = bytes(safe)
        ###
        safe = bytes([c for c in safe if c < 128])
    # Fast path: nothing is left after stripping safe bytes, so no byte
    # needs quoting.
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    quoter = _safe_quoters.get(safe)
    if quoter is None:
        # First use of this safe set: build its quoter and cache the lookup.
        quoter = Quoter(safe).__getitem__
        _safe_quoters[safe] = quoter
    return str('').join([quoter(byte) for byte in bs])
|  | ||||
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Keys and values may be str or bytes.  bytes items are passed to
    quote_plus() with ``safe`` only; anything else is converted with str()
    and quoted with ``safe``, ``encoding`` and ``errors``.

    Raises TypeError when ``query`` is neither a mapping nor a sequence
    whose first element is a tuple.
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            tb = sys.exc_info()[2]
            raise_with_traceback(TypeError("not a valid non-string sequence "
                                           "or mapping object"), tb)

    def _quote(item):
        # bytes are quoted as-is; everything else is stringified first.
        if isinstance(item, bytes):
            return quote_plus(item, safe)
        return quote_plus(str(item), safe, encoding, errors)

    parts = []
    for k, v in query:
        k = _quote(k)
        if not doseq or isinstance(v, (bytes, str)):
            # Scalar handling: the value becomes exactly one parameter.
            parts.append(k + '=' + _quote(v))
            continue
        try:
            # Is this a sufficient test for sequence-ness?
            len(v)
        except TypeError:
            # not a sequence
            parts.append(k + '=' + _quote(v))
        else:
            # One key=value parameter per element of the sequence.
            for elt in v:
                parts.append(k + '=' + _quote(elt))
    return str('&').join(parts)
|  | ||||
| # Utilities to parse URLs (most of these return None for missing parts): | ||||
| # unwrap('<URL:type://host/path>') --> 'type://host/path' | ||||
| # splittype('type:opaquestring') --> 'type', 'opaquestring' | ||||
| # splithost('//host[:port]/path') --> 'host[:port]', '/path' | ||||
| # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' | ||||
| # splitpasswd('user:passwd') -> 'user', 'passwd' | ||||
| # splitport('host:port') --> 'host', 'port' | ||||
| # splitquery('/path?query') --> '/path', 'query' | ||||
| # splittag('/path#tag') --> '/path', 'tag' | ||||
| # splitattr('/path;attr1=value1;attr2=value2;...') -> | ||||
| #   '/path', ['attr1=value1', 'attr2=value2', ...] | ||||
| # splitvalue('attr=value') --> 'attr', 'value' | ||||
| # urllib.parse.unquote('abc%20def') -> 'abc def' | ||||
# quote('abc def') -> 'abc%20def'
|  | ||||
def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    A str is checked to be pure ASCII and returned as a str; a UnicodeError
    is raised otherwise.  Any other object is returned unchanged.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
|  | ||||
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, one enclosing pair of angle brackets and
    a leading 'URL:' prefix, in that order.
    """
    text = str(url).strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
|  | ||||
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned lower-cased.  When the URL has no leading
    'type:' part, the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compile the pattern lazily, on first use.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    scheme = found.group(1)
    # Skip the scheme and the ':' that follows it.
    return scheme.lower(), url[len(scheme) + 1:]
|  | ||||
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#'.  Whatever follows is
    returned as the path, with a '/' prepended when it does not already
    start with one.  When the URL does not start with '//', the result is
    (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compile the pattern lazily, on first use.
        import re
        # '#' must terminate the host too, otherwise a URL such as
        # '//127.0.0.1#@evil.com/' reports the fragment as part of the host.
        # DOTALL lets the remainder contain newlines instead of making the
        # whole match fail.
        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and path[0] != '/':
            path = '/' + path
        return host_port, path
    return None, url
|  | ||||
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; without one the result is
    (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compile the pattern lazily, on first use.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.groups()
|  | ||||
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; without one the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile the pattern lazily, on first use.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
|  | ||||
| # splittag('/path#tag') --> '/path', 'tag' | ||||
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and is only recognised when one or
    more ASCII digits follow the last ':'.  Otherwise the result is
    ``(host, None)``.
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
|  | ||||
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning a numeric port.

    Return *defport* (default -1) as the port when *host* contains no ':'.
    Return the port converted with int() when a valid number follows the
    last ':'.
    Return None as the port when there is a ':' but what follows it is
    empty or not a valid number.
    """
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport

    host, port = found.group(1, 2)
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            # Not a number: report the port as None.
            nport = None
    return host, nport
|  | ||||
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without a match the result is
    ``(url, None)``.
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        # Raw string: '\?' in a normal literal is an invalid escape sequence
        # and triggers a warning on recent Python versions.  The compiled
        # pattern is unchanged.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None
|  | ||||
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without a match the result is
    ``(url, None)``.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
|  | ||||
def splitattr(url):
    """Separate the ';'-delimited attributes from a path.

    splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when *url* contains no ';'.
    """
    path, separator, tail = url.partition(';')
    if not separator:
        return path, []
    return path, tail.split(';')
|  | ||||
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without a match the result is
    ``(attr, None)``.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
							
								
								
									
										2647
									
								
								venv/lib/python3.12/site-packages/future/backports/urllib/request.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2647
									
								
								venv/lib/python3.12/site-packages/future/backports/urllib/request.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,103 @@ | ||||
| """Response classes used by urllib. | ||||
|  | ||||
| The base class, addbase, defines a minimal file-like interface, | ||||
| including read() and readline().  The typical response object is an | ||||
| addinfourl instance, which defines an info() method that returns | ||||
| headers and a geturl() method that returns the url. | ||||
| """ | ||||
| from __future__ import absolute_import, division, unicode_literals | ||||
| from future.builtins import object | ||||
|  | ||||
class addbase(object):
    """Base class for addinfo and addclosehook.

    Wraps a file-like object *fp* and exposes its ``read``/``readline``
    (and, when present, ``readlines``/``fileno``) directly as instance
    attributes.  Also usable as a context manager that closes on exit.
    """

    # XXX Add a method to expose the timeout on the underlying socket?

    def __init__(self, fp):
        # TODO(jhylton): Is there a better way to delegate using io?
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # TODO(jhylton): Make sure an object with readlines() is also iterable
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Without a real fileno(), install a stand-in that returns None.
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            self.fileno = lambda: None

    def __iter__(self):
        # Assigning `__iter__` to the instance doesn't work as intended
        # because the iter builtin does something like `cls.__iter__(obj)`
        # and thus fails to find the _bound_ method `obj.__iter__`.
        # Returning just `self.fp` works for built-in file objects but
        # might not work for general file-like objects.
        return iter(self.fp)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s at %r whose fp = %r>' % (class_name, id(self), self.fp)

    def close(self):
        """Close the wrapped file (if any) and drop every delegated attribute."""
        if self.fp:
            self.fp.close()
        for attribute in ("fp", "read", "readline", "readlines",
                          "fileno", "__iter__", "__next__"):
            setattr(self, attribute, None)

    def __enter__(self):
        if self.fp is None:
            raise ValueError("I/O operation on closed file")
        return self

    def __exit__(self, type, value, traceback):
        self.close()
|  | ||||
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    *closehook* is called with *hookargs* the first time close() is
    called; the wrapped file is closed afterwards.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Run the close hook (at most once), then close the wrapped file.

        The wrapped file is closed even if the hook raises; the hook's
        exception still propagates to the caller.
        """
        try:
            closehook = self.closehook
            hookargs = self.hookargs
            if closehook:
                # Clear the hook before calling it so it cannot run twice,
                # even if it raises or close() is re-entered.
                self.closehook = None
                self.hookargs = None
                closehook(*hookargs)
        finally:
            addbase.close(self)
|  | ||||
class addinfo(addbase):
    """Wrapper around an open file that adds an info() method.

    info() hands back the *headers* object given at construction time.
    """

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers
|  | ||||
class addinfourl(addbase):
    """Wrapper around an open file that adds info(), getcode() and geturl().

    The accessors return the *headers*, *code* and *url* values given at
    construction time.
    """

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def getcode(self):
        """Return the code passed to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the url passed to the constructor."""
        return self.url
|  | ||||
| del absolute_import, division, unicode_literals, object | ||||
| @ -0,0 +1,211 @@ | ||||
| from __future__ import absolute_import, division, unicode_literals | ||||
| from future.builtins import str | ||||
| """ robotparser.py | ||||
|  | ||||
|     Copyright (C) 2000  Bastian Kleineidam | ||||
|  | ||||
|     You can choose between two licenses when using this package: | ||||
|     1) GNU GPLv2 | ||||
|     2) PSF license for Python 2.2 | ||||
|  | ||||
|     The robots.txt Exclusion Protocol is implemented as specified in | ||||
|     http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html | ||||
| """ | ||||
|  | ||||
| # Was: import urllib.parse, urllib.request | ||||
| from future.backports import urllib | ||||
| from future.backports.urllib import parse as _parse, request as _request | ||||
| urllib.parse = _parse | ||||
| urllib.request = _request | ||||
|  | ||||
|  | ||||
| __all__ = ["RobotFileParser"] | ||||
|  | ||||
class RobotFileParser(object):
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        # Entries for specific user agents, in file order.
        self.entries = []
        # The first entry that names the "*" user agent, if any.
        self.default_entry = None
        # Set by read() depending on the HTTP error status it receives.
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        # Elements 1 and 2 of the parse result are the netloc and the path.
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser.

        An HTTP 401 or 403 response sets ``disallow_all``; any other HTTP
        error status >= 400 sets ``allow_all``.  On success the body is
        decoded as UTF-8 and passed to parse().
        """
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400:
                self.allow_all = True
        else:
            # Always release the response, even if reading it fails.
            try:
                raw = f.read()
            finally:
                f.close()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        """Store *entry* as the default entry or as an agent-specific one."""
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        for line in lines:
            if not line:
                # A blank line ends the current record: a record with only
                # user-agent lines is dropped, one with rules is stored.
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        # A user-agent line after rules starts a new record.
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    # Rules seen before any user-agent line are ignored.
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        # Store the last record if the input did not end with a blank line.
        if state == 2:
            self._add_entry(entry)


    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches
        # the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        # Rebuild the URL without scheme and netloc, then re-quote it.
        url = urllib.parse.urlunparse(('','',parsed_url.path,
            parsed_url.params,parsed_url.query, parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        # Only agent-specific entries are rendered; the default entry is
        # not part of this list.
        return ''.join([str(entry) + "\n" for entry in self.entries])
|  | ||||
|  | ||||
class RuleLine(object):
    """One "Allow:" (allowance==True) or "Disallow:" (allowance==False)
       rule together with the path it applies to.

       The path is stored URL-quoted."""
    def __init__(self, path, allowance):
        if not allowance and path == '':
            # an empty value means allow all
            allowance = True
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        """Return True if the stored path is "*" or a prefix of *filename*."""
        if self.path == "*":
            return True
        return filename.startswith(self.path)

    def __str__(self):
        keyword = "Allow" if self.allowance else "Disallow"
        return keyword + ": " + self.path
|  | ||||
|  | ||||
class Entry(object):
    """An entry has one or more user-agents and zero or more rulelines"""
    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        agent_text = ''.join("User-agent: " + agent + "\n"
                             for agent in self.useragents)
        rule_text = ''.join(str(line) + "\n" for line in self.rulelines)
        return agent_text + rule_text

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # Keep only the name token (before any "/") and lower-case it.
        token = useragent.split("/")[0].lower()
        for agent in self.useragents:
            # '*' is the catch-all agent; otherwise a case-insensitive
            # substring match against the name token is enough.
            if agent == '*' or agent.lower() in token:
                return True
        return False

    def allowance(self, filename):
        """Return the allowance of the first rule that applies to *filename*.

        Preconditions:
        - our agent applies to this entry
        - filename is URL decoded

        Returns True when no rule applies."""
        for rule in self.rulelines:
            if not rule.applies_to(filename):
                continue
            return rule.allowance
        return True
| @ -0,0 +1 @@ | ||||
| # This directory is a Python package. | ||||
							
								
								
									
										1497
									
								
								venv/lib/python3.12/site-packages/future/backports/xmlrpc/client.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1497
									
								
								venv/lib/python3.12/site-packages/future/backports/xmlrpc/client.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,999 @@ | ||||
| r""" | ||||
| Ported using Python-Future from the Python 3.3 standard library. | ||||
|  | ||||
| XML-RPC Servers. | ||||
|  | ||||
| This module can be used to create simple XML-RPC servers | ||||
| by creating a server and either installing functions, a | ||||
| class instance, or by extending the SimpleXMLRPCServer | ||||
| class. | ||||
|  | ||||
| It can also be used to handle XML-RPC requests in a CGI | ||||
| environment using CGIXMLRPCRequestHandler. | ||||
|  | ||||
| The Doc* classes can be used to create XML-RPC servers that | ||||
| serve pydoc-style documentation in response to HTTP | ||||
| GET requests. This documentation is dynamically generated | ||||
| based on the functions and methods registered with the | ||||
| server. | ||||
|  | ||||
| A list of possible usage patterns follows: | ||||
|  | ||||
| 1. Install functions: | ||||
|  | ||||
| server = SimpleXMLRPCServer(("localhost", 8000)) | ||||
| server.register_function(pow) | ||||
| server.register_function(lambda x,y: x+y, 'add') | ||||
| server.serve_forever() | ||||
|  | ||||
| 2. Install an instance: | ||||
|  | ||||
| class MyFuncs: | ||||
|     def __init__(self): | ||||
|         # make all of the sys functions available through sys.func_name | ||||
|         import sys | ||||
|         self.sys = sys | ||||
|     def _listMethods(self): | ||||
|         # implement this method so that system.listMethods | ||||
|         # knows to advertise the sys methods | ||||
|         return list_public_methods(self) + \ | ||||
|                 ['sys.' + method for method in list_public_methods(self.sys)] | ||||
|     def pow(self, x, y): return pow(x, y) | ||||
|     def add(self, x, y) : return x + y | ||||
|  | ||||
| server = SimpleXMLRPCServer(("localhost", 8000)) | ||||
| server.register_introspection_functions() | ||||
| server.register_instance(MyFuncs()) | ||||
| server.serve_forever() | ||||
|  | ||||
| 3. Install an instance with custom dispatch method: | ||||
|  | ||||
| class Math: | ||||
|     def _listMethods(self): | ||||
|         # this method must be present for system.listMethods | ||||
|         # to work | ||||
|         return ['add', 'pow'] | ||||
|     def _methodHelp(self, method): | ||||
|         # this method must be present for system.methodHelp | ||||
|         # to work | ||||
|         if method == 'add': | ||||
|             return "add(2,3) => 5" | ||||
|         elif method == 'pow': | ||||
|             return "pow(x, y[, z]) => number" | ||||
|         else: | ||||
|             # By convention, return empty | ||||
|             # string if no help is available | ||||
|             return "" | ||||
|     def _dispatch(self, method, params): | ||||
|         if method == 'pow': | ||||
|             return pow(*params) | ||||
|         elif method == 'add': | ||||
|             return params[0] + params[1] | ||||
|         else: | ||||
|             raise ValueError('bad method') | ||||
|  | ||||
| server = SimpleXMLRPCServer(("localhost", 8000)) | ||||
| server.register_introspection_functions() | ||||
| server.register_instance(Math()) | ||||
| server.serve_forever() | ||||
|  | ||||
| 4. Subclass SimpleXMLRPCServer: | ||||
|  | ||||
| class MathServer(SimpleXMLRPCServer): | ||||
|     def _dispatch(self, method, params): | ||||
|         try: | ||||
|             # We are forcing the 'export_' prefix on methods that are | ||||
|             # callable through XML-RPC to prevent potential security | ||||
|             # problems | ||||
|             func = getattr(self, 'export_' + method) | ||||
|         except AttributeError: | ||||
|             raise Exception('method "%s" is not supported' % method) | ||||
|         else: | ||||
|             return func(*params) | ||||
|  | ||||
|     def export_add(self, x, y): | ||||
|         return x + y | ||||
|  | ||||
| server = MathServer(("localhost", 8000)) | ||||
| server.serve_forever() | ||||
|  | ||||
| 5. CGI script: | ||||
|  | ||||
| server = CGIXMLRPCRequestHandler() | ||||
| server.register_function(pow) | ||||
| server.handle_request() | ||||
| """ | ||||
|  | ||||
| from __future__ import absolute_import, division, print_function, unicode_literals | ||||
| from future.builtins import int, str | ||||
|  | ||||
| # Written by Brian Quinlan (brian@sweetapp.com). | ||||
| # Based on code written by Fredrik Lundh. | ||||
|  | ||||
| from future.backports.xmlrpc.client import Fault, dumps, loads, gzip_encode, gzip_decode | ||||
| from future.backports.http.server import BaseHTTPRequestHandler | ||||
| import future.backports.http.server as http_server | ||||
| from future.backports import socketserver | ||||
| import sys | ||||
| import os | ||||
| import re | ||||
| import pydoc | ||||
| import inspect | ||||
| import traceback | ||||
| try: | ||||
|     import fcntl | ||||
| except ImportError: | ||||
|     fcntl = None | ||||
|  | ||||
def resolve_dotted_attribute(obj, attr, allow_dotted_names=True):
    """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d

    Walks the dotted attribute name starting at *obj* and returns the
    object it ends on.  Raises AttributeError as soon as a name segment
    starts with a '_'.

    If the optional allow_dotted_names argument is false, the name is not
    split on dots and is looked up as a single attribute, similar to
    getattr(obj, attr).
    """

    names = attr.split('.') if allow_dotted_names else [attr]

    target = obj
    for name in names:
        if name.startswith('_'):
            raise AttributeError(
                'attempt to access private attribute "%s"' % name
                )
        target = getattr(target, name)
    return target
|  | ||||
def list_public_methods(obj):
    """Return the names of the callable attributes of *obj* that do not
    start with an underscore, in dir() order."""

    public = []
    for name in dir(obj):
        if name.startswith('_'):
            continue
        if callable(getattr(obj, name)):
            public.append(name)
    return public
|  | ||||
| class SimpleXMLRPCDispatcher(object): | ||||
|     """Mix-in class that dispatches XML-RPC requests. | ||||
|  | ||||
|     This class is used to register XML-RPC method handlers | ||||
|     and then to dispatch them. This class doesn't need to be | ||||
|     instanced directly when used by SimpleXMLRPCServer but it | ||||
|     can be instanced when used by the MultiPathXMLRPCServer | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, allow_none=False, encoding=None, | ||||
|                  use_builtin_types=False): | ||||
|         self.funcs = {} | ||||
|         self.instance = None | ||||
|         self.allow_none = allow_none | ||||
|         self.encoding = encoding or 'utf-8' | ||||
|         self.use_builtin_types = use_builtin_types | ||||
|  | ||||
|     def register_instance(self, instance, allow_dotted_names=False): | ||||
|         """Registers an instance to respond to XML-RPC requests. | ||||
|  | ||||
|         Only one instance can be installed at a time. | ||||
|  | ||||
|         If the registered instance has a _dispatch method then that | ||||
|         method will be called with the name of the XML-RPC method and | ||||
|         its parameters as a tuple | ||||
|         e.g. instance._dispatch('add',(2,3)) | ||||
|  | ||||
|         If the registered instance does not have a _dispatch method | ||||
|         then the instance will be searched to find a matching method | ||||
|         and, if found, will be called. Methods beginning with an '_' | ||||
|         are considered private and will not be called by | ||||
|         SimpleXMLRPCServer. | ||||
|  | ||||
|         If a registered function matches a XML-RPC request, then it | ||||
|         will be called instead of the registered instance. | ||||
|  | ||||
|         If the optional allow_dotted_names argument is true and the | ||||
|         instance does not have a _dispatch method, method names | ||||
|         containing dots are supported and resolved, as long as none of | ||||
|         the name segments start with an '_'. | ||||
|  | ||||
|             *** SECURITY WARNING: *** | ||||
|  | ||||
|             Enabling the allow_dotted_names options allows intruders | ||||
|             to access your module's global variables and may allow | ||||
|             intruders to execute arbitrary code on your machine.  Only | ||||
|             use this option on a secure, closed network. | ||||
|  | ||||
|         """ | ||||
|  | ||||
|         self.instance = instance | ||||
|         self.allow_dotted_names = allow_dotted_names | ||||
|  | ||||
|     def register_function(self, function, name=None): | ||||
|         """Registers a function to respond to XML-RPC requests. | ||||
|  | ||||
|         The optional name argument can be used to set a Unicode name | ||||
|         for the function. | ||||
|         """ | ||||
|  | ||||
|         if name is None: | ||||
|             name = function.__name__ | ||||
|         self.funcs[name] = function | ||||
|  | ||||
|     def register_introspection_functions(self): | ||||
|         """Registers the XML-RPC introspection methods in the system | ||||
|         namespace. | ||||
|  | ||||
|         see http://xmlrpc.usefulinc.com/doc/reserved.html | ||||
|         """ | ||||
|  | ||||
|         self.funcs.update({'system.listMethods' : self.system_listMethods, | ||||
|                       'system.methodSignature' : self.system_methodSignature, | ||||
|                       'system.methodHelp' : self.system_methodHelp}) | ||||
|  | ||||
|     def register_multicall_functions(self): | ||||
|         """Registers the XML-RPC multicall method in the system | ||||
|         namespace. | ||||
|  | ||||
|         see http://www.xmlrpc.com/discuss/msgReader$1208""" | ||||
|  | ||||
|         self.funcs.update({'system.multicall' : self.system_multicall}) | ||||
|  | ||||
|     def _marshaled_dispatch(self, data, dispatch_method = None, path = None): | ||||
|         """Dispatches an XML-RPC method from marshalled (XML) data. | ||||
|  | ||||
|         XML-RPC methods are dispatched from the marshalled (XML) data | ||||
|         using the _dispatch method and the result is returned as | ||||
|         marshalled data. For backwards compatibility, a dispatch | ||||
|         function can be provided as an argument (see comment in | ||||
|         SimpleXMLRPCRequestHandler.do_POST) but overriding the | ||||
|         existing method through subclassing is the preferred means | ||||
|         of changing method dispatch behavior. | ||||
|         """ | ||||
|  | ||||
|         try: | ||||
|             params, method = loads(data, use_builtin_types=self.use_builtin_types) | ||||
|  | ||||
|             # generate response | ||||
|             if dispatch_method is not None: | ||||
|                 response = dispatch_method(method, params) | ||||
|             else: | ||||
|                 response = self._dispatch(method, params) | ||||
|             # wrap response in a singleton tuple | ||||
|             response = (response,) | ||||
|             response = dumps(response, methodresponse=1, | ||||
|                              allow_none=self.allow_none, encoding=self.encoding) | ||||
|         except Fault as fault: | ||||
|             response = dumps(fault, allow_none=self.allow_none, | ||||
|                              encoding=self.encoding) | ||||
|         except: | ||||
|             # report exception back to server | ||||
|             exc_type, exc_value, exc_tb = sys.exc_info() | ||||
|             response = dumps( | ||||
|                 Fault(1, "%s:%s" % (exc_type, exc_value)), | ||||
|                 encoding=self.encoding, allow_none=self.allow_none, | ||||
|                 ) | ||||
|  | ||||
|         return response.encode(self.encoding) | ||||
|  | ||||
|     def system_listMethods(self): | ||||
|         """system.listMethods() => ['add', 'subtract', 'multiple'] | ||||
|  | ||||
|         Returns a list of the methods supported by the server.""" | ||||
|  | ||||
|         methods = set(self.funcs.keys()) | ||||
|         if self.instance is not None: | ||||
|             # Instance can implement _listMethod to return a list of | ||||
|             # methods | ||||
|             if hasattr(self.instance, '_listMethods'): | ||||
|                 methods |= set(self.instance._listMethods()) | ||||
|             # if the instance has a _dispatch method then we | ||||
|             # don't have enough information to provide a list | ||||
|             # of methods | ||||
|             elif not hasattr(self.instance, '_dispatch'): | ||||
|                 methods |= set(list_public_methods(self.instance)) | ||||
|         return sorted(methods) | ||||
|  | ||||
|     def system_methodSignature(self, method_name): | ||||
|         """system.methodSignature('add') => [double, int, int] | ||||
|  | ||||
|         Returns a list describing the signature of the method. In the | ||||
|         above example, the add method takes two integers as arguments | ||||
|         and returns a double result. | ||||
|  | ||||
|         This server does NOT support system.methodSignature.""" | ||||
|  | ||||
|         # See http://xmlrpc.usefulinc.com/doc/sysmethodsig.html | ||||
|  | ||||
|         return 'signatures not supported' | ||||
|  | ||||
|     def system_methodHelp(self, method_name): | ||||
|         """system.methodHelp('add') => "Adds two integers together" | ||||
|  | ||||
|         Returns a string containing documentation for the specified method.""" | ||||
|  | ||||
|         method = None | ||||
|         if method_name in self.funcs: | ||||
|             method = self.funcs[method_name] | ||||
|         elif self.instance is not None: | ||||
|             # Instance can implement _methodHelp to return help for a method | ||||
|             if hasattr(self.instance, '_methodHelp'): | ||||
|                 return self.instance._methodHelp(method_name) | ||||
|             # if the instance has a _dispatch method then we | ||||
|             # don't have enough information to provide help | ||||
|             elif not hasattr(self.instance, '_dispatch'): | ||||
|                 try: | ||||
|                     method = resolve_dotted_attribute( | ||||
|                                 self.instance, | ||||
|                                 method_name, | ||||
|                                 self.allow_dotted_names | ||||
|                                 ) | ||||
|                 except AttributeError: | ||||
|                     pass | ||||
|  | ||||
|         # Note that we aren't checking that the method actually | ||||
|         # be a callable object of some kind | ||||
|         if method is None: | ||||
|             return "" | ||||
|         else: | ||||
|             return pydoc.getdoc(method) | ||||
|  | ||||
|     def system_multicall(self, call_list): | ||||
|         """system.multicall([{'methodName': 'add', 'params': [2, 2]}, ...]) => \ | ||||
| [[4], ...] | ||||
|  | ||||
|         Allows the caller to package multiple XML-RPC calls into a single | ||||
|         request. | ||||
|  | ||||
|         See http://www.xmlrpc.com/discuss/msgReader$1208 | ||||
|         """ | ||||
|  | ||||
|         results = [] | ||||
|         for call in call_list: | ||||
|             method_name = call['methodName'] | ||||
|             params = call['params'] | ||||
|  | ||||
|             try: | ||||
|                 # XXX A marshalling error in any response will fail the entire | ||||
|                 # multicall. If someone cares they should fix this. | ||||
|                 results.append([self._dispatch(method_name, params)]) | ||||
|             except Fault as fault: | ||||
|                 results.append( | ||||
|                     {'faultCode' : fault.faultCode, | ||||
|                      'faultString' : fault.faultString} | ||||
|                     ) | ||||
|             except: | ||||
|                 exc_type, exc_value, exc_tb = sys.exc_info() | ||||
|                 results.append( | ||||
|                     {'faultCode' : 1, | ||||
|                      'faultString' : "%s:%s" % (exc_type, exc_value)} | ||||
|                     ) | ||||
|         return results | ||||
|  | ||||
|     def _dispatch(self, method, params): | ||||
|         """Dispatches the XML-RPC method. | ||||
|  | ||||
|         XML-RPC calls are forwarded to a registered function that | ||||
|         matches the called XML-RPC method name. If no such function | ||||
|         exists then the call is forwarded to the registered instance, | ||||
|         if available. | ||||
|  | ||||
|         If the registered instance has a _dispatch method then that | ||||
|         method will be called with the name of the XML-RPC method and | ||||
|         its parameters as a tuple | ||||
|         e.g. instance._dispatch('add',(2,3)) | ||||
|  | ||||
|         If the registered instance does not have a _dispatch method | ||||
|         then the instance will be searched to find a matching method | ||||
|         and, if found, will be called. | ||||
|  | ||||
|         Methods beginning with an '_' are considered private and will | ||||
|         not be called. | ||||
|         """ | ||||
|  | ||||
|         func = None | ||||
|         try: | ||||
|             # check to see if a matching function has been registered | ||||
|             func = self.funcs[method] | ||||
|         except KeyError: | ||||
|             if self.instance is not None: | ||||
|                 # check for a _dispatch method | ||||
|                 if hasattr(self.instance, '_dispatch'): | ||||
|                     return self.instance._dispatch(method, params) | ||||
|                 else: | ||||
|                     # call instance method directly | ||||
|                     try: | ||||
|                         func = resolve_dotted_attribute( | ||||
|                             self.instance, | ||||
|                             method, | ||||
|                             self.allow_dotted_names | ||||
|                             ) | ||||
|                     except AttributeError: | ||||
|                         pass | ||||
|  | ||||
|         if func is not None: | ||||
|             return func(*params) | ||||
|         else: | ||||
|             raise Exception('method "%s" is not supported' % method) | ||||
|  | ||||
class SimpleXMLRPCRequestHandler(BaseHTTPRequestHandler):
    """Simple XML-RPC request handler class.

    Handles all HTTP POST requests and attempts to decode them as
    XML-RPC requests.
    """

    # Class attribute listing the accessible path components;
    # paths not on this list will result in a 404 error.
    rpc_paths = ('/', '/RPC2')

    # if not None, encode responses larger than this, if possible
    encode_threshold = 1400 # a common MTU

    # Override from StreamRequestHandler: full buffering of output
    # and no Nagle.
    wbufsize = -1
    disable_nagle_algorithm = True

    # a re to match a gzip Accept-Encoding
    aepattern = re.compile(r"""
                            \s* ([^\s;]+) \s*            #content-coding
                            (;\s* q \s*=\s* ([0-9\.]+))? #q
                            """, re.VERBOSE | re.IGNORECASE)

    def accept_encodings(self):
        """Parse the Accept-Encoding request header.

        Returns a dict mapping each content-coding named in the header to
        its q-value as a float; a coding without a q parameter gets 1.0.
        Entries whose q-value is not a valid number (for example "1.2.3",
        which the pattern accepts but float() rejects) are skipped so that
        a malformed client header cannot raise while a response is being
        sent.
        """
        r = {}
        ae = self.headers.get("Accept-Encoding", "")
        for e in ae.split(","):
            match = self.aepattern.match(e)
            if match:
                v = match.group(3)
                try:
                    v = float(v) if v else 1.0
                except ValueError:
                    # Malformed q-value: ignore this coding.
                    continue
                r[match.group(1)] = v
        return r

    def is_rpc_path_valid(self):
        """Return True when the request path may be served by this handler."""
        if self.rpc_paths:
            return self.path in self.rpc_paths
        else:
            # If .rpc_paths is empty, just assume all paths are legal
            return True

    def do_POST(self):
        """Handles the HTTP POST request.

        Attempts to interpret all HTTP POST requests as XML-RPC calls,
        which are forwarded to the server's _dispatch method for handling.
        """

        # Check that the path is legal
        if not self.is_rpc_path_valid():
            self.report_404()
            return

        try:
            # Get arguments by reading body of request.
            # We read this in chunks to avoid straining
            # socket.read(); around the 10 or 15Mb mark, some platforms
            # begin to have problems (bug #792570).
            max_chunk_size = 10*1024*1024
            size_remaining = int(self.headers["content-length"])
            L = []
            while size_remaining:
                chunk_size = min(size_remaining, max_chunk_size)
                chunk = self.rfile.read(chunk_size)
                if not chunk:
                    # peer closed the stream before sending everything
                    break
                L.append(chunk)
                size_remaining -= len(chunk)
            data = b''.join(L)

            data = self.decode_request_content(data)
            if data is None:
                return #response has been sent

            # In previous versions of SimpleXMLRPCServer, _dispatch
            # could be overridden in this class, instead of in
            # SimpleXMLRPCDispatcher. To maintain backwards compatibility,
            # check to see if a subclass implements _dispatch and dispatch
            # using that method if present.
            response = self.server._marshaled_dispatch(
                    data, getattr(self, '_dispatch', None), self.path
                )
        except Exception as e: # This should only happen if the module is buggy
            # internal error, report as HTTP server error
            self.send_response(500)

            # Send information about the exception if requested
            if hasattr(self.server, '_send_traceback_header') and \
                    self.server._send_traceback_header:
                self.send_header("X-exception", str(e))
                trace = traceback.format_exc()
                # header values must be ASCII; escape everything else
                trace = str(trace.encode('ASCII', 'backslashreplace'), 'ASCII')
                self.send_header("X-traceback", trace)

            self.send_header("Content-length", "0")
            self.end_headers()
        else:
            self.send_response(200)
            self.send_header("Content-type", "text/xml")
            if self.encode_threshold is not None:
                if len(response) > self.encode_threshold:
                    # only compress when the client accepts gzip with q > 0
                    q = self.accept_encodings().get("gzip", 0)
                    if q:
                        try:
                            response = gzip_encode(response)
                            self.send_header("Content-Encoding", "gzip")
                        except NotImplementedError:
                            pass
            self.send_header("Content-length", str(len(response)))
            self.end_headers()
            self.wfile.write(response)

    def decode_request_content(self, data):
        """Undo the Content-Encoding applied to the request body.

        Returns the decoded bytes for "identity" and "gzip" bodies. For
        any other encoding, or when gzip decoding fails, an error response
        is sent and None is returned so the caller can stop processing.
        """
        #support gzip encoding of request
        encoding = self.headers.get("content-encoding", "identity").lower()
        if encoding == "identity":
            return data
        if encoding == "gzip":
            try:
                return gzip_decode(data)
            except NotImplementedError:
                self.send_response(501, "encoding %r not supported" % encoding)
            except ValueError:
                self.send_response(400, "error decoding gzip content")
        else:
            self.send_response(501, "encoding %r not supported" % encoding)
        self.send_header("Content-length", "0")
        self.end_headers()
        return None

    def report_404(self):
        """Send a plain-text 404 response."""
        self.send_response(404)
        response = b'No such page'
        self.send_header("Content-type", "text/plain")
        self.send_header("Content-length", str(len(response)))
        self.end_headers()
        self.wfile.write(response)

    def log_request(self, code='-', size='-'):
        """Selectively log an accepted request."""

        if self.server.logRequests:
            BaseHTTPRequestHandler.log_request(self, code, size)
|  | ||||
class SimpleXMLRPCServer(socketserver.TCPServer,
                         SimpleXMLRPCDispatcher):
    """Simple XML-RPC server.

    A TCP server onto which functions and a single instance can be
    installed to handle XML-RPC requests. By default calls are
    dispatched to whatever functions or instance have been installed;
    override the _dispatch method inherited from SimpleXMLRPCDispatcher
    to change this behavior.
    """

    allow_reuse_address = True

    # Warning: this is for debugging purposes only! Never set this to True in
    # production code, as will be sending out sensitive information (exception
    # and stack trace details) when exceptions are raised inside
    # SimpleXMLRPCRequestHandler.do_POST
    _send_traceback_header = False

    def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler,
                 logRequests=True, allow_none=False, encoding=None,
                 bind_and_activate=True, use_builtin_types=False):
        # read by SimpleXMLRPCRequestHandler.log_request
        self.logRequests = logRequests

        SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types)
        socketserver.TCPServer.__init__(self, addr, requestHandler, bind_and_activate)

        # [Bug #1222790] If possible, set close-on-exec flag; if a
        # method spawns a subprocess, the subprocess shouldn't have
        # the listening socket open.
        if fcntl is None or not hasattr(fcntl, 'FD_CLOEXEC'):
            return
        descriptor = self.fileno()
        current = fcntl.fcntl(descriptor, fcntl.F_GETFD)
        fcntl.fcntl(descriptor, fcntl.F_SETFD, current | fcntl.FD_CLOEXEC)
|  | ||||
class MultiPathXMLRPCServer(SimpleXMLRPCServer):
    """Multipath XML-RPC Server
    This specialization of SimpleXMLRPCServer allows the user to create
    multiple Dispatcher instances and assign them to different
    HTTP request paths.  This makes it possible to run two or more
    'virtual XML-RPC servers' at the same port.
    Make sure that the requestHandler accepts the paths in question.
    """
    def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler,
                 logRequests=True, allow_none=False, encoding=None,
                 bind_and_activate=True, use_builtin_types=False):

        SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, allow_none,
                                    encoding, bind_and_activate, use_builtin_types)
        # maps request path -> dispatcher object
        self.dispatchers = {}
        # used when marshalling the fallback fault in _marshaled_dispatch
        self.allow_none = allow_none
        self.encoding = encoding or 'utf-8'

    def add_dispatcher(self, path, dispatcher):
        """Register dispatcher for the given request path and return it."""
        self.dispatchers[path] = dispatcher
        return dispatcher

    def get_dispatcher(self, path):
        """Return the dispatcher registered for path (KeyError if none)."""
        return self.dispatchers[path]

    def _marshaled_dispatch(self, data, dispatch_method = None, path = None):
        """Forward the request to the dispatcher registered for path.

        Returns the marshalled response as bytes. If no dispatcher is
        registered for path, or the dispatcher itself raises, an XML-RPC
        fault with code 1 is returned instead.
        """
        try:
            response = self.dispatchers[path]._marshaled_dispatch(
               data, dispatch_method, path)
        except:
            # report low level exception back to server
            # (each dispatcher should have handled their own
            # exceptions)
            exc_type, exc_value = sys.exc_info()[:2]
            response = dumps(
                Fault(1, "%s:%s" % (exc_type, exc_value)),
                encoding=self.encoding, allow_none=self.allow_none)
            # The fault text may hold characters the configured encoding
            # cannot represent; emit them as XML character references
            # rather than raising UnicodeEncodeError from this handler.
            response = response.encode(self.encoding, 'xmlcharrefreplace')
        return response
|  | ||||
class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher):
    """Simple handler for XML-RPC data passed through CGI."""

    def __init__(self, allow_none=False, encoding=None, use_builtin_types=False):
        SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types)

    def handle_xmlrpc(self, request_text):
        """Handle a single XML-RPC request"""

        payload = self._marshaled_dispatch(request_text)

        # Headers go through the text layer; the body is written as raw
        # bytes on the underlying buffer once the headers are flushed.
        print('Content-Type: text/xml')
        print('Content-Length: %d' % len(payload))
        print()
        sys.stdout.flush()
        raw_out = sys.stdout.buffer
        raw_out.write(payload)
        raw_out.flush()

    def handle_get(self):
        """Handle a single HTTP GET request.

        Default implementation indicates an error because
        XML-RPC uses the POST method.
        """

        code = 400
        message, explain = BaseHTTPRequestHandler.responses[code]

        fields = {
            'code' : code,
            'message' : message,
            'explain' : explain
        }
        body = (http_server.DEFAULT_ERROR_MESSAGE % fields).encode('utf-8')
        print('Status: %d %s' % (code, message))
        print('Content-Type: %s' % http_server.DEFAULT_ERROR_CONTENT_TYPE)
        print('Content-Length: %d' % len(body))
        print()
        sys.stdout.flush()
        raw_out = sys.stdout.buffer
        raw_out.write(body)
        raw_out.flush()

    def handle_request(self, request_text=None):
        """Handle a single XML-RPC request passed through a CGI post method.

        If no XML data is given then it is read from stdin. The resulting
        XML-RPC response is printed to stdout along with the correct HTTP
        headers.
        """

        if request_text is None:
            if os.environ.get('REQUEST_METHOD', None) == 'GET':
                self.handle_get()
                return

            # POST data is normally available through stdin
            try:
                length = int(os.environ.get('CONTENT_LENGTH', None))
            except (ValueError, TypeError):
                # missing or non-numeric length: read until EOF
                length = -1
            request_text = sys.stdin.read(length)

        self.handle_xmlrpc(request_text)
|  | ||||
|  | ||||
| # ----------------------------------------------------------------------------- | ||||
| # Self documenting XML-RPC Server. | ||||
|  | ||||
class ServerHTMLDoc(pydoc.HTMLDoc):
    """Class used to generate pydoc HTML document for a server"""

    def markup(self, text, escape=None, funcs={}, classes={}, methods={}):
        """Mark up some plain text, given a context of symbols to look for.
        Each context dictionary maps object names to anchor names.

        URLs, RFC and PEP references are turned into hyperlinks; names
        followed by '(' are linked through methods/funcs/classes, other
        names through classes. The default dictionaries are only read,
        never modified."""
        escape = escape or self.escape
        results = []
        here = 0

        # XXX Note that this regular expression does not allow for the
        # hyperlinking of arbitrary strings being used as method
        # names. Only methods with names consisting of word characters
        # and '.'s are hyperlinked.
        pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|'
                                r'RFC[- ]?(\d+)|'
                                r'PEP[- ]?(\d+)|'
                                r'(self\.)?((?:\w|\.)+))\b')
        while True:
            match = pattern.search(text, here)
            if not match:
                break
            start, end = match.span()
            results.append(escape(text[here:start]))

            whole, scheme, rfc, pep, selfdot, name = match.groups()
            if scheme:
                # The URL is placed inside a double-quoted href attribute,
                # so double quotes must be escaped as well.
                url = escape(whole).replace('"', '&quot;')
                results.append('<a href="%s">%s</a>' % (url, url))
            elif rfc:
                url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc)
                results.append('<a href="%s">%s</a>' % (url, escape(whole)))
            elif pep:
                url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep)
                results.append('<a href="%s">%s</a>' % (url, escape(whole)))
            elif text[end:end+1] == '(':
                results.append(self.namelink(name, methods, funcs, classes))
            elif selfdot:
                results.append('self.<strong>%s</strong>' % name)
            else:
                results.append(self.namelink(name, classes))
            here = end
        results.append(escape(text[here:]))
        return ''.join(results)

    def _format_signature(self, object):
        """Return the argument list text for a function or bound method."""
        formatargspec = getattr(inspect, 'formatargspec', None)
        if formatargspec is None:
            # inspect.formatargspec was removed in Python 3.11; fall back
            # to inspect.signature, which already omits the bound first
            # argument of a method.
            return self.escape(str(inspect.signature(object)))

        args = inspect.getfullargspec(object)
        # exclude the argument bound to the instance, it will be
        # confusing to the non-Python user
        names = args.args[1:] if inspect.ismethod(object) else args.args
        return formatargspec(
            names, args.varargs, args.varkw, args.defaults,
            annotations=args.annotations,
            formatvalue=self.formatvalue)

    def docroutine(self, object, name, mod=None,
                   funcs={}, classes={}, methods={}, cl=None):
        """Produce HTML documentation for a function or method object.

        object may also be an (argspec, docstring) tuple, in which case
        its non-empty members are used in place of introspection."""

        anchor = (cl and cl.__name__ or '') + '-' + name
        note = ''

        title = '<a name="%s"><strong>%s</strong></a>' % (
            self.escape(anchor), self.escape(name))

        if inspect.ismethod(object) or inspect.isfunction(object):
            argspec = self._format_signature(object)
        else:
            argspec = '(...)'

        if isinstance(object, tuple):
            argspec = object[0] or argspec
            docstring = object[1] or ""
        else:
            docstring = pydoc.getdoc(object)

        decl = title + argspec + (note and self.grey(
               '<font face="helvetica, arial">%s</font>' % note))

        doc = self.markup(
            docstring, self.preformat, funcs, classes, methods)
        doc = doc and '<dd><tt>%s</tt></dd>' % doc
        return '<dl><dt>%s</dt>%s</dl>\n' % (decl, doc)

    def docserver(self, server_name, package_documentation, methods):
        """Produce HTML documentation for an XML-RPC server."""

        # Map both each method name and each method object to its anchor.
        fdict = {}
        for key, value in methods.items():
            fdict[key] = '#-' + key
            fdict[value] = fdict[key]

        server_name = self.escape(server_name)
        head = '<big><big><strong>%s</strong></big></big>' % server_name
        result = self.heading(head, '#ffffff', '#7799ee')

        doc = self.markup(package_documentation, self.preformat, fdict)
        doc = doc and '<tt>%s</tt>' % doc
        result = result + '<p>%s</p>\n' % doc

        contents = []
        method_items = sorted(methods.items())
        for key, value in method_items:
            contents.append(self.docroutine(value, key, funcs=fdict))
        result = result + self.bigsection(
            'Methods', '#ffffff', '#eeaa77', ''.join(contents))

        return result
|  | ||||
class XMLRPCDocGenerator(object):
    """Generates documentation for an XML-RPC server.

    This class is designed as mix-in and should not
    be constructed directly.
    """

    def __init__(self):
        # setup variables used for HTML documentation
        default_heading = 'XML-RPC Server Documentation'
        self.server_name = default_heading
        self.server_documentation = (
            "This server exports the following methods through the XML-RPC "
            "protocol.")
        self.server_title = default_heading

    def set_server_title(self, server_title):
        """Set the HTML title of the generated server documentation"""

        self.server_title = server_title

    def set_server_name(self, server_name):
        """Set the name of the generated HTML server documentation"""

        self.server_name = server_name

    def set_server_documentation(self, server_documentation):
        """Set the documentation string for the entire server."""

        self.server_documentation = server_documentation

    def generate_html_documentation(self):
        """generate_html_documentation() => html documentation for the server

        Generates HTML documentation for the server using introspection for
        installed functions and instances that do not implement the
        _dispatch method. Alternatively, instances can choose to implement
        the _get_method_argstring(method_name) method to provide the
        argument string used in the documentation and the
        _methodHelp(method_name) method to provide the help text used
        in the documentation."""

        catalogue = {}

        for method_name in self.system_listMethods():
            if method_name in self.funcs:
                entry = self.funcs[method_name]
            elif self.instance is not None:
                target = self.instance

                # (argspec, documentation) as supplied by the instance
                argstring = None
                if hasattr(target, '_get_method_argstring'):
                    argstring = target._get_method_argstring(method_name)
                help_text = None
                if hasattr(target, '_methodHelp'):
                    help_text = target._methodHelp(method_name)
                described = (argstring, help_text)

                if described != (None, None) or hasattr(target, '_dispatch'):
                    entry = described
                else:
                    # nothing supplied and no custom dispatch: introspect
                    try:
                        entry = resolve_dotted_attribute(target, method_name)
                    except AttributeError:
                        entry = described
            else:
                assert 0, "Could not find method in self.functions and no "\
                          "instance installed"

            catalogue[method_name] = entry

        documenter = ServerHTMLDoc()
        body = documenter.docserver(
            self.server_name, self.server_documentation, catalogue)

        return documenter.page(self.server_title, body)
|  | ||||
class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
    """XML-RPC and documentation request handler class.

    POST requests are decoded as XML-RPC requests (inherited behavior).

    GET requests are answered with the server's HTML documentation.
    """

    def do_GET(self):
        """Handles the HTTP GET request.

        Interpret all HTTP GET requests as requests for server
        documentation.
        """
        # Only legal paths get the documentation page.
        if self.is_rpc_path_valid():
            page = self.server.generate_html_documentation().encode('utf-8')
            self.send_response(200)
            headers = (("Content-type", "text/html"),
                       ("Content-length", str(len(page))))
            for field, value in headers:
                self.send_header(field, value)
            self.end_headers()
            self.wfile.write(page)
        else:
            self.report_404()
|  | ||||
class DocXMLRPCServer(SimpleXMLRPCServer, XMLRPCDocGenerator):
    """XML-RPC and HTML documentation server.

    Combines SimpleXMLRPCServer with XMLRPCDocGenerator so that the
    server can also serve its own documentation.
    """

    def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler,
                 logRequests=True, allow_none=False, encoding=None,
                 bind_and_activate=True, use_builtin_types=False):
        # Both bases are initialised explicitly, server first.
        SimpleXMLRPCServer.__init__(
            self, addr,
            requestHandler=requestHandler,
            logRequests=logRequests,
            allow_none=allow_none,
            encoding=encoding,
            bind_and_activate=bind_and_activate,
            use_builtin_types=use_builtin_types)
        XMLRPCDocGenerator.__init__(self)
|  | ||||
class DocCGIXMLRPCRequestHandler(CGIXMLRPCRequestHandler, XMLRPCDocGenerator):
    """Handler for XML-RPC data and documentation requests passed through
    CGI"""

    def __init__(self):
        # Initialise the CGI handler with its defaults, then the
        # documentation settings.
        CGIXMLRPCRequestHandler.__init__(self)
        XMLRPCDocGenerator.__init__(self)

    def handle_get(self):
        """Handles the HTTP GET request.

        Interpret all HTTP GET requests as requests for server
        documentation.
        """

        page = self.generate_html_documentation().encode('utf-8')

        print('Content-Type: text/html')
        print('Content-Length: %d' % len(page))
        print()
        sys.stdout.flush()
        raw_out = sys.stdout.buffer
        raw_out.write(page)
        raw_out.flush()
|  | ||||
|  | ||||
if __name__ == '__main__':
    # Demo: serve a few functions and an example instance on localhost:8000.
    import datetime

    class ExampleService:
        def getData(self):
            return '42'

        # reachable as "currentTime.getCurrentTime" because the instance
        # is registered with allow_dotted_names=True below
        class currentTime:
            @staticmethod
            def getCurrentTime():
                return datetime.datetime.now()

    address = ("localhost", 8000)
    server = SimpleXMLRPCServer(address)
    server.register_function(pow)
    server.register_function(lambda x,y: x+y, 'add')
    server.register_instance(ExampleService(), allow_dotted_names=True)
    server.register_multicall_functions()
    print('Serving XML-RPC on localhost port 8000')
    print('It is advisable to run this example server within a secure, closed network.')
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C: release the listening socket and exit cleanly.
        print("\nKeyboard interrupt received, exiting.")
        server.server_close()
        sys.exit(0)
		Reference in New Issue
	
	Block a user