2024-11-29 18:15:30 +00:00
parent 40aade2d8e
commit bc9415586e
5298 changed files with 1938676 additions and 80 deletions


@@ -0,0 +1,116 @@
"""
pip._vendor is for vendoring dependencies of pip to prevent needing pip to
depend on something external.
Files inside of pip._vendor should be considered immutable and should only be
updated to versions from upstream.
"""
from __future__ import absolute_import
import glob
import os.path
import sys
# Downstream redistributors which have debundled our dependencies should also
# patch this value to be true. This will trigger the additional patching
# to cause things like "six" to be available as pip._vendor.six.
DEBUNDLED = False
# By default, look in this directory for a bunch of .whl files which we will
# add to the beginning of sys.path before attempting to import anything. This
# is done to support downstream re-distributors like Debian and Fedora who
# wish to create their own Wheels for our dependencies to aid in debundling.
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__))
# Define a small helper function to alias our vendored modules to the real ones
# if the vendored ones do not exist. The idea for this was taken from
# https://github.com/kennethreitz/requests/pull/2567.
def vendored(modulename):
vendored_name = "{0}.{1}".format(__name__, modulename)
try:
__import__(modulename, globals(), locals(), level=0)
except ImportError:
# We can just silently allow import failures to pass here. If we
# got to this point it means that ``import pip._vendor.whatever``
# failed and so did ``import whatever``. Since we're importing this
# upfront in an attempt to alias imports, not erroring here will
# just mean we get a regular import error whenever pip *actually*
# tries to import one of these modules to use it, which actually
# gives us a better error message than we would have otherwise
# gotten.
pass
else:
sys.modules[vendored_name] = sys.modules[modulename]
base, head = vendored_name.rsplit(".", 1)
setattr(sys.modules[base], head, sys.modules[modulename])
# If we're operating in a debundled setup, then we want to go ahead and trigger
# the aliasing of our vendored libraries as well as looking for wheels to add
# to our sys.path. This will cause all of this code to be a no-op typically
# however downstream redistributors can enable it in a consistent way across
# all platforms.
if DEBUNDLED:
# Actually look inside of WHEEL_DIR to find .whl files and add them to the
# front of our sys.path.
sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path
# Actually alias all of our vendored dependencies.
vendored("cachecontrol")
vendored("certifi")
vendored("distlib")
vendored("distro")
vendored("packaging")
vendored("packaging.version")
vendored("packaging.specifiers")
vendored("pkg_resources")
vendored("platformdirs")
vendored("progress")
vendored("pyproject_hooks")
vendored("requests")
vendored("requests.exceptions")
vendored("requests.packages")
vendored("requests.packages.urllib3")
vendored("requests.packages.urllib3._collections")
vendored("requests.packages.urllib3.connection")
vendored("requests.packages.urllib3.connectionpool")
vendored("requests.packages.urllib3.contrib")
vendored("requests.packages.urllib3.contrib.ntlmpool")
vendored("requests.packages.urllib3.contrib.pyopenssl")
vendored("requests.packages.urllib3.exceptions")
vendored("requests.packages.urllib3.fields")
vendored("requests.packages.urllib3.filepost")
vendored("requests.packages.urllib3.packages")
vendored("requests.packages.urllib3.packages.ordered_dict")
vendored("requests.packages.urllib3.packages.six")
vendored("requests.packages.urllib3.packages.ssl_match_hostname")
vendored("requests.packages.urllib3.packages.ssl_match_hostname."
"_implementation")
vendored("requests.packages.urllib3.poolmanager")
vendored("requests.packages.urllib3.request")
vendored("requests.packages.urllib3.response")
vendored("requests.packages.urllib3.util")
vendored("requests.packages.urllib3.util.connection")
vendored("requests.packages.urllib3.util.request")
vendored("requests.packages.urllib3.util.response")
vendored("requests.packages.urllib3.util.retry")
vendored("requests.packages.urllib3.util.ssl_")
vendored("requests.packages.urllib3.util.timeout")
vendored("requests.packages.urllib3.util.url")
vendored("resolvelib")
vendored("rich")
vendored("rich.console")
vendored("rich.highlighter")
vendored("rich.logging")
vendored("rich.markup")
vendored("rich.progress")
vendored("rich.segment")
vendored("rich.style")
vendored("rich.text")
vendored("rich.traceback")
if sys.version_info < (3, 11):
vendored("tomli")
vendored("truststore")
vendored("urllib3")


@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.14.0"
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.controller import CacheController
from pip._vendor.cachecontrol.wrapper import CacheControl
__all__ = [
"__author__",
"__email__",
"__version__",
"CacheControlAdapter",
"CacheController",
"CacheControl",
]
import logging
logging.getLogger(__name__).addHandler(logging.NullHandler())
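
A minimal usage sketch for the names exported here, assuming the standalone cachecontrol and requests distributions rather than the vendored copies:

# Sketch: transparently cache idempotent requests on a requests session.
import requests
from cachecontrol import CacheControl

sess = CacheControl(requests.Session())
resp = sess.get("https://example.com/")  # hits the network
resp = sess.get("https://example.com/")  # may be answered from the cache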


@@ -0,0 +1,70 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from argparse import ArgumentParser
from typing import TYPE_CHECKING
from pip._vendor import requests
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import logger
if TYPE_CHECKING:
from argparse import Namespace
from pip._vendor.cachecontrol.controller import CacheController
def setup_logging() -> None:
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
logger.addHandler(handler)
def get_session() -> requests.Session:
adapter = CacheControlAdapter(
DictCache(), cache_etags=True, serializer=None, heuristic=None
)
sess = requests.Session()
sess.mount("http://", adapter)
sess.mount("https://", adapter)
sess.cache_controller = adapter.controller # type: ignore[attr-defined]
return sess
def get_args() -> Namespace:
parser = ArgumentParser()
parser.add_argument("url", help="The URL to try and cache")
return parser.parse_args()
def main() -> None:
args = get_args()
sess = get_session()
# Make a request to get a response
resp = sess.get(args.url)
# Turn on logging
setup_logging()
# try setting the cache
cache_controller: CacheController = (
sess.cache_controller # type: ignore[attr-defined]
)
cache_controller.cache_response(resp.request, resp.raw)
# Now try to get it
if cache_controller.cached_request(resp.request):
print("Cached!")
else:
print("Not cached :(")
if __name__ == "__main__":
main()


@@ -0,0 +1,161 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import functools
import types
import zlib
from typing import TYPE_CHECKING, Any, Collection, Mapping
from pip._vendor.requests.adapters import HTTPAdapter
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import PERMANENT_REDIRECT_STATUSES, CacheController
from pip._vendor.cachecontrol.filewrapper import CallbackFileWrapper
if TYPE_CHECKING:
from pip._vendor.requests import PreparedRequest, Response
from pip._vendor.urllib3 import HTTPResponse
from pip._vendor.cachecontrol.cache import BaseCache
from pip._vendor.cachecontrol.heuristics import BaseHeuristic
from pip._vendor.cachecontrol.serialize import Serializer
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = {"PUT", "PATCH", "DELETE"}
def __init__(
self,
cache: BaseCache | None = None,
cache_etags: bool = True,
controller_class: type[CacheController] | None = None,
serializer: Serializer | None = None,
heuristic: BaseHeuristic | None = None,
cacheable_methods: Collection[str] | None = None,
*args: Any,
**kw: Any,
) -> None:
super().__init__(*args, **kw)
self.cache = DictCache() if cache is None else cache
self.heuristic = heuristic
self.cacheable_methods = cacheable_methods or ("GET",)
controller_factory = controller_class or CacheController
self.controller = controller_factory(
self.cache, cache_etags=cache_etags, serializer=serializer
)
def send(
self,
request: PreparedRequest,
stream: bool = False,
timeout: None | float | tuple[float, float] | tuple[float, None] = None,
verify: bool | str = True,
cert: (None | bytes | str | tuple[bytes | str, bytes | str]) = None,
proxies: Mapping[str, str] | None = None,
cacheable_methods: Collection[str] | None = None,
) -> Response:
"""
Send a request. Use the request information to see if it
exists in the cache and cache the response if we need to and can.
"""
cacheable = cacheable_methods or self.cacheable_methods
if request.method in cacheable:
try:
cached_response = self.controller.cached_request(request)
except zlib.error:
cached_response = None
if cached_response:
return self.build_response(request, cached_response, from_cache=True)
# check for etags and add headers if appropriate
request.headers.update(self.controller.conditional_headers(request))
resp = super().send(request, stream, timeout, verify, cert, proxies)
return resp
def build_response(
self,
request: PreparedRequest,
response: HTTPResponse,
from_cache: bool = False,
cacheable_methods: Collection[str] | None = None,
) -> Response:
"""
Build a response by making a request or using the cache.
This will end up calling send and returning a potentially
cached response
"""
cacheable = cacheable_methods or self.cacheable_methods
if not from_cache and request.method in cacheable:
# Check for any heuristics that might update headers
# before trying to cache.
if self.heuristic:
response = self.heuristic.apply(response)
# apply any expiration heuristics
if response.status == 304:
# We must have sent an ETag request. This could mean
# that our cached entry has expired or that we simply
# have an ETag. In either case, we want to try and
# update the cache.
cached_response = self.controller.update_cached_response(
request, response
)
if cached_response is not response:
from_cache = True
# We are done with the server response, read a
# possible response body (compliant servers will
# not return one, but we cannot be 100% sure) and
# release the connection back to the pool.
response.read(decode_content=False)
response.release_conn()
response = cached_response
# We always cache the 301 responses
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
self.controller.cache_response(request, response)
else:
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper( # type: ignore[assignment]
response._fp, # type: ignore[arg-type]
functools.partial(
self.controller.cache_response, request, response
),
)
if response.chunked:
super_update_chunk_length = response._update_chunk_length
def _update_chunk_length(self: HTTPResponse) -> None:
super_update_chunk_length()
if self.chunk_left == 0:
self._fp._close() # type: ignore[union-attr]
response._update_chunk_length = types.MethodType( # type: ignore[method-assign]
_update_chunk_length, response
)
resp: Response = super().build_response(request, response) # type: ignore[no-untyped-call]
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
assert request.url is not None
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Give the request a from_cache attr to let people use it
resp.from_cache = from_cache # type: ignore[attr-defined]
return resp
def close(self) -> None:
self.cache.close()
super().close() # type: ignore[no-untyped-call]


@@ -0,0 +1,74 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from __future__ import annotations
from threading import Lock
from typing import IO, TYPE_CHECKING, MutableMapping
if TYPE_CHECKING:
from datetime import datetime
class BaseCache:
def get(self, key: str) -> bytes | None:
raise NotImplementedError()
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
raise NotImplementedError()
def delete(self, key: str) -> None:
raise NotImplementedError()
def close(self) -> None:
pass
class DictCache(BaseCache):
def __init__(self, init_dict: MutableMapping[str, bytes] | None = None) -> None:
self.lock = Lock()
self.data = init_dict or {}
def get(self, key: str) -> bytes | None:
return self.data.get(key, None)
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
with self.lock:
self.data.update({key: value})
def delete(self, key: str) -> None:
with self.lock:
if key in self.data:
self.data.pop(key)
class SeparateBodyBaseCache(BaseCache):
"""
In this variant, the body is not stored mixed in with the metadata, but is
passed in (as a bytes-like object) in a separate call to ``set_body()``.
That is, the expected interaction pattern is::
cache.set(key, serialized_metadata)
cache.set_body(key, body)
Similarly, the body should be loaded separately via ``get_body()``.
"""
def set_body(self, key: str, body: bytes) -> None:
raise NotImplementedError()
def get_body(self, key: str) -> IO[bytes] | None:
"""
Return the body as file-like object.
"""
raise NotImplementedError()
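
A minimal in-memory sketch of the separate-body contract described above (the class name DictSeparateBodyCache is illustrative, not part of the library):

# Sketch: metadata and body live in two dicts, mirroring the
# set()/set_body() split described in the docstring above.
import io

class DictSeparateBodyCache(SeparateBodyBaseCache):
    def __init__(self) -> None:
        self.metadata: dict[str, bytes] = {}
        self.bodies: dict[str, bytes] = {}

    def get(self, key: str) -> bytes | None:
        return self.metadata.get(key)

    def set(self, key: str, value: bytes, expires=None) -> None:
        self.metadata[key] = value

    def delete(self, key: str) -> None:
        self.metadata.pop(key, None)
        self.bodies.pop(key, None)

    def set_body(self, key: str, body: bytes) -> None:
        self.bodies[key] = body

    def get_body(self, key: str) -> io.BytesIO | None:
        data = self.bodies.get(key)
        return io.BytesIO(data) if data is not None else None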


@@ -0,0 +1,8 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from pip._vendor.cachecontrol.caches.file_cache import FileCache, SeparateBodyFileCache
from pip._vendor.cachecontrol.caches.redis_cache import RedisCache
__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]


@@ -0,0 +1,182 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import hashlib
import os
from textwrap import dedent
from typing import IO, TYPE_CHECKING, Union
from pathlib import Path
from pip._vendor.cachecontrol.cache import BaseCache, SeparateBodyBaseCache
from pip._vendor.cachecontrol.controller import CacheController
if TYPE_CHECKING:
from datetime import datetime
from filelock import BaseFileLock
def _secure_open_write(filename: str, fmode: int) -> IO[bytes]:
# We only want to write to this file, so open it in write only mode
flags = os.O_WRONLY
# os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
# will open *new* files.
# We specify this because we want to ensure that the mode we pass is the
# mode of the file.
flags |= os.O_CREAT | os.O_EXCL
# Do not follow symlinks to prevent someone from making a symlink that
# we follow and insecurely open a cache file.
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
# On Windows we'll mark this file as binary
if hasattr(os, "O_BINARY"):
flags |= os.O_BINARY
# Before we open our file, we want to delete any existing file that is
# there
try:
os.remove(filename)
except OSError:
# The file must not exist already, so we can just skip ahead to opening
pass
# Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
# race condition happens between the os.remove and this line, that an
# error will be raised. Because we utilize a lockfile this should only
# happen if someone is attempting to attack us.
fd = os.open(filename, flags, fmode)
try:
return os.fdopen(fd, "wb")
except:
# An error occurred wrapping our FD in a file object
os.close(fd)
raise
class _FileCacheMixin:
"""Shared implementation for both FileCache variants."""
def __init__(
self,
directory: str | Path,
forever: bool = False,
filemode: int = 0o0600,
dirmode: int = 0o0700,
lock_class: type[BaseFileLock] | None = None,
) -> None:
try:
if lock_class is None:
from filelock import FileLock
lock_class = FileLock
except ImportError:
notice = dedent(
"""
NOTE: In order to use the FileCache you must have
filelock installed. You can install it via pip:
pip install cachecontrol[filecache]
"""
)
raise ImportError(notice)
self.directory = directory
self.forever = forever
self.filemode = filemode
self.dirmode = dirmode
self.lock_class = lock_class
@staticmethod
def encode(x: str) -> str:
return hashlib.sha224(x.encode()).hexdigest()
def _fn(self, name: str) -> str:
# NOTE: This method should not change as some may depend on it.
# See: https://github.com/ionrock/cachecontrol/issues/63
hashed = self.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
def get(self, key: str) -> bytes | None:
name = self._fn(key)
try:
with open(name, "rb") as fh:
return fh.read()
except FileNotFoundError:
return None
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
name = self._fn(key)
self._write(name, value)
def _write(self, path: str, data: bytes) -> None:
"""
Safely write the data to the given path.
"""
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(path), self.dirmode)
except OSError:
pass
with self.lock_class(path + ".lock"):
# Write our actual file
with _secure_open_write(path, self.filemode) as fh:
fh.write(data)
def _delete(self, key: str, suffix: str) -> None:
name = self._fn(key) + suffix
if not self.forever:
try:
os.remove(name)
except FileNotFoundError:
pass
class FileCache(_FileCacheMixin, BaseCache):
"""
Traditional FileCache: body is stored in memory, so not suitable for large
downloads.
"""
def delete(self, key: str) -> None:
self._delete(key, "")
class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
"""
Memory-efficient FileCache: body is stored in a separate file, reducing
peak memory usage.
"""
def get_body(self, key: str) -> IO[bytes] | None:
name = self._fn(key) + ".body"
try:
return open(name, "rb")
except FileNotFoundError:
return None
def set_body(self, key: str, body: bytes) -> None:
name = self._fn(key) + ".body"
self._write(name, body)
def delete(self, key: str) -> None:
self._delete(key, "")
self._delete(key, ".body")
def url_to_file_path(url: str, filecache: FileCache) -> str:
"""Return the file cache path based on the URL.
This does not ensure the file exists!
"""
key = CacheController.cache_url(url)
return filecache._fn(key)
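
A short sketch of how these caches plug into a session, assuming the standalone cachecontrol distribution; the ".webcache" directory name is illustrative:

# Sketch: persist cache entries on disk; SeparateBodyFileCache keeps each
# body in its own ".body" file, so large downloads never sit in memory.
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import SeparateBodyFileCache

sess = CacheControl(requests.Session(), cache=SeparateBodyFileCache(".webcache"))
resp = sess.get("https://example.com/")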


@@ -0,0 +1,48 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from datetime import datetime, timezone
from typing import TYPE_CHECKING
from pip._vendor.cachecontrol.cache import BaseCache
if TYPE_CHECKING:
from redis import Redis
class RedisCache(BaseCache):
def __init__(self, conn: Redis[bytes]) -> None:
self.conn = conn
def get(self, key: str) -> bytes | None:
return self.conn.get(key)
def set(
self, key: str, value: bytes, expires: int | datetime | None = None
) -> None:
if not expires:
self.conn.set(key, value)
elif isinstance(expires, datetime):
now_utc = datetime.now(timezone.utc)
if expires.tzinfo is None:
now_utc = now_utc.replace(tzinfo=None)
delta = expires - now_utc
self.conn.setex(key, int(delta.total_seconds()), value)
else:
self.conn.setex(key, expires, value)
def delete(self, key: str) -> None:
self.conn.delete(key)
def clear(self) -> None:
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
def close(self) -> None:
"""Redis uses connection pooling, no need to close the connection."""
pass
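
A usage sketch, assuming the standalone cachecontrol and redis distributions; the connection URL is illustrative:

# Sketch: back the HTTP cache with Redis, which expires entries server-side
# via the SETEX calls issued by RedisCache.set().
import redis
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.redis_cache import RedisCache

conn = redis.from_url("redis://localhost:6379/0")
sess = CacheControl(requests.Session(), cache=RedisCache(conn))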


@@ -0,0 +1,499 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""
The httplib2 algorithms ported for use with requests.
"""
from __future__ import annotations
import calendar
import logging
import re
import time
from email.utils import parsedate_tz
from typing import TYPE_CHECKING, Collection, Mapping
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.cachecontrol.cache import DictCache, SeparateBodyBaseCache
from pip._vendor.cachecontrol.serialize import Serializer
if TYPE_CHECKING:
from typing import Literal
from pip._vendor.requests import PreparedRequest
from pip._vendor.urllib3 import HTTPResponse
from pip._vendor.cachecontrol.cache import BaseCache
logger = logging.getLogger(__name__)
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
PERMANENT_REDIRECT_STATUSES = (301, 308)
def parse_uri(uri: str) -> tuple[str, str, str, str, str]:
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
match = URI.match(uri)
assert match is not None
groups = match.groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController:
"""An interface to see if request should cached or not."""
def __init__(
self,
cache: BaseCache | None = None,
cache_etags: bool = True,
serializer: Serializer | None = None,
status_codes: Collection[int] | None = None,
):
self.cache = DictCache() if cache is None else cache
self.cache_etags = cache_etags
self.serializer = serializer or Serializer()
self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)
@classmethod
def _urlnorm(cls, uri: str) -> str:
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
scheme = scheme.lower()
authority = authority.lower()
if not path:
path = "/"
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
defrag_uri = scheme + "://" + authority + request_uri
return defrag_uri
@classmethod
def cache_url(cls, uri: str) -> str:
return cls._urlnorm(uri)
def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]:
known_directives = {
# https://tools.ietf.org/html/rfc7234#section-5.2
"max-age": (int, True),
"max-stale": (int, False),
"min-fresh": (int, True),
"no-cache": (None, False),
"no-store": (None, False),
"no-transform": (None, False),
"only-if-cached": (None, False),
"must-revalidate": (None, False),
"public": (None, False),
"private": (None, False),
"proxy-revalidate": (None, False),
"s-maxage": (int, True),
}
cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
retval: dict[str, int | None] = {}
for cc_directive in cc_headers.split(","):
if not cc_directive.strip():
continue
parts = cc_directive.split("=", 1)
directive = parts[0].strip()
try:
typ, required = known_directives[directive]
except KeyError:
logger.debug("Ignoring unknown cache-control directive: %s", directive)
continue
if not typ or not required:
retval[directive] = None
if typ:
try:
retval[directive] = typ(parts[1].strip())
except IndexError:
if required:
logger.debug(
"Missing value for cache-control " "directive: %s",
directive,
)
except ValueError:
logger.debug(
"Invalid value for cache-control directive " "%s, must be %s",
directive,
typ.__name__,
)
return retval
def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None:
"""
Load a cached response, or return None if it's not available.
"""
# We do not support caching of partial content: so if the request contains a
# Range header then we don't want to load anything from the cache.
if "Range" in request.headers:
return None
cache_url = request.url
assert cache_url is not None
cache_data = self.cache.get(cache_url)
if cache_data is None:
logger.debug("No cache entry available")
return None
if isinstance(self.cache, SeparateBodyBaseCache):
body_file = self.cache.get_body(cache_url)
else:
body_file = None
result = self.serializer.loads(request, cache_data, body_file)
if result is None:
logger.warning("Cache entry deserialization failed, entry ignored")
return result
def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]:
"""
Return a cached response if it exists in the cache, otherwise
return False.
"""
assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers)
# Bail out if the request insists on fresh data
if "no-cache" in cc:
logger.debug('Request header has "no-cache", cache bypassed')
return False
if "max-age" in cc and cc["max-age"] == 0:
logger.debug('Request header has "max_age" as 0, cache bypassed')
return False
# Check whether we can load the response from the cache:
resp = self._load_from_cache(request)
if not resp:
return False
# If we have a cached permanent redirect, return it immediately. We
# don't need to test our response for other headers b/c it is
# intrinsically "cacheable" as it is Permanent.
#
# See:
# https://tools.ietf.org/html/rfc7231#section-6.4.2
#
# Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache).
if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
msg = (
"Returning cached permanent redirect response "
"(ignoring date and etag information)"
)
logger.debug(msg)
return resp
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if not headers or "date" not in headers:
if "etag" not in headers:
# Without date or etag, the cached response can never be used
# and should be deleted.
logger.debug("Purging cached response: no date or etag")
self.cache.delete(cache_url)
logger.debug("Ignoring cached response: no date")
return False
now = time.time()
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
current_age = max(0, now - date)
logger.debug("Current age based on date: %i", current_age)
# TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
resp_cc = self.parse_cache_control(headers)
# determine freshness
freshness_lifetime = 0
# Check the max-age pragma in the cache control header
max_age = resp_cc.get("max-age")
if max_age is not None:
freshness_lifetime = max_age
logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
# If there isn't a max-age, check for an expires header
elif "expires" in headers:
expires = parsedate_tz(headers["expires"])
if expires is not None:
expire_time = calendar.timegm(expires[:6]) - date
freshness_lifetime = max(0, expire_time)
logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
# Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response.
max_age = cc.get("max-age")
if max_age is not None:
freshness_lifetime = max_age
logger.debug(
"Freshness lifetime from request max-age: %i", freshness_lifetime
)
min_fresh = cc.get("min-fresh")
if min_fresh is not None:
# adjust our current age by our min fresh
current_age += min_fresh
logger.debug("Adjusted current age from min-fresh: %i", current_age)
# Return entry if it is fresh enough
if freshness_lifetime > current_age:
logger.debug('The response is "fresh", returning cached response')
logger.debug("%i > %i", freshness_lifetime, current_age)
return resp
# we're not fresh. If we don't have an Etag, clear it out
if "etag" not in headers:
logger.debug('The cached response is "stale" with no etag, purging')
self.cache.delete(cache_url)
# return the original handler
return False
def conditional_headers(self, request: PreparedRequest) -> dict[str, str]:
resp = self._load_from_cache(request)
new_headers = {}
if resp:
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
if "etag" in headers:
new_headers["If-None-Match"] = headers["ETag"]
if "last-modified" in headers:
new_headers["If-Modified-Since"] = headers["Last-Modified"]
return new_headers
def _cache_set(
self,
cache_url: str,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
expires_time: int | None = None,
) -> None:
"""
Store the data in the cache.
"""
if isinstance(self.cache, SeparateBodyBaseCache):
# We pass in the body separately; just put a placeholder empty
# string in the metadata.
self.cache.set(
cache_url,
self.serializer.dumps(request, response, b""),
expires=expires_time,
)
# body is None can happen when, for example, we're only updating
# headers, as is the case in update_cached_response().
if body is not None:
self.cache.set_body(cache_url, body)
else:
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body),
expires=expires_time,
)
def cache_response(
self,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
status_codes: Collection[int] | None = None,
) -> None:
"""
Algorithm for caching requests.
This assumes a requests Response object.
"""
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
cacheable_status_codes = status_codes or self.cacheable_status_codes
if response.status not in cacheable_status_codes:
logger.debug(
"Status code %s not in %s", response.status, cacheable_status_codes
)
return
response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
response.headers
)
if "date" in response_headers:
time_tuple = parsedate_tz(response_headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
else:
date = 0
# If we've been given a body, our response has a Content-Length, that
# Content-Length is valid then we can check to see if the body we've
# been given matches the expected size, and if it doesn't we'll just
# skip trying to cache it.
if (
body is not None
and "content-length" in response_headers
and response_headers["content-length"].isdigit()
and int(response_headers["content-length"]) != len(body)
):
return
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
assert request.url is not None
cache_url = self.cache_url(request.url)
logger.debug('Updating cache with response from "%s"', cache_url)
# Delete it from the cache if we happen to have it stored there
no_store = False
if "no-store" in cc:
no_store = True
logger.debug('Response header has "no-store"')
if "no-store" in cc_req:
no_store = True
logger.debug('Request header has "no-store"')
if no_store and self.cache.get(cache_url):
logger.debug('Purging existing cache entry to honor "no-store"')
self.cache.delete(cache_url)
if no_store:
return
# https://tools.ietf.org/html/rfc7234#section-4.1:
# A Vary header field-value of "*" always fails to match.
# Storing such a response leads to a deserialization warning
# during cache lookup and is not allowed to ever be served,
# so storing it can be avoided.
if "*" in response_headers.get("vary", ""):
logger.debug('Response header has "Vary: *"')
return
# If we've been given an etag, then keep the response
if self.cache_etags and "etag" in response_headers:
expires_time = 0
if response_headers.get("expires"):
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires[:6]) - date
expires_time = max(expires_time, 14 * 86400)
logger.debug(f"etag object cached for {expires_time} seconds")
logger.debug("Caching due to etag")
self._cache_set(cache_url, request, response, body, expires_time)
# Add to the cache any permanent redirects. We do this before looking
# at the Date headers.
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
logger.debug("Caching permanent redirect")
self._cache_set(cache_url, request, response, b"")
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
elif "date" in response_headers:
time_tuple = parsedate_tz(response_headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
# cache when there is a max-age > 0
max_age = cc.get("max-age")
if max_age is not None and max_age > 0:
logger.debug("Caching b/c date exists and max-age > 0")
expires_time = max_age
self._cache_set(
cache_url,
request,
response,
body,
expires_time,
)
# If the request can expire, it means we should cache it
# in the meantime.
elif "expires" in response_headers:
if response_headers["expires"]:
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires[:6]) - date
else:
expires_time = None
logger.debug(
"Caching b/c of expires header. expires in {} seconds".format(
expires_time
)
)
self._cache_set(
cache_url,
request,
response,
body,
expires_time,
)
def update_cached_response(
self, request: PreparedRequest, response: HTTPResponse
) -> HTTPResponse:
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
assert request.url is not None
cache_url = self.cache_url(request.url)
cached_response = self._load_from_cache(request)
if not cached_response:
# we didn't have a cached response
return response
# Lets update our headers with the headers from the new request:
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
#
# The server isn't supposed to send headers that would make
# the cached body invalid. But... just in case, we'll be sure
# to strip out ones we know might be problematic due to
# typical assumptions.
excluded_headers = ["content-length"]
cached_response.headers.update(
{
k: v
for k, v in response.headers.items()
if k.lower() not in excluded_headers
}
)
# we want a 200 b/c we have content via the cache
cached_response.status = 200
# update our cache
self._cache_set(cache_url, request, cached_response)
return cached_response
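
The freshness check in cached_request() reduces to a few lines of arithmetic. A standalone sketch with hypothetical header values:

# Sketch of the freshness arithmetic used by cached_request() above.
import calendar
import time
from email.utils import parsedate_tz

headers = {  # hypothetical cached response headers
    "date": "Fri, 29 Nov 2024 18:00:00 GMT",
    "cache-control": "max-age=3600",
}
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
current_age = max(0, time.time() - date)  # seconds since the origin response
freshness_lifetime = 3600                 # parsed from the max-age directive
print("fresh" if freshness_lifetime > current_age else "stale")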


@@ -0,0 +1,119 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import mmap
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any, Callable
if TYPE_CHECKING:
from http.client import HTTPResponse
class CallbackFileWrapper:
"""
Small wrapper around a fp object which will tee everything read into a
buffer, and when that file is closed it will execute a callback with the
contents of that buffer.
All attributes are proxied to the underlying file object.
This class uses members with a double underscore (__) leading prefix so as
not to accidentally shadow an attribute.
The data is stored in a temporary file until it is all available. As long
as the temporary files directory is disk-based (sometimes it's a
memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
pressure is high. For small files the disk usually won't be used at all,
it'll all be in the filesystem memory cache, so there should be no
performance impact.
"""
def __init__(
self, fp: HTTPResponse, callback: Callable[[bytes], None] | None
) -> None:
self.__buf = NamedTemporaryFile("rb+", delete=True)
self.__fp = fp
self.__callback = callback
def __getattr__(self, name: str) -> Any:
# The vagaries of garbage collection mean that self.__fp is
# not always set. Using __getattribute__ with the mangled
# private name [0] looks up the attribute value and raises an
# AttributeError when it doesn't exist. This stops things from
# infinitely recursing into __getattr__ in the case where
# self.__fp hasn't been set.
#
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
fp = self.__getattribute__("_CallbackFileWrapper__fp")
return getattr(fp, name)
def __is_fp_closed(self) -> bool:
try:
return self.__fp.fp is None
except AttributeError:
pass
try:
closed: bool = self.__fp.closed
return closed
except AttributeError:
pass
# We just don't cache it then.
# TODO: Add some logging here...
return False
def _close(self) -> None:
if self.__callback:
if self.__buf.tell() == 0:
# Empty file:
result = b""
else:
# Return the data without actually loading it into memory,
# relying on Python's buffer API and mmap(). mmap() just gives
# a view directly into the filesystem's memory cache, so it
# doesn't result in duplicate memory use.
self.__buf.seek(0, 0)
result = memoryview(
mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
)
self.__callback(result)
# We assign this to None here, because otherwise we can get into
# really tricky problems where the CPython interpreter dead locks
# because the callback is holding a reference to something which
# has a __del__ method. Setting this to None breaks the cycle
# and allows the garbage collector to do its thing normally.
self.__callback = None
# Closing the temporary file releases memory and frees disk space.
# Important when caching big files.
self.__buf.close()
def read(self, amt: int | None = None) -> bytes:
data: bytes = self.__fp.read(amt)
if data:
# We may be dealing with b'', a sign that things are over:
# it's passed e.g. after we've already closed self.__buf.
self.__buf.write(data)
if self.__is_fp_closed():
self._close()
return data
def _safe_read(self, amt: int) -> bytes:
data: bytes = self.__fp._safe_read(amt) # type: ignore[attr-defined]
if amt == 2 and data == b"\r\n":
# urllib executes this read to toss the CRLF at the end
# of the chunk.
return data
self.__buf.write(data)
if self.__is_fp_closed():
self._close()
return data
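
To see the tee-and-callback behaviour in isolation, a sketch using a hypothetical FakeSource stand-in for http.client.HTTPResponse:

# Sketch: FakeSource mimics the one attribute CallbackFileWrapper inspects
# (.fp becomes None at EOF, as it does on http.client.HTTPResponse).
import io

class FakeSource:
    def __init__(self, data: bytes) -> None:
        self._buf = io.BytesIO(data)
        self.fp = self._buf

    def read(self, amt=None):
        data = self._buf.read(amt)
        if not data:
            self.fp = None  # signal EOF the way http.client does
        return data

captured = []
wrapped = CallbackFileWrapper(FakeSource(b"hello world"), captured.append)
while wrapped.read(4):
    pass
print(bytes(captured[0]))  # b'hello world'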


@@ -0,0 +1,154 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import calendar
import time
from datetime import datetime, timedelta, timezone
from email.utils import formatdate, parsedate, parsedate_tz
from typing import TYPE_CHECKING, Any, Mapping
if TYPE_CHECKING:
from pip._vendor.urllib3 import HTTPResponse
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta: timedelta, date: datetime | None = None) -> datetime:
date = date or datetime.now(timezone.utc)
return date + delta
def datetime_to_header(dt: datetime) -> str:
return formatdate(calendar.timegm(dt.timetuple()))
class BaseHeuristic:
def warning(self, response: HTTPResponse) -> str | None:
"""
Return a valid 1xx warning header value describing the cache
adjustments.
The response is provided to allow warnings like 113
(http://tools.ietf.org/html/rfc7234#section-5.5.4) where we need
to explicitly say the response is over 24 hours old.
"""
return '110 - "Response is Stale"'
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
"""Update the response headers with any new headers.
NOTE: This SHOULD always include some Warning header to
signify that the response was cached by the client, not
by way of the provided headers.
"""
return {}
def apply(self, response: HTTPResponse) -> HTTPResponse:
updated_headers = self.update_headers(response)
if updated_headers:
response.headers.update(updated_headers)
warning_header_value = self.warning(response)
if warning_header_value is not None:
response.headers.update({"Warning": warning_header_value})
return response
class OneDayCache(BaseHeuristic):
"""
Cache the response by providing an expires 1 day in the
future.
"""
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
headers = {}
if "expires" not in response.headers:
date = parsedate(response.headers["date"])
expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc)) # type: ignore[index,misc]
headers["expires"] = datetime_to_header(expires)
headers["cache-control"] = "public"
return headers
class ExpiresAfter(BaseHeuristic):
"""
Cache **all** requests for a defined time period.
"""
def __init__(self, **kw: Any) -> None:
self.delta = timedelta(**kw)
def update_headers(self, response: HTTPResponse) -> dict[str, str]:
expires = expire_after(self.delta)
return {"expires": datetime_to_header(expires), "cache-control": "public"}
def warning(self, response: HTTPResponse) -> str | None:
tmpl = "110 - Automatically cached for %s. Response might be stale"
return tmpl % self.delta
class LastModified(BaseHeuristic):
"""
If there is no Expires header already, fall back on Last-Modified
using the heuristic from
http://tools.ietf.org/html/rfc7234#section-4.2.2
to calculate a reasonable value.
Firefox also does something like this per
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike Mozilla, we limit this to 24 hours.
"""
cacheable_by_default_statuses = {
200,
203,
204,
206,
300,
301,
404,
405,
410,
414,
501,
}
def update_headers(self, resp: HTTPResponse) -> dict[str, str]:
headers: Mapping[str, str] = resp.headers
if "expires" in headers:
return {}
if "cache-control" in headers and headers["cache-control"] != "public":
return {}
if resp.status not in self.cacheable_by_default_statuses:
return {}
if "date" not in headers or "last-modified" not in headers:
return {}
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
last_modified = parsedate(headers["last-modified"])
if last_modified is None:
return {}
now = time.time()
current_age = max(0, now - date)
delta = date - calendar.timegm(last_modified)
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
if freshness_lifetime <= current_age:
return {}
expires = date + freshness_lifetime
return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
def warning(self, resp: HTTPResponse) -> str | None:
return None
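
A usage sketch for the heuristics above, assuming the standalone cachecontrol distribution:

# Sketch: force-cache every response for one day, overriding server headers.
import requests
from cachecontrol import CacheControl
from cachecontrol.heuristics import ExpiresAfter

sess = CacheControl(requests.Session(), heuristic=ExpiresAfter(days=1))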


@@ -0,0 +1,146 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import io
from typing import IO, TYPE_CHECKING, Any, Mapping, cast
from pip._vendor import msgpack
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.urllib3 import HTTPResponse
if TYPE_CHECKING:
from pip._vendor.requests import PreparedRequest
class Serializer:
serde_version = "4"
def dumps(
self,
request: PreparedRequest,
response: HTTPResponse,
body: bytes | None = None,
) -> bytes:
response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
response.headers
)
if body is None:
# When a body isn't passed in, we'll read the response. We
# also update the response with a new file handler to be
# sure it acts as though it was never read.
body = response.read(decode_content=False)
response._fp = io.BytesIO(body) # type: ignore[assignment]
response.length_remaining = len(body)
data = {
"response": {
"body": body, # Empty bytestring if body is stored separately
"headers": {str(k): str(v) for k, v in response.headers.items()},
"status": response.status,
"version": response.version,
"reason": str(response.reason),
"decode_content": response.decode_content,
}
}
# Construct our vary headers
data["vary"] = {}
if "vary" in response_headers:
varied_headers = response_headers["vary"].split(",")
for header in varied_headers:
header = str(header).strip()
header_value = request.headers.get(header, None)
if header_value is not None:
header_value = str(header_value)
data["vary"][header] = header_value
return b",".join([f"cc={self.serde_version}".encode(), self.serialize(data)])
def serialize(self, data: dict[str, Any]) -> bytes:
return cast(bytes, msgpack.dumps(data, use_bin_type=True))
def loads(
self,
request: PreparedRequest,
data: bytes,
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
# Short circuit if we've been given an empty set of data
if not data:
return None
# Previous versions of this library supported other serialization
# formats, but these have all been removed.
if not data.startswith(f"cc={self.serde_version},".encode()):
return None
data = data[5:]
return self._loads_v4(request, data, body_file)
def prepare_response(
self,
request: PreparedRequest,
cached: Mapping[str, Any],
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
# Special case the '*' Vary value as it means we cannot actually
# determine if the cached response is suitable for this request.
# This case is also handled in the controller code when creating
# a cache entry, but is left here for backwards compatibility.
if "*" in cached.get("vary", {}):
return None
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
return None
body_raw = cached["response"].pop("body")
headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
data=cached["response"]["headers"]
)
if headers.get("transfer-encoding", "") == "chunked":
headers.pop("transfer-encoding")
cached["response"]["headers"] = headers
try:
body: IO[bytes]
if body_file is None:
body = io.BytesIO(body_raw)
else:
body = body_file
except TypeError:
# This can happen if cachecontrol serialized to v1 format (pickle)
# using Python 2. A Python 2 str(byte string) will be unpickled as
# a Python 3 str (unicode string), which will cause the above to
# fail with:
#
# TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode("utf8"))
# Discard any `strict` parameter serialized by older version of cachecontrol.
cached["response"].pop("strict", None)
return HTTPResponse(body=body, preload_content=False, **cached["response"])
def _loads_v4(
self,
request: PreparedRequest,
data: bytes,
body_file: IO[bytes] | None = None,
) -> HTTPResponse | None:
try:
cached = msgpack.loads(data, raw=False)
except ValueError:
return None
return self.prepare_response(request, cached, body_file)
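
The wire format produced by dumps() is just a version prefix plus a msgpack payload. A sketch of the envelope; the payload dict is an illustrative subset of the real structure:

# Sketch of the "cc=4,<msgpack bytes>" envelope checked by loads() above.
from pip._vendor import msgpack

payload = {"response": {"status": 200}}  # illustrative subset
blob = b"cc=4," + msgpack.dumps(payload, use_bin_type=True)

assert blob.startswith(b"cc=4,")
decoded = msgpack.loads(blob[5:], raw=False)
assert decoded["response"]["status"] == 200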


@@ -0,0 +1,43 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
from typing import TYPE_CHECKING, Collection
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache
if TYPE_CHECKING:
from pip._vendor import requests
from pip._vendor.cachecontrol.cache import BaseCache
from pip._vendor.cachecontrol.controller import CacheController
from pip._vendor.cachecontrol.heuristics import BaseHeuristic
from pip._vendor.cachecontrol.serialize import Serializer
def CacheControl(
sess: requests.Session,
cache: BaseCache | None = None,
cache_etags: bool = True,
serializer: Serializer | None = None,
heuristic: BaseHeuristic | None = None,
controller_class: type[CacheController] | None = None,
adapter_class: type[CacheControlAdapter] | None = None,
cacheable_methods: Collection[str] | None = None,
) -> requests.Session:
cache = DictCache() if cache is None else cache
adapter_class = adapter_class or CacheControlAdapter
adapter = adapter_class(
cache,
cache_etags=cache_etags,
serializer=serializer,
heuristic=heuristic,
controller_class=controller_class,
cacheable_methods=cacheable_methods,
)
sess.mount("http://", adapter)
sess.mount("https://", adapter)
return sess


@@ -0,0 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
__version__ = "2024.07.04"


@@ -0,0 +1,12 @@
import argparse
from pip._vendor.certifi import contents, where
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()
if args.contents:
print(contents())
else:
print(where())


@@ -0,0 +1,114 @@
"""
certifi.py
~~~~~~~~~~
This module returns the installation location of cacert.pem or its contents.
"""
import sys
import atexit
def exit_cacert_ctx() -> None:
_CACERT_CTX.__exit__(None, None, None) # type: ignore[union-attr]
if sys.version_info >= (3, 11):
from importlib.resources import as_file, files
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the file
# in cases where we're inside of a zipimport situation until someone
# actually calls where(), but we don't want to re-extract the file
# on every call of where(), so we'll do it once then store it in a
# global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you to
# manage the cleanup of this file, so it doesn't actually return a
# path, it returns a context manager that will give you the path
# when you enter it and will do any cleanup when you leave it. In
# the common case of not needing a temporary file, it will just
# return the file system location and the __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = as_file(files("pip._vendor.certifi").joinpath("cacert.pem"))
_CACERT_PATH = str(_CACERT_CTX.__enter__())
atexit.register(exit_cacert_ctx)
return _CACERT_PATH
def contents() -> str:
return files("pip._vendor.certifi").joinpath("cacert.pem").read_text(encoding="ascii")
elif sys.version_info >= (3, 7):
from importlib.resources import path as get_path, read_text
_CACERT_CTX = None
_CACERT_PATH = None
def where() -> str:
# This is slightly terrible, but we want to delay extracting the
# file in cases where we're inside of a zipimport situation until
# someone actually calls where(), but we don't want to re-extract
# the file on every call of where(), so we'll do it once then store
# it in a global variable.
global _CACERT_CTX
global _CACERT_PATH
if _CACERT_PATH is None:
# This is slightly janky, the importlib.resources API wants you
# to manage the cleanup of this file, so it doesn't actually
# return a path, it returns a context manager that will give
# you the path when you enter it and will do any cleanup when
# you leave it. In the common case of not needing a temporary
# file, it will just return the file system location and the
# __exit__() is a no-op.
#
# We also have to hold onto the actual context manager, because
# it will do the cleanup whenever it gets garbage collected, so
# we will also store that at the global level as well.
_CACERT_CTX = get_path("pip._vendor.certifi", "cacert.pem")
_CACERT_PATH = str(_CACERT_CTX.__enter__())
atexit.register(exit_cacert_ctx)
return _CACERT_PATH
def contents() -> str:
return read_text("pip._vendor.certifi", "cacert.pem", encoding="ascii")
else:
import os
import types
from typing import Union
Package = Union[types.ModuleType, str]
Resource = Union[str, "os.PathLike"]
# This fallback will work for Python versions prior to 3.7 that lack the
# importlib.resources module but relies on the existing `where` function
# so won't address issues with environments like PyOxidizer that don't set
# __file__ on modules.
def read_text(
package: Package,
resource: Resource,
encoding: str = 'utf-8',
errors: str = 'strict'
) -> str:
with open(where(), encoding=encoding) as data:
return data.read()
# If we don't have importlib.resources, then we will just do the old logic
# of assuming we're on the filesystem and munge the path directly.
def where() -> str:
f = os.path.dirname(__file__)
return os.path.join(f, "cacert.pem")
def contents() -> str:
return read_text("pip._vendor.certifi", "cacert.pem", encoding="ascii")
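
A common usage sketch for the bundle that where() exposes, using the standalone certifi distribution:

# Sketch: point the stdlib's TLS verification at certifi's CA bundle.
import ssl
import urllib.request
import certifi

ctx = ssl.create_default_context(cafile=certifi.where())
with urllib.request.urlopen("https://example.com/", context=ctx) as resp:
    print(resp.status)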


@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2023 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import logging
__version__ = '0.3.8'
class DistlibException(Exception):
pass
try:
from logging import NullHandler
except ImportError: # pragma: no cover
class NullHandler(logging.Handler):
def handle(self, record):
pass
def emit(self, record):
pass
def createLock(self):
self.lock = None
logger = logging.getLogger(__name__)
logger.addHandler(NullHandler())

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,508 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2023 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import hashlib
import logging
import os
import shutil
import subprocess
import tempfile
try:
from threading import Thread
except ImportError: # pragma: no cover
from dummy_threading import Thread
from . import DistlibException
from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
urlparse, build_opener, string_types)
from .util import zip_dir, ServerProxy
logger = logging.getLogger(__name__)
DEFAULT_INDEX = 'https://pypi.org/pypi'
DEFAULT_REALM = 'pypi'
class PackageIndex(object):
"""
This class represents a package index compatible with PyPI, the Python
Package Index.
"""
boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
def __init__(self, url=None):
"""
Initialise an instance.
:param url: The URL of the index. If not specified, the URL for PyPI is
used.
"""
self.url = url or DEFAULT_INDEX
self.read_configuration()
scheme, netloc, path, params, query, frag = urlparse(self.url)
if params or query or frag or scheme not in ('http', 'https'):
raise DistlibException('invalid repository: %s' % self.url)
self.password_handler = None
self.ssl_verifier = None
self.gpg = None
self.gpg_home = None
with open(os.devnull, 'w') as sink:
# Use gpg by default rather than gpg2, as gpg2 insists on
# prompting for passwords
for s in ('gpg', 'gpg2'):
try:
rc = subprocess.check_call([s, '--version'], stdout=sink,
stderr=sink)
if rc == 0:
self.gpg = s
break
except OSError:
pass
def _get_pypirc_command(self):
"""
Get the distutils command for interacting with PyPI configurations.
:return: the command.
"""
from .util import _get_pypirc_command as cmd
return cmd()
def read_configuration(self):
"""
Read the PyPI access configuration as supported by distutils. This populates
``username``, ``password``, ``realm`` and ``url`` attributes from the
configuration.
"""
from .util import _load_pypirc
cfg = _load_pypirc(self)
self.username = cfg.get('username')
self.password = cfg.get('password')
self.realm = cfg.get('realm', 'pypi')
self.url = cfg.get('repository', self.url)
def save_configuration(self):
"""
Save the PyPI access configuration. You must have set ``username`` and
``password`` attributes before calling this method.
"""
self.check_credentials()
from .util import _store_pypirc
_store_pypirc(self)
def check_credentials(self):
"""
Check that ``username`` and ``password`` have been set, and raise an
exception if not.
"""
if self.username is None or self.password is None:
raise DistlibException('username and password must be set')
pm = HTTPPasswordMgr()
_, netloc, _, _, _, _ = urlparse(self.url)
pm.add_password(self.realm, netloc, self.username, self.password)
self.password_handler = HTTPBasicAuthHandler(pm)
def register(self, metadata): # pragma: no cover
"""
Register a distribution on PyPI, using the provided metadata.
:param metadata: A :class:`Metadata` instance defining at least a name
and version number for the distribution to be
registered.
:return: The HTTP response received from PyPI upon submission of the
request.
"""
self.check_credentials()
metadata.validate()
d = metadata.todict()
d[':action'] = 'verify'
request = self.encode_request(d.items(), [])
self.send_request(request)
d[':action'] = 'submit'
request = self.encode_request(d.items(), [])
return self.send_request(request)
def _reader(self, name, stream, outbuf):
"""
Thread runner for reading lines of output from a subprocess into a buffer.
:param name: The logical name of the stream (used for logging only).
:param stream: The stream to read from. This will typically be a pipe
connected to the output stream of a subprocess.
:param outbuf: The list to append the read lines to.
"""
while True:
s = stream.readline()
if not s:
break
s = s.decode('utf-8').rstrip()
outbuf.append(s)
logger.debug('%s: %s' % (name, s))
stream.close()
def get_sign_command(self, filename, signer, sign_password, keystore=None): # pragma: no cover
"""
Return a suitable command for signing a file.
:param filename: The pathname to the file to be signed.
:param signer: The identifier of the signer of the file.
:param sign_password: The passphrase for the signer's
private key used for signing.
:param keystore: The path to a directory which contains the keys
used in verification. If not specified, the
instance's ``gpg_home`` attribute is used instead.
:return: The signing command as a list suitable to be
passed to :class:`subprocess.Popen`.
"""
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
if keystore is None:
keystore = self.gpg_home
if keystore:
cmd.extend(['--homedir', keystore])
if sign_password is not None:
cmd.extend(['--batch', '--passphrase-fd', '0'])
td = tempfile.mkdtemp()
sf = os.path.join(td, os.path.basename(filename) + '.asc')
cmd.extend(['--detach-sign', '--armor', '--local-user',
signer, '--output', sf, filename])
logger.debug('invoking: %s', ' '.join(cmd))
return cmd, sf
def run_command(self, cmd, input_data=None):
"""
Run a command in a child process, passing it any input data specified.
:param cmd: The command to run.
:param input_data: If specified, this must be a byte string containing
data to be sent to the child process.
:return: A tuple consisting of the subprocess' exit code, a list of
lines read from the subprocess' ``stdout``, and a list of
lines read from the subprocess' ``stderr``.
"""
kwargs = {
'stdout': subprocess.PIPE,
'stderr': subprocess.PIPE,
}
if input_data is not None:
kwargs['stdin'] = subprocess.PIPE
stdout = []
stderr = []
p = subprocess.Popen(cmd, **kwargs)
        # We don't use communicate() here because we may need to
        # interact with the command while it is still running.
t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
t1.start()
t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
t2.start()
if input_data is not None:
p.stdin.write(input_data)
p.stdin.close()
p.wait()
t1.join()
t2.join()
return p.returncode, stdout, stderr
def sign_file(self, filename, signer, sign_password, keystore=None): # pragma: no cover
"""
Sign a file.
:param filename: The pathname to the file to be signed.
:param signer: The identifier of the signer of the file.
:param sign_password: The passphrase for the signer's
private key used for signing.
:param keystore: The path to a directory which contains the keys
used in signing. If not specified, the instance's
``gpg_home`` attribute is used instead.
:return: The absolute pathname of the file where the signature is
stored.
"""
cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
keystore)
rc, stdout, stderr = self.run_command(cmd,
sign_password.encode('utf-8'))
if rc != 0:
raise DistlibException('sign command failed with error '
'code %s' % rc)
return sig_file
def upload_file(self, metadata, filename, signer=None, sign_password=None,
filetype='sdist', pyversion='source', keystore=None):
"""
Upload a release file to the index.
:param metadata: A :class:`Metadata` instance defining at least a name
and version number for the file to be uploaded.
:param filename: The pathname of the file to be uploaded.
:param signer: The identifier of the signer of the file.
:param sign_password: The passphrase for the signer's
private key used for signing.
:param filetype: The type of the file being uploaded. This is the
distutils command which produced that file, e.g.
``sdist`` or ``bdist_wheel``.
:param pyversion: The version of Python which the release relates
to. For code compatible with any Python, this would
be ``source``, otherwise it would be e.g. ``3.2``.
:param keystore: The path to a directory which contains the keys
used in signing. If not specified, the instance's
``gpg_home`` attribute is used instead.
:return: The HTTP response received from PyPI upon submission of the
request.
"""
self.check_credentials()
if not os.path.exists(filename):
raise DistlibException('not found: %s' % filename)
metadata.validate()
d = metadata.todict()
sig_file = None
if signer:
if not self.gpg:
logger.warning('no signing program available - not signed')
else:
sig_file = self.sign_file(filename, signer, sign_password,
keystore)
with open(filename, 'rb') as f:
file_data = f.read()
md5_digest = hashlib.md5(file_data).hexdigest()
sha256_digest = hashlib.sha256(file_data).hexdigest()
d.update({
':action': 'file_upload',
'protocol_version': '1',
'filetype': filetype,
'pyversion': pyversion,
'md5_digest': md5_digest,
'sha256_digest': sha256_digest,
})
files = [('content', os.path.basename(filename), file_data)]
if sig_file:
with open(sig_file, 'rb') as f:
sig_data = f.read()
files.append(('gpg_signature', os.path.basename(sig_file),
sig_data))
shutil.rmtree(os.path.dirname(sig_file))
request = self.encode_request(d.items(), files)
return self.send_request(request)
def upload_documentation(self, metadata, doc_dir): # pragma: no cover
"""
Upload documentation to the index.
:param metadata: A :class:`Metadata` instance defining at least a name
and version number for the documentation to be
uploaded.
:param doc_dir: The pathname of the directory which contains the
documentation. This should be the directory that
contains the ``index.html`` for the documentation.
:return: The HTTP response received from PyPI upon submission of the
request.
"""
self.check_credentials()
if not os.path.isdir(doc_dir):
raise DistlibException('not a directory: %r' % doc_dir)
fn = os.path.join(doc_dir, 'index.html')
if not os.path.exists(fn):
raise DistlibException('not found: %r' % fn)
metadata.validate()
name, version = metadata.name, metadata.version
zip_data = zip_dir(doc_dir).getvalue()
fields = [(':action', 'doc_upload'),
('name', name), ('version', version)]
files = [('content', name, zip_data)]
request = self.encode_request(fields, files)
return self.send_request(request)
def get_verify_command(self, signature_filename, data_filename,
keystore=None):
"""
Return a suitable command for verifying a file.
:param signature_filename: The pathname to the file containing the
signature.
:param data_filename: The pathname to the file containing the
signed data.
:param keystore: The path to a directory which contains the keys
used in verification. If not specified, the
instance's ``gpg_home`` attribute is used instead.
:return: The verifying command as a list suitable to be
passed to :class:`subprocess.Popen`.
"""
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
if keystore is None:
keystore = self.gpg_home
if keystore:
cmd.extend(['--homedir', keystore])
cmd.extend(['--verify', signature_filename, data_filename])
logger.debug('invoking: %s', ' '.join(cmd))
return cmd
def verify_signature(self, signature_filename, data_filename,
keystore=None):
"""
Verify a signature for a file.
:param signature_filename: The pathname to the file containing the
signature.
:param data_filename: The pathname to the file containing the
signed data.
:param keystore: The path to a directory which contains the keys
used in verification. If not specified, the
instance's ``gpg_home`` attribute is used instead.
:return: True if the signature was verified, else False.
"""
if not self.gpg:
raise DistlibException('verification unavailable because gpg '
'unavailable')
cmd = self.get_verify_command(signature_filename, data_filename,
keystore)
rc, stdout, stderr = self.run_command(cmd)
if rc not in (0, 1):
raise DistlibException('verify command failed with error code %s' % rc)
return rc == 0
def download_file(self, url, destfile, digest=None, reporthook=None):
"""
        This is a convenience method for downloading a file from a URL.
Normally, this will be a file from the index, though currently
no check is made for this (i.e. a file can be downloaded from
anywhere).
        The method is just like the :func:`urlretrieve` function in the
        standard library, except that it allows a digest to be computed
        during the download and checked against an expected value.
:param url: The URL of the file to be downloaded (assumed to be
available via an HTTP GET request).
:param destfile: The pathname where the downloaded file is to be
saved.
        :param digest: If specified, either a (``hasher``, ``value``) tuple,
                       where ``hasher`` names the algorithm used (e.g.
                       ``'md5'``), or just the expected ``value``, in which
                       case MD5 is assumed.
:param reporthook: The same as for :func:`urlretrieve` in the
standard library.
"""
if digest is None:
digester = None
logger.debug('No digest specified')
else:
if isinstance(digest, (list, tuple)):
hasher, digest = digest
else:
hasher = 'md5'
digester = getattr(hashlib, hasher)()
            logger.debug('Digest specified: %s', digest)
# The following code is equivalent to urlretrieve.
# We need to do it this way so that we can compute the
# digest of the file as we go.
with open(destfile, 'wb') as dfp:
# addinfourl is not a context manager on 2.x
# so we have to use try/finally
sfp = self.send_request(Request(url))
try:
headers = sfp.info()
blocksize = 8192
size = -1
read = 0
blocknum = 0
if "content-length" in headers:
size = int(headers["Content-Length"])
if reporthook:
reporthook(blocknum, blocksize, size)
while True:
block = sfp.read(blocksize)
if not block:
break
read += len(block)
dfp.write(block)
if digester:
digester.update(block)
blocknum += 1
if reporthook:
reporthook(blocknum, blocksize, size)
finally:
sfp.close()
# check that we got the whole file, if we can
if size >= 0 and read < size:
raise DistlibException(
'retrieval incomplete: got only %d out of %d bytes'
% (read, size))
# if we have a digest, it must match.
if digester:
actual = digester.hexdigest()
if digest != actual:
raise DistlibException('%s digest mismatch for %s: expected '
'%s, got %s' % (hasher, destfile,
digest, actual))
logger.debug('Digest verified: %s', digest)
def send_request(self, req):
"""
Send a standard library :class:`Request` to PyPI and return its
response.
:param req: The request to send.
:return: The HTTP response from PyPI (a standard library HTTPResponse).
"""
handlers = []
if self.password_handler:
handlers.append(self.password_handler)
if self.ssl_verifier:
handlers.append(self.ssl_verifier)
opener = build_opener(*handlers)
return opener.open(req)
def encode_request(self, fields, files):
"""
Encode fields and files for posting to an HTTP server.
:param fields: The fields to send as a list of (fieldname, value)
tuples.
        :param files: The files to send as a list of (fieldname, filename,
                      file_bytes) tuples.
        :return: A standard library :class:`Request` instance with the
                 encoded multipart form data.
        """
# Adapted from packaging, which in turn was adapted from
# http://code.activestate.com/recipes/146306
parts = []
boundary = self.boundary
for k, values in fields:
if not isinstance(values, (list, tuple)):
values = [values]
for v in values:
parts.extend((
b'--' + boundary,
('Content-Disposition: form-data; name="%s"' %
k).encode('utf-8'),
b'',
v.encode('utf-8')))
for key, filename, value in files:
parts.extend((
b'--' + boundary,
('Content-Disposition: form-data; name="%s"; filename="%s"' %
(key, filename)).encode('utf-8'),
b'',
value))
parts.extend((b'--' + boundary + b'--', b''))
body = b'\r\n'.join(parts)
ct = b'multipart/form-data; boundary=' + boundary
headers = {
'Content-type': ct,
'Content-length': str(len(body))
}
return Request(self.url, body, headers)
def search(self, terms, operator=None): # pragma: no cover
if isinstance(terms, string_types):
terms = {'name': terms}
rpc_proxy = ServerProxy(self.url, timeout=3.0)
try:
return rpc_proxy.search(terms, operator or 'and')
finally:
rpc_proxy('close')()
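# ---------------------------------------------------------------------------
# Editor's note: a minimal usage sketch of the methods above, illustrative
# only. It assumes PackageIndex accepts an index URL on construction (the
# constructor is not part of this excerpt); the URL and digest value are
# hypothetical.
if __name__ == '__main__':  # pragma: no cover
    index = PackageIndex('https://test.pypi.org/legacy/')
    index.read_configuration()  # populate username/password from .pypirc
    index.download_file('https://example.com/files/demo-1.0.tar.gz',
                        'demo-1.0.tar.gz',
                        digest=('sha256', '<expected-hex-digest>'))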

File diff suppressed because it is too large

View File

@ -0,0 +1,384 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2023 Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""
Class representing the list of files in a distribution.
Equivalent to distutils.filelist, but fixes some problems.
"""
import fnmatch
import logging
import os
import re
import sys
from . import DistlibException
from .compat import fsdecode
from .util import convert_path
__all__ = ['Manifest']
logger = logging.getLogger(__name__)
# a backslash followed by word characters + EOL
_COLLAPSE_PATTERN = re.compile('\\\\w*\n', re.M)
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
#
# Due to the different results returned by fnmatch.translate, we need
# to do slightly different processing for Python 2.7 and 3.2 ... this needed
# to be brought in for Python 3.6 onwards.
#
_PYTHON_VERSION = sys.version_info[:2]
class Manifest(object):
"""
A list of files built by exploring the filesystem and filtered by applying various
patterns to what we find there.
"""
def __init__(self, base=None):
"""
Initialise an instance.
:param base: The base directory to explore under.
"""
self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
self.prefix = self.base + os.sep
self.allfiles = None
self.files = set()
#
# Public API
#
def findall(self):
"""Find all files under the base and set ``allfiles`` to the absolute
pathnames of files found.
"""
from stat import S_ISREG, S_ISDIR, S_ISLNK
self.allfiles = allfiles = []
root = self.base
stack = [root]
pop = stack.pop
push = stack.append
while stack:
root = pop()
names = os.listdir(root)
for name in names:
fullname = os.path.join(root, name)
# Avoid excess stat calls -- just one will do, thank you!
stat = os.stat(fullname)
mode = stat.st_mode
if S_ISREG(mode):
allfiles.append(fsdecode(fullname))
elif S_ISDIR(mode) and not S_ISLNK(mode):
push(fullname)
def add(self, item):
"""
Add a file to the manifest.
:param item: The pathname to add. This can be relative to the base.
"""
if not item.startswith(self.prefix):
item = os.path.join(self.base, item)
self.files.add(os.path.normpath(item))
def add_many(self, items):
"""
Add a list of files to the manifest.
:param items: The pathnames to add. These can be relative to the base.
"""
for item in items:
self.add(item)
def sorted(self, wantdirs=False):
"""
        Return sorted files in directory order. If ``wantdirs`` is true, the
        directories containing the files are included as well.
"""
def add_dir(dirs, d):
dirs.add(d)
logger.debug('add_dir added %s', d)
if d != self.base:
parent, _ = os.path.split(d)
assert parent not in ('', '/')
add_dir(dirs, parent)
result = set(self.files) # make a copy!
if wantdirs:
dirs = set()
for f in result:
add_dir(dirs, os.path.dirname(f))
result |= dirs
return [os.path.join(*path_tuple) for path_tuple in
sorted(os.path.split(path) for path in result)]
def clear(self):
"""Clear all collected files."""
self.files = set()
self.allfiles = []
def process_directive(self, directive):
"""
Process a directive which either adds some files from ``allfiles`` to
``files``, or removes some files from ``files``.
:param directive: The directive to process. This should be in a format
compatible with distutils ``MANIFEST.in`` files:
http://docs.python.org/distutils/sourcedist.html#commands
"""
# Parse the line: split it up, make sure the right number of words
# is there, and return the relevant words. 'action' is always
# defined: it's the first word of the line. Which of the other
# three are defined depends on the action; it'll be either
# patterns, (dir and patterns), or (dirpattern).
action, patterns, thedir, dirpattern = self._parse_directive(directive)
# OK, now we know that the action is valid and we have the
# right number of words on the line for that action -- so we
# can proceed with minimal error-checking.
if action == 'include':
for pattern in patterns:
if not self._include_pattern(pattern, anchor=True):
logger.warning('no files found matching %r', pattern)
elif action == 'exclude':
for pattern in patterns:
self._exclude_pattern(pattern, anchor=True)
elif action == 'global-include':
for pattern in patterns:
if not self._include_pattern(pattern, anchor=False):
logger.warning('no files found matching %r '
'anywhere in distribution', pattern)
elif action == 'global-exclude':
for pattern in patterns:
self._exclude_pattern(pattern, anchor=False)
elif action == 'recursive-include':
for pattern in patterns:
if not self._include_pattern(pattern, prefix=thedir):
logger.warning('no files found matching %r '
'under directory %r', pattern, thedir)
elif action == 'recursive-exclude':
for pattern in patterns:
self._exclude_pattern(pattern, prefix=thedir)
elif action == 'graft':
if not self._include_pattern(None, prefix=dirpattern):
logger.warning('no directories found matching %r',
dirpattern)
elif action == 'prune':
if not self._exclude_pattern(None, prefix=dirpattern):
logger.warning('no previously-included directories found '
'matching %r', dirpattern)
else: # pragma: no cover
# This should never happen, as it should be caught in
# _parse_template_line
raise DistlibException(
'invalid action %r' % action)
#
# Private API
#
def _parse_directive(self, directive):
"""
Validate a directive.
:param directive: The directive to validate.
        :return: A tuple of action, patterns, thedir, dir_pattern
"""
words = directive.split()
if len(words) == 1 and words[0] not in ('include', 'exclude',
'global-include',
'global-exclude',
'recursive-include',
'recursive-exclude',
'graft', 'prune'):
# no action given, let's use the default 'include'
words.insert(0, 'include')
action = words[0]
patterns = thedir = dir_pattern = None
if action in ('include', 'exclude',
'global-include', 'global-exclude'):
if len(words) < 2:
raise DistlibException(
'%r expects <pattern1> <pattern2> ...' % action)
patterns = [convert_path(word) for word in words[1:]]
elif action in ('recursive-include', 'recursive-exclude'):
if len(words) < 3:
raise DistlibException(
'%r expects <dir> <pattern1> <pattern2> ...' % action)
thedir = convert_path(words[1])
patterns = [convert_path(word) for word in words[2:]]
elif action in ('graft', 'prune'):
if len(words) != 2:
raise DistlibException(
'%r expects a single <dir_pattern>' % action)
dir_pattern = convert_path(words[1])
else:
raise DistlibException('unknown action %r' % action)
return action, patterns, thedir, dir_pattern
def _include_pattern(self, pattern, anchor=True, prefix=None,
is_regex=False):
"""Select strings (presumably filenames) from 'self.files' that
match 'pattern', a Unix-style wildcard (glob) pattern.
Patterns are not quite the same as implemented by the 'fnmatch'
module: '*' and '?' match non-special characters, where "special"
is platform-dependent: slash on Unix; colon, slash, and backslash on
DOS/Windows; and colon on Mac OS.
If 'anchor' is true (the default), then the pattern match is more
stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
'anchor' is false, both of these will match.
If 'prefix' is supplied, then only filenames starting with 'prefix'
(itself a pattern) and ending with 'pattern', with anything in between
them, will match. 'anchor' is ignored in this case.
If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
'pattern' is assumed to be either a string containing a regex or a
regex object -- no translation is done, the regex is just compiled
and used as-is.
Selected strings will be added to self.files.
Return True if files are found.
"""
# XXX docstring lying about what the special chars are?
found = False
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
# delayed loading of allfiles list
if self.allfiles is None:
self.findall()
for name in self.allfiles:
if pattern_re.search(name):
self.files.add(name)
found = True
return found
def _exclude_pattern(self, pattern, anchor=True, prefix=None,
is_regex=False):
"""Remove strings (presumably filenames) from 'files' that match
'pattern'.
        Other parameters are the same as for '_include_pattern()', above.
        The list 'self.files' is modified in place. Return True if files are
        found.
        This API is public to allow exclusion of SCM subdirectories, e.g. when
        packaging source distributions.
"""
found = False
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
for f in list(self.files):
if pattern_re.search(f):
self.files.remove(f)
found = True
return found
def _translate_pattern(self, pattern, anchor=True, prefix=None,
is_regex=False):
"""Translate a shell-like wildcard pattern to a compiled regular
expression.
        Return the compiled regex. If 'is_regex' is true, then 'pattern' is
        directly compiled to a regex (if it's a string) or just returned
        as-is (it is assumed to be a regex object).
"""
if is_regex:
if isinstance(pattern, str):
return re.compile(pattern)
else:
return pattern
if _PYTHON_VERSION > (3, 2):
# ditch start and end characters
start, _, end = self._glob_to_re('_').partition('_')
if pattern:
pattern_re = self._glob_to_re(pattern)
if _PYTHON_VERSION > (3, 2):
assert pattern_re.startswith(start) and pattern_re.endswith(end)
else:
pattern_re = ''
base = re.escape(os.path.join(self.base, ''))
if prefix is not None:
# ditch end of pattern character
if _PYTHON_VERSION <= (3, 2):
empty_pattern = self._glob_to_re('')
prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
else:
prefix_re = self._glob_to_re(prefix)
assert prefix_re.startswith(start) and prefix_re.endswith(end)
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
sep = os.sep
if os.sep == '\\':
sep = r'\\'
if _PYTHON_VERSION <= (3, 2):
pattern_re = '^' + base + sep.join((prefix_re,
'.*' + pattern_re))
else:
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
pattern_re, end)
else: # no prefix -- respect anchor flag
if anchor:
if _PYTHON_VERSION <= (3, 2):
pattern_re = '^' + base + pattern_re
else:
pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):])
return re.compile(pattern_re)
def _glob_to_re(self, pattern):
"""Translate a shell-like glob pattern to a regular expression.
Return a string containing the regex. Differs from
'fnmatch.translate()' in that '*' does not match "special characters"
(which are platform-specific).
"""
pattern_re = fnmatch.translate(pattern)
# '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
# IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
# and by extension they shouldn't match such "special characters" under
# any OS. So change all non-escaped dots in the RE to match any
# character except the special characters (currently: just os.sep).
sep = os.sep
if os.sep == '\\':
# we're using a regex to manipulate a regex, so we need
# to escape the backslash twice
sep = r'\\\\'
escaped = r'\1[^%s]' % sep
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
return pattern_re
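# ---------------------------------------------------------------------------
# Editor's note: a brief, illustrative sketch of driving Manifest with
# MANIFEST.in-style directives; the directory layout is hypothetical.
if __name__ == '__main__':  # pragma: no cover
    manifest = Manifest('.')
    manifest.process_directive('include *.py')
    manifest.process_directive('recursive-include docs *.rst')
    manifest.process_directive('prune build')
    for path in manifest.sorted():
        print(path)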

View File

@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2023 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""
Parser for the environment markers micro-language defined in PEP 508.
"""
# Note: In PEP 345, the micro-language was Python compatible, so the ast
# module could be used to parse it. However, PEP 508 introduced operators such
# as ~= and === which aren't in Python, necessitating a different approach.
import os
import re
import sys
import platform
from .compat import string_types
from .util import in_venv, parse_marker
from .version import LegacyVersion as LV
__all__ = ['interpret']
_VERSION_PATTERN = re.compile(
r'((\d+(\.\d+)*\w*)|\'(\d+(\.\d+)*\w*)\'|\"(\d+(\.\d+)*\w*)\")')
_VERSION_MARKERS = {'python_version', 'python_full_version'}
def _is_version_marker(s):
return isinstance(s, string_types) and s in _VERSION_MARKERS
def _is_literal(o):
if not isinstance(o, string_types) or not o:
return False
return o[0] in '\'"'
def _get_versions(s):
return {LV(m.groups()[0]) for m in _VERSION_PATTERN.finditer(s)}
class Evaluator(object):
"""
This class is used to evaluate marker expressions.
"""
operations = {
'==': lambda x, y: x == y,
'===': lambda x, y: x == y,
'~=': lambda x, y: x == y or x > y,
'!=': lambda x, y: x != y,
'<': lambda x, y: x < y,
'<=': lambda x, y: x == y or x < y,
'>': lambda x, y: x > y,
'>=': lambda x, y: x == y or x > y,
'and': lambda x, y: x and y,
'or': lambda x, y: x or y,
'in': lambda x, y: x in y,
'not in': lambda x, y: x not in y,
}
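    # NOTE (editor): the '~=' entry above approximates PEP 440's
    # compatible-release operator with a plain >= test on the parsed
    # operands; full prefix-aware matching is done by the version matchers,
    # not here.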
def evaluate(self, expr, context):
"""
        Evaluate a marker expression returned by the :func:`parse_marker`
        function in the specified context.
"""
if isinstance(expr, string_types):
if expr[0] in '\'"':
result = expr[1:-1]
else:
if expr not in context:
raise SyntaxError('unknown variable: %s' % expr)
result = context[expr]
else:
assert isinstance(expr, dict)
op = expr['op']
if op not in self.operations:
raise NotImplementedError('op not implemented: %s' % op)
elhs = expr['lhs']
erhs = expr['rhs']
if _is_literal(expr['lhs']) and _is_literal(expr['rhs']):
raise SyntaxError('invalid comparison: %s %s %s' %
(elhs, op, erhs))
lhs = self.evaluate(elhs, context)
rhs = self.evaluate(erhs, context)
if ((_is_version_marker(elhs) or _is_version_marker(erhs))
and op in ('<', '<=', '>', '>=', '===', '==', '!=', '~=')):
lhs = LV(lhs)
rhs = LV(rhs)
elif _is_version_marker(elhs) and op in ('in', 'not in'):
lhs = LV(lhs)
rhs = _get_versions(rhs)
result = self.operations[op](lhs, rhs)
return result
_DIGITS = re.compile(r'\d+\.\d+')
def default_context():
def format_full_version(info):
version = '%s.%s.%s' % (info.major, info.minor, info.micro)
kind = info.releaselevel
if kind != 'final':
version += kind[0] + str(info.serial)
return version
if hasattr(sys, 'implementation'):
implementation_version = format_full_version(
sys.implementation.version)
implementation_name = sys.implementation.name
else:
implementation_version = '0'
implementation_name = ''
ppv = platform.python_version()
m = _DIGITS.match(ppv)
pv = m.group(0)
result = {
'implementation_name': implementation_name,
'implementation_version': implementation_version,
'os_name': os.name,
'platform_machine': platform.machine(),
'platform_python_implementation': platform.python_implementation(),
'platform_release': platform.release(),
'platform_system': platform.system(),
'platform_version': platform.version(),
'platform_in_venv': str(in_venv()),
'python_full_version': ppv,
'python_version': pv,
'sys_platform': sys.platform,
}
return result
DEFAULT_CONTEXT = default_context()
del default_context
evaluator = Evaluator()
def interpret(marker, execution_context=None):
"""
Interpret a marker and return a result depending on environment.
:param marker: The marker to interpret.
:type marker: str
:param execution_context: The context used for name lookup.
:type execution_context: mapping
"""
try:
expr, rest = parse_marker(marker)
except Exception as e:
raise SyntaxError('Unable to interpret marker syntax: %s: %s' %
(marker, e))
if rest and rest[0] != '#':
raise SyntaxError('unexpected trailing data in marker: %s: %s' %
(marker, rest))
context = dict(DEFAULT_CONTEXT)
if execution_context:
context.update(execution_context)
return evaluator.evaluate(expr, context)
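# ---------------------------------------------------------------------------
# Editor's note: illustrative calls only.
if __name__ == '__main__':  # pragma: no cover
    print(interpret('python_version >= "3.8"'))
    # Names can be supplied or overridden via an execution context:
    print(interpret('sys_platform == "linux"',
                    execution_context={'sys_platform': 'linux'}))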

File diff suppressed because it is too large

View File

@ -0,0 +1,358 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2017 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from __future__ import unicode_literals
import bisect
import io
import logging
import os
import pkgutil
import sys
import types
import zipimport
from . import DistlibException
from .util import cached_property, get_cache_base, Cache
logger = logging.getLogger(__name__)
cache = None # created when needed
class ResourceCache(Cache):
def __init__(self, base=None):
if base is None:
# Use native string to avoid issues on 2.x: see Python #20140.
base = os.path.join(get_cache_base(), str('resource-cache'))
super(ResourceCache, self).__init__(base)
def is_stale(self, resource, path):
"""
Is the cache stale for the given resource?
:param resource: The :class:`Resource` being cached.
:param path: The path of the resource in the cache.
:return: True if the cache is stale.
"""
# Cache invalidation is a hard problem :-)
return True
def get(self, resource):
"""
        Get a resource into the cache.
:param resource: A :class:`Resource` instance.
:return: The pathname of the resource in the cache.
"""
prefix, path = resource.finder.get_cache_info(resource)
if prefix is None:
result = path
else:
result = os.path.join(self.base, self.prefix_to_dir(prefix), path)
dirname = os.path.dirname(result)
if not os.path.isdir(dirname):
os.makedirs(dirname)
if not os.path.exists(result):
stale = True
else:
stale = self.is_stale(resource, path)
if stale:
# write the bytes of the resource to the cache location
with open(result, 'wb') as f:
f.write(resource.bytes)
return result
class ResourceBase(object):
def __init__(self, finder, name):
self.finder = finder
self.name = name
class Resource(ResourceBase):
"""
A class representing an in-package resource, such as a data file. This is
not normally instantiated by user code, but rather by a
:class:`ResourceFinder` which manages the resource.
"""
is_container = False # Backwards compatibility
def as_stream(self):
"""
Get the resource as a stream.
This is not a property to make it obvious that it returns a new stream
each time.
"""
return self.finder.get_stream(self)
@cached_property
def file_path(self):
global cache
if cache is None:
cache = ResourceCache()
return cache.get(self)
@cached_property
def bytes(self):
return self.finder.get_bytes(self)
@cached_property
def size(self):
return self.finder.get_size(self)
class ResourceContainer(ResourceBase):
is_container = True # Backwards compatibility
@cached_property
def resources(self):
return self.finder.get_resources(self)
class ResourceFinder(object):
"""
Resource finder for file system resources.
"""
if sys.platform.startswith('java'):
skipped_extensions = ('.pyc', '.pyo', '.class')
else:
skipped_extensions = ('.pyc', '.pyo')
def __init__(self, module):
self.module = module
self.loader = getattr(module, '__loader__', None)
self.base = os.path.dirname(getattr(module, '__file__', ''))
def _adjust_path(self, path):
return os.path.realpath(path)
def _make_path(self, resource_name):
# Issue #50: need to preserve type of path on Python 2.x
# like os.path._get_sep
if isinstance(resource_name, bytes): # should only happen on 2.x
sep = b'/'
else:
sep = '/'
parts = resource_name.split(sep)
parts.insert(0, self.base)
result = os.path.join(*parts)
return self._adjust_path(result)
def _find(self, path):
return os.path.exists(path)
def get_cache_info(self, resource):
return None, resource.path
def find(self, resource_name):
path = self._make_path(resource_name)
if not self._find(path):
result = None
else:
if self._is_directory(path):
result = ResourceContainer(self, resource_name)
else:
result = Resource(self, resource_name)
result.path = path
return result
def get_stream(self, resource):
return open(resource.path, 'rb')
def get_bytes(self, resource):
with open(resource.path, 'rb') as f:
return f.read()
def get_size(self, resource):
return os.path.getsize(resource.path)
def get_resources(self, resource):
def allowed(f):
return (f != '__pycache__' and not
f.endswith(self.skipped_extensions))
return set([f for f in os.listdir(resource.path) if allowed(f)])
def is_container(self, resource):
return self._is_directory(resource.path)
_is_directory = staticmethod(os.path.isdir)
def iterator(self, resource_name):
resource = self.find(resource_name)
if resource is not None:
todo = [resource]
while todo:
resource = todo.pop(0)
yield resource
if resource.is_container:
rname = resource.name
for name in resource.resources:
if not rname:
new_name = name
else:
new_name = '/'.join([rname, name])
child = self.find(new_name)
if child.is_container:
todo.append(child)
else:
yield child
class ZipResourceFinder(ResourceFinder):
"""
Resource finder for resources in .zip files.
"""
def __init__(self, module):
super(ZipResourceFinder, self).__init__(module)
archive = self.loader.archive
self.prefix_len = 1 + len(archive)
# PyPy doesn't have a _files attr on zipimporter, and you can't set one
if hasattr(self.loader, '_files'):
self._files = self.loader._files
else:
self._files = zipimport._zip_directory_cache[archive]
self.index = sorted(self._files)
def _adjust_path(self, path):
return path
def _find(self, path):
path = path[self.prefix_len:]
if path in self._files:
result = True
else:
if path and path[-1] != os.sep:
path = path + os.sep
i = bisect.bisect(self.index, path)
try:
result = self.index[i].startswith(path)
except IndexError:
result = False
if not result:
logger.debug('_find failed: %r %r', path, self.loader.prefix)
else:
logger.debug('_find worked: %r %r', path, self.loader.prefix)
return result
def get_cache_info(self, resource):
prefix = self.loader.archive
path = resource.path[1 + len(prefix):]
return prefix, path
def get_bytes(self, resource):
return self.loader.get_data(resource.path)
def get_stream(self, resource):
return io.BytesIO(self.get_bytes(resource))
def get_size(self, resource):
path = resource.path[self.prefix_len:]
return self._files[path][3]
def get_resources(self, resource):
path = resource.path[self.prefix_len:]
if path and path[-1] != os.sep:
path += os.sep
plen = len(path)
result = set()
i = bisect.bisect(self.index, path)
while i < len(self.index):
if not self.index[i].startswith(path):
break
s = self.index[i][plen:]
result.add(s.split(os.sep, 1)[0]) # only immediate children
i += 1
return result
def _is_directory(self, path):
path = path[self.prefix_len:]
if path and path[-1] != os.sep:
path += os.sep
i = bisect.bisect(self.index, path)
try:
result = self.index[i].startswith(path)
except IndexError:
result = False
return result
_finder_registry = {
type(None): ResourceFinder,
zipimport.zipimporter: ZipResourceFinder
}
try:
# In Python 3.6, _frozen_importlib -> _frozen_importlib_external
try:
import _frozen_importlib_external as _fi
except ImportError:
import _frozen_importlib as _fi
_finder_registry[_fi.SourceFileLoader] = ResourceFinder
_finder_registry[_fi.FileFinder] = ResourceFinder
# See issue #146
_finder_registry[_fi.SourcelessFileLoader] = ResourceFinder
del _fi
except (ImportError, AttributeError):
pass
def register_finder(loader, finder_maker):
_finder_registry[type(loader)] = finder_maker
_finder_cache = {}
def finder(package):
"""
Return a resource finder for a package.
:param package: The name of the package.
:return: A :class:`ResourceFinder` instance for the package.
"""
if package in _finder_cache:
result = _finder_cache[package]
else:
if package not in sys.modules:
__import__(package)
module = sys.modules[package]
path = getattr(module, '__path__', None)
if path is None:
raise DistlibException('You cannot get a finder for a module, '
'only for a package')
loader = getattr(module, '__loader__', None)
finder_maker = _finder_registry.get(type(loader))
if finder_maker is None:
raise DistlibException('Unable to locate finder for %r' % package)
result = finder_maker(module)
_finder_cache[package] = result
return result
_dummy_module = types.ModuleType(str('__dummy__'))
def finder_for_path(path):
"""
Return a resource finder for a path, which should represent a container.
:param path: The path.
:return: A :class:`ResourceFinder` instance for the path.
"""
result = None
# calls any path hooks, gets importer into cache
pkgutil.get_importer(path)
loader = sys.path_importer_cache.get(path)
finder = _finder_registry.get(type(loader))
if finder:
module = _dummy_module
module.__file__ = os.path.join(path, '')
module.__loader__ = loader
result = finder(module)
return result
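# ---------------------------------------------------------------------------
# Editor's note: an illustrative sketch; the package and resource names are
# hypothetical and must exist for the calls to succeed.
if __name__ == '__main__':  # pragma: no cover
    f = finder('mypackage')
    r = f.find('data/config.json')
    if r is not None and not r.is_container:
        print(r.size)
        with r.as_stream() as stream:
            print(stream.read(16))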

View File

@ -0,0 +1,466 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2023 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from io import BytesIO
import logging
import os
import re
import struct
import sys
import time
from zipfile import ZipInfo
from .compat import sysconfig, detect_encoding, ZipFile
from .resources import finder
from .util import (FileOperator, get_export_entry, convert_path,
get_executable, get_platform, in_venv)
logger = logging.getLogger(__name__)
_DEFAULT_MANIFEST = '''
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
<assemblyIdentity version="1.0.0.0"
processorArchitecture="X86"
name="%s"
type="win32"/>
<!-- Identify the application security requirements. -->
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
<security>
<requestedPrivileges>
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
</requestedPrivileges>
</security>
</trustInfo>
</assembly>'''.strip()
# check if Python is called on the first line with this expression
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
SCRIPT_TEMPLATE = r'''# -*- coding: utf-8 -*-
import re
import sys
from %(module)s import %(import_name)s
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(%(func)s())
'''
# Pre-fetch the contents of all executable wrapper stubs.
# This is to address https://github.com/pypa/pip/issues/12666.
# When updating pip, we rename the old pip in place before installing the
# new version. If we try to fetch a wrapper *after* that rename, the finder
# machinery will be confused as the package is no longer available at the
# location where it was imported from. So we load everything into memory in
# advance.
# Issue 31: don't hardcode an absolute package name, but
# determine it relative to the current package
distlib_package = __name__.rsplit('.', 1)[0]
WRAPPERS = {
r.name: r.bytes
for r in finder(distlib_package).iterator("")
if r.name.endswith(".exe")
}
def enquote_executable(executable):
if ' ' in executable:
# make sure we quote only the executable in case of env
# for example /usr/bin/env "/dir with spaces/bin/jython"
# instead of "/usr/bin/env /dir with spaces/bin/jython"
        # otherwise the whole command line would be quoted
if executable.startswith('/usr/bin/env '):
env, _executable = executable.split(' ', 1)
if ' ' in _executable and not _executable.startswith('"'):
executable = '%s "%s"' % (env, _executable)
else:
if not executable.startswith('"'):
executable = '"%s"' % executable
return executable
# Keep the old name around (for now), as there is at least one project using it!
_enquote_executable = enquote_executable
class ScriptMaker(object):
"""
A class to copy or create scripts from source scripts or callable
specifications.
"""
script_template = SCRIPT_TEMPLATE
executable = None # for shebangs
def __init__(self,
source_dir,
target_dir,
add_launchers=True,
dry_run=False,
fileop=None):
self.source_dir = source_dir
self.target_dir = target_dir
self.add_launchers = add_launchers
self.force = False
self.clobber = False
# It only makes sense to set mode bits on POSIX.
self.set_mode = (os.name == 'posix') or (os.name == 'java'
and os._name == 'posix')
self.variants = set(('', 'X.Y'))
self._fileop = fileop or FileOperator(dry_run)
self._is_nt = os.name == 'nt' or (os.name == 'java'
and os._name == 'nt')
self.version_info = sys.version_info
def _get_alternate_executable(self, executable, options):
if options.get('gui', False) and self._is_nt: # pragma: no cover
dn, fn = os.path.split(executable)
fn = fn.replace('python', 'pythonw')
executable = os.path.join(dn, fn)
return executable
if sys.platform.startswith('java'): # pragma: no cover
def _is_shell(self, executable):
"""
Determine if the specified executable is a script
(contains a #! line)
"""
try:
with open(executable) as fp:
return fp.read(2) == '#!'
except (OSError, IOError):
logger.warning('Failed to open %s', executable)
return False
def _fix_jython_executable(self, executable):
if self._is_shell(executable):
                # The Jython workaround is not needed on Linux systems.
import java
if java.lang.System.getProperty('os.name') == 'Linux':
return executable
elif executable.lower().endswith('jython.exe'):
# Use wrapper exe for Jython on Windows
return executable
return '/usr/bin/env %s' % executable
def _build_shebang(self, executable, post_interp):
"""
Build a shebang line. In the simple case (on Windows, or a shebang line
        which is not too long and contains no spaces) use a simple formulation for
the shebang. Otherwise, use /bin/sh as the executable, with a contrived
shebang which allows the script to run either under Python or sh, using
suitable quoting. Thanks to Harald Nordgren for his input.
See also: http://www.in-ulm.de/~mascheck/various/shebang/#length
https://hg.mozilla.org/mozilla-central/file/tip/mach
"""
if os.name != 'posix':
simple_shebang = True
else:
# Add 3 for '#!' prefix and newline suffix.
shebang_length = len(executable) + len(post_interp) + 3
if sys.platform == 'darwin':
max_shebang_length = 512
else:
max_shebang_length = 127
simple_shebang = ((b' ' not in executable)
and (shebang_length <= max_shebang_length))
if simple_shebang:
result = b'#!' + executable + post_interp + b'\n'
else:
result = b'#!/bin/sh\n'
result += b"'''exec' " + executable + post_interp + b' "$0" "$@"\n'
result += b"' '''"
return result
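    # For reference (editor's note), the contrived form built above looks
    # like this when the interpreter path contains a space:
    #   #!/bin/sh
    #   '''exec' /opt/my python/bin/python3 "$0" "$@"
    #   ' '''
    # sh executes the 'exec' line, while Python sees the quoted lines as a
    # string literal and carries on with the rest of the script.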
def _get_shebang(self, encoding, post_interp=b'', options=None):
enquote = True
if self.executable:
executable = self.executable
enquote = False # assume this will be taken care of
elif not sysconfig.is_python_build():
executable = get_executable()
elif in_venv(): # pragma: no cover
executable = os.path.join(
sysconfig.get_path('scripts'),
'python%s' % sysconfig.get_config_var('EXE'))
else: # pragma: no cover
if os.name == 'nt':
# for Python builds from source on Windows, no Python executables with
# a version suffix are created, so we use python.exe
executable = os.path.join(
sysconfig.get_config_var('BINDIR'),
'python%s' % (sysconfig.get_config_var('EXE')))
else:
executable = os.path.join(
sysconfig.get_config_var('BINDIR'),
'python%s%s' % (sysconfig.get_config_var('VERSION'),
sysconfig.get_config_var('EXE')))
if options:
executable = self._get_alternate_executable(executable, options)
if sys.platform.startswith('java'): # pragma: no cover
executable = self._fix_jython_executable(executable)
# Normalise case for Windows - COMMENTED OUT
# executable = os.path.normcase(executable)
# N.B. The normalising operation above has been commented out: See
# issue #124. Although paths in Windows are generally case-insensitive,
# they aren't always. For example, a path containing a ẞ (which is a
# LATIN CAPITAL LETTER SHARP S - U+1E9E) is normcased to ß (which is a
        # LATIN SMALL LETTER SHARP S - U+00DF). The two are not considered by
# Windows as equivalent in path names.
# If the user didn't specify an executable, it may be necessary to
# cater for executable paths with spaces (not uncommon on Windows)
if enquote:
executable = enquote_executable(executable)
# Issue #51: don't use fsencode, since we later try to
# check that the shebang is decodable using utf-8.
executable = executable.encode('utf-8')
# in case of IronPython, play safe and enable frames support
        if (sys.platform == 'cli' and b'-X:Frames' not in post_interp
                and b'-X:FullFrames' not in post_interp):  # pragma: no cover
post_interp += b' -X:Frames'
shebang = self._build_shebang(executable, post_interp)
        # The Python parser starts reading a script using UTF-8 until it
        # encounters a #coding:xxx cookie. Since the shebang has to be the
        # first line of a file, the cookie cannot be written before it, so
        # the shebang has to be decodable from UTF-8.
try:
shebang.decode('utf-8')
except UnicodeDecodeError: # pragma: no cover
raise ValueError('The shebang (%r) is not decodable from utf-8' %
shebang)
# If the script is encoded to a custom encoding (use a
# #coding:xxx cookie), the shebang has to be decodable from
# the script encoding too.
if encoding != 'utf-8':
try:
shebang.decode(encoding)
except UnicodeDecodeError: # pragma: no cover
raise ValueError('The shebang (%r) is not decodable '
'from the script encoding (%r)' %
(shebang, encoding))
return shebang
def _get_script_text(self, entry):
return self.script_template % dict(
module=entry.prefix,
import_name=entry.suffix.split('.')[0],
func=entry.suffix)
manifest = _DEFAULT_MANIFEST
def get_manifest(self, exename):
base = os.path.basename(exename)
return self.manifest % base
def _write_script(self, names, shebang, script_bytes, filenames, ext):
use_launcher = self.add_launchers and self._is_nt
linesep = os.linesep.encode('utf-8')
if not shebang.endswith(linesep):
shebang += linesep
if not use_launcher:
script_bytes = shebang + script_bytes
else: # pragma: no cover
if ext == 'py':
launcher = self._get_launcher('t')
else:
launcher = self._get_launcher('w')
stream = BytesIO()
with ZipFile(stream, 'w') as zf:
source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
if source_date_epoch:
date_time = time.gmtime(int(source_date_epoch))[:6]
zinfo = ZipInfo(filename='__main__.py',
date_time=date_time)
zf.writestr(zinfo, script_bytes)
else:
zf.writestr('__main__.py', script_bytes)
zip_data = stream.getvalue()
script_bytes = launcher + shebang + zip_data
for name in names:
outname = os.path.join(self.target_dir, name)
if use_launcher: # pragma: no cover
n, e = os.path.splitext(outname)
if e.startswith('.py'):
outname = n
outname = '%s.exe' % outname
try:
self._fileop.write_binary_file(outname, script_bytes)
except Exception:
# Failed writing an executable - it might be in use.
logger.warning('Failed to write executable - trying to '
'use .deleteme logic')
dfname = '%s.deleteme' % outname
if os.path.exists(dfname):
os.remove(dfname) # Not allowed to fail here
os.rename(outname, dfname) # nor here
self._fileop.write_binary_file(outname, script_bytes)
logger.debug('Able to replace executable using '
'.deleteme logic')
try:
os.remove(dfname)
except Exception:
pass # still in use - ignore error
else:
if self._is_nt and not outname.endswith(
'.' + ext): # pragma: no cover
outname = '%s.%s' % (outname, ext)
if os.path.exists(outname) and not self.clobber:
logger.warning('Skipping existing file %s', outname)
continue
self._fileop.write_binary_file(outname, script_bytes)
if self.set_mode:
self._fileop.set_executable_mode([outname])
filenames.append(outname)
variant_separator = '-'
def get_script_filenames(self, name):
result = set()
if '' in self.variants:
result.add(name)
if 'X' in self.variants:
result.add('%s%s' % (name, self.version_info[0]))
if 'X.Y' in self.variants:
result.add('%s%s%s.%s' %
(name, self.variant_separator, self.version_info[0],
self.version_info[1]))
return result
def _make_script(self, entry, filenames, options=None):
post_interp = b''
if options:
args = options.get('interpreter_args', [])
if args:
args = ' %s' % ' '.join(args)
post_interp = args.encode('utf-8')
shebang = self._get_shebang('utf-8', post_interp, options=options)
script = self._get_script_text(entry).encode('utf-8')
scriptnames = self.get_script_filenames(entry.name)
if options and options.get('gui', False):
ext = 'pyw'
else:
ext = 'py'
self._write_script(scriptnames, shebang, script, filenames, ext)
def _copy_script(self, script, filenames):
adjust = False
script = os.path.join(self.source_dir, convert_path(script))
outname = os.path.join(self.target_dir, os.path.basename(script))
if not self.force and not self._fileop.newer(script, outname):
logger.debug('not copying %s (up-to-date)', script)
return
# Always open the file, but ignore failures in dry-run mode --
# that way, we'll get accurate feedback if we can read the
# script.
try:
f = open(script, 'rb')
except IOError: # pragma: no cover
if not self.dry_run:
raise
f = None
else:
first_line = f.readline()
if not first_line: # pragma: no cover
logger.warning('%s is an empty file (skipping)', script)
return
match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
if match:
adjust = True
post_interp = match.group(1) or b''
if not adjust:
if f:
f.close()
self._fileop.copy_file(script, outname)
if self.set_mode:
self._fileop.set_executable_mode([outname])
filenames.append(outname)
else:
logger.info('copying and adjusting %s -> %s', script,
self.target_dir)
if not self._fileop.dry_run:
encoding, lines = detect_encoding(f.readline)
f.seek(0)
shebang = self._get_shebang(encoding, post_interp)
if b'pythonw' in first_line: # pragma: no cover
ext = 'pyw'
else:
ext = 'py'
n = os.path.basename(outname)
self._write_script([n], shebang, f.read(), filenames, ext)
if f:
f.close()
@property
def dry_run(self):
return self._fileop.dry_run
@dry_run.setter
def dry_run(self, value):
self._fileop.dry_run = value
if os.name == 'nt' or (os.name == 'java'
and os._name == 'nt'): # pragma: no cover
# Executable launcher support.
# Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/
def _get_launcher(self, kind):
if struct.calcsize('P') == 8: # 64-bit
bits = '64'
else:
bits = '32'
platform_suffix = '-arm' if get_platform() == 'win-arm64' else ''
name = '%s%s%s.exe' % (kind, bits, platform_suffix)
if name not in WRAPPERS:
msg = ('Unable to find resource %s in package %s' %
(name, distlib_package))
raise ValueError(msg)
return WRAPPERS[name]
# Public API follows
def make(self, specification, options=None):
"""
Make a script.
:param specification: The specification, which is either a valid export
entry specification (to make a script from a
callable) or a filename (to make a script by
copying from a source location).
:param options: A dictionary of options controlling script generation.
:return: A list of all absolute pathnames written to.
"""
filenames = []
entry = get_export_entry(specification)
if entry is None:
self._copy_script(specification, filenames)
else:
self._make_script(entry, filenames, options=options)
return filenames
def make_multiple(self, specifications, options=None):
"""
        Take a list of specifications and make scripts from them.
        :param specifications: A list of specifications.
        :param options: As for the :meth:`make` method.
        :return: A list of all absolute pathnames written to.
"""
filenames = []
for specification in specifications:
filenames.extend(self.make(specification, options))
return filenames
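# ---------------------------------------------------------------------------
# Editor's note: an illustrative sketch; the module, callable and target
# directory below are hypothetical.
if __name__ == '__main__':  # pragma: no cover
    maker = ScriptMaker(source_dir=None, target_dir='/tmp/demo-bin')
    maker.variants = {''}  # write just 'demo', not 'demo3' / 'demo3.Y'
    print(maker.make('demo = mypkg.cli:main'))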

File diff suppressed because it is too large

View File

@ -0,0 +1,751 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2023 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""
Implementation of a flexible versioning scheme providing support for PEP 440,
setuptools-compatible and semantic versioning.
"""
import logging
import re
from .compat import string_types
from .util import parse_requirement
__all__ = ['NormalizedVersion', 'NormalizedMatcher',
'LegacyVersion', 'LegacyMatcher',
'SemanticVersion', 'SemanticMatcher',
'UnsupportedVersionError', 'get_scheme']
logger = logging.getLogger(__name__)
class UnsupportedVersionError(ValueError):
"""This is an unsupported version."""
pass
class Version(object):
def __init__(self, s):
self._string = s = s.strip()
self._parts = parts = self.parse(s)
assert isinstance(parts, tuple)
assert len(parts) > 0
def parse(self, s):
raise NotImplementedError('please implement in a subclass')
def _check_compatible(self, other):
if type(self) != type(other):
raise TypeError('cannot compare %r and %r' % (self, other))
def __eq__(self, other):
self._check_compatible(other)
return self._parts == other._parts
def __ne__(self, other):
return not self.__eq__(other)
def __lt__(self, other):
self._check_compatible(other)
return self._parts < other._parts
def __gt__(self, other):
return not (self.__lt__(other) or self.__eq__(other))
def __le__(self, other):
return self.__lt__(other) or self.__eq__(other)
def __ge__(self, other):
return self.__gt__(other) or self.__eq__(other)
# See http://docs.python.org/reference/datamodel#object.__hash__
def __hash__(self):
return hash(self._parts)
def __repr__(self):
return "%s('%s')" % (self.__class__.__name__, self._string)
def __str__(self):
return self._string
@property
def is_prerelease(self):
raise NotImplementedError('Please implement in subclasses.')
class Matcher(object):
version_class = None
# value is either a callable or the name of a method
_operators = {
'<': lambda v, c, p: v < c,
'>': lambda v, c, p: v > c,
'<=': lambda v, c, p: v == c or v < c,
'>=': lambda v, c, p: v == c or v > c,
'==': lambda v, c, p: v == c,
'===': lambda v, c, p: v == c,
# by default, compatible => >=.
'~=': lambda v, c, p: v == c or v > c,
'!=': lambda v, c, p: v != c,
}
# this is a method only to support alternative implementations
# via overriding
def parse_requirement(self, s):
return parse_requirement(s)
def __init__(self, s):
if self.version_class is None:
raise ValueError('Please specify a version class')
self._string = s = s.strip()
r = self.parse_requirement(s)
if not r:
raise ValueError('Not valid: %r' % s)
self.name = r.name
self.key = self.name.lower() # for case-insensitive comparisons
clist = []
if r.constraints:
# import pdb; pdb.set_trace()
for op, s in r.constraints:
if s.endswith('.*'):
if op not in ('==', '!='):
raise ValueError('\'.*\' not allowed for '
'%r constraints' % op)
# Could be a partial version (e.g. for '2.*') which
# won't parse as a version, so keep it as a string
vn, prefix = s[:-2], True
# Just to check that vn is a valid version
self.version_class(vn)
else:
# Should parse as a version, so we can create an
# instance for the comparison
vn, prefix = self.version_class(s), False
clist.append((op, vn, prefix))
self._parts = tuple(clist)
def match(self, version):
"""
Check if the provided version matches the constraints.
:param version: The version to match against this instance.
:type version: String or :class:`Version` instance.
"""
if isinstance(version, string_types):
version = self.version_class(version)
for operator, constraint, prefix in self._parts:
f = self._operators.get(operator)
if isinstance(f, string_types):
f = getattr(self, f)
if not f:
msg = ('%r not implemented '
'for %s' % (operator, self.__class__.__name__))
raise NotImplementedError(msg)
if not f(version, constraint, prefix):
return False
return True
@property
def exact_version(self):
result = None
if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='):
result = self._parts[0][1]
return result
def _check_compatible(self, other):
if type(self) != type(other) or self.name != other.name:
raise TypeError('cannot compare %s and %s' % (self, other))
def __eq__(self, other):
self._check_compatible(other)
return self.key == other.key and self._parts == other._parts
def __ne__(self, other):
return not self.__eq__(other)
# See http://docs.python.org/reference/datamodel#object.__hash__
def __hash__(self):
return hash(self.key) + hash(self._parts)
def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._string)
def __str__(self):
return self._string
PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|alpha|b|beta|c|rc|pre|preview)(\d+)?)?'
r'(\.(post|r|rev)(\d+)?)?([._-]?(dev)(\d+)?)?'
r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$', re.I)
def _pep_440_key(s):
s = s.strip()
m = PEP440_VERSION_RE.match(s)
if not m:
raise UnsupportedVersionError('Not a valid version: %s' % s)
groups = m.groups()
nums = tuple(int(v) for v in groups[1].split('.'))
while len(nums) > 1 and nums[-1] == 0:
nums = nums[:-1]
if not groups[0]:
epoch = 0
else:
epoch = int(groups[0][:-1])
pre = groups[4:6]
post = groups[7:9]
dev = groups[10:12]
local = groups[13]
if pre == (None, None):
pre = ()
else:
if pre[1] is None:
pre = pre[0], 0
else:
pre = pre[0], int(pre[1])
if post == (None, None):
post = ()
else:
if post[1] is None:
post = post[0], 0
else:
post = post[0], int(post[1])
if dev == (None, None):
dev = ()
else:
if dev[1] is None:
dev = dev[0], 0
else:
dev = dev[0], int(dev[1])
if local is None:
local = ()
else:
parts = []
for part in local.split('.'):
# to ensure that numeric compares as > lexicographic, avoid
# comparing them directly, but encode a tuple which ensures
# correct sorting
if part.isdigit():
part = (1, int(part))
else:
part = (0, part)
parts.append(part)
local = tuple(parts)
if not pre:
# either before pre-release, or final release and after
if not post and dev:
# before pre-release
pre = ('a', -1) # to sort before a0
else:
pre = ('z',) # to sort after all pre-releases
# now look at the state of post and dev.
if not post:
post = ('_',) # sort before 'a'
if not dev:
dev = ('final',)
return epoch, nums, pre, post, dev, local
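# For example (editor's note), _pep_440_key('1.2a1') returns
# (0, (1, 2), ('a', 1), ('_',), ('final',), ()): epoch 0, release (1, 2),
# pre-release a1, and placeholders meaning no post, dev or local parts.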
_normalized_key = _pep_440_key
class NormalizedVersion(Version):
"""A rational version.
Good:
1.2 # equivalent to "1.2.0"
1.2.0
1.2a1
1.2.3a2
1.2.3b1
1.2.3c1
1.2.3.4
TODO: fill this out
Bad:
1 # minimum two numbers
1.2a # release level must have a release serial
1.2.3b
"""
def parse(self, s):
result = _normalized_key(s)
# _normalized_key loses trailing zeroes in the release
# clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
# However, PEP 440 prefix matching needs it: for example,
# (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
m = PEP440_VERSION_RE.match(s) # must succeed
groups = m.groups()
self._release_clause = tuple(int(v) for v in groups[1].split('.'))
return result
PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev'])
@property
def is_prerelease(self):
return any(t[0] in self.PREREL_TAGS for t in self._parts if t)
def _match_prefix(x, y):
x = str(x)
y = str(y)
if x == y:
return True
if not x.startswith(y):
return False
n = len(y)
return x[n] == '.'
class NormalizedMatcher(Matcher):
version_class = NormalizedVersion
# value is either a callable or the name of a method
_operators = {
'~=': '_match_compatible',
'<': '_match_lt',
'>': '_match_gt',
'<=': '_match_le',
'>=': '_match_ge',
'==': '_match_eq',
'===': '_match_arbitrary',
'!=': '_match_ne',
}
def _adjust_local(self, version, constraint, prefix):
if prefix:
strip_local = '+' not in constraint and version._parts[-1]
else:
# both constraint and version are
# NormalizedVersion instances.
# If constraint does not have a local component,
# ensure the version doesn't, either.
strip_local = not constraint._parts[-1] and version._parts[-1]
if strip_local:
s = version._string.split('+', 1)[0]
version = self.version_class(s)
return version, constraint
def _match_lt(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if version >= constraint:
return False
release_clause = constraint._release_clause
pfx = '.'.join([str(i) for i in release_clause])
return not _match_prefix(version, pfx)
def _match_gt(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if version <= constraint:
return False
release_clause = constraint._release_clause
pfx = '.'.join([str(i) for i in release_clause])
return not _match_prefix(version, pfx)
def _match_le(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
return version <= constraint
def _match_ge(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
return version >= constraint
def _match_eq(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if not prefix:
result = (version == constraint)
else:
result = _match_prefix(version, constraint)
return result
def _match_arbitrary(self, version, constraint, prefix):
return str(version) == str(constraint)
def _match_ne(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if not prefix:
result = (version != constraint)
else:
result = not _match_prefix(version, constraint)
return result
def _match_compatible(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if version == constraint:
return True
if version < constraint:
return False
# if not prefix:
# return True
release_clause = constraint._release_clause
if len(release_clause) > 1:
release_clause = release_clause[:-1]
pfx = '.'.join([str(i) for i in release_clause])
return _match_prefix(version, pfx)
_REPLACEMENTS = (
(re.compile('[.+-]$'), ''), # remove trailing puncts
(re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start
(re.compile('^[.-]'), ''), # remove leading puncts
(re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses
(re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion)
    (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'),   # remove leading r(ev)
(re.compile('[.]{2,}'), '.'), # multiple runs of '.'
(re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha
(re.compile(r'\b(pre-alpha|prealpha)\b'),
'pre.alpha'), # standardise
(re.compile(r'\(beta\)$'), 'beta'), # remove parentheses
)
_SUFFIX_REPLACEMENTS = (
(re.compile('^[:~._+-]+'), ''), # remove leading puncts
(re.compile('[,*")([\\]]'), ''), # remove unwanted chars
(re.compile('[~:+_ -]'), '.'), # replace illegal chars
(re.compile('[.]{2,}'), '.'), # multiple runs of '.'
(re.compile(r'\.$'), ''), # trailing '.'
)
_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')
def _suggest_semantic_version(s):
"""
Try to suggest a semantic form for a version for which
_suggest_normalized_version couldn't come up with anything.
"""
result = s.strip().lower()
for pat, repl in _REPLACEMENTS:
result = pat.sub(repl, result)
if not result:
result = '0.0.0'
# Now look for numeric prefix, and separate it out from
# the rest.
m = _NUMERIC_PREFIX.match(result)
if not m:
prefix = '0.0.0'
suffix = result
else:
prefix = m.groups()[0].split('.')
prefix = [int(i) for i in prefix]
while len(prefix) < 3:
prefix.append(0)
if len(prefix) == 3:
suffix = result[m.end():]
else:
suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():]
prefix = prefix[:3]
prefix = '.'.join([str(i) for i in prefix])
suffix = suffix.strip()
if suffix:
# massage the suffix.
for pat, repl in _SUFFIX_REPLACEMENTS:
suffix = pat.sub(repl, suffix)
if not suffix:
result = prefix
else:
sep = '-' if 'dev' in suffix else '+'
result = prefix + sep + suffix
if not is_semver(result):
result = None
return result
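# For example (illustrative): _suggest_semantic_version('v1.2') strips the
# leading 'v', zero-pads the numeric prefix to three parts and returns '1.2.0'.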
def _suggest_normalized_version(s):
"""Suggest a normalized version close to the given version string.
If you have a version string that isn't rational (i.e. NormalizedVersion
doesn't like it) then you might be able to get an equivalent (or close)
rational version from this function.
This does a number of simple normalizations to the given string, based
on observation of versions currently in use on PyPI. Given a dump of
    those versions during PyCon 2009, 4287 of them:
    - 2312 (53.93%) match NormalizedVersion without change
    - 3474 (81.04%) match when using this suggestion method
@param s {str} An irrational version string.
    @returns A rational version string, or None if one couldn't be determined.
"""
try:
_normalized_key(s)
return s # already rational
except UnsupportedVersionError:
pass
rs = s.lower()
# part of this could use maketrans
for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
('beta', 'b'), ('rc', 'c'), ('-final', ''),
('-pre', 'c'),
('-release', ''), ('.release', ''), ('-stable', ''),
('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
('final', '')):
rs = rs.replace(orig, repl)
# if something ends with dev or pre, we add a 0
rs = re.sub(r"pre$", r"pre0", rs)
rs = re.sub(r"dev$", r"dev0", rs)
# if we have something like "b-2" or "a.2" at the end of the
# version, that is probably beta, alpha, etc
# let's remove the dash or dot
rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs)
# 1.0-dev-r371 -> 1.0.dev371
# 0.1-dev-r79 -> 0.1.dev79
rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)
# Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)
# Clean: v0.3, v1.0
if rs.startswith('v'):
rs = rs[1:]
# Clean leading '0's on numbers.
# TODO: unintended side-effect on, e.g., "2003.05.09"
# PyPI stats: 77 (~2%) better
rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)
# Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
# zero.
# PyPI stats: 245 (7.56%) better
rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)
# the 'dev-rNNN' tag is a dev tag
rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)
# clean the - when used as a pre delimiter
rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)
# a terminal "dev" or "devel" can be changed into ".dev0"
rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)
# a terminal "dev" can be changed into ".dev0"
rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)
# a terminal "final" or "stable" can be removed
rs = re.sub(r"(final|stable)$", "", rs)
# The 'r' and the '-' tags are post release tags
# 0.4a1.r10 -> 0.4a1.post10
# 0.9.33-17222 -> 0.9.33.post17222
# 0.9.33-r17222 -> 0.9.33.post17222
rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)
# Clean 'r' instead of 'dev' usage:
# 0.9.33+r17222 -> 0.9.33.dev17222
# 1.0dev123 -> 1.0.dev123
# 1.0.git123 -> 1.0.dev123
# 1.0.bzr123 -> 1.0.dev123
# 0.1a0dev.123 -> 0.1a0.dev123
# PyPI stats: ~150 (~4%) better
rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)
# Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
# 0.2.pre1 -> 0.2c1
# 0.2-c1 -> 0.2c1
# 1.0preview123 -> 1.0c123
# PyPI stats: ~21 (0.62%) better
rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)
# Tcl/Tk uses "px" for their post release markers
rs = re.sub(r"p(\d+)$", r".post\1", rs)
try:
_normalized_key(rs)
except UnsupportedVersionError:
rs = None
return rs
#
# Legacy version processing (distribute-compatible)
#
_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)
_VERSION_REPLACE = {
'pre': 'c',
'preview': 'c',
'-': 'final-',
'rc': 'c',
'dev': '@',
'': None,
'.': None,
}
def _legacy_key(s):
def get_parts(s):
result = []
for p in _VERSION_PART.split(s.lower()):
p = _VERSION_REPLACE.get(p, p)
if p:
if '0' <= p[:1] <= '9':
p = p.zfill(8)
else:
p = '*' + p
result.append(p)
result.append('*final')
return result
result = []
for p in get_parts(s):
if p.startswith('*'):
if p < '*final':
while result and result[-1] == '*final-':
result.pop()
while result and result[-1] == '00000000':
result.pop()
result.append(p)
return tuple(result)
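# For example (illustrative): _legacy_key('1.0b2') yields
# ('00000001', '*b', '00000002', '*final'). The trailing zero part is popped
# before the pre-release marker, so '1.0b2' and '1.0.0b2' compare equal.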
class LegacyVersion(Version):
def parse(self, s):
return _legacy_key(s)
@property
def is_prerelease(self):
result = False
for x in self._parts:
if (isinstance(x, string_types) and x.startswith('*') and
x < '*final'):
result = True
break
return result
class LegacyMatcher(Matcher):
version_class = LegacyVersion
_operators = dict(Matcher._operators)
_operators['~='] = '_match_compatible'
numeric_re = re.compile(r'^(\d+(\.\d+)*)')
def _match_compatible(self, version, constraint, prefix):
if version < constraint:
return False
m = self.numeric_re.match(str(constraint))
if not m:
logger.warning('Cannot compute compatible match for version %s '
' and constraint %s', version, constraint)
return True
s = m.groups()[0]
if '.' in s:
s = s.rsplit('.', 1)[0]
return _match_prefix(version, s)
#
# Semantic versioning
#
_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)'
r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I)
def is_semver(s):
return _SEMVER_RE.match(s)
def _semantic_key(s):
def make_tuple(s, absent):
if s is None:
result = (absent,)
else:
parts = s[1:].split('.')
# We can't compare ints and strings on Python 3, so fudge it
            # by zero-filling numeric values to simulate a numeric comparison
result = tuple([p.zfill(8) if p.isdigit() else p for p in parts])
return result
m = is_semver(s)
if not m:
raise UnsupportedVersionError(s)
groups = m.groups()
major, minor, patch = [int(i) for i in groups[:3]]
# choose the '|' and '*' so that versions sort correctly
pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*')
return (major, minor, patch), pre, build
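# For example (illustrative): _semantic_key('1.0.0-alpha.1') returns
# ((1, 0, 0), ('alpha', '00000001'), ('*',)); the '|' filler used for final
# releases sorts after any alphabetic pre-release tag.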
class SemanticVersion(Version):
def parse(self, s):
return _semantic_key(s)
@property
def is_prerelease(self):
return self._parts[1][0] != '|'
class SemanticMatcher(Matcher):
version_class = SemanticVersion
class VersionScheme(object):
def __init__(self, key, matcher, suggester=None):
self.key = key
self.matcher = matcher
self.suggester = suggester
def is_valid_version(self, s):
try:
self.matcher.version_class(s)
result = True
except UnsupportedVersionError:
result = False
return result
def is_valid_matcher(self, s):
try:
self.matcher(s)
result = True
except UnsupportedVersionError:
result = False
return result
def is_valid_constraint_list(self, s):
"""
Used for processing some metadata fields
"""
# See issue #140. Be tolerant of a single trailing comma.
if s.endswith(','):
s = s[:-1]
return self.is_valid_matcher('dummy_name (%s)' % s)
def suggest(self, s):
if self.suggester is None:
result = None
else:
result = self.suggester(s)
return result
_SCHEMES = {
'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
_suggest_normalized_version),
    # Note: VersionScheme.suggest calls self.suggester(s) with one argument,
    # so the identity suggester must take a single parameter.
    'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda s: s),
'semantic': VersionScheme(_semantic_key, SemanticMatcher,
_suggest_semantic_version),
}
_SCHEMES['default'] = _SCHEMES['normalized']
def get_scheme(name):
if name not in _SCHEMES:
raise ValueError('unknown scheme name: %r' % name)
return _SCHEMES[name]
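# Usage sketch (an illustration, not part of the vendored module): a scheme
# bundles a key function, a Matcher class and an optional suggester.
#     >>> scheme = get_scheme('normalized')
#     >>> scheme.is_valid_version('1.4.5')
#     True
#     >>> NormalizedMatcher('foo (~= 1.4.5)').match('1.4.9')
#     True
#     >>> scheme.suggest('1.0-final')
#     '1.0'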

File diff suppressed because it is too large

View File

@ -0,0 +1,54 @@
from .distro import (
NORMALIZED_DISTRO_ID,
NORMALIZED_LSB_ID,
NORMALIZED_OS_ID,
LinuxDistribution,
__version__,
build_number,
codename,
distro_release_attr,
distro_release_info,
id,
info,
like,
linux_distribution,
lsb_release_attr,
lsb_release_info,
major_version,
minor_version,
name,
os_release_attr,
os_release_info,
uname_attr,
uname_info,
version,
version_parts,
)
__all__ = [
"NORMALIZED_DISTRO_ID",
"NORMALIZED_LSB_ID",
"NORMALIZED_OS_ID",
"LinuxDistribution",
"build_number",
"codename",
"distro_release_attr",
"distro_release_info",
"id",
"info",
"like",
"linux_distribution",
"lsb_release_attr",
"lsb_release_info",
"major_version",
"minor_version",
"name",
"os_release_attr",
"os_release_info",
"uname_attr",
"uname_info",
"version",
"version_parts",
]
__version__ = __version__
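# Usage sketch (illustrative; the reported values depend on the host system):
#     >>> import distro  # vendored here under pip._vendor
#     >>> distro.id(), distro.version()
#     ('ubuntu', '22.04')
#     >>> distro.name(pretty=True)
#     'Ubuntu 22.04.4 LTS'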

View File

@ -0,0 +1,4 @@
from .distro import main
if __name__ == "__main__":
main()

File diff suppressed because it is too large

View File

@ -0,0 +1,44 @@
from .package_data import __version__
from .core import (
IDNABidiError,
IDNAError,
InvalidCodepoint,
InvalidCodepointContext,
alabel,
check_bidi,
check_hyphen_ok,
check_initial_combiner,
check_label,
check_nfc,
decode,
encode,
ulabel,
uts46_remap,
valid_contextj,
valid_contexto,
valid_label_length,
valid_string_length,
)
from .intranges import intranges_contain
__all__ = [
"IDNABidiError",
"IDNAError",
"InvalidCodepoint",
"InvalidCodepointContext",
"alabel",
"check_bidi",
"check_hyphen_ok",
"check_initial_combiner",
"check_label",
"check_nfc",
"decode",
"encode",
"intranges_contain",
"ulabel",
"uts46_remap",
"valid_contextj",
"valid_contexto",
"valid_label_length",
"valid_string_length",
]

View File

@ -0,0 +1,118 @@
from .core import encode, decode, alabel, ulabel, IDNAError
import codecs
import re
from typing import Any, Tuple, Optional
_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
class Codec(codecs.Codec):
def encode(self, data: str, errors: str = 'strict') -> Tuple[bytes, int]:
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
if not data:
return b"", 0
return encode(data), len(data)
def decode(self, data: bytes, errors: str = 'strict') -> Tuple[str, int]:
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
if not data:
return '', 0
return decode(data), len(data)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]:
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
if not data:
return b'', 0
labels = _unicode_dots_re.split(data)
trailing_dot = b''
if labels:
if not labels[-1]:
trailing_dot = b'.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = b'.'
result = []
size = 0
for label in labels:
result.append(alabel(label))
if size:
size += 1
size += len(label)
# Join with U+002E
result_bytes = b'.'.join(result) + trailing_dot
size += len(trailing_dot)
return result_bytes, size
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]:
if errors != 'strict':
raise IDNAError('Unsupported error handling \"{}\"'.format(errors))
if not data:
return ('', 0)
if not isinstance(data, str):
data = str(data, 'ascii')
labels = _unicode_dots_re.split(data)
trailing_dot = ''
if labels:
if not labels[-1]:
trailing_dot = '.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = '.'
result = []
size = 0
for label in labels:
result.append(ulabel(label))
if size:
size += 1
size += len(label)
result_str = '.'.join(result) + trailing_dot
size += len(trailing_dot)
return (result_str, size)
class StreamWriter(Codec, codecs.StreamWriter):
pass
class StreamReader(Codec, codecs.StreamReader):
pass
def search_function(name: str) -> Optional[codecs.CodecInfo]:
if name != 'idna2008':
return None
return codecs.CodecInfo(
name=name,
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)
codecs.register(search_function)
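# Usage sketch (mirroring the idna documentation): once registered, the codec
# is reachable through the standard codecs machinery by name.
#     >>> 'домен.испытание'.encode('idna2008')
#     b'xn--d1acufc.xn--80akhbyknj4f'
#     >>> b'xn--d1acufc.xn--80akhbyknj4f'.decode('idna2008')
#     'домен.испытание'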

View File

@ -0,0 +1,13 @@
from .core import *
from .codec import *
from typing import Any, Union
def ToASCII(label: str) -> bytes:
return encode(label)
def ToUnicode(label: Union[bytes, bytearray]) -> str:
return decode(label)
def nameprep(s: Any) -> None:
raise NotImplementedError('IDNA 2008 does not utilise nameprep protocol')

View File

@ -0,0 +1,395 @@
from . import idnadata
import bisect
import unicodedata
import re
from typing import Union, Optional
from .intranges import intranges_contain
_virama_combining_class = 9
_alabel_prefix = b'xn--'
_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
class IDNAError(UnicodeError):
""" Base exception for all IDNA-encoding related problems """
pass
class IDNABidiError(IDNAError):
""" Exception when bidirectional requirements are not satisfied """
pass
class InvalidCodepoint(IDNAError):
""" Exception when a disallowed or unallocated codepoint is used """
pass
class InvalidCodepointContext(IDNAError):
""" Exception when the codepoint is not valid in the context it is used """
pass
def _combining_class(cp: int) -> int:
v = unicodedata.combining(chr(cp))
if v == 0:
if not unicodedata.name(chr(cp)):
raise ValueError('Unknown character in unicodedata')
return v
def _is_script(cp: str, script: str) -> bool:
return intranges_contain(ord(cp), idnadata.scripts[script])
def _punycode(s: str) -> bytes:
return s.encode('punycode')
def _unot(s: int) -> str:
return 'U+{:04X}'.format(s)
def valid_label_length(label: Union[bytes, str]) -> bool:
if len(label) > 63:
return False
return True
def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:
if len(label) > (254 if trailing_dot else 253):
return False
return True
def check_bidi(label: str, check_ltr: bool = False) -> bool:
# Bidi rules should only be applied if string contains RTL characters
bidi_label = False
for (idx, cp) in enumerate(label, 1):
direction = unicodedata.bidirectional(cp)
if direction == '':
# String likely comes from a newer version of Unicode
raise IDNABidiError('Unknown directionality in label {} at position {}'.format(repr(label), idx))
if direction in ['R', 'AL', 'AN']:
bidi_label = True
if not bidi_label and not check_ltr:
return True
# Bidi rule 1
direction = unicodedata.bidirectional(label[0])
if direction in ['R', 'AL']:
rtl = True
elif direction == 'L':
rtl = False
else:
raise IDNABidiError('First codepoint in label {} must be directionality L, R or AL'.format(repr(label)))
valid_ending = False
number_type = None # type: Optional[str]
for (idx, cp) in enumerate(label, 1):
direction = unicodedata.bidirectional(cp)
if rtl:
# Bidi rule 2
if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
raise IDNABidiError('Invalid direction for codepoint at position {} in a right-to-left label'.format(idx))
# Bidi rule 3
if direction in ['R', 'AL', 'EN', 'AN']:
valid_ending = True
elif direction != 'NSM':
valid_ending = False
# Bidi rule 4
if direction in ['AN', 'EN']:
if not number_type:
number_type = direction
else:
if number_type != direction:
raise IDNABidiError('Can not mix numeral types in a right-to-left label')
else:
# Bidi rule 5
if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
raise IDNABidiError('Invalid direction for codepoint at position {} in a left-to-right label'.format(idx))
# Bidi rule 6
if direction in ['L', 'EN']:
valid_ending = True
elif direction != 'NSM':
valid_ending = False
if not valid_ending:
raise IDNABidiError('Label ends with illegal codepoint directionality')
return True
def check_initial_combiner(label: str) -> bool:
if unicodedata.category(label[0])[0] == 'M':
raise IDNAError('Label begins with an illegal combining character')
return True
def check_hyphen_ok(label: str) -> bool:
if label[2:4] == '--':
raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
if label[0] == '-' or label[-1] == '-':
raise IDNAError('Label must not start or end with a hyphen')
return True
def check_nfc(label: str) -> None:
if unicodedata.normalize('NFC', label) != label:
raise IDNAError('Label must be in Normalization Form C')
def valid_contextj(label: str, pos: int) -> bool:
cp_value = ord(label[pos])
if cp_value == 0x200c:
if pos > 0:
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
return True
ok = False
for i in range(pos-1, -1, -1):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord('T'):
continue
elif joining_type in [ord('L'), ord('D')]:
ok = True
break
else:
break
if not ok:
return False
ok = False
for i in range(pos+1, len(label)):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord('T'):
continue
elif joining_type in [ord('R'), ord('D')]:
ok = True
break
else:
break
return ok
if cp_value == 0x200d:
if pos > 0:
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
return True
return False
else:
return False
def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
cp_value = ord(label[pos])
if cp_value == 0x00b7:
if 0 < pos < len(label)-1:
if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
return True
return False
elif cp_value == 0x0375:
if pos < len(label)-1 and len(label) > 1:
return _is_script(label[pos + 1], 'Greek')
return False
elif cp_value == 0x05f3 or cp_value == 0x05f4:
if pos > 0:
return _is_script(label[pos - 1], 'Hebrew')
return False
elif cp_value == 0x30fb:
for cp in label:
if cp == '\u30fb':
continue
if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
return True
return False
elif 0x660 <= cp_value <= 0x669:
for cp in label:
if 0x6f0 <= ord(cp) <= 0x06f9:
return False
return True
elif 0x6f0 <= cp_value <= 0x6f9:
for cp in label:
if 0x660 <= ord(cp) <= 0x0669:
return False
return True
return False
def check_label(label: Union[str, bytes, bytearray]) -> None:
if isinstance(label, (bytes, bytearray)):
label = label.decode('utf-8')
if len(label) == 0:
raise IDNAError('Empty Label')
check_nfc(label)
check_hyphen_ok(label)
check_initial_combiner(label)
for (pos, cp) in enumerate(label):
cp_value = ord(cp)
if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
continue
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
if not valid_contextj(label, pos):
raise InvalidCodepointContext('Joiner {} not allowed at position {} in {}'.format(
_unot(cp_value), pos+1, repr(label)))
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
if not valid_contexto(label, pos):
raise InvalidCodepointContext('Codepoint {} not allowed at position {} in {}'.format(_unot(cp_value), pos+1, repr(label)))
else:
raise InvalidCodepoint('Codepoint {} at position {} of {} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
check_bidi(label)
def alabel(label: str) -> bytes:
try:
label_bytes = label.encode('ascii')
ulabel(label_bytes)
if not valid_label_length(label_bytes):
raise IDNAError('Label too long')
return label_bytes
except UnicodeEncodeError:
pass
check_label(label)
label_bytes = _alabel_prefix + _punycode(label)
if not valid_label_length(label_bytes):
raise IDNAError('Label too long')
return label_bytes
def ulabel(label: Union[str, bytes, bytearray]) -> str:
if not isinstance(label, (bytes, bytearray)):
try:
label_bytes = label.encode('ascii')
except UnicodeEncodeError:
check_label(label)
return label
else:
label_bytes = label
label_bytes = label_bytes.lower()
if label_bytes.startswith(_alabel_prefix):
label_bytes = label_bytes[len(_alabel_prefix):]
if not label_bytes:
raise IDNAError('Malformed A-label, no Punycode eligible content found')
if label_bytes.decode('ascii')[-1] == '-':
raise IDNAError('A-label must not end with a hyphen')
else:
check_label(label_bytes)
return label_bytes.decode('ascii')
try:
label = label_bytes.decode('punycode')
except UnicodeError:
raise IDNAError('Invalid A-label')
check_label(label)
return label
def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
"""Re-map the characters in the string according to UTS46 processing."""
from .uts46data import uts46data
output = ''
for pos, char in enumerate(domain):
code_point = ord(char)
try:
uts46row = uts46data[code_point if code_point < 256 else
bisect.bisect_left(uts46data, (code_point, 'Z')) - 1]
status = uts46row[1]
replacement = None # type: Optional[str]
if len(uts46row) == 3:
replacement = uts46row[2]
if (status == 'V' or
(status == 'D' and not transitional) or
(status == '3' and not std3_rules and replacement is None)):
output += char
elif replacement is not None and (status == 'M' or
(status == '3' and not std3_rules) or
(status == 'D' and transitional)):
output += replacement
elif status != 'I':
raise IndexError()
except IndexError:
raise InvalidCodepoint(
'Codepoint {} not allowed at position {} in {}'.format(
_unot(code_point), pos + 1, repr(domain)))
return unicodedata.normalize('NFC', output)
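# For example (illustrative): UTS 46 maps uppercase letters to lowercase, so
# uts46_remap('ExAMPLE.com') returns 'example.com'.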
def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes:
if not isinstance(s, str):
try:
s = str(s, 'ascii')
except UnicodeDecodeError:
raise IDNAError('should pass a unicode string to the function rather than a byte string.')
if uts46:
s = uts46_remap(s, std3_rules, transitional)
trailing_dot = False
result = []
if strict:
labels = s.split('.')
else:
labels = _unicode_dots_re.split(s)
if not labels or labels == ['']:
raise IDNAError('Empty domain')
if labels[-1] == '':
del labels[-1]
trailing_dot = True
for label in labels:
s = alabel(label)
if s:
result.append(s)
else:
raise IDNAError('Empty label')
if trailing_dot:
result.append(b'')
s = b'.'.join(result)
if not valid_string_length(s, trailing_dot):
raise IDNAError('Domain too long')
return s
def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str:
try:
if not isinstance(s, str):
s = str(s, 'ascii')
except UnicodeDecodeError:
raise IDNAError('Invalid ASCII in A-label')
if uts46:
s = uts46_remap(s, std3_rules, False)
trailing_dot = False
result = []
if not strict:
labels = _unicode_dots_re.split(s)
else:
labels = s.split('.')
if not labels or labels == ['']:
raise IDNAError('Empty domain')
if not labels[-1]:
del labels[-1]
trailing_dot = True
for label in labels:
s = ulabel(label)
if s:
result.append(s)
else:
raise IDNAError('Empty label')
if trailing_dot:
result.append('')
return '.'.join(result)
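# Usage sketch (mirroring the idna documentation):
#     >>> encode('ドメイン.テスト')
#     b'xn--eckwd4c7c.xn--zckzah'
#     >>> decode(b'xn--eckwd4c7c.xn--zckzah')
#     'ドメイン.テスト'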

File diff suppressed because it is too large

View File

@ -0,0 +1,54 @@
"""
Given a list of integers, made up of (hopefully) a small number of long runs
of consecutive integers, compute a representation of the form
((start1, end1), (start2, end2) ...). Then answer the question "was x present
in the original list?" in time O(log(# runs)).
"""
import bisect
from typing import List, Tuple
def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
"""Represent a list of integers as a sequence of ranges:
((start_0, end_0), (start_1, end_1), ...), such that the original
integers are exactly those x such that start_i <= x < end_i for some i.
Ranges are encoded as single integers (start << 32 | end), not as tuples.
"""
sorted_list = sorted(list_)
ranges = []
last_write = -1
for i in range(len(sorted_list)):
if i+1 < len(sorted_list):
if sorted_list[i] == sorted_list[i+1]-1:
continue
current_range = sorted_list[last_write+1:i+1]
ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
last_write = i
return tuple(ranges)
def _encode_range(start: int, end: int) -> int:
return (start << 32) | end
def _decode_range(r: int) -> Tuple[int, int]:
return (r >> 32), (r & ((1 << 32) - 1))
def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool:
"""Determine if `int_` falls into one of the ranges in `ranges`."""
tuple_ = _encode_range(int_, 0)
pos = bisect.bisect_left(ranges, tuple_)
# we could be immediately ahead of a tuple (start, end)
# with start < int_ <= end
if pos > 0:
left, right = _decode_range(ranges[pos-1])
if left <= int_ < right:
return True
# or we could be immediately behind a tuple (int_, end)
if pos < len(ranges):
left, _ = _decode_range(ranges[pos])
if left == int_:
return True
return False
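# A short self-check (illustrative) of the half-open range encoding:
if __name__ == "__main__":
    ranges = intranges_from_list([1, 2, 3, 10, 11])  # runs [1, 4) and [10, 12)
    assert intranges_contain(2, ranges)
    assert intranges_contain(10, ranges)
    assert not intranges_contain(5, ranges)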

View File

@ -0,0 +1,2 @@
__version__ = '3.7'

File diff suppressed because it is too large

View File

@ -0,0 +1,55 @@
from .exceptions import *
from .ext import ExtType, Timestamp
import os
version = (1, 0, 8)
__version__ = "1.0.8"
if os.environ.get("MSGPACK_PUREPYTHON"):
from .fallback import Packer, unpackb, Unpacker
else:
try:
from ._cmsgpack import Packer, unpackb, Unpacker
except ImportError:
from .fallback import Packer, unpackb, Unpacker
def pack(o, stream, **kwargs):
"""
Pack object `o` and write it to `stream`
See :class:`Packer` for options.
"""
packer = Packer(**kwargs)
stream.write(packer.pack(o))
def packb(o, **kwargs):
"""
Pack object `o` and return packed bytes
See :class:`Packer` for options.
"""
return Packer(**kwargs).pack(o)
def unpack(stream, **kwargs):
"""
Unpack an object from `stream`.
Raises `ExtraData` when `stream` contains extra bytes.
See :class:`Unpacker` for options.
"""
data = stream.read()
return unpackb(data, **kwargs)
# alias for compatibility to simplejson/marshal/pickle.
load = unpack
loads = unpackb
dump = pack
dumps = packb
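# Usage sketch (illustrative round-trip through the aliases above):
#     >>> from pip._vendor import msgpack
#     >>> msgpack.unpackb(msgpack.packb({"compact": True, "schema": 0}))
#     {'compact': True, 'schema': 0}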

View File

@ -0,0 +1,48 @@
class UnpackException(Exception):
"""Base class for some exceptions raised while unpacking.
    NOTE: unpack may raise exceptions other than subclasses of
    UnpackException. If you want to catch all errors, catch
    Exception instead.
"""
class BufferFull(UnpackException):
pass
class OutOfData(UnpackException):
pass
class FormatError(ValueError, UnpackException):
"""Invalid msgpack format"""
class StackError(ValueError, UnpackException):
"""Too nested"""
# Deprecated. Use ValueError instead
UnpackValueError = ValueError
class ExtraData(UnpackValueError):
"""ExtraData is raised when there is trailing data.
    This exception is raised only by one-shot (not streaming)
    unpacking.
"""
def __init__(self, unpacked, extra):
self.unpacked = unpacked
self.extra = extra
def __str__(self):
return "unpack(b) received extra data."
# Deprecated. Use Exception instead to catch all exception during packing.
PackException = Exception
PackValueError = ValueError
PackOverflowError = OverflowError

View File

@ -0,0 +1,168 @@
from collections import namedtuple
import datetime
import struct
class ExtType(namedtuple("ExtType", "code data")):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super().__new__(cls, code, data)
class Timestamp:
"""Timestamp represents the Timestamp extension type in msgpack.
When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`.
When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and
unpack `Timestamp`.
This class is immutable: Do not override seconds and nanoseconds.
"""
__slots__ = ["seconds", "nanoseconds"]
def __init__(self, seconds, nanoseconds=0):
"""Initialize a Timestamp object.
:param int seconds:
Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds).
May be negative.
:param int nanoseconds:
Number of nanoseconds to add to `seconds` to get fractional time.
Maximum is 999_999_999. Default is 0.
Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns.
"""
if not isinstance(seconds, int):
raise TypeError("seconds must be an integer")
if not isinstance(nanoseconds, int):
raise TypeError("nanoseconds must be an integer")
if not (0 <= nanoseconds < 10**9):
            raise ValueError("nanoseconds must be a non-negative integer less than 10**9.")
self.seconds = seconds
self.nanoseconds = nanoseconds
def __repr__(self):
"""String representation of Timestamp."""
return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})"
def __eq__(self, other):
"""Check for equality with another Timestamp object"""
if type(other) is self.__class__:
return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds
return False
def __ne__(self, other):
"""not-equals method (see :func:`__eq__()`)"""
return not self.__eq__(other)
def __hash__(self):
return hash((self.seconds, self.nanoseconds))
@staticmethod
def from_bytes(b):
"""Unpack bytes into a `Timestamp` object.
Used for pure-Python msgpack unpacking.
:param b: Payload from msgpack ext message with code -1
:type b: bytes
:returns: Timestamp object unpacked from msgpack ext payload
:rtype: Timestamp
"""
if len(b) == 4:
seconds = struct.unpack("!L", b)[0]
nanoseconds = 0
elif len(b) == 8:
data64 = struct.unpack("!Q", b)[0]
seconds = data64 & 0x00000003FFFFFFFF
nanoseconds = data64 >> 34
elif len(b) == 12:
nanoseconds, seconds = struct.unpack("!Iq", b)
else:
raise ValueError(
"Timestamp type can only be created from 32, 64, or 96-bit byte objects"
)
return Timestamp(seconds, nanoseconds)
def to_bytes(self):
"""Pack this Timestamp object into bytes.
Used for pure-Python msgpack packing.
:returns data: Payload for EXT message with code -1 (timestamp type)
:rtype: bytes
"""
if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits
data64 = self.nanoseconds << 34 | self.seconds
if data64 & 0xFFFFFFFF00000000 == 0:
# nanoseconds is zero and seconds < 2**32, so timestamp 32
data = struct.pack("!L", data64)
else:
# timestamp 64
data = struct.pack("!Q", data64)
else:
# timestamp 96
data = struct.pack("!Iq", self.nanoseconds, self.seconds)
return data
@staticmethod
def from_unix(unix_sec):
"""Create a Timestamp from posix timestamp in seconds.
        :param unix_sec: Posix timestamp in seconds.
        :type unix_sec: int or float
"""
seconds = int(unix_sec // 1)
nanoseconds = int((unix_sec % 1) * 10**9)
return Timestamp(seconds, nanoseconds)
def to_unix(self):
"""Get the timestamp as a floating-point value.
:returns: posix timestamp
:rtype: float
"""
return self.seconds + self.nanoseconds / 1e9
@staticmethod
def from_unix_nano(unix_ns):
"""Create a Timestamp from posix timestamp in nanoseconds.
:param int unix_ns: Posix timestamp in nanoseconds.
:rtype: Timestamp
"""
return Timestamp(*divmod(unix_ns, 10**9))
def to_unix_nano(self):
"""Get the timestamp as a unixtime in nanoseconds.
:returns: posix timestamp in nanoseconds
:rtype: int
"""
return self.seconds * 10**9 + self.nanoseconds
def to_datetime(self):
"""Get the timestamp as a UTC datetime.
:rtype: `datetime.datetime`
"""
utc = datetime.timezone.utc
return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(seconds=self.to_unix())
@staticmethod
def from_datetime(dt):
"""Create a Timestamp from datetime with tzinfo.
:rtype: Timestamp
"""
return Timestamp.from_unix(dt.timestamp())
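# A short self-check (illustrative): a Timestamp survives a round trip
# through the ext payload produced by to_bytes().
if __name__ == "__main__":
    ts = Timestamp.from_unix_nano(1_500_000_000_123_456_789)
    assert Timestamp.from_bytes(ts.to_bytes()) == ts
    assert ts.to_unix_nano() == 1_500_000_000_123_456_789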

View File

@ -0,0 +1,951 @@
"""Fallback pure Python implementation of msgpack"""
from datetime import datetime as _DateTime
import sys
import struct
if hasattr(sys, "pypy_version_info"):
    # io.BytesIO is comparatively slow on PyPy; PyPy's own StringBuilder is
    # the fastest way to accumulate bytes, so prefer it when available.
from __pypy__ import newlist_hint
try:
from __pypy__.builders import BytesBuilder as StringBuilder
except ImportError:
from __pypy__.builders import StringBuilder
USING_STRINGBUILDER = True
class StringIO:
def __init__(self, s=b""):
if s:
self.builder = StringBuilder(len(s))
self.builder.append(s)
else:
self.builder = StringBuilder()
def write(self, s):
if isinstance(s, memoryview):
s = s.tobytes()
elif isinstance(s, bytearray):
s = bytes(s)
self.builder.append(s)
def getvalue(self):
return self.builder.build()
else:
USING_STRINGBUILDER = False
from io import BytesIO as StringIO
newlist_hint = lambda size: []
from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError
from .ext import ExtType, Timestamp
EX_SKIP = 0
EX_CONSTRUCT = 1
EX_READ_ARRAY_HEADER = 2
EX_READ_MAP_HEADER = 3
TYPE_IMMEDIATE = 0
TYPE_ARRAY = 1
TYPE_MAP = 2
TYPE_RAW = 3
TYPE_BIN = 4
TYPE_EXT = 5
DEFAULT_RECURSE_LIMIT = 511
def _check_type_strict(obj, t, type=type, tuple=tuple):
if type(t) is tuple:
return type(obj) in t
else:
return type(obj) is t
def _get_data_from_buffer(obj):
view = memoryview(obj)
if view.itemsize != 1:
raise ValueError("cannot unpack from multi-byte object")
return view
def unpackb(packed, **kwargs):
"""
Unpack an object from `packed`.
Raises ``ExtraData`` when *packed* contains extra bytes.
Raises ``ValueError`` when *packed* is incomplete.
Raises ``FormatError`` when *packed* is not valid msgpack.
    Raises ``StackError`` when *packed* contains too deeply nested data.
Other exceptions can be raised during unpacking.
See :class:`Unpacker` for options.
"""
unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs)
unpacker.feed(packed)
try:
ret = unpacker._unpack()
except OutOfData:
raise ValueError("Unpack failed: incomplete input")
except RecursionError:
raise StackError
if unpacker._got_extradata():
raise ExtraData(ret, unpacker._get_extradata())
return ret
_NO_FORMAT_USED = ""
_MSGPACK_HEADERS = {
0xC4: (1, _NO_FORMAT_USED, TYPE_BIN),
0xC5: (2, ">H", TYPE_BIN),
0xC6: (4, ">I", TYPE_BIN),
0xC7: (2, "Bb", TYPE_EXT),
0xC8: (3, ">Hb", TYPE_EXT),
0xC9: (5, ">Ib", TYPE_EXT),
0xCA: (4, ">f"),
0xCB: (8, ">d"),
0xCC: (1, _NO_FORMAT_USED),
0xCD: (2, ">H"),
0xCE: (4, ">I"),
0xCF: (8, ">Q"),
0xD0: (1, "b"),
0xD1: (2, ">h"),
0xD2: (4, ">i"),
0xD3: (8, ">q"),
0xD4: (1, "b1s", TYPE_EXT),
0xD5: (2, "b2s", TYPE_EXT),
0xD6: (4, "b4s", TYPE_EXT),
0xD7: (8, "b8s", TYPE_EXT),
0xD8: (16, "b16s", TYPE_EXT),
0xD9: (1, _NO_FORMAT_USED, TYPE_RAW),
0xDA: (2, ">H", TYPE_RAW),
0xDB: (4, ">I", TYPE_RAW),
0xDC: (2, ">H", TYPE_ARRAY),
0xDD: (4, ">I", TYPE_ARRAY),
0xDE: (2, ">H", TYPE_MAP),
0xDF: (4, ">I", TYPE_MAP),
}
class Unpacker:
"""Streaming unpacker.
Arguments:
:param file_like:
File-like object having `.read(n)` method.
If specified, unpacker reads serialized data from it and `.feed()` is not usable.
:param int read_size:
Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)
:param bool use_list:
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
:param bool raw:
If true, unpack msgpack raw to Python bytes.
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param int timestamp:
Control how timestamp type is unpacked:
0 - Timestamp
1 - float (Seconds from the EPOCH)
2 - int (Nanoseconds from the EPOCH)
3 - datetime.datetime (UTC).
:param bool strict_map_key:
If true (default), only str or bytes are accepted for map (dict) keys.
:param object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
(See also simplejson)
:param object_pairs_hook:
When specified, it should be callable.
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
:param str unicode_errors:
The error handler for decoding unicode. (default: 'strict')
        This option should be used only when you have msgpack data which
        contains invalid UTF-8 strings.
:param int max_buffer_size:
        Limits the size of data waiting to be unpacked. 0 means 2**31-1.
The default value is 100*1024*1024 (100MiB).
Raises `BufferFull` exception when it is insufficient.
You should set this parameter when unpacking data from untrusted source.
:param int max_str_len:
Deprecated, use *max_buffer_size* instead.
Limits max length of str. (default: max_buffer_size)
:param int max_bin_len:
Deprecated, use *max_buffer_size* instead.
Limits max length of bin. (default: max_buffer_size)
:param int max_array_len:
Limits max length of array.
(default: max_buffer_size)
:param int max_map_len:
Limits max length of map.
(default: max_buffer_size//2)
:param int max_ext_len:
Deprecated, use *max_buffer_size* instead.
Limits max size of ext type. (default: max_buffer_size)
    Example of streaming deserialization from a file-like object::
unpacker = Unpacker(file_like)
for o in unpacker:
process(o)
    Example of streaming deserialization from a socket::
unpacker = Unpacker()
while True:
buf = sock.recv(1024**2)
if not buf:
break
unpacker.feed(buf)
for o in unpacker:
process(o)
Raises ``ExtraData`` when *packed* contains extra bytes.
Raises ``OutOfData`` when *packed* is incomplete.
Raises ``FormatError`` when *packed* is not valid msgpack.
    Raises ``StackError`` when *packed* contains too deeply nested data.
Other exceptions can be raised during unpacking.
"""
def __init__(
self,
file_like=None,
read_size=0,
use_list=True,
raw=False,
timestamp=0,
strict_map_key=True,
object_hook=None,
object_pairs_hook=None,
list_hook=None,
unicode_errors=None,
max_buffer_size=100 * 1024 * 1024,
ext_hook=ExtType,
max_str_len=-1,
max_bin_len=-1,
max_array_len=-1,
max_map_len=-1,
max_ext_len=-1,
):
if unicode_errors is None:
unicode_errors = "strict"
if file_like is None:
self._feeding = True
else:
if not callable(file_like.read):
raise TypeError("`file_like.read` must be callable")
self.file_like = file_like
self._feeding = False
#: array of bytes fed.
self._buffer = bytearray()
        #: Position in the buffer we are currently reading from.
self._buff_i = 0
# When Unpacker is used as an iterable, between the calls to next(),
# the buffer is not "consumed" completely, for efficiency sake.
# Instead, it is done sloppily. To make sure we raise BufferFull at
# the correct moments, we have to keep track of how sloppy we were.
# Furthermore, when the buffer is incomplete (that is: in the case
# we raise an OutOfData) we need to rollback the buffer to the correct
# state, which _buf_checkpoint records.
self._buf_checkpoint = 0
if not max_buffer_size:
max_buffer_size = 2**31 - 1
if max_str_len == -1:
max_str_len = max_buffer_size
if max_bin_len == -1:
max_bin_len = max_buffer_size
if max_array_len == -1:
max_array_len = max_buffer_size
if max_map_len == -1:
max_map_len = max_buffer_size // 2
if max_ext_len == -1:
max_ext_len = max_buffer_size
self._max_buffer_size = max_buffer_size
if read_size > self._max_buffer_size:
raise ValueError("read_size must be smaller than max_buffer_size")
self._read_size = read_size or min(self._max_buffer_size, 16 * 1024)
self._raw = bool(raw)
self._strict_map_key = bool(strict_map_key)
self._unicode_errors = unicode_errors
self._use_list = use_list
if not (0 <= timestamp <= 3):
raise ValueError("timestamp must be 0..3")
self._timestamp = timestamp
self._list_hook = list_hook
self._object_hook = object_hook
self._object_pairs_hook = object_pairs_hook
self._ext_hook = ext_hook
self._max_str_len = max_str_len
self._max_bin_len = max_bin_len
self._max_array_len = max_array_len
self._max_map_len = max_map_len
self._max_ext_len = max_ext_len
self._stream_offset = 0
if list_hook is not None and not callable(list_hook):
raise TypeError("`list_hook` is not callable")
if object_hook is not None and not callable(object_hook):
raise TypeError("`object_hook` is not callable")
if object_pairs_hook is not None and not callable(object_pairs_hook):
raise TypeError("`object_pairs_hook` is not callable")
if object_hook is not None and object_pairs_hook is not None:
raise TypeError("object_pairs_hook and object_hook are mutually exclusive")
if not callable(ext_hook):
raise TypeError("`ext_hook` is not callable")
def feed(self, next_bytes):
assert self._feeding
view = _get_data_from_buffer(next_bytes)
if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size:
raise BufferFull
# Strip buffer before checkpoint before reading file.
if self._buf_checkpoint > 0:
del self._buffer[: self._buf_checkpoint]
self._buff_i -= self._buf_checkpoint
self._buf_checkpoint = 0
# Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython
self._buffer.extend(view)
def _consume(self):
"""Gets rid of the used parts of the buffer."""
self._stream_offset += self._buff_i - self._buf_checkpoint
self._buf_checkpoint = self._buff_i
def _got_extradata(self):
return self._buff_i < len(self._buffer)
def _get_extradata(self):
return self._buffer[self._buff_i :]
def read_bytes(self, n):
ret = self._read(n, raise_outofdata=False)
self._consume()
return ret
def _read(self, n, raise_outofdata=True):
# (int) -> bytearray
self._reserve(n, raise_outofdata=raise_outofdata)
i = self._buff_i
ret = self._buffer[i : i + n]
self._buff_i = i + len(ret)
return ret
def _reserve(self, n, raise_outofdata=True):
remain_bytes = len(self._buffer) - self._buff_i - n
# Fast path: buffer has n bytes already
if remain_bytes >= 0:
return
if self._feeding:
self._buff_i = self._buf_checkpoint
raise OutOfData
# Strip buffer before checkpoint before reading file.
if self._buf_checkpoint > 0:
del self._buffer[: self._buf_checkpoint]
self._buff_i -= self._buf_checkpoint
self._buf_checkpoint = 0
# Read from file
remain_bytes = -remain_bytes
if remain_bytes + len(self._buffer) > self._max_buffer_size:
raise BufferFull
while remain_bytes > 0:
to_read_bytes = max(self._read_size, remain_bytes)
read_data = self.file_like.read(to_read_bytes)
if not read_data:
break
assert isinstance(read_data, bytes)
self._buffer += read_data
remain_bytes -= len(read_data)
if len(self._buffer) < n + self._buff_i and raise_outofdata:
self._buff_i = 0 # rollback
raise OutOfData
def _read_header(self):
typ = TYPE_IMMEDIATE
n = 0
obj = None
self._reserve(1)
b = self._buffer[self._buff_i]
self._buff_i += 1
if b & 0b10000000 == 0:
obj = b
elif b & 0b11100000 == 0b11100000:
obj = -1 - (b ^ 0xFF)
elif b & 0b11100000 == 0b10100000:
n = b & 0b00011111
typ = TYPE_RAW
if n > self._max_str_len:
raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
obj = self._read(n)
elif b & 0b11110000 == 0b10010000:
n = b & 0b00001111
typ = TYPE_ARRAY
if n > self._max_array_len:
raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
elif b & 0b11110000 == 0b10000000:
n = b & 0b00001111
typ = TYPE_MAP
if n > self._max_map_len:
raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
elif b == 0xC0:
obj = None
elif b == 0xC2:
obj = False
elif b == 0xC3:
obj = True
elif 0xC4 <= b <= 0xC6:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
else:
n = self._buffer[self._buff_i]
self._buff_i += size
if n > self._max_bin_len:
raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})")
obj = self._read(n)
elif 0xC7 <= b <= 0xC9:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
L, n = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if L > self._max_ext_len:
raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})")
obj = self._read(L)
elif 0xCA <= b <= 0xD3:
size, fmt = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
else:
obj = self._buffer[self._buff_i]
self._buff_i += size
elif 0xD4 <= b <= 0xD8:
size, fmt, typ = _MSGPACK_HEADERS[b]
if self._max_ext_len < size:
raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})")
self._reserve(size + 1)
n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size + 1
elif 0xD9 <= b <= 0xDB:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
if len(fmt) > 0:
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
else:
n = self._buffer[self._buff_i]
self._buff_i += size
if n > self._max_str_len:
raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
obj = self._read(n)
elif 0xDC <= b <= 0xDD:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if n > self._max_array_len:
raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
elif 0xDE <= b <= 0xDF:
size, fmt, typ = _MSGPACK_HEADERS[b]
self._reserve(size)
(n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
self._buff_i += size
if n > self._max_map_len:
raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
else:
raise FormatError("Unknown header: 0x%x" % b)
return typ, n, obj
def _unpack(self, execute=EX_CONSTRUCT):
typ, n, obj = self._read_header()
if execute == EX_READ_ARRAY_HEADER:
if typ != TYPE_ARRAY:
raise ValueError("Expected array")
return n
if execute == EX_READ_MAP_HEADER:
if typ != TYPE_MAP:
raise ValueError("Expected map")
return n
# TODO should we eliminate the recursion?
if typ == TYPE_ARRAY:
if execute == EX_SKIP:
for i in range(n):
# TODO check whether we need to call `list_hook`
self._unpack(EX_SKIP)
return
ret = newlist_hint(n)
for i in range(n):
ret.append(self._unpack(EX_CONSTRUCT))
if self._list_hook is not None:
ret = self._list_hook(ret)
# TODO is the interaction between `list_hook` and `use_list` ok?
return ret if self._use_list else tuple(ret)
if typ == TYPE_MAP:
if execute == EX_SKIP:
for i in range(n):
# TODO check whether we need to call hooks
self._unpack(EX_SKIP)
self._unpack(EX_SKIP)
return
if self._object_pairs_hook is not None:
ret = self._object_pairs_hook(
(self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n)
)
else:
ret = {}
for _ in range(n):
key = self._unpack(EX_CONSTRUCT)
if self._strict_map_key and type(key) not in (str, bytes):
raise ValueError("%s is not allowed for map key" % str(type(key)))
if isinstance(key, str):
key = sys.intern(key)
ret[key] = self._unpack(EX_CONSTRUCT)
if self._object_hook is not None:
ret = self._object_hook(ret)
return ret
if execute == EX_SKIP:
return
if typ == TYPE_RAW:
if self._raw:
obj = bytes(obj)
else:
obj = obj.decode("utf_8", self._unicode_errors)
return obj
if typ == TYPE_BIN:
return bytes(obj)
if typ == TYPE_EXT:
if n == -1: # timestamp
ts = Timestamp.from_bytes(bytes(obj))
if self._timestamp == 1:
return ts.to_unix()
elif self._timestamp == 2:
return ts.to_unix_nano()
elif self._timestamp == 3:
return ts.to_datetime()
else:
return ts
else:
return self._ext_hook(n, bytes(obj))
assert typ == TYPE_IMMEDIATE
return obj
def __iter__(self):
return self
def __next__(self):
try:
ret = self._unpack(EX_CONSTRUCT)
self._consume()
return ret
except OutOfData:
self._consume()
raise StopIteration
except RecursionError:
raise StackError
next = __next__
def skip(self):
self._unpack(EX_SKIP)
self._consume()
def unpack(self):
try:
ret = self._unpack(EX_CONSTRUCT)
except RecursionError:
raise StackError
self._consume()
return ret
def read_array_header(self):
ret = self._unpack(EX_READ_ARRAY_HEADER)
self._consume()
return ret
def read_map_header(self):
ret = self._unpack(EX_READ_MAP_HEADER)
self._consume()
return ret
def tell(self):
return self._stream_offset
class Packer:
"""
MessagePack Packer
Usage::
packer = Packer()
astream.write(packer.pack(a))
astream.write(packer.pack(b))
Packer's constructor has some keyword arguments:
:param default:
When specified, it should be callable.
Convert user type to builtin type that Packer supports.
See also simplejson's document.
:param bool use_single_float:
Use single precision float type for float. (default: False)
:param bool autoreset:
Reset buffer after each pack and return its content as `bytes`. (default: True).
        If set to false, use `bytes()` to get the content and `.reset()` to clear the buffer.
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
It also enables str8 type for unicode. (default: True)
:param bool strict_types:
If set to true, types will be checked to be exact. Derived classes
from serializable types will not be serialized and will be
treated as unsupported type and forwarded to default.
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.
:param bool datetime:
If set to true, datetime with tzinfo is packed into Timestamp type.
Note that the tzinfo is stripped in the timestamp.
You can get UTC datetime with `timestamp=3` option of the Unpacker.
:param str unicode_errors:
The error handler for encoding unicode. (default: 'strict')
DO NOT USE THIS!! This option is kept for very specific usage.
"""
def __init__(
self,
default=None,
use_single_float=False,
autoreset=True,
use_bin_type=True,
strict_types=False,
datetime=False,
unicode_errors=None,
):
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._buffer = StringIO()
self._datetime = bool(datetime)
self._unicode_errors = unicode_errors or "strict"
if default is not None:
if not callable(default):
raise TypeError("default must be callable")
self._default = default
def _pack(
self,
obj,
nest_limit=DEFAULT_RECURSE_LIMIT,
check=isinstance,
check_type_strict=_check_type_strict,
):
default_used = False
if self._strict_types:
check = check_type_strict
list_types = list
else:
list_types = (list, tuple)
while True:
if nest_limit < 0:
raise ValueError("recursion limit exceeded")
if obj is None:
return self._buffer.write(b"\xc0")
if check(obj, bool):
if obj:
return self._buffer.write(b"\xc3")
return self._buffer.write(b"\xc2")
if check(obj, int):
if 0 <= obj < 0x80:
return self._buffer.write(struct.pack("B", obj))
if -0x20 <= obj < 0:
return self._buffer.write(struct.pack("b", obj))
if 0x80 <= obj <= 0xFF:
return self._buffer.write(struct.pack("BB", 0xCC, obj))
if -0x80 <= obj < 0:
return self._buffer.write(struct.pack(">Bb", 0xD0, obj))
if 0xFF < obj <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xCD, obj))
if -0x8000 <= obj < -0x80:
return self._buffer.write(struct.pack(">Bh", 0xD1, obj))
if 0xFFFF < obj <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xCE, obj))
if -0x80000000 <= obj < -0x8000:
return self._buffer.write(struct.pack(">Bi", 0xD2, obj))
if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF:
return self._buffer.write(struct.pack(">BQ", 0xCF, obj))
if -0x8000000000000000 <= obj < -0x80000000:
return self._buffer.write(struct.pack(">Bq", 0xD3, obj))
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = True
continue
raise OverflowError("Integer value out of range")
if check(obj, (bytes, bytearray)):
n = len(obj)
if n >= 2**32:
raise ValueError("%s is too large" % type(obj).__name__)
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, str):
obj = obj.encode("utf-8", self._unicode_errors)
n = len(obj)
if n >= 2**32:
raise ValueError("String is too large")
self._pack_raw_header(n)
return self._buffer.write(obj)
if check(obj, memoryview):
n = obj.nbytes
if n >= 2**32:
raise ValueError("Memoryview is too large")
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, float):
if self._use_float:
return self._buffer.write(struct.pack(">Bf", 0xCA, obj))
return self._buffer.write(struct.pack(">Bd", 0xCB, obj))
if check(obj, (ExtType, Timestamp)):
if check(obj, Timestamp):
code = -1
data = obj.to_bytes()
else:
code = obj.code
data = obj.data
assert isinstance(code, int)
assert isinstance(data, bytes)
L = len(data)
if L == 1:
self._buffer.write(b"\xd4")
elif L == 2:
self._buffer.write(b"\xd5")
elif L == 4:
self._buffer.write(b"\xd6")
elif L == 8:
self._buffer.write(b"\xd7")
elif L == 16:
self._buffer.write(b"\xd8")
elif L <= 0xFF:
self._buffer.write(struct.pack(">BB", 0xC7, L))
elif L <= 0xFFFF:
self._buffer.write(struct.pack(">BH", 0xC8, L))
else:
self._buffer.write(struct.pack(">BI", 0xC9, L))
self._buffer.write(struct.pack("b", code))
self._buffer.write(data)
return
if check(obj, list_types):
n = len(obj)
self._pack_array_header(n)
for i in range(n):
self._pack(obj[i], nest_limit - 1)
return
if check(obj, dict):
return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1)
if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None:
obj = Timestamp.from_datetime(obj)
default_used = 1
continue
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = 1
continue
if self._datetime and check(obj, _DateTime):
raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None")
raise TypeError(f"Cannot serialize {obj!r}")
def pack(self, obj):
try:
self._pack(obj)
except:
self._buffer = StringIO() # force reset
raise
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = StringIO()
return ret
def pack_map_pairs(self, pairs):
self._pack_map_pairs(len(pairs), pairs)
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = StringIO()
return ret
def pack_array_header(self, n):
if n >= 2**32:
raise ValueError
self._pack_array_header(n)
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = StringIO()
return ret
def pack_map_header(self, n):
if n >= 2**32:
raise ValueError
self._pack_map_header(n)
if self._autoreset:
ret = self._buffer.getvalue()
self._buffer = StringIO()
return ret
def pack_ext_type(self, typecode, data):
if not isinstance(typecode, int):
raise TypeError("typecode must have int type.")
if not 0 <= typecode <= 127:
raise ValueError("typecode should be 0-127")
if not isinstance(data, bytes):
raise TypeError("data must have bytes type")
L = len(data)
if L > 0xFFFFFFFF:
raise ValueError("Too large data")
if L == 1:
self._buffer.write(b"\xd4")
elif L == 2:
self._buffer.write(b"\xd5")
elif L == 4:
self._buffer.write(b"\xd6")
elif L == 8:
self._buffer.write(b"\xd7")
elif L == 16:
self._buffer.write(b"\xd8")
elif L <= 0xFF:
self._buffer.write(b"\xc7" + struct.pack("B", L))
elif L <= 0xFFFF:
self._buffer.write(b"\xc8" + struct.pack(">H", L))
else:
self._buffer.write(b"\xc9" + struct.pack(">I", L))
self._buffer.write(struct.pack("B", typecode))
self._buffer.write(data)
def _pack_array_header(self, n):
if n <= 0x0F:
return self._buffer.write(struct.pack("B", 0x90 + n))
if n <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xDC, n))
if n <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xDD, n))
raise ValueError("Array is too large")
def _pack_map_header(self, n):
if n <= 0x0F:
return self._buffer.write(struct.pack("B", 0x80 + n))
if n <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xDE, n))
if n <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xDF, n))
raise ValueError("Dict is too large")
def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
self._pack_map_header(n)
for k, v in pairs:
self._pack(k, nest_limit - 1)
self._pack(v, nest_limit - 1)
def _pack_raw_header(self, n):
if n <= 0x1F:
self._buffer.write(struct.pack("B", 0xA0 + n))
elif self._use_bin_type and n <= 0xFF:
self._buffer.write(struct.pack(">BB", 0xD9, n))
elif n <= 0xFFFF:
self._buffer.write(struct.pack(">BH", 0xDA, n))
elif n <= 0xFFFFFFFF:
self._buffer.write(struct.pack(">BI", 0xDB, n))
else:
raise ValueError("Raw is too large")
def _pack_bin_header(self, n):
if not self._use_bin_type:
return self._pack_raw_header(n)
elif n <= 0xFF:
return self._buffer.write(struct.pack(">BB", 0xC4, n))
elif n <= 0xFFFF:
return self._buffer.write(struct.pack(">BH", 0xC5, n))
elif n <= 0xFFFFFFFF:
return self._buffer.write(struct.pack(">BI", 0xC6, n))
else:
raise ValueError("Bin is too large")
def bytes(self):
"""Return internal buffer contents as bytes object"""
return self._buffer.getvalue()
def reset(self):
"""Reset internal buffer.
This method is useful only when autoreset=False.
"""
self._buffer = StringIO()
def getbuffer(self):
"""Return view of internal buffer."""
if USING_STRINGBUILDER:
return memoryview(self.bytes())
else:
return self._buffer.getbuffer()
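# Editor's illustrative sketch (not part of upstream): typical round-trip use
# of the pure-Python Packer defined above. This assumes the default
# autoreset=True behaviour, under which pack() returns the encoded bytes.
def _demo_pack_usage():  # hypothetical helper, never called by pip
    packer = Packer()
    return packer.pack({"key": [1, 2.5, b"bytes"]})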

View File

@ -0,0 +1,15 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "24.1"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
__license__ = "BSD-2-Clause or Apache-2.0"
__copyright__ = "2014 %s" % __author__

View File

@ -0,0 +1,110 @@
"""
ELF file parser.
This provides a class ``ELFFile`` that parses an ELF executable in a similar
interface to ``ZipFile``. Only the read interface is implemented.
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""
from __future__ import annotations
import enum
import os
import struct
from typing import IO
class ELFInvalid(ValueError):
pass
class EIClass(enum.IntEnum):
C32 = 1
C64 = 2
class EIData(enum.IntEnum):
Lsb = 1
Msb = 2
class EMachine(enum.IntEnum):
I386 = 3
S390 = 22
Arm = 40
X8664 = 62
AArc64 = 183
class ELFFile:
"""
Representation of an ELF executable.
"""
def __init__(self, f: IO[bytes]) -> None:
self._f = f
try:
ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
magic = bytes(ident[:4])
if magic != b"\x7fELF":
raise ELFInvalid(f"invalid magic: {magic!r}")
self.capacity = ident[4] # Format for program header (bitness).
self.encoding = ident[5] # Data structure encoding (endianness).
try:
            # e_fmt: Format for the remainder of the ELF header.
            # p_fmt: Format for each program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
e_fmt, self._p_fmt, self._p_idx = {
(1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
(1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
(2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
(2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
}[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e
try:
(
_,
self.machine, # Architecture type.
_,
_,
self._e_phoff, # Offset of program header.
_,
self.flags, # Processor-specific flags.
_,
                self._e_phentsize,  # Size of each program header entry.
                self._e_phnum,  # Number of program header entries.
) = self._read(e_fmt)
except struct.error as e:
raise ELFInvalid("unable to parse machine and section information") from e
def _read(self, fmt: str) -> tuple[int, ...]:
return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
@property
def interpreter(self) -> str | None:
"""
        The path recorded in the ``PT_INTERP`` program header.
"""
for index in range(self._e_phnum):
self._f.seek(self._e_phoff + self._e_phentsize * index)
try:
data = self._read(self._p_fmt)
except struct.error:
continue
if data[self._p_idx[0]] != 3: # Not PT_INTERP.
continue
self._f.seek(data[self._p_idx[1]])
return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
return None
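# Editor's illustrative sketch (not part of upstream): reading the PT_INTERP
# path of the interpreter running this code; on a typical glibc x86-64 system
# this is something like "/lib64/ld-linux-x86-64.so.2".
def _demo_interpreter() -> str | None:  # hypothetical helper, never called
    import sys
    with open(sys.executable, "rb") as f:
        return ELFFile(f).interpreter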

View File

@ -0,0 +1,262 @@
from __future__ import annotations
import collections
import contextlib
import functools
import os
import re
import sys
import warnings
from typing import Generator, Iterator, NamedTuple, Sequence
from ._elffile import EIClass, EIData, ELFFile, EMachine
EF_ARM_ABIMASK = 0xFF000000
EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
try:
with open(path, "rb") as f:
yield ELFFile(f)
except (OSError, TypeError, ValueError):
yield None
def _is_linux_armhf(executable: str) -> bool:
# hard-float ABI can be detected from the ELF header of the running
# process
# https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
with _parse_elf(executable) as f:
return (
f is not None
and f.capacity == EIClass.C32
and f.encoding == EIData.Lsb
and f.machine == EMachine.Arm
and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
)
def _is_linux_i686(executable: str) -> bool:
with _parse_elf(executable) as f:
return (
f is not None
and f.capacity == EIClass.C32
and f.encoding == EIData.Lsb
and f.machine == EMachine.I386
)
def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
if "armv7l" in archs:
return _is_linux_armhf(executable)
if "i686" in archs:
return _is_linux_i686(executable)
allowed_archs = {
"x86_64",
"aarch64",
"ppc64",
"ppc64le",
"s390x",
"loongarch64",
"riscv64",
}
return any(arch in allowed_archs for arch in archs)
# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# For now, guess what the highest minor version might be, assume it will
# be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
_LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)
class _GLibCVersion(NamedTuple):
major: int
minor: int
def _glibc_version_string_confstr() -> str | None:
"""
Primary implementation of glibc_version_string using os.confstr.
"""
# os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
# to be broken or missing. This strategy is used in the standard library
# platform module.
# https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
try:
# Should be a string like "glibc 2.17".
version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
assert version_string is not None
_, version = version_string.rsplit()
except (AssertionError, AttributeError, OSError, ValueError):
# os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
return None
return version
def _glibc_version_string_ctypes() -> str | None:
"""
Fallback implementation of glibc_version_string using ctypes.
"""
try:
import ctypes
except ImportError:
return None
# ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
# manpage says, "If filename is NULL, then the returned handle is for the
# main program". This way we can let the linker do the work to figure out
# which libc our process is actually using.
#
# We must also handle the special case where the executable is not a
# dynamically linked executable. This can occur when using musl libc,
# for example. In this situation, dlopen() will error, leading to an
# OSError. Interestingly, at least in the case of musl, there is no
# errno set on the OSError. The single string argument used to construct
# OSError comes from libc itself and is therefore not portable to
# hard code here. In any case, failure to call dlopen() means we
# can proceed, so we bail on our attempt.
try:
process_namespace = ctypes.CDLL(None)
except OSError:
return None
try:
gnu_get_libc_version = process_namespace.gnu_get_libc_version
except AttributeError:
# Symbol doesn't exist -> therefore, we are not linked to
# glibc.
return None
# Call gnu_get_libc_version, which returns a string like "2.5"
gnu_get_libc_version.restype = ctypes.c_char_p
version_str: str = gnu_get_libc_version()
# py2 / py3 compatibility:
if not isinstance(version_str, str):
version_str = version_str.decode("ascii")
return version_str
def _glibc_version_string() -> str | None:
"""Returns glibc version string, or None if not using glibc."""
return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
def _parse_glibc_version(version_str: str) -> tuple[int, int]:
"""Parse glibc version.
We use a regexp instead of str.split because we want to discard any
random junk that might come after the minor version -- this might happen
in patched/forked versions of glibc (e.g. Linaro's version of glibc
uses version strings like "2.20-2014.11"). See gh-3588.
"""
m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
if not m:
warnings.warn(
f"Expected glibc version with 2 components major.minor,"
f" got: {version_str}",
RuntimeWarning,
)
return -1, -1
return int(m.group("major")), int(m.group("minor"))
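# Editor's note (illustrative, not part of upstream): for the Linaro example
# cited above, _parse_glibc_version("2.20-2014.11") returns (2, 20); anything
# after the minor version is discarded by the regexp.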
@functools.lru_cache
def _get_glibc_version() -> tuple[int, int]:
version_str = _glibc_version_string()
if version_str is None:
return (-1, -1)
return _parse_glibc_version(version_str)
# From PEP 513, PEP 600
def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
sys_glibc = _get_glibc_version()
if sys_glibc < version:
return False
# Check for presence of _manylinux module.
try:
import _manylinux
except ImportError:
return True
if hasattr(_manylinux, "manylinux_compatible"):
result = _manylinux.manylinux_compatible(version[0], version[1], arch)
if result is not None:
return bool(result)
return True
if version == _GLibCVersion(2, 5):
if hasattr(_manylinux, "manylinux1_compatible"):
return bool(_manylinux.manylinux1_compatible)
if version == _GLibCVersion(2, 12):
if hasattr(_manylinux, "manylinux2010_compatible"):
return bool(_manylinux.manylinux2010_compatible)
if version == _GLibCVersion(2, 17):
if hasattr(_manylinux, "manylinux2014_compatible"):
return bool(_manylinux.manylinux2014_compatible)
return True
_LEGACY_MANYLINUX_MAP = {
# CentOS 7 w/ glibc 2.17 (PEP 599)
(2, 17): "manylinux2014",
# CentOS 6 w/ glibc 2.12 (PEP 571)
(2, 12): "manylinux2010",
# CentOS 5 w/ glibc 2.5 (PEP 513)
(2, 5): "manylinux1",
}
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
"""Generate manylinux tags compatible to the current platform.
:param archs: Sequence of compatible architectures.
The first one shall be the closest to the actual architecture and be the part of
platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
The ``linux_`` prefix is assumed as a prerequisite for the current platform to
be manylinux-compatible.
:returns: An iterator of compatible manylinux tags.
"""
if not _have_compatible_abi(sys.executable, archs):
return
# Oldest glibc to be supported regardless of architecture is (2, 17).
too_old_glibc2 = _GLibCVersion(2, 16)
if set(archs) & {"x86_64", "i686"}:
# On x86/i686 also oldest glibc to be supported is (2, 5).
too_old_glibc2 = _GLibCVersion(2, 4)
current_glibc = _GLibCVersion(*_get_glibc_version())
glibc_max_list = [current_glibc]
# We can assume compatibility across glibc major versions.
# https://sourceware.org/bugzilla/show_bug.cgi?id=24636
#
# Build a list of maximum glibc versions so that we can
# output the canonical list of all glibc from current_glibc
# down to too_old_glibc2, including all intermediary versions.
for glibc_major in range(current_glibc.major - 1, 1, -1):
glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
for arch in archs:
for glibc_max in glibc_max_list:
if glibc_max.major == too_old_glibc2.major:
min_minor = too_old_glibc2.minor
else:
# For other glibc major versions oldest supported is (x, 0).
min_minor = -1
for glibc_minor in range(glibc_max.minor, min_minor, -1):
glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
tag = "manylinux_{}_{}".format(*glibc_version)
if _is_compatible(arch, glibc_version):
yield f"{tag}_{arch}"
# Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
if glibc_version in _LEGACY_MANYLINUX_MAP:
legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
if _is_compatible(arch, glibc_version):
yield f"{legacy_tag}_{arch}"

View File

@ -0,0 +1,85 @@
"""PEP 656 support.
This module implements logic to detect if the currently running Python is
linked against musl, and what musl version is used.
"""
from __future__ import annotations
import functools
import re
import subprocess
import sys
from typing import Iterator, NamedTuple, Sequence
from ._elffile import ELFFile
class _MuslVersion(NamedTuple):
major: int
minor: int
def _parse_musl_version(output: str) -> _MuslVersion | None:
lines = [n for n in (n.strip() for n in output.splitlines()) if n]
if len(lines) < 2 or lines[0][:4] != "musl":
return None
m = re.match(r"Version (\d+)\.(\d+)", lines[1])
if not m:
return None
return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
@functools.lru_cache
def _get_musl_version(executable: str) -> _MuslVersion | None:
"""Detect currently-running musl runtime version.
This is done by checking the specified executable's dynamic linking
information, and invoking the loader to parse its output for a version
string. If the loader is musl, the output would be something like::
musl libc (x86_64)
Version 1.2.2
Dynamic Program Loader
"""
try:
with open(executable, "rb") as f:
ld = ELFFile(f).interpreter
except (OSError, TypeError, ValueError):
return None
if ld is None or "musl" not in ld:
return None
proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
return _parse_musl_version(proc.stderr)
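# Editor's illustrative sketch (not part of upstream): parsing the loader
# banner shown in the docstring above.
def _demo_parse_musl_banner() -> _MuslVersion | None:  # hypothetical, never called
    banner = "musl libc (x86_64)\nVersion 1.2.2\nDynamic Program Loader"
    return _parse_musl_version(banner)  # -> _MuslVersion(major=1, minor=2)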
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
"""Generate musllinux tags compatible to the current platform.
:param archs: Sequence of compatible architectures.
The first one shall be the closest to the actual architecture and be the part of
platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
The ``linux_`` prefix is assumed as a prerequisite for the current platform to
be musllinux-compatible.
:returns: An iterator of compatible musllinux tags.
"""
sys_musl = _get_musl_version(sys.executable)
if sys_musl is None: # Python not dynamically linked against musl.
return
for arch in archs:
for minor in range(sys_musl.minor, -1, -1):
yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
if __name__ == "__main__": # pragma: no cover
import sysconfig
plat = sysconfig.get_platform()
assert plat.startswith("linux-"), "not linux"
print("plat:", plat)
print("musl:", _get_musl_version(sys.executable))
print("tags:", end=" ")
    for t in platform_tags([re.sub(r"[.-]", "_", plat.split("-", 1)[-1])]):
print(t, end="\n ")

View File

@ -0,0 +1,354 @@
"""Handwritten parser of dependency specifiers.
The docstring for each __parse_* function contains an EBNF-inspired grammar representing
the implementation.
"""
from __future__ import annotations
import ast
from typing import NamedTuple, Sequence, Tuple, Union
from ._tokenizer import DEFAULT_RULES, Tokenizer
class Node:
def __init__(self, value: str) -> None:
self.value = value
def __str__(self) -> str:
return self.value
def __repr__(self) -> str:
return f"<{self.__class__.__name__}('{self}')>"
def serialize(self) -> str:
raise NotImplementedError
class Variable(Node):
def serialize(self) -> str:
return str(self)
class Value(Node):
def serialize(self) -> str:
return f'"{self}"'
class Op(Node):
def serialize(self) -> str:
return str(self)
MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
class ParsedRequirement(NamedTuple):
name: str
url: str
extras: list[str]
specifier: str
marker: MarkerList | None
# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
"""
requirement = WS? IDENTIFIER WS? extras WS? requirement_details
"""
tokenizer.consume("WS")
name_token = tokenizer.expect(
"IDENTIFIER", expected="package name at the start of dependency specifier"
)
name = name_token.text
tokenizer.consume("WS")
extras = _parse_extras(tokenizer)
tokenizer.consume("WS")
url, specifier, marker = _parse_requirement_details(tokenizer)
tokenizer.expect("END", expected="end of dependency specifier")
return ParsedRequirement(name, url, extras, specifier, marker)
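# Editor's illustrative sketch (not part of upstream): a full dependency
# specifier split into its parts by the parser above, giving name='name',
# extras=['extra'], specifier='>=1.0', and a marker parsed from the text
# after the semicolon.
def _demo_parse_requirement() -> ParsedRequirement:  # hypothetical, never called
    return parse_requirement('name[extra]>=1.0; python_version >= "3.8"')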
def _parse_requirement_details(
tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
"""
requirement_details = AT URL (WS requirement_marker?)?
| specifier WS? (requirement_marker)?
"""
specifier = ""
url = ""
marker = None
if tokenizer.check("AT"):
tokenizer.read()
tokenizer.consume("WS")
url_start = tokenizer.position
url = tokenizer.expect("URL", expected="URL after @").text
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
tokenizer.expect("WS", expected="whitespace after URL")
# The input might end after whitespace.
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
marker = _parse_requirement_marker(
tokenizer, span_start=url_start, after="URL and whitespace"
)
else:
specifier_start = tokenizer.position
specifier = _parse_specifier(tokenizer)
tokenizer.consume("WS")
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
marker = _parse_requirement_marker(
tokenizer,
span_start=specifier_start,
after=(
"version specifier"
if specifier
else "name and no valid version specifier"
),
)
return (url, specifier, marker)
def _parse_requirement_marker(
tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
"""
requirement_marker = SEMICOLON marker WS?
"""
if not tokenizer.check("SEMICOLON"):
tokenizer.raise_syntax_error(
f"Expected end or semicolon (after {after})",
span_start=span_start,
)
tokenizer.read()
marker = _parse_marker(tokenizer)
tokenizer.consume("WS")
return marker
def _parse_extras(tokenizer: Tokenizer) -> list[str]:
"""
extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
"""
if not tokenizer.check("LEFT_BRACKET", peek=True):
return []
with tokenizer.enclosing_tokens(
"LEFT_BRACKET",
"RIGHT_BRACKET",
around="extras",
):
tokenizer.consume("WS")
extras = _parse_extras_list(tokenizer)
tokenizer.consume("WS")
return extras
def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
"""
extras_list = identifier (wsp* ',' wsp* identifier)*
"""
extras: list[str] = []
if not tokenizer.check("IDENTIFIER"):
return extras
extras.append(tokenizer.read().text)
while True:
tokenizer.consume("WS")
if tokenizer.check("IDENTIFIER", peek=True):
tokenizer.raise_syntax_error("Expected comma between extra names")
elif not tokenizer.check("COMMA"):
break
tokenizer.read()
tokenizer.consume("WS")
extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
extras.append(extra_token.text)
return extras
def _parse_specifier(tokenizer: Tokenizer) -> str:
"""
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
| WS? version_many WS?
"""
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="version specifier",
):
tokenizer.consume("WS")
parsed_specifiers = _parse_version_many(tokenizer)
tokenizer.consume("WS")
return parsed_specifiers
def _parse_version_many(tokenizer: Tokenizer) -> str:
"""
version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
"""
parsed_specifiers = ""
while tokenizer.check("SPECIFIER"):
span_start = tokenizer.position
parsed_specifiers += tokenizer.read().text
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
tokenizer.raise_syntax_error(
".* suffix can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position + 1,
)
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
tokenizer.raise_syntax_error(
"Local version label can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position,
)
tokenizer.consume("WS")
if not tokenizer.check("COMMA"):
break
parsed_specifiers += tokenizer.read().text
tokenizer.consume("WS")
return parsed_specifiers
# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
retval = _parse_marker(tokenizer)
tokenizer.expect("END", expected="end of marker expression")
return retval
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
"""
marker = marker_atom (BOOLOP marker_atom)+
"""
expression = [_parse_marker_atom(tokenizer)]
while tokenizer.check("BOOLOP"):
token = tokenizer.read()
expr_right = _parse_marker_atom(tokenizer)
expression.extend((token.text, expr_right))
return expression
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
"""
marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
| WS? marker_item WS?
"""
tokenizer.consume("WS")
if tokenizer.check("LEFT_PARENTHESIS", peek=True):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="marker expression",
):
tokenizer.consume("WS")
marker: MarkerAtom = _parse_marker(tokenizer)
tokenizer.consume("WS")
else:
marker = _parse_marker_item(tokenizer)
tokenizer.consume("WS")
return marker
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
"""
marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
"""
tokenizer.consume("WS")
marker_var_left = _parse_marker_var(tokenizer)
tokenizer.consume("WS")
marker_op = _parse_marker_op(tokenizer)
tokenizer.consume("WS")
marker_var_right = _parse_marker_var(tokenizer)
tokenizer.consume("WS")
return (marker_var_left, marker_op, marker_var_right)
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
"""
marker_var = VARIABLE | QUOTED_STRING
"""
if tokenizer.check("VARIABLE"):
return process_env_var(tokenizer.read().text.replace(".", "_"))
elif tokenizer.check("QUOTED_STRING"):
return process_python_str(tokenizer.read().text)
else:
tokenizer.raise_syntax_error(
message="Expected a marker variable or quoted string"
)
def process_env_var(env_var: str) -> Variable:
if env_var in ("platform_python_implementation", "python_implementation"):
return Variable("platform_python_implementation")
else:
return Variable(env_var)
def process_python_str(python_str: str) -> Value:
value = ast.literal_eval(python_str)
return Value(str(value))
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
"""
marker_op = IN | NOT IN | OP
"""
if tokenizer.check("IN"):
tokenizer.read()
return Op("in")
elif tokenizer.check("NOT"):
tokenizer.read()
tokenizer.expect("WS", expected="whitespace after 'not'")
tokenizer.expect("IN", expected="'in' after 'not'")
return Op("not in")
elif tokenizer.check("OP"):
return Op(tokenizer.read().text)
else:
return tokenizer.raise_syntax_error(
"Expected marker operator, one of "
"<=, <, !=, ==, >=, >, ~=, ===, in, not in"
)

View File

@ -0,0 +1,61 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
class InfinityType:
def __repr__(self) -> str:
return "Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return False
def __le__(self, other: object) -> bool:
return False
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return True
def __ge__(self, other: object) -> bool:
return True
def __neg__(self: object) -> "NegativeInfinityType":
return NegativeInfinity
Infinity = InfinityType()
class NegativeInfinityType:
def __repr__(self) -> str:
return "-Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return True
def __le__(self, other: object) -> bool:
return True
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return False
def __ge__(self, other: object) -> bool:
return False
def __neg__(self: object) -> InfinityType:
return Infinity
NegativeInfinity = NegativeInfinityType()
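# Editor's note (illustrative, not part of upstream): these singletons serve
# as sort-key sentinels that compare above/below every other value, e.g.
# sorted([Infinity, 3, NegativeInfinity]) == [NegativeInfinity, 3, Infinity].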

View File

@ -0,0 +1,194 @@
from __future__ import annotations
import contextlib
import re
from dataclasses import dataclass
from typing import Iterator, NoReturn
from .specifiers import Specifier
@dataclass
class Token:
name: str
text: str
position: int
class ParserSyntaxError(Exception):
"""The provided source text could not be parsed correctly."""
def __init__(
self,
message: str,
*,
source: str,
span: tuple[int, int],
) -> None:
self.span = span
self.message = message
self.source = source
super().__init__()
def __str__(self) -> str:
marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
return "\n ".join([self.message, self.source, marker])
DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
"LEFT_PARENTHESIS": r"\(",
"RIGHT_PARENTHESIS": r"\)",
"LEFT_BRACKET": r"\[",
"RIGHT_BRACKET": r"\]",
"SEMICOLON": r";",
"COMMA": r",",
"QUOTED_STRING": re.compile(
r"""
(
('[^']*')
|
("[^"]*")
)
""",
re.VERBOSE,
),
"OP": r"(===|==|~=|!=|<=|>=|<|>)",
"BOOLOP": r"\b(or|and)\b",
"IN": r"\bin\b",
"NOT": r"\bnot\b",
"VARIABLE": re.compile(
r"""
\b(
python_version
|python_full_version
|os[._]name
|sys[._]platform
|platform_(release|system)
|platform[._](version|machine|python_implementation)
|python_implementation
|implementation_(name|version)
|extra
)\b
""",
re.VERBOSE,
),
"SPECIFIER": re.compile(
Specifier._operator_regex_str + Specifier._version_regex_str,
re.VERBOSE | re.IGNORECASE,
),
"AT": r"\@",
"URL": r"[^ \t]+",
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
"VERSION_PREFIX_TRAIL": r"\.\*",
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
"WS": r"[ \t]+",
"END": r"$",
}
class Tokenizer:
"""Context-sensitive token parsing.
Provides methods to examine the input stream to check whether the next token
matches.
"""
def __init__(
self,
source: str,
*,
rules: dict[str, str | re.Pattern[str]],
) -> None:
self.source = source
self.rules: dict[str, re.Pattern[str]] = {
name: re.compile(pattern) for name, pattern in rules.items()
}
self.next_token: Token | None = None
self.position = 0
def consume(self, name: str) -> None:
"""Move beyond provided token name, if at current position."""
if self.check(name):
self.read()
def check(self, name: str, *, peek: bool = False) -> bool:
"""Check whether the next token has the provided name.
By default, if the check succeeds, the token *must* be read before
another check. If `peek` is set to `True`, the token is not loaded and
would need to be checked again.
"""
assert (
self.next_token is None
), f"Cannot check for {name!r}, already have {self.next_token!r}"
assert name in self.rules, f"Unknown token name: {name!r}"
expression = self.rules[name]
match = expression.match(self.source, self.position)
if match is None:
return False
if not peek:
self.next_token = Token(name, match[0], self.position)
return True
def expect(self, name: str, *, expected: str) -> Token:
"""Expect a certain token name next, failing with a syntax error otherwise.
The token is *not* read.
"""
if not self.check(name):
raise self.raise_syntax_error(f"Expected {expected}")
return self.read()
def read(self) -> Token:
"""Consume the next token and return it."""
token = self.next_token
assert token is not None
self.position += len(token.text)
self.next_token = None
return token
def raise_syntax_error(
self,
message: str,
*,
span_start: int | None = None,
span_end: int | None = None,
) -> NoReturn:
"""Raise ParserSyntaxError at the given position."""
span = (
self.position if span_start is None else span_start,
self.position if span_end is None else span_end,
)
raise ParserSyntaxError(
message,
source=self.source,
span=span,
)
@contextlib.contextmanager
def enclosing_tokens(
self, open_token: str, close_token: str, *, around: str
) -> Iterator[None]:
if self.check(open_token):
open_position = self.position
self.read()
else:
open_position = None
yield
if open_position is None:
return
if not self.check(close_token):
self.raise_syntax_error(
f"Expected matching {close_token} for {open_token}, after {around}",
span_start=open_position,
)
self.read()
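# Editor's illustrative sketch (not part of upstream): the expect/read
# protocol as driven by _parser.py, on a minimal specifier.
def _demo_tokenizer() -> list[str]:  # hypothetical helper, never called
    tokenizer = Tokenizer("name>=1.0", rules=DEFAULT_RULES)
    name = tokenizer.expect("IDENTIFIER", expected="package name").text
    spec = tokenizer.expect("SPECIFIER", expected="version specifier").text
    tokenizer.expect("END", expected="end of input")
    return [name, spec]  # -> ["name", ">=1.0"]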

View File

@ -0,0 +1,325 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import operator
import os
import platform
import sys
from typing import Any, Callable, TypedDict, cast
from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
from ._parser import parse_marker as _parse_marker
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name
__all__ = [
"InvalidMarker",
"UndefinedComparison",
"UndefinedEnvironmentName",
"Marker",
"default_environment",
]
Operator = Callable[[str, str], bool]
class InvalidMarker(ValueError):
"""
    An invalid marker was found; users should refer to PEP 508.
"""
class UndefinedComparison(ValueError):
"""
An invalid operation was attempted on a value that doesn't support it.
"""
class UndefinedEnvironmentName(ValueError):
"""
    A name was used that does not exist inside of the
environment.
"""
class Environment(TypedDict):
implementation_name: str
"""The implementation's identifier, e.g. ``'cpython'``."""
implementation_version: str
"""
The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
``'7.3.13'`` for PyPy3.10 v7.3.13.
"""
os_name: str
"""
The value of :py:data:`os.name`. The name of the operating system dependent module
imported, e.g. ``'posix'``.
"""
platform_machine: str
"""
Returns the machine type, e.g. ``'i386'``.
An empty string if the value cannot be determined.
"""
platform_release: str
"""
The system's release, e.g. ``'2.2.0'`` or ``'NT'``.
An empty string if the value cannot be determined.
"""
platform_system: str
"""
The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.
An empty string if the value cannot be determined.
"""
platform_version: str
"""
The system's release version, e.g. ``'#3 on degas'``.
An empty string if the value cannot be determined.
"""
python_full_version: str
"""
The Python version as string ``'major.minor.patchlevel'``.
Note that unlike the Python :py:data:`sys.version`, this value will always include
the patchlevel (it defaults to 0).
"""
platform_python_implementation: str
"""
A string identifying the Python implementation, e.g. ``'CPython'``.
"""
python_version: str
"""The Python version as string ``'major.minor'``."""
sys_platform: str
"""
This string contains a platform identifier that can be used to append
platform-specific components to :py:data:`sys.path`, for instance.
For Unix systems, except on Linux and AIX, this is the lowercased OS name as
returned by ``uname -s`` with the first part of the version as returned by
``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
was built.
"""
def _normalize_extra_values(results: Any) -> Any:
"""
Normalize extra values.
"""
if isinstance(results[0], tuple):
lhs, op, rhs = results[0]
if isinstance(lhs, Variable) and lhs.value == "extra":
normalized_extra = canonicalize_name(rhs.value)
rhs = Value(normalized_extra)
elif isinstance(rhs, Variable) and rhs.value == "extra":
normalized_extra = canonicalize_name(lhs.value)
lhs = Value(normalized_extra)
results[0] = lhs, op, rhs
return results
def _format_marker(
marker: list[str] | MarkerAtom | str, first: bool | None = True
) -> str:
assert isinstance(marker, (list, tuple, str))
    # Sometimes we have a structure like [[...]] which is a single item list
    # where the single item is itself its own list. In that case we want to
    # skip the rest of this function so that we don't get extraneous () on the
    # outside.
if (
isinstance(marker, list)
and len(marker) == 1
and isinstance(marker[0], (list, tuple))
):
return _format_marker(marker[0])
if isinstance(marker, list):
inner = (_format_marker(m, first=False) for m in marker)
if first:
return " ".join(inner)
else:
return "(" + " ".join(inner) + ")"
elif isinstance(marker, tuple):
return " ".join([m.serialize() for m in marker])
else:
return marker
_operators: dict[str, Operator] = {
"in": lambda lhs, rhs: lhs in rhs,
"not in": lambda lhs, rhs: lhs not in rhs,
"<": operator.lt,
"<=": operator.le,
"==": operator.eq,
"!=": operator.ne,
">=": operator.ge,
">": operator.gt,
}
def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
try:
spec = Specifier("".join([op.serialize(), rhs]))
except InvalidSpecifier:
pass
else:
return spec.contains(lhs, prereleases=True)
oper: Operator | None = _operators.get(op.serialize())
if oper is None:
raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
return oper(lhs, rhs)
def _normalize(*values: str, key: str) -> tuple[str, ...]:
# PEP 685 Comparison of extra names for optional distribution dependencies
# https://peps.python.org/pep-0685/
# > When comparing extra names, tools MUST normalize the names being
# > compared using the semantics outlined in PEP 503 for names
if key == "extra":
return tuple(canonicalize_name(v) for v in values)
# other environment markers don't have such standards
return values
def _evaluate_markers(markers: MarkerList, environment: dict[str, str]) -> bool:
groups: list[list[bool]] = [[]]
for marker in markers:
assert isinstance(marker, (list, tuple, str))
if isinstance(marker, list):
groups[-1].append(_evaluate_markers(marker, environment))
elif isinstance(marker, tuple):
lhs, op, rhs = marker
if isinstance(lhs, Variable):
environment_key = lhs.value
lhs_value = environment[environment_key]
rhs_value = rhs.value
else:
lhs_value = lhs.value
environment_key = rhs.value
rhs_value = environment[environment_key]
lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
groups[-1].append(_eval_op(lhs_value, op, rhs_value))
else:
assert marker in ["and", "or"]
if marker == "or":
groups.append([])
return any(all(item) for item in groups)
def format_full_version(info: sys._version_info) -> str:
version = "{0.major}.{0.minor}.{0.micro}".format(info)
kind = info.releaselevel
if kind != "final":
version += kind[0] + str(info.serial)
return version
def default_environment() -> Environment:
iver = format_full_version(sys.implementation.version)
implementation_name = sys.implementation.name
return {
"implementation_name": implementation_name,
"implementation_version": iver,
"os_name": os.name,
"platform_machine": platform.machine(),
"platform_release": platform.release(),
"platform_system": platform.system(),
"platform_version": platform.version(),
"python_full_version": platform.python_version(),
"platform_python_implementation": platform.python_implementation(),
"python_version": ".".join(platform.python_version_tuple()[:2]),
"sys_platform": sys.platform,
}
class Marker:
def __init__(self, marker: str) -> None:
# Note: We create a Marker object without calling this constructor in
# packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement.
try:
self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
#
# For example, the following expression:
# python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
#
# is parsed into:
# [
# (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
# 'and',
# [
# (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
# 'or',
# (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
# ]
# ]
except ParserSyntaxError as e:
raise InvalidMarker(str(e)) from e
def __str__(self) -> str:
return _format_marker(self._markers)
def __repr__(self) -> str:
return f"<Marker('{self}')>"
def __hash__(self) -> int:
return hash((self.__class__.__name__, str(self)))
def __eq__(self, other: Any) -> bool:
if not isinstance(other, Marker):
return NotImplemented
return str(self) == str(other)
def evaluate(self, environment: dict[str, str] | None = None) -> bool:
"""Evaluate a marker.
Return the boolean from evaluating the given marker against the
environment. environment is an optional argument to override all or
part of the determined environment.
The environment is determined from the current Python process.
"""
current_environment = cast("dict[str, str]", default_environment())
current_environment["extra"] = ""
# Work around platform.python_version() returning something that is not PEP 440
# compliant for non-tagged Python builds. We preserve default_environment()'s
# behavior of returning platform.python_version() verbatim, and leave it to the
# caller to provide a syntactically valid version if they want to override it.
if current_environment["python_full_version"].endswith("+"):
current_environment["python_full_version"] += "local"
if environment is not None:
current_environment.update(environment)
# The API used to allow setting extra to None. We need to handle this
# case for backwards compatibility.
if current_environment["extra"] is None:
current_environment["extra"] = ""
return _evaluate_markers(self._markers, current_environment)
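# Editor's illustrative sketch (not part of upstream): evaluating a marker
# against the running interpreter, then with an environment override; on a
# POSIX CPython 3.8+ this returns (True, False).
def _demo_marker() -> tuple[bool, bool]:  # hypothetical helper, never called
    marker = Marker('python_version >= "3.8" and os_name == "posix"')
    return marker.evaluate(), marker.evaluate({"os_name": "nt"})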

View File

@ -0,0 +1,804 @@
from __future__ import annotations
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import typing
from typing import (
Any,
Callable,
Generic,
Literal,
TypedDict,
cast,
)
from . import requirements, specifiers, utils
from . import version as version_module
T = typing.TypeVar("T")
try:
ExceptionGroup
except NameError: # pragma: no cover
class ExceptionGroup(Exception):
"""A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
If :external:exc:`ExceptionGroup` is already defined by Python itself,
that version is used instead.
"""
message: str
exceptions: list[Exception]
def __init__(self, message: str, exceptions: list[Exception]) -> None:
self.message = message
self.exceptions = exceptions
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
else: # pragma: no cover
ExceptionGroup = ExceptionGroup
class InvalidMetadata(ValueError):
"""A metadata field contains invalid data."""
field: str
"""The name of the field that contains invalid data."""
def __init__(self, field: str, message: str) -> None:
self.field = field
super().__init__(message)
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
"""A dictionary of raw core metadata.
Each field in core metadata maps to a key of this dictionary (when data is
provided). The key is lower-case and underscores are used instead of dashes
compared to the equivalent core metadata field. Any core metadata field that
can be specified multiple times or can hold multiple values in a single
    field has a key with a plural name. See :class:`Metadata` whose attributes
match the keys of this dictionary.
Core metadata fields that can be specified multiple times are stored as a
list or dict depending on which is appropriate for the field. Any fields
which hold multiple values in a single field are stored as a list.
"""
# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: str
platforms: list[str]
summary: str
description: str
keywords: list[str]
home_page: str
author: str
author_email: str
license: str
# Metadata 1.1 - PEP 314
supported_platforms: list[str]
download_url: str
classifiers: list[str]
requires: list[str]
provides: list[str]
obsoletes: list[str]
# Metadata 1.2 - PEP 345
maintainer: str
maintainer_email: str
requires_dist: list[str]
provides_dist: list[str]
obsoletes_dist: list[str]
requires_python: str
requires_external: list[str]
project_urls: dict[str, str]
# Metadata 2.0
# PEP 426 attempted to completely revamp the metadata format
# but got stuck without ever being able to build consensus on
# it and ultimately ended up withdrawn.
#
# However, a number of tools had started emitting METADATA with
# `2.0` Metadata-Version, so for historical reasons, this version
# was skipped.
# Metadata 2.1 - PEP 566
description_content_type: str
provides_extra: list[str]
# Metadata 2.2 - PEP 643
dynamic: list[str]
# Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.
_STRING_FIELDS = {
"author",
"author_email",
"description",
"description_content_type",
"download_url",
"home_page",
"license",
"maintainer",
"maintainer_email",
"metadata_version",
"name",
"requires_python",
"summary",
"version",
}
_LIST_FIELDS = {
"classifiers",
"dynamic",
"obsoletes",
"obsoletes_dist",
"platforms",
"provides",
"provides_dist",
"provides_extra",
"requires",
"requires_dist",
"requires_external",
"supported_platforms",
}
_DICT_FIELDS = {
"project_urls",
}
def _parse_keywords(data: str) -> list[str]:
"""Split a string of comma-separate keyboards into a list of keywords."""
return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: list[str]) -> dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
        # The main thing that we have to worry about is data that does
        # not have a ',' at all to split the label from the URL. There
        # isn't a singular right answer here, and we will fail validation
        # later on (if the caller is validating) so it doesn't *really*
        # matter, but since the missing value has to be an empty str
        # and our return value is dict[str, str], if we let the key
        # be the missing value, then they'd have multiple '' values that
        # overwrite each other in an accumulating dict.
        #
        # The other potential issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer with what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
# TODO: The spec doesn't say anything about if the keys should be
# considered case sensitive or not... logically they should
# be case-preserving and case-insensitive, but doing that
# would open up more cases where we might have duplicate
# entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url
return urls
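# Editor's note (illustrative, not part of upstream): e.g.
# _parse_project_urls(["Homepage, https://example.com"]) returns
# {"Homepage": "https://example.com"}; a repeated label raises KeyError,
# which the caller treats as an unparseable field.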
def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
        except UnicodeDecodeError as exc:
            raise ValueError("payload in an invalid encoding") from exc
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.
# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
"author": "author",
"author-email": "author_email",
"classifier": "classifiers",
"description": "description",
"description-content-type": "description_content_type",
"download-url": "download_url",
"dynamic": "dynamic",
"home-page": "home_page",
"keywords": "keywords",
"license": "license",
"maintainer": "maintainer",
"maintainer-email": "maintainer_email",
"metadata-version": "metadata_version",
"name": "name",
"obsoletes": "obsoletes",
"obsoletes-dist": "obsoletes_dist",
"platform": "platforms",
"project-url": "project_urls",
"provides": "provides",
"provides-dist": "provides_dist",
"provides-extra": "provides_extra",
"requires": "requires",
"requires-dist": "requires_dist",
"requires-external": "requires_external",
"requires-python": "requires_python",
"summary": "summary",
"supported-platform": "supported_platforms",
"version": "version",
}
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
"""Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
This function returns a two-item tuple of dicts. The first dict is of
recognized fields from the core metadata specification. Fields that can be
parsed and translated into Python's built-in types are converted
appropriately. All other fields are left as-is. Fields that are allowed to
appear multiple times are stored as lists.
The second dict contains all other fields from the metadata. This includes
any unrecognized fields. It also includes any fields which are expected to
be parsed into a built-in type but were not formatted appropriately. Finally,
any fields that are expected to appear only once but are repeated are
included in this dict.
"""
raw: dict[str, str | list[str] | dict[str, str]] = {}
unparsed: dict[str, list[str]] = {}
if isinstance(data, str):
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
else:
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
# We have to wrap parsed.keys() in a set, because in the case of multiple
# values for a key (a list), the key will appear multiple times in the
# list of keys, but we're avoiding that by using get_all().
for name in frozenset(parsed.keys()):
# Header names in RFC are case insensitive, so we'll normalize to all
# lower case to make comparisons easier.
name = name.lower()
# We use get_all() here, even for fields that aren't multiple use,
# because otherwise someone could have e.g. two Name fields, and we
# would just silently ignore it rather than doing something about it.
headers = parsed.get_all(name) or []
# The way the email module works when parsing bytes is that it
# unconditionally decodes the bytes as ascii using the surrogateescape
# handler. When you pull that data back out (such as with get_all() ),
# it looks to see if the str has any surrogate escapes, and if it does
# it wraps it in a Header object instead of returning the string.
#
# As such, we'll look for those Header objects, and fix up the encoding.
value = []
# Flag if we have run into any issues processing the headers, thus
# signalling that the data belongs in 'unparsed'.
valid_encoding = True
for h in headers:
# It's unclear if this can return more types than just a Header or
# a str, so we'll just assert here to make sure.
assert isinstance(h, (email.header.Header, str))
# If it's a header object, we need to do our little dance to get
# the real data out of it. In cases where there is invalid data
# we're going to end up with mojibake, but there's no obvious, good
# way around that without reimplementing parts of the Header object
# ourselves.
#
            # That should be fine since, if mojibake happens, this key is
# going into the unparsed dict anyways.
if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
# can be independently encoded, so we'll need to check each
# of them.
chunks: list[tuple[bytes, str | None]] = []
for bin, encoding in email.header.decode_header(h):
try:
bin.decode("utf8", "strict")
except UnicodeDecodeError:
# Enable mojibake.
encoding = "latin1"
valid_encoding = False
else:
encoding = "utf8"
chunks.append((bin, encoding))
# Turn our chunks back into a Header object, then let that
# Header object do the right thing to turn them into a
# string for us.
value.append(str(email.header.make_header(chunks)))
# This is already a string, so just add it.
else:
value.append(h)
# We've processed all of our values to get them into a list of str,
# but we may have mojibake data, in which case this is an unparsed
# field.
if not valid_encoding:
unparsed[name] = value
continue
raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
if raw_name is None:
# This is a bit of a weird situation, we've encountered a key that
# we don't know what it means, so we don't know whether it's meant
# to be a list or not.
#
# Since we can't really tell one way or another, we'll just leave it
# as a list, even though it may be a single item list, because that's
# what makes the most sense for email headers.
unparsed[name] = value
continue
# If this is one of our string fields, then we'll check to see if our
# value is a list of a single item. If it is then we'll assume that
# it was emitted as a single string, and unwrap the str from inside
# the list.
#
# If it's any other kind of data, then we haven't the faintest clue
# what we should parse it as, and we have to just add it to our list
# of unparsed stuff.
if raw_name in _STRING_FIELDS and len(value) == 1:
raw[raw_name] = value[0]
# If this is one of our list of string fields, then we can just assign
# the value, since email *only* has strings, and our get_all() call
# above ensures that this is a list.
elif raw_name in _LIST_FIELDS:
raw[raw_name] = value
# Special Case: Keywords
# The keywords field is implemented in the metadata spec as a str,
# but it conceptually is a list of strings, and is serialized using
# ", ".join(keywords), so we'll do some light data massaging to turn
# this into what it logically is.
elif raw_name == "keywords" and len(value) == 1:
raw[raw_name] = _parse_keywords(value[0])
# Special Case: Project-URL
# The project urls is implemented in the metadata spec as a list of
# specially-formatted strings that represent a key and a value, which
# is fundamentally a mapping, however the email format doesn't support
# mappings in a sane way, so it was crammed into a list of strings
# instead.
#
# We will do a little light data massaging to turn this into a map as
# it logically should be.
elif raw_name == "project_urls":
try:
raw[raw_name] = _parse_project_urls(value)
except KeyError:
unparsed[name] = value
# Nothing that we've done has managed to parse this, so it'll just
# throw it in our unparseable data and move on.
else:
unparsed[name] = value
# We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
# is the possibility of it being set both ways, in which case we put both
# in 'unparsed' since we don't know which is right.
try:
payload = _get_payload(parsed, data)
except ValueError:
unparsed.setdefault("description", []).append(
parsed.get_payload(decode=isinstance(data, bytes))
)
else:
if payload:
# Check to see if we've already got a description, if so then both
# it, and this body move to unparseable.
if "description" in raw:
description_header = cast(str, raw.pop("description"))
unparsed.setdefault("description", []).extend(
[description_header, payload]
)
elif "description" in unparsed:
unparsed["description"].append(payload)
else:
raw["description"] = payload
    # We need to cast our `raw` to a metadata, because a TypedDict only supports
    # literal key names, but we're computing our key names on purpose; given the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
return cast(RawMetadata, raw), unparsed
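# Editor's illustrative sketch (not part of upstream): parsing a minimal
# METADATA document. All three fields land in the recognized RawMetadata
# dict and nothing is left in the unparsed dict.
def _demo_parse_email():  # hypothetical helper, never called by pip
    source = "Metadata-Version: 2.1\nName: example\nVersion: 1.0\n"
    return parse_email(source)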
_NOT_FOUND = object()
# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
class _Validator(Generic[T]):
"""Validate a metadata field.
All _process_*() methods correspond to a core metadata field. The method is
called with the field's raw value. If the raw value is valid it is returned
in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
as appropriate).
"""
name: str
raw_name: str
added: _MetadataVersion
def __init__(
self,
*,
added: _MetadataVersion = "1.0",
) -> None:
self.added = added
def __set_name__(self, _owner: Metadata, name: str) -> None:
self.name = name
self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
# With Python 3.8, the caching can be replaced with functools.cached_property().
# No need to check the cache as attribute lookup will resolve into the
# instance's __dict__ before __get__ is called.
cache = instance.__dict__
value = instance._raw.get(self.name)
# To make the _process_* methods easier, we'll check if the value is None
# and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
# converters never have to deal with the None union.
if self.name in _REQUIRED_ATTRS or value is not None:
try:
converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
except AttributeError:
pass
else:
value = converter(value)
cache[self.name] = value
try:
del instance._raw[self.name] # type: ignore[misc]
except KeyError:
pass
return cast(T, value)
def _invalid_metadata(
self, msg: str, cause: Exception | None = None
) -> InvalidMetadata:
exc = InvalidMetadata(
self.raw_name, msg.format_map({"field": repr(self.raw_name)})
)
exc.__cause__ = cause
return exc
def _process_metadata_version(self, value: str) -> _MetadataVersion:
# Implicitly makes Metadata-Version required.
if value not in _VALID_METADATA_VERSIONS:
raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
return cast(_MetadataVersion, value)
def _process_name(self, value: str) -> str:
if not value:
raise self._invalid_metadata("{field} is a required field")
# Validate the name as a side-effect.
try:
utils.canonicalize_name(value, validate=True)
except utils.InvalidName as exc:
raise self._invalid_metadata(
f"{value!r} is invalid for {{field}}", cause=exc
)
else:
return value
def _process_version(self, value: str) -> version_module.Version:
if not value:
raise self._invalid_metadata("{field} is a required field")
try:
return version_module.parse(value)
except version_module.InvalidVersion as exc:
raise self._invalid_metadata(
f"{value!r} is invalid for {{field}}", cause=exc
)
def _process_summary(self, value: str) -> str:
"""Check the field contains no newlines."""
if "\n" in value:
raise self._invalid_metadata("{field} must be a single line")
return value
def _process_description_content_type(self, value: str) -> str:
content_types = {"text/plain", "text/x-rst", "text/markdown"}
message = email.message.EmailMessage()
message["content-type"] = value
content_type, parameters = (
# Defaults to `text/plain` if parsing failed.
message.get_content_type().lower(),
message["content-type"].params,
)
# Check if content-type is valid or defaulted to `text/plain` and thus was
# not parseable.
if content_type not in content_types or content_type not in value.lower():
raise self._invalid_metadata(
f"{{field}} must be one of {list(content_types)}, not {value!r}"
)
charset = parameters.get("charset", "UTF-8")
if charset != "UTF-8":
raise self._invalid_metadata(
f"{{field}} can only specify the UTF-8 charset, not {list(charset)}"
)
markdown_variants = {"GFM", "CommonMark"}
variant = parameters.get("variant", "GFM") # Use an acceptable default.
if content_type == "text/markdown" and variant not in markdown_variants:
raise self._invalid_metadata(
f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
f"not {variant!r}",
)
return value
def _process_dynamic(self, value: list[str]) -> list[str]:
for dynamic_field in map(str.lower, value):
if dynamic_field in {"name", "version", "metadata-version"}:
raise self._invalid_metadata(
f"{value!r} is not allowed as a dynamic field"
)
elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
raise self._invalid_metadata(f"{value!r} is not a valid dynamic field")
return list(map(str.lower, value))
def _process_provides_extra(
self,
value: list[str],
) -> list[utils.NormalizedName]:
normalized_names = []
try:
for name in value:
normalized_names.append(utils.canonicalize_name(name, validate=True))
except utils.InvalidName as exc:
raise self._invalid_metadata(
f"{name!r} is invalid for {{field}}", cause=exc
)
else:
return normalized_names
def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
try:
return specifiers.SpecifierSet(value)
except specifiers.InvalidSpecifier as exc:
raise self._invalid_metadata(
f"{value!r} is invalid for {{field}}", cause=exc
)
def _process_requires_dist(
self,
value: list[str],
) -> list[requirements.Requirement]:
reqs = []
try:
for req in value:
reqs.append(requirements.Requirement(req))
except requirements.InvalidRequirement as exc:
raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc)
else:
return reqs
class Metadata:
"""Representation of distribution metadata.
Compared to :class:`RawMetadata`, this class provides objects representing
metadata fields instead of only using built-in types. Any invalid metadata
will cause :exc:`InvalidMetadata` to be raised (with a
:py:attr:`~BaseException.__cause__` attribute as appropriate).
"""
_raw: RawMetadata
@classmethod
def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
"""Create an instance from :class:`RawMetadata`.
If *validate* is true, all metadata will be validated. All exceptions
related to validation will be gathered and raised as an :class:`ExceptionGroup`.
"""
ins = cls()
ins._raw = data.copy() # Mutations occur due to caching enriched values.
if validate:
exceptions: list[Exception] = []
try:
metadata_version = ins.metadata_version
metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
except InvalidMetadata as metadata_version_exc:
exceptions.append(metadata_version_exc)
metadata_version = None
            # Make sure to check both the fields that are present and the
            # required fields (so their absence can be reported).
fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
# Remove fields that have already been checked.
fields_to_check -= {"metadata_version"}
for key in fields_to_check:
try:
if metadata_version:
# Can't use getattr() as that triggers descriptor protocol which
# will fail due to no value for the instance argument.
try:
field_metadata_version = cls.__dict__[key].added
except KeyError:
exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
exceptions.append(exc)
continue
field_age = _VALID_METADATA_VERSIONS.index(
field_metadata_version
)
if field_age > metadata_age:
field = _RAW_TO_EMAIL_MAPPING[key]
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
exceptions.append(exc)
continue
getattr(ins, key)
except InvalidMetadata as exc:
exceptions.append(exc)
if exceptions:
raise ExceptionGroup("invalid metadata", exceptions)
return ins
@classmethod
def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
"""Parse metadata from email headers.
If *validate* is true, the metadata will be validated. All exceptions
related to validation will be gathered and raised as an :class:`ExceptionGroup`.
"""
raw, unparsed = parse_email(data)
if validate:
exceptions: list[Exception] = []
for unparsed_key in unparsed:
if unparsed_key in _EMAIL_TO_RAW_MAPPING:
message = f"{unparsed_key!r} has invalid data"
else:
message = f"unrecognized field: {unparsed_key!r}"
exceptions.append(InvalidMetadata(unparsed_key, message))
if exceptions:
raise ExceptionGroup("unparsed", exceptions)
try:
return cls.from_raw(raw, validate=validate)
except ExceptionGroup as exc_group:
raise ExceptionGroup(
"invalid or unparsed metadata", exc_group.exceptions
) from None
metadata_version: _Validator[_MetadataVersion] = _Validator()
""":external:ref:`core-metadata-metadata-version`
(required; validated to be a valid metadata version)"""
name: _Validator[str] = _Validator()
""":external:ref:`core-metadata-name`
(required; validated using :func:`~packaging.utils.canonicalize_name` and its
*validate* parameter)"""
version: _Validator[version_module.Version] = _Validator()
""":external:ref:`core-metadata-version` (required)"""
dynamic: _Validator[list[str] | None] = _Validator(
added="2.2",
)
""":external:ref:`core-metadata-dynamic`
(validated against core metadata field names and lowercased)"""
platforms: _Validator[list[str] | None] = _Validator()
""":external:ref:`core-metadata-platform`"""
supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
""":external:ref:`core-metadata-supported-platform`"""
summary: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-summary` (validated to contain no newlines)"""
description: _Validator[str | None] = _Validator() # TODO 2.1: can be in body
""":external:ref:`core-metadata-description`"""
description_content_type: _Validator[str | None] = _Validator(added="2.1")
""":external:ref:`core-metadata-description-content-type` (validated)"""
keywords: _Validator[list[str] | None] = _Validator()
""":external:ref:`core-metadata-keywords`"""
home_page: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-home-page`"""
download_url: _Validator[str | None] = _Validator(added="1.1")
""":external:ref:`core-metadata-download-url`"""
author: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-author`"""
author_email: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-author-email`"""
maintainer: _Validator[str | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-maintainer`"""
maintainer_email: _Validator[str | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-maintainer-email`"""
license: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-license`"""
classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
""":external:ref:`core-metadata-classifier`"""
requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
added="1.2"
)
""":external:ref:`core-metadata-requires-dist`"""
requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
added="1.2"
)
""":external:ref:`core-metadata-requires-python`"""
# Because `Requires-External` allows for non-PEP 440 version specifiers, we
# don't do any processing on the values.
requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-requires-external`"""
project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-project-url`"""
# PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
# regardless of metadata version.
provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
added="2.1",
)
""":external:ref:`core-metadata-provides-extra`"""
provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-provides-dist`"""
obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-obsoletes-dist`"""
requires: _Validator[list[str] | None] = _Validator(added="1.1")
"""``Requires`` (deprecated)"""
provides: _Validator[list[str] | None] = _Validator(added="1.1")
"""``Provides`` (deprecated)"""
obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
"""``Obsoletes`` (deprecated)"""

View File

@ -0,0 +1,91 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
from typing import Any, Iterator
from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet
from .utils import canonicalize_name
class InvalidRequirement(ValueError):
"""
    An invalid requirement was found; users should refer to PEP 508.
"""
class Requirement:
"""Parse a requirement.
Parse a given requirement string into its parts, such as name, specifier,
URL, and extras. Raises InvalidRequirement on a badly-formed requirement
string.
"""
# TODO: Can we test whether something is contained within a requirement?
# If so how do we do that? Do we need to test against the _name_ of
# the thing as well as the version? What about the markers?
# TODO: Can we normalize the name and extra name?
def __init__(self, requirement_string: str) -> None:
try:
parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e
self.name: str = parsed.name
self.url: str | None = parsed.url or None
self.extras: set[str] = set(parsed.extras or [])
self.specifier: SpecifierSet = SpecifierSet(parsed.specifier)
self.marker: Marker | None = None
if parsed.marker is not None:
self.marker = Marker.__new__(Marker)
self.marker._markers = _normalize_extra_values(parsed.marker)
def _iter_parts(self, name: str) -> Iterator[str]:
yield name
if self.extras:
formatted_extras = ",".join(sorted(self.extras))
yield f"[{formatted_extras}]"
if self.specifier:
yield str(self.specifier)
        if self.url:
            yield f"@ {self.url}"
            if self.marker:
                # A space separates the URL from the "; marker" piece below.
                yield " "
        if self.marker:
            yield f"; {self.marker}"
def __str__(self) -> str:
return "".join(self._iter_parts(self.name))
def __repr__(self) -> str:
return f"<Requirement('{self}')>"
def __hash__(self) -> int:
return hash(
(
self.__class__.__name__,
*self._iter_parts(canonicalize_name(self.name)),
)
)
def __eq__(self, other: Any) -> bool:
if not isinstance(other, Requirement):
return NotImplemented
return (
canonicalize_name(self.name) == canonicalize_name(other.name)
and self.extras == other.extras
and self.specifier == other.specifier
and self.url == other.url
and self.marker == other.marker
)
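A short sketch of how the pieces above fit together, assuming the vendored import path; the requirement string is illustrative:

from pip._vendor.packaging.requirements import Requirement

req = Requirement('requests[security]>=2.8.1; python_version < "3.12"')
print(req.name)            # requests
print(sorted(req.extras))  # ['security']
print(req.specifier)       # >=2.8.1
print(req.marker)          # python_version < "3.12"
print(str(req))            # round-trips through _iter_parts()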

File diff suppressed because it is too large

View File

@ -0,0 +1,568 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import logging
import platform
import re
import struct
import subprocess
import sys
import sysconfig
from importlib.machinery import EXTENSION_SUFFIXES
from typing import (
Iterable,
Iterator,
Sequence,
Tuple,
cast,
)
from . import _manylinux, _musllinux
logger = logging.getLogger(__name__)
PythonVersion = Sequence[int]
MacVersion = Tuple[int, int]
INTERPRETER_SHORT_NAMES: dict[str, str] = {
"python": "py", # Generic.
"cpython": "cp",
"pypy": "pp",
"ironpython": "ip",
"jython": "jy",
}
_32_BIT_INTERPRETER = struct.calcsize("P") == 4
class Tag:
"""
A representation of the tag triple for a wheel.
Instances are considered immutable and thus are hashable. Equality checking
is also supported.
"""
__slots__ = ["_interpreter", "_abi", "_platform", "_hash"]
def __init__(self, interpreter: str, abi: str, platform: str) -> None:
self._interpreter = interpreter.lower()
self._abi = abi.lower()
self._platform = platform.lower()
# The __hash__ of every single element in a Set[Tag] will be evaluated each time
        # that a set calls its `.isdisjoint()` method, which may be called hundreds of
# times when scanning a page of links for packages with tags matching that
# Set[Tag]. Pre-computing the value here produces significant speedups for
# downstream consumers.
self._hash = hash((self._interpreter, self._abi, self._platform))
@property
def interpreter(self) -> str:
return self._interpreter
@property
def abi(self) -> str:
return self._abi
@property
def platform(self) -> str:
return self._platform
def __eq__(self, other: object) -> bool:
if not isinstance(other, Tag):
return NotImplemented
return (
(self._hash == other._hash) # Short-circuit ASAP for perf reasons.
and (self._platform == other._platform)
and (self._abi == other._abi)
and (self._interpreter == other._interpreter)
)
def __hash__(self) -> int:
return self._hash
def __str__(self) -> str:
return f"{self._interpreter}-{self._abi}-{self._platform}"
def __repr__(self) -> str:
return f"<{self} @ {id(self)}>"
def parse_tag(tag: str) -> frozenset[Tag]:
"""
Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances.
Returning a set is required due to the possibility that the tag is a
compressed tag set.
"""
tags = set()
interpreters, abis, platforms = tag.split("-")
for interpreter in interpreters.split("."):
for abi in abis.split("."):
for platform_ in platforms.split("."):
tags.add(Tag(interpreter, abi, platform_))
return frozenset(tags)
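# A sketch of the expansion described above, assuming the vendored import path;
# a compressed tag set expands into the cross product of its dot-separated parts:
#
#     from pip._vendor.packaging.tags import parse_tag
#
#     tags = parse_tag("cp38.cp39-abi3-manylinux1_x86_64.manylinux2014_x86_64")
#     for name in sorted(str(t) for t in tags):
#         print(name)
#     # cp38-abi3-manylinux1_x86_64
#     # cp38-abi3-manylinux2014_x86_64
#     # cp39-abi3-manylinux1_x86_64
#     # cp39-abi3-manylinux2014_x86_64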
def _get_config_var(name: str, warn: bool = False) -> int | str | None:
value: int | str | None = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
)
return value
def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_").replace(" ", "_")
def _is_threaded_cpython(abis: list[str]) -> bool:
"""
Determine if the ABI corresponds to a threaded (`--disable-gil`) build.
The threaded builds are indicated by a "t" in the abiflags.
"""
if len(abis) == 0:
return False
# expect e.g., cp313
m = re.match(r"cp\d+(.*)", abis[0])
if not m:
return False
abiflags = m.group(1)
return "t" in abiflags
def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool:
"""
Determine if the Python version supports abi3.
PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`)
builds do not support abi3.
"""
return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading
def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> list[str]:
py_version = tuple(py_version) # To allow for version comparison.
abis = []
version = _version_nodot(py_version[:2])
threading = debug = pymalloc = ucs4 = ""
with_debug = _get_config_var("Py_DEBUG", warn)
has_refcount = hasattr(sys, "gettotalrefcount")
# Windows doesn't set Py_DEBUG, so checking for support of debug-compiled
# extension modules is the best option.
# https://github.com/pypa/pip/issues/3383#issuecomment-173267692
has_ext = "_d.pyd" in EXTENSION_SUFFIXES
if with_debug or (with_debug is None and (has_refcount or has_ext)):
debug = "d"
if py_version >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn):
threading = "t"
if py_version < (3, 8):
with_pymalloc = _get_config_var("WITH_PYMALLOC", warn)
if with_pymalloc or with_pymalloc is None:
pymalloc = "m"
if py_version < (3, 3):
unicode_size = _get_config_var("Py_UNICODE_SIZE", warn)
if unicode_size == 4 or (
unicode_size is None and sys.maxunicode == 0x10FFFF
):
ucs4 = "u"
elif debug:
# Debug builds can also load "normal" extension modules.
# We can also assume no UCS-4 or pymalloc requirement.
abis.append(f"cp{version}{threading}")
abis.insert(0, f"cp{version}{threading}{debug}{pymalloc}{ucs4}")
return abis
def cpython_tags(
python_version: PythonVersion | None = None,
abis: Iterable[str] | None = None,
platforms: Iterable[str] | None = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
"""
Yields the tags for a CPython interpreter.
The tags consist of:
- cp<python_version>-<abi>-<platform>
- cp<python_version>-abi3-<platform>
- cp<python_version>-none-<platform>
- cp<less than python_version>-abi3-<platform> # Older Python versions down to 3.2.
If python_version only specifies a major version then user-provided ABIs and
    the 'none' ABI tag will be used.
If 'abi3' or 'none' are specified in 'abis' then they will be yielded at
their normal position and not at the beginning.
"""
if not python_version:
python_version = sys.version_info[:2]
interpreter = f"cp{_version_nodot(python_version[:2])}"
if abis is None:
if len(python_version) > 1:
abis = _cpython_abis(python_version, warn)
else:
abis = []
abis = list(abis)
# 'abi3' and 'none' are explicitly handled later.
for explicit_abi in ("abi3", "none"):
try:
abis.remove(explicit_abi)
except ValueError:
pass
platforms = list(platforms or platform_tags())
for abi in abis:
for platform_ in platforms:
yield Tag(interpreter, abi, platform_)
threading = _is_threaded_cpython(abis)
use_abi3 = _abi3_applies(python_version, threading)
if use_abi3:
yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms)
yield from (Tag(interpreter, "none", platform_) for platform_ in platforms)
if use_abi3:
for minor_version in range(python_version[1] - 1, 1, -1):
for platform_ in platforms:
interpreter = "cp{version}".format(
version=_version_nodot((python_version[0], minor_version))
)
yield Tag(interpreter, "abi3", platform_)
def _generic_abi() -> list[str]:
"""
Return the ABI tag based on EXT_SUFFIX.
"""
# The following are examples of `EXT_SUFFIX`.
# We want to keep the parts which are related to the ABI and remove the
# parts which are related to the platform:
# - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310
# - mac: '.cpython-310-darwin.so' => cp310
# - win: '.cp310-win_amd64.pyd' => cp310
# - win: '.pyd' => cp37 (uses _cpython_abis())
# - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73
# - graalpy: '.graalpy-38-native-x86_64-darwin.dylib'
# => graalpy_38_native
ext_suffix = _get_config_var("EXT_SUFFIX", warn=True)
if not isinstance(ext_suffix, str) or ext_suffix[0] != ".":
raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')")
parts = ext_suffix.split(".")
if len(parts) < 3:
        # CPython 3.7 and earlier use ".pyd" on Windows.
return _cpython_abis(sys.version_info[:2])
soabi = parts[1]
if soabi.startswith("cpython"):
# non-windows
abi = "cp" + soabi.split("-")[1]
elif soabi.startswith("cp"):
# windows
abi = soabi.split("-")[0]
elif soabi.startswith("pypy"):
abi = "-".join(soabi.split("-")[:2])
elif soabi.startswith("graalpy"):
abi = "-".join(soabi.split("-")[:3])
elif soabi:
# pyston, ironpython, others?
abi = soabi
else:
return []
return [_normalize_string(abi)]
def generic_tags(
interpreter: str | None = None,
abis: Iterable[str] | None = None,
platforms: Iterable[str] | None = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
"""
Yields the tags for a generic interpreter.
The tags consist of:
- <interpreter>-<abi>-<platform>
The "none" ABI will be added if it was not explicitly provided.
"""
if not interpreter:
interp_name = interpreter_name()
interp_version = interpreter_version(warn=warn)
interpreter = "".join([interp_name, interp_version])
if abis is None:
abis = _generic_abi()
else:
abis = list(abis)
platforms = list(platforms or platform_tags())
if "none" not in abis:
abis.append("none")
for abi in abis:
for platform_ in platforms:
yield Tag(interpreter, abi, platform_)
def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]:
"""
Yields Python versions in descending order.
After the latest version, the major-only version will be yielded, and then
all previous versions of that major version.
"""
if len(py_version) > 1:
yield f"py{_version_nodot(py_version[:2])}"
yield f"py{py_version[0]}"
if len(py_version) > 1:
for minor in range(py_version[1] - 1, -1, -1):
yield f"py{_version_nodot((py_version[0], minor))}"
def compatible_tags(
python_version: PythonVersion | None = None,
interpreter: str | None = None,
platforms: Iterable[str] | None = None,
) -> Iterator[Tag]:
"""
Yields the sequence of tags that are compatible with a specific version of Python.
The tags consist of:
- py*-none-<platform>
- <interpreter>-none-any # ... if `interpreter` is provided.
- py*-none-any
"""
if not python_version:
python_version = sys.version_info[:2]
platforms = list(platforms or platform_tags())
for version in _py_interpreter_range(python_version):
for platform_ in platforms:
yield Tag(version, "none", platform_)
if interpreter:
yield Tag(interpreter, "none", "any")
for version in _py_interpreter_range(python_version):
yield Tag(version, "none", "any")
def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str:
if not is_32bit:
return arch
if arch.startswith("ppc"):
return "ppc"
return "i386"
def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> list[str]:
formats = [cpu_arch]
if cpu_arch == "x86_64":
if version < (10, 4):
return []
formats.extend(["intel", "fat64", "fat32"])
elif cpu_arch == "i386":
if version < (10, 4):
return []
formats.extend(["intel", "fat32", "fat"])
elif cpu_arch == "ppc64":
# TODO: Need to care about 32-bit PPC for ppc64 through 10.2?
if version > (10, 5) or version < (10, 4):
return []
formats.append("fat64")
elif cpu_arch == "ppc":
if version > (10, 6):
return []
formats.extend(["fat32", "fat"])
if cpu_arch in {"arm64", "x86_64"}:
formats.append("universal2")
if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}:
formats.append("universal")
return formats
def mac_platforms(
version: MacVersion | None = None, arch: str | None = None
) -> Iterator[str]:
"""
Yields the platform tags for a macOS system.
The `version` parameter is a two-item tuple specifying the macOS version to
generate platform tags for. The `arch` parameter is the CPU architecture to
generate platform tags for. Both parameters default to the appropriate value
for the current system.
"""
version_str, _, cpu_arch = platform.mac_ver()
if version is None:
version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2])))
if version == (10, 16):
# When built against an older macOS SDK, Python will report macOS 10.16
# instead of the real version.
version_str = subprocess.run(
[
sys.executable,
"-sS",
"-c",
"import platform; print(platform.mac_ver()[0])",
],
check=True,
env={"SYSTEM_VERSION_COMPAT": "0"},
stdout=subprocess.PIPE,
text=True,
).stdout
version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2])))
if arch is None:
arch = _mac_arch(cpu_arch)
if (10, 0) <= version and version < (11, 0):
# Prior to Mac OS 11, each yearly release of Mac OS bumped the
# "minor" version number. The major version was always 10.
for minor_version in range(version[1], -1, -1):
compat_version = 10, minor_version
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=10, minor=minor_version, binary_format=binary_format
)
if version >= (11, 0):
# Starting with Mac OS 11, each yearly release bumps the major version
# number. The minor versions are now the midyear updates.
for major_version in range(version[0], 10, -1):
compat_version = major_version, 0
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=major_version, minor=0, binary_format=binary_format
)
if version >= (11, 0):
# Mac OS 11 on x86_64 is compatible with binaries from previous releases.
# Arm64 support was introduced in 11.0, so no Arm binaries from previous
# releases exist.
#
# However, the "universal2" binary format can have a
# macOS version earlier than 11.0 when the x86_64 part of the binary supports
# that version of macOS.
if arch == "x86_64":
for minor_version in range(16, 3, -1):
compat_version = 10, minor_version
binary_formats = _mac_binary_formats(compat_version, arch)
for binary_format in binary_formats:
yield "macosx_{major}_{minor}_{binary_format}".format(
major=compat_version[0],
minor=compat_version[1],
binary_format=binary_format,
)
else:
for minor_version in range(16, 3, -1):
compat_version = 10, minor_version
binary_format = "universal2"
yield "macosx_{major}_{minor}_{binary_format}".format(
major=compat_version[0],
minor=compat_version[1],
binary_format=binary_format,
)
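# A sketch with version and arch pinned so the output is deterministic
# (vendored import path assumed; output abridged):
#
#     from pip._vendor.packaging.tags import mac_platforms
#
#     for tag in mac_platforms(version=(10, 15), arch="x86_64"):
#         print(tag)
#     # macosx_10_15_x86_64, macosx_10_15_intel, macosx_10_15_fat64,
#     # macosx_10_15_fat32, macosx_10_15_universal2, macosx_10_15_universal,
#     # then the same formats for 10.14, 10.13, ... down to 10.0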
def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]:
linux = _normalize_string(sysconfig.get_platform())
if not linux.startswith("linux_"):
        # We should never be here; just yield the sysconfig value and return.
yield linux
return
if is_32bit:
if linux == "linux_x86_64":
linux = "linux_i686"
elif linux == "linux_aarch64":
linux = "linux_armv8l"
_, arch = linux.split("_", 1)
archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch])
yield from _manylinux.platform_tags(archs)
yield from _musllinux.platform_tags(archs)
for arch in archs:
yield f"linux_{arch}"
def _generic_platforms() -> Iterator[str]:
yield _normalize_string(sysconfig.get_platform())
def platform_tags() -> Iterator[str]:
"""
Provides the platform tags for this installation.
"""
if platform.system() == "Darwin":
return mac_platforms()
elif platform.system() == "Linux":
return _linux_platforms()
else:
return _generic_platforms()
def interpreter_name() -> str:
"""
Returns the name of the running interpreter.
Some implementations have a reserved, two-letter abbreviation which will
be returned when appropriate.
"""
name = sys.implementation.name
return INTERPRETER_SHORT_NAMES.get(name) or name
def interpreter_version(*, warn: bool = False) -> str:
"""
Returns the version of the running interpreter.
"""
version = _get_config_var("py_version_nodot", warn=warn)
if version:
version = str(version)
else:
version = _version_nodot(sys.version_info[:2])
return version
def _version_nodot(version: PythonVersion) -> str:
return "".join(map(str, version))
def sys_tags(*, warn: bool = False) -> Iterator[Tag]:
"""
Returns the sequence of tag triples for the running interpreter.
The order of the sequence corresponds to priority order for the
interpreter, from most to least important.
"""
interp_name = interpreter_name()
if interp_name == "cp":
yield from cpython_tags(warn=warn)
else:
yield from generic_tags()
if interp_name == "pp":
interp = "pp3"
elif interp_name == "cp":
interp = "cp" + interpreter_version(warn=warn)
else:
interp = None
yield from compatible_tags(interpreter=interp)
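A short sketch of consuming ``sys_tags()`` (vendored import path assumed; the exact tags depend on the running interpreter and platform):

from pip._vendor.packaging.tags import sys_tags

tags = list(sys_tags())
print(tags[0])   # most specific tag for this interpreter, e.g. cp311-cp311-...
print(tags[-1])  # most generic tag, e.g. py30-none-any on CPython 3.x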

View File

@ -0,0 +1,174 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import re
from typing import NewType, Tuple, Union, cast
from .tags import Tag, parse_tag
from .version import InvalidVersion, Version
BuildTag = Union[Tuple[()], Tuple[int, str]]
NormalizedName = NewType("NormalizedName", str)
class InvalidName(ValueError):
"""
An invalid distribution name; users should refer to the packaging user guide.
"""
class InvalidWheelFilename(ValueError):
"""
    An invalid wheel filename was found; users should refer to PEP 427.
"""
class InvalidSdistFilename(ValueError):
"""
    An invalid sdist filename was found; users should refer to the packaging user guide.
"""
# Core metadata spec for `Name`
_validate_regex = re.compile(
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE
)
_canonicalize_regex = re.compile(r"[-_.]+")
_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$")
# PEP 427: The build number must start with a digit.
_build_tag_regex = re.compile(r"(\d+)(.*)")
def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName:
if validate and not _validate_regex.match(name):
raise InvalidName(f"name is invalid: {name!r}")
# This is taken from PEP 503.
value = _canonicalize_regex.sub("-", name).lower()
return cast(NormalizedName, value)
def is_normalized_name(name: str) -> bool:
return _normalized_regex.match(name) is not None
def canonicalize_version(
version: Version | str, *, strip_trailing_zero: bool = True
) -> str:
"""
This is very similar to Version.__str__, but has one subtle difference
with the way it handles the release segment.
"""
if isinstance(version, str):
try:
parsed = Version(version)
except InvalidVersion:
# Legacy versions cannot be normalized
return version
else:
parsed = version
parts = []
# Epoch
if parsed.epoch != 0:
parts.append(f"{parsed.epoch}!")
# Release segment
release_segment = ".".join(str(x) for x in parsed.release)
if strip_trailing_zero:
# NB: This strips trailing '.0's to normalize
release_segment = re.sub(r"(\.0)+$", "", release_segment)
parts.append(release_segment)
# Pre-release
if parsed.pre is not None:
parts.append("".join(str(x) for x in parsed.pre))
# Post-release
if parsed.post is not None:
parts.append(f".post{parsed.post}")
# Development release
if parsed.dev is not None:
parts.append(f".dev{parsed.dev}")
# Local version segment
if parsed.local is not None:
parts.append(f"+{parsed.local}")
return "".join(parts)
def parse_wheel_filename(
filename: str,
) -> tuple[NormalizedName, Version, BuildTag, frozenset[Tag]]:
if not filename.endswith(".whl"):
raise InvalidWheelFilename(
f"Invalid wheel filename (extension must be '.whl'): {filename}"
)
filename = filename[:-4]
dashes = filename.count("-")
if dashes not in (4, 5):
raise InvalidWheelFilename(
f"Invalid wheel filename (wrong number of parts): {filename}"
)
parts = filename.split("-", dashes - 2)
name_part = parts[0]
# See PEP 427 for the rules on escaping the project name.
if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None:
raise InvalidWheelFilename(f"Invalid project name: {filename}")
name = canonicalize_name(name_part)
try:
version = Version(parts[1])
except InvalidVersion as e:
raise InvalidWheelFilename(
f"Invalid wheel filename (invalid version): {filename}"
) from e
if dashes == 5:
build_part = parts[2]
build_match = _build_tag_regex.match(build_part)
if build_match is None:
raise InvalidWheelFilename(
f"Invalid build number: {build_part} in '{filename}'"
)
build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2)))
else:
build = ()
tags = parse_tag(parts[-1])
return (name, version, build, tags)
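# A sketch (vendored import path assumed; the filename is illustrative):
#
#     from pip._vendor.packaging.utils import parse_wheel_filename
#
#     name, version, build, tags = parse_wheel_filename("pip-24.0-py3-none-any.whl")
#     # name == 'pip', version == Version('24.0'), build == () (no build tag),
#     # and tags is a frozenset containing the single py3-none-any tag.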
def parse_sdist_filename(filename: str) -> tuple[NormalizedName, Version]:
if filename.endswith(".tar.gz"):
file_stem = filename[: -len(".tar.gz")]
elif filename.endswith(".zip"):
file_stem = filename[: -len(".zip")]
else:
raise InvalidSdistFilename(
f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):"
f" {filename}"
)
# We are requiring a PEP 440 version, which cannot contain dashes,
# so we split on the last dash.
name_part, sep, version_part = file_stem.rpartition("-")
if not sep:
raise InvalidSdistFilename(f"Invalid sdist filename: {filename}")
name = canonicalize_name(name_part)
try:
version = Version(version_part)
except InvalidVersion as e:
raise InvalidSdistFilename(
f"Invalid sdist filename (invalid version): {filename}"
) from e
return (name, version)
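And the sdist counterpart, as a quick sketch (vendored import path assumed; the filename is illustrative):

from pip._vendor.packaging.utils import parse_sdist_filename

name, version = parse_sdist_filename("pip-24.0.tar.gz")
print(name)     # pip  (canonicalized)
print(version)  # 24.0 (parsed into a Version)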

View File

@ -0,0 +1,563 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
"""
.. testsetup::
from pip._vendor.packaging.version import parse, Version
"""
from __future__ import annotations
import itertools
import re
from typing import Any, Callable, NamedTuple, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"]
LocalType = Tuple[Union[int, str], ...]
CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]]
CmpLocalType = Union[
NegativeInfinityType,
Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...],
]
CmpKey = Tuple[
int,
Tuple[int, ...],
CmpPrePostDevType,
CmpPrePostDevType,
CmpPrePostDevType,
CmpLocalType,
]
VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool]
class _Version(NamedTuple):
epoch: int
release: tuple[int, ...]
dev: tuple[str, int] | None
pre: tuple[str, int] | None
post: tuple[str, int] | None
local: LocalType | None
def parse(version: str) -> Version:
"""Parse the given version string.
>>> parse('1.0.dev1')
<Version('1.0.dev1')>
:param version: The version string to parse.
:raises InvalidVersion: When the version string is not a valid version.
"""
return Version(version)
class InvalidVersion(ValueError):
"""Raised when a version string is not a valid version.
>>> Version("invalid")
Traceback (most recent call last):
...
packaging.version.InvalidVersion: Invalid version: 'invalid'
"""
class _BaseVersion:
_key: tuple[Any, ...]
def __hash__(self) -> int:
return hash(self._key)
# Please keep the duplicated `isinstance` check
# in the six comparisons hereunder
# unless you find a way to avoid adding overhead function calls.
def __lt__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key < other._key
def __le__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key <= other._key
def __eq__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key == other._key
def __ge__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key >= other._key
def __gt__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key > other._key
def __ne__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key != other._key
# Deliberately not anchored to the start and end of the string, to make it
# easier for 3rd party code to reuse
_VERSION_PATTERN = r"""
v?
(?:
(?:(?P<epoch>[0-9]+)!)? # epoch
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
(?P<pre> # pre-release
[-_\.]?
(?P<pre_l>alpha|a|beta|b|preview|pre|c|rc)
[-_\.]?
(?P<pre_n>[0-9]+)?
)?
(?P<post> # post release
(?:-(?P<post_n1>[0-9]+))
|
(?:
[-_\.]?
(?P<post_l>post|rev|r)
[-_\.]?
(?P<post_n2>[0-9]+)?
)
)?
(?P<dev> # dev release
[-_\.]?
(?P<dev_l>dev)
[-_\.]?
(?P<dev_n>[0-9]+)?
)?
)
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
VERSION_PATTERN = _VERSION_PATTERN
"""
A string containing the regular expression used to match a valid version.
The pattern is not anchored at either end, and is intended for embedding in larger
expressions (for example, matching a version number as part of a file name). The
regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
flags set.
:meta hide-value:
"""
class Version(_BaseVersion):
"""This class abstracts handling of a project's versions.
A :class:`Version` instance is comparison aware and can be compared and
sorted using the standard Python interfaces.
>>> v1 = Version("1.0a5")
>>> v2 = Version("1.0")
>>> v1
<Version('1.0a5')>
>>> v2
<Version('1.0')>
>>> v1 < v2
True
>>> v1 == v2
False
>>> v1 > v2
False
>>> v1 >= v2
False
>>> v1 <= v2
True
"""
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey
def __init__(self, version: str) -> None:
"""Initialize a Version object.
:param version:
The string representation of a version which will be parsed and normalized
before use.
:raises InvalidVersion:
If the ``version`` does not conform to PEP 440 in any way then this
exception will be raised.
"""
# Validate the version and parse it into pieces
match = self._regex.search(version)
if not match:
raise InvalidVersion(f"Invalid version: '{version}'")
# Store the parsed out pieces of the version
self._version = _Version(
epoch=int(match.group("epoch")) if match.group("epoch") else 0,
release=tuple(int(i) for i in match.group("release").split(".")),
pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
post=_parse_letter_version(
match.group("post_l"), match.group("post_n1") or match.group("post_n2")
),
dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
local=_parse_local_version(match.group("local")),
)
# Generate a key which will be used for sorting
self._key = _cmpkey(
self._version.epoch,
self._version.release,
self._version.pre,
self._version.post,
self._version.dev,
self._version.local,
)
def __repr__(self) -> str:
"""A representation of the Version that shows all internal state.
>>> Version('1.0.0')
<Version('1.0.0')>
"""
return f"<Version('{self}')>"
def __str__(self) -> str:
"""A string representation of the version that can be rounded-tripped.
>>> str(Version("1.0a5"))
'1.0a5'
"""
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
# Pre-release
if self.pre is not None:
parts.append("".join(str(x) for x in self.pre))
# Post-release
if self.post is not None:
parts.append(f".post{self.post}")
# Development release
if self.dev is not None:
parts.append(f".dev{self.dev}")
# Local version segment
if self.local is not None:
parts.append(f"+{self.local}")
return "".join(parts)
@property
def epoch(self) -> int:
"""The epoch of the version.
>>> Version("2.0.0").epoch
0
>>> Version("1!2.0.0").epoch
1
"""
return self._version.epoch
@property
def release(self) -> tuple[int, ...]:
"""The components of the "release" segment of the version.
>>> Version("1.2.3").release
(1, 2, 3)
>>> Version("2.0.0").release
(2, 0, 0)
>>> Version("1!2.0.0.post0").release
(2, 0, 0)
Includes trailing zeroes but not the epoch or any pre-release / development /
post-release suffixes.
"""
return self._version.release
@property
def pre(self) -> tuple[str, int] | None:
"""The pre-release segment of the version.
>>> print(Version("1.2.3").pre)
None
>>> Version("1.2.3a1").pre
('a', 1)
>>> Version("1.2.3b1").pre
('b', 1)
>>> Version("1.2.3rc1").pre
('rc', 1)
"""
return self._version.pre
@property
def post(self) -> int | None:
"""The post-release number of the version.
>>> print(Version("1.2.3").post)
None
>>> Version("1.2.3.post1").post
1
"""
return self._version.post[1] if self._version.post else None
@property
def dev(self) -> int | None:
"""The development number of the version.
>>> print(Version("1.2.3").dev)
None
>>> Version("1.2.3.dev1").dev
1
"""
return self._version.dev[1] if self._version.dev else None
@property
def local(self) -> str | None:
"""The local version segment of the version.
>>> print(Version("1.2.3").local)
None
>>> Version("1.2.3+abc").local
'abc'
"""
if self._version.local:
return ".".join(str(x) for x in self._version.local)
else:
return None
@property
def public(self) -> str:
"""The public portion of the version.
>>> Version("1.2.3").public
'1.2.3'
>>> Version("1.2.3+abc").public
'1.2.3'
>>> Version("1.2.3+abc.dev1").public
'1.2.3'
"""
return str(self).split("+", 1)[0]
@property
def base_version(self) -> str:
"""The "base version" of the version.
>>> Version("1.2.3").base_version
'1.2.3'
>>> Version("1.2.3+abc").base_version
'1.2.3'
>>> Version("1!1.2.3+abc.dev1").base_version
'1!1.2.3'
The "base version" is the public version of the project without any pre or post
release markers.
"""
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
return "".join(parts)
@property
def is_prerelease(self) -> bool:
"""Whether this version is a pre-release.
>>> Version("1.2.3").is_prerelease
False
>>> Version("1.2.3a1").is_prerelease
True
>>> Version("1.2.3b1").is_prerelease
True
>>> Version("1.2.3rc1").is_prerelease
True
>>> Version("1.2.3dev1").is_prerelease
True
"""
return self.dev is not None or self.pre is not None
@property
def is_postrelease(self) -> bool:
"""Whether this version is a post-release.
>>> Version("1.2.3").is_postrelease
False
>>> Version("1.2.3.post1").is_postrelease
True
"""
return self.post is not None
@property
def is_devrelease(self) -> bool:
"""Whether this version is a development release.
>>> Version("1.2.3").is_devrelease
False
>>> Version("1.2.3.dev1").is_devrelease
True
"""
return self.dev is not None
@property
def major(self) -> int:
"""The first item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").major
1
"""
return self.release[0] if len(self.release) >= 1 else 0
@property
def minor(self) -> int:
"""The second item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").minor
2
>>> Version("1").minor
0
"""
return self.release[1] if len(self.release) >= 2 else 0
@property
def micro(self) -> int:
"""The third item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").micro
3
>>> Version("1").micro
0
"""
return self.release[2] if len(self.release) >= 3 else 0
def _parse_letter_version(
letter: str | None, number: str | bytes | SupportsInt | None
) -> tuple[str, int] | None:
if letter:
# We consider there to be an implicit 0 in a pre-release if there is
# not a numeral associated with it.
if number is None:
number = 0
# We normalize any letters to their lower case form
letter = letter.lower()
# We consider some words to be alternate spellings of other words and
# in those cases we want to normalize the spellings to our preferred
# spelling.
if letter == "alpha":
letter = "a"
elif letter == "beta":
letter = "b"
elif letter in ["c", "pre", "preview"]:
letter = "rc"
elif letter in ["rev", "r"]:
letter = "post"
return letter, int(number)
if not letter and number:
# We assume if we are given a number, but we are not given a letter
# then this is using the implicit post release syntax (e.g. 1.0-1)
letter = "post"
return letter, int(number)
return None
_local_version_separators = re.compile(r"[\._-]")
def _parse_local_version(local: str | None) -> LocalType | None:
"""
Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
"""
if local is not None:
return tuple(
part.lower() if not part.isdigit() else int(part)
for part in _local_version_separators.split(local)
)
return None
def _cmpkey(
epoch: int,
release: tuple[int, ...],
pre: tuple[str, int] | None,
post: tuple[str, int] | None,
dev: tuple[str, int] | None,
local: LocalType | None,
) -> CmpKey:
    # When we compare a release version, we want to compare it with all of the
    # trailing zeros removed. So we'll reverse the list, drop all of the
    # now-leading zeros until we come to something non-zero, then re-reverse
    # the rest back into the correct order, make it a tuple, and use that as
    # our sorting key.
_release = tuple(
reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
)
# We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
# We'll do this by abusing the pre segment, but we _only_ want to do this
# if there is not a pre or a post segment. If we have one of those then
# the normal sorting rules will handle this case correctly.
if pre is None and post is None and dev is not None:
_pre: CmpPrePostDevType = NegativeInfinity
# Versions without a pre-release (except as noted above) should sort after
# those with one.
elif pre is None:
_pre = Infinity
else:
_pre = pre
# Versions without a post segment should sort before those with one.
if post is None:
_post: CmpPrePostDevType = NegativeInfinity
else:
_post = post
# Versions without a development segment should sort after those with one.
if dev is None:
_dev: CmpPrePostDevType = Infinity
else:
_dev = dev
if local is None:
# Versions without a local segment should sort before those with one.
_local: CmpLocalType = NegativeInfinity
else:
# Versions with a local segment need that segment parsed to implement
# the sorting rules in PEP440.
# - Alpha numeric segments sort before numeric segments
# - Alpha numeric segments sort lexicographically
# - Numeric segments sort numerically
# - Shorter versions sort before longer versions when the prefixes
# match exactly
_local = tuple(
(i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
)
return epoch, _release, _pre, _post, _dev, _local
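Some consequences of the ``_cmpkey`` rules above, as a quick sketch (vendored import path assumed):

from pip._vendor.packaging.version import Version

assert Version("1.0") == Version("1.0.0")      # trailing zeros are ignored
assert Version("1.0.dev0") < Version("1.0a1")  # dev releases sort before pre-releases
assert Version("1.0a1") < Version("1.0")       # pre-releases sort before the final
assert Version("1.0") < Version("1.0.post1")   # post-releases sort after the final
assert Version("1.0") < Version("1.0+abc")     # a local segment sorts last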

File diff suppressed because it is too large

View File

@ -0,0 +1,627 @@
"""
Utilities for determining application-specific dirs.
See <https://github.com/platformdirs/platformdirs> for details and usage.
"""
from __future__ import annotations
import os
import sys
from typing import TYPE_CHECKING
from .api import PlatformDirsABC
from .version import __version__
from .version import __version_tuple__ as __version_info__
if TYPE_CHECKING:
from pathlib import Path
from typing import Literal
def _set_platform_dir_class() -> type[PlatformDirsABC]:
if sys.platform == "win32":
from pip._vendor.platformdirs.windows import Windows as Result # noqa: PLC0415
elif sys.platform == "darwin":
from pip._vendor.platformdirs.macos import MacOS as Result # noqa: PLC0415
else:
from pip._vendor.platformdirs.unix import Unix as Result # noqa: PLC0415
if os.getenv("ANDROID_DATA") == "/data" and os.getenv("ANDROID_ROOT") == "/system":
if os.getenv("SHELL") or os.getenv("PREFIX"):
return Result
from pip._vendor.platformdirs.android import _android_folder # noqa: PLC0415
if _android_folder() is not None:
from pip._vendor.platformdirs.android import Android # noqa: PLC0415
return Android # return to avoid redefinition of a result
return Result
PlatformDirs = _set_platform_dir_class() #: Currently active platform
AppDirs = PlatformDirs #: Backwards compatibility with appdirs
def user_data_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
roaming: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: data directory tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
roaming=roaming,
ensure_exists=ensure_exists,
).user_data_dir
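# A sketch of the function above (vendored import path assumed; the paths shown
# are typical results and only illustrative):
#
#     from pip._vendor.platformdirs import user_data_dir
#
#     user_data_dir("MyApp", "MyCompany")
#     # Linux:   ~/.local/share/MyApp
#     # macOS:   ~/Library/Application Support/MyApp
#     # Windows: C:\Users\<user>\AppData\Local\MyCompany\MyApp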
def site_data_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
multipath: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
    :param multipath: See `multipath <platformdirs.api.PlatformDirsABC.multipath>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: data directory shared by users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
multipath=multipath,
ensure_exists=ensure_exists,
).site_data_dir
def user_config_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
roaming: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: config directory tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
roaming=roaming,
ensure_exists=ensure_exists,
).user_config_dir
def site_config_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
multipath: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
    :param multipath: See `multipath <platformdirs.api.PlatformDirsABC.multipath>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: config directory shared by the users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
multipath=multipath,
ensure_exists=ensure_exists,
).site_config_dir
def user_cache_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
    :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: cache directory tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).user_cache_dir
def site_cache_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
    :returns: cache directory shared by users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).site_cache_dir
def user_state_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
roaming: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: state directory tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
roaming=roaming,
ensure_exists=ensure_exists,
).user_state_dir
def user_log_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
    :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: log directory tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).user_log_dir
def user_documents_dir() -> str:
""":returns: documents directory tied to the user"""
return PlatformDirs().user_documents_dir
def user_downloads_dir() -> str:
""":returns: downloads directory tied to the user"""
return PlatformDirs().user_downloads_dir
def user_pictures_dir() -> str:
""":returns: pictures directory tied to the user"""
return PlatformDirs().user_pictures_dir
def user_videos_dir() -> str:
""":returns: videos directory tied to the user"""
return PlatformDirs().user_videos_dir
def user_music_dir() -> str:
""":returns: music directory tied to the user"""
return PlatformDirs().user_music_dir
def user_desktop_dir() -> str:
""":returns: desktop directory tied to the user"""
return PlatformDirs().user_desktop_dir
def user_runtime_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: runtime directory tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).user_runtime_dir
def site_runtime_dir(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> str:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: runtime directory shared by users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).site_runtime_dir
def user_data_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
roaming: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: data path tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
roaming=roaming,
ensure_exists=ensure_exists,
).user_data_path
def site_data_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
multipath: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param multipath: See `multipath <platformdirs.api.PlatformDirsABC.multipath>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: data path shared by users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
multipath=multipath,
ensure_exists=ensure_exists,
).site_data_path
def user_config_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
roaming: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: config path tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
roaming=roaming,
ensure_exists=ensure_exists,
).user_config_path
def site_config_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
multipath: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param multipath: See `multipath <platformdirs.api.PlatformDirsABC.multipath>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: config path shared by the users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
multipath=multipath,
ensure_exists=ensure_exists,
).site_config_path
def site_cache_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: cache path shared by users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).site_cache_path
def user_cache_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: cache path tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).user_cache_path
def user_state_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
roaming: bool = False, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param roaming: See `roaming <platformdirs.api.PlatformDirsABC.roaming>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: state path tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
roaming=roaming,
ensure_exists=ensure_exists,
).user_state_path
def user_log_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: log path tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).user_log_path
def user_documents_path() -> Path:
""":returns: documents path tied to the user"""
return PlatformDirs().user_documents_path
def user_downloads_path() -> Path:
""":returns: downloads path tied to the user"""
return PlatformDirs().user_downloads_path
def user_pictures_path() -> Path:
""":returns: pictures path tied to the user"""
return PlatformDirs().user_pictures_path
def user_videos_path() -> Path:
""":returns: videos path tied to the user"""
return PlatformDirs().user_videos_path
def user_music_path() -> Path:
""":returns: music path tied to the user"""
return PlatformDirs().user_music_path
def user_desktop_path() -> Path:
""":returns: desktop path tied to the user"""
return PlatformDirs().user_desktop_path
def user_runtime_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: runtime path tied to the user
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).user_runtime_path
def site_runtime_path(
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> Path:
"""
:param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`.
:param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`.
:param version: See `version <platformdirs.api.PlatformDirsABC.version>`.
:param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`.
:param ensure_exists: See `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
:returns: runtime path shared by users
"""
return PlatformDirs(
appname=appname,
appauthor=appauthor,
version=version,
opinion=opinion,
ensure_exists=ensure_exists,
).site_runtime_path
__all__ = [
"AppDirs",
"PlatformDirs",
"PlatformDirsABC",
"__version__",
"__version_info__",
"site_cache_dir",
"site_cache_path",
"site_config_dir",
"site_config_path",
"site_data_dir",
"site_data_path",
"site_runtime_dir",
"site_runtime_path",
"user_cache_dir",
"user_cache_path",
"user_config_dir",
"user_config_path",
"user_data_dir",
"user_data_path",
"user_desktop_dir",
"user_desktop_path",
"user_documents_dir",
"user_documents_path",
"user_downloads_dir",
"user_downloads_path",
"user_log_dir",
"user_log_path",
"user_music_dir",
"user_music_path",
"user_pictures_dir",
"user_pictures_path",
"user_runtime_dir",
"user_runtime_path",
"user_state_dir",
"user_state_path",
"user_videos_dir",
"user_videos_path",
]
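# A minimal usage sketch of the module-level convenience functions above
# (hedged: "MyApp" and "MyCompany" are hypothetical values, and the
# resulting paths differ per platform):
if __name__ == "__main__":
    print(user_data_path(appname="MyApp", appauthor="MyCompany"))
    print(site_config_path(appname="MyApp", appauthor="MyCompany", multipath=True))
    print(user_cache_path(appname="MyApp", appauthor="MyCompany", version="1.0"))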

View File

@ -0,0 +1,55 @@
"""Main entry point."""
from __future__ import annotations
from pip._vendor.platformdirs import PlatformDirs, __version__
PROPS = (
"user_data_dir",
"user_config_dir",
"user_cache_dir",
"user_state_dir",
"user_log_dir",
"user_documents_dir",
"user_downloads_dir",
"user_pictures_dir",
"user_videos_dir",
"user_music_dir",
"user_runtime_dir",
"site_data_dir",
"site_config_dir",
"site_cache_dir",
"site_runtime_dir",
)
def main() -> None:
"""Run the main entry point."""
app_name = "MyApp"
app_author = "MyCompany"
print(f"-- platformdirs {__version__} --") # noqa: T201
print("-- app dirs (with optional 'version')") # noqa: T201
dirs = PlatformDirs(app_name, app_author, version="1.0")
for prop in PROPS:
print(f"{prop}: {getattr(dirs, prop)}") # noqa: T201
print("\n-- app dirs (without optional 'version')") # noqa: T201
dirs = PlatformDirs(app_name, app_author)
for prop in PROPS:
print(f"{prop}: {getattr(dirs, prop)}") # noqa: T201
print("\n-- app dirs (without optional 'appauthor')") # noqa: T201
dirs = PlatformDirs(app_name)
for prop in PROPS:
print(f"{prop}: {getattr(dirs, prop)}") # noqa: T201
print("\n-- app dirs (with disabled 'appauthor')") # noqa: T201
dirs = PlatformDirs(app_name, appauthor=False)
for prop in PROPS:
print(f"{prop}: {getattr(dirs, prop)}") # noqa: T201
if __name__ == "__main__":
main()

View File

@ -0,0 +1,249 @@
"""Android."""
from __future__ import annotations
import os
import re
import sys
from functools import lru_cache
from typing import TYPE_CHECKING, cast
from .api import PlatformDirsABC
class Android(PlatformDirsABC):
"""
Follows the guidance `from here <https://android.stackexchange.com/a/216132>`_.
Makes use of the `appname <platformdirs.api.PlatformDirsABC.appname>`, `version
<platformdirs.api.PlatformDirsABC.version>`, `ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
"""
@property
def user_data_dir(self) -> str:
""":return: data directory tied to the user, e.g. ``/data/user/<userid>/<packagename>/files/<AppName>``"""
return self._append_app_name_and_version(cast(str, _android_folder()), "files")
@property
def site_data_dir(self) -> str:
""":return: data directory shared by users, same as `user_data_dir`"""
return self.user_data_dir
@property
def user_config_dir(self) -> str:
"""
:return: config directory tied to the user, e.g. \
``/data/user/<userid>/<packagename>/shared_prefs/<AppName>``
"""
return self._append_app_name_and_version(cast(str, _android_folder()), "shared_prefs")
@property
def site_config_dir(self) -> str:
""":return: config directory shared by the users, same as `user_config_dir`"""
return self.user_config_dir
@property
def user_cache_dir(self) -> str:
""":return: cache directory tied to the user, e.g. ``/data/user/<userid>/<packagename>/cache/<AppName>``"""
return self._append_app_name_and_version(cast(str, _android_folder()), "cache")
@property
def site_cache_dir(self) -> str:
""":return: cache directory shared by users, same as `user_cache_dir`"""
return self.user_cache_dir
@property
def user_state_dir(self) -> str:
""":return: state directory tied to the user, same as `user_data_dir`"""
return self.user_data_dir
@property
def user_log_dir(self) -> str:
"""
:return: log directory tied to the user, same as `user_cache_dir` if not opinionated else ``log`` in it,
e.g. ``/data/user/<userid>/<packagename>/cache/<AppName>/log``
"""
path = self.user_cache_dir
if self.opinion:
path = os.path.join(path, "log") # noqa: PTH118
return path
@property
def user_documents_dir(self) -> str:
""":return: documents directory tied to the user e.g. ``/storage/emulated/0/Documents``"""
return _android_documents_folder()
@property
def user_downloads_dir(self) -> str:
""":return: downloads directory tied to the user e.g. ``/storage/emulated/0/Downloads``"""
return _android_downloads_folder()
@property
def user_pictures_dir(self) -> str:
""":return: pictures directory tied to the user e.g. ``/storage/emulated/0/Pictures``"""
return _android_pictures_folder()
@property
def user_videos_dir(self) -> str:
""":return: videos directory tied to the user e.g. ``/storage/emulated/0/DCIM/Camera``"""
return _android_videos_folder()
@property
def user_music_dir(self) -> str:
""":return: music directory tied to the user e.g. ``/storage/emulated/0/Music``"""
return _android_music_folder()
@property
def user_desktop_dir(self) -> str:
""":return: desktop directory tied to the user e.g. ``/storage/emulated/0/Desktop``"""
return "/storage/emulated/0/Desktop"
@property
def user_runtime_dir(self) -> str:
"""
:return: runtime directory tied to the user, same as `user_cache_dir` if not opinionated else ``tmp`` in it,
e.g. ``/data/user/<userid>/<packagename>/cache/<AppName>/tmp``
"""
path = self.user_cache_dir
if self.opinion:
path = os.path.join(path, "tmp") # noqa: PTH118
return path
@property
def site_runtime_dir(self) -> str:
""":return: runtime directory shared by users, same as `user_runtime_dir`"""
return self.user_runtime_dir
@lru_cache(maxsize=1)
def _android_folder() -> str | None: # noqa: C901, PLR0912
""":return: base folder for the Android OS or None if it cannot be found"""
result: str | None = None
# the type checker isn't happy with our "import android"; skip these imports when type checking, see
# https://stackoverflow.com/a/61394121
if not TYPE_CHECKING:
try:
# First try to get a path to the android app using python4android (if available)...
from android import mActivity # noqa: PLC0415
context = cast("android.content.Context", mActivity.getApplicationContext()) # noqa: F821
result = context.getFilesDir().getParentFile().getAbsolutePath()
except Exception: # noqa: BLE001
result = None
if result is None:
try:
# ...and fall back to using plain pyjnius, if python4android isn't available or doesn't deliver any useful
# result...
from jnius import autoclass # noqa: PLC0415
context = autoclass("android.content.Context")
result = context.getFilesDir().getParentFile().getAbsolutePath()
except Exception: # noqa: BLE001
result = None
if result is None:
# ...and if that fails too, find an android folder by looking at paths on sys.path
# warning: only works for apps installed under /data, not adopted storage etc.
pattern = re.compile(r"/data/(data|user/\d+)/(.+)/files")
for path in sys.path:
if pattern.match(path):
result = path.split("/files")[0]
break
else:
result = None
if result is None:
# one last try: find an android folder by looking at paths on sys.path, taking adopted storage paths into
# account
pattern = re.compile(r"/mnt/expand/[a-fA-F0-9-]{36}/(data|user/\d+)/(.+)/files")
for path in sys.path:
if pattern.match(path):
result = path.split("/files")[0]
break
else:
result = None
return result
@lru_cache(maxsize=1)
def _android_documents_folder() -> str:
""":return: documents folder for the Android OS"""
# Get directories with pyjnius
try:
from jnius import autoclass # noqa: PLC0415
context = autoclass("android.content.Context")
environment = autoclass("android.os.Environment")
documents_dir: str = context.getExternalFilesDir(environment.DIRECTORY_DOCUMENTS).getAbsolutePath()
except Exception: # noqa: BLE001
documents_dir = "/storage/emulated/0/Documents"
return documents_dir
@lru_cache(maxsize=1)
def _android_downloads_folder() -> str:
""":return: downloads folder for the Android OS"""
# Get directories with pyjnius
try:
from jnius import autoclass # noqa: PLC0415
context = autoclass("android.content.Context")
environment = autoclass("android.os.Environment")
downloads_dir: str = context.getExternalFilesDir(environment.DIRECTORY_DOWNLOADS).getAbsolutePath()
except Exception: # noqa: BLE001
downloads_dir = "/storage/emulated/0/Downloads"
return downloads_dir
@lru_cache(maxsize=1)
def _android_pictures_folder() -> str:
""":return: pictures folder for the Android OS"""
# Get directories with pyjnius
try:
from jnius import autoclass # noqa: PLC0415
context = autoclass("android.content.Context")
environment = autoclass("android.os.Environment")
pictures_dir: str = context.getExternalFilesDir(environment.DIRECTORY_PICTURES).getAbsolutePath()
except Exception: # noqa: BLE001
pictures_dir = "/storage/emulated/0/Pictures"
return pictures_dir
@lru_cache(maxsize=1)
def _android_videos_folder() -> str:
""":return: videos folder for the Android OS"""
# Get directories with pyjnius
try:
from jnius import autoclass # noqa: PLC0415
context = autoclass("android.content.Context")
environment = autoclass("android.os.Environment")
videos_dir: str = context.getExternalFilesDir(environment.DIRECTORY_DCIM).getAbsolutePath()
except Exception: # noqa: BLE001
videos_dir = "/storage/emulated/0/DCIM/Camera"
return videos_dir
@lru_cache(maxsize=1)
def _android_music_folder() -> str:
""":return: music folder for the Android OS"""
# Get directories with pyjnius
try:
from jnius import autoclass # noqa: PLC0415
context = autoclass("android.content.Context")
environment = autoclass("android.os.Environment")
music_dir: str = context.getExternalFilesDir(environment.DIRECTORY_MUSIC).getAbsolutePath()
except Exception: # noqa: BLE001
music_dir = "/storage/emulated/0/Music"
return music_dir
__all__ = [
"Android",
]
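# A runnable sketch of the sys.path heuristic used by _android_folder()
# (hedged: "com.example.myapp" is a hypothetical package name; on a real
# device the python4android/pyjnius branches above are tried first):
if __name__ == "__main__":
    sample = "/data/user/0/com.example.myapp/files"
    if re.compile(r"/data/(data|user/\d+)/(.+)/files").match(sample):
        print(sample.split("/files")[0])  # -> /data/user/0/com.example.myapp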

View File

@ -0,0 +1,292 @@
"""Base API."""
from __future__ import annotations
import os
from abc import ABC, abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Iterator, Literal
class PlatformDirsABC(ABC): # noqa: PLR0904
"""Abstract base class for platform directories."""
def __init__( # noqa: PLR0913, PLR0917
self,
appname: str | None = None,
appauthor: str | None | Literal[False] = None,
version: str | None = None,
roaming: bool = False, # noqa: FBT001, FBT002
multipath: bool = False, # noqa: FBT001, FBT002
opinion: bool = True, # noqa: FBT001, FBT002
ensure_exists: bool = False, # noqa: FBT001, FBT002
) -> None:
"""
Create a new platform directory.
:param appname: See `appname`.
:param appauthor: See `appauthor`.
:param version: See `version`.
:param roaming: See `roaming`.
:param multipath: See `multipath`.
:param opinion: See `opinion`.
:param ensure_exists: See `ensure_exists`.
"""
self.appname = appname #: The name of the application.
self.appauthor = appauthor
"""
The name of the app author or distributing body for this application.
Typically, it is the owning company name. Defaults to `appname`. You may pass ``False`` to disable it.
"""
self.version = version
"""
An optional version path element to append to the path.
You might want to use this if you want multiple versions of your app to be able to run independently. If used,
this would typically be ``<major>.<minor>``.
"""
self.roaming = roaming
"""
Whether to use the roaming appdata directory on Windows.
That means that for users on a Windows network setup for roaming profiles, this user data will be synced on
login (see
`here <https://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>`_).
"""
self.multipath = multipath
"""
An optional parameter indicating that the entire list of data dirs should be returned.
By default, only the first item is returned.
"""
self.opinion = opinion #: A flag indicating whether to use opinionated values.
self.ensure_exists = ensure_exists
"""
Optionally create the directory (and any missing parents) upon access if it does not exist.
By default, no directories are created.
"""
def _append_app_name_and_version(self, *base: str) -> str:
params = list(base[1:])
if self.appname:
params.append(self.appname)
if self.version:
params.append(self.version)
path = os.path.join(base[0], *params) # noqa: PTH118
self._optionally_create_directory(path)
return path
def _optionally_create_directory(self, path: str) -> None:
if self.ensure_exists:
Path(path).mkdir(parents=True, exist_ok=True)
@property
@abstractmethod
def user_data_dir(self) -> str:
""":return: data directory tied to the user"""
@property
@abstractmethod
def site_data_dir(self) -> str:
""":return: data directory shared by users"""
@property
@abstractmethod
def user_config_dir(self) -> str:
""":return: config directory tied to the user"""
@property
@abstractmethod
def site_config_dir(self) -> str:
""":return: config directory shared by the users"""
@property
@abstractmethod
def user_cache_dir(self) -> str:
""":return: cache directory tied to the user"""
@property
@abstractmethod
def site_cache_dir(self) -> str:
""":return: cache directory shared by users"""
@property
@abstractmethod
def user_state_dir(self) -> str:
""":return: state directory tied to the user"""
@property
@abstractmethod
def user_log_dir(self) -> str:
""":return: log directory tied to the user"""
@property
@abstractmethod
def user_documents_dir(self) -> str:
""":return: documents directory tied to the user"""
@property
@abstractmethod
def user_downloads_dir(self) -> str:
""":return: downloads directory tied to the user"""
@property
@abstractmethod
def user_pictures_dir(self) -> str:
""":return: pictures directory tied to the user"""
@property
@abstractmethod
def user_videos_dir(self) -> str:
""":return: videos directory tied to the user"""
@property
@abstractmethod
def user_music_dir(self) -> str:
""":return: music directory tied to the user"""
@property
@abstractmethod
def user_desktop_dir(self) -> str:
""":return: desktop directory tied to the user"""
@property
@abstractmethod
def user_runtime_dir(self) -> str:
""":return: runtime directory tied to the user"""
@property
@abstractmethod
def site_runtime_dir(self) -> str:
""":return: runtime directory shared by users"""
@property
def user_data_path(self) -> Path:
""":return: data path tied to the user"""
return Path(self.user_data_dir)
@property
def site_data_path(self) -> Path:
""":return: data path shared by users"""
return Path(self.site_data_dir)
@property
def user_config_path(self) -> Path:
""":return: config path tied to the user"""
return Path(self.user_config_dir)
@property
def site_config_path(self) -> Path:
""":return: config path shared by the users"""
return Path(self.site_config_dir)
@property
def user_cache_path(self) -> Path:
""":return: cache path tied to the user"""
return Path(self.user_cache_dir)
@property
def site_cache_path(self) -> Path:
""":return: cache path shared by users"""
return Path(self.site_cache_dir)
@property
def user_state_path(self) -> Path:
""":return: state path tied to the user"""
return Path(self.user_state_dir)
@property
def user_log_path(self) -> Path:
""":return: log path tied to the user"""
return Path(self.user_log_dir)
@property
def user_documents_path(self) -> Path:
""":return: documents path tied to the user"""
return Path(self.user_documents_dir)
@property
def user_downloads_path(self) -> Path:
""":return: downloads path tied to the user"""
return Path(self.user_downloads_dir)
@property
def user_pictures_path(self) -> Path:
""":return: pictures path tied to the user"""
return Path(self.user_pictures_dir)
@property
def user_videos_path(self) -> Path:
""":return: videos path tied to the user"""
return Path(self.user_videos_dir)
@property
def user_music_path(self) -> Path:
""":return: music path tied to the user"""
return Path(self.user_music_dir)
@property
def user_desktop_path(self) -> Path:
""":return: desktop path tied to the user"""
return Path(self.user_desktop_dir)
@property
def user_runtime_path(self) -> Path:
""":return: runtime path tied to the user"""
return Path(self.user_runtime_dir)
@property
def site_runtime_path(self) -> Path:
""":return: runtime path shared by users"""
return Path(self.site_runtime_dir)
def iter_config_dirs(self) -> Iterator[str]:
""":yield: all user and site configuration directories."""
yield self.user_config_dir
yield self.site_config_dir
def iter_data_dirs(self) -> Iterator[str]:
""":yield: all user and site data directories."""
yield self.user_data_dir
yield self.site_data_dir
def iter_cache_dirs(self) -> Iterator[str]:
""":yield: all user and site cache directories."""
yield self.user_cache_dir
yield self.site_cache_dir
def iter_runtime_dirs(self) -> Iterator[str]:
""":yield: all user and site runtime directories."""
yield self.user_runtime_dir
yield self.site_runtime_dir
def iter_config_paths(self) -> Iterator[Path]:
""":yield: all user and site configuration paths."""
for path in self.iter_config_dirs():
yield Path(path)
def iter_data_paths(self) -> Iterator[Path]:
""":yield: all user and site data paths."""
for path in self.iter_data_dirs():
yield Path(path)
def iter_cache_paths(self) -> Iterator[Path]:
""":yield: all user and site cache paths."""
for path in self.iter_cache_dirs():
yield Path(path)
def iter_runtime_paths(self) -> Iterator[Path]:
""":yield: all user and site runtime paths."""
for path in self.iter_runtime_dirs():
yield Path(path)
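# A minimal sketch of the path composition performed by
# _append_app_name_and_version (hedged: "Demo" and "1.0" are hypothetical
# values; the POSIX join shown mirrors the os.path.join call in that method):
if __name__ == "__main__":
    print(os.path.join("/usr/share", "Demo", "1.0"))  # -> /usr/share/Demo/1.0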

View File

@ -0,0 +1,130 @@
"""macOS."""
from __future__ import annotations
import os.path
import sys
from .api import PlatformDirsABC
class MacOS(PlatformDirsABC):
"""
Platform directories for the macOS operating system.
Follows the guidance from
`Apple documentation <https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/MacOSXDirectories/MacOSXDirectories.html>`_.
Makes use of the `appname <platformdirs.api.PlatformDirsABC.appname>`,
`version <platformdirs.api.PlatformDirsABC.version>`,
`ensure_exists <platformdirs.api.PlatformDirsABC.ensure_exists>`.
"""
@property
def user_data_dir(self) -> str:
""":return: data directory tied to the user, e.g. ``~/Library/Application Support/$appname/$version``"""
return self._append_app_name_and_version(os.path.expanduser("~/Library/Application Support")) # noqa: PTH111
@property
def site_data_dir(self) -> str:
"""
:return: data directory shared by users, e.g. ``/Library/Application Support/$appname/$version``.
If we're using a Python binary managed by `Homebrew <https://brew.sh>`_, the directory
will be under the Homebrew prefix, e.g. ``/opt/homebrew/share/$appname/$version``.
If `multipath <platformdirs.api.PlatformDirsABC.multipath>` is enabled, and we're in Homebrew,
the response is a multi-path string separated by ":", e.g.
``/opt/homebrew/share/$appname/$version:/Library/Application Support/$appname/$version``
"""
is_homebrew = sys.prefix.startswith("/opt/homebrew")
path_list = [self._append_app_name_and_version("/opt/homebrew/share")] if is_homebrew else []
path_list.append(self._append_app_name_and_version("/Library/Application Support"))
if self.multipath:
return os.pathsep.join(path_list)
return path_list[0]
@property
def user_config_dir(self) -> str:
""":return: config directory tied to the user, same as `user_data_dir`"""
return self.user_data_dir
@property
def site_config_dir(self) -> str:
""":return: config directory shared by the users, same as `site_data_dir`"""
return self.site_data_dir
@property
def user_cache_dir(self) -> str:
""":return: cache directory tied to the user, e.g. ``~/Library/Caches/$appname/$version``"""
return self._append_app_name_and_version(os.path.expanduser("~/Library/Caches")) # noqa: PTH111
@property
def site_cache_dir(self) -> str:
"""
:return: cache directory shared by users, e.g. ``/Library/Caches/$appname/$version``.
If we're using a Python binary managed by `Homebrew <https://brew.sh>`_, the directory
will be under the Homebrew prefix, e.g. ``/opt/homebrew/var/cache/$appname/$version``.
If `multipath <platformdirs.api.PlatformDirsABC.multipath>` is enabled, and we're in Homebrew,
the response is a multi-path string separated by ":", e.g.
``/opt/homebrew/var/cache/$appname/$version:/Library/Caches/$appname/$version``
"""
is_homebrew = sys.prefix.startswith("/opt/homebrew")
path_list = [self._append_app_name_and_version("/opt/homebrew/var/cache")] if is_homebrew else []
path_list.append(self._append_app_name_and_version("/Library/Caches"))
if self.multipath:
return os.pathsep.join(path_list)
return path_list[0]
@property
def user_state_dir(self) -> str:
""":return: state directory tied to the user, same as `user_data_dir`"""
return self.user_data_dir
@property
def user_log_dir(self) -> str:
""":return: log directory tied to the user, e.g. ``~/Library/Logs/$appname/$version``"""
return self._append_app_name_and_version(os.path.expanduser("~/Library/Logs")) # noqa: PTH111
@property
def user_documents_dir(self) -> str:
""":return: documents directory tied to the user, e.g. ``~/Documents``"""
return os.path.expanduser("~/Documents") # noqa: PTH111
@property
def user_downloads_dir(self) -> str:
""":return: downloads directory tied to the user, e.g. ``~/Downloads``"""
return os.path.expanduser("~/Downloads") # noqa: PTH111
@property
def user_pictures_dir(self) -> str:
""":return: pictures directory tied to the user, e.g. ``~/Pictures``"""
return os.path.expanduser("~/Pictures") # noqa: PTH111
@property
def user_videos_dir(self) -> str:
""":return: videos directory tied to the user, e.g. ``~/Movies``"""
return os.path.expanduser("~/Movies") # noqa: PTH111
@property
def user_music_dir(self) -> str:
""":return: music directory tied to the user, e.g. ``~/Music``"""
return os.path.expanduser("~/Music") # noqa: PTH111
@property
def user_desktop_dir(self) -> str:
""":return: desktop directory tied to the user, e.g. ``~/Desktop``"""
return os.path.expanduser("~/Desktop") # noqa: PTH111
@property
def user_runtime_dir(self) -> str:
""":return: runtime directory tied to the user, e.g. ``~/Library/Caches/TemporaryItems/$appname/$version``"""
return self._append_app_name_and_version(os.path.expanduser("~/Library/Caches/TemporaryItems")) # noqa: PTH111
@property
def site_runtime_dir(self) -> str:
""":return: runtime directory shared by users, same as `user_runtime_dir`"""
return self.user_runtime_dir
__all__ = [
"MacOS",
]
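# A runnable sketch of the multipath join used by site_data_dir and
# site_cache_dir above (hedged: "Demo" is a hypothetical app name; on
# macOS os.pathsep is ":"):
if __name__ == "__main__":
    candidates = ["/opt/homebrew/share/Demo", "/Library/Application Support/Demo"]
    print(os.pathsep.join(candidates))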

View File

@ -0,0 +1,275 @@
"""Unix."""
from __future__ import annotations
import os
import sys
from configparser import ConfigParser
from pathlib import Path
from typing import Iterator, NoReturn
from .api import PlatformDirsABC
if sys.platform == "win32":
def getuid() -> NoReturn:
msg = "should only be used on Unix"
raise RuntimeError(msg)
else:
from os import getuid
class Unix(PlatformDirsABC): # noqa: PLR0904
"""
On Unix/Linux, we follow the `XDG Basedir Spec <https://specifications.freedesktop.org/basedir-spec/basedir-spec-
latest.html>`_.
The spec allows overriding directories with environment variables. The examples shown are the default values,
alongside the name of the environment variable that overrides them. Makes use of the `appname
<platformdirs.api.PlatformDirsABC.appname>`, `version <platformdirs.api.PlatformDirsABC.version>`, `multipath
<platformdirs.api.PlatformDirsABC.multipath>`, `opinion <platformdirs.api.PlatformDirsABC.opinion>`, `ensure_exists
<platformdirs.api.PlatformDirsABC.ensure_exists>`.
"""
@property
def user_data_dir(self) -> str:
"""
:return: data directory tied to the user, e.g. ``~/.local/share/$appname/$version`` or
``$XDG_DATA_HOME/$appname/$version``
"""
path = os.environ.get("XDG_DATA_HOME", "")
if not path.strip():
path = os.path.expanduser("~/.local/share") # noqa: PTH111
return self._append_app_name_and_version(path)
@property
def _site_data_dirs(self) -> list[str]:
path = os.environ.get("XDG_DATA_DIRS", "")
if not path.strip():
path = f"/usr/local/share{os.pathsep}/usr/share"
return [self._append_app_name_and_version(p) for p in path.split(os.pathsep)]
@property
def site_data_dir(self) -> str:
"""
:return: data directories shared by users (if `multipath <platformdirs.api.PlatformDirsABC.multipath>` is
enabled and ``XDG_DATA_DIRS`` is set to a multi-path value, the response is also a multi-path string
separated by the OS path separator), e.g. ``/usr/local/share/$appname/$version`` or ``/usr/share/$appname/$version``
"""
# XDG default for $XDG_DATA_DIRS; only first, if multipath is False
dirs = self._site_data_dirs
if not self.multipath:
return dirs[0]
return os.pathsep.join(dirs)
@property
def user_config_dir(self) -> str:
"""
:return: config directory tied to the user, e.g. ``~/.config/$appname/$version`` or
``$XDG_CONFIG_HOME/$appname/$version``
"""
path = os.environ.get("XDG_CONFIG_HOME", "")
if not path.strip():
path = os.path.expanduser("~/.config") # noqa: PTH111
return self._append_app_name_and_version(path)
@property
def _site_config_dirs(self) -> list[str]:
path = os.environ.get("XDG_CONFIG_DIRS", "")
if not path.strip():
path = "/etc/xdg"
return [self._append_app_name_and_version(p) for p in path.split(os.pathsep)]
@property
def site_config_dir(self) -> str:
"""
:return: config directories shared by users (if `multipath <platformdirs.api.PlatformDirsABC.multipath>`
is enabled and ``XDG_CONFIG_DIRS`` is set to a multi-path value, the response is also a multi-path string
separated by the OS path separator), e.g. ``/etc/xdg/$appname/$version``
"""
# XDG default for $XDG_CONFIG_DIRS; only the first, if multipath is False
dirs = self._site_config_dirs
if not self.multipath:
return dirs[0]
return os.pathsep.join(dirs)
@property
def user_cache_dir(self) -> str:
"""
:return: cache directory tied to the user, e.g. ``~/.cache/$appname/$version`` or
``$XDG_CACHE_HOME/$appname/$version``
"""
path = os.environ.get("XDG_CACHE_HOME", "")
if not path.strip():
path = os.path.expanduser("~/.cache") # noqa: PTH111
return self._append_app_name_and_version(path)
@property
def site_cache_dir(self) -> str:
""":return: cache directory shared by users, e.g. ``/var/cache/$appname/$version``"""
return self._append_app_name_and_version("/var/cache")
@property
def user_state_dir(self) -> str:
"""
:return: state directory tied to the user, e.g. ``~/.local/state/$appname/$version`` or
``$XDG_STATE_HOME/$appname/$version``
"""
path = os.environ.get("XDG_STATE_HOME", "")
if not path.strip():
path = os.path.expanduser("~/.local/state") # noqa: PTH111
return self._append_app_name_and_version(path)
@property
def user_log_dir(self) -> str:
""":return: log directory tied to the user, same as `user_state_dir` if not opinionated else ``log`` in it"""
path = self.user_state_dir
if self.opinion:
path = os.path.join(path, "log") # noqa: PTH118
self._optionally_create_directory(path)
return path
@property
def user_documents_dir(self) -> str:
""":return: documents directory tied to the user, e.g. ``~/Documents``"""
return _get_user_media_dir("XDG_DOCUMENTS_DIR", "~/Documents")
@property
def user_downloads_dir(self) -> str:
""":return: downloads directory tied to the user, e.g. ``~/Downloads``"""
return _get_user_media_dir("XDG_DOWNLOAD_DIR", "~/Downloads")
@property
def user_pictures_dir(self) -> str:
""":return: pictures directory tied to the user, e.g. ``~/Pictures``"""
return _get_user_media_dir("XDG_PICTURES_DIR", "~/Pictures")
@property
def user_videos_dir(self) -> str:
""":return: videos directory tied to the user, e.g. ``~/Videos``"""
return _get_user_media_dir("XDG_VIDEOS_DIR", "~/Videos")
@property
def user_music_dir(self) -> str:
""":return: music directory tied to the user, e.g. ``~/Music``"""
return _get_user_media_dir("XDG_MUSIC_DIR", "~/Music")
@property
def user_desktop_dir(self) -> str:
""":return: desktop directory tied to the user, e.g. ``~/Desktop``"""
return _get_user_media_dir("XDG_DESKTOP_DIR", "~/Desktop")
@property
def user_runtime_dir(self) -> str:
"""
:return: runtime directory tied to the user, e.g. ``/run/user/$(id -u)/$appname/$version`` or
``$XDG_RUNTIME_DIR/$appname/$version``.
For FreeBSD/OpenBSD/NetBSD, if ``$XDG_RUNTIME_DIR`` is not set, it returns
``/var/run/user/$(id -u)/$appname/$version`` if that directory exists, otherwise
``/tmp/runtime-$(id -u)/$appname/$version``.
"""
path = os.environ.get("XDG_RUNTIME_DIR", "")
if not path.strip():
if sys.platform.startswith(("freebsd", "openbsd", "netbsd")):
path = f"/var/run/user/{getuid()}"
if not Path(path).exists():
path = f"/tmp/runtime-{getuid()}" # noqa: S108
else:
path = f"/run/user/{getuid()}"
return self._append_app_name_and_version(path)
@property
def site_runtime_dir(self) -> str:
"""
:return: runtime directory shared by users, e.g. ``/run/$appname/$version`` or \
``$XDG_RUNTIME_DIR/$appname/$version``.
Note that this behaves almost exactly like `user_runtime_dir` if ``$XDG_RUNTIME_DIR`` is set, but will
fall back to paths associated to the root user instead of a regular logged-in user if it's not set.
If you wish to ensure that a logged-in root user path is returned e.g. ``/run/user/0``, use `user_runtime_dir`
instead.
For FreeBSD/OpenBSD/NetBSD, it would return ``/var/run/$appname/$version`` if ``$XDG_RUNTIME_DIR`` is not set.
"""
path = os.environ.get("XDG_RUNTIME_DIR", "")
if not path.strip():
if sys.platform.startswith(("freebsd", "openbsd", "netbsd")):
path = "/var/run"
else:
path = "/run"
return self._append_app_name_and_version(path)
@property
def site_data_path(self) -> Path:
""":return: data path shared by users. Only return the first item, even if ``multipath`` is set to ``True``"""
return self._first_item_as_path_if_multipath(self.site_data_dir)
@property
def site_config_path(self) -> Path:
""":return: config path shared by the users, returns the first item, even if ``multipath`` is set to ``True``"""
return self._first_item_as_path_if_multipath(self.site_config_dir)
@property
def site_cache_path(self) -> Path:
""":return: cache path shared by users. Only return the first item, even if ``multipath`` is set to ``True``"""
return self._first_item_as_path_if_multipath(self.site_cache_dir)
def _first_item_as_path_if_multipath(self, directory: str) -> Path:
if self.multipath:
# If multipath is True, the first path is returned.
directory = directory.split(os.pathsep)[0]
return Path(directory)
def iter_config_dirs(self) -> Iterator[str]:
""":yield: all user and site configuration directories."""
yield self.user_config_dir
yield from self._site_config_dirs
def iter_data_dirs(self) -> Iterator[str]:
""":yield: all user and site data directories."""
yield self.user_data_dir
yield from self._site_data_dirs
def _get_user_media_dir(env_var: str, fallback_tilde_path: str) -> str:
media_dir = _get_user_dirs_folder(env_var)
if media_dir is None:
media_dir = os.environ.get(env_var, "").strip()
if not media_dir:
media_dir = os.path.expanduser(fallback_tilde_path) # noqa: PTH111
return media_dir
def _get_user_dirs_folder(key: str) -> str | None:
"""
Return directory from user-dirs.dirs config file.
See https://freedesktop.org/wiki/Software/xdg-user-dirs/.
"""
user_dirs_config_path = Path(Unix().user_config_dir) / "user-dirs.dirs"
if user_dirs_config_path.exists():
parser = ConfigParser()
with user_dirs_config_path.open() as stream:
# Add fake section header, so ConfigParser doesn't complain
parser.read_string(f"[top]\n{stream.read()}")
if key not in parser["top"]:
return None
path = parser["top"][key].strip('"')
# Expand a leading $HOME to the user's home directory
return path.replace("$HOME", os.path.expanduser("~")) # noqa: PTH111
return None
__all__ = [
"Unix",
]
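# A runnable sketch of the user-dirs.dirs parsing done in
# _get_user_dirs_folder above (hedged: the single config line is a made-up
# example of the format documented at freedesktop.org):
if __name__ == "__main__":
    parser = ConfigParser()
    parser.read_string('[top]\nXDG_DOCUMENTS_DIR="$HOME/Documents"\n')
    raw = parser["top"]["XDG_DOCUMENTS_DIR"].strip('"')
    print(raw.replace("$HOME", os.path.expanduser("~")))  # e.g. /home/user/Documents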

View File

@ -0,0 +1,16 @@
# file generated by setuptools_scm
# don't change, don't track in version control
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple, Union
VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
VERSION_TUPLE = object
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
__version__ = version = '4.2.2'
__version_tuple__ = version_tuple = (4, 2, 2)

View File

@ -0,0 +1,272 @@
"""Windows."""
from __future__ import annotations
import os
import sys
from functools import lru_cache
from typing import TYPE_CHECKING
from .api import PlatformDirsABC
if TYPE_CHECKING:
from collections.abc import Callable
class Windows(PlatformDirsABC):
"""
`MSDN on where to store app data files <https://learn.microsoft.com/en-us/windows/win32/shell/knownfolderid>`_.
Makes use of the `appname <platformdirs.api.PlatformDirsABC.appname>`, `appauthor
<platformdirs.api.PlatformDirsABC.appauthor>`, `version <platformdirs.api.PlatformDirsABC.version>`, `roaming
<platformdirs.api.PlatformDirsABC.roaming>`, `opinion <platformdirs.api.PlatformDirsABC.opinion>`, `ensure_exists
<platformdirs.api.PlatformDirsABC.ensure_exists>`.
"""
@property
def user_data_dir(self) -> str:
"""
:return: data directory tied to the user, e.g.
``%USERPROFILE%\\AppData\\Local\\$appauthor\\$appname`` (not roaming) or
``%USERPROFILE%\\AppData\\Roaming\\$appauthor\\$appname`` (roaming)
"""
const = "CSIDL_APPDATA" if self.roaming else "CSIDL_LOCAL_APPDATA"
path = os.path.normpath(get_win_folder(const))
return self._append_parts(path)
def _append_parts(self, path: str, *, opinion_value: str | None = None) -> str:
params = []
if self.appname:
if self.appauthor is not False:
author = self.appauthor or self.appname
params.append(author)
params.append(self.appname)
if opinion_value is not None and self.opinion:
params.append(opinion_value)
if self.version:
params.append(self.version)
path = os.path.join(path, *params) # noqa: PTH118
self._optionally_create_directory(path)
return path
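# Hedged sketch of _append_parts: with appauthor="Acme", appname="Demo",
# version="1.0" and opinion_value="Cache" (all hypothetical), a base of
# %LOCALAPPDATA% becomes ...\AppData\Local\Acme\Demo\Cache\1.0.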
@property
def site_data_dir(self) -> str:
""":return: data directory shared by users, e.g. ``C:\\ProgramData\\$appauthor\\$appname``"""
path = os.path.normpath(get_win_folder("CSIDL_COMMON_APPDATA"))
return self._append_parts(path)
@property
def user_config_dir(self) -> str:
""":return: config directory tied to the user, same as `user_data_dir`"""
return self.user_data_dir
@property
def site_config_dir(self) -> str:
""":return: config directory shared by the users, same as `site_data_dir`"""
return self.site_data_dir
@property
def user_cache_dir(self) -> str:
"""
:return: cache directory tied to the user (if opinionated with ``Cache`` folder within ``$appname``) e.g.
``%USERPROFILE%\\AppData\\Local\\$appauthor\\$appname\\Cache\\$version``
"""
path = os.path.normpath(get_win_folder("CSIDL_LOCAL_APPDATA"))
return self._append_parts(path, opinion_value="Cache")
@property
def site_cache_dir(self) -> str:
""":return: cache directory shared by users, e.g. ``C:\\ProgramData\\$appauthor\\$appname\\Cache\\$version``"""
path = os.path.normpath(get_win_folder("CSIDL_COMMON_APPDATA"))
return self._append_parts(path, opinion_value="Cache")
@property
def user_state_dir(self) -> str:
""":return: state directory tied to the user, same as `user_data_dir`"""
return self.user_data_dir
@property
def user_log_dir(self) -> str:
""":return: log directory tied to the user, same as `user_data_dir` if not opinionated else ``Logs`` in it"""
path = self.user_data_dir
if self.opinion:
path = os.path.join(path, "Logs") # noqa: PTH118
self._optionally_create_directory(path)
return path
@property
def user_documents_dir(self) -> str:
""":return: documents directory tied to the user e.g. ``%USERPROFILE%\\Documents``"""
return os.path.normpath(get_win_folder("CSIDL_PERSONAL"))
@property
def user_downloads_dir(self) -> str:
""":return: downloads directory tied to the user e.g. ``%USERPROFILE%\\Downloads``"""
return os.path.normpath(get_win_folder("CSIDL_DOWNLOADS"))
@property
def user_pictures_dir(self) -> str:
""":return: pictures directory tied to the user e.g. ``%USERPROFILE%\\Pictures``"""
return os.path.normpath(get_win_folder("CSIDL_MYPICTURES"))
@property
def user_videos_dir(self) -> str:
""":return: videos directory tied to the user e.g. ``%USERPROFILE%\\Videos``"""
return os.path.normpath(get_win_folder("CSIDL_MYVIDEO"))
@property
def user_music_dir(self) -> str:
""":return: music directory tied to the user e.g. ``%USERPROFILE%\\Music``"""
return os.path.normpath(get_win_folder("CSIDL_MYMUSIC"))
@property
def user_desktop_dir(self) -> str:
""":return: desktop directory tied to the user, e.g. ``%USERPROFILE%\\Desktop``"""
return os.path.normpath(get_win_folder("CSIDL_DESKTOPDIRECTORY"))
@property
def user_runtime_dir(self) -> str:
"""
:return: runtime directory tied to the user, e.g.
``%USERPROFILE%\\AppData\\Local\\Temp\\$appauthor\\$appname``
"""
path = os.path.normpath(os.path.join(get_win_folder("CSIDL_LOCAL_APPDATA"), "Temp")) # noqa: PTH118
return self._append_parts(path)
@property
def site_runtime_dir(self) -> str:
""":return: runtime directory shared by users, same as `user_runtime_dir`"""
return self.user_runtime_dir
def get_win_folder_from_env_vars(csidl_name: str) -> str:
"""Get folder from environment variables."""
result = get_win_folder_if_csidl_name_not_env_var(csidl_name)
if result is not None:
return result
env_var_name = {
"CSIDL_APPDATA": "APPDATA",
"CSIDL_COMMON_APPDATA": "ALLUSERSPROFILE",
"CSIDL_LOCAL_APPDATA": "LOCALAPPDATA",
}.get(csidl_name)
if env_var_name is None:
msg = f"Unknown CSIDL name: {csidl_name}"
raise ValueError(msg)
result = os.environ.get(env_var_name)
if result is None:
msg = f"Unset environment variable: {env_var_name}"
raise ValueError(msg)
return result
def get_win_folder_if_csidl_name_not_env_var(csidl_name: str) -> str | None:
"""Get a folder for a CSIDL name that does not exist as an environment variable."""
if csidl_name == "CSIDL_PERSONAL":
return os.path.join(os.path.normpath(os.environ["USERPROFILE"]), "Documents") # noqa: PTH118
if csidl_name == "CSIDL_DOWNLOADS":
return os.path.join(os.path.normpath(os.environ["USERPROFILE"]), "Downloads") # noqa: PTH118
if csidl_name == "CSIDL_MYPICTURES":
return os.path.join(os.path.normpath(os.environ["USERPROFILE"]), "Pictures") # noqa: PTH118
if csidl_name == "CSIDL_MYVIDEO":
return os.path.join(os.path.normpath(os.environ["USERPROFILE"]), "Videos") # noqa: PTH118
if csidl_name == "CSIDL_MYMUSIC":
return os.path.join(os.path.normpath(os.environ["USERPROFILE"]), "Music") # noqa: PTH118
return None
def get_win_folder_from_registry(csidl_name: str) -> str:
"""
Get folder from the registry.
This is a fallback technique at best. I'm not sure if using the registry for these guarantees us the correct answer
for all CSIDL_* names.
"""
shell_folder_name = {
"CSIDL_APPDATA": "AppData",
"CSIDL_COMMON_APPDATA": "Common AppData",
"CSIDL_LOCAL_APPDATA": "Local AppData",
"CSIDL_PERSONAL": "Personal",
"CSIDL_DOWNLOADS": "{374DE290-123F-4565-9164-39C4925E467B}",
"CSIDL_MYPICTURES": "My Pictures",
"CSIDL_MYVIDEO": "My Video",
"CSIDL_MYMUSIC": "My Music",
}.get(csidl_name)
if shell_folder_name is None:
msg = f"Unknown CSIDL name: {csidl_name}"
raise ValueError(msg)
if sys.platform != "win32": # only needed for mypy type checker to know that this code runs only on Windows
raise NotImplementedError
import winreg # noqa: PLC0415
key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders")
directory, _ = winreg.QueryValueEx(key, shell_folder_name)
return str(directory)
def get_win_folder_via_ctypes(csidl_name: str) -> str:
"""Get folder with ctypes."""
# There is no 'CSIDL_DOWNLOADS'.
# Use 'CSIDL_PROFILE' (40) and append the default folder 'Downloads' instead.
# https://learn.microsoft.com/en-us/windows/win32/shell/knownfolderid
import ctypes # noqa: PLC0415
csidl_const = {
"CSIDL_APPDATA": 26,
"CSIDL_COMMON_APPDATA": 35,
"CSIDL_LOCAL_APPDATA": 28,
"CSIDL_PERSONAL": 5,
"CSIDL_MYPICTURES": 39,
"CSIDL_MYVIDEO": 14,
"CSIDL_MYMUSIC": 13,
"CSIDL_DOWNLOADS": 40,
"CSIDL_DESKTOPDIRECTORY": 16,
}.get(csidl_name)
if csidl_const is None:
msg = f"Unknown CSIDL name: {csidl_name}"
raise ValueError(msg)
buf = ctypes.create_unicode_buffer(1024)
windll = getattr(ctypes, "windll") # noqa: B009 # using getattr to avoid false positive with mypy type checker
windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
# Downgrade to short path name if it has high-bit chars.
if any(ord(c) > 255 for c in buf): # noqa: PLR2004
buf2 = ctypes.create_unicode_buffer(1024)
if windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
buf = buf2
if csidl_name == "CSIDL_DOWNLOADS":
return os.path.join(buf.value, "Downloads") # noqa: PTH118
return buf.value
def _pick_get_win_folder() -> Callable[[str], str]:
try:
import ctypes # noqa: PLC0415
except ImportError:
pass
else:
if hasattr(ctypes, "windll"):
return get_win_folder_via_ctypes
try:
import winreg # noqa: PLC0415, F401
except ImportError:
return get_win_folder_from_env_vars
else:
return get_win_folder_from_registry
get_win_folder = lru_cache(maxsize=None)(_pick_get_win_folder())
__all__ = [
"Windows",
]
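# A guarded usage sketch of the cached dispatcher above (hedged: off
# Windows every backend either raises or is unavailable, so the call is
# attempted only on win32):
if __name__ == "__main__":
    if sys.platform == "win32":
        print(get_win_folder("CSIDL_LOCAL_APPDATA"))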

View File

@ -0,0 +1,82 @@
"""
Pygments
~~~~~~~~
Pygments is a syntax highlighting package written in Python.
It is a generic syntax highlighter for general use in all kinds of software
such as forum systems, wikis or other applications that need to prettify
source code. Highlights are:
* a wide range of common languages and markup formats is supported
* special attention is paid to details, increasing quality by a fair amount
* support for new languages and formats can be added easily
* a number of output formats, presently HTML, LaTeX, RTF, SVG, all image
formats that PIL supports, and ANSI sequences
* it is usable as a command-line tool and as a library
* ... and it highlights even Brainfuck!
The `Pygments master branch`_ is installable with ``easy_install Pygments==dev``.
.. _Pygments master branch:
https://github.com/pygments/pygments/archive/master.zip#egg=Pygments-dev
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from io import StringIO, BytesIO
__version__ = '2.18.0'
__docformat__ = 'restructuredtext'
__all__ = ['lex', 'format', 'highlight']
def lex(code, lexer):
"""
Lex `code` with the `lexer` (must be a `Lexer` instance)
and return an iterable of tokens. Currently, this only calls
`lexer.get_tokens()`.
"""
try:
return lexer.get_tokens(code)
except TypeError:
# Heuristic to catch a common mistake.
from pip._vendor.pygments.lexer import RegexLexer
if isinstance(lexer, type) and issubclass(lexer, RegexLexer):
raise TypeError('lex() argument must be a lexer instance, '
'not a class')
raise
def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builtin
"""
Format ``tokens`` (an iterable of tokens) with the formatter ``formatter``
(a `Formatter` instance).
If ``outfile`` is given and a valid file object (an object with a
``write`` method), the result will be written to it, otherwise it
is returned as a string.
"""
try:
if not outfile:
realoutfile = BytesIO() if getattr(formatter, 'encoding', None) else StringIO()
formatter.format(tokens, realoutfile)
return realoutfile.getvalue()
else:
formatter.format(tokens, outfile)
except TypeError:
# Heuristic to catch a common mistake.
from pip._vendor.pygments.formatter import Formatter
if isinstance(formatter, type) and issubclass(formatter, Formatter):
raise TypeError('format() argument must be a formatter instance, '
'not a class')
raise
def highlight(code, lexer, formatter, outfile=None):
"""
This is the most high-level highlighting function. It combines `lex` and
`format` in one function.
"""
return format(lex(code, lexer), formatter, outfile)
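# A minimal usage sketch of lex()/format()/highlight() (hedged: the lexer
# and formatter classes are imported from the vendored subpackages; any
# real lexer/formatter pair would do):
if __name__ == "__main__":
    from pip._vendor.pygments.lexers import PythonLexer
    from pip._vendor.pygments.formatters import TerminalFormatter
    print(highlight('print("hello")', PythonLexer(), TerminalFormatter()))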

View File

@ -0,0 +1,17 @@
"""
pygments.__main__
~~~~~~~~~~~~~~~~~
Main entry point for ``python -m pygments``.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import sys
from pip._vendor.pygments.cmdline import main
try:
sys.exit(main(sys.argv))
except KeyboardInterrupt:
sys.exit(1)

View File

@ -0,0 +1,668 @@
"""
pygments.cmdline
~~~~~~~~~~~~~~~~
Command line interface.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import os
import sys
import shutil
import argparse
from textwrap import dedent
from pip._vendor.pygments import __version__, highlight
from pip._vendor.pygments.util import ClassNotFound, OptionError, docstring_headline, \
guess_decode, guess_decode_from_terminal, terminal_encoding, \
UnclosingTextIOWrapper
from pip._vendor.pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename
from pip._vendor.pygments.lexers.special import TextLexer
from pip._vendor.pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
from pip._vendor.pygments.formatters import get_all_formatters, get_formatter_by_name, \
load_formatter_from_file, get_formatter_for_filename, find_formatter_class
from pip._vendor.pygments.formatters.terminal import TerminalFormatter
from pip._vendor.pygments.formatters.terminal256 import Terminal256Formatter, TerminalTrueColorFormatter
from pip._vendor.pygments.filters import get_all_filters, find_filter_class
from pip._vendor.pygments.styles import get_all_styles, get_style_by_name
def _parse_options(o_strs):
opts = {}
if not o_strs:
return opts
for o_str in o_strs:
if not o_str.strip():
continue
o_args = o_str.split(',')
for o_arg in o_args:
o_arg = o_arg.strip()
try:
o_key, o_val = o_arg.split('=', 1)
o_key = o_key.strip()
o_val = o_val.strip()
except ValueError:
opts[o_arg] = True
else:
opts[o_key] = o_val
return opts
def _parse_filters(f_strs):
filters = []
if not f_strs:
return filters
for f_str in f_strs:
if ':' in f_str:
fname, fopts = f_str.split(':', 1)
filters.append((fname, _parse_options([fopts])))
else:
filters.append((f_str, {}))
return filters
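# Hedged examples of the two parsers above (option and filter strings are
# made-up values in the -O/-F command-line syntax):
#   _parse_options(["style=monokai,linenos"])
#       -> {"style": "monokai", "linenos": True}
#   _parse_filters(["highlight:names=main"])
#       -> [("highlight", {"names": "main"})]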
def _print_help(what, name):
try:
if what == 'lexer':
cls = get_lexer_by_name(name)
print(f"Help on the {cls.name} lexer:")
print(dedent(cls.__doc__))
elif what == 'formatter':
cls = find_formatter_class(name)
print(f"Help on the {cls.name} formatter:")
print(dedent(cls.__doc__))
elif what == 'filter':
cls = find_filter_class(name)
print(f"Help on the {name} filter:")
print(dedent(cls.__doc__))
return 0
except (AttributeError, ValueError):
print(f"{what} not found!", file=sys.stderr)
return 1
def _print_list(what):
if what == 'lexer':
print()
print("Lexers:")
print("~~~~~~~")
info = []
for fullname, names, exts, _ in get_all_lexers():
tup = (', '.join(names)+':', fullname,
exts and '(filenames ' + ', '.join(exts) + ')' or '')
info.append(tup)
info.sort()
for i in info:
print(('* {}\n {} {}').format(*i))
elif what == 'formatter':
print()
print("Formatters:")
print("~~~~~~~~~~~")
info = []
for cls in get_all_formatters():
doc = docstring_headline(cls)
tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and
'(filenames ' + ', '.join(cls.filenames) + ')' or '')
info.append(tup)
info.sort()
for i in info:
print(('* {}\n {} {}').format(*i))
elif what == 'filter':
print()
print("Filters:")
print("~~~~~~~~")
for name in get_all_filters():
cls = find_filter_class(name)
print("* " + name + ':')
print(f" {docstring_headline(cls)}")
elif what == 'style':
print()
print("Styles:")
print("~~~~~~~")
for name in get_all_styles():
cls = get_style_by_name(name)
print("* " + name + ':')
print(f" {docstring_headline(cls)}")
def _print_list_as_json(requested_items):
import json
result = {}
if 'lexer' in requested_items:
info = {}
for fullname, names, filenames, mimetypes in get_all_lexers():
info[fullname] = {
'aliases': names,
'filenames': filenames,
'mimetypes': mimetypes
}
result['lexers'] = info
if 'formatter' in requested_items:
info = {}
for cls in get_all_formatters():
doc = docstring_headline(cls)
info[cls.name] = {
'aliases': cls.aliases,
'filenames': cls.filenames,
'doc': doc
}
result['formatters'] = info
if 'filter' in requested_items:
info = {}
for name in get_all_filters():
cls = find_filter_class(name)
info[name] = {
'doc': docstring_headline(cls)
}
result['filters'] = info
if 'style' in requested_items:
info = {}
for name in get_all_styles():
cls = get_style_by_name(name)
info[name] = {
'doc': docstring_headline(cls)
}
result['styles'] = info
json.dump(result, sys.stdout)
def main_inner(parser, argns):
if argns.help:
parser.print_help()
return 0
if argns.V:
print(f'Pygments version {__version__}, (c) 2006-2024 by Georg Brandl, Matthäus '
'Chajdas and contributors.')
return 0
def is_only_option(opt):
return not any(v for (k, v) in vars(argns).items() if k != opt)
# handle ``pygmentize -L``
if argns.L is not None:
arg_set = set()
for k, v in vars(argns).items():
if v:
arg_set.add(k)
arg_set.discard('L')
arg_set.discard('json')
if arg_set:
parser.print_help(sys.stderr)
return 2
# print version
if not argns.json:
main(['', '-V'])
allowed_types = {'lexer', 'formatter', 'filter', 'style'}
largs = [arg.rstrip('s') for arg in argns.L]
if any(arg not in allowed_types for arg in largs):
parser.print_help(sys.stderr)
return 0
if not largs:
largs = allowed_types
if not argns.json:
for arg in largs:
_print_list(arg)
else:
_print_list_as_json(largs)
return 0
# handle ``pygmentize -H``
if argns.H:
if not is_only_option('H'):
parser.print_help(sys.stderr)
return 2
what, name = argns.H
if what not in ('lexer', 'formatter', 'filter'):
parser.print_help(sys.stderr)
return 2
return _print_help(what, name)
# parse -O options
parsed_opts = _parse_options(argns.O or [])
# parse -P options
for p_opt in argns.P or []:
try:
name, value = p_opt.split('=', 1)
except ValueError:
parsed_opts[p_opt] = True
else:
parsed_opts[name] = value
# encodings
inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))
# handle ``pygmentize -N``
if argns.N:
lexer = find_lexer_class_for_filename(argns.N)
if lexer is None:
lexer = TextLexer
print(lexer.aliases[0])
return 0
# handle ``pygmentize -C``
if argns.C:
inp = sys.stdin.buffer.read()
try:
lexer = guess_lexer(inp, inencoding=inencoding)
except ClassNotFound:
lexer = TextLexer
print(lexer.aliases[0])
return 0
# handle ``pygmentize -S``
S_opt = argns.S
a_opt = argns.a
if S_opt is not None:
f_opt = argns.f
if not f_opt:
parser.print_help(sys.stderr)
return 2
if argns.l or argns.INPUTFILE:
parser.print_help(sys.stderr)
return 2
try:
parsed_opts['style'] = S_opt
fmter = get_formatter_by_name(f_opt, **parsed_opts)
except ClassNotFound as err:
print(err, file=sys.stderr)
return 1
print(fmter.get_style_defs(a_opt or ''))
return 0
# if no -S is given, -a is not allowed
if argns.a is not None:
parser.print_help(sys.stderr)
return 2
# parse -F options
F_opts = _parse_filters(argns.F or [])
# -x: allow custom (eXternal) lexers and formatters
allow_custom_lexer_formatter = bool(argns.x)
# select lexer
lexer = None
# given by name?
lexername = argns.l
if lexername:
# custom lexer, located relative to user's cwd
if allow_custom_lexer_formatter and '.py' in lexername:
try:
filename = None
name = None
if ':' in lexername:
filename, name = lexername.rsplit(':', 1)
if '.py' in name:
# This can happen on Windows: If the lexername is
# C:\lexer.py -- return to normal load path in that case
name = None
if filename and name:
lexer = load_lexer_from_file(filename, name,
**parsed_opts)
else:
lexer = load_lexer_from_file(lexername, **parsed_opts)
except ClassNotFound as err:
print('Error:', err, file=sys.stderr)
return 1
else:
try:
lexer = get_lexer_by_name(lexername, **parsed_opts)
except (OptionError, ClassNotFound) as err:
print('Error:', err, file=sys.stderr)
return 1
# read input code
code = None
if argns.INPUTFILE:
if argns.s:
print('Error: -s option not usable when input file specified',
file=sys.stderr)
return 2
infn = argns.INPUTFILE
try:
with open(infn, 'rb') as infp:
code = infp.read()
except Exception as err:
print('Error: cannot read infile:', err, file=sys.stderr)
return 1
if not inencoding:
code, inencoding = guess_decode(code)
# do we have to guess the lexer?
if not lexer:
try:
lexer = get_lexer_for_filename(infn, code, **parsed_opts)
except ClassNotFound as err:
if argns.g:
try:
lexer = guess_lexer(code, **parsed_opts)
except ClassNotFound:
lexer = TextLexer(**parsed_opts)
else:
print('Error:', err, file=sys.stderr)
return 1
except OptionError as err:
print('Error:', err, file=sys.stderr)
return 1
elif not argns.s: # treat stdin as full file (-s support is later)
# read code from terminal, always in binary mode since we want to
# decode ourselves and be tolerant with it
code = sys.stdin.buffer.read() # use .buffer to get a binary stream
if not inencoding:
code, inencoding = guess_decode_from_terminal(code, sys.stdin)
# else the lexer will do the decoding
if not lexer:
try:
lexer = guess_lexer(code, **parsed_opts)
except ClassNotFound:
lexer = TextLexer(**parsed_opts)
else: # -s option needs a lexer with -l
if not lexer:
print('Error: when using -s a lexer has to be selected with -l',
file=sys.stderr)
return 2
# process filters
for fname, fopts in F_opts:
try:
lexer.add_filter(fname, **fopts)
except ClassNotFound as err:
print('Error:', err, file=sys.stderr)
return 1
# select formatter
outfn = argns.o
fmter = argns.f
if fmter:
# custom formatter, located relative to user's cwd
if allow_custom_lexer_formatter and '.py' in fmter:
try:
filename = None
name = None
if ':' in fmter:
# Same logic as above for custom lexer
filename, name = fmter.rsplit(':', 1)
if '.py' in name:
name = None
if filename and name:
fmter = load_formatter_from_file(filename, name,
**parsed_opts)
else:
fmter = load_formatter_from_file(fmter, **parsed_opts)
except ClassNotFound as err:
print('Error:', err, file=sys.stderr)
return 1
else:
try:
fmter = get_formatter_by_name(fmter, **parsed_opts)
except (OptionError, ClassNotFound) as err:
print('Error:', err, file=sys.stderr)
return 1
if outfn:
if not fmter:
try:
fmter = get_formatter_for_filename(outfn, **parsed_opts)
except (OptionError, ClassNotFound) as err:
print('Error:', err, file=sys.stderr)
return 1
try:
outfile = open(outfn, 'wb')
except Exception as err:
print('Error: cannot open outfile:', err, file=sys.stderr)
return 1
else:
if not fmter:
            if os.environ.get('COLORTERM', '') in ('truecolor', '24bit'):
fmter = TerminalTrueColorFormatter(**parsed_opts)
elif '256' in os.environ.get('TERM', ''):
fmter = Terminal256Formatter(**parsed_opts)
else:
fmter = TerminalFormatter(**parsed_opts)
outfile = sys.stdout.buffer
# determine output encoding if not explicitly selected
if not outencoding:
if outfn:
# output file? use lexer encoding for now (can still be None)
fmter.encoding = inencoding
else:
# else use terminal encoding
fmter.encoding = terminal_encoding(sys.stdout)
# provide coloring under Windows, if possible
if not outfn and sys.platform in ('win32', 'cygwin') and \
fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover
# unfortunately colorama doesn't support binary streams on Py3
outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
fmter.encoding = None
try:
import colorama.initialise
except ImportError:
pass
else:
outfile = colorama.initialise.wrap_stream(
outfile, convert=None, strip=None, autoreset=False, wrap=True)
# When using the LaTeX formatter and the option `escapeinside` is
# specified, we need a special lexer which collects escaped text
# before running the chosen language lexer.
escapeinside = parsed_opts.get('escapeinside', '')
if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
left = escapeinside[0]
right = escapeinside[1]
lexer = LatexEmbeddedLexer(left, right, lexer)
# ... and do it!
if not argns.s:
# process whole input as per normal...
try:
highlight(code, lexer, fmter, outfile)
finally:
if outfn:
outfile.close()
return 0
else:
# line by line processing of stdin (eg: for 'tail -f')...
try:
while 1:
line = sys.stdin.buffer.readline()
if not line:
break
if not inencoding:
line = guess_decode_from_terminal(line, sys.stdin)[0]
highlight(line, lexer, fmter, outfile)
if hasattr(outfile, 'flush'):
outfile.flush()
return 0
except KeyboardInterrupt: # pragma: no cover
return 0
finally:
if outfn:
outfile.close()
class HelpFormatter(argparse.HelpFormatter):
def __init__(self, prog, indent_increment=2, max_help_position=16, width=None):
if width is None:
try:
width = shutil.get_terminal_size().columns - 2
except Exception:
pass
argparse.HelpFormatter.__init__(self, prog, indent_increment,
max_help_position, width)
def main(args=sys.argv):
"""
Main command line entry point.
"""
desc = "Highlight an input file and write the result to an output file."
parser = argparse.ArgumentParser(description=desc, add_help=False,
formatter_class=HelpFormatter)
operation = parser.add_argument_group('Main operation')
lexersel = operation.add_mutually_exclusive_group()
lexersel.add_argument(
'-l', metavar='LEXER',
help='Specify the lexer to use. (Query names with -L.) If not '
'given and -g is not present, the lexer is guessed from the filename.')
lexersel.add_argument(
'-g', action='store_true',
help='Guess the lexer from the file contents, or pass through '
'as plain text if nothing can be guessed.')
operation.add_argument(
'-F', metavar='FILTER[:options]', action='append',
help='Add a filter to the token stream. (Query names with -L.) '
'Filter options are given after a colon if necessary.')
operation.add_argument(
'-f', metavar='FORMATTER',
help='Specify the formatter to use. (Query names with -L.) '
'If not given, the formatter is guessed from the output filename, '
'and defaults to the terminal formatter if the output is to the '
'terminal or an unknown file extension.')
operation.add_argument(
'-O', metavar='OPTION=value[,OPTION=value,...]', action='append',
help='Give options to the lexer and formatter as a comma-separated '
'list of key-value pairs. '
'Example: `-O bg=light,python=cool`.')
operation.add_argument(
'-P', metavar='OPTION=value', action='append',
help='Give a single option to the lexer and formatter - with this '
'you can pass options whose value contains commas and equal signs. '
'Example: `-P "heading=Pygments, the Python highlighter"`.')
operation.add_argument(
'-o', metavar='OUTPUTFILE',
help='Where to write the output. Defaults to standard output.')
operation.add_argument(
'INPUTFILE', nargs='?',
help='Where to read the input. Defaults to standard input.')
flags = parser.add_argument_group('Operation flags')
flags.add_argument(
'-v', action='store_true',
help='Print a detailed traceback on unhandled exceptions, which '
'is useful for debugging and bug reports.')
flags.add_argument(
'-s', action='store_true',
help='Process lines one at a time until EOF, rather than waiting to '
'process the entire file. This only works for stdin, only for lexers '
'with no line-spanning constructs, and is intended for streaming '
'input such as you get from `tail -f`. '
'Example usage: `tail -f sql.log | pygmentize -s -l sql`.')
flags.add_argument(
'-x', action='store_true',
help='Allow custom lexers and formatters to be loaded from a .py file '
'relative to the current working directory. For example, '
'`-l ./customlexer.py -x`. By default, this option expects a file '
'with a class named CustomLexer or CustomFormatter; you can also '
'specify your own class name with a colon (`-l ./lexer.py:MyLexer`). '
'Users should be very careful not to use this option with untrusted '
'files, because it will import and run them.')
    flags.add_argument('--json', help='Output as JSON. This can '
                       'only be used in conjunction with -L.',
default=False,
action='store_true')
special_modes_group = parser.add_argument_group(
'Special modes - do not do any highlighting')
special_modes = special_modes_group.add_mutually_exclusive_group()
special_modes.add_argument(
'-S', metavar='STYLE -f formatter',
help='Print style definitions for STYLE for a formatter '
'given with -f. The argument given by -a is formatter '
'dependent.')
special_modes.add_argument(
'-L', nargs='*', metavar='WHAT',
help='List lexers, formatters, styles or filters -- '
'give additional arguments for the thing(s) you want to list '
'(e.g. "styles"), or omit them to list everything.')
special_modes.add_argument(
'-N', metavar='FILENAME',
help='Guess and print out a lexer name based solely on the given '
'filename. Does not take input or highlight anything. If no specific '
'lexer can be determined, "text" is printed.')
special_modes.add_argument(
'-C', action='store_true',
help='Like -N, but print out a lexer name based solely on '
             'content read from standard input.')
special_modes.add_argument(
'-H', action='store', nargs=2, metavar=('NAME', 'TYPE'),
help='Print detailed help for the object <name> of type <type>, '
'where <type> is one of "lexer", "formatter" or "filter".')
special_modes.add_argument(
'-V', action='store_true',
help='Print the package version.')
special_modes.add_argument(
'-h', '--help', action='store_true',
help='Print this help.')
special_modes_group.add_argument(
'-a', metavar='ARG',
help='Formatter-specific additional argument for the -S (print '
'style sheet) mode.')
argns = parser.parse_args(args[1:])
try:
return main_inner(parser, argns)
except BrokenPipeError:
# someone closed our stdout, e.g. by quitting a pager.
return 0
except Exception:
if argns.v:
print(file=sys.stderr)
print('*' * 65, file=sys.stderr)
print('An unhandled exception occurred while highlighting.',
file=sys.stderr)
print('Please report the whole traceback to the issue tracker at',
file=sys.stderr)
print('<https://github.com/pygments/pygments/issues>.',
file=sys.stderr)
print('*' * 65, file=sys.stderr)
print(file=sys.stderr)
raise
import traceback
info = traceback.format_exception(*sys.exc_info())
msg = info[-1].strip()
if len(info) >= 3:
# extract relevant file and position info
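            # info[-2] looks like '  File "...", line N, in ...'; slicing off
            # the leading 'F' makes the message read '(file "...", line N, ...)'.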
msg += '\n (f{})'.format(info[-2].split('\n')[0].strip()[1:])
print(file=sys.stderr)
print('*** Error while highlighting:', file=sys.stderr)
print(msg, file=sys.stderr)
print('*** If this is a bug you want to report, please rerun with -v.',
file=sys.stderr)
return 1
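# Illustrative usage sketch (not part of the vendored module): ``main`` takes
# an argv-style list and returns an exit code, so it can be driven
# programmatically, e.g.
#
#   sys.exit(main(['pygmentize', '-l', 'python', '-f', 'html', 'setup.py']))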

View File

@ -0,0 +1,70 @@
"""
pygments.console
~~~~~~~~~~~~~~~~
Format colored console output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
esc = "\x1b["
codes = {}
codes[""] = ""
codes["reset"] = esc + "39;49;00m"
codes["bold"] = esc + "01m"
codes["faint"] = esc + "02m"
codes["standout"] = esc + "03m"
codes["underline"] = esc + "04m"
codes["blink"] = esc + "05m"
codes["overline"] = esc + "06m"
dark_colors = ["black", "red", "green", "yellow", "blue",
"magenta", "cyan", "gray"]
light_colors = ["brightblack", "brightred", "brightgreen", "brightyellow", "brightblue",
"brightmagenta", "brightcyan", "white"]
x = 30
for dark, light in zip(dark_colors, light_colors):
codes[dark] = esc + "%im" % x
codes[light] = esc + "%im" % (60 + x)
x += 1
del dark, light, x
codes["white"] = codes["bold"]
def reset_color():
return codes["reset"]
def colorize(color_key, text):
return codes[color_key] + text + codes["reset"]
def ansiformat(attr, text):
"""
Format ``text`` with a color and/or some attributes::
color normal color
*color* bold color
_color_ underlined color
+color+ blinking color
"""
result = []
if attr[:1] == attr[-1:] == '+':
result.append(codes['blink'])
attr = attr[1:-1]
if attr[:1] == attr[-1:] == '*':
result.append(codes['bold'])
attr = attr[1:-1]
if attr[:1] == attr[-1:] == '_':
result.append(codes['underline'])
attr = attr[1:-1]
result.append(codes[attr])
result.append(text)
result.append(codes['reset'])
return ''.join(result)
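# Illustrative usage (not upstream code): the markers compose, so
#
#   ansiformat('*red*', 'warning')
#
# returns codes['bold'] + codes['red'] + 'warning' + codes['reset'].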

View File

@ -0,0 +1,70 @@
"""
pygments.filter
~~~~~~~~~~~~~~~
Module that implements the default filter.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
def apply_filters(stream, filters, lexer=None):
"""
Use this method to apply an iterable of filters to
a stream. If lexer is given it's forwarded to the
filter, otherwise the filter receives `None`.
"""
def _apply(filter_, stream):
yield from filter_.filter(lexer, stream)
for filter_ in filters:
stream = _apply(filter_, stream)
return stream
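# Illustrative sketch (not upstream code): filters compose lazily, so nothing
# runs until the wrapped stream is iterated, e.g.
#
#   stream = apply_filters(lexer.get_tokens(code), [my_filter])  # hypothetical names
#   for ttype, value in stream:
#       ...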
def simplefilter(f):
"""
Decorator that converts a function into a filter::
@simplefilter
def lowercase(self, lexer, stream, options):
for ttype, value in stream:
yield ttype, value.lower()
"""
return type(f.__name__, (FunctionFilter,), {
'__module__': getattr(f, '__module__'),
'__doc__': f.__doc__,
'function': f,
})
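# Illustrative usage (not upstream code): the decorated function becomes a
# FunctionFilter subclass, so the docstring's ``lowercase`` example is
# instantiated like any other filter, e.g. ``some_lexer.add_filter(lowercase())``.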
class Filter:
"""
Default filter. Subclass this class or use the `simplefilter`
decorator to create own filters.
"""
def __init__(self, **options):
self.options = options
def filter(self, lexer, stream):
raise NotImplementedError()
class FunctionFilter(Filter):
"""
Abstract class used by `simplefilter` to create simple
function filters on the fly. The `simplefilter` decorator
automatically creates subclasses of this class for
functions passed to it.
"""
function = None
def __init__(self, **options):
if not hasattr(self, 'function'):
raise TypeError(f'{self.__class__.__name__!r} used without bound function')
Filter.__init__(self, **options)
def filter(self, lexer, stream):
# pylint: disable=not-callable
yield from self.function(lexer, stream, self.options)

View File

@ -0,0 +1,940 @@
"""
pygments.filters
~~~~~~~~~~~~~~~~
Module containing filter lookup functions and default
filters.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
from pip._vendor.pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
string_to_tokentype
from pip._vendor.pygments.filter import Filter
from pip._vendor.pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
get_choice_opt, ClassNotFound, OptionError
from pip._vendor.pygments.plugin import find_plugin_filters
def find_filter_class(filtername):
"""Lookup a filter by name. Return None if not found."""
if filtername in FILTERS:
return FILTERS[filtername]
for name, cls in find_plugin_filters():
if name == filtername:
return cls
return None
def get_filter_by_name(filtername, **options):
"""Return an instantiated filter.
Options are passed to the filter initializer if wanted.
Raise a ClassNotFound if not found.
"""
cls = find_filter_class(filtername)
if cls:
return cls(**options)
else:
raise ClassNotFound(f'filter {filtername!r} not found')
def get_all_filters():
"""Return a generator of all filter names."""
yield from FILTERS
for name, _ in find_plugin_filters():
yield name
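# Illustrative usage (not upstream code):
#
#   f = get_filter_by_name('keywordcase', case='upper')  # KeywordCaseFilter
#   get_filter_by_name('nosuchfilter')                   # raises ClassNotFound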
def _replace_special(ttype, value, regex, specialttype,
replacefunc=lambda x: x):
last = 0
for match in regex.finditer(value):
start, end = match.start(), match.end()
if start != last:
yield ttype, value[last:start]
yield specialttype, replacefunc(value[start:end])
last = end
if last != len(value):
yield ttype, value[last:]
class CodeTagFilter(Filter):
"""Highlight special code tags in comments and docstrings.
Options accepted:
`codetags` : list of strings
A list of strings that are flagged as code tags. The default is to
highlight ``XXX``, ``TODO``, ``FIXME``, ``BUG`` and ``NOTE``.
.. versionchanged:: 2.13
Now recognizes ``FIXME`` by default.
"""
def __init__(self, **options):
Filter.__init__(self, **options)
tags = get_list_opt(options, 'codetags',
['XXX', 'TODO', 'FIXME', 'BUG', 'NOTE'])
self.tag_re = re.compile(r'\b({})\b'.format('|'.join([
re.escape(tag) for tag in tags if tag
])))
def filter(self, lexer, stream):
regex = self.tag_re
for ttype, value in stream:
if ttype in String.Doc or \
ttype in Comment and \
ttype not in Comment.Preproc:
yield from _replace_special(ttype, value, regex, Comment.Special)
else:
yield ttype, value
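# Usage sketch (not upstream code):
#
#   lexer.add_filter(CodeTagFilter(codetags=['HACK', 'WARNING']))  # hypothetical lexer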
class SymbolFilter(Filter):
"""Convert mathematical symbols such as \\<longrightarrow> in Isabelle
or \\longrightarrow in LaTeX into Unicode characters.
This is mostly useful for HTML or console output when you want to
approximate the source rendering you'd see in an IDE.
Options accepted:
`lang` : string
The symbol language. Must be one of ``'isabelle'`` or
``'latex'``. The default is ``'isabelle'``.
"""
latex_symbols = {
'\\alpha' : '\U000003b1',
'\\beta' : '\U000003b2',
'\\gamma' : '\U000003b3',
'\\delta' : '\U000003b4',
'\\varepsilon' : '\U000003b5',
'\\zeta' : '\U000003b6',
'\\eta' : '\U000003b7',
'\\vartheta' : '\U000003b8',
'\\iota' : '\U000003b9',
'\\kappa' : '\U000003ba',
'\\lambda' : '\U000003bb',
'\\mu' : '\U000003bc',
'\\nu' : '\U000003bd',
'\\xi' : '\U000003be',
'\\pi' : '\U000003c0',
'\\varrho' : '\U000003c1',
'\\sigma' : '\U000003c3',
'\\tau' : '\U000003c4',
'\\upsilon' : '\U000003c5',
'\\varphi' : '\U000003c6',
'\\chi' : '\U000003c7',
'\\psi' : '\U000003c8',
'\\omega' : '\U000003c9',
'\\Gamma' : '\U00000393',
'\\Delta' : '\U00000394',
'\\Theta' : '\U00000398',
'\\Lambda' : '\U0000039b',
'\\Xi' : '\U0000039e',
'\\Pi' : '\U000003a0',
'\\Sigma' : '\U000003a3',
'\\Upsilon' : '\U000003a5',
'\\Phi' : '\U000003a6',
'\\Psi' : '\U000003a8',
'\\Omega' : '\U000003a9',
'\\leftarrow' : '\U00002190',
'\\longleftarrow' : '\U000027f5',
'\\rightarrow' : '\U00002192',
'\\longrightarrow' : '\U000027f6',
'\\Leftarrow' : '\U000021d0',
'\\Longleftarrow' : '\U000027f8',
'\\Rightarrow' : '\U000021d2',
'\\Longrightarrow' : '\U000027f9',
'\\leftrightarrow' : '\U00002194',
'\\longleftrightarrow' : '\U000027f7',
'\\Leftrightarrow' : '\U000021d4',
'\\Longleftrightarrow' : '\U000027fa',
'\\mapsto' : '\U000021a6',
'\\longmapsto' : '\U000027fc',
'\\relbar' : '\U00002500',
'\\Relbar' : '\U00002550',
'\\hookleftarrow' : '\U000021a9',
'\\hookrightarrow' : '\U000021aa',
'\\leftharpoondown' : '\U000021bd',
'\\rightharpoondown' : '\U000021c1',
'\\leftharpoonup' : '\U000021bc',
'\\rightharpoonup' : '\U000021c0',
'\\rightleftharpoons' : '\U000021cc',
'\\leadsto' : '\U0000219d',
'\\downharpoonleft' : '\U000021c3',
'\\downharpoonright' : '\U000021c2',
'\\upharpoonleft' : '\U000021bf',
'\\upharpoonright' : '\U000021be',
'\\restriction' : '\U000021be',
'\\uparrow' : '\U00002191',
'\\Uparrow' : '\U000021d1',
'\\downarrow' : '\U00002193',
'\\Downarrow' : '\U000021d3',
'\\updownarrow' : '\U00002195',
'\\Updownarrow' : '\U000021d5',
'\\langle' : '\U000027e8',
'\\rangle' : '\U000027e9',
'\\lceil' : '\U00002308',
'\\rceil' : '\U00002309',
'\\lfloor' : '\U0000230a',
'\\rfloor' : '\U0000230b',
'\\flqq' : '\U000000ab',
'\\frqq' : '\U000000bb',
'\\bot' : '\U000022a5',
'\\top' : '\U000022a4',
'\\wedge' : '\U00002227',
'\\bigwedge' : '\U000022c0',
'\\vee' : '\U00002228',
'\\bigvee' : '\U000022c1',
'\\forall' : '\U00002200',
'\\exists' : '\U00002203',
'\\nexists' : '\U00002204',
'\\neg' : '\U000000ac',
'\\Box' : '\U000025a1',
'\\Diamond' : '\U000025c7',
'\\vdash' : '\U000022a2',
'\\models' : '\U000022a8',
'\\dashv' : '\U000022a3',
'\\surd' : '\U0000221a',
'\\le' : '\U00002264',
'\\ge' : '\U00002265',
'\\ll' : '\U0000226a',
'\\gg' : '\U0000226b',
'\\lesssim' : '\U00002272',
'\\gtrsim' : '\U00002273',
'\\lessapprox' : '\U00002a85',
'\\gtrapprox' : '\U00002a86',
'\\in' : '\U00002208',
'\\notin' : '\U00002209',
'\\subset' : '\U00002282',
'\\supset' : '\U00002283',
'\\subseteq' : '\U00002286',
'\\supseteq' : '\U00002287',
'\\sqsubset' : '\U0000228f',
'\\sqsupset' : '\U00002290',
'\\sqsubseteq' : '\U00002291',
'\\sqsupseteq' : '\U00002292',
'\\cap' : '\U00002229',
'\\bigcap' : '\U000022c2',
'\\cup' : '\U0000222a',
'\\bigcup' : '\U000022c3',
'\\sqcup' : '\U00002294',
'\\bigsqcup' : '\U00002a06',
'\\sqcap' : '\U00002293',
'\\Bigsqcap' : '\U00002a05',
'\\setminus' : '\U00002216',
'\\propto' : '\U0000221d',
'\\uplus' : '\U0000228e',
'\\bigplus' : '\U00002a04',
'\\sim' : '\U0000223c',
'\\doteq' : '\U00002250',
'\\simeq' : '\U00002243',
'\\approx' : '\U00002248',
'\\asymp' : '\U0000224d',
'\\cong' : '\U00002245',
'\\equiv' : '\U00002261',
'\\Join' : '\U000022c8',
'\\bowtie' : '\U00002a1d',
'\\prec' : '\U0000227a',
'\\succ' : '\U0000227b',
'\\preceq' : '\U0000227c',
'\\succeq' : '\U0000227d',
'\\parallel' : '\U00002225',
'\\mid' : '\U000000a6',
'\\pm' : '\U000000b1',
'\\mp' : '\U00002213',
'\\times' : '\U000000d7',
'\\div' : '\U000000f7',
'\\cdot' : '\U000022c5',
'\\star' : '\U000022c6',
'\\circ' : '\U00002218',
'\\dagger' : '\U00002020',
'\\ddagger' : '\U00002021',
'\\lhd' : '\U000022b2',
'\\rhd' : '\U000022b3',
'\\unlhd' : '\U000022b4',
'\\unrhd' : '\U000022b5',
'\\triangleleft' : '\U000025c3',
'\\triangleright' : '\U000025b9',
'\\triangle' : '\U000025b3',
'\\triangleq' : '\U0000225c',
'\\oplus' : '\U00002295',
'\\bigoplus' : '\U00002a01',
'\\otimes' : '\U00002297',
'\\bigotimes' : '\U00002a02',
'\\odot' : '\U00002299',
'\\bigodot' : '\U00002a00',
'\\ominus' : '\U00002296',
'\\oslash' : '\U00002298',
'\\dots' : '\U00002026',
'\\cdots' : '\U000022ef',
'\\sum' : '\U00002211',
'\\prod' : '\U0000220f',
'\\coprod' : '\U00002210',
'\\infty' : '\U0000221e',
'\\int' : '\U0000222b',
'\\oint' : '\U0000222e',
'\\clubsuit' : '\U00002663',
'\\diamondsuit' : '\U00002662',
'\\heartsuit' : '\U00002661',
'\\spadesuit' : '\U00002660',
'\\aleph' : '\U00002135',
'\\emptyset' : '\U00002205',
'\\nabla' : '\U00002207',
'\\partial' : '\U00002202',
'\\flat' : '\U0000266d',
'\\natural' : '\U0000266e',
'\\sharp' : '\U0000266f',
'\\angle' : '\U00002220',
'\\copyright' : '\U000000a9',
'\\textregistered' : '\U000000ae',
'\\textonequarter' : '\U000000bc',
'\\textonehalf' : '\U000000bd',
'\\textthreequarters' : '\U000000be',
'\\textordfeminine' : '\U000000aa',
'\\textordmasculine' : '\U000000ba',
'\\euro' : '\U000020ac',
'\\pounds' : '\U000000a3',
'\\yen' : '\U000000a5',
'\\textcent' : '\U000000a2',
'\\textcurrency' : '\U000000a4',
'\\textdegree' : '\U000000b0',
}
isabelle_symbols = {
'\\<zero>' : '\U0001d7ec',
'\\<one>' : '\U0001d7ed',
'\\<two>' : '\U0001d7ee',
'\\<three>' : '\U0001d7ef',
'\\<four>' : '\U0001d7f0',
'\\<five>' : '\U0001d7f1',
'\\<six>' : '\U0001d7f2',
'\\<seven>' : '\U0001d7f3',
'\\<eight>' : '\U0001d7f4',
'\\<nine>' : '\U0001d7f5',
'\\<A>' : '\U0001d49c',
'\\<B>' : '\U0000212c',
'\\<C>' : '\U0001d49e',
'\\<D>' : '\U0001d49f',
'\\<E>' : '\U00002130',
'\\<F>' : '\U00002131',
'\\<G>' : '\U0001d4a2',
'\\<H>' : '\U0000210b',
'\\<I>' : '\U00002110',
'\\<J>' : '\U0001d4a5',
'\\<K>' : '\U0001d4a6',
'\\<L>' : '\U00002112',
'\\<M>' : '\U00002133',
'\\<N>' : '\U0001d4a9',
'\\<O>' : '\U0001d4aa',
'\\<P>' : '\U0001d4ab',
'\\<Q>' : '\U0001d4ac',
'\\<R>' : '\U0000211b',
'\\<S>' : '\U0001d4ae',
'\\<T>' : '\U0001d4af',
'\\<U>' : '\U0001d4b0',
'\\<V>' : '\U0001d4b1',
'\\<W>' : '\U0001d4b2',
'\\<X>' : '\U0001d4b3',
'\\<Y>' : '\U0001d4b4',
'\\<Z>' : '\U0001d4b5',
'\\<a>' : '\U0001d5ba',
'\\<b>' : '\U0001d5bb',
'\\<c>' : '\U0001d5bc',
'\\<d>' : '\U0001d5bd',
'\\<e>' : '\U0001d5be',
'\\<f>' : '\U0001d5bf',
'\\<g>' : '\U0001d5c0',
'\\<h>' : '\U0001d5c1',
'\\<i>' : '\U0001d5c2',
'\\<j>' : '\U0001d5c3',
'\\<k>' : '\U0001d5c4',
'\\<l>' : '\U0001d5c5',
'\\<m>' : '\U0001d5c6',
'\\<n>' : '\U0001d5c7',
'\\<o>' : '\U0001d5c8',
'\\<p>' : '\U0001d5c9',
'\\<q>' : '\U0001d5ca',
'\\<r>' : '\U0001d5cb',
'\\<s>' : '\U0001d5cc',
'\\<t>' : '\U0001d5cd',
'\\<u>' : '\U0001d5ce',
'\\<v>' : '\U0001d5cf',
'\\<w>' : '\U0001d5d0',
'\\<x>' : '\U0001d5d1',
'\\<y>' : '\U0001d5d2',
'\\<z>' : '\U0001d5d3',
'\\<AA>' : '\U0001d504',
'\\<BB>' : '\U0001d505',
'\\<CC>' : '\U0000212d',
'\\<DD>' : '\U0001d507',
'\\<EE>' : '\U0001d508',
'\\<FF>' : '\U0001d509',
'\\<GG>' : '\U0001d50a',
'\\<HH>' : '\U0000210c',
'\\<II>' : '\U00002111',
'\\<JJ>' : '\U0001d50d',
'\\<KK>' : '\U0001d50e',
'\\<LL>' : '\U0001d50f',
'\\<MM>' : '\U0001d510',
'\\<NN>' : '\U0001d511',
'\\<OO>' : '\U0001d512',
'\\<PP>' : '\U0001d513',
'\\<QQ>' : '\U0001d514',
'\\<RR>' : '\U0000211c',
'\\<SS>' : '\U0001d516',
'\\<TT>' : '\U0001d517',
'\\<UU>' : '\U0001d518',
'\\<VV>' : '\U0001d519',
'\\<WW>' : '\U0001d51a',
'\\<XX>' : '\U0001d51b',
'\\<YY>' : '\U0001d51c',
'\\<ZZ>' : '\U00002128',
'\\<aa>' : '\U0001d51e',
'\\<bb>' : '\U0001d51f',
'\\<cc>' : '\U0001d520',
'\\<dd>' : '\U0001d521',
'\\<ee>' : '\U0001d522',
'\\<ff>' : '\U0001d523',
'\\<gg>' : '\U0001d524',
'\\<hh>' : '\U0001d525',
'\\<ii>' : '\U0001d526',
'\\<jj>' : '\U0001d527',
'\\<kk>' : '\U0001d528',
'\\<ll>' : '\U0001d529',
'\\<mm>' : '\U0001d52a',
'\\<nn>' : '\U0001d52b',
'\\<oo>' : '\U0001d52c',
'\\<pp>' : '\U0001d52d',
'\\<qq>' : '\U0001d52e',
'\\<rr>' : '\U0001d52f',
'\\<ss>' : '\U0001d530',
'\\<tt>' : '\U0001d531',
'\\<uu>' : '\U0001d532',
'\\<vv>' : '\U0001d533',
'\\<ww>' : '\U0001d534',
'\\<xx>' : '\U0001d535',
'\\<yy>' : '\U0001d536',
'\\<zz>' : '\U0001d537',
'\\<alpha>' : '\U000003b1',
'\\<beta>' : '\U000003b2',
'\\<gamma>' : '\U000003b3',
'\\<delta>' : '\U000003b4',
'\\<epsilon>' : '\U000003b5',
'\\<zeta>' : '\U000003b6',
'\\<eta>' : '\U000003b7',
'\\<theta>' : '\U000003b8',
'\\<iota>' : '\U000003b9',
'\\<kappa>' : '\U000003ba',
'\\<lambda>' : '\U000003bb',
'\\<mu>' : '\U000003bc',
'\\<nu>' : '\U000003bd',
'\\<xi>' : '\U000003be',
'\\<pi>' : '\U000003c0',
'\\<rho>' : '\U000003c1',
'\\<sigma>' : '\U000003c3',
'\\<tau>' : '\U000003c4',
'\\<upsilon>' : '\U000003c5',
'\\<phi>' : '\U000003c6',
'\\<chi>' : '\U000003c7',
'\\<psi>' : '\U000003c8',
'\\<omega>' : '\U000003c9',
'\\<Gamma>' : '\U00000393',
'\\<Delta>' : '\U00000394',
'\\<Theta>' : '\U00000398',
'\\<Lambda>' : '\U0000039b',
'\\<Xi>' : '\U0000039e',
'\\<Pi>' : '\U000003a0',
'\\<Sigma>' : '\U000003a3',
'\\<Upsilon>' : '\U000003a5',
'\\<Phi>' : '\U000003a6',
'\\<Psi>' : '\U000003a8',
'\\<Omega>' : '\U000003a9',
'\\<bool>' : '\U0001d539',
'\\<complex>' : '\U00002102',
'\\<nat>' : '\U00002115',
'\\<rat>' : '\U0000211a',
'\\<real>' : '\U0000211d',
'\\<int>' : '\U00002124',
'\\<leftarrow>' : '\U00002190',
'\\<longleftarrow>' : '\U000027f5',
'\\<rightarrow>' : '\U00002192',
'\\<longrightarrow>' : '\U000027f6',
'\\<Leftarrow>' : '\U000021d0',
'\\<Longleftarrow>' : '\U000027f8',
'\\<Rightarrow>' : '\U000021d2',
'\\<Longrightarrow>' : '\U000027f9',
'\\<leftrightarrow>' : '\U00002194',
'\\<longleftrightarrow>' : '\U000027f7',
'\\<Leftrightarrow>' : '\U000021d4',
'\\<Longleftrightarrow>' : '\U000027fa',
'\\<mapsto>' : '\U000021a6',
'\\<longmapsto>' : '\U000027fc',
'\\<midarrow>' : '\U00002500',
'\\<Midarrow>' : '\U00002550',
'\\<hookleftarrow>' : '\U000021a9',
'\\<hookrightarrow>' : '\U000021aa',
'\\<leftharpoondown>' : '\U000021bd',
'\\<rightharpoondown>' : '\U000021c1',
'\\<leftharpoonup>' : '\U000021bc',
'\\<rightharpoonup>' : '\U000021c0',
'\\<rightleftharpoons>' : '\U000021cc',
'\\<leadsto>' : '\U0000219d',
'\\<downharpoonleft>' : '\U000021c3',
'\\<downharpoonright>' : '\U000021c2',
'\\<upharpoonleft>' : '\U000021bf',
'\\<upharpoonright>' : '\U000021be',
'\\<restriction>' : '\U000021be',
'\\<Colon>' : '\U00002237',
'\\<up>' : '\U00002191',
'\\<Up>' : '\U000021d1',
'\\<down>' : '\U00002193',
'\\<Down>' : '\U000021d3',
'\\<updown>' : '\U00002195',
'\\<Updown>' : '\U000021d5',
'\\<langle>' : '\U000027e8',
'\\<rangle>' : '\U000027e9',
'\\<lceil>' : '\U00002308',
'\\<rceil>' : '\U00002309',
'\\<lfloor>' : '\U0000230a',
'\\<rfloor>' : '\U0000230b',
'\\<lparr>' : '\U00002987',
'\\<rparr>' : '\U00002988',
'\\<lbrakk>' : '\U000027e6',
'\\<rbrakk>' : '\U000027e7',
'\\<lbrace>' : '\U00002983',
'\\<rbrace>' : '\U00002984',
'\\<guillemotleft>' : '\U000000ab',
'\\<guillemotright>' : '\U000000bb',
'\\<bottom>' : '\U000022a5',
'\\<top>' : '\U000022a4',
'\\<and>' : '\U00002227',
'\\<And>' : '\U000022c0',
'\\<or>' : '\U00002228',
'\\<Or>' : '\U000022c1',
'\\<forall>' : '\U00002200',
'\\<exists>' : '\U00002203',
'\\<nexists>' : '\U00002204',
'\\<not>' : '\U000000ac',
'\\<box>' : '\U000025a1',
'\\<diamond>' : '\U000025c7',
'\\<turnstile>' : '\U000022a2',
'\\<Turnstile>' : '\U000022a8',
'\\<tturnstile>' : '\U000022a9',
'\\<TTurnstile>' : '\U000022ab',
'\\<stileturn>' : '\U000022a3',
'\\<surd>' : '\U0000221a',
'\\<le>' : '\U00002264',
'\\<ge>' : '\U00002265',
'\\<lless>' : '\U0000226a',
'\\<ggreater>' : '\U0000226b',
'\\<lesssim>' : '\U00002272',
'\\<greatersim>' : '\U00002273',
'\\<lessapprox>' : '\U00002a85',
'\\<greaterapprox>' : '\U00002a86',
'\\<in>' : '\U00002208',
'\\<notin>' : '\U00002209',
'\\<subset>' : '\U00002282',
'\\<supset>' : '\U00002283',
'\\<subseteq>' : '\U00002286',
'\\<supseteq>' : '\U00002287',
'\\<sqsubset>' : '\U0000228f',
'\\<sqsupset>' : '\U00002290',
'\\<sqsubseteq>' : '\U00002291',
'\\<sqsupseteq>' : '\U00002292',
'\\<inter>' : '\U00002229',
'\\<Inter>' : '\U000022c2',
'\\<union>' : '\U0000222a',
'\\<Union>' : '\U000022c3',
'\\<squnion>' : '\U00002294',
'\\<Squnion>' : '\U00002a06',
'\\<sqinter>' : '\U00002293',
'\\<Sqinter>' : '\U00002a05',
'\\<setminus>' : '\U00002216',
'\\<propto>' : '\U0000221d',
'\\<uplus>' : '\U0000228e',
'\\<Uplus>' : '\U00002a04',
'\\<noteq>' : '\U00002260',
'\\<sim>' : '\U0000223c',
'\\<doteq>' : '\U00002250',
'\\<simeq>' : '\U00002243',
'\\<approx>' : '\U00002248',
'\\<asymp>' : '\U0000224d',
'\\<cong>' : '\U00002245',
'\\<smile>' : '\U00002323',
'\\<equiv>' : '\U00002261',
'\\<frown>' : '\U00002322',
'\\<Join>' : '\U000022c8',
'\\<bowtie>' : '\U00002a1d',
'\\<prec>' : '\U0000227a',
'\\<succ>' : '\U0000227b',
'\\<preceq>' : '\U0000227c',
'\\<succeq>' : '\U0000227d',
'\\<parallel>' : '\U00002225',
'\\<bar>' : '\U000000a6',
'\\<plusminus>' : '\U000000b1',
'\\<minusplus>' : '\U00002213',
'\\<times>' : '\U000000d7',
'\\<div>' : '\U000000f7',
'\\<cdot>' : '\U000022c5',
'\\<star>' : '\U000022c6',
'\\<bullet>' : '\U00002219',
'\\<circ>' : '\U00002218',
'\\<dagger>' : '\U00002020',
'\\<ddagger>' : '\U00002021',
'\\<lhd>' : '\U000022b2',
'\\<rhd>' : '\U000022b3',
'\\<unlhd>' : '\U000022b4',
'\\<unrhd>' : '\U000022b5',
'\\<triangleleft>' : '\U000025c3',
'\\<triangleright>' : '\U000025b9',
'\\<triangle>' : '\U000025b3',
'\\<triangleq>' : '\U0000225c',
'\\<oplus>' : '\U00002295',
'\\<Oplus>' : '\U00002a01',
'\\<otimes>' : '\U00002297',
'\\<Otimes>' : '\U00002a02',
'\\<odot>' : '\U00002299',
'\\<Odot>' : '\U00002a00',
'\\<ominus>' : '\U00002296',
'\\<oslash>' : '\U00002298',
'\\<dots>' : '\U00002026',
'\\<cdots>' : '\U000022ef',
'\\<Sum>' : '\U00002211',
'\\<Prod>' : '\U0000220f',
'\\<Coprod>' : '\U00002210',
'\\<infinity>' : '\U0000221e',
'\\<integral>' : '\U0000222b',
'\\<ointegral>' : '\U0000222e',
'\\<clubsuit>' : '\U00002663',
'\\<diamondsuit>' : '\U00002662',
'\\<heartsuit>' : '\U00002661',
'\\<spadesuit>' : '\U00002660',
'\\<aleph>' : '\U00002135',
'\\<emptyset>' : '\U00002205',
'\\<nabla>' : '\U00002207',
'\\<partial>' : '\U00002202',
'\\<flat>' : '\U0000266d',
'\\<natural>' : '\U0000266e',
'\\<sharp>' : '\U0000266f',
'\\<angle>' : '\U00002220',
'\\<copyright>' : '\U000000a9',
'\\<registered>' : '\U000000ae',
'\\<hyphen>' : '\U000000ad',
'\\<inverse>' : '\U000000af',
'\\<onequarter>' : '\U000000bc',
'\\<onehalf>' : '\U000000bd',
'\\<threequarters>' : '\U000000be',
'\\<ordfeminine>' : '\U000000aa',
'\\<ordmasculine>' : '\U000000ba',
'\\<section>' : '\U000000a7',
'\\<paragraph>' : '\U000000b6',
'\\<exclamdown>' : '\U000000a1',
'\\<questiondown>' : '\U000000bf',
'\\<euro>' : '\U000020ac',
'\\<pounds>' : '\U000000a3',
'\\<yen>' : '\U000000a5',
'\\<cent>' : '\U000000a2',
'\\<currency>' : '\U000000a4',
'\\<degree>' : '\U000000b0',
'\\<amalg>' : '\U00002a3f',
'\\<mho>' : '\U00002127',
'\\<lozenge>' : '\U000025ca',
'\\<wp>' : '\U00002118',
'\\<wrong>' : '\U00002240',
'\\<struct>' : '\U000022c4',
'\\<acute>' : '\U000000b4',
'\\<index>' : '\U00000131',
'\\<dieresis>' : '\U000000a8',
'\\<cedilla>' : '\U000000b8',
'\\<hungarumlaut>' : '\U000002dd',
'\\<some>' : '\U000003f5',
'\\<newline>' : '\U000023ce',
'\\<open>' : '\U00002039',
'\\<close>' : '\U0000203a',
'\\<here>' : '\U00002302',
'\\<^sub>' : '\U000021e9',
'\\<^sup>' : '\U000021e7',
'\\<^bold>' : '\U00002759',
'\\<^bsub>' : '\U000021d8',
'\\<^esub>' : '\U000021d9',
'\\<^bsup>' : '\U000021d7',
'\\<^esup>' : '\U000021d6',
}
lang_map = {'isabelle' : isabelle_symbols, 'latex' : latex_symbols}
def __init__(self, **options):
Filter.__init__(self, **options)
lang = get_choice_opt(options, 'lang',
['isabelle', 'latex'], 'isabelle')
self.symbols = self.lang_map[lang]
def filter(self, lexer, stream):
for ttype, value in stream:
if value in self.symbols:
yield ttype, self.symbols[value]
else:
yield ttype, value
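# Usage sketch (not upstream code): with ``lang='latex'``, a token whose text
# is exactly '\alpha' is re-emitted as the single character U+03B1, e.g.
#
#   lexer.add_filter(SymbolFilter(lang='latex'))  # hypothetical lexer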
class KeywordCaseFilter(Filter):
"""Convert keywords to lowercase or uppercase or capitalize them, which
means first letter uppercase, rest lowercase.
    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your style guide.
Options accepted:
`case` : string
The casing to convert keywords to. Must be one of ``'lower'``,
``'upper'`` or ``'capitalize'``. The default is ``'lower'``.
"""
def __init__(self, **options):
Filter.__init__(self, **options)
case = get_choice_opt(options, 'case',
['lower', 'upper', 'capitalize'], 'lower')
self.convert = getattr(str, case)
def filter(self, lexer, stream):
for ttype, value in stream:
if ttype in Keyword:
yield ttype, self.convert(value)
else:
yield ttype, value
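# Usage sketch (not upstream code): ``getattr(str, case)`` above binds one of
# str.lower, str.upper or str.capitalize, so e.g.
#
#   lexer.add_filter(KeywordCaseFilter(case='upper'))  # hypothetical lexer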
class NameHighlightFilter(Filter):
"""Highlight a normal Name (and Name.*) token with a different token type.
Example::
filter = NameHighlightFilter(
names=['foo', 'bar', 'baz'],
tokentype=Name.Function,
)
This would highlight the names "foo", "bar" and "baz"
as functions. `Name.Function` is the default token type.
Options accepted:
`names` : list of strings
A list of names that should be given the different token type.
There is no default.
`tokentype` : TokenType or string
A token type or a string containing a token type name that is
used for highlighting the strings in `names`. The default is
`Name.Function`.
"""
def __init__(self, **options):
Filter.__init__(self, **options)
self.names = set(get_list_opt(options, 'names', []))
tokentype = options.get('tokentype')
if tokentype:
self.tokentype = string_to_tokentype(tokentype)
else:
self.tokentype = Name.Function
def filter(self, lexer, stream):
for ttype, value in stream:
if ttype in Name and value in self.names:
yield self.tokentype, value
else:
yield ttype, value
class ErrorToken(Exception):
pass
class RaiseOnErrorTokenFilter(Filter):
"""Raise an exception when the lexer generates an error token.
Options accepted:
`excclass` : Exception class
The exception class to raise.
The default is `pygments.filters.ErrorToken`.
.. versionadded:: 0.8
"""
def __init__(self, **options):
Filter.__init__(self, **options)
self.exception = options.get('excclass', ErrorToken)
try:
# issubclass() will raise TypeError if first argument is not a class
if not issubclass(self.exception, Exception):
raise TypeError
except TypeError:
raise OptionError('excclass option is not an exception class')
def filter(self, lexer, stream):
for ttype, value in stream:
if ttype is Error:
raise self.exception(value)
yield ttype, value
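# Usage sketch (not upstream code):
#
#   lexer.add_filter(RaiseOnErrorTokenFilter(excclass=ValueError))  # hypothetical lexer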
class VisibleWhitespaceFilter(Filter):
"""Convert tabs, newlines and/or spaces to visible characters.
Options accepted:
`spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this string.
If it is another true value, spaces will be replaced by ``·`` (unicode
MIDDLE DOT). If it is a false value, spaces will not be replaced. The
default is ``False``.
`tabs` : string or bool
The same as for `spaces`, but the default replacement character is ``»``
(unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK). The default value
is ``False``. Note: this will not work if the `tabsize` option for the
lexer is nonzero, as tabs will already have been expanded then.
`tabsize` : int
If tabs are to be replaced by this filter (see the `tabs` option), this
is the total number of characters that a tab should be expanded to.
The default is ``8``.
`newlines` : string or bool
The same as for `spaces`, but the default replacement character is ``¶``
(unicode PILCROW SIGN). The default value is ``False``.
`wstokentype` : bool
If true, give whitespace the special `Whitespace` token type. This allows
styling the visible whitespace differently (e.g. greyed out), but it can
disrupt background colors. The default is ``True``.
.. versionadded:: 0.8
"""
def __init__(self, **options):
Filter.__init__(self, **options)
for name, default in [('spaces', '·'),
('tabs', '»'),
('newlines', '')]:
opt = options.get(name, False)
if isinstance(opt, str) and len(opt) == 1:
setattr(self, name, opt)
else:
setattr(self, name, (opt and default or ''))
tabsize = get_int_opt(options, 'tabsize', 8)
if self.tabs:
self.tabs += ' ' * (tabsize - 1)
if self.newlines:
self.newlines += '\n'
self.wstt = get_bool_opt(options, 'wstokentype', True)
def filter(self, lexer, stream):
if self.wstt:
spaces = self.spaces or ' '
tabs = self.tabs or '\t'
newlines = self.newlines or '\n'
regex = re.compile(r'\s')
def replacefunc(wschar):
if wschar == ' ':
return spaces
elif wschar == '\t':
return tabs
elif wschar == '\n':
return newlines
return wschar
for ttype, value in stream:
yield from _replace_special(ttype, value, regex, Whitespace,
replacefunc)
else:
spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
# simpler processing
for ttype, value in stream:
if spaces:
value = value.replace(' ', spaces)
if tabs:
value = value.replace('\t', tabs)
if newlines:
value = value.replace('\n', newlines)
yield ttype, value
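# Usage sketch (not upstream code): render spaces and tabs with the default
# markers while keeping newlines invisible:
#
#   lexer.add_filter(VisibleWhitespaceFilter(spaces=True, tabs=True))  # hypothetical lexer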
class GobbleFilter(Filter):
"""Gobbles source code lines (eats initial characters).
This filter drops the first ``n`` characters off every line of code. This
may be useful when the source code fed to the lexer is indented by a fixed
amount of space that isn't desired in the output.
Options accepted:
`n` : int
The number of characters to gobble.
.. versionadded:: 1.2
"""
def __init__(self, **options):
Filter.__init__(self, **options)
self.n = get_int_opt(options, 'n', 0)
def gobble(self, value, left):
if left < len(value):
return value[left:], 0
else:
return '', left - len(value)
def filter(self, lexer, stream):
n = self.n
left = n # How many characters left to gobble.
for ttype, value in stream:
            # Remove ``left`` characters from the first line, ``n`` from all others.
parts = value.split('\n')
(parts[0], left) = self.gobble(parts[0], left)
for i in range(1, len(parts)):
(parts[i], left) = self.gobble(parts[i], n)
value = '\n'.join(parts)
if value != '':
yield ttype, value
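# Usage sketch (not upstream code): strip a fixed four-character indent from
# every line before highlighting:
#
#   lexer.add_filter(GobbleFilter(n=4))  # hypothetical lexer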
class TokenMergeFilter(Filter):
"""Merges consecutive tokens with the same token type in the output
stream of a lexer.
.. versionadded:: 1.2
"""
def __init__(self, **options):
Filter.__init__(self, **options)
def filter(self, lexer, stream):
current_type = None
current_value = None
for ttype, value in stream:
if ttype is current_type:
current_value += value
else:
if current_type is not None:
yield current_type, current_value
current_type = ttype
current_value = value
if current_type is not None:
yield current_type, current_value
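# For illustration (not upstream code): a stream like
# [(Token.Text, 'a'), (Token.Text, 'b')] comes out as [(Token.Text, 'ab')].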
FILTERS = {
'codetagify': CodeTagFilter,
'keywordcase': KeywordCaseFilter,
'highlight': NameHighlightFilter,
'raiseonerror': RaiseOnErrorTokenFilter,
'whitespace': VisibleWhitespaceFilter,
'gobble': GobbleFilter,
'tokenmerge': TokenMergeFilter,
'symbols': SymbolFilter,
}

View File

@ -0,0 +1,129 @@
"""
pygments.formatter
~~~~~~~~~~~~~~~~~~
Base formatter class.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import codecs
from pip._vendor.pygments.util import get_bool_opt
from pip._vendor.pygments.styles import get_style_by_name
__all__ = ['Formatter']
def _lookup_style(style):
if isinstance(style, str):
return get_style_by_name(style)
return style
class Formatter:
"""
Converts a token stream to text.
Formatters should have attributes to help selecting them. These
are similar to the corresponding :class:`~pygments.lexer.Lexer`
attributes.
.. autoattribute:: name
:no-value:
.. autoattribute:: aliases
:no-value:
.. autoattribute:: filenames
:no-value:
You can pass options as keyword arguments to the constructor.
All formatters accept these basic options:
``style``
The style to use, can be a string or a Style subclass
(default: "default"). Not used by e.g. the
TerminalFormatter.
``full``
Tells the formatter to output a "full" document, i.e.
a complete self-contained document. This doesn't have
any effect for some formatters (default: false).
``title``
If ``full`` is true, the title that should be used to
caption the document (default: '').
``encoding``
If given, must be an encoding name. This will be used to
convert the Unicode token strings to byte strings in the
output. If it is "" or None, Unicode strings will be written
to the output file, which most file-like objects do not
support (default: None).
``outencoding``
Overrides ``encoding`` if given.
"""
#: Full name for the formatter, in human-readable form.
name = None
#: A list of short, unique identifiers that can be used to lookup
#: the formatter from a list, e.g. using :func:`.get_formatter_by_name()`.
aliases = []
#: A list of fnmatch patterns that match filenames for which this
#: formatter can produce output. The patterns in this list should be unique
#: among all formatters.
filenames = []
#: If True, this formatter outputs Unicode strings when no encoding
#: option is given.
unicodeoutput = True
def __init__(self, **options):
"""
As with lexers, this constructor takes arbitrary optional arguments,
and if you override it, you should first process your own options, then
call the base class implementation.
"""
self.style = _lookup_style(options.get('style', 'default'))
self.full = get_bool_opt(options, 'full', False)
self.title = options.get('title', '')
self.encoding = options.get('encoding', None) or None
if self.encoding in ('guess', 'chardet'):
# can happen for e.g. pygmentize -O encoding=guess
self.encoding = 'utf-8'
self.encoding = options.get('outencoding') or self.encoding
self.options = options
def get_style_defs(self, arg=''):
"""
This method must return statements or declarations suitable to define
the current style for subsequent highlighted text (e.g. CSS classes
in the `HTMLFormatter`).
The optional argument `arg` can be used to modify the generation and
is formatter dependent (it is standardized because it can be given on
the command line).
This method is called by the ``-S`` :doc:`command-line option <cmdline>`,
the `arg` is then given by the ``-a`` option.
"""
return ''
def format(self, tokensource, outfile):
"""
This method must format the tokens from the `tokensource` iterable and
write the formatted version to the file object `outfile`.
Formatter options can control how exactly the tokens are converted.
"""
if self.encoding:
# wrap the outfile in a StreamWriter
outfile = codecs.lookup(self.encoding)[3](outfile)
return self.format_unencoded(tokensource, outfile)
# Allow writing Formatter[str] or Formatter[bytes]. That's equivalent to
# Formatter. This helps when using third-party type stubs from typeshed.
def __class_getitem__(cls, name):
return cls
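# Illustrative sketch (not upstream code): a minimal Formatter subclass only
# needs ``format_unencoded``; the base class handles encoding and options.
#
#   class PlainFormatter(Formatter):
#       name = 'Plain'
#       aliases = ['plain']
#
#       def format_unencoded(self, tokensource, outfile):
#           for ttype, value in tokensource:
#               outfile.write(value)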

View File

@ -0,0 +1,157 @@
"""
pygments.formatters
~~~~~~~~~~~~~~~~~~~
Pygments formatters.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
import sys
import types
import fnmatch
from os.path import basename
from pip._vendor.pygments.formatters._mapping import FORMATTERS
from pip._vendor.pygments.plugin import find_plugin_formatters
from pip._vendor.pygments.util import ClassNotFound
__all__ = ['get_formatter_by_name', 'get_formatter_for_filename',
'get_all_formatters', 'load_formatter_from_file'] + list(FORMATTERS)
_formatter_cache = {} # classes by name
_pattern_cache = {}
def _fn_matches(fn, glob):
"""Return whether the supplied file name fn matches pattern filename."""
if glob not in _pattern_cache:
pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob))
return pattern.match(fn)
return _pattern_cache[glob].match(fn)
def _load_formatters(module_name):
"""Load a formatter (and all others in the module too)."""
mod = __import__(module_name, None, None, ['__all__'])
for formatter_name in mod.__all__:
cls = getattr(mod, formatter_name)
_formatter_cache[cls.name] = cls
def get_all_formatters():
"""Return a generator for all formatter classes."""
# NB: this returns formatter classes, not info like get_all_lexers().
for info in FORMATTERS.values():
if info[1] not in _formatter_cache:
_load_formatters(info[0])
yield _formatter_cache[info[1]]
for _, formatter in find_plugin_formatters():
yield formatter
def find_formatter_class(alias):
"""Lookup a formatter by alias.
Returns None if not found.
"""
for module_name, name, aliases, _, _ in FORMATTERS.values():
if alias in aliases:
if name not in _formatter_cache:
_load_formatters(module_name)
return _formatter_cache[name]
for _, cls in find_plugin_formatters():
if alias in cls.aliases:
return cls
def get_formatter_by_name(_alias, **options):
"""
Return an instance of a :class:`.Formatter` subclass that has `alias` in its
aliases list. The formatter is given the `options` at its instantiation.
Will raise :exc:`pygments.util.ClassNotFound` if no formatter with that
alias is found.
"""
cls = find_formatter_class(_alias)
if cls is None:
raise ClassNotFound(f"no formatter found for name {_alias!r}")
return cls(**options)
def load_formatter_from_file(filename, formattername="CustomFormatter", **options):
"""
Return a `Formatter` subclass instance loaded from the provided file, relative
to the current directory.
The file is expected to contain a Formatter class named ``formattername``
(by default, CustomFormatter). Users should be very careful with the input, because
    this method is equivalent to running ``exec()`` on the input file. The formatter is
given the `options` at its instantiation.
:exc:`pygments.util.ClassNotFound` is raised if there are any errors loading
the formatter.
.. versionadded:: 2.2
"""
try:
# This empty dict will contain the namespace for the exec'd file
custom_namespace = {}
with open(filename, 'rb') as f:
exec(f.read(), custom_namespace)
# Retrieve the class `formattername` from that namespace
if formattername not in custom_namespace:
raise ClassNotFound(f'no valid {formattername} class found in {filename}')
formatter_class = custom_namespace[formattername]
# And finally instantiate it with the options
return formatter_class(**options)
except OSError as err:
raise ClassNotFound(f'cannot read {filename}: {err}')
except ClassNotFound:
raise
except Exception as err:
raise ClassNotFound(f'error when loading custom formatter: {err}')
def get_formatter_for_filename(fn, **options):
"""
Return a :class:`.Formatter` subclass instance that has a filename pattern
matching `fn`. The formatter is given the `options` at its instantiation.
Will raise :exc:`pygments.util.ClassNotFound` if no formatter for that filename
is found.
"""
fn = basename(fn)
for modname, name, _, filenames, _ in FORMATTERS.values():
for filename in filenames:
if _fn_matches(fn, filename):
if name not in _formatter_cache:
_load_formatters(modname)
return _formatter_cache[name](**options)
for _name, cls in find_plugin_formatters():
for filename in cls.filenames:
if _fn_matches(fn, filename):
return cls(**options)
raise ClassNotFound(f"no formatter found for file name {fn!r}")
class _automodule(types.ModuleType):
"""Automatically import formatters."""
def __getattr__(self, name):
info = FORMATTERS.get(name)
if info:
_load_formatters(info[0])
cls = _formatter_cache[info[1]]
setattr(self, name, cls)
return cls
raise AttributeError(name)
oldmod = sys.modules[__name__]
newmod = _automodule(__name__)
newmod.__dict__.update(oldmod.__dict__)
sys.modules[__name__] = newmod
del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types
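# The module-class swap above makes attribute access lazy: e.g.
# ``from pip._vendor.pygments.formatters import HtmlFormatter`` only loads the
# defining module on first access, via _automodule.__getattr__.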

View File

@ -0,0 +1,23 @@
# Automatically generated by scripts/gen_mapfiles.py.
# DO NOT EDIT BY HAND; run `tox -e mapfiles` instead.
FORMATTERS = {
'BBCodeFormatter': ('pygments.formatters.bbcode', 'BBCode', ('bbcode', 'bb'), (), 'Format tokens with BBcodes. These formatting codes are used by many bulletin boards, so you can highlight your sourcecode with pygments before posting it there.'),
'BmpImageFormatter': ('pygments.formatters.img', 'img_bmp', ('bmp', 'bitmap'), ('*.bmp',), 'Create a bitmap image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.'),
'GifImageFormatter': ('pygments.formatters.img', 'img_gif', ('gif',), ('*.gif',), 'Create a GIF image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.'),
'GroffFormatter': ('pygments.formatters.groff', 'groff', ('groff', 'troff', 'roff'), (), 'Format tokens with groff escapes to change their color and font style.'),
'HtmlFormatter': ('pygments.formatters.html', 'HTML', ('html',), ('*.html', '*.htm'), "Format tokens as HTML 4 ``<span>`` tags. By default, the content is enclosed in a ``<pre>`` tag, itself wrapped in a ``<div>`` tag (but see the `nowrap` option). The ``<div>``'s CSS class can be set by the `cssclass` option."),
'IRCFormatter': ('pygments.formatters.irc', 'IRC', ('irc', 'IRC'), (), 'Format tokens with IRC color sequences'),
'ImageFormatter': ('pygments.formatters.img', 'img', ('img', 'IMG', 'png'), ('*.png',), 'Create a PNG image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.'),
'JpgImageFormatter': ('pygments.formatters.img', 'img_jpg', ('jpg', 'jpeg'), ('*.jpg',), 'Create a JPEG image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.'),
'LatexFormatter': ('pygments.formatters.latex', 'LaTeX', ('latex', 'tex'), ('*.tex',), 'Format tokens as LaTeX code. This needs the `fancyvrb` and `color` standard packages.'),
'NullFormatter': ('pygments.formatters.other', 'Text only', ('text', 'null'), ('*.txt',), 'Output the text unchanged without any formatting.'),
'PangoMarkupFormatter': ('pygments.formatters.pangomarkup', 'Pango Markup', ('pango', 'pangomarkup'), (), 'Format tokens as Pango Markup code. It can then be rendered to an SVG.'),
'RawTokenFormatter': ('pygments.formatters.other', 'Raw tokens', ('raw', 'tokens'), ('*.raw',), 'Format tokens as a raw representation for storing token streams.'),
'RtfFormatter': ('pygments.formatters.rtf', 'RTF', ('rtf',), ('*.rtf',), 'Format tokens as RTF markup. This formatter automatically outputs full RTF documents with color information and other useful stuff. Perfect for Copy and Paste into Microsoft(R) Word(R) documents.'),
'SvgFormatter': ('pygments.formatters.svg', 'SVG', ('svg',), ('*.svg',), 'Format tokens as an SVG graphics file. This formatter is still experimental. Each line of code is a ``<text>`` element with explicit ``x`` and ``y`` coordinates containing ``<tspan>`` elements with the individual token styles.'),
'Terminal256Formatter': ('pygments.formatters.terminal256', 'Terminal256', ('terminal256', 'console256', '256'), (), 'Format tokens with ANSI color sequences, for output in a 256-color terminal or console. Like in `TerminalFormatter` color sequences are terminated at newlines, so that paging the output works correctly.'),
'TerminalFormatter': ('pygments.formatters.terminal', 'Terminal', ('terminal', 'console'), (), 'Format tokens with ANSI color sequences, for output in a text console. Color sequences are terminated at newlines, so that paging the output works correctly.'),
'TerminalTrueColorFormatter': ('pygments.formatters.terminal256', 'TerminalTrueColor', ('terminal16m', 'console16m', '16m'), (), 'Format tokens with ANSI color sequences, for output in a true-color terminal or console. Like in `TerminalFormatter` color sequences are terminated at newlines, so that paging the output works correctly.'),
'TestcaseFormatter': ('pygments.formatters.other', 'Testcase', ('testcase',), (), 'Format tokens as appropriate for a new testcase.'),
}

View File

@ -0,0 +1,108 @@
"""
pygments.formatters.bbcode
~~~~~~~~~~~~~~~~~~~~~~~~~~
BBcode formatter.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.util import get_bool_opt
__all__ = ['BBCodeFormatter']
class BBCodeFormatter(Formatter):
"""
Format tokens with BBcodes. These formatting codes are used by many
bulletin boards, so you can highlight your sourcecode with pygments before
posting it there.
This formatter has no support for background colors and borders, as there
are no common BBcode tags for that.
Some board systems (e.g. phpBB) don't support colors in their [code] tag,
so you can't use the highlighting together with that tag.
    Text in a [code] tag is usually shown with a monospace font (which this
    formatter can do with the ``monofont`` option), and spaces (which you
    need for indentation) are not removed.
Additional options accepted:
`style`
The style to use, can be a string or a Style subclass (default:
``'default'``).
`codetag`
If set to true, put the output into ``[code]`` tags (default:
``false``)
`monofont`
If set to true, add a tag to show the code with a monospace font
(default: ``false``).
"""
name = 'BBCode'
aliases = ['bbcode', 'bb']
filenames = []
def __init__(self, **options):
Formatter.__init__(self, **options)
self._code = get_bool_opt(options, 'codetag', False)
self._mono = get_bool_opt(options, 'monofont', False)
self.styles = {}
self._make_styles()
def _make_styles(self):
for ttype, ndef in self.style:
start = end = ''
if ndef['color']:
start += '[color=#{}]'.format(ndef['color'])
end = '[/color]' + end
if ndef['bold']:
start += '[b]'
end = '[/b]' + end
if ndef['italic']:
start += '[i]'
end = '[/i]' + end
if ndef['underline']:
start += '[u]'
end = '[/u]' + end
# there are no common BBcodes for background-color and border
self.styles[ttype] = start, end
def format_unencoded(self, tokensource, outfile):
if self._code:
outfile.write('[code]')
if self._mono:
outfile.write('[font=monospace]')
lastval = ''
lasttype = None
for ttype, value in tokensource:
while ttype not in self.styles:
ttype = ttype.parent
if ttype == lasttype:
lastval += value
else:
if lastval:
start, end = self.styles[lasttype]
outfile.write(''.join((start, lastval, end)))
lastval = value
lasttype = ttype
if lastval:
start, end = self.styles[lasttype]
outfile.write(''.join((start, lastval, end)))
if self._mono:
outfile.write('[/font]')
if self._code:
outfile.write('[/code]')
if self._code or self._mono:
outfile.write('\n')
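# Usage sketch (not upstream code):
#
#   from pip._vendor.pygments import highlight
#   from pip._vendor.pygments.lexers import PythonLexer  # hypothetical import
#   print(highlight('print(1)', PythonLexer(), BBCodeFormatter(codetag=True)))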

View File

@ -0,0 +1,170 @@
"""
pygments.formatters.groff
~~~~~~~~~~~~~~~~~~~~~~~~~
Formatter for groff output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import math
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.util import get_bool_opt, get_int_opt
__all__ = ['GroffFormatter']
class GroffFormatter(Formatter):
"""
Format tokens with groff escapes to change their color and font style.
.. versionadded:: 2.11
Additional options accepted:
`style`
The style to use, can be a string or a Style subclass (default:
``'default'``).
`monospaced`
If set to true, monospace font will be used (default: ``true``).
`linenos`
If set to true, print the line numbers (default: ``false``).
`wrap`
Wrap lines to the specified number of characters. Disabled if set to 0
(default: ``0``).
"""
name = 'groff'
    aliases = ['groff', 'troff', 'roff']
filenames = []
def __init__(self, **options):
Formatter.__init__(self, **options)
self.monospaced = get_bool_opt(options, 'monospaced', True)
self.linenos = get_bool_opt(options, 'linenos', False)
self._lineno = 0
self.wrap = get_int_opt(options, 'wrap', 0)
self._linelen = 0
self.styles = {}
self._make_styles()
def _make_styles(self):
regular = '\\f[CR]' if self.monospaced else '\\f[R]'
bold = '\\f[CB]' if self.monospaced else '\\f[B]'
italic = '\\f[CI]' if self.monospaced else '\\f[I]'
for ttype, ndef in self.style:
start = end = ''
if ndef['color']:
start += '\\m[{}]'.format(ndef['color'])
end = '\\m[]' + end
if ndef['bold']:
start += bold
end = regular + end
if ndef['italic']:
start += italic
end = regular + end
if ndef['bgcolor']:
start += '\\M[{}]'.format(ndef['bgcolor'])
end = '\\M[]' + end
self.styles[ttype] = start, end
def _define_colors(self, outfile):
colors = set()
for _, ndef in self.style:
if ndef['color'] is not None:
colors.add(ndef['color'])
for color in sorted(colors):
outfile.write('.defcolor ' + color + ' rgb #' + color + '\n')
def _write_lineno(self, outfile):
self._lineno += 1
outfile.write("%s% 4d " % (self._lineno != 1 and '\n' or '', self._lineno))
def _wrap_line(self, line):
length = len(line.rstrip('\n'))
space = ' ' if self.linenos else ''
newline = ''
if length > self.wrap:
for i in range(0, math.floor(length / self.wrap)):
chunk = line[i*self.wrap:i*self.wrap+self.wrap]
newline += (chunk + '\n' + space)
remainder = length % self.wrap
if remainder > 0:
newline += line[-remainder-1:]
self._linelen = remainder
elif self._linelen + length > self.wrap:
newline = ('\n' + space) + line
self._linelen = length
else:
newline = line
self._linelen += length
return newline
def _escape_chars(self, text):
text = text.replace('\\', '\\[u005C]'). \
replace('.', '\\[char46]'). \
replace('\'', '\\[u0027]'). \
replace('`', '\\[u0060]'). \
replace('~', '\\[u007E]')
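        # Any remaining non-ASCII character is rewritten below to a groff
        # \[uXXXX] escape: e.g. 'ä'.encode('unicode_escape') gives '\xe4',
        # which is upper-cased to 'U00E4' and emitted as '\[u00E4]'.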
copy = text
for char in copy:
if len(char) != len(char.encode()):
uni = char.encode('unicode_escape') \
.decode()[1:] \
.replace('x', 'u00') \
.upper()
text = text.replace(char, '\\[u' + uni[1:] + ']')
return text
def format_unencoded(self, tokensource, outfile):
self._define_colors(outfile)
outfile.write('.nf\n\\f[CR]\n')
if self.linenos:
self._write_lineno(outfile)
for ttype, value in tokensource:
while ttype not in self.styles:
ttype = ttype.parent
start, end = self.styles[ttype]
for line in value.splitlines(True):
if self.wrap > 0:
line = self._wrap_line(line)
if start and end:
text = self._escape_chars(line.rstrip('\n'))
if text != '':
outfile.write(''.join((start, text, end)))
else:
outfile.write(self._escape_chars(line.rstrip('\n')))
if line.endswith('\n'):
if self.linenos:
self._write_lineno(outfile)
self._linelen = 0
else:
outfile.write('\n')
self._linelen = 0
outfile.write('\n.fi')
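
A quick usage sketch (illustrative, not part of the vendored file): rendering a snippet through GroffFormatter with line numbers and wrapping enabled.

# Illustrative only -- assumes a standard Pygments install.
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import GroffFormatter

out = highlight("def foo(bar):\n    return bar\n", PythonLexer(),
                GroffFormatter(linenos=True, wrap=60))
# out opens with .defcolor lines and '.nf\n\f[CR]\n', and closes with '\n.fi'.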

View File

@ -0,0 +1,987 @@
"""
pygments.formatters.html
~~~~~~~~~~~~~~~~~~~~~~~~
Formatter for HTML output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import functools
import os
import sys
import os.path
from io import StringIO
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.token import Token, Text, STANDARD_TYPES
from pip._vendor.pygments.util import get_bool_opt, get_int_opt, get_list_opt
try:
import ctags
except ImportError:
ctags = None
__all__ = ['HtmlFormatter']
_escape_html_table = {
ord('&'): '&amp;',
ord('<'): '&lt;',
ord('>'): '&gt;',
ord('"'): '&quot;',
ord("'"): '&#39;',
}
def escape_html(text, table=_escape_html_table):
"""Escape &, <, > as well as single and double quotes for HTML."""
return text.translate(table)
def webify(color):
if color.startswith('calc') or color.startswith('var'):
return color
else:
return '#' + color
def _get_ttype_class(ttype):
fname = STANDARD_TYPES.get(ttype)
if fname:
return fname
aname = ''
while fname is None:
aname = '-' + ttype[-1] + aname
ttype = ttype.parent
fname = STANDARD_TYPES.get(ttype)
return fname + aname
CSSFILE_TEMPLATE = '''\
/*
generated by Pygments <https://pygments.org/>
Copyright 2006-2024 by the Pygments team.
Licensed under the BSD license, see LICENSE for details.
*/
%(styledefs)s
'''
DOC_HEADER = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<!--
generated by Pygments <https://pygments.org/>
Copyright 2006-2024 by the Pygments team.
Licensed under the BSD license, see LICENSE for details.
-->
<html>
<head>
<title>%(title)s</title>
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
<style type="text/css">
''' + CSSFILE_TEMPLATE + '''
</style>
</head>
<body>
<h2>%(title)s</h2>
'''
DOC_HEADER_EXTERNALCSS = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>%(title)s</title>
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
<link rel="stylesheet" href="%(cssfile)s" type="text/css">
</head>
<body>
<h2>%(title)s</h2>
'''
DOC_FOOTER = '''\
</body>
</html>
'''
class HtmlFormatter(Formatter):
r"""
Format tokens as HTML 4 ``<span>`` tags. By default, the content is enclosed
in a ``<pre>`` tag, itself wrapped in a ``<div>`` tag (but see the `nowrap` option).
The ``<div>``'s CSS class can be set by the `cssclass` option.
If the `linenos` option is set to ``"table"``, the ``<pre>`` is
additionally wrapped inside a ``<table>`` which has one row and two
cells: one containing the line numbers and one containing the code.
Example:
.. sourcecode:: html
<div class="highlight" >
<table><tr>
<td class="linenos" title="click to toggle"
onclick="with (this.firstChild.style)
{ display = (display == '') ? 'none' : '' }">
<pre>1
2</pre>
</td>
<td class="code">
<pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
<span class="Ke">pass</span>
</pre>
</td>
</tr></table></div>
(whitespace added to improve clarity).
A list of lines can be specified using the `hl_lines` option to make these
lines highlighted (as of Pygments 0.11).
With the `full` option, a complete HTML 4 document is output, including
the style definitions inside a ``<style>`` tag, or in a separate file if
the `cssfile` option is given.
When `tagsfile` is set to the path of a ctags index file, it is used to
generate hyperlinks from names to their definition. You must enable
`lineanchors` and run ctags with the `-n` option for this to work. The
`python-ctags` module from PyPI must be installed to use this feature;
otherwise a `RuntimeError` will be raised.
The `get_style_defs(arg='')` method of a `HtmlFormatter` returns a string
containing CSS rules for the CSS classes used by the formatter. The
argument `arg` can be used to specify additional CSS selectors that
are prepended to the classes. A call `fmter.get_style_defs('td .code')`
would result in the following CSS classes:
.. sourcecode:: css
td .code .kw { font-weight: bold; color: #00FF00 }
td .code .cm { color: #999999 }
...
If you have Pygments 0.6 or higher, you can also pass a list or tuple to the
`get_style_defs()` method to request multiple prefixes for the tokens:
.. sourcecode:: python
formatter.get_style_defs(['div.syntax pre', 'pre.syntax'])
The output would then look like this:
.. sourcecode:: css
div.syntax pre .kw,
pre.syntax .kw { font-weight: bold; color: #00FF00 }
div.syntax pre .cm,
pre.syntax .cm { color: #999999 }
...
Additional options accepted:
`nowrap`
If set to ``True``, don't add a ``<pre>`` and a ``<div>`` tag
around the tokens. This disables most other options (default: ``False``).
`full`
Tells the formatter to output a "full" document, i.e. a complete
self-contained document (default: ``False``).
`title`
If `full` is true, the title that should be used to caption the
document (default: ``''``).
`style`
The style to use, can be a string or a Style subclass (default:
``'default'``). This option has no effect if the `cssfile`
and `noclobber_cssfile` option are given and the file specified in
`cssfile` exists.
`noclasses`
If set to true, token ``<span>`` tags (as well as line number elements)
will not use CSS classes, but inline styles. This is not recommended
for larger pieces of code since it increases output size by quite a bit
(default: ``False``).
`classprefix`
Since the token types use relatively short class names, they may clash
with some of your own class names. In this case you can use the
`classprefix` option to give a string to prepend to all Pygments-generated
CSS class names for token types.
Note that this option also affects the output of `get_style_defs()`.
`cssclass`
CSS class for the wrapping ``<div>`` tag (default: ``'highlight'``).
If you set this option, the default selector for `get_style_defs()`
will be this class.
.. versionadded:: 0.9
If you select the ``'table'`` line numbers, the wrapping table will
        have a CSS class of this string plus ``'table'``; the default is
accordingly ``'highlighttable'``.
`cssstyles`
Inline CSS styles for the wrapping ``<div>`` tag (default: ``''``).
`prestyles`
Inline CSS styles for the ``<pre>`` tag (default: ``''``).
.. versionadded:: 0.11
`cssfile`
If the `full` option is true and this option is given, it must be the
name of an external file. If the filename does not include an absolute
path, the file's path will be assumed to be relative to the main output
file's path, if the latter can be found. The stylesheet is then written
to this file instead of the HTML file.
.. versionadded:: 0.6
`noclobber_cssfile`
If `cssfile` is given and the specified file exists, the css file will
not be overwritten. This allows the use of the `full` option in
combination with a user specified css file. Default is ``False``.
.. versionadded:: 1.1
`linenos`
If set to ``'table'``, output line numbers as a table with two cells,
one containing the line numbers, the other the whole code. This is
copy-and-paste-friendly, but may cause alignment problems with some
browsers or fonts. If set to ``'inline'``, the line numbers will be
integrated in the ``<pre>`` tag that contains the code (that setting
is *new in Pygments 0.8*).
For compatibility with Pygments 0.7 and earlier, every true value
except ``'inline'`` means the same as ``'table'`` (in particular, that
means also ``True``).
The default value is ``False``, which means no line numbers at all.
**Note:** with the default ("table") line number mechanism, the line
numbers and code can have different line heights in Internet Explorer
unless you give the enclosing ``<pre>`` tags an explicit ``line-height``
CSS property (you get the default line spacing with ``line-height:
125%``).
`hl_lines`
Specify a list of lines to be highlighted. The line numbers are always
relative to the input (i.e. the first line is line 1) and are
independent of `linenostart`.
.. versionadded:: 0.11
`linenostart`
The line number for the first line (default: ``1``).
`linenostep`
If set to a number n > 1, only every nth line number is printed.
`linenospecial`
If set to a number n > 0, every nth line number is given the CSS
class ``"special"`` (default: ``0``).
`nobackground`
If set to ``True``, the formatter won't output the background color
for the wrapping element (this automatically defaults to ``False``
        when there is no wrapping element [e.g. no argument for the
        `get_style_defs` method given]) (default: ``False``).
.. versionadded:: 0.6
`lineseparator`
This string is output between lines of code. It defaults to ``"\n"``,
which is enough to break a line inside ``<pre>`` tags, but you can
e.g. set it to ``"<br>"`` to get HTML line breaks.
.. versionadded:: 0.7
`lineanchors`
If set to a nonempty string, e.g. ``foo``, the formatter will wrap each
output line in an anchor tag with an ``id`` (and `name`) of ``foo-linenumber``.
This allows easy linking to certain lines.
.. versionadded:: 0.9
`linespans`
If set to a nonempty string, e.g. ``foo``, the formatter will wrap each
output line in a span tag with an ``id`` of ``foo-linenumber``.
This allows easy access to lines via javascript.
.. versionadded:: 1.6
`anchorlinenos`
If set to `True`, will wrap line numbers in <a> tags. Used in
combination with `linenos` and `lineanchors`.
`tagsfile`
If set to the path of a ctags file, wrap names in anchor tags that
link to their definitions. `lineanchors` should be used, and the
tags file should specify line numbers (see the `-n` option to ctags).
The tags file is assumed to be encoded in UTF-8.
.. versionadded:: 1.6
`tagurlformat`
A string formatting pattern used to generate links to ctags definitions.
Available variables are `%(path)s`, `%(fname)s` and `%(fext)s`.
Defaults to an empty string, resulting in just `#prefix-number` links.
.. versionadded:: 1.6
`filename`
A string used to generate a filename when rendering ``<pre>`` blocks,
for example if displaying source code. If `linenos` is set to
``'table'`` then the filename will be rendered in an initial row
containing a single `<th>` which spans both columns.
.. versionadded:: 2.1
`wrapcode`
Wrap the code inside ``<pre>`` blocks using ``<code>``, as recommended
by the HTML5 specification.
.. versionadded:: 2.4
`debug_token_types`
Add ``title`` attributes to all token ``<span>`` tags that show the
name of the token.
.. versionadded:: 2.10
**Subclassing the HTML formatter**
.. versionadded:: 0.7
The HTML formatter is now built in a way that allows easy subclassing, thus
customizing the output HTML code. The `format()` method calls
`self._format_lines()` which returns a generator that yields tuples of ``(1,
line)``, where the ``1`` indicates that the ``line`` is a line of the
formatted source code.
    If the `nowrap` option is set, the generator is simply iterated over and the
resulting HTML is output.
Otherwise, `format()` calls `self.wrap()`, which wraps the generator with
other generators. These may add some HTML code to the one generated by
`_format_lines()`, either by modifying the lines generated by the latter,
then yielding them again with ``(1, line)``, and/or by yielding other HTML
code before or after the lines, with ``(0, html)``. The distinction between
source lines and other code makes it possible to wrap the generator multiple
times.
The default `wrap()` implementation adds a ``<div>`` and a ``<pre>`` tag.
A custom `HtmlFormatter` subclass could look like this:
.. sourcecode:: python
class CodeHtmlFormatter(HtmlFormatter):
            def wrap(self, source):
return self._wrap_code(source)
def _wrap_code(self, source):
yield 0, '<code>'
for i, t in source:
if i == 1:
# it's a line of formatted code
t += '<br>'
yield i, t
yield 0, '</code>'
This results in wrapping the formatted lines with a ``<code>`` tag, where the
source lines are broken using ``<br>`` tags.
After calling `wrap()`, the `format()` method also adds the "line numbers"
and/or "full document" wrappers if the respective options are set. Then, all
HTML yielded by the wrapped generator is output.
"""
name = 'HTML'
aliases = ['html']
filenames = ['*.html', '*.htm']
def __init__(self, **options):
Formatter.__init__(self, **options)
self.title = self._decodeifneeded(self.title)
self.nowrap = get_bool_opt(options, 'nowrap', False)
self.noclasses = get_bool_opt(options, 'noclasses', False)
self.classprefix = options.get('classprefix', '')
self.cssclass = self._decodeifneeded(options.get('cssclass', 'highlight'))
self.cssstyles = self._decodeifneeded(options.get('cssstyles', ''))
self.prestyles = self._decodeifneeded(options.get('prestyles', ''))
self.cssfile = self._decodeifneeded(options.get('cssfile', ''))
self.noclobber_cssfile = get_bool_opt(options, 'noclobber_cssfile', False)
self.tagsfile = self._decodeifneeded(options.get('tagsfile', ''))
self.tagurlformat = self._decodeifneeded(options.get('tagurlformat', ''))
self.filename = self._decodeifneeded(options.get('filename', ''))
self.wrapcode = get_bool_opt(options, 'wrapcode', False)
self.span_element_openers = {}
self.debug_token_types = get_bool_opt(options, 'debug_token_types', False)
if self.tagsfile:
if not ctags:
                raise RuntimeError('The "ctags" package must be installed '
'to be able to use the "tagsfile" feature.')
self._ctags = ctags.CTags(self.tagsfile)
linenos = options.get('linenos', False)
if linenos == 'inline':
self.linenos = 2
elif linenos:
# compatibility with <= 0.7
self.linenos = 1
else:
self.linenos = 0
self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
self.linenospecial = abs(get_int_opt(options, 'linenospecial', 0))
self.nobackground = get_bool_opt(options, 'nobackground', False)
self.lineseparator = options.get('lineseparator', '\n')
self.lineanchors = options.get('lineanchors', '')
self.linespans = options.get('linespans', '')
self.anchorlinenos = get_bool_opt(options, 'anchorlinenos', False)
self.hl_lines = set()
for lineno in get_list_opt(options, 'hl_lines', []):
try:
self.hl_lines.add(int(lineno))
except ValueError:
pass
self._create_stylesheet()
def _get_css_class(self, ttype):
"""Return the css class of this token type prefixed with
the classprefix option."""
ttypeclass = _get_ttype_class(ttype)
if ttypeclass:
return self.classprefix + ttypeclass
return ''
def _get_css_classes(self, ttype):
"""Return the CSS classes of this token type prefixed with the classprefix option."""
cls = self._get_css_class(ttype)
while ttype not in STANDARD_TYPES:
ttype = ttype.parent
cls = self._get_css_class(ttype) + ' ' + cls
return cls or ''
def _get_css_inline_styles(self, ttype):
"""Return the inline CSS styles for this token type."""
cclass = self.ttype2class.get(ttype)
while cclass is None:
ttype = ttype.parent
cclass = self.ttype2class.get(ttype)
return cclass or ''
def _create_stylesheet(self):
t2c = self.ttype2class = {Token: ''}
c2s = self.class2style = {}
for ttype, ndef in self.style:
name = self._get_css_class(ttype)
style = ''
if ndef['color']:
style += 'color: {}; '.format(webify(ndef['color']))
if ndef['bold']:
style += 'font-weight: bold; '
if ndef['italic']:
style += 'font-style: italic; '
if ndef['underline']:
style += 'text-decoration: underline; '
if ndef['bgcolor']:
style += 'background-color: {}; '.format(webify(ndef['bgcolor']))
if ndef['border']:
style += 'border: 1px solid {}; '.format(webify(ndef['border']))
if style:
t2c[ttype] = name
# save len(ttype) to enable ordering the styles by
# hierarchy (necessary for CSS cascading rules!)
c2s[name] = (style[:-2], ttype, len(ttype))
def get_style_defs(self, arg=None):
"""
Return CSS style definitions for the classes produced by the current
highlighting style. ``arg`` can be a string or list of selectors to
insert before the token type classes.
"""
style_lines = []
style_lines.extend(self.get_linenos_style_defs())
style_lines.extend(self.get_background_style_defs(arg))
style_lines.extend(self.get_token_style_defs(arg))
return '\n'.join(style_lines)
def get_token_style_defs(self, arg=None):
prefix = self.get_css_prefix(arg)
styles = [
(level, ttype, cls, style)
for cls, (style, ttype, level) in self.class2style.items()
if cls and style
]
styles.sort()
lines = [
f'{prefix(cls)} {{ {style} }} /* {repr(ttype)[6:]} */'
for (level, ttype, cls, style) in styles
]
return lines
def get_background_style_defs(self, arg=None):
prefix = self.get_css_prefix(arg)
bg_color = self.style.background_color
hl_color = self.style.highlight_color
lines = []
if arg and not self.nobackground and bg_color is not None:
text_style = ''
if Text in self.ttype2class:
text_style = ' ' + self.class2style[self.ttype2class[Text]][0]
lines.insert(
0, '{}{{ background: {};{} }}'.format(
prefix(''), bg_color, text_style
)
)
if hl_color is not None:
lines.insert(
0, '{} {{ background-color: {} }}'.format(prefix('hll'), hl_color)
)
return lines
def get_linenos_style_defs(self):
lines = [
f'pre {{ {self._pre_style} }}',
f'td.linenos .normal {{ {self._linenos_style} }}',
f'span.linenos {{ {self._linenos_style} }}',
f'td.linenos .special {{ {self._linenos_special_style} }}',
f'span.linenos.special {{ {self._linenos_special_style} }}',
]
return lines
def get_css_prefix(self, arg):
if arg is None:
arg = ('cssclass' in self.options and '.'+self.cssclass or '')
if isinstance(arg, str):
args = [arg]
else:
args = list(arg)
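        # e.g. with arg 'td .code', prefix('kw') returns 'td .code .kw';
        # passing several selectors yields a comma-separated group.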
def prefix(cls):
if cls:
cls = '.' + cls
tmp = []
for arg in args:
tmp.append((arg and arg + ' ' or '') + cls)
return ', '.join(tmp)
return prefix
@property
def _pre_style(self):
return 'line-height: 125%;'
@property
def _linenos_style(self):
color = self.style.line_number_color
background_color = self.style.line_number_background_color
return f'color: {color}; background-color: {background_color}; padding-left: 5px; padding-right: 5px;'
@property
def _linenos_special_style(self):
color = self.style.line_number_special_color
background_color = self.style.line_number_special_background_color
return f'color: {color}; background-color: {background_color}; padding-left: 5px; padding-right: 5px;'
def _decodeifneeded(self, value):
if isinstance(value, bytes):
if self.encoding:
return value.decode(self.encoding)
return value.decode()
return value
def _wrap_full(self, inner, outfile):
if self.cssfile:
if os.path.isabs(self.cssfile):
# it's an absolute filename
cssfilename = self.cssfile
else:
try:
filename = outfile.name
if not filename or filename[0] == '<':
# pseudo files, e.g. name == '<fdopen>'
raise AttributeError
cssfilename = os.path.join(os.path.dirname(filename),
self.cssfile)
except AttributeError:
print('Note: Cannot determine output file name, '
'using current directory as base for the CSS file name',
file=sys.stderr)
cssfilename = self.cssfile
# write CSS file only if noclobber_cssfile isn't given as an option.
try:
if not os.path.exists(cssfilename) or not self.noclobber_cssfile:
with open(cssfilename, "w", encoding="utf-8") as cf:
cf.write(CSSFILE_TEMPLATE %
{'styledefs': self.get_style_defs('body')})
except OSError as err:
err.strerror = 'Error writing CSS file: ' + err.strerror
raise
yield 0, (DOC_HEADER_EXTERNALCSS %
dict(title=self.title,
cssfile=self.cssfile,
encoding=self.encoding))
else:
yield 0, (DOC_HEADER %
dict(title=self.title,
styledefs=self.get_style_defs('body'),
encoding=self.encoding))
yield from inner
yield 0, DOC_FOOTER
def _wrap_tablelinenos(self, inner):
dummyoutfile = StringIO()
lncount = 0
for t, line in inner:
if t:
lncount += 1
dummyoutfile.write(line)
fl = self.linenostart
mw = len(str(lncount + fl - 1))
sp = self.linenospecial
st = self.linenostep
anchor_name = self.lineanchors or self.linespans
aln = self.anchorlinenos
nocls = self.noclasses
lines = []
for i in range(fl, fl+lncount):
print_line = i % st == 0
special_line = sp and i % sp == 0
if print_line:
line = '%*d' % (mw, i)
if aln:
line = '<a href="#%s-%d">%s</a>' % (anchor_name, i, line)
else:
line = ' ' * mw
if nocls:
if special_line:
style = f' style="{self._linenos_special_style}"'
else:
style = f' style="{self._linenos_style}"'
else:
if special_line:
style = ' class="special"'
else:
style = ' class="normal"'
if style:
line = f'<span{style}>{line}</span>'
lines.append(line)
ls = '\n'.join(lines)
# If a filename was specified, we can't put it into the code table as it
# would misalign the line numbers. Hence we emit a separate row for it.
filename_tr = ""
if self.filename:
filename_tr = (
'<tr><th colspan="2" class="filename">'
'<span class="filename">' + self.filename + '</span>'
'</th></tr>')
# in case you wonder about the seemingly redundant <div> here: since the
# content in the other cell also is wrapped in a div, some browsers in
# some configurations seem to mess up the formatting...
yield 0, (f'<table class="{self.cssclass}table">' + filename_tr +
'<tr><td class="linenos"><div class="linenodiv"><pre>' +
ls + '</pre></div></td><td class="code">')
yield 0, '<div>'
yield 0, dummyoutfile.getvalue()
yield 0, '</div>'
yield 0, '</td></tr></table>'
def _wrap_inlinelinenos(self, inner):
# need a list of lines since we need the width of a single number :(
inner_lines = list(inner)
sp = self.linenospecial
st = self.linenostep
num = self.linenostart
mw = len(str(len(inner_lines) + num - 1))
anchor_name = self.lineanchors or self.linespans
aln = self.anchorlinenos
nocls = self.noclasses
for _, inner_line in inner_lines:
print_line = num % st == 0
special_line = sp and num % sp == 0
if print_line:
line = '%*d' % (mw, num)
else:
line = ' ' * mw
if nocls:
if special_line:
style = f' style="{self._linenos_special_style}"'
else:
style = f' style="{self._linenos_style}"'
else:
if special_line:
style = ' class="linenos special"'
else:
style = ' class="linenos"'
if style:
linenos = f'<span{style}>{line}</span>'
else:
linenos = line
if aln:
yield 1, ('<a href="#%s-%d">%s</a>' % (anchor_name, num, linenos) +
inner_line)
else:
yield 1, linenos + inner_line
num += 1
def _wrap_lineanchors(self, inner):
s = self.lineanchors
# subtract 1 since we have to increment i *before* yielding
i = self.linenostart - 1
for t, line in inner:
if t:
i += 1
href = "" if self.linenos else ' href="#%s-%d"' % (s, i)
yield 1, '<a id="%s-%d" name="%s-%d"%s></a>' % (s, i, s, i, href) + line
else:
yield 0, line
def _wrap_linespans(self, inner):
s = self.linespans
i = self.linenostart - 1
for t, line in inner:
if t:
i += 1
yield 1, '<span id="%s-%d">%s</span>' % (s, i, line)
else:
yield 0, line
def _wrap_div(self, inner):
style = []
if (self.noclasses and not self.nobackground and
self.style.background_color is not None):
style.append(f'background: {self.style.background_color}')
if self.cssstyles:
style.append(self.cssstyles)
style = '; '.join(style)
yield 0, ('<div' + (self.cssclass and f' class="{self.cssclass}"') +
(style and (f' style="{style}"')) + '>')
yield from inner
yield 0, '</div>\n'
def _wrap_pre(self, inner):
style = []
if self.prestyles:
style.append(self.prestyles)
if self.noclasses:
style.append(self._pre_style)
style = '; '.join(style)
if self.filename and self.linenos != 1:
yield 0, ('<span class="filename">' + self.filename + '</span>')
# the empty span here is to keep leading empty lines from being
# ignored by HTML parsers
yield 0, ('<pre' + (style and f' style="{style}"') + '><span></span>')
yield from inner
yield 0, '</pre>'
def _wrap_code(self, inner):
yield 0, '<code>'
yield from inner
yield 0, '</code>'
@functools.lru_cache(maxsize=100)
def _translate_parts(self, value):
"""HTML-escape a value and split it by newlines."""
return value.translate(_escape_html_table).split('\n')
def _format_lines(self, tokensource):
"""
Just format the tokens, without any wrapping tags.
Yield individual lines.
"""
nocls = self.noclasses
lsep = self.lineseparator
tagsfile = self.tagsfile
lspan = ''
line = []
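        # lspan holds the <span> opener currently in effect for `line`;
        # cspan is the opener this token wants. A span is closed and
        # reopened only when the two differ, keeping the markup compact.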
for ttype, value in tokensource:
try:
cspan = self.span_element_openers[ttype]
except KeyError:
title = ' title="{}"'.format('.'.join(ttype)) if self.debug_token_types else ''
if nocls:
css_style = self._get_css_inline_styles(ttype)
if css_style:
css_style = self.class2style[css_style][0]
cspan = f'<span style="{css_style}"{title}>'
else:
cspan = ''
else:
css_class = self._get_css_classes(ttype)
if css_class:
cspan = f'<span class="{css_class}"{title}>'
else:
cspan = ''
self.span_element_openers[ttype] = cspan
parts = self._translate_parts(value)
if tagsfile and ttype in Token.Name:
filename, linenumber = self._lookup_ctag(value)
if linenumber:
base, filename = os.path.split(filename)
if base:
base += '/'
filename, extension = os.path.splitext(filename)
url = self.tagurlformat % {'path': base, 'fname': filename,
'fext': extension}
parts[0] = "<a href=\"%s#%s-%d\">%s" % \
(url, self.lineanchors, linenumber, parts[0])
parts[-1] = parts[-1] + "</a>"
# for all but the last line
for part in parts[:-1]:
if line:
# Also check for part being non-empty, so we avoid creating
# empty <span> tags
if lspan != cspan and part:
line.extend(((lspan and '</span>'), cspan, part,
(cspan and '</span>'), lsep))
else: # both are the same, or the current part was empty
line.extend((part, (lspan and '</span>'), lsep))
yield 1, ''.join(line)
line = []
elif part:
yield 1, ''.join((cspan, part, (cspan and '</span>'), lsep))
else:
yield 1, lsep
# for the last line
if line and parts[-1]:
if lspan != cspan:
line.extend(((lspan and '</span>'), cspan, parts[-1]))
lspan = cspan
else:
line.append(parts[-1])
elif parts[-1]:
line = [cspan, parts[-1]]
lspan = cspan
# else we neither have to open a new span nor set lspan
if line:
line.extend(((lspan and '</span>'), lsep))
yield 1, ''.join(line)
def _lookup_ctag(self, token):
entry = ctags.TagEntry()
if self._ctags.find(entry, token.encode(), 0):
return entry['file'].decode(), entry['lineNumber']
else:
return None, None
def _highlight_lines(self, tokensource):
"""
        Highlight the lines specified in the `hl_lines` option by
post-processing the token stream coming from `_format_lines`.
"""
hls = self.hl_lines
for i, (t, value) in enumerate(tokensource):
if t != 1:
yield t, value
if i + 1 in hls: # i + 1 because Python indexes start at 0
if self.noclasses:
style = ''
if self.style.highlight_color is not None:
style = (f' style="background-color: {self.style.highlight_color}"')
yield 1, f'<span{style}>{value}</span>'
else:
yield 1, f'<span class="hll">{value}</span>'
else:
yield 1, value
def wrap(self, source):
"""
Wrap the ``source``, which is a generator yielding
individual lines, in custom generators. See docstring
for `format`. Can be overridden.
"""
output = source
if self.wrapcode:
output = self._wrap_code(output)
output = self._wrap_pre(output)
return output
def format_unencoded(self, tokensource, outfile):
"""
The formatting process uses several nested generators; which of
them are used is determined by the user's options.
Each generator should take at least one argument, ``inner``,
and wrap the pieces of text generated by this.
Always yield 2-tuples: (code, text). If "code" is 1, the text
is part of the original tokensource being highlighted, if it's
0, the text is some piece of wrapping. This makes it possible to
use several different wrappers that process the original source
linewise, e.g. line number generators.
"""
source = self._format_lines(tokensource)
# As a special case, we wrap line numbers before line highlighting
# so the line numbers get wrapped in the highlighting tag.
if not self.nowrap and self.linenos == 2:
source = self._wrap_inlinelinenos(source)
if self.hl_lines:
source = self._highlight_lines(source)
if not self.nowrap:
if self.lineanchors:
source = self._wrap_lineanchors(source)
if self.linespans:
source = self._wrap_linespans(source)
source = self.wrap(source)
if self.linenos == 1:
source = self._wrap_tablelinenos(source)
source = self._wrap_div(source)
if self.full:
source = self._wrap_full(source, outfile)
for t, piece in source:
outfile.write(piece)
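
A usage sketch for HtmlFormatter (illustrative, not part of the vendored file), separating markup generation from stylesheet generation as the docstring describes:

# Illustrative only -- assumes a standard Pygments install.
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter

fmt = HtmlFormatter(linenos='table', cssclass='src', hl_lines=[2])
markup = highlight("def foo(bar):\n    pass\n", PythonLexer(), fmt)
css = fmt.get_style_defs('.src')   # CSS rules scoped to the wrapper class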

View File

@ -0,0 +1,685 @@
"""
pygments.formatters.img
~~~~~~~~~~~~~~~~~~~~~~~
Formatter for Pixmap output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import os
import sys
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
get_choice_opt
import subprocess
# Import this carefully
try:
from PIL import Image, ImageDraw, ImageFont
pil_available = True
except ImportError:
pil_available = False
try:
import _winreg
except ImportError:
try:
import winreg as _winreg
except ImportError:
_winreg = None
__all__ = ['ImageFormatter', 'GifImageFormatter', 'JpgImageFormatter',
'BmpImageFormatter']
# For some unknown reason every font calls it something different
STYLES = {
'NORMAL': ['', 'Roman', 'Book', 'Normal', 'Regular', 'Medium'],
'ITALIC': ['Oblique', 'Italic'],
'BOLD': ['Bold'],
'BOLDITALIC': ['Bold Oblique', 'Bold Italic'],
}
# A sane default for modern systems
DEFAULT_FONT_NAME_NIX = 'DejaVu Sans Mono'
DEFAULT_FONT_NAME_WIN = 'Courier New'
DEFAULT_FONT_NAME_MAC = 'Menlo'
class PilNotAvailable(ImportError):
"""When Python imaging library is not available"""
class FontNotFound(Exception):
"""When there are no usable fonts specified"""
class FontManager:
"""
Manages a set of fonts: normal, italic, bold, etc...
"""
def __init__(self, font_name, font_size=14):
self.font_name = font_name
self.font_size = font_size
self.fonts = {}
self.encoding = None
self.variable = False
if hasattr(font_name, 'read') or os.path.isfile(font_name):
font = ImageFont.truetype(font_name, self.font_size)
self.variable = True
for style in STYLES:
self.fonts[style] = font
return
if sys.platform.startswith('win'):
if not font_name:
self.font_name = DEFAULT_FONT_NAME_WIN
self._create_win()
elif sys.platform.startswith('darwin'):
if not font_name:
self.font_name = DEFAULT_FONT_NAME_MAC
self._create_mac()
else:
if not font_name:
self.font_name = DEFAULT_FONT_NAME_NIX
self._create_nix()
def _get_nix_font_path(self, name, style):
proc = subprocess.Popen(['fc-list', f"{name}:style={style}", 'file'],
stdout=subprocess.PIPE, stderr=None)
stdout, _ = proc.communicate()
if proc.returncode == 0:
lines = stdout.splitlines()
for line in lines:
if line.startswith(b'Fontconfig warning:'):
continue
path = line.decode().strip().strip(':')
if path:
return path
return None
def _create_nix(self):
for name in STYLES['NORMAL']:
path = self._get_nix_font_path(self.font_name, name)
if path is not None:
self.fonts['NORMAL'] = ImageFont.truetype(path, self.font_size)
break
else:
raise FontNotFound(f'No usable fonts named: "{self.font_name}"')
for style in ('ITALIC', 'BOLD', 'BOLDITALIC'):
for stylename in STYLES[style]:
path = self._get_nix_font_path(self.font_name, stylename)
if path is not None:
self.fonts[style] = ImageFont.truetype(path, self.font_size)
break
else:
if style == 'BOLDITALIC':
self.fonts[style] = self.fonts['BOLD']
else:
self.fonts[style] = self.fonts['NORMAL']
def _get_mac_font_path(self, font_map, name, style):
return font_map.get((name + ' ' + style).strip().lower())
def _create_mac(self):
font_map = {}
for font_dir in (os.path.join(os.getenv("HOME"), 'Library/Fonts/'),
'/Library/Fonts/', '/System/Library/Fonts/'):
font_map.update(
(os.path.splitext(f)[0].lower(), os.path.join(font_dir, f))
for f in os.listdir(font_dir)
if f.lower().endswith(('ttf', 'ttc')))
for name in STYLES['NORMAL']:
path = self._get_mac_font_path(font_map, self.font_name, name)
if path is not None:
self.fonts['NORMAL'] = ImageFont.truetype(path, self.font_size)
break
else:
raise FontNotFound(f'No usable fonts named: "{self.font_name}"')
for style in ('ITALIC', 'BOLD', 'BOLDITALIC'):
for stylename in STYLES[style]:
path = self._get_mac_font_path(font_map, self.font_name, stylename)
if path is not None:
self.fonts[style] = ImageFont.truetype(path, self.font_size)
break
else:
if style == 'BOLDITALIC':
self.fonts[style] = self.fonts['BOLD']
else:
self.fonts[style] = self.fonts['NORMAL']
def _lookup_win(self, key, basename, styles, fail=False):
for suffix in ('', ' (TrueType)'):
for style in styles:
try:
valname = '{}{}{}'.format(basename, style and ' '+style, suffix)
val, _ = _winreg.QueryValueEx(key, valname)
return val
except OSError:
continue
else:
if fail:
raise FontNotFound(f'Font {basename} ({styles[0]}) not found in registry')
return None
def _create_win(self):
lookuperror = None
keynames = [ (_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows NT\CurrentVersion\Fonts'),
(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Fonts'),
(_winreg.HKEY_LOCAL_MACHINE, r'Software\Microsoft\Windows NT\CurrentVersion\Fonts'),
(_winreg.HKEY_LOCAL_MACHINE, r'Software\Microsoft\Windows\CurrentVersion\Fonts') ]
for keyname in keynames:
try:
key = _winreg.OpenKey(*keyname)
try:
path = self._lookup_win(key, self.font_name, STYLES['NORMAL'], True)
self.fonts['NORMAL'] = ImageFont.truetype(path, self.font_size)
for style in ('ITALIC', 'BOLD', 'BOLDITALIC'):
path = self._lookup_win(key, self.font_name, STYLES[style])
if path:
self.fonts[style] = ImageFont.truetype(path, self.font_size)
else:
if style == 'BOLDITALIC':
self.fonts[style] = self.fonts['BOLD']
else:
self.fonts[style] = self.fonts['NORMAL']
return
except FontNotFound as err:
lookuperror = err
finally:
_winreg.CloseKey(key)
except OSError:
pass
else:
# If we get here, we checked all registry keys and had no luck
# We can be in one of two situations now:
# * All key lookups failed. In this case lookuperror is None and we
# will raise a generic error
# * At least one lookup failed with a FontNotFound error. In this
# case, we will raise that as a more specific error
if lookuperror:
raise lookuperror
raise FontNotFound('Can\'t open Windows font registry key')
def get_char_size(self):
"""
Get the character size.
"""
return self.get_text_size('M')
def get_text_size(self, text):
"""
Get the text size (width, height).
"""
font = self.fonts['NORMAL']
if hasattr(font, 'getbbox'): # Pillow >= 9.2.0
return font.getbbox(text)[2:4]
else:
return font.getsize(text)
def get_font(self, bold, oblique):
"""
Get the font based on bold and italic flags.
"""
if bold and oblique:
if self.variable:
return self.get_style('BOLDITALIC')
return self.fonts['BOLDITALIC']
elif bold:
if self.variable:
return self.get_style('BOLD')
return self.fonts['BOLD']
elif oblique:
if self.variable:
return self.get_style('ITALIC')
return self.fonts['ITALIC']
else:
if self.variable:
return self.get_style('NORMAL')
return self.fonts['NORMAL']
def get_style(self, style):
"""
Get the specified style of the font if it is a variable font.
If not found, return the normal font.
"""
font = self.fonts[style]
for style_name in STYLES[style]:
try:
font.set_variation_by_name(style_name)
return font
except ValueError:
pass
except OSError:
return font
return font
class ImageFormatter(Formatter):
"""
Create a PNG image from source code. This uses the Python Imaging Library to
generate a pixmap from the source code.
.. versionadded:: 0.10
Additional options accepted:
`image_format`
An image format to output to that is recognised by PIL, these include:
* "PNG" (default)
* "JPEG"
* "BMP"
* "GIF"
`line_pad`
The extra spacing (in pixels) between each line of text.
Default: 2
`font_name`
The font name to be used as the base font from which others, such as
        bold and italic fonts, will be generated. This really should be a
monospace font to look sane.
If a filename or a file-like object is specified, the user must
provide different styles of the font.
Default: "Courier New" on Windows, "Menlo" on Mac OS, and
"DejaVu Sans Mono" on \\*nix
`font_size`
The font size in points to be used.
Default: 14
`image_pad`
The padding, in pixels to be used at each edge of the resulting image.
Default: 10
`line_numbers`
Whether line numbers should be shown: True/False
Default: True
`line_number_start`
The line number of the first line.
Default: 1
`line_number_step`
The step used when printing line numbers.
Default: 1
`line_number_bg`
The background colour (in "#123456" format) of the line number bar, or
None to use the style background color.
Default: "#eed"
`line_number_fg`
The text color of the line numbers (in "#123456"-like format).
Default: "#886"
`line_number_chars`
The number of columns of line numbers allowable in the line number
margin.
Default: 2
`line_number_bold`
Whether line numbers will be bold: True/False
Default: False
`line_number_italic`
Whether line numbers will be italicized: True/False
Default: False
`line_number_separator`
Whether a line will be drawn between the line number area and the
source code area: True/False
Default: True
`line_number_pad`
The horizontal padding (in pixels) between the line number margin, and
the source code area.
Default: 6
`hl_lines`
Specify a list of lines to be highlighted.
.. versionadded:: 1.2
Default: empty list
`hl_color`
Specify the color for highlighting lines.
.. versionadded:: 1.2
Default: highlight color of the selected style
"""
# Required by the pygments mapper
name = 'img'
aliases = ['img', 'IMG', 'png']
filenames = ['*.png']
unicodeoutput = False
default_image_format = 'png'
def __init__(self, **options):
"""
See the class docstring for explanation of options.
"""
if not pil_available:
raise PilNotAvailable(
'Python Imaging Library is required for this formatter')
Formatter.__init__(self, **options)
self.encoding = 'latin1' # let pygments.format() do the right thing
# Read the style
self.styles = dict(self.style)
if self.style.background_color is None:
self.background_color = '#fff'
else:
self.background_color = self.style.background_color
# Image options
self.image_format = get_choice_opt(
options, 'image_format', ['png', 'jpeg', 'gif', 'bmp'],
self.default_image_format, normcase=True)
self.image_pad = get_int_opt(options, 'image_pad', 10)
self.line_pad = get_int_opt(options, 'line_pad', 2)
# The fonts
fontsize = get_int_opt(options, 'font_size', 14)
self.fonts = FontManager(options.get('font_name', ''), fontsize)
self.fontw, self.fonth = self.fonts.get_char_size()
# Line number options
self.line_number_fg = options.get('line_number_fg', '#886')
self.line_number_bg = options.get('line_number_bg', '#eed')
self.line_number_chars = get_int_opt(options,
'line_number_chars', 2)
self.line_number_bold = get_bool_opt(options,
'line_number_bold', False)
self.line_number_italic = get_bool_opt(options,
'line_number_italic', False)
self.line_number_pad = get_int_opt(options, 'line_number_pad', 6)
self.line_numbers = get_bool_opt(options, 'line_numbers', True)
self.line_number_separator = get_bool_opt(options,
'line_number_separator', True)
self.line_number_step = get_int_opt(options, 'line_number_step', 1)
self.line_number_start = get_int_opt(options, 'line_number_start', 1)
if self.line_numbers:
self.line_number_width = (self.fontw * self.line_number_chars +
self.line_number_pad * 2)
else:
self.line_number_width = 0
self.hl_lines = []
hl_lines_str = get_list_opt(options, 'hl_lines', [])
for line in hl_lines_str:
try:
self.hl_lines.append(int(line))
except ValueError:
pass
self.hl_color = options.get('hl_color',
self.style.highlight_color) or '#f90'
self.drawables = []
def get_style_defs(self, arg=''):
raise NotImplementedError('The -S option is meaningless for the image '
'formatter. Use -O style=<stylename> instead.')
def _get_line_height(self):
"""
Get the height of a line.
"""
return self.fonth + self.line_pad
def _get_line_y(self, lineno):
"""
Get the Y coordinate of a line number.
"""
return lineno * self._get_line_height() + self.image_pad
def _get_char_width(self):
"""
Get the width of a character.
"""
return self.fontw
def _get_char_x(self, linelength):
"""
Get the X coordinate of a character position.
"""
return linelength + self.image_pad + self.line_number_width
def _get_text_pos(self, linelength, lineno):
"""
Get the actual position for a character and line position.
"""
return self._get_char_x(linelength), self._get_line_y(lineno)
def _get_linenumber_pos(self, lineno):
"""
Get the actual position for the start of a line number.
"""
return (self.image_pad, self._get_line_y(lineno))
def _get_text_color(self, style):
"""
Get the correct color for the token from the style.
"""
if style['color'] is not None:
fill = '#' + style['color']
else:
fill = '#000'
return fill
def _get_text_bg_color(self, style):
"""
Get the correct background color for the token from the style.
"""
if style['bgcolor'] is not None:
bg_color = '#' + style['bgcolor']
else:
bg_color = None
return bg_color
def _get_style_font(self, style):
"""
Get the correct font for the style.
"""
return self.fonts.get_font(style['bold'], style['italic'])
def _get_image_size(self, maxlinelength, maxlineno):
"""
Get the required image size.
"""
return (self._get_char_x(maxlinelength) + self.image_pad,
self._get_line_y(maxlineno + 0) + self.image_pad)
def _draw_linenumber(self, posno, lineno):
"""
Remember a line number drawable to paint later.
"""
self._draw_text(
self._get_linenumber_pos(posno),
str(lineno).rjust(self.line_number_chars),
font=self.fonts.get_font(self.line_number_bold,
self.line_number_italic),
text_fg=self.line_number_fg,
text_bg=None,
)
def _draw_text(self, pos, text, font, text_fg, text_bg):
"""
Remember a single drawable tuple to paint later.
"""
self.drawables.append((pos, text, font, text_fg, text_bg))
def _create_drawables(self, tokensource):
"""
Create drawables for the token content.
"""
lineno = charno = maxcharno = 0
maxlinelength = linelength = 0
for ttype, value in tokensource:
while ttype not in self.styles:
ttype = ttype.parent
style = self.styles[ttype]
# TODO: make sure tab expansion happens earlier in the chain. It
# really ought to be done on the input, as to do it right here is
# quite complex.
value = value.expandtabs(4)
lines = value.splitlines(True)
# print lines
for i, line in enumerate(lines):
temp = line.rstrip('\n')
if temp:
self._draw_text(
self._get_text_pos(linelength, lineno),
temp,
font = self._get_style_font(style),
text_fg = self._get_text_color(style),
text_bg = self._get_text_bg_color(style),
)
temp_width, _ = self.fonts.get_text_size(temp)
linelength += temp_width
maxlinelength = max(maxlinelength, linelength)
charno += len(temp)
maxcharno = max(maxcharno, charno)
if line.endswith('\n'):
# add a line for each extra line in the value
linelength = 0
charno = 0
lineno += 1
self.maxlinelength = maxlinelength
self.maxcharno = maxcharno
self.maxlineno = lineno
def _draw_line_numbers(self):
"""
Create drawables for the line numbers.
"""
if not self.line_numbers:
return
for p in range(self.maxlineno):
n = p + self.line_number_start
if (n % self.line_number_step) == 0:
self._draw_linenumber(p, n)
def _paint_line_number_bg(self, im):
"""
Paint the line number background on the image.
"""
if not self.line_numbers:
return
if self.line_number_fg is None:
return
draw = ImageDraw.Draw(im)
recth = im.size[-1]
rectw = self.image_pad + self.line_number_width - self.line_number_pad
draw.rectangle([(0, 0), (rectw, recth)],
fill=self.line_number_bg)
if self.line_number_separator:
draw.line([(rectw, 0), (rectw, recth)], fill=self.line_number_fg)
del draw
def format(self, tokensource, outfile):
"""
Format ``tokensource``, an iterable of ``(tokentype, tokenstring)``
tuples and write it into ``outfile``.
This implementation calculates where it should draw each token on the
pixmap, then calculates the required pixmap size and draws the items.
"""
self._create_drawables(tokensource)
self._draw_line_numbers()
im = Image.new(
'RGB',
self._get_image_size(self.maxlinelength, self.maxlineno),
self.background_color
)
self._paint_line_number_bg(im)
draw = ImageDraw.Draw(im)
# Highlight
if self.hl_lines:
x = self.image_pad + self.line_number_width - self.line_number_pad + 1
recth = self._get_line_height()
rectw = im.size[0] - x
for linenumber in self.hl_lines:
y = self._get_line_y(linenumber - 1)
draw.rectangle([(x, y), (x + rectw, y + recth)],
fill=self.hl_color)
for pos, value, font, text_fg, text_bg in self.drawables:
if text_bg:
# see deprecations https://pillow.readthedocs.io/en/stable/releasenotes/9.2.0.html#font-size-and-offset-methods
if hasattr(draw, 'textsize'):
text_size = draw.textsize(text=value, font=font)
else:
text_size = font.getbbox(value)[2:]
draw.rectangle([pos[0], pos[1], pos[0] + text_size[0], pos[1] + text_size[1]], fill=text_bg)
draw.text(pos, value, font=font, fill=text_fg)
im.save(outfile, self.image_format.upper())
# Add one formatter per format, so that the "-f gif" option gives the correct result
# when used in pygmentize.
class GifImageFormatter(ImageFormatter):
"""
Create a GIF image from source code. This uses the Python Imaging Library to
generate a pixmap from the source code.
.. versionadded:: 1.0
"""
name = 'img_gif'
aliases = ['gif']
filenames = ['*.gif']
default_image_format = 'gif'
class JpgImageFormatter(ImageFormatter):
"""
Create a JPEG image from source code. This uses the Python Imaging Library to
generate a pixmap from the source code.
.. versionadded:: 1.0
"""
name = 'img_jpg'
aliases = ['jpg', 'jpeg']
filenames = ['*.jpg']
default_image_format = 'jpeg'
class BmpImageFormatter(ImageFormatter):
"""
Create a bitmap image from source code. This uses the Python Imaging Library to
generate a pixmap from the source code.
.. versionadded:: 1.0
"""
name = 'img_bmp'
aliases = ['bmp', 'bitmap']
filenames = ['*.bmp']
default_image_format = 'bmp'
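
A usage sketch for the image formatters (illustrative, not part of the vendored file; requires Pillow). Note ``unicodeoutput = False`` above: the formatter emits bytes, so the output stream must be opened in binary mode.

# Illustrative only -- assumes a standard Pygments install plus Pillow.
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import ImageFormatter

with open('snippet.png', 'wb') as f:   # binary: the formatter writes bytes
    highlight("print('hi')\n", PythonLexer(),
              ImageFormatter(font_size=16, line_numbers=True), f)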

View File

@ -0,0 +1,154 @@
"""
pygments.formatters.irc
~~~~~~~~~~~~~~~~~~~~~~~
    Formatter for IRC output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.token import Keyword, Name, Comment, String, Error, \
Number, Operator, Generic, Token, Whitespace
from pip._vendor.pygments.util import get_choice_opt
__all__ = ['IRCFormatter']
#: Map token types to a tuple of color values for light and dark
#: backgrounds.
IRC_COLORS = {
Token: ('', ''),
Whitespace: ('gray', 'brightblack'),
Comment: ('gray', 'brightblack'),
Comment.Preproc: ('cyan', 'brightcyan'),
Keyword: ('blue', 'brightblue'),
Keyword.Type: ('cyan', 'brightcyan'),
Operator.Word: ('magenta', 'brightcyan'),
Name.Builtin: ('cyan', 'brightcyan'),
Name.Function: ('green', 'brightgreen'),
Name.Namespace: ('_cyan_', '_brightcyan_'),
Name.Class: ('_green_', '_brightgreen_'),
Name.Exception: ('cyan', 'brightcyan'),
Name.Decorator: ('brightblack', 'gray'),
Name.Variable: ('red', 'brightred'),
Name.Constant: ('red', 'brightred'),
Name.Attribute: ('cyan', 'brightcyan'),
Name.Tag: ('brightblue', 'brightblue'),
String: ('yellow', 'yellow'),
Number: ('blue', 'brightblue'),
Generic.Deleted: ('brightred', 'brightred'),
Generic.Inserted: ('green', 'brightgreen'),
Generic.Heading: ('**', '**'),
Generic.Subheading: ('*magenta*', '*brightmagenta*'),
Generic.Error: ('brightred', 'brightred'),
Error: ('_brightred_', '_brightred_'),
}
IRC_COLOR_MAP = {
'white': 0,
'black': 1,
'blue': 2,
'brightgreen': 3,
'brightred': 4,
'yellow': 5,
'magenta': 6,
'orange': 7,
    'green': 7, # compat w/ ansi
'brightyellow': 8,
'lightgreen': 9,
'brightcyan': 9, # compat w/ ansi
'cyan': 10,
'lightblue': 11,
'red': 11, # compat w/ ansi
'brightblue': 12,
'brightmagenta': 13,
'brightblack': 14,
'gray': 15,
}
def ircformat(color, text):
if len(color) < 1:
return text
add = sub = ''
if '_' in color: # italic
add += '\x1D'
sub = '\x1D' + sub
color = color.strip('_')
if '*' in color: # bold
add += '\x02'
sub = '\x02' + sub
color = color.strip('*')
# underline (\x1F) not supported
# backgrounds (\x03FF,BB) not supported
if len(color) > 0: # actual color - may have issues with ircformat("red", "blah")+"10" type stuff
add += '\x03' + str(IRC_COLOR_MAP[color]).zfill(2)
sub = '\x03' + sub
    return add + text + sub
class IRCFormatter(Formatter):
r"""
Format tokens with IRC color sequences
The `get_style_defs()` method doesn't do anything special since there is
no support for common styles.
Options accepted:
`bg`
Set to ``"light"`` or ``"dark"`` depending on the terminal's background
(default: ``"light"``).
`colorscheme`
A dictionary mapping token types to (lightbg, darkbg) color names or
``None`` (default: ``None`` = use builtin colorscheme).
`linenos`
Set to ``True`` to have line numbers in the output as well
(default: ``False`` = no line numbers).
"""
name = 'IRC'
aliases = ['irc', 'IRC']
filenames = []
def __init__(self, **options):
Formatter.__init__(self, **options)
self.darkbg = get_choice_opt(options, 'bg',
['light', 'dark'], 'light') == 'dark'
self.colorscheme = options.get('colorscheme', None) or IRC_COLORS
self.linenos = options.get('linenos', False)
self._lineno = 0
def _write_lineno(self, outfile):
if self.linenos:
self._lineno += 1
outfile.write("%04d: " % self._lineno)
def format_unencoded(self, tokensource, outfile):
self._write_lineno(outfile)
for ttype, value in tokensource:
color = self.colorscheme.get(ttype)
while color is None:
ttype = ttype[:-1]
color = self.colorscheme.get(ttype)
if color:
color = color[self.darkbg]
spl = value.split('\n')
for line in spl[:-1]:
if line:
outfile.write(ircformat(color, line))
outfile.write('\n')
self._write_lineno(outfile)
if spl[-1]:
outfile.write(ircformat(color, spl[-1]))
else:
outfile.write(value)
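
A usage sketch for IRCFormatter (illustrative, not part of the vendored file):

# Illustrative only -- assumes a standard Pygments install.
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import IRCFormatter

msg = highlight("def foo(bar):\n    pass\n", PythonLexer(),
                IRCFormatter(bg='dark', linenos=True))
# msg carries mIRC control codes, e.g. '\x0312' (brightblue) before keywords
# on a dark background, per IRC_COLORS and IRC_COLOR_MAP above.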

View File

@ -0,0 +1,518 @@
"""
pygments.formatters.latex
~~~~~~~~~~~~~~~~~~~~~~~~~
Formatter for LaTeX fancyvrb output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from io import StringIO
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.lexer import Lexer, do_insertions
from pip._vendor.pygments.token import Token, STANDARD_TYPES
from pip._vendor.pygments.util import get_bool_opt, get_int_opt
__all__ = ['LatexFormatter']
def escape_tex(text, commandprefix):
return text.replace('\\', '\x00'). \
replace('{', '\x01'). \
replace('}', '\x02'). \
replace('\x00', rf'\{commandprefix}Zbs{{}}'). \
replace('\x01', rf'\{commandprefix}Zob{{}}'). \
replace('\x02', rf'\{commandprefix}Zcb{{}}'). \
replace('^', rf'\{commandprefix}Zca{{}}'). \
replace('_', rf'\{commandprefix}Zus{{}}'). \
replace('&', rf'\{commandprefix}Zam{{}}'). \
replace('<', rf'\{commandprefix}Zlt{{}}'). \
replace('>', rf'\{commandprefix}Zgt{{}}'). \
replace('#', rf'\{commandprefix}Zsh{{}}'). \
replace('%', rf'\{commandprefix}Zpc{{}}'). \
replace('$', rf'\{commandprefix}Zdl{{}}'). \
replace('-', rf'\{commandprefix}Zhy{{}}'). \
replace("'", rf'\{commandprefix}Zsq{{}}'). \
replace('"', rf'\{commandprefix}Zdq{{}}'). \
replace('~', rf'\{commandprefix}Zti{{}}')
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s
%(styledefs)s
\begin{document}
\section*{%(title)s}
%(code)s
\end{document}
'''
## Small explanation of the mess below :)
#
# The previous version of the LaTeX formatter just assigned a command to
# each token type defined in the current style. That obviously is
# problematic if the highlighted code is produced for a different style
# than the style commands themselves.
#
# This version works much like the HTML formatter which assigns multiple
# CSS classes to each <span> tag, from the most specific to the least
# specific token type, thus falling back to the parent token type if one
# is not defined. Here, the classes are there too and use the same short
# forms given in token.STANDARD_TYPES.
#
# Highlighted code now only uses one custom command, which by default is
# \PY and selectable by the commandprefix option (and in addition the
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
# backwards compatibility purposes).
#
# \PY has two arguments: the classes, separated by +, and the text to
# render in that style. The classes are resolved into the respective
# style commands by magic, which serves to ignore unknown classes.
#
# The magic macros are:
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
# to render in \PY@do. Their definition determines the style.
# * \PY@reset resets \PY@it etc. to do nothing.
# * \PY@toks parses the list of classes, using magic inspired by the
# keyval package (but modified to use plusses instead of commas
# because fancyvrb redefines commas inside its environments).
# * \PY@tok processes one class, calling the \PY@tok@classname command
# if it exists.
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
# for its class.
# * \PY resets the style, parses the classnames and then calls \PY@do.
#
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY'})
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
\let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
\let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
\%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
\%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}
%(styles)s
\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
def _get_ttype_name(ttype):
fname = STANDARD_TYPES.get(ttype)
if fname:
return fname
aname = ''
while fname is None:
aname = ttype[-1] + aname
ttype = ttype.parent
fname = STANDARD_TYPES.get(ttype)
return fname + aname
class LatexFormatter(Formatter):
r"""
Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
standard packages.
Without the `full` option, code is formatted as one ``Verbatim``
environment, like this:
.. sourcecode:: latex
\begin{Verbatim}[commandchars=\\\{\}]
\PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
\PY{k}{pass}
\end{Verbatim}
Wrapping can be disabled using the `nowrap` option.
The special command used here (``\PY``) and all the other macros it needs
are output by the `get_style_defs` method.
With the `full` option, a complete LaTeX document is output, including
the command definitions in the preamble.
The `get_style_defs()` method of a `LatexFormatter` returns a string
containing ``\def`` commands defining the macros needed inside the
``Verbatim`` environments.
Additional options accepted:
`nowrap`
If set to ``True``, don't wrap the tokens at all, not even inside a
``\begin{Verbatim}`` environment. This disables most other options
(default: ``False``).
`style`
The style to use, can be a string or a Style subclass (default:
``'default'``).
`full`
Tells the formatter to output a "full" document, i.e. a complete
self-contained document (default: ``False``).
`title`
If `full` is true, the title that should be used to caption the
document (default: ``''``).
`docclass`
If the `full` option is enabled, this is the document class to use
(default: ``'article'``).
`preamble`
If the `full` option is enabled, this can be further preamble commands,
e.g. ``\usepackage`` (default: ``''``).
`linenos`
If set to ``True``, output line numbers (default: ``False``).
`linenostart`
The line number for the first line (default: ``1``).
`linenostep`
If set to a number n > 1, only every nth line number is printed.
`verboptions`
Additional options given to the Verbatim environment (see the *fancyvrb*
docs for possible values) (default: ``''``).
`commandprefix`
The LaTeX commands used to produce colored output are constructed
using this prefix and some letters (default: ``'PY'``).
.. versionadded:: 0.7
.. versionchanged:: 0.10
The default is now ``'PY'`` instead of ``'C'``.
`texcomments`
        If set to ``True``, enables LaTeX comment lines. That is, LaTeX markup
in comment tokens is not escaped so that LaTeX can render it (default:
``False``).
.. versionadded:: 1.2
`mathescape`
If set to ``True``, enables LaTeX math mode escape in comments. That
is, ``'$...$'`` inside a comment will trigger math mode (default:
``False``).
.. versionadded:: 1.2
`escapeinside`
If set to a string of length 2, enables escaping to LaTeX. Text
delimited by these 2 characters is read as LaTeX code and
typeset accordingly. It has no effect in string literals. It has
no effect in comments if `texcomments` or `mathescape` is
set. (default: ``''``).
.. versionadded:: 2.0
`envname`
Allows you to pick an alternative environment name replacing Verbatim.
The alternate environment still has to support Verbatim's option syntax.
(default: ``'Verbatim'``).
.. versionadded:: 2.0
"""
name = 'LaTeX'
aliases = ['latex', 'tex']
filenames = ['*.tex']
def __init__(self, **options):
Formatter.__init__(self, **options)
self.nowrap = get_bool_opt(options, 'nowrap', False)
self.docclass = options.get('docclass', 'article')
self.preamble = options.get('preamble', '')
self.linenos = get_bool_opt(options, 'linenos', False)
self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
self.verboptions = options.get('verboptions', '')
self.nobackground = get_bool_opt(options, 'nobackground', False)
self.commandprefix = options.get('commandprefix', 'PY')
self.texcomments = get_bool_opt(options, 'texcomments', False)
self.mathescape = get_bool_opt(options, 'mathescape', False)
self.escapeinside = options.get('escapeinside', '')
if len(self.escapeinside) == 2:
self.left = self.escapeinside[0]
self.right = self.escapeinside[1]
else:
self.escapeinside = ''
self.envname = options.get('envname', 'Verbatim')
self._create_stylesheet()
def _create_stylesheet(self):
t2n = self.ttype2name = {Token: ''}
c2d = self.cmd2def = {}
cp = self.commandprefix
def rgbcolor(col):
if col:
return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
for i in (0, 2, 4)])
else:
return '1,1,1'
for ttype, ndef in self.style:
name = _get_ttype_name(ttype)
cmndef = ''
if ndef['bold']:
cmndef += r'\let\$$@bf=\textbf'
if ndef['italic']:
cmndef += r'\let\$$@it=\textit'
if ndef['underline']:
cmndef += r'\let\$$@ul=\underline'
if ndef['roman']:
cmndef += r'\let\$$@ff=\textrm'
if ndef['sans']:
cmndef += r'\let\$$@ff=\textsf'
if ndef['mono']:
cmndef += r'\let\$$@ff=\textsf'
if ndef['color']:
cmndef += (r'\def\$$@tc##1{{\textcolor[rgb]{{{}}}{{##1}}}}'.format(rgbcolor(ndef['color'])))
if ndef['border']:
cmndef += (r'\def\$$@bc##1{{{{\setlength{{\fboxsep}}{{\string -\fboxrule}}'
r'\fcolorbox[rgb]{{{}}}{{{}}}{{\strut ##1}}}}}}'.format(rgbcolor(ndef['border']),
rgbcolor(ndef['bgcolor'])))
elif ndef['bgcolor']:
cmndef += (r'\def\$$@bc##1{{{{\setlength{{\fboxsep}}{{0pt}}'
r'\colorbox[rgb]{{{}}}{{\strut ##1}}}}}}'.format(rgbcolor(ndef['bgcolor'])))
if cmndef == '':
continue
cmndef = cmndef.replace('$$', cp)
t2n[ttype] = name
c2d[name] = cmndef
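    # Sketch of the resulting tables for the default style (illustrative,
    # values depend on the style): self.ttype2name[Token.Keyword] == 'k', and
    # self.cmd2def['k'] contains r'\let\PY@bf=\textbf' plus a \PY@tc colour
    # definition built by rgbcolor() above.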
def get_style_defs(self, arg=''):
"""
Return the command sequences needed to define the commands
used to format text in the verbatim environment. ``arg`` is ignored.
"""
cp = self.commandprefix
styles = []
for name, definition in self.cmd2def.items():
styles.append(rf'\@namedef{{{cp}@tok@{name}}}{{{definition}}}')
return STYLE_TEMPLATE % {'cp': self.commandprefix,
'styles': '\n'.join(styles)}
def format_unencoded(self, tokensource, outfile):
# TODO: add support for background colors
t2n = self.ttype2name
cp = self.commandprefix
if self.full:
realoutfile = outfile
outfile = StringIO()
if not self.nowrap:
outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
if self.linenos:
start, step = self.linenostart, self.linenostep
outfile.write(',numbers=left' +
(start and ',firstnumber=%d' % start or '') +
(step and ',stepnumber=%d' % step or ''))
if self.mathescape or self.texcomments or self.escapeinside:
outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
'\\catcode`\\_=8\\relax}')
if self.verboptions:
outfile.write(',' + self.verboptions)
outfile.write(']\n')
for ttype, value in tokensource:
if ttype in Token.Comment:
if self.texcomments:
# Try to guess comment starting lexeme and escape it ...
start = value[0:1]
for i in range(1, len(value)):
if start[0] != value[i]:
break
start += value[i]
value = value[len(start):]
start = escape_tex(start, cp)
# ... but do not escape inside comment.
value = start + value
elif self.mathescape:
# Only escape parts not inside a math environment.
parts = value.split('$')
in_math = False
for i, part in enumerate(parts):
if not in_math:
parts[i] = escape_tex(part, cp)
in_math = not in_math
value = '$'.join(parts)
elif self.escapeinside:
text = value
value = ''
while text:
a, sep1, text = text.partition(self.left)
if sep1:
b, sep2, text = text.partition(self.right)
if sep2:
value += escape_tex(a, cp) + b
else:
value += escape_tex(a + sep1 + b, cp)
else:
value += escape_tex(a, cp)
else:
value = escape_tex(value, cp)
elif ttype not in Token.Escape:
value = escape_tex(value, cp)
styles = []
while ttype is not Token:
try:
styles.append(t2n[ttype])
except KeyError:
# not in current style
styles.append(_get_ttype_name(ttype))
ttype = ttype.parent
styleval = '+'.join(reversed(styles))
if styleval:
spl = value.split('\n')
for line in spl[:-1]:
if line:
outfile.write(f"\\{cp}{{{styleval}}}{{{line}}}")
outfile.write('\n')
if spl[-1]:
outfile.write(f"\\{cp}{{{styleval}}}{{{spl[-1]}}}")
else:
outfile.write(value)
if not self.nowrap:
outfile.write('\\end{' + self.envname + '}\n')
if self.full:
encoding = self.encoding or 'utf8'
            # map known existing encodings from the LaTeX distribution
encoding = {
'utf_8': 'utf8',
'latin_1': 'latin1',
'iso_8859_1': 'latin1',
}.get(encoding.replace('-', '_'), encoding)
realoutfile.write(DOC_TEMPLATE %
dict(docclass = self.docclass,
preamble = self.preamble,
title = self.title,
encoding = encoding,
styledefs = self.get_style_defs(),
code = outfile.getvalue()))
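# Usage sketch (doctest-style, not executed; assumes pygments' public API):
# >>> from pip._vendor.pygments import highlight
# >>> from pip._vendor.pygments.lexers import PythonLexer
# >>> print(highlight('def foo(bar):\n    pass', PythonLexer(), LatexFormatter()))
# \begin{Verbatim}[commandchars=\\\{\}]
# ...
# \end{Verbatim}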
class LatexEmbeddedLexer(Lexer):
"""
This lexer takes one lexer as argument, the lexer for the language
being formatted, and the left and right delimiters for escaped text.
First everything is scanned using the language lexer to obtain
strings and comments. All other consecutive tokens are merged and
the resulting text is scanned for escaped segments, which are given
the Token.Escape type. Finally text that is not escaped is scanned
again with the language lexer.
"""
def __init__(self, left, right, lang, **options):
self.left = left
self.right = right
self.lang = lang
Lexer.__init__(self, **options)
def get_tokens_unprocessed(self, text):
# find and remove all the escape tokens (replace with an empty string)
# this is very similar to DelegatingLexer.get_tokens_unprocessed.
buffered = ''
insertions = []
insertion_buf = []
for i, t, v in self._find_safe_escape_tokens(text):
if t is None:
if insertion_buf:
insertions.append((len(buffered), insertion_buf))
insertion_buf = []
buffered += v
else:
insertion_buf.append((i, t, v))
if insertion_buf:
insertions.append((len(buffered), insertion_buf))
return do_insertions(insertions,
self.lang.get_tokens_unprocessed(buffered))
def _find_safe_escape_tokens(self, text):
""" find escape tokens that are not in strings or comments """
for i, t, v in self._filter_to(
self.lang.get_tokens_unprocessed(text),
lambda t: t in Token.Comment or t in Token.String
):
if t is None:
for i2, t2, v2 in self._find_escape_tokens(v):
yield i + i2, t2, v2
else:
yield i, None, v
def _filter_to(self, it, pred):
""" Keep only the tokens that match `pred`, merge the others together """
buf = ''
idx = 0
for i, t, v in it:
if pred(t):
if buf:
yield idx, None, buf
buf = ''
yield i, t, v
else:
if not buf:
idx = i
buf += v
if buf:
yield idx, None, buf
def _find_escape_tokens(self, text):
""" Find escape tokens within text, give token=None otherwise """
index = 0
while text:
a, sep1, text = text.partition(self.left)
if a:
yield index, None, a
index += len(a)
if sep1:
b, sep2, text = text.partition(self.right)
if sep2:
yield index + len(sep1), Token.Escape, b
index += len(sep1) + len(b) + len(sep2)
else:
yield index, Token.Error, sep1
index += len(sep1)
text = b
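# Example (illustrative): LatexEmbeddedLexer('|', '|', PythonLexer()) lexes
# Python source but emits the text between '|' delimiters as Token.Escape, so
# a comment like `# |\textbf{note}|` passes the \textbf call through to LaTeX.
# Escapes inside string and comment tokens are deliberately left alone, per
# _find_safe_escape_tokens() above.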

View File

@ -0,0 +1,160 @@
"""
pygments.formatters.other
~~~~~~~~~~~~~~~~~~~~~~~~~
Other formatters: NullFormatter, RawTokenFormatter.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.util import get_choice_opt
from pip._vendor.pygments.token import Token
from pip._vendor.pygments.console import colorize
__all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']
class NullFormatter(Formatter):
"""
Output the text unchanged without any formatting.
"""
name = 'Text only'
aliases = ['text', 'null']
filenames = ['*.txt']
def format(self, tokensource, outfile):
enc = self.encoding
for ttype, value in tokensource:
if enc:
outfile.write(value.encode(enc))
else:
outfile.write(value)
class RawTokenFormatter(Formatter):
r"""
Format tokens as a raw representation for storing token streams.
The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
be converted to a token stream with the `RawTokenLexer`, described in the
:doc:`lexer list <lexers>`.
Only two options are accepted:
`compress`
If set to ``'gz'`` or ``'bz2'``, compress the output with the given
compression algorithm after encoding (default: ``''``).
`error_color`
If set to a color name, highlight error tokens using that color. If
set but with no value, defaults to ``'red'``.
.. versionadded:: 0.11
"""
name = 'Raw tokens'
aliases = ['raw', 'tokens']
filenames = ['*.raw']
unicodeoutput = False
def __init__(self, **options):
Formatter.__init__(self, **options)
# We ignore self.encoding if it is set, since it gets set for lexer
# and formatter if given with -Oencoding on the command line.
# The RawTokenFormatter outputs only ASCII. Override here.
self.encoding = 'ascii' # let pygments.format() do the right thing
self.compress = get_choice_opt(options, 'compress',
['', 'none', 'gz', 'bz2'], '')
self.error_color = options.get('error_color', None)
if self.error_color is True:
self.error_color = 'red'
if self.error_color is not None:
try:
colorize(self.error_color, '')
except KeyError:
raise ValueError(f"Invalid color {self.error_color!r} specified")
def format(self, tokensource, outfile):
try:
outfile.write(b'')
except TypeError:
raise TypeError('The raw tokens formatter needs a binary '
'output file')
if self.compress == 'gz':
import gzip
outfile = gzip.GzipFile('', 'wb', 9, outfile)
write = outfile.write
flush = outfile.close
elif self.compress == 'bz2':
import bz2
compressor = bz2.BZ2Compressor(9)
def write(text):
outfile.write(compressor.compress(text))
def flush():
outfile.write(compressor.flush())
outfile.flush()
else:
write = outfile.write
flush = outfile.flush
if self.error_color:
for ttype, value in tokensource:
line = b"%r\t%r\n" % (ttype, value)
if ttype is Token.Error:
write(colorize(self.error_color, line))
else:
write(line)
else:
for ttype, value in tokensource:
write(b"%r\t%r\n" % (ttype, value))
flush()
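# Output sketch (illustrative): one ASCII line per token, e.g.
#   Token.Keyword<TAB>'def'
#   Token.Name.Function<TAB>'foo'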
TESTCASE_BEFORE = '''\
def testNeedsName(lexer):
fragment = %r
tokens = [
'''
TESTCASE_AFTER = '''\
]
assert list(lexer.get_tokens(fragment)) == tokens
'''
class TestcaseFormatter(Formatter):
"""
Format tokens as appropriate for a new testcase.
.. versionadded:: 2.0
"""
name = 'Testcase'
aliases = ['testcase']
def __init__(self, **options):
Formatter.__init__(self, **options)
if self.encoding is not None and self.encoding != 'utf-8':
raise ValueError("Only None and utf-8 are allowed encodings.")
def format(self, tokensource, outfile):
indentation = ' ' * 12
rawbuf = []
outbuf = []
for ttype, value in tokensource:
rawbuf.append(value)
outbuf.append(f'{indentation}({ttype}, {value!r}),\n')
before = TESTCASE_BEFORE % (''.join(rawbuf),)
during = ''.join(outbuf)
after = TESTCASE_AFTER
if self.encoding is None:
outfile.write(before + during + after)
else:
outfile.write(before.encode('utf-8'))
outfile.write(during.encode('utf-8'))
outfile.write(after.encode('utf-8'))
outfile.flush()
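# For the fragment 'pass\n' the emitted testcase looks roughly like this
# (token names are illustrative and depend on the lexer):
#     def testNeedsName(lexer):
#         fragment = 'pass\n'
#         tokens = [
#             (Token.Keyword, 'pass'),
#             (Token.Text.Whitespace, '\n'),
#         ]
#         assert list(lexer.get_tokens(fragment)) == tokens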

View File

@ -0,0 +1,83 @@
"""
pygments.formatters.pangomarkup
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Formatter for Pango markup output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pip._vendor.pygments.formatter import Formatter
__all__ = ['PangoMarkupFormatter']
_escape_table = {
ord('&'): '&amp;',
ord('<'): '&lt;',
}
def escape_special_chars(text, table=_escape_table):
"""Escape & and < for Pango Markup."""
return text.translate(table)
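# Doctest-style example:
# >>> escape_special_chars('if a < b & c:')
# 'if a &lt; b &amp; c:'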
class PangoMarkupFormatter(Formatter):
"""
Format tokens as Pango Markup code. It can then be rendered to an SVG.
.. versionadded:: 2.9
"""
name = 'Pango Markup'
aliases = ['pango', 'pangomarkup']
filenames = []
def __init__(self, **options):
Formatter.__init__(self, **options)
self.styles = {}
for token, style in self.style:
start = ''
end = ''
if style['color']:
start += '<span fgcolor="#{}">'.format(style['color'])
end = '</span>' + end
if style['bold']:
start += '<b>'
end = '</b>' + end
if style['italic']:
start += '<i>'
end = '</i>' + end
if style['underline']:
start += '<u>'
end = '</u>' + end
self.styles[token] = (start, end)
def format_unencoded(self, tokensource, outfile):
lastval = ''
lasttype = None
outfile.write('<tt>')
for ttype, value in tokensource:
while ttype not in self.styles:
ttype = ttype.parent
if ttype == lasttype:
lastval += escape_special_chars(value)
else:
if lastval:
stylebegin, styleend = self.styles[lasttype]
outfile.write(stylebegin + lastval + styleend)
lastval = escape_special_chars(value)
lasttype = ttype
if lastval:
stylebegin, styleend = self.styles[lasttype]
outfile.write(stylebegin + lastval + styleend)
outfile.write('</tt>')
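# Output sketch (illustrative, default style): a bold green keyword renders as
#   <tt><span fgcolor="#008000"><b>def</b></span> ...</tt>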

View File

@ -0,0 +1,349 @@
"""
pygments.formatters.rtf
~~~~~~~~~~~~~~~~~~~~~~~
A formatter that generates RTF files.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from collections import OrderedDict
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.style import _ansimap
from pip._vendor.pygments.util import get_bool_opt, get_int_opt, get_list_opt, surrogatepair
__all__ = ['RtfFormatter']
class RtfFormatter(Formatter):
"""
Format tokens as RTF markup. This formatter automatically outputs full RTF
documents with color information and other useful stuff. Perfect for Copy and
Paste into Microsoft(R) Word(R) documents.
Please note that ``encoding`` and ``outencoding`` options are ignored.
The RTF format is ASCII natively, but handles unicode characters correctly
thanks to escape sequences.
.. versionadded:: 0.6
Additional options accepted:
`style`
The style to use, can be a string or a Style subclass (default:
``'default'``).
`fontface`
The used font family, for example ``Bitstream Vera Sans``. Defaults to
some generic font which is supposed to have fixed width.
`fontsize`
Size of the font used. Size is specified in half points. The
default is 24 half-points, giving a size 12 font.
.. versionadded:: 2.0
`linenos`
Turn on line numbering (default: ``False``).
.. versionadded:: 2.18
`lineno_fontsize`
Font size for line numbers. Size is specified in half points
(default: `fontsize`).
.. versionadded:: 2.18
`lineno_padding`
Number of spaces between the (inline) line numbers and the
source code (default: ``2``).
.. versionadded:: 2.18
`linenostart`
The line number for the first line (default: ``1``).
.. versionadded:: 2.18
`linenostep`
If set to a number n > 1, only every nth line number is printed.
.. versionadded:: 2.18
`lineno_color`
Color for line numbers specified as a hex triplet, e.g. ``'5e5e5e'``.
Defaults to the style's line number color if it is a hex triplet,
otherwise ansi bright black.
.. versionadded:: 2.18
`hl_lines`
Specify a list of lines to be highlighted, as line numbers separated by
spaces, e.g. ``'3 7 8'``. The line numbers are relative to the input
(i.e. the first line is line 1) unless `hl_linenostart` is set.
.. versionadded:: 2.18
`hl_color`
Color for highlighting the lines specified in `hl_lines`, specified as
a hex triplet (default: style's `highlight_color`).
.. versionadded:: 2.18
`hl_linenostart`
If set to ``True`` line numbers in `hl_lines` are specified
relative to `linenostart` (default ``False``).
.. versionadded:: 2.18
"""
name = 'RTF'
aliases = ['rtf']
filenames = ['*.rtf']
def __init__(self, **options):
r"""
Additional options accepted:
``fontface``
Name of the font used. Could for example be ``'Courier New'``
to further specify the default which is ``'\fmodern'``. The RTF
specification claims that ``\fmodern`` are "Fixed-pitch serif
and sans serif fonts". Hope every RTF implementation thinks
the same about modern...
"""
Formatter.__init__(self, **options)
self.fontface = options.get('fontface') or ''
self.fontsize = get_int_opt(options, 'fontsize', 0)
self.linenos = get_bool_opt(options, 'linenos', False)
self.lineno_fontsize = get_int_opt(options, 'lineno_fontsize',
self.fontsize)
self.lineno_padding = get_int_opt(options, 'lineno_padding', 2)
self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
self.hl_linenostart = get_bool_opt(options, 'hl_linenostart', False)
self.hl_color = options.get('hl_color', '')
if not self.hl_color:
self.hl_color = self.style.highlight_color
self.hl_lines = []
for lineno in get_list_opt(options, 'hl_lines', []):
try:
lineno = int(lineno)
if self.hl_linenostart:
lineno = lineno - self.linenostart + 1
self.hl_lines.append(lineno)
except ValueError:
pass
self.lineno_color = options.get('lineno_color', '')
if not self.lineno_color:
if self.style.line_number_color == 'inherit':
# style color is the css value 'inherit'
# default to ansi bright-black
self.lineno_color = _ansimap['ansibrightblack']
else:
# style color is assumed to be a hex triplet as other
# colors in pygments/style.py
self.lineno_color = self.style.line_number_color
self.color_mapping = self._create_color_mapping()
def _escape(self, text):
return text.replace('\\', '\\\\') \
.replace('{', '\\{') \
.replace('}', '\\}')
def _escape_text(self, text):
# empty strings, should give a small performance improvement
if not text:
return ''
# escape text
text = self._escape(text)
buf = []
for c in text:
cn = ord(c)
if cn < (2**7):
# ASCII character
buf.append(str(c))
elif (2**7) <= cn < (2**16):
# single unicode escape sequence
buf.append('{\\u%d}' % cn)
elif (2**16) <= cn:
# RTF limits unicode to 16 bits.
# Force surrogate pairs
buf.append('{\\u%d}{\\u%d}' % surrogatepair(cn))
return ''.join(buf).replace('\n', '\\par')
@staticmethod
def hex_to_rtf_color(hex_color):
if hex_color[0] == "#":
hex_color = hex_color[1:]
return '\\red%d\\green%d\\blue%d;' % (
int(hex_color[0:2], 16),
int(hex_color[2:4], 16),
int(hex_color[4:6], 16)
)
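    # Doctest-style sketch:
    # >>> RtfFormatter.hex_to_rtf_color('#ff8000')
    # '\\red255\\green128\\blue0;'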
def _split_tokens_on_newlines(self, tokensource):
"""
        Split tokens containing newline characters into multiple tokens,
        each representing a line of the input file. Needed for numbering
lines of e.g. multiline comments.
"""
for ttype, value in tokensource:
if value == '\n':
yield (ttype, value)
elif "\n" in value:
lines = value.split("\n")
for line in lines[:-1]:
yield (ttype, line+"\n")
if lines[-1]:
yield (ttype, lines[-1])
else:
yield (ttype, value)
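    # Sketch: (Comment, 'a\nb\n') is re-emitted as (Comment, 'a\n') followed
    # by (Comment, 'b\n'); a lone '\n' token passes through unchanged.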
def _create_color_mapping(self):
"""
Create a mapping of style hex colors to index/offset in
the RTF color table.
"""
color_mapping = OrderedDict()
offset = 1
if self.linenos:
color_mapping[self.lineno_color] = offset
offset += 1
if self.hl_lines:
color_mapping[self.hl_color] = offset
offset += 1
for _, style in self.style:
for color in style['color'], style['bgcolor'], style['border']:
if color and color not in color_mapping:
color_mapping[color] = offset
offset += 1
return color_mapping
@property
def _lineno_template(self):
if self.lineno_fontsize != self.fontsize:
return '{{\\fs{} \\cf{} %s{}}}'.format(self.lineno_fontsize,
self.color_mapping[self.lineno_color],
" " * self.lineno_padding)
return '{{\\cf{} %s{}}}'.format(self.color_mapping[self.lineno_color],
" " * self.lineno_padding)
@property
def _hl_open_str(self):
return rf'{{\highlight{self.color_mapping[self.hl_color]} '
@property
def _rtf_header(self):
lines = []
# rtf 1.8 header
lines.append('{\\rtf1\\ansi\\uc0\\deff0'
'{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
% (self.fontface and ' '
+ self._escape(self.fontface) or ''))
# color table
lines.append('{\\colortbl;')
for color, _ in self.color_mapping.items():
lines.append(self.hex_to_rtf_color(color))
lines.append('}')
# font and fontsize
lines.append('\\f0\\sa0')
if self.fontsize:
lines.append('\\fs%d' % self.fontsize)
# ensure Libre Office Writer imports and renders consecutive
# space characters the same width, needed for line numbering.
# https://bugs.documentfoundation.org/show_bug.cgi?id=144050
lines.append('\\dntblnsbdb')
return lines
def format_unencoded(self, tokensource, outfile):
for line in self._rtf_header:
outfile.write(line + "\n")
tokensource = self._split_tokens_on_newlines(tokensource)
# first pass of tokens to count lines, needed for line numbering
if self.linenos:
line_count = 0
tokens = [] # for copying the token source generator
for ttype, value in tokensource:
tokens.append((ttype, value))
if value.endswith("\n"):
line_count += 1
# width of line number strings (for padding with spaces)
linenos_width = len(str(line_count+self.linenostart-1))
tokensource = tokens
# highlight stream
lineno = 1
start_new_line = True
for ttype, value in tokensource:
if start_new_line and lineno in self.hl_lines:
outfile.write(self._hl_open_str)
if start_new_line and self.linenos:
if (lineno-self.linenostart+1)%self.linenostep == 0:
current_lineno = lineno + self.linenostart - 1
lineno_str = str(current_lineno).rjust(linenos_width)
else:
lineno_str = "".rjust(linenos_width)
outfile.write(self._lineno_template % lineno_str)
while not self.style.styles_token(ttype) and ttype.parent:
ttype = ttype.parent
style = self.style.style_for_token(ttype)
buf = []
if style['bgcolor']:
buf.append('\\cb%d' % self.color_mapping[style['bgcolor']])
if style['color']:
buf.append('\\cf%d' % self.color_mapping[style['color']])
if style['bold']:
buf.append('\\b')
if style['italic']:
buf.append('\\i')
if style['underline']:
buf.append('\\ul')
if style['border']:
buf.append('\\chbrdr\\chcfpat%d' %
self.color_mapping[style['border']])
start = ''.join(buf)
if start:
outfile.write(f'{{{start} ')
outfile.write(self._escape_text(value))
if start:
outfile.write('}')
start_new_line = False
# complete line of input
if value.endswith("\n"):
# close line highlighting
if lineno in self.hl_lines:
outfile.write('}')
# newline in RTF file after closing }
outfile.write("\n")
start_new_line = True
lineno += 1
outfile.write('}\n')
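# Output sketch (illustrative): with no fontface set, the file begins with
#   {\rtf1\ansi\uc0\deff0{\fonttbl{\f0\fmodern\fprq1\fcharset0;}}
# then a {\colortbl;...} table built from _create_color_mapping(), then the
# highlighted tokens, and finally the closing '}'.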

View File

@ -0,0 +1,185 @@
"""
pygments.formatters.svg
~~~~~~~~~~~~~~~~~~~~~~~
Formatter for SVG output.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.token import Comment
from pip._vendor.pygments.util import get_bool_opt, get_int_opt
__all__ = ['SvgFormatter']
def escape_html(text):
"""Escape &, <, > as well as single and double quotes for HTML."""
return text.replace('&', '&amp;'). \
replace('<', '&lt;'). \
replace('>', '&gt;'). \
replace('"', '&quot;'). \
replace("'", '&#39;')
class2style = {}
class SvgFormatter(Formatter):
"""
Format tokens as an SVG graphics file. This formatter is still experimental.
Each line of code is a ``<text>`` element with explicit ``x`` and ``y``
coordinates containing ``<tspan>`` elements with the individual token styles.
By default, this formatter outputs a full SVG document including doctype
declaration and the ``<svg>`` root element.
.. versionadded:: 0.9
Additional options accepted:
`nowrap`
Don't wrap the SVG ``<text>`` elements in ``<svg><g>`` elements and
don't add a XML declaration and a doctype. If true, the `fontfamily`
and `fontsize` options are ignored. Defaults to ``False``.
`fontfamily`
The value to give the wrapping ``<g>`` element's ``font-family``
attribute, defaults to ``"monospace"``.
`fontsize`
The value to give the wrapping ``<g>`` element's ``font-size``
attribute, defaults to ``"14px"``.
`linenos`
If ``True``, add line numbers (default: ``False``).
`linenostart`
The line number for the first line (default: ``1``).
`linenostep`
If set to a number n > 1, only every nth line number is printed.
`linenowidth`
Maximum width devoted to line numbers (default: ``3*ystep``, sufficient
for up to 4-digit line numbers. Increase width for longer code blocks).
`xoffset`
Starting offset in X direction, defaults to ``0``.
`yoffset`
Starting offset in Y direction, defaults to the font size if it is given
        in pixels, or ``20`` otherwise. (This is necessary since text coordinates
refer to the text baseline, not the top edge.)
`ystep`
Offset to add to the Y coordinate for each subsequent line. This should
roughly be the text size plus 5. It defaults to that value if the text
        size is given in pixels, or ``25`` otherwise.
`spacehack`
Convert spaces in the source to ``&#160;``, which are non-breaking
spaces. SVG provides the ``xml:space`` attribute to control how
        whitespace inside tags is handled; in theory, the ``preserve`` value
could be used to keep all whitespace as-is. However, many current SVG
viewers don't obey that rule, so this option is provided as a workaround
and defaults to ``True``.
"""
name = 'SVG'
aliases = ['svg']
filenames = ['*.svg']
def __init__(self, **options):
Formatter.__init__(self, **options)
self.nowrap = get_bool_opt(options, 'nowrap', False)
self.fontfamily = options.get('fontfamily', 'monospace')
self.fontsize = options.get('fontsize', '14px')
self.xoffset = get_int_opt(options, 'xoffset', 0)
fs = self.fontsize.strip()
if fs.endswith('px'):
fs = fs[:-2].strip()
try:
int_fs = int(fs)
except ValueError:
int_fs = 20
self.yoffset = get_int_opt(options, 'yoffset', int_fs)
self.ystep = get_int_opt(options, 'ystep', int_fs + 5)
self.spacehack = get_bool_opt(options, 'spacehack', True)
        self.linenos = get_bool_opt(options, 'linenos', False)
        self.linenostart = get_int_opt(options, 'linenostart', 1)
        self.linenostep = get_int_opt(options, 'linenostep', 1)
        self.linenowidth = get_int_opt(options, 'linenowidth', 3 * self.ystep)
self._stylecache = {}
def format_unencoded(self, tokensource, outfile):
"""
Format ``tokensource``, an iterable of ``(tokentype, tokenstring)``
tuples and write it into ``outfile``.
For our implementation we put all lines in their own 'line group'.
"""
x = self.xoffset
y = self.yoffset
if not self.nowrap:
if self.encoding:
outfile.write(f'<?xml version="1.0" encoding="{self.encoding}"?>\n')
else:
outfile.write('<?xml version="1.0"?>\n')
outfile.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" '
'"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/'
'svg10.dtd">\n')
outfile.write('<svg xmlns="http://www.w3.org/2000/svg">\n')
outfile.write(f'<g font-family="{self.fontfamily}" font-size="{self.fontsize}">\n')
counter = self.linenostart
counter_step = self.linenostep
counter_style = self._get_style(Comment)
line_x = x
if self.linenos:
if counter % counter_step == 0:
outfile.write(f'<text x="{x+self.linenowidth}" y="{y}" {counter_style} text-anchor="end">{counter}</text>')
line_x += self.linenowidth + self.ystep
counter += 1
outfile.write(f'<text x="{line_x}" y="{y}" xml:space="preserve">')
for ttype, value in tokensource:
style = self._get_style(ttype)
tspan = style and '<tspan' + style + '>' or ''
tspanend = tspan and '</tspan>' or ''
value = escape_html(value)
if self.spacehack:
value = value.expandtabs().replace(' ', '&#160;')
parts = value.split('\n')
for part in parts[:-1]:
outfile.write(tspan + part + tspanend)
y += self.ystep
outfile.write('</text>\n')
if self.linenos and counter % counter_step == 0:
outfile.write(f'<text x="{x+self.linenowidth}" y="{y}" text-anchor="end" {counter_style}>{counter}</text>')
counter += 1
outfile.write(f'<text x="{line_x}" y="{y}" ' 'xml:space="preserve">')
outfile.write(tspan + parts[-1] + tspanend)
outfile.write('</text>')
if not self.nowrap:
outfile.write('</g></svg>\n')
def _get_style(self, tokentype):
if tokentype in self._stylecache:
return self._stylecache[tokentype]
otokentype = tokentype
while not self.style.styles_token(tokentype):
tokentype = tokentype.parent
value = self.style.style_for_token(tokentype)
result = ''
if value['color']:
result = ' fill="#' + value['color'] + '"'
if value['bold']:
result += ' font-weight="bold"'
if value['italic']:
result += ' font-style="italic"'
self._stylecache[otokentype] = result
return result
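# For the default style a keyword is thus wrapped as (illustrative):
#   <tspan fill="#008000" font-weight="bold">def</tspan>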

View File

@ -0,0 +1,127 @@
"""
pygments.formatters.terminal
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Formatter for terminal output with ANSI sequences.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.token import Keyword, Name, Comment, String, Error, \
Number, Operator, Generic, Token, Whitespace
from pip._vendor.pygments.console import ansiformat
from pip._vendor.pygments.util import get_choice_opt
__all__ = ['TerminalFormatter']
#: Map token types to a tuple of color values for light and dark
#: backgrounds.
TERMINAL_COLORS = {
Token: ('', ''),
Whitespace: ('gray', 'brightblack'),
Comment: ('gray', 'brightblack'),
Comment.Preproc: ('cyan', 'brightcyan'),
Keyword: ('blue', 'brightblue'),
Keyword.Type: ('cyan', 'brightcyan'),
Operator.Word: ('magenta', 'brightmagenta'),
Name.Builtin: ('cyan', 'brightcyan'),
Name.Function: ('green', 'brightgreen'),
Name.Namespace: ('_cyan_', '_brightcyan_'),
Name.Class: ('_green_', '_brightgreen_'),
Name.Exception: ('cyan', 'brightcyan'),
Name.Decorator: ('brightblack', 'gray'),
Name.Variable: ('red', 'brightred'),
Name.Constant: ('red', 'brightred'),
Name.Attribute: ('cyan', 'brightcyan'),
Name.Tag: ('brightblue', 'brightblue'),
String: ('yellow', 'yellow'),
Number: ('blue', 'brightblue'),
Generic.Deleted: ('brightred', 'brightred'),
Generic.Inserted: ('green', 'brightgreen'),
Generic.Heading: ('**', '**'),
Generic.Subheading: ('*magenta*', '*brightmagenta*'),
Generic.Prompt: ('**', '**'),
Generic.Error: ('brightred', 'brightred'),
Error: ('_brightred_', '_brightred_'),
}
class TerminalFormatter(Formatter):
r"""
Format tokens with ANSI color sequences, for output in a text console.
Color sequences are terminated at newlines, so that paging the output
works correctly.
The `get_style_defs()` method doesn't do anything special since there is
no support for common styles.
Options accepted:
`bg`
Set to ``"light"`` or ``"dark"`` depending on the terminal's background
(default: ``"light"``).
`colorscheme`
A dictionary mapping token types to (lightbg, darkbg) color names or
``None`` (default: ``None`` = use builtin colorscheme).
`linenos`
Set to ``True`` to have line numbers on the terminal output as well
(default: ``False`` = no line numbers).
"""
name = 'Terminal'
aliases = ['terminal', 'console']
filenames = []
def __init__(self, **options):
Formatter.__init__(self, **options)
self.darkbg = get_choice_opt(options, 'bg',
['light', 'dark'], 'light') == 'dark'
self.colorscheme = options.get('colorscheme', None) or TERMINAL_COLORS
self.linenos = options.get('linenos', False)
self._lineno = 0
def format(self, tokensource, outfile):
return Formatter.format(self, tokensource, outfile)
def _write_lineno(self, outfile):
self._lineno += 1
outfile.write("%s%04d: " % (self._lineno != 1 and '\n' or '', self._lineno))
def _get_color(self, ttype):
# self.colorscheme is a dict containing usually generic types, so we
# have to walk the tree of dots. The base Token type must be a key,
# even if it's empty string, as in the default above.
colors = self.colorscheme.get(ttype)
while colors is None:
ttype = ttype.parent
colors = self.colorscheme.get(ttype)
return colors[self.darkbg]
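    # e.g. _get_color(Keyword.Reserved) walks up to Keyword and returns
    # 'blue' on a light background or 'brightblue' on a dark one
    # (per TERMINAL_COLORS above).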
def format_unencoded(self, tokensource, outfile):
if self.linenos:
self._write_lineno(outfile)
for ttype, value in tokensource:
color = self._get_color(ttype)
for line in value.splitlines(True):
if color:
outfile.write(ansiformat(color, line.rstrip('\n')))
else:
outfile.write(line.rstrip('\n'))
if line.endswith('\n'):
if self.linenos:
self._write_lineno(outfile)
else:
outfile.write('\n')
if self.linenos:
outfile.write("\n")

View File

@ -0,0 +1,338 @@
"""
pygments.formatters.terminal256
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Formatter for 256-color terminal output with ANSI sequences.
RGB-to-XTERM color conversion routines adapted from xterm256-conv
tool (http://frexx.de/xterm-256-notes/data/xterm256-conv2.tar.bz2)
by Wolfgang Frisch.
Formatter version 1.
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
# TODO:
# - Options to map style's bold/underline/italic/border attributes
#   to some ANSI attributes (something like 'italic=underline')
# - An option to output "style RGB to xterm RGB/index" conversion table
# - An option to indicate that we are running in "reverse background"
# xterm. This means that default colors are white-on-black, not
#   black-on-white, so colors like "white background" need to be converted
# to "white background, black foreground", etc...
from pip._vendor.pygments.formatter import Formatter
from pip._vendor.pygments.console import codes
from pip._vendor.pygments.style import ansicolors
__all__ = ['Terminal256Formatter', 'TerminalTrueColorFormatter']
class EscapeSequence:
def __init__(self, fg=None, bg=None, bold=False, underline=False, italic=False):
self.fg = fg
self.bg = bg
self.bold = bold
self.underline = underline
self.italic = italic
def escape(self, attrs):
if len(attrs):
return "\x1b[" + ";".join(attrs) + "m"
return ""
def color_string(self):
attrs = []
if self.fg is not None:
if self.fg in ansicolors:
                esc = codes[self.fg.replace('ansi', '')]
if ';01m' in esc:
self.bold = True
# extract fg color code.
attrs.append(esc[2:4])
else:
attrs.extend(("38", "5", "%i" % self.fg))
if self.bg is not None:
if self.bg in ansicolors:
                esc = codes[self.bg.replace('ansi', '')]
# extract fg color code, add 10 for bg.
attrs.append(str(int(esc[2:4])+10))
else:
attrs.extend(("48", "5", "%i" % self.bg))
if self.bold:
attrs.append("01")
if self.underline:
attrs.append("04")
if self.italic:
attrs.append("03")
return self.escape(attrs)
def true_color_string(self):
attrs = []
if self.fg:
attrs.extend(("38", "2", str(self.fg[0]), str(self.fg[1]), str(self.fg[2])))
if self.bg:
attrs.extend(("48", "2", str(self.bg[0]), str(self.bg[1]), str(self.bg[2])))
if self.bold:
attrs.append("01")
if self.underline:
attrs.append("04")
if self.italic:
attrs.append("03")
return self.escape(attrs)
def reset_string(self):
attrs = []
if self.fg is not None:
attrs.append("39")
if self.bg is not None:
attrs.append("49")
if self.bold or self.underline or self.italic:
attrs.append("00")
return self.escape(attrs)
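# Doctest-style sketch of the escape sequences built above (not executed):
# >>> EscapeSequence(fg=202, bold=True).color_string()
# '\x1b[38;5;202;01m'
# >>> EscapeSequence(fg=(255, 128, 0)).true_color_string()
# '\x1b[38;2;255;128;0m'
# >>> EscapeSequence(fg=202, bold=True).reset_string()
# '\x1b[39;00m'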
class Terminal256Formatter(Formatter):
"""
Format tokens with ANSI color sequences, for output in a 256-color
terminal or console. Like in `TerminalFormatter` color sequences
are terminated at newlines, so that paging the output works correctly.
The formatter takes colors from a style defined by the `style` option
and converts them to nearest ANSI 256-color escape sequences. Bold and
underline attributes from the style are preserved (and displayed).
.. versionadded:: 0.9
.. versionchanged:: 2.2
If the used style defines foreground colors in the form ``#ansi*``, then
    `Terminal256Formatter` will map these to non-extended foreground colors.
See :ref:`AnsiTerminalStyle` for more information.
.. versionchanged:: 2.4
The ANSI color names have been updated with names that are easier to
understand and align with colornames of other projects and terminals.
See :ref:`this table <new-ansi-color-names>` for more information.
Options accepted:
`style`
The style to use, can be a string or a Style subclass (default:
``'default'``).
`linenos`
Set to ``True`` to have line numbers on the terminal output as well
(default: ``False`` = no line numbers).
"""
name = 'Terminal256'
aliases = ['terminal256', 'console256', '256']
filenames = []
def __init__(self, **options):
Formatter.__init__(self, **options)
self.xterm_colors = []
self.best_match = {}
self.style_string = {}
self.usebold = 'nobold' not in options
self.useunderline = 'nounderline' not in options
self.useitalic = 'noitalic' not in options
self._build_color_table() # build an RGB-to-256 color conversion table
self._setup_styles() # convert selected style's colors to term. colors
self.linenos = options.get('linenos', False)
self._lineno = 0
def _build_color_table(self):
# colors 0..15: 16 basic colors
self.xterm_colors.append((0x00, 0x00, 0x00)) # 0
self.xterm_colors.append((0xcd, 0x00, 0x00)) # 1
self.xterm_colors.append((0x00, 0xcd, 0x00)) # 2
self.xterm_colors.append((0xcd, 0xcd, 0x00)) # 3
self.xterm_colors.append((0x00, 0x00, 0xee)) # 4
self.xterm_colors.append((0xcd, 0x00, 0xcd)) # 5
self.xterm_colors.append((0x00, 0xcd, 0xcd)) # 6
self.xterm_colors.append((0xe5, 0xe5, 0xe5)) # 7
self.xterm_colors.append((0x7f, 0x7f, 0x7f)) # 8
self.xterm_colors.append((0xff, 0x00, 0x00)) # 9
self.xterm_colors.append((0x00, 0xff, 0x00)) # 10
self.xterm_colors.append((0xff, 0xff, 0x00)) # 11
self.xterm_colors.append((0x5c, 0x5c, 0xff)) # 12
self.xterm_colors.append((0xff, 0x00, 0xff)) # 13
self.xterm_colors.append((0x00, 0xff, 0xff)) # 14
self.xterm_colors.append((0xff, 0xff, 0xff)) # 15
# colors 16..232: the 6x6x6 color cube
valuerange = (0x00, 0x5f, 0x87, 0xaf, 0xd7, 0xff)
for i in range(217):
r = valuerange[(i // 36) % 6]
g = valuerange[(i // 6) % 6]
b = valuerange[i % 6]
self.xterm_colors.append((r, g, b))
# colors 233..253: grayscale
for i in range(1, 22):
v = 8 + i * 10
self.xterm_colors.append((v, v, v))
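    # Sketch (not executed): index 196 in the 6x6x6 cube is pure red, e.g.
    # >>> Terminal256Formatter().xterm_colors[196]
    # (255, 0, 0)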
def _closest_color(self, r, g, b):
distance = 257*257*3 # "infinity" (>distance from #000000 to #ffffff)
match = 0
for i in range(0, 254):
values = self.xterm_colors[i]
rd = r - values[0]
gd = g - values[1]
bd = b - values[2]
d = rd*rd + gd*gd + bd*bd
if d < distance:
match = i
distance = d
return match
def _color_index(self, color):
index = self.best_match.get(color, None)
if color in ansicolors:
# strip the `ansi/#ansi` part and look up code
index = color
self.best_match[color] = index
if index is None:
try:
rgb = int(str(color), 16)
except ValueError:
rgb = 0
r = (rgb >> 16) & 0xff
g = (rgb >> 8) & 0xff
b = rgb & 0xff
index = self._closest_color(r, g, b)
self.best_match[color] = index
return index
def _setup_styles(self):
for ttype, ndef in self.style:
escape = EscapeSequence()
# get foreground from ansicolor if set
if ndef['ansicolor']:
escape.fg = self._color_index(ndef['ansicolor'])
elif ndef['color']:
escape.fg = self._color_index(ndef['color'])
if ndef['bgansicolor']:
escape.bg = self._color_index(ndef['bgansicolor'])
elif ndef['bgcolor']:
escape.bg = self._color_index(ndef['bgcolor'])
if self.usebold and ndef['bold']:
escape.bold = True
if self.useunderline and ndef['underline']:
escape.underline = True
if self.useitalic and ndef['italic']:
escape.italic = True
self.style_string[str(ttype)] = (escape.color_string(),
escape.reset_string())
def _write_lineno(self, outfile):
self._lineno += 1
outfile.write("%s%04d: " % (self._lineno != 1 and '\n' or '', self._lineno))
def format(self, tokensource, outfile):
return Formatter.format(self, tokensource, outfile)
def format_unencoded(self, tokensource, outfile):
if self.linenos:
self._write_lineno(outfile)
for ttype, value in tokensource:
not_found = True
while ttype and not_found:
try:
# outfile.write( "<" + str(ttype) + ">" )
on, off = self.style_string[str(ttype)]
# Like TerminalFormatter, add "reset colors" escape sequence
# on newline.
spl = value.split('\n')
for line in spl[:-1]:
if line:
outfile.write(on + line + off)
if self.linenos:
self._write_lineno(outfile)
else:
outfile.write('\n')
if spl[-1]:
outfile.write(on + spl[-1] + off)
not_found = False
# outfile.write( '#' + str(ttype) + '#' )
except KeyError:
# ottype = ttype
ttype = ttype.parent
# outfile.write( '!' + str(ottype) + '->' + str(ttype) + '!' )
if not_found:
outfile.write(value)
if self.linenos:
outfile.write("\n")
class TerminalTrueColorFormatter(Terminal256Formatter):
r"""
Format tokens with ANSI color sequences, for output in a true-color
terminal or console. Like in `TerminalFormatter` color sequences
are terminated at newlines, so that paging the output works correctly.
.. versionadded:: 2.1
Options accepted:
`style`
The style to use, can be a string or a Style subclass (default:
``'default'``).
"""
name = 'TerminalTrueColor'
aliases = ['terminal16m', 'console16m', '16m']
filenames = []
def _build_color_table(self):
pass
def _color_tuple(self, color):
try:
rgb = int(str(color), 16)
except ValueError:
return None
r = (rgb >> 16) & 0xff
g = (rgb >> 8) & 0xff
b = rgb & 0xff
return (r, g, b)
def _setup_styles(self):
for ttype, ndef in self.style:
escape = EscapeSequence()
if ndef['color']:
escape.fg = self._color_tuple(ndef['color'])
if ndef['bgcolor']:
escape.bg = self._color_tuple(ndef['bgcolor'])
if self.usebold and ndef['bold']:
escape.bold = True
if self.useunderline and ndef['underline']:
escape.underline = True
if self.useitalic and ndef['italic']:
escape.italic = True
self.style_string[str(ttype)] = (escape.true_color_string(),
escape.reset_string())

Some files were not shown because too many files have changed in this diff