dotfiles/vscode/.vscode/extensions/ms-python.vscode-pylance-2024.7.1/dist/stub-generation/scrape_module.py
Errol Sancaktar 5f8db31398 alacritty
2024-07-15 17:06:13 -06:00

1328 lines
42 KiB
Python

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# WARNING: DO run scraper from 32-bit command line or else output may be UTF-16.
#
# Scraping most modules does not require search path. Example:
# c:\python3\python -W ignore -B -E scrape_module.py lxml.etree > etree.pyi
#
# However, some compiled modules do need explicit search path. For example, cv2 is
# actually compiled 'cv2.cp36-win_amd64.pyd' under 'site-packages/cv2'. Thus the
# compiled module is actually cv2.cv2 (which cv2 imports via *). So for the scraped
# stub to work cv2.cv2 has to be scraped but stored in 'native-stubs' as cv2.
#
# Scraping then requires explicit search path to local cv2.cv2.
# c:\python3\python -W ignore -B -E scrape_module.py cv2.cv2 C:\Python3\Lib\site-packages > cv2.pyi
#
import ast
import builtins
import importlib
import inspect
import io
import keyword
import sys
import tokenize
import warnings
import re
# Refuse to run on Python 2 before anything else is defined.
if sys.version_info < (3,):
    raise Exception("Python 2 is unsupported")
class InspectWarning(UserWarning):
    """Warning category used for recoverable problems met while scraping."""
def get_module_version(module):
    """Return the ``__version__`` of *module* as text.

    Returns ``"unspecified"`` when the attribute is missing.  Bytes are
    decoded as UTF-8 (undecodable bytes are replaced rather than raising),
    and any other non-string value (for example a version tuple) is
    converted with ``str()`` so callers can safely concatenate the result
    with other strings.
    """
    try:
        version = getattr(module, "__version__")
    except AttributeError:
        return "unspecified"
    if isinstance(version, bytes):
        return version.decode("utf-8", "replace")
    if isinstance(version, str):
        return version
    # Some libraries expose tuples or numbers here; the header printer
    # builds its output with "+" and needs a string.
    return str(version)
def print_module_version(module, out):
    """Write a three-line comment header describing what was scraped.

    The header lists the interpreter version, the top-level library the
    module belongs to (with its version) and the module itself (with its
    version).  For a module without dots in its name the library is taken
    from the first component of ``__package__`` when that is set.
    """
    module_name = getattr(module, "__name__")
    module_version = get_module_version(module)

    library_name = module_name.split(".")[0]
    if library_name == module_name:
        package_name = getattr(module, "__package__", None)
        if package_name:
            library_name = package_name.split(".")[0]

    library_version = get_module_version(importlib.import_module(library_name))

    python_line = "# Python: " + sys.version
    library_line = "\n# Library: " + library_name + ", version: " + library_version
    module_line = "\n# Module: " + module_name + ", version: " + module_version
    print(python_line + library_line + module_line, file=out)
def can_eval(s):
    """Return True if *s* parses as a single Python expression.

    Empty or ``None`` input is never evaluable.  Only parsing is attempted;
    nothing is executed.
    """
    if not s:
        return False
    try:
        ast.parse(s, mode="eval")
    except (SyntaxError, ValueError):
        # ValueError: older interpreters reject source containing NUL bytes
        # with ValueError rather than SyntaxError.
        return False
    return True
def is_callable(v):
    """Report whether *v* exposes ``__call__``.

    Any exception raised while probing the attribute is treated as
    "not callable".
    """
    try:
        result = hasattr(v, "__call__")
    except Exception:
        result = False
    return result
def safe_module_name(n):
    """Map a dotted module name to an alias unlikely to clash with a symbol.

    ``"a.b"`` becomes ``"_mod_a_b"``; an empty or ``None`` name is returned
    unchanged.
    """
    if not n:
        return n
    return "_mod_" + n.replace(".", "_")
def do_not_inspect(v):
    """Return True when inspecting *v* is known to be unsafe.

    Only objects whose type is named ``fused_cython_function`` are affected:
    if such a function has ``__defaults__``, reading ``__kwdefaults__`` fails
    when it was generated by Cython older than 0.29.6.

    See https://github.com/Microsoft/python-language-server/issues/740
    and https://github.com/cython/cython/issues/1470
    """
    is_fused = type(v).__name__ == "fused_cython_function"
    return is_fused and bool(getattr(v, "__defaults__", False))
class SeenNames(object):
    """Hands out names that have not been handed out before."""

    def __init__(self, s=None):
        # Work on a copy so the caller's set is never modified.
        self.seen = s.copy() if s is not None else set()

    def make_unique(self, name):
        """Return *name*, or a suffixed variant if it is already taken.

        Candidates are tried in the order ``name``, ``name_``, ``name_1``,
        ``name_2``, ...; the first free one is recorded and returned.
        """
        candidate = name
        if candidate in self.seen:
            candidate = name + "_"
            counter = 0
            while candidate in self.seen:
                counter += 1
                candidate = name + "_" + str(counter)
        self.seen.add(candidate)
        return candidate
class DefaultRepr(object):
    """Wrapper whose ``repr()`` is exactly the string it was built with.

    Useful as a parameter default: ``DefaultRepr("...")`` is rendered inside
    an ``inspect.Parameter`` as ``...``, while a real ``...`` default would
    be rendered as ``Ellipsis``.
    """

    def __init__(self, v):
        self.v = v

    def __repr__(self):
        return self.v


# Shared placeholder for "some default value exists".
ELLIPSIS_DEFAULT = DefaultRepr("...")
# tokenize.EXACT_TOKEN_TYPES is not officially documented, so it may be
# missing; fall back to the bare minimum of operators needed here.
EXACT_TOKEN_TYPES = getattr(tokenize, "EXACT_TOKEN_TYPES", None)
if EXACT_TOKEN_TYPES is None:
    EXACT_TOKEN_TYPES = {
        "(": tokenize.LPAR,
        ")": tokenize.RPAR,
        "[": tokenize.LSQB,
        "]": tokenize.RSQB,
        "{": tokenize.LBRACE,
        "}": tokenize.RBRACE,
        ",": tokenize.COMMA,
        ":": tokenize.COLON,
        "*": tokenize.STAR,
        "**": tokenize.DOUBLESTAR,
        "=": tokenize.EQUAL,
    }

# Opening bracket token -> the closing token that balances it.
PAREN_TOKEN_MAP = {
    tokenize.LPAR: tokenize.RPAR,
    tokenize.LBRACE: tokenize.RBRACE,
    tokenize.LSQB: tokenize.RSQB,
}
class DocstringSigParser(object):
    """Best-effort recovery of a signature and return type from a docstring.

    Many compiled callables only describe their call shape in the first
    lines of ``__doc__`` (``name(arg, [opt]) -> type``).  This class scans
    that text heuristically and produces stub-style strings.

    Args:
        callable: Object whose ``__doc__`` is read.
        expected_name: Name the documented call must use to be accepted,
            unless a mismatch is explicitly allowed.
        defaults: Leading argument names (for example ``("self",)``) to
            prepend when the docstring does not list them.
    """

    # (prefix of the text following "->", stub type), tested in this order.
    _RESTYPE_PREFIXES = (
        ("str", "str"),
        ("float", "float"),
        ("int", "int"),
        ("long", "int"),
        ("list", "typing.List[typing.Any]"),
        ("dict", "typing.Dict[typing.Any, typing.Any]"),
        ("(", "typing.Tuple[typing.Any, ...]"),
        ("bool", "bool"),
    )

    def __init__(self, callable, expected_name, defaults=None):
        self.callable = callable
        self.name = expected_name
        self._defaults = defaults

    def restype(self):
        """Guess the return type from ``->`` on the docstring's first line.

        Returns a stub type string, or ``None`` when there is no string
        docstring, no ``->`` on the first line, or no recognised type.
        """
        doc = getattr(self.callable, "__doc__", None)
        if not isinstance(doc, str):
            return None
        first_line = doc.lstrip().partition("\n")[0].strip()
        if "->" not in first_line:
            return None
        type_name = first_line.partition("->")[2].strip()
        for prefix, stub_type in self._RESTYPE_PREFIXES:
            if type_name.startswith(prefix):
                return stub_type
        if "Return a string" in first_line:
            return "str"
        return None

    def argspec(self, doc=None, override_name=None):
        """Return ``name(args...)`` parsed from a docstring, or ``None``.

        Args:
            doc: Text to parse; defaults to the callable's ``__doc__``.
            override_name: Name to emit in the result.  When given and absent
                from the extracted call, the documented name is allowed to
                differ from the expected one.
        """
        if not doc:
            doc = getattr(self.callable, "__doc__", None)
        if not isinstance(doc, str):
            return None
        # TODO: Support overloads by reading multiple lines?
        call = self._get_first_function_call(doc.lstrip(), override_name)
        if not call:
            return None
        allow_name_mismatch = bool(override_name) and override_name not in call
        return self._parse_funcdef(
            call, allow_name_mismatch, self._defaults, override_name
        )

    def _tokenize(self, expr):
        """Tokenize *expr* into ``(token_type, text)`` pairs.

        Operator tokens are mapped to their exact type (LPAR, COMMA, ...)
        through EXACT_TOKEN_TYPES when known.
        """
        if sys.version_info[0] == 3 and sys.version_info[1] <= 2:
            expr = "# coding: utf-8\n" + expr
        buf = io.BytesIO(expr.strip().encode("utf-8"))
        tokens = tokenize.tokenize(buf.readline)
        return [
            (EXACT_TOKEN_TYPES.get(s, tt) if tt == tokenize.OP else tt, s)
            for tt, s, _, _, _ in tokens
        ]

    def _parse_take_expr(self, tokens, *stop_at):
        """Consume tokens of one expression from the front of *tokens*.

        Stops (leaving the stopping token in *tokens*) at an unbalanced
        closing bracket, at a *stop_at* token outside any bracket, or at a
        ``[`` that is directly followed by a *stop_at* token (the start of
        an optional-argument group such as ``[, x]``).  Returns the consumed
        ``(token_type, text)`` pairs.
        """
        nesting = []
        expr = []
        while tokens:
            tt, s = tokens[0]
            if tt == tokenize.LSQB and len(tokens) > 2 and tokens[1][0] in stop_at:
                return expr
            if tt in PAREN_TOKEN_MAP:
                expr.append((tt, s))
                nesting.append(PAREN_TOKEN_MAP[tt])
            elif nesting and tt == nesting[-1]:
                expr.append((tt, s))
                nesting.pop()
            elif tt in (tokenize.RPAR, tokenize.RBRACE, tokenize.RSQB):
                return expr
            elif not nesting and tt in stop_at:
                return expr
            else:
                expr.append((tt, s))
            tokens.pop(0)
        return expr

    def _parse_format_arg(self, name, args, defaults):
        """Render parsed arguments as ``name(arg, arg=..., ...)``.

        Args:
            name: Function name to emit (``"<function>"`` if falsy).
            args: Sequence of ``(name_parts, annotation, default, [optional])``
                entries as built by ``_parse_funcdef``.
            defaults: Leading argument names to prepend when not already
                present among *args*; ``None`` means none.
        """
        defaults = list(defaults or ())
        default_set = set(defaults)
        seen_names = SeenNames(INVALID_ARGNAMES)
        parts = [name or "<function>", "("]
        arg_parts = []
        any_default = False
        for a_names, a_ann, a_def, a_opt in args:
            if not a_names:
                continue
            a_name = "".join(a_names)
            default_set.discard(a_name)
            arg_parts.append(seen_names.make_unique(a_name))
            # TODO: Fix unqualified typing annotations, rather than omitting
            # them; a_ann is parsed but deliberately not emitted.
            if can_eval("".join(a_def)):
                # The actual default expression is replaced by "...".
                arg_parts.append("=")
                arg_parts.append("...")
                any_default = True
            elif a_opt[0] or (any_default and "*" not in a_name):
                # Optional ([arg]) or following a defaulted argument.
                arg_parts.append("=...")
                any_default = True
            if a_name.startswith("*"):
                any_default = True
            arg_parts.append(", ")
        if default_set:
            # Prepend expected leading names the docstring left out.
            for a in defaults:
                if a in default_set:
                    parts.append(a)
                    parts.append(", ")
        parts.extend(arg_parts)
        if parts[-1] == ", ":
            parts.pop()
        if parts and parts[-1] in ("*", "**"):
            # A bare star needs a name to be valid in a stub.
            parts[-1] += seen_names.make_unique("_")
        parts.append(")")
        return "".join(parts)

    def _parse_funcdef(self, expr, allow_name_mismatch, defaults, override_name=None):
        """Parse a documented call expression as if it were a definition.

        Returns the formatted ``name(args...)`` string when the documented
        name matches the expected name (or a mismatch is allowed); returns
        ``None`` when tokenizing fails, when ``->`` or ``...`` is met before
        the closing parenthesis, or when the name does not match.
        """
        try:
            tokens = self._tokenize(expr)
        except (TypeError, tokenize.TokenError, SyntaxError):
            # SyntaxError is included defensively (IndentationError is a
            # subclass); malformed docstrings must not abort the scrape.
            warnings.warn("failed to tokenize " + expr, InspectWarning)
            return None
        name = None
        seen_open_paren = False
        # One entry per argument: (name parts, annotation, default, [optional]).
        args = [([], [], [], [False])]
        optional = False
        while tokens:
            tt, s = tokens.pop(0)
            if tt == tokenize.NAME:
                if override_name is not None and s == override_name:
                    name = s
                if name is None:
                    name = s
                elif seen_open_paren:
                    args[-1][0].append(s)
                    args[-1][3][0] = optional
            elif tt in (tokenize.STAR, tokenize.DOUBLESTAR):
                args[-1][0].append(s)
            elif tt == tokenize.COLON:
                e = self._parse_take_expr(tokens, tokenize.EQUAL, tokenize.COMMA)
                args[-1][1].append("".join(i[1] for i in e))
            elif tt == tokenize.EQUAL:
                if not seen_open_paren:
                    # "x = f(...)": what was read so far was an assignment
                    # target, not the function name.
                    name = None
                    continue
                e = self._parse_take_expr(tokens, tokenize.COMMA)
                args[-1][2].append("".join(i[1] for i in e))
            elif tt == tokenize.COMMA:
                args.append(([], [], [], [False]))
            elif tt == tokenize.LSQB:
                optional = True
            elif tt == tokenize.RSQB:
                optional = False
            elif tt == tokenize.LPAR:
                seen_open_paren = True
            elif tt == tokenize.RPAR:
                break
            elif s in ("->", "..."):
                return None
        # TODO: Handle '/', the positional-only argument separator, when stubs support them.
        if name and (allow_name_mismatch or name == self.name):
            return self._parse_format_arg(override_name or name, args, defaults)
        return None

    def _get_first_function_call(self, expr: str, name: str):
        """Extract the first balanced ``name(...)`` call from *expr*.

        Scans the first paragraph for the first closing parenthesis that
        balances, which is the best heuristic available for an example call
        at the start of a docstring.  All spaces are removed from the result.

        Args:
            expr: Docstring text.
            name: Function name used to validate the text; may be ``None``,
                in which case no name-based acceptance is applied.

        Returns:
            The call text up to and including the balancing ``)``, or
            ``None`` when no plausible call is found.
        """
        # Note: line may or may not contain complete (...) and closing ')' may be on another line.
        # We also prevent going too far into the expression so it does not pick random x() in comments.
        mentions_name = name is not None and name in expr
        if "\n\n" not in expr and not mentions_name:
            return None
        expr = expr.split("\n\n")[0]
        if not expr or ")" not in expr:
            return None
        expr = expr.replace("\r", " ").replace("\n", " ").replace("\t", " ")
        # See whether string before open paren is valid.
        open_paren_index = expr.find("(")
        if open_paren_index < 0:
            return None
        words = expr[:open_paren_index].strip().split(" ")
        last_word = words[-1]
        if not last_word.isidentifier() and (name is None or name not in last_word):
            # Token before "(" is not valid identifier.
            return None
        if len(words) > 1 and words[-2].isidentifier():
            # 2 consecutive words separated by a space. probably not a function call.
            return None
        expr = expr.replace(" ", "")
        depth = 0
        for i, c in enumerate(expr):
            if c == ")":
                depth -= 1
                if depth == 0:
                    return expr[: i + 1]
            elif c == "(":
                depth += 1
        return None
SKIP_TYPENAME_FOR_TYPES = (bool, str, bytes, int, float)
STATICMETHOD_TYPES = ()
# Type of a built-in method bound to a class, e.g. float.fromhex.
CLASSMETHOD_TYPES = (type(float.fromhex),)
# Descriptor types that are emitted as properties.
PROPERTY_TYPES = (type(int.real), type(property.fget))
# Keywords can never be used as argument names in generated stubs.
INVALID_ARGNAMES = set(keyword.kwlist)

# These full names are known to be lies. When we encounter
# them while scraping a module, assume that we need to write
# out the full type rather than including them by reference.
# TODO: Which of these are still needed?
LIES_ABOUT_MODULE = frozenset(
    {
        builtins.__name__ + "." + lying_name
        for lying_name in ("weakcallableproxy", "weakproxy", "weakref")
    }
    | {
        "ctypes.ArgumentError",
        "os.stat_result",
        "os.statvfs_result",
        "xml.parsers.expat.ExpatError",
        "numpy.broadcast",
        "numpy.busdaycalendar",
        "numpy.dtype",
        "numpy.flagsobj",
        "numpy.flatiter",
        "numpy.ndarray",
        "numpy.nditer",
    }
    # These modules contain multiple members that lie about their
    # module. Always write out all members of these in full.
    | {
        "_asyncio.*",
        "_bsddb.*",
        "_decimal.*",
        "_elementtree.*",
        "_socket.*",
        "_sqlite3.*",
        "_ssl.*",
        "_testmultiphase.*",
    }
)

# These symbols have decls but doc strings are not on them.
# Make sure we write them down on scraped file.
MUST_EMIT_DOCSTRINGS = frozenset(
    {
        "_collections.defaultdict",
        "_collections.deque",
    }
)

# These type names cause conflicts with their values, so
# we need to forcibly rename them.
# TODO: Which of these are still needed?
SYS_INFO_TYPES = frozenset(
    {
        "float_info",
        "hash_info",
        "int_info",
        "thread_info",
        "version_info",
    }
)

# Values whose repr() is not a valid expression, mapped to one that is.
VALUE_REPR_FIX = {
    float("inf"): "float('inf')",
    float("-inf"): "float('-inf')",
}

# Methods that receive the class as first argument without a decorator.
IMPLICIT_CLASSMETHOD = ("__new__",)
# TODO: Canonicalize internal storage as an inspect.Signature.
class Signature(object):
    """Stub signature (``name(args)`` plus return type) for one callable.

    The argument list is taken from the first source that yields a result:
    the fallback docstring (``__init__``/``__new__`` only), a property
    shortcut, ``inspect.getfullargspec``, the KNOWN_ARGSPECS table, the
    callable's own docstring, and finally just the default leading
    arguments.  The return type comes from the return annotation, the
    KNOWN_RESTYPES table, or the docstring.

    Attributes set by ``__init__``:
        fullsig: ``name(args...)`` text; also the result of ``str(self)``.
        restype: Return type text, or ``None`` when unknown.
        decorators: Tuple of decorator lines such as ``"@classmethod"``.

    Args:
        name: Name to emit for the callable.
        callable: Object being described.
        scope: Name of the enclosing class, if any.  When set (and the
            callable is not a static method) ``self``/``cls`` is made the
            first argument, and ``__T__`` in known return types is replaced
            by it.
        defaults: Leading argument names to prepend when missing.
        scope_alias: Alternative scope name tried first in the known tables.
        decorators: Decorator lines already known for the callable.
        fallback_doc: Docstring to parse for ``__init__``/``__new__``.
    """

    # Return types for well-known members; "__T__" stands for the scope.
    KNOWN_RESTYPES = {
        "__abs__": "__T__",
        "__add__": "__T__",
        "__and__": "__T__",
        "__annotations__": "typing.Dict[str, typing.Any]",
        "__base__": "type",
        "__bases__": "typing.Tuple[type, ...]",
        "__bool__": "bool",
        "__call__": "typing.Any",
        "__ceil__": "__T__",
        "__code__": "types.CodeType",
        "__contains__": "bool",
        "__del__": "None",
        "__delattr__": "None",
        "__delitem__": "None",
        "__dict__": "typing.Dict[str, typing.Any]",
        "__dir__": "typing.Iterable[str]",
        "__divmod__": "typing.Tuple[__T__, __T__]",
        "__eq__": "bool",
        "__format__": "str",
        "__float__": "float",
        "__floor__": "__T__",
        "__floordiv__": "int",
        "__ge__": "bool",
        "__get__": "__T__",
        "__getattr__": "typing.Any",
        "__getattribute__": "typing.Any",
        "__getitem__": "typing.Any",
        "__getnewargs__": "typing.Tuple[__T__]",
        "__getnewargs_ex__": "typing.Tuple[typing.Tuple[typing.Any, ...], typing.Dict[str, typing.Any]]",
        "__getslice__": "__T__",
        "__globals__": "typing.Dict[str, typing.Any]",
        "__gt__": "bool",
        "__hash__": "int",
        "__iadd__": "None",
        "__iand__": "None",
        "__imul__": "None",
        "__index__": "int",
        "__init__": "None",
        "__init_subclass__": "None",
        "__int__": "int",
        "__invert__": "__T__",
        "__ior__": "None",
        "__isub__": "None",
        "__iter__": "__T__",
        "__ixor__": "None",
        "__le__": "bool",
        "__len__": "int",
        "__length_hint__": "int",
        "__lshift__": "__T__",
        "__lt__": "bool",
        "__mod__": "__T__",
        "__mul__": "__T__",
        "__ne__": "bool",
        "__neg__": "__T__",
        "__next__": "typing.Any",
        "__pos__": "__T__",
        "__pow__": "__T__",
        "__or__": "__T__",
        "__radd__": "__T__",
        "__rand__": "__T__",
        "__rdivmod__": "typing.Tuple[__T__, __T__]",
        "__rfloordiv__": "__T__",
        "__rlshift__": "__T__",
        "__rmod__": "__T__",
        "__rmul__": "__T__",
        "__ror__": "__T__",
        "__round__": "__T__",
        "__rpow__": "__T__",
        "__rrshift__": "__T__",
        "__rshift__": "__T__",
        "__rsub__": "__T__",
        "__rtruediv__": "__T__",
        "__rxor__": "__T__",
        "__reduce__": "typing.Union[str, typing.Tuple[typing.Any, ...]]",
        "__reduce_ex__": "typing.Union[str, typing.Tuple[typing.Any, ...]]",
        "__repr__": "str",
        "__set__": "None",
        "__setattr__": "None",
        "__setitem__": "None",
        "__setstate__": "None",
        "__sizeof__": "int",
        "__str__": "str",
        "__sub__": "__T__",
        "__truediv__": "float",
        "__trunc__": "__T__",
        "__xor__": "__T__",
        "__subclasscheck__": "bool",
        "__subclasshook__": "bool",
    }

    # Argument lists for well-known members that cannot be introspected.
    KNOWN_ARGSPECS = {
        "__contains__": "(self, value: typing.Any)",
        "__del__": "(self)",
        "__dir__": "(self)",
        "__floor__": "(self)",
        "__format__": "(self, format_spec: str)",
        "__getitem__": "(self, index: int)",
        "__getnewargs__": "(self)",
        "__getnewargs_ex__": "(self)",
        "__init_subclass__": "(cls)",
        "__instancecheck__": "(self, instance: typing.Any)",
        "__length_hint__": "(self)",
        "__prepare__": "(cls, name: str, bases: typing.Tuple[type, ...], **kwds: typing.Any)",  # TODO: ???
        "__round__": "(self, ndigits: int = ...)",
        "__reduce__": "(self)",
        "__reduce_ex__": "(self, protocol: int)",
        "__reversed__": "(self)",
        "__setitem__": "(self, index: typing.Any, value: typing.Any)",
        "__setstate__": "(self, state: typing.Any)",
        "__sizeof__": "(self)",
        "__subclasses__": "(cls)",
        "__subclasscheck__": "(cls, subclass: typing.Any)",
        "__subclasshook__": "(cls, subclass: typing.Any)",
        "__trunc__": "(self)",
    }

    def __init__(
        self,
        name,
        callable,
        scope=None,
        defaults=None,
        scope_alias=None,
        decorators=None,
        fallback_doc=None,
    ):
        self.callable = callable
        self.name = name
        self.scope = scope
        self.decorators = decorators or ()
        self._signature = None
        # Normalised to a tuple so a leading argument can be prepended
        # regardless of the sequence type the caller passed.
        self._defaults = tuple(defaults) if defaults else ()
        if scope and "@staticmethod" not in self.decorators:
            if "@classmethod" in self.decorators or name in IMPLICIT_CLASSMETHOD:
                def_arg = "cls"
            else:
                def_arg = "self"
            if self._defaults[:1] != (def_arg,):
                self._defaults = (def_arg,) + self._defaults

        ds_parser = DocstringSigParser(self.callable, self.name, self._defaults)
        default_sig = self.name + "(" + ", ".join(self._defaults) + ")"

        self.fullsig = None
        self.restype = None
        # TODO: Combine this with the check for "See help(type(self))" in MemberInfo.
        if self.name in ("__init__", "__new__") and fallback_doc:
            self.fullsig = ds_parser.argspec(doc=fallback_doc, override_name=self.name)
        elif not hasattr(self.callable, "__call__") and hasattr(
            self.callable, "__get__"
        ):
            # We have a property
            self.decorators = ("@property",)
            self.fullsig = default_sig
        if scope == "object" and name == "__init__":
            self.fullsig = "__init__(self)"
            self.restype = "None"
        # TODO: Strip defaults, replace the "restype" with an actual type that is added after the "->".
        self.fullsig = (
            self.fullsig
            # Disable fromsignature() because it doesn't work as well as argspec
            # or self._init_argspec_fromsignature()
            or self._init_argspec_fromargspec()
            or self._init_argspec_fromknown(scope_alias)
            or ds_parser.argspec(override_name=self.name)
            or default_sig
        )
        # If full sig only has args and kwargs, try the ds_parser instead.
        if self.fullsig and self.fullsig.endswith("*args, **kwargs)"):
            ds_argspec = ds_parser.argspec(override_name=self.name)
            if ds_argspec:
                self.fullsig = ds_argspec
        self.restype = (
            self.restype
            or self._init_restype_fromsignature()
            or self._init_restype_fromknown(scope_alias)
            or ds_parser.restype()
        )
        if self.restype and scope:
            self.restype = self.restype.replace("__T__", scope)
        # Special case for 'with' statement and built-ins like open() or memoryview
        # NOTE(review): nothing visible in this file yields a restype of
        # "pass"; confirm whether this branch can still trigger.
        if name == "__enter__" and self.restype == "pass":
            self.restype = scope

    def __str__(self):
        return self.fullsig

    def _init_argspec_fromsignature(self):
        """Build ``name(sig)`` from ``inspect.signature`` (currently unused).

        Defaults that are not literals are replaced by ``None`` and
        positional-only parameters are turned into ordinary ones.  Returns
        ``None`` when the callable cannot be inspected.
        """
        if do_not_inspect(self.callable):
            return None
        try:
            sig = inspect.signature(self.callable)
        except Exception:
            return None
        new_args = []
        for p in sig.parameters.values():
            # Identity check: comparing with != could invoke an arbitrary
            # __ne__ on the default value.
            if p.default is not inspect.Parameter.empty:
                # TODO: Replace default with ELLIPSIS_DEFAULT
                # TODO: Figure out how to qualify things inside the type annotation.
                try:
                    ast.literal_eval(repr(p.default))
                except Exception:
                    p = p.replace(default=None)
            if p.kind == inspect.Parameter.POSITIONAL_ONLY:
                p = p.replace(kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
            new_args.append(p)
        sig = sig.replace(parameters=new_args)
        # TODO: This duplicates return types, since str(sig) contains the return annotation.
        return self.name + str(sig)

    def _init_restype_fromsignature(self):
        """Return the callable's return annotation as text, or ``None``.

        ``None`` is returned when the callable cannot be inspected, has no
        return annotation, or the formatted annotation is not a parseable
        expression.
        """
        if do_not_inspect(self.callable):
            return None
        try:
            sig = inspect.signature(self.callable)
        except Exception:
            return None
        # Use the public sentinel and an identity check; "==" could invoke
        # an arbitrary __eq__ on the annotation object.
        if sig.return_annotation is inspect.Signature.empty:
            return None
        ann = inspect.formatannotation(sig.return_annotation)
        if not ann or not can_eval(ann):
            return None
        return ann

    def _init_argspec_fromargspec(self):
        """Build ``name(args...)`` from ``inspect.getfullargspec``.

        Only positional argument names, ``*varargs`` and ``**varkw`` are
        emitted; default values and keyword-only parameters are not.
        Expected leading names missing from the spec are prepended.
        Returns ``None`` when the callable cannot be inspected.
        """
        if do_not_inspect(self.callable):
            return None
        try:
            args = inspect.getfullargspec(self.callable)
        except Exception:
            return None
        argn = []
        seen_names = SeenNames(INVALID_ARGNAMES)
        defaults = list(self._defaults)
        default_set = set(defaults)
        for a in args.args:
            default_set.discard(a)
            argn.append(seen_names.make_unique(a))
        if default_set:
            argn[:0] = [a for a in defaults if a in default_set]
        if getattr(args, "varargs", None):
            argn.append("*" + args.varargs)
        if getattr(args, "varkw", None):
            argn.append("**" + args.varkw)
        if argn and argn[-1] in ("*", "**"):
            argn[-1] += seen_names.make_unique("_")
        return self.name + "(" + ", ".join(argn) + ")"

    def _lookup_known(self, table, scope_alias):
        """Look up this member in *table*, most specific key first.

        Tries ``scope_alias.name``, then ``scope.name``, then ``name``;
        returns the first non-empty entry or ``None``.
        """
        for prefix in (scope_alias, self.scope):
            if prefix:
                found = table.get(prefix + "." + self.name)
                if found:
                    return found
        return table.get(self.name) or None

    def _init_argspec_fromknown(self, scope_alias):
        """Return ``name(args...)`` from KNOWN_ARGSPECS, or ``None``."""
        spec = self._lookup_known(self.KNOWN_ARGSPECS, scope_alias)
        if not spec:
            return None
        return self.name + spec

    def _init_restype_fromknown(self, scope_alias):
        """Return the return type from KNOWN_RESTYPES, or ``None``."""
        return self._lookup_known(self.KNOWN_RESTYPES, scope_alias)
class MemberInfo(object):
    """One scraped member (class, callable or value) and how to emit it.

    Depending on what *value* is, the constructor fills in a type name and
    bases (classes), a ``Signature`` (callables and property descriptors),
    or a type name only (other values).  ``as_str`` then renders the member
    as stub text.

    Args:
        name: Member name; ``-`` is replaced by ``_``.
        value: The object found on the module or class.
        literal: Expression to emit as ``name = literal``.
        type_literal: Annotation to emit as ``name: type_literal``.
        scope: Name of the enclosing class for methods.
        module: Name of the module being scraped; may be ``None``.
        alias: Alternative scope name stored for nested lookups.
        fallback_doc: Docstring of the owner, used for ``__init__`` when its
            own docstring is missing or only points at the class help.
        scope_alias: Alternative scope name passed on to ``Signature``.
    """

    # Sentinel member that is skipped when emitting a class body.
    NO_VALUE = object()

    def __init__(
        self,
        name,
        value,
        literal=None,
        type_literal=None,
        scope=None,
        module=None,
        alias=None,
        fallback_doc=None,
        scope_alias=None,
    ):
        self.name = name
        self.module = module
        self.value = value
        self.literal = literal
        self.type_literal = type_literal
        self.members = []
        self.values = []
        self.need_imports = ()
        self.type_name = None
        self.scope_name = None
        self.bases = ()
        self.signature = None
        self.documentation = getattr(value, "__doc__", None)
        self.alias = alias
        self.instance = True
        if not isinstance(self.documentation, str):
            self.documentation = None
        # Special case for __init__ that refers to class docs
        if self.name == "__init__" and (
            not self.documentation or "See help(type(self))" in self.documentation
        ):
            self.documentation = fallback_doc
        if self.name:
            self.name = self.name.replace("-", "_")
        value_type = type(value)
        if issubclass(value_type, type):
            # The value is itself a class.
            self.instance = False
            self.need_imports, type_name = self._get_typename(value, module)
            if "." in type_name:
                # Defined in another module: emit an alias to it.
                m, s, n = type_name.rpartition(".")
                self.literal = safe_module_name(m) + s + n
            else:
                self.scope_name = self.type_name = type_name
                self._collect_bases(value, module, self.type_name)
        elif is_callable(value):
            dec = ()
            if scope:
                if value_type in STATICMETHOD_TYPES:
                    dec += ("@staticmethod",)
                elif value_type in CLASSMETHOD_TYPES:
                    dec += ("@classmethod",)
            self.signature = Signature(
                name,
                value,
                scope,
                scope_alias=scope_alias,
                decorators=dec,
                fallback_doc=fallback_doc,
            )
            self._strip_signature_from_documentation()
        elif value is not None:
            if value_type in PROPERTY_TYPES:
                self.signature = Signature(name, value, scope, scope_alias=scope_alias)
            # SKIP_TYPENAME_FOR_TYPES is intentionally not applied here, so
            # a type name is recorded for every non-None value.
            self.need_imports, self.type_name = self._get_typename(
                value_type, module
            )
            self._collect_bases(value_type, module, self.type_name)

    def _strip_signature_from_documentation(self):
        """Remove a leading ``name(...)`` call line from the docstring.

        When the docstring starts with the function's own call, that line
        (including whatever follows the call on it) and any blank lines
        after it are dropped, since the signature is emitted separately.
        """
        if not self.name or not isinstance(self.documentation, str):
            return
        # Raw strings avoid invalid-escape warnings; the name is escaped so
        # it is always matched literally.
        function_regex = r"\s*" + re.escape(self.name) + r"\(.*?\s*.*?\)"
        match = re.match(function_regex, self.documentation)
        if not match:
            return
        lines = self.documentation[match.end():].split("\n")
        # lines[0] is the remainder of the line holding the call; skip it
        # and every blank line up to the next line of text.
        i = 1
        while i < len(lines) and not lines[i].strip():
            i += 1
        self.documentation = "\n".join(lines[i:])

    def _collect_bases(self, value_type, module, type_name):
        """Record the base classes of *value_type* and the imports they need.

        Bases without a name, and a base that is the type itself within the
        current module, are skipped.  Nothing is changed if ``__bases__``
        cannot be read.
        """
        try:
            bases = getattr(value_type, "__bases__", ())
        except Exception:
            return
        self.bases = []
        self.need_imports = list(self.need_imports)
        for ni, t in (self._get_typename(b, module) for b in bases):
            if not t:
                continue
            if t == type_name and module in ni:
                continue
            self.bases.append(t)
            self.need_imports.extend(ni)

    @classmethod
    def _get_typename(cls, value_type, in_module):
        """Return ``(needed_modules, type_name)`` for *value_type*.

        The name is unqualified when the type belongs to *in_module*, or is
        listed in LIES_ABOUT_MODULE (then it is treated as local); otherwise
        it is qualified with the type's ``__module__``.  Failures are
        reported with an InspectWarning and re-raised.
        """
        try:
            type_name = value_type.__name__.replace("-", "_")
            module = getattr(value_type, "__module__", None)
            if not module or module == "<unknown>":
                return (), type_name
            if module == in_module:
                return (module,), type_name
            fullname = module + "." + type_name
            if in_module and (
                fullname in LIES_ABOUT_MODULE
                or (in_module + ".*") in LIES_ABOUT_MODULE
            ):
                # Treat the type as if it came from the current module
                return (in_module,), type_name
            return (module,), fullname
        except Exception:
            warnings.warn("could not get type of " + repr(value_type), InspectWarning)
            raise

    def _str_from_typename(self, type_name):
        """Render ``name: type``; builtins are written unqualified."""
        mod_name, sep, name = type_name.rpartition(".")
        if mod_name == "builtins":
            type_name = name
        else:
            type_name = safe_module_name(mod_name) + sep + name
        # TODO: Handle non-instances (self.instance is False)
        return self.name + ": " + type_name

    def _lines_with_members(self):
        """Yield the lines of a class definition including its members."""
        if self.bases:
            split_bases = [n.rpartition(".") for n in self.bases]
            bases = ",".join(
                (safe_module_name(n[0]) + n[1] + n[2]) for n in split_bases
            )
            yield "class " + self.name + "(" + bases + "):"
        else:
            yield "class " + self.name + ":"
        body_emitted = False
        if self.documentation:
            yield " " + repr(self.documentation)
            body_emitted = True
        for mi in self.members:
            # Skip members whose documentation is set but is not text.
            if (
                hasattr(mi, "documentation")
                and mi.documentation is not None
                and not isinstance(mi.documentation, str)
            ):
                continue
            if mi is not MemberInfo.NO_VALUE:
                yield mi.as_str(" ")
                body_emitted = True
        if not self.members or not body_emitted:
            # Keep the class body syntactically valid when nothing else
            # was written (every member skipped and no docstring).
            yield " pass"
        yield ""

    def _lines_with_signature(self):
        """Yield the lines of a function stub: decorators, def, doc, body."""
        seen_decorators = set()
        for d in self.signature.decorators:
            d = str(d)
            if d not in seen_decorators:
                seen_decorators.add(d)
                yield d
        restype = self.signature.restype
        if restype is None:
            restype = "typing.Any"
        yield "def " + str(self.signature) + " -> " + restype + ":"
        if self.documentation:
            yield " " + repr(self.documentation)
        yield " ..."
        yield ""

    def as_str(self, indent=""):
        """Render this member as stub text, each line prefixed by *indent*.

        Precedence: literal alias, explicit type annotation, class with
        members, callable signature, inferred type name, repr of the value,
        and finally ``name: typing.Any``.
        """
        if self.literal:
            literal = indent + self.name + " = " + self.literal
            # Put doc string next to reference.
            # module may be None (e.g. synthesised aliases), in which case
            # there is no qualified name to look up.
            if self.module and (self.module + "." + self.name) in MUST_EMIT_DOCSTRINGS:
                literal += "\n" + indent + repr(self.documentation)
            return literal
        if self.type_literal:
            return indent + self.name + ": " + self.type_literal
        if self.members:
            return "\n".join(indent + s for s in self._lines_with_members())
        if self.signature:
            return "\n".join(indent + s for s in self._lines_with_signature())
        if self.type_name is not None:
            return indent + self._str_from_typename(self.type_name)
        if self.value is not None:
            return indent + self.name + " = " + repr(self.value)
        return indent + self.name + ": typing.Any"
# Module attributes mapped to None here are left out of the stub.
MODULE_MEMBER_SUBSTITUTE = dict.fromkeys(("__spec__", "__loader__"))

# Class attributes replaced by a fixed annotation instead of being scraped.
CLASS_MEMBER_SUBSTITUTE = {
    attr_name: MemberInfo(attr_name, None, type_literal=annotation)
    for attr_name, annotation in (
        ("__bases__", "typing.Tuple[type, ...]"),
        ("__mro__", "typing.Tuple[type, ...]"),
        # TODO: Only expose this on object and not every other class?
        ("__dict__", "typing.Dict[str, typing.Any]"),
    )
}
# Class attributes mapped to None are left out of the stub.
CLASS_MEMBER_SUBSTITUTE.update(dict.fromkeys(("__doc__", "__new__")))
def do_import(module_name, search_path=None):
    """Import *module_name* and return the module.

    If the first attempt fails with a known, fixable error, the prerequisite
    module (``win32ui`` or ``tkinter``) is imported and the import is
    retried once; any other failure is re-raised.

    Args:
        module_name: Dotted name of the module to import.
        search_path: Optional directory placed at the front of ``sys.path``
            for the duration of the import, including the retry.

    Raises:
        Exception: Whatever the import raised, when no workaround applies
            or the retry fails as well.
    """
    if search_path:
        sys.path.insert(0, search_path)
    try:
        try:
            return importlib.import_module(module_name)
        except Exception as ex:
            ex_msg = str(ex)
            warnings.warn("Working around " + ex_msg, InspectWarning)
            if ex_msg == "This must be an MFC application - try 'import win32ui' first":
                importlib.import_module("win32ui")
            elif (
                ex_msg == "Could not find TCL routines"
                or module_name == "matplotlib.backends._tkagg"
            ):
                importlib.import_module("tkinter")
            else:
                raise
        # Retry while the search path is still in effect.
        return importlib.import_module(module_name)
    finally:
        if search_path:
            # Remove by value: the imported module may itself have put
            # entries in front of ours, so index 0 is not reliable.
            try:
                sys.path.remove(search_path)
            except ValueError:
                pass
def mro_contains(mro, name, value):
    """Return True if some class in *mro* has attribute *name* that is *value*.

    The comparison is by identity.  Classes on which the attribute lookup
    raises are ignored.
    """
    for klass in mro:
        try:
            inherited = getattr(klass, name)
        except Exception:
            continue
        if inherited is value:
            return True
    return False
class ScrapeState(object):
    """Imports one module, collects its members and writes them as a stub.

    Typical use: construct, ``collect_top_level_members()``,
    ``collect_second_level_members()``, then ``dump(out)``.

    Args:
        module_name: Dotted name of the module to scrape.
        search_path: Optional directory to import the module from.
    """

    def __init__(self, module_name, search_path):
        self.root_module = None
        self.module_name = module_name
        self.module = do_import(self.module_name, search_path)
        self.members = []

    def collect_top_level_members(self):
        """Collect the module's own members into ``self.members``.

        For ``sys``, the ``*_info`` struct types are renamed with a ``__``
        prefix and the original names become aliases, to avoid a clash
        between each type and its value.  Types of values that are referred
        to without a module qualifier but are not members themselves are
        declared first, once per type name.
        """
        self._collect_members(self.module, self.members, MODULE_MEMBER_SUBSTITUTE, None)
        if self.module_name == "sys":
            sysinfo = [m for m in self.members if m.type_name in SYS_INFO_TYPES]
            for m in sysinfo:
                self.members.append(
                    MemberInfo(m.name, None, literal="__" + m.name + "()")
                )
                m.name = m.scope_name = m.type_name = "__" + m.type_name
        declared = {m.name for m in self.members}
        undeclared = []
        for m in self.members:
            if (
                m.value is not None
                and m.type_name
                and "." not in m.type_name
                and m.type_name not in declared
            ):
                undeclared.append(
                    MemberInfo(m.type_name, type(m.value), module=self.module_name)
                )
                # Several values can share one undeclared type; declare it
                # only once.
                declared.add(m.type_name)
        self.members[:0] = undeclared

    def _should_collect_members(self, member):
        """Return True if *member* is a class whose body should be scraped.

        That is the case for classes defined in the scraped module (their
        name equals their type name) and for cffi ``CompiledLib`` objects.
        """
        if self.module_name in member.need_imports and member.name == member.type_name:
            return True
        # Support cffi libs
        if member.type_name == builtins.__name__ + ".CompiledLib":
            return True
        return False

    def collect_second_level_members(self):
        """Collect the members of every top-level class worth expanding."""
        for mi in self.members:
            if not self._should_collect_members(mi):
                continue
            substitutes = dict(CLASS_MEMBER_SUBSTITUTE)
            self._collect_members(mi.value, mi.members, substitutes, mi)
            if mi.scope_name == mi.type_name:
                continue
            # When the scope and type names are different, we have a static
            # class. To emulate this, we add '@staticmethod' decorators to
            # all members.
            for mi2 in mi.members:
                if mi2.signature:
                    mi2.signature.decorators += ("@staticmethod",)

    def _collect_members(self, mod, members, substitutes, outer_member):
        """Append a MemberInfo to *members* for each attribute of *mod*.

        Args:
            mod: Module or class whose ``dir()`` is walked.
            members: List that receives the collected members.
            substitutes: Mapping of attribute name (plain or qualified with
                the module/scope) to a replacement MemberInfo; a falsy
                replacement drops the attribute.
            outer_member: MemberInfo of the enclosing class, or ``None``
                when collecting from the module itself.

        Keywords, names already present in *members*, nested modules and
        attributes inherited unchanged from a base class (except
        ``__init__``) are skipped.  A placeholder ``__getattr__`` is added
        at the end unless one is already among the members.

        Raises:
            RuntimeError: If *mod* is falsy.
        """
        if not mod:
            raise RuntimeError("failed to import module")
        if mod is MemberInfo.NO_VALUE:
            return
        existing_names = set(m.name for m in members)
        if outer_member:
            scope = outer_member.scope_name
            scope_alias = outer_member.alias
        else:
            scope, scope_alias = None, None
        mod_scope = (self.module_name + "." + scope) if scope else self.module_name
        fallback_doc = getattr(mod, "__doc__", None)
        # Base classes only; the first MRO entry is the class itself.
        mro = (getattr(mod, "__mro__", None) or ())[1:]
        for name in dir(mod):
            if keyword.iskeyword(name):
                continue
            substituted = False
            for key in (name, mod_scope + "." + name):
                if key in substitutes:
                    replacement = substitutes[key]
                    if replacement:
                        members.append(replacement)
                    substituted = True
                    break
            if substituted or name in existing_names:
                continue
            try:
                value = getattr(mod, name)
            except AttributeError:
                warnings.warn(
                    "attribute "
                    + name
                    + " on "
                    + repr(mod)
                    + " was in dir() but not getattr()",
                    InspectWarning,
                )
                continue
            except Exception:
                warnings.warn(
                    "error getting " + name + " for " + repr(mod), InspectWarning
                )
                continue
            if not self._should_add_value(value):
                continue
            if name != "__init__" and mro_contains(mro, name, value):
                continue
            members.append(
                MemberInfo(
                    name,
                    value,
                    scope=scope,
                    module=self.module_name,
                    fallback_doc=fallback_doc,
                    scope_alias=scope_alias,
                )
            )
        # Check the members as they are now: a snapshot taken before the
        # loop would miss a __getattr__ defined by the scraped object and
        # produce a duplicate definition.
        if not any(getattr(m, "name", None) == "__getattr__" for m in members):
            value = (
                self.__getattr__dummy if scope else ScrapeState.__getattr__dummy_module
            )
            members.append(
                MemberInfo(
                    "__getattr__",
                    value,
                    scope=None,
                    module=self.module_name,
                )
            )

    def __getattr__dummy(self, name):
        """Template whose signature is emitted for a class ``__getattr__``."""
        pass

    # NOTE(review): the classmethod receives the class in its only
    # parameter, "name".  Presumably this relies on getfullargspec reporting
    # the bound first parameter so the emitted module-level signature reads
    # "__getattr__(name)" -- confirm before changing the decorator.
    @classmethod
    def __getattr__dummy_module(name):
        """Template whose signature is emitted for a module ``__getattr__``."""
        pass

    def _should_add_value(self, value):
        """Return True if *value* should become a member of the stub.

        Everything is included except nested modules and values whose type
        cannot be determined; cffi ``CompiledLib`` objects are always kept.
        """
        try:
            value_type = type(value)
            mod = getattr(value_type, "__module__", None)
            name = value_type.__name__
        except Exception:
            warnings.warn("error getting typename", InspectWarning)
            return False
        if (mod, name) == (builtins.__name__, "CompiledLib"):
            # Always allow CFFI lib
            return True
        if issubclass(value_type, (type(sys), type(inspect))):
            # Disallow nested modules
            return False
        # By default, include all values
        return True

    def dump(self, out):
        """Write the complete stub to the text stream *out*.

        Output order: version header, module docstring (if it is a string),
        ``import typing``, aliased imports of every other module the members
        need, then each member.

        Raises:
            TypeError: Re-raised from ``print`` after the offending text has
                been written to stderr.
        """
        print_module_version(self.module, out)
        documentation = getattr(self.module, "__doc__", None)
        if isinstance(documentation, str):
            print("", file=out)
            print(repr(documentation), file=out)
        print("", file=out)
        print("import typing", file=out)
        imports = {mod for member in self.members for mod in member.need_imports}
        imports.discard(self.module_name)
        for mod in sorted(imports):
            print("import " + mod + " as " + safe_module_name(mod), file=out)
        print("", file=out)
        for value in self.members:
            s = value.as_str("")
            try:
                print(s, file=out)
            except TypeError:
                print(repr(s), file=sys.stderr)
                raise
def main():
    """Scrape the module named on the command line and print its stub.

    ``argv[1]`` is the module name (default ``builtins``) and ``argv[2]`` an
    optional search path.  Top-level members whose name is a Python keyword
    are dropped before class members are collected.
    """
    cli_args = sys.argv[1:]
    module_name = cli_args[0] if cli_args else "builtins"
    search_path = cli_args[1] if len(cli_args) > 1 else None

    state = ScrapeState(module_name, search_path)
    state.collect_top_level_members()

    usable = []
    for member in state.members:
        if member.name in keyword.kwlist:
            continue
        usable.append(member)
    state.members[:] = usable

    state.collect_second_level_members()
    state.dump(sys.stdout)


if __name__ == "__main__":
    main()