1328 lines
42 KiB
Python
1328 lines
42 KiB
Python
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License.
|
|
|
|
# WARNING: DO run scraper from 32-bit command line or else output may be UTF-16.
|
|
#
|
|
# Scraping most modules does not require search path. Example:
|
|
# c:\python3\python -W ignore -B -E scrape_module.py lxml.etree > etree.pyi
|
|
#
|
|
# However, some compiled modules do need explicit search path. For example, cv2 is
|
|
# actually compiled 'cv2.cp36-win_amd64.pyd' under 'site-packages/cv2'. Thus the
|
|
# compiled module is actually cv2.cv2 (which cv2 imports via *). So for the scraped
|
|
# stub to work cv2.cv2 has to be scraped but stored in 'native-stubs' as cv2.
|
|
#
|
|
# Scraping then requires explicit search path to local cv2.cv2.
|
|
# c:\python3\python -W ignore -B -E scrape_module.py cv2.cv2 C:\Python3\Lib\site-packages > cv2.pyi
|
|
#
|
|
|
|
import ast
|
|
import builtins
|
|
import importlib
|
|
import inspect
|
|
import io
|
|
import keyword
|
|
import sys
|
|
import tokenize
|
|
import warnings
|
|
import re
|
|
|
|
if sys.version_info[0] < 3:
|
|
raise Exception("Python 2 is unsupported")
|
|
|
|
|
|
class InspectWarning(UserWarning):
|
|
pass
|
|
|
|
|
|
def get_module_version(module):
|
|
try:
|
|
version = getattr(module, "__version__")
|
|
if isinstance(version, bytes):
|
|
return version.decode()
|
|
else:
|
|
return version
|
|
except AttributeError:
|
|
return "unspecified"
|
|
|
|
|
|
def print_module_version(module, out):
|
|
module_name = getattr(module, "__name__")
|
|
module_version = get_module_version(module)
|
|
|
|
library_name = module_name.split(".")[0]
|
|
|
|
if library_name == module_name:
|
|
package_name = getattr(module, "__package__", None)
|
|
if package_name:
|
|
library_name = package_name.split(".")[0]
|
|
|
|
library = importlib.import_module(library_name)
|
|
library_version = get_module_version(library)
|
|
|
|
print(
|
|
"# Python: "
|
|
+ sys.version
|
|
+ "\n# Library: "
|
|
+ library_name
|
|
+ ", version: "
|
|
+ library_version
|
|
+ "\n# Module: "
|
|
+ module_name
|
|
+ ", version: "
|
|
+ module_version,
|
|
file=out
|
|
)
|
|
|
|
|
|
def can_eval(s):
|
|
"""Returns True if the string can be evaluated."""
|
|
if not s:
|
|
return False
|
|
try:
|
|
ast.parse(s, mode="eval")
|
|
except SyntaxError:
|
|
return False
|
|
else:
|
|
return True
|
|
|
|
|
|
def is_callable(v):
|
|
"""Returns True if v has __call__."""
|
|
try:
|
|
return hasattr(v, "__call__")
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def safe_module_name(n):
|
|
"""Returns a module name which should not conflict with any other symbol."""
|
|
if n:
|
|
return "_mod_" + n.replace(".", "_")
|
|
return n
|
|
|
|
|
|
def do_not_inspect(v):
|
|
"""Returns True if this value should not be inspected due to potential bugs."""
|
|
# https://github.com/Microsoft/python-language-server/issues/740
|
|
# https://github.com/cython/cython/issues/1470
|
|
if type(v).__name__ != "fused_cython_function":
|
|
return False
|
|
|
|
# If a fused function has __defaults__, then attempting to access
|
|
# __kwdefaults__ will fail if generated before cython 0.29.6.
|
|
return bool(getattr(v, "__defaults__", False))
|
|
|
|
|
|
class SeenNames(object):
|
|
"""Tracks unique names."""
|
|
|
|
def __init__(self, s=None):
|
|
self.seen = set() if s is None else s.copy()
|
|
|
|
def make_unique(self, name):
|
|
if name not in self.seen:
|
|
self.seen.add(name)
|
|
return name
|
|
|
|
n = name + "_"
|
|
if n not in self.seen:
|
|
self.seen.add(n)
|
|
return n
|
|
|
|
i = 0
|
|
while True:
|
|
i += 1
|
|
n = name + "_" + str(i)
|
|
if n not in self.seen:
|
|
self.seen.add(n)
|
|
return n
|
|
|
|
raise RuntimeError("Unreachable")
|
|
|
|
|
|
class DefaultRepr(object):
|
|
"""
|
|
A value whose repr is an exact string representation.
|
|
|
|
For example, ``DefaultRepr("...")`` would be printed in
|
|
an inspect.Parameter exactly as ``...``, whereas actually
|
|
using ``...`` as a default value would print as ``Ellipsis``.
|
|
"""
|
|
|
|
def __init__(self, v):
|
|
self.v = v
|
|
|
|
def __repr__(self):
|
|
return self.v
|
|
|
|
|
|
ELLIPSIS_DEFAULT = DefaultRepr("...")
|
|
|
|
try:
|
|
# Fragile; this member isn't officially documented.
|
|
EXACT_TOKEN_TYPES = tokenize.EXACT_TOKEN_TYPES
|
|
except AttributeError:
|
|
# Bare minimum that we need here
|
|
EXACT_TOKEN_TYPES = {
|
|
"(": tokenize.LPAR,
|
|
")": tokenize.RPAR,
|
|
"[": tokenize.LSQB,
|
|
"]": tokenize.RSQB,
|
|
"{": tokenize.LBRACE,
|
|
"}": tokenize.RBRACE,
|
|
",": tokenize.COMMA,
|
|
":": tokenize.COLON,
|
|
"*": tokenize.STAR,
|
|
"**": tokenize.DOUBLESTAR,
|
|
"=": tokenize.EQUAL,
|
|
}
|
|
|
|
PAREN_TOKEN_MAP = {
|
|
tokenize.LPAR: tokenize.RPAR,
|
|
tokenize.LBRACE: tokenize.RBRACE,
|
|
tokenize.LSQB: tokenize.RSQB,
|
|
}
|
|
|
|
|
|
class DocstringSigParser(object):
|
|
"""Spooky docstring parsing logic."""
|
|
|
|
def __init__(self, callable, expected_name, defaults=None):
|
|
self.callable = callable
|
|
self.name = expected_name
|
|
self._defaults = defaults
|
|
|
|
def restype(self):
|
|
doc = getattr(self.callable, "__doc__", None)
|
|
if not isinstance(doc, str):
|
|
return None
|
|
|
|
doc = doc.lstrip()
|
|
|
|
first_line = doc.partition("\n")[0].strip()
|
|
if not "->" in first_line:
|
|
return None
|
|
|
|
index = first_line.index("->")
|
|
typeName = first_line[index + 2 :].strip()
|
|
|
|
if typeName.startswith("str"):
|
|
return "str"
|
|
if typeName.startswith("float"):
|
|
return "float"
|
|
if typeName.startswith("int"):
|
|
return "int"
|
|
if typeName.startswith("long"):
|
|
return "int"
|
|
if typeName.startswith("list"):
|
|
return "typing.List[typing.Any]"
|
|
if typeName.startswith("dict"):
|
|
return "typing.Dict[typing.Any, typing.Any]"
|
|
if typeName.startswith("("):
|
|
return "typing.Tuple[typing.Any, ...]"
|
|
if typeName.startswith("bool"):
|
|
return "bool"
|
|
if "Return a string" in first_line:
|
|
return "str"
|
|
return None
|
|
|
|
def argspec(self, doc=None, override_name=None):
|
|
if not doc:
|
|
doc = getattr(self.callable, "__doc__", None)
|
|
if not isinstance(doc, str):
|
|
return None
|
|
|
|
doc = doc.lstrip()
|
|
|
|
# TODO: Support overloads by reading multiple lines?
|
|
doc = self._get_first_function_call(doc, override_name)
|
|
if not doc:
|
|
return None
|
|
|
|
if override_name:
|
|
allow_name_mismatch = override_name not in doc
|
|
else:
|
|
allow_name_mismatch = False
|
|
|
|
return self._parse_funcdef(
|
|
doc, allow_name_mismatch, self._defaults, override_name
|
|
)
|
|
|
|
def _tokenize(self, expr):
|
|
if sys.version_info[0] == 3 and sys.version_info[1] <= 2:
|
|
expr = "# coding: utf-8\n" + expr
|
|
buf = io.BytesIO(expr.strip().encode("utf-8"))
|
|
tokens = tokenize.tokenize(buf.readline)
|
|
return [
|
|
(EXACT_TOKEN_TYPES.get(s, tt) if tt == tokenize.OP else tt, s)
|
|
for tt, s, _, _, _ in tokens
|
|
]
|
|
|
|
def _parse_take_expr(self, tokens, *stop_at):
|
|
nesting = []
|
|
expr = []
|
|
while tokens:
|
|
tt, s = tokens[0]
|
|
if tt == tokenize.LSQB and len(tokens) > 2 and tokens[1][0] in stop_at:
|
|
return expr
|
|
if tt in PAREN_TOKEN_MAP:
|
|
expr.append((tt, s))
|
|
nesting.append(PAREN_TOKEN_MAP[tt])
|
|
elif nesting and tt == nesting[-1]:
|
|
expr.append((tt, s))
|
|
nesting.pop()
|
|
elif tt in (tokenize.RPAR, tokenize.RBRACE, tokenize.RSQB):
|
|
return expr
|
|
elif not nesting and tt in stop_at:
|
|
return expr
|
|
else:
|
|
expr.append((tt, s))
|
|
tokens.pop(0)
|
|
return expr
|
|
|
|
def _parse_format_arg(self, name, args, defaults):
|
|
defaults = list(defaults)
|
|
default_set = set(defaults)
|
|
seen_names = SeenNames(INVALID_ARGNAMES)
|
|
parts = [name or "<function>", "("]
|
|
arg_parts = []
|
|
any_default = False
|
|
|
|
for a_names, a_ann, a_def, a_opt in args:
|
|
if not a_names:
|
|
continue
|
|
a_name = "".join(a_names)
|
|
if a_name in default_set:
|
|
default_set.discard(a_name)
|
|
|
|
arg_parts.append(seen_names.make_unique(a_name))
|
|
if can_eval("".join(a_ann)):
|
|
# TODO: Fix unqualified typing annotations, rather than omitting them.
|
|
# arg_parts.append(": ")
|
|
# arg_parts.extend(a_ann)
|
|
pass
|
|
if can_eval("".join(a_def)):
|
|
arg_parts.append("=")
|
|
# arg_parts.extend(a_def)
|
|
arg_parts.extend("...")
|
|
any_default = True
|
|
elif a_opt[0] or (any_default and "*" not in a_name and "**" not in a_name):
|
|
# arg_parts.append("=None")
|
|
arg_parts.append("=...")
|
|
any_default = True
|
|
if a_name.startswith("*"):
|
|
any_default = True
|
|
arg_parts.append(", ")
|
|
|
|
if default_set:
|
|
for a in defaults:
|
|
if a in default_set:
|
|
parts.append(a)
|
|
parts.append(", ")
|
|
parts.extend(arg_parts)
|
|
if parts[-1] == ", ":
|
|
parts.pop()
|
|
if parts and parts[-1] in ("*", "**"):
|
|
parts[-1] += seen_names.make_unique("_")
|
|
|
|
parts.append(")")
|
|
|
|
return "".join(parts)
|
|
|
|
def _parse_funcdef(self, expr, allow_name_mismatch, defaults, override_name=None):
|
|
"""Takes a call expression that was part of a docstring
|
|
and parses the AST as if it were a definition. If the parsed
|
|
AST matches the callable we are wrapping, returns the node.
|
|
"""
|
|
try:
|
|
tokens = self._tokenize(expr)
|
|
except (TypeError, tokenize.TokenError):
|
|
warnings.warn("failed to tokenize " + expr, InspectWarning)
|
|
return None
|
|
|
|
name = None
|
|
seen_open_paren = False
|
|
args = [([], [], [], [False])]
|
|
optional = False
|
|
|
|
while tokens:
|
|
tt, s = tokens.pop(0)
|
|
if tt == tokenize.NAME:
|
|
if override_name is not None and s == override_name:
|
|
name = s
|
|
|
|
if name is None:
|
|
name = s
|
|
elif seen_open_paren:
|
|
args[-1][0].append(s)
|
|
args[-1][3][0] = optional
|
|
elif tt in (tokenize.STAR, tokenize.DOUBLESTAR):
|
|
args[-1][0].append(s)
|
|
elif tt == tokenize.COLON:
|
|
e = self._parse_take_expr(tokens, tokenize.EQUAL, tokenize.COMMA)
|
|
args[-1][1].append("".join(i[1] for i in e))
|
|
elif tt == tokenize.EQUAL:
|
|
if not seen_open_paren:
|
|
name = None
|
|
continue
|
|
e = self._parse_take_expr(tokens, tokenize.COMMA)
|
|
args[-1][2].append("".join(i[1] for i in e))
|
|
elif tt == tokenize.COMMA:
|
|
args.append(([], [], [], [False]))
|
|
elif tt == tokenize.LSQB:
|
|
optional = True
|
|
elif tt == tokenize.RSQB:
|
|
optional = False
|
|
elif tt == tokenize.LPAR:
|
|
seen_open_paren = True
|
|
elif tt == tokenize.RPAR:
|
|
break
|
|
elif s in ("->", "..."):
|
|
return None
|
|
|
|
# TODO: Handle '/', the positional-only argument separator, when stubs support them.
|
|
|
|
if name and (allow_name_mismatch or name == self.name):
|
|
return self._parse_format_arg(override_name or name, args, defaults)
|
|
|
|
def _get_first_function_call(self, expr: str, name: str):
|
|
"""Scans the string for the first closing parenthesis,
|
|
handling nesting, which is the best heuristic we have for
|
|
an example call at the start of the docstring."""
|
|
# Note: line may or may not contain complete (...) and closing ')' may be on another line.
|
|
# We also prevent going too far into the expression so it does not pick random x() in comments.
|
|
if "\n\n" not in expr and name not in expr:
|
|
return None
|
|
|
|
expr = expr.split("\n\n")[0]
|
|
if not expr or ")" not in expr:
|
|
return None
|
|
|
|
found = []
|
|
n = 0
|
|
expr = expr.replace("\r", " ").replace("\n", " ").replace("\t", " ")
|
|
|
|
# See whether string before open paren is valid.
|
|
openParenIndex = expr.find("(")
|
|
if openParenIndex < 0:
|
|
return None
|
|
|
|
header = expr[:openParenIndex].strip()
|
|
tokens = header.split(" ")
|
|
tokenLength = len(tokens)
|
|
if tokenLength == 0:
|
|
# Nothing before "("
|
|
return None
|
|
|
|
if not tokens[tokenLength - 1].isidentifier() and name not in tokens[tokenLength - 1]:
|
|
# Token before "(" is not valid identifier.
|
|
return None
|
|
|
|
if tokenLength > 1 and tokens[tokenLength - 2].isidentifier():
|
|
# 2 consecutive words separated by a space. probably not a function call.
|
|
return None
|
|
|
|
expr = expr.replace(" ", "")
|
|
|
|
for i, c in enumerate(expr):
|
|
if c == ")":
|
|
n -= 1
|
|
if n == 0:
|
|
return expr[: i + 1]
|
|
elif c == "(":
|
|
n += 1
|
|
|
|
return None
|
|
|
|
|
|
SKIP_TYPENAME_FOR_TYPES = bool, str, bytes, int, float
|
|
STATICMETHOD_TYPES = ()
|
|
CLASSMETHOD_TYPES = (type(float.fromhex),)
|
|
PROPERTY_TYPES = type(int.real), type(property.fget)
|
|
|
|
INVALID_ARGNAMES = set(keyword.kwlist)
|
|
|
|
# These full names are known to be lies. When we encounter
|
|
# them while scraping a module, assume that we need to write
|
|
# out the full type rather than including them by reference.
|
|
# TODO: Which of these are still needed?
|
|
LIES_ABOUT_MODULE = frozenset(
|
|
[
|
|
builtins.__name__ + ".weakcallableproxy",
|
|
builtins.__name__ + ".weakproxy",
|
|
builtins.__name__ + ".weakref",
|
|
"ctypes.ArgumentError",
|
|
"os.stat_result",
|
|
"os.statvfs_result",
|
|
"xml.parsers.expat.ExpatError",
|
|
"numpy.broadcast",
|
|
"numpy.busdaycalendar",
|
|
"numpy.dtype",
|
|
"numpy.flagsobj",
|
|
"numpy.flatiter",
|
|
"numpy.ndarray",
|
|
"numpy.nditer",
|
|
# These modules contain multiple members that lie about their
|
|
# module. Always write out all members of these in full.
|
|
"_asyncio.*",
|
|
"_bsddb.*",
|
|
"_decimal.*",
|
|
"_elementtree.*",
|
|
"_socket.*",
|
|
"_sqlite3.*",
|
|
"_ssl.*",
|
|
"_testmultiphase.*",
|
|
]
|
|
)
|
|
|
|
# These symbols have decls but doc strings are not on them.
|
|
# Make sure we write them down on scraped file.
|
|
MUST_EMIT_DOCSTRINGS = frozenset(
|
|
[
|
|
"_collections.defaultdict",
|
|
"_collections.deque",
|
|
]
|
|
)
|
|
|
|
# These type names cause conflicts with their values, so
|
|
# we need to forcibly rename them.
|
|
# TODO: Which of these are still needed?
|
|
SYS_INFO_TYPES = frozenset(
|
|
(
|
|
"float_info",
|
|
"hash_info",
|
|
"int_info",
|
|
"thread_info",
|
|
"version_info",
|
|
)
|
|
)
|
|
|
|
VALUE_REPR_FIX = {
|
|
float("inf"): "float('inf')",
|
|
float("-inf"): "float('-inf')",
|
|
}
|
|
|
|
IMPLICIT_CLASSMETHOD = ("__new__",)
|
|
|
|
|
|
# TODO: Canonicalize internal storage as an inspect.Signature.
|
|
class Signature(object):
|
|
KNOWN_RESTYPES = {
|
|
"__abs__": "__T__",
|
|
"__add__": "__T__",
|
|
"__and__": "__T__",
|
|
"__annotations__": "typing.Dict[str, typing.Any]",
|
|
"__base__": "type",
|
|
"__bases__": "typing.Tuple[type, ...]",
|
|
"__bool__": "bool",
|
|
"__call__": "typing.Any",
|
|
"__ceil__": "__T__",
|
|
"__code__": "types.CodeType",
|
|
"__contains__": "bool",
|
|
"__del__": "None",
|
|
"__delattr__": "None",
|
|
"__delitem__": "None",
|
|
"__dict__": "typing.Dict[str, typing.Any]",
|
|
"__dir__": "typing.Iterable[str]",
|
|
"__divmod__": "typing.Tuple[__T__, __T__]",
|
|
"__eq__": "bool",
|
|
"__format__": "str",
|
|
"__float__": "float",
|
|
"__floor__": "__T__",
|
|
"__floordiv__": "int",
|
|
"__ge__": "bool",
|
|
"__get__": "__T__",
|
|
"__getattr__": "typing.Any",
|
|
"__getattribute__": "typing.Any",
|
|
"__getitem__": "typing.Any",
|
|
"__getnewargs__": "typing.Tuple[__T__]",
|
|
"__getnewargs_ex__": "typing.Tuple[typing.Tuple[typing.Any, ...], typing.Dict[str, typing.Any]]",
|
|
"__getslice__": "__T__",
|
|
"__globals__": "typing.Dict[str, typing.Any]",
|
|
"__gt__": "bool",
|
|
"__hash__": "int",
|
|
"__iadd__": "None",
|
|
"__iand__": "None",
|
|
"__imul__": "None",
|
|
"__index__": "int",
|
|
"__init__": "None",
|
|
"__init_subclass__": "None",
|
|
"__int__": "int",
|
|
"__invert__": "__T__",
|
|
"__ior__": "None",
|
|
"__isub__": "None",
|
|
"__iter__": "__T__",
|
|
"__ixor__": "None",
|
|
"__le__": "bool",
|
|
"__len__": "int",
|
|
"__length_hint__": "int",
|
|
"__lshift__": "__T__",
|
|
"__lt__": "bool",
|
|
"__mod__": "__T__",
|
|
"__mul__": "__T__",
|
|
"__ne__": "bool",
|
|
"__neg__": "__T__",
|
|
"__next__": "typing.Any",
|
|
"__pos__": "__T__",
|
|
"__pow__": "__T__",
|
|
"__or__": "__T__",
|
|
"__radd__": "__T__",
|
|
"__rand__": "__T__",
|
|
"__rdivmod__": "typing.Tuple[__T__, __T__]",
|
|
"__rfloordiv__": "__T__",
|
|
"__rlshift__": "__T__",
|
|
"__rmod__": "__T__",
|
|
"__rmul__": "__T__",
|
|
"__ror__": "__T__",
|
|
"__round__": "__T__",
|
|
"__rpow__": "__T__",
|
|
"__rrshift__": "__T__",
|
|
"__rshift__": "__T__",
|
|
"__rsub__": "__T__",
|
|
"__rtruediv__": "__T__",
|
|
"__rxor__": "__T__",
|
|
"__reduce__": "typing.Union[str, typing.Tuple[typing.Any, ...]]",
|
|
"__reduce_ex__": "typing.Union[str, typing.Tuple[typing.Any, ...]]",
|
|
"__repr__": "str",
|
|
"__set__": "None",
|
|
"__setattr__": "None",
|
|
"__setitem__": "None",
|
|
"__setstate__": "None",
|
|
"__sizeof__": "int",
|
|
"__str__": "str",
|
|
"__sub__": "__T__",
|
|
"__truediv__": "float",
|
|
"__trunc__": "__T__",
|
|
"__xor__": "__T__",
|
|
"__subclasscheck__": "bool",
|
|
"__subclasshook__": "bool",
|
|
}
|
|
|
|
KNOWN_ARGSPECS = {
|
|
"__contains__": "(self, value: typing.Any)",
|
|
"__del__": "(self)",
|
|
"__dir__": "(self)",
|
|
"__floor__": "(self)",
|
|
"__format__": "(self, format_spec: str)",
|
|
"__getitem__": "(self, index: int)",
|
|
"__getnewargs__": "(self)",
|
|
"__getnewargs_ex__": "(self)",
|
|
"__init_subclass__": "(cls)",
|
|
"__instancecheck__": "(self, instance: typing.Any)",
|
|
"__length_hint__": "(self)",
|
|
"__prepare__": "(cls, name: str, bases: typing.Tuple[type, ...], **kwds: typing.Any)", # TODO: ???
|
|
"__round__": "(self, ndigits: int = ...)",
|
|
"__reduce__": "(self)",
|
|
"__reduce_ex__": "(self, protocol: int)",
|
|
"__reversed__": "(self)",
|
|
"__setitem__": "(self, index: typing.Any, value: typing.Any)",
|
|
"__setstate__": "(self, state: typing.Any)",
|
|
"__sizeof__": "(self)",
|
|
"__subclasses__": "(cls)",
|
|
"__subclasscheck__": "(cls, subclass: typing.Any)",
|
|
"__subclasshook__": "(cls, subclass: typing.Any)",
|
|
"__trunc__": "(self)",
|
|
}
|
|
|
|
def __init__(
|
|
self,
|
|
name,
|
|
callable,
|
|
scope=None,
|
|
defaults=None,
|
|
scope_alias=None,
|
|
decorators=None,
|
|
fallback_doc=None,
|
|
):
|
|
self.callable = callable
|
|
self.name = name
|
|
self.scope = scope
|
|
self.decorators = decorators or ()
|
|
self._signature = None
|
|
self._defaults = defaults or ()
|
|
|
|
if scope and "@staticmethod" not in self.decorators:
|
|
def_arg = (
|
|
"cls"
|
|
if ("@classmethod" in self.decorators or name in IMPLICIT_CLASSMETHOD)
|
|
else "self"
|
|
)
|
|
if len(self._defaults) == 0 or self._defaults[0] != def_arg:
|
|
self._defaults = (def_arg,) + self._defaults
|
|
|
|
ds_parser = DocstringSigParser(self.callable, self.name, self._defaults)
|
|
|
|
self.fullsig = None
|
|
self.restype = None
|
|
|
|
# TODO: Combine this with the check for "See help(type(self))" in MemberInfo.
|
|
if self.name in ("__init__", "__new__") and fallback_doc:
|
|
self.fullsig = ds_parser.argspec(doc=fallback_doc, override_name=self.name)
|
|
elif not hasattr(self.callable, "__call__") and hasattr(
|
|
self.callable, "__get__"
|
|
):
|
|
# We have a property
|
|
self.decorators = ("@property",)
|
|
self.fullsig = self.name + "(" + ", ".join(self._defaults) + ")"
|
|
|
|
if scope == "object" and name == "__init__":
|
|
self.fullsig = "__init__(self)"
|
|
self.restype = "None"
|
|
|
|
# TODO: Strip defaults, replace the "restype" with an actual type that is added after the "->".
|
|
|
|
self.fullsig = (
|
|
self.fullsig
|
|
# Disable fromsignature() because it doesn't work as well as argspec
|
|
# or self._init_argspec_fromsignature()
|
|
or self._init_argspec_fromargspec()
|
|
or self._init_argspec_fromknown(scope_alias)
|
|
or ds_parser.argspec(override_name=self.name)
|
|
or (self.name + "(" + ", ".join(self._defaults) + ")")
|
|
)
|
|
|
|
# If full sig only has args and kwargs, try the ds_parser instead.
|
|
if self.fullsig and self.fullsig.endswith("*args, **kwargs)"):
|
|
ds_argspec = ds_parser.argspec(override_name=self.name)
|
|
if ds_argspec:
|
|
self.fullsig = ds_argspec
|
|
|
|
self.restype = (
|
|
self.restype
|
|
or self._init_restype_fromsignature()
|
|
or self._init_restype_fromknown(scope_alias)
|
|
or ds_parser.restype()
|
|
)
|
|
|
|
if self.restype and scope:
|
|
self.restype = self.restype.replace("__T__", scope)
|
|
|
|
# Special case for 'with' statement and built-ins like open() or memoryview
|
|
if name == "__enter__" and self.restype == "pass":
|
|
self.restype = scope
|
|
|
|
def __str__(self):
|
|
return self.fullsig
|
|
|
|
def _init_argspec_fromsignature(self):
|
|
if do_not_inspect(self.callable):
|
|
return None
|
|
|
|
try:
|
|
sig = inspect.signature(self.callable)
|
|
except Exception:
|
|
return None
|
|
|
|
new_args = []
|
|
for arg in sig.parameters:
|
|
p = sig.parameters[arg]
|
|
if p.default != inspect.Signature.empty:
|
|
# TODO: Replace deafult with ELLIPSIS_DEFAULT
|
|
# TODO: Figure out how to qualify things inside the type annotation.
|
|
try:
|
|
ast.literal_eval(repr(p.default))
|
|
except Exception:
|
|
p = p.replace(default=None)
|
|
if p.kind == inspect.Parameter.POSITIONAL_ONLY:
|
|
p = p.replace(kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
new_args.append(p)
|
|
sig = sig.replace(parameters=new_args)
|
|
|
|
# TODO: This duplicates return types, since str(sig) contains the return annotation.
|
|
return self.name + str(sig)
|
|
|
|
def _init_restype_fromsignature(self):
|
|
if do_not_inspect(self.callable):
|
|
return None
|
|
|
|
try:
|
|
sig = inspect.signature(self.callable)
|
|
except Exception:
|
|
return None
|
|
|
|
# If signature has a return annotation, it's in the
|
|
# full signature and we don't need it from here.
|
|
if not sig or sig.return_annotation == inspect._empty:
|
|
return None
|
|
ann = inspect.formatannotation(sig.return_annotation)
|
|
if not ann or not can_eval(ann):
|
|
return None
|
|
return ann
|
|
|
|
def _init_argspec_fromargspec(self):
|
|
if do_not_inspect(self.callable):
|
|
return None
|
|
|
|
try:
|
|
args = inspect.getfullargspec(self.callable)
|
|
except Exception:
|
|
return None
|
|
|
|
argn = []
|
|
seen_names = SeenNames(INVALID_ARGNAMES)
|
|
defaults = list(self._defaults)
|
|
default_set = set(defaults)
|
|
|
|
for a in args.args:
|
|
if a in default_set:
|
|
default_set.discard(a)
|
|
argn.append(seen_names.make_unique(a))
|
|
if default_set:
|
|
argn[:0] = [a for a in defaults if a in default_set]
|
|
|
|
if getattr(args, "varargs", None):
|
|
argn.append("*" + args.varargs)
|
|
if getattr(args, "varkw", None):
|
|
argn.append("**" + args.varkw)
|
|
|
|
if argn and argn[-1] in ("*", "**"):
|
|
argn[-1] += seen_names.make_unique("_")
|
|
|
|
return self.name + "(" + ", ".join(argn) + ")"
|
|
|
|
def _init_argspec_fromknown(self, scope_alias):
|
|
spec = None
|
|
if scope_alias and not spec:
|
|
spec = self.KNOWN_ARGSPECS.get(scope_alias + "." + self.name)
|
|
if self.scope and not spec:
|
|
spec = self.KNOWN_ARGSPECS.get(self.scope + "." + self.name)
|
|
if not spec:
|
|
spec = self.KNOWN_ARGSPECS.get(self.name)
|
|
if not spec:
|
|
return None
|
|
|
|
return self.name + spec
|
|
|
|
def _init_restype_fromknown(self, scope_alias):
|
|
restype = None
|
|
if scope_alias and not restype:
|
|
restype = self.KNOWN_RESTYPES.get(scope_alias + "." + self.name)
|
|
if self.scope and not restype:
|
|
restype = self.KNOWN_RESTYPES.get(self.scope + "." + self.name)
|
|
if not restype:
|
|
restype = self.KNOWN_RESTYPES.get(self.name)
|
|
if not restype:
|
|
return None
|
|
|
|
return restype
|
|
|
|
|
|
class MemberInfo(object):
|
|
NO_VALUE = object()
|
|
|
|
def __init__(
|
|
self,
|
|
name,
|
|
value,
|
|
literal=None,
|
|
type_literal=None,
|
|
scope=None,
|
|
module=None,
|
|
alias=None,
|
|
fallback_doc=None,
|
|
scope_alias=None,
|
|
):
|
|
self.name = name
|
|
self.module = module
|
|
self.value = value
|
|
self.literal = literal
|
|
self.type_literal = type_literal
|
|
self.members = []
|
|
self.values = []
|
|
self.need_imports = ()
|
|
self.type_name = None
|
|
self.scope_name = None
|
|
self.bases = ()
|
|
self.signature = None
|
|
self.documentation = getattr(value, "__doc__", None)
|
|
self.alias = alias
|
|
self.instance = True
|
|
|
|
if not isinstance(self.documentation, str):
|
|
self.documentation = None
|
|
|
|
# Special case for __init__ that refers to class docs
|
|
if self.name == "__init__" and (
|
|
not self.documentation or "See help(type(self))" in self.documentation
|
|
):
|
|
self.documentation = fallback_doc
|
|
|
|
if self.name:
|
|
self.name = self.name.replace("-", "_")
|
|
|
|
value_type = type(value)
|
|
if issubclass(value_type, type):
|
|
self.instance = False
|
|
self.need_imports, type_name = self._get_typename(value, module)
|
|
if "." in type_name:
|
|
m, s, n = type_name.rpartition(".")
|
|
self.literal = safe_module_name(m) + s + n
|
|
else:
|
|
self.scope_name = self.type_name = type_name
|
|
self._collect_bases(value, module, self.type_name)
|
|
|
|
elif is_callable(value):
|
|
dec = ()
|
|
if scope:
|
|
if value_type in STATICMETHOD_TYPES:
|
|
dec += ("@staticmethod",)
|
|
elif value_type in CLASSMETHOD_TYPES:
|
|
dec += ("@classmethod",)
|
|
self.signature = Signature(
|
|
name,
|
|
value,
|
|
scope,
|
|
scope_alias=scope_alias,
|
|
decorators=dec,
|
|
fallback_doc=fallback_doc,
|
|
)
|
|
|
|
# Remove the def of the function if it's in the docstring.
|
|
function_regex = "\s*" + self.name + "\(.*?\s*.*?\)"
|
|
if self.documentation and re.match(function_regex, self.documentation):
|
|
# Find the line it's on and remove it and all blank lines before and after it.
|
|
match = re.search(function_regex, self.documentation)
|
|
start = match.start()
|
|
end = match.end()
|
|
self.documentation = self.documentation[end:]
|
|
|
|
# Remove all blank lines after the function definition but before the
|
|
# next line of text.
|
|
i = 1 # This skips the line the function definition was on.
|
|
lines = self.documentation.split("\n")
|
|
while i < len(lines) and not lines[i].strip():
|
|
i += 1
|
|
new_doc = "\n".join(lines[i:]) # This is here just so we can check in the debugger
|
|
self.documentation = new_doc
|
|
|
|
elif value is not None:
|
|
if value_type in PROPERTY_TYPES:
|
|
self.signature = Signature(name, value, scope, scope_alias=scope_alias)
|
|
if value_type not in (): # SKIP_TYPENAME_FOR_TYPES:
|
|
self.need_imports, self.type_name = self._get_typename(
|
|
value_type, module
|
|
)
|
|
self._collect_bases(value_type, module, self.type_name)
|
|
# if isinstance(value, float) and repr(value) == "nan":
|
|
# self.literal = "float('nan')"
|
|
# try:
|
|
# self.literal = VALUE_REPR_FIX[value]
|
|
# except Exception:
|
|
# pass
|
|
|
|
# elif not self.literal:
|
|
# self.literal = "None"
|
|
|
|
def _collect_bases(self, value_type, module, type_name):
|
|
try:
|
|
bases = getattr(value_type, "__bases__", ())
|
|
except Exception:
|
|
pass
|
|
else:
|
|
self.bases = []
|
|
self.need_imports = list(self.need_imports)
|
|
for ni, t in (self._get_typename(b, module) for b in bases):
|
|
if not t:
|
|
continue
|
|
if t == type_name and module in ni:
|
|
continue
|
|
self.bases.append(t)
|
|
self.need_imports.extend(ni)
|
|
|
|
@classmethod
|
|
def _get_typename(cls, value_type, in_module):
|
|
try:
|
|
type_name = value_type.__name__.replace("-", "_")
|
|
module = getattr(value_type, "__module__", None)
|
|
|
|
if module and module != "<unknown>":
|
|
if module == in_module:
|
|
return (module,), type_name
|
|
|
|
fullname = module + "." + type_name
|
|
|
|
if in_module and (
|
|
fullname in LIES_ABOUT_MODULE
|
|
or (in_module + ".*") in LIES_ABOUT_MODULE
|
|
):
|
|
# Treat the type as if it came from the current module
|
|
return (in_module,), type_name
|
|
|
|
return (module,), fullname
|
|
|
|
return (), type_name
|
|
except Exception:
|
|
warnings.warn("could not get type of " + repr(value_type), InspectWarning)
|
|
raise
|
|
|
|
def _str_from_typename(self, type_name):
|
|
mod_name, sep, name = type_name.rpartition(".")
|
|
if mod_name == "builtins":
|
|
type_name = name
|
|
else:
|
|
type_name = safe_module_name(mod_name) + sep + name
|
|
|
|
s = self.name + ": " + type_name
|
|
# s = s + "()"
|
|
if not self.instance:
|
|
# TODO: Handle non-instances
|
|
pass
|
|
return s
|
|
|
|
def _lines_with_members(self):
|
|
if self.bases:
|
|
split_bases = [n.rpartition(".") for n in self.bases]
|
|
bases = ",".join(
|
|
(safe_module_name(n[0]) + n[1] + n[2]) for n in split_bases
|
|
)
|
|
yield "class " + self.name + "(" + bases + "):"
|
|
else:
|
|
yield "class " + self.name + ":"
|
|
if self.documentation:
|
|
yield " " + repr(self.documentation)
|
|
if self.members:
|
|
for mi in self.members:
|
|
if (
|
|
hasattr(mi, "documentation")
|
|
and mi.documentation != None
|
|
and not isinstance(mi.documentation, str)
|
|
):
|
|
continue
|
|
if mi is not MemberInfo.NO_VALUE:
|
|
yield mi.as_str(" ")
|
|
else:
|
|
yield " pass"
|
|
yield ""
|
|
|
|
def _lines_with_signature(self):
|
|
seen_decorators = set()
|
|
for d in self.signature.decorators:
|
|
d = str(d)
|
|
if d not in seen_decorators:
|
|
seen_decorators.add(d)
|
|
yield d
|
|
|
|
line = "def " + str(self.signature)
|
|
|
|
restype = self.signature.restype
|
|
if restype is None:
|
|
restype = "typing.Any"
|
|
|
|
line += " -> " + restype
|
|
yield line + ":"
|
|
|
|
if self.documentation:
|
|
yield " " + repr(self.documentation)
|
|
|
|
yield " ..."
|
|
|
|
yield ""
|
|
|
|
def as_str(self, indent=""):
|
|
if self.literal:
|
|
literal = indent + self.name + " = " + self.literal
|
|
|
|
# Put doc string next to reference.
|
|
if self.module + "." + self.name in MUST_EMIT_DOCSTRINGS:
|
|
literal += "\n" + indent + repr(self.documentation)
|
|
|
|
return literal
|
|
|
|
if self.type_literal:
|
|
return indent + self.name + ": " + self.type_literal
|
|
|
|
if self.members:
|
|
return "\n".join(indent + s for s in self._lines_with_members())
|
|
|
|
if self.signature:
|
|
return "\n".join(indent + s for s in self._lines_with_signature())
|
|
|
|
if self.type_name is not None:
|
|
return indent + self._str_from_typename(self.type_name)
|
|
|
|
if self.value is not None:
|
|
return indent + self.name + " = " + repr(self.value)
|
|
|
|
return indent + self.name + ": typing.Any"
|
|
|
|
|
|
MODULE_MEMBER_SUBSTITUTE = {
|
|
"__spec__": None,
|
|
"__loader__": None,
|
|
}
|
|
|
|
CLASS_MEMBER_SUBSTITUTE = {
|
|
"__bases__": MemberInfo("__bases__", None, type_literal="typing.Tuple[type, ...]"),
|
|
"__mro__": MemberInfo("__mro__", None, type_literal="typing.Tuple[type, ...]"),
|
|
# TODO: Only expose this on object and not every other class?
|
|
"__dict__": MemberInfo(
|
|
"__dict__", None, type_literal="typing.Dict[str, typing.Any]"
|
|
),
|
|
"__doc__": None,
|
|
"__new__": None,
|
|
}
|
|
|
|
|
|
def do_import(module_name, search_path=None):
|
|
"""
|
|
Imports a module by name and returns the module.
|
|
If the import fails, the exception is analyzed for a fix and retried.
|
|
"""
|
|
if search_path:
|
|
sys.path.insert(0, search_path)
|
|
|
|
try:
|
|
return importlib.import_module(module_name)
|
|
except Exception:
|
|
ex_msg = str(sys.exc_info()[1])
|
|
warnings.warn("Working around " + ex_msg, InspectWarning)
|
|
if ex_msg == "This must be an MFC application - try 'import win32ui' first":
|
|
importlib.import_module("win32ui")
|
|
elif (
|
|
ex_msg == "Could not find TCL routines"
|
|
or module_name == "matplotlib.backends._tkagg"
|
|
):
|
|
importlib.import_module("tkinter")
|
|
else:
|
|
raise
|
|
finally:
|
|
if search_path:
|
|
del sys.path[0]
|
|
|
|
return importlib.import_module(module_name)
|
|
|
|
|
|
def mro_contains(mro, name, value):
|
|
for m in mro:
|
|
try:
|
|
mro_value = getattr(m, name)
|
|
except Exception:
|
|
pass
|
|
else:
|
|
if mro_value is value:
|
|
return True
|
|
return False
|
|
|
|
|
|
class ScrapeState(object):
|
|
def __init__(self, module_name, search_path):
|
|
self.root_module = None
|
|
self.module_name = module_name
|
|
self.module = do_import(self.module_name, search_path)
|
|
self.members = []
|
|
|
|
def collect_top_level_members(self):
|
|
self._collect_members(self.module, self.members, MODULE_MEMBER_SUBSTITUTE, None)
|
|
|
|
if self.module_name == "sys":
|
|
sysinfo = [m for m in self.members if m.type_name in SYS_INFO_TYPES]
|
|
for m in sysinfo:
|
|
self.members.append(
|
|
MemberInfo(m.name, None, literal="__" + m.name + "()")
|
|
)
|
|
m.name = m.scope_name = m.type_name = "__" + m.type_name
|
|
|
|
m_names = set(m.name for m in self.members)
|
|
undeclared = []
|
|
for m in self.members:
|
|
if (
|
|
m.value is not None
|
|
and m.type_name
|
|
and "." not in m.type_name
|
|
and m.type_name not in m_names
|
|
):
|
|
undeclared.append(
|
|
MemberInfo(m.type_name, type(m.value), module=self.module_name)
|
|
)
|
|
|
|
self.members[:0] = undeclared
|
|
|
|
def _should_collect_members(self, member):
|
|
if self.module_name in member.need_imports and member.name == member.type_name:
|
|
return True
|
|
|
|
# Support cffi libs
|
|
if member.type_name == builtins.__name__ + ".CompiledLib":
|
|
return True
|
|
|
|
return False
|
|
|
|
def collect_second_level_members(self):
|
|
for mi in self.members:
|
|
if not self._should_collect_members(mi):
|
|
continue
|
|
|
|
substitutes = dict(CLASS_MEMBER_SUBSTITUTE)
|
|
# substitutes["__class__"] = MemberInfo(
|
|
# "__class__", None, literal=mi.type_name
|
|
# )
|
|
self._collect_members(mi.value, mi.members, substitutes, mi)
|
|
|
|
if mi.scope_name == mi.type_name:
|
|
continue
|
|
|
|
# When the scope and type names are different, we have a static
|
|
# class. To emulate this, we add '@staticmethod' decorators to
|
|
# all members.
|
|
for mi2 in mi.members:
|
|
if mi2.signature:
|
|
mi2.signature.decorators += ("@staticmethod",)
|
|
|
|
def _collect_members(self, mod, members, substitutes, outer_member):
|
|
"""Fills the members attribute with a dictionary containing
|
|
all members from the module."""
|
|
if not mod:
|
|
raise RuntimeError("failed to import module")
|
|
if mod is MemberInfo.NO_VALUE:
|
|
return
|
|
|
|
existing_names = set(m.name for m in members)
|
|
|
|
if outer_member:
|
|
scope = outer_member.scope_name
|
|
scope_alias = outer_member.alias
|
|
else:
|
|
scope, scope_alias = None, None
|
|
|
|
mod_scope = (self.module_name + "." + scope) if scope else self.module_name
|
|
fallback_doc = getattr(mod, "__doc__", None)
|
|
mro = (getattr(mod, "__mro__", None) or ())[1:]
|
|
for name in dir(mod):
|
|
if keyword.iskeyword(name):
|
|
continue
|
|
try:
|
|
m = substitutes[name]
|
|
if m:
|
|
members.append(m)
|
|
continue
|
|
except LookupError:
|
|
pass
|
|
try:
|
|
m = substitutes[mod_scope + "." + name]
|
|
if m:
|
|
members.append(m)
|
|
continue
|
|
except LookupError:
|
|
pass
|
|
|
|
if name in existing_names:
|
|
continue
|
|
|
|
try:
|
|
value = getattr(mod, name)
|
|
except AttributeError:
|
|
warnings.warn(
|
|
"attribute "
|
|
+ name
|
|
+ " on "
|
|
+ repr(mod)
|
|
+ " was in dir() but not getattr()",
|
|
InspectWarning,
|
|
)
|
|
except Exception:
|
|
warnings.warn(
|
|
"error getting " + name + " for " + repr(mod), InspectWarning
|
|
)
|
|
else:
|
|
if not self._should_add_value(value):
|
|
continue
|
|
if name != "__init__" and mro_contains(mro, name, value):
|
|
continue
|
|
members.append(
|
|
MemberInfo(
|
|
name,
|
|
value,
|
|
scope=scope,
|
|
module=self.module_name,
|
|
fallback_doc=fallback_doc,
|
|
scope_alias=scope_alias,
|
|
)
|
|
)
|
|
if not "__getattr__" in existing_names:
|
|
value = (
|
|
self.__getattr__dummy if scope else ScrapeState.__getattr__dummy_module
|
|
)
|
|
members.append(
|
|
MemberInfo(
|
|
"__getattr__",
|
|
value,
|
|
scope=None,
|
|
module=self.module_name,
|
|
)
|
|
)
|
|
|
|
def __getattr__dummy(self, name):
|
|
pass
|
|
|
|
@classmethod
|
|
def __getattr__dummy_module(name):
|
|
pass
|
|
|
|
def _should_add_value(self, value):
|
|
try:
|
|
value_type = type(value)
|
|
mod = getattr(value_type, "__module__", None)
|
|
name = value_type.__name__
|
|
except Exception:
|
|
warnings.warn("error getting typename", InspectWarning)
|
|
return False
|
|
|
|
if (mod, name) == (builtins.__name__, "CompiledLib"):
|
|
# Always allow CFFI lib
|
|
return True
|
|
|
|
if issubclass(value_type, (type(sys), type(inspect))):
|
|
# Disallow nested modules
|
|
return False
|
|
|
|
# By default, include all values
|
|
return True
|
|
|
|
def dump(self, out):
|
|
print_module_version(self.module, out)
|
|
|
|
documentation = getattr(self.module, "__doc__", None)
|
|
if isinstance(documentation, str):
|
|
print("", file=out)
|
|
print(repr(documentation), file=out)
|
|
print("", file=out)
|
|
|
|
print("import typing", file=out)
|
|
|
|
imports = set()
|
|
for value in self.members:
|
|
for mod in value.need_imports:
|
|
imports.add(mod)
|
|
imports.discard(self.module_name)
|
|
|
|
if imports:
|
|
for mod in sorted(imports):
|
|
print("import " + mod + " as " + safe_module_name(mod), file=out)
|
|
print("", file=out)
|
|
|
|
for value in self.members:
|
|
s = value.as_str("")
|
|
try:
|
|
print(s, file=out)
|
|
except TypeError:
|
|
print(repr(s), file=sys.stderr)
|
|
raise
|
|
|
|
|
|
def main():
|
|
module_name = sys.argv[1] if len(sys.argv) > 1 else "builtins"
|
|
search_path = sys.argv[2] if len(sys.argv) > 2 else None
|
|
|
|
state = ScrapeState(module_name, search_path)
|
|
state.collect_top_level_members()
|
|
|
|
state.members[:] = [m for m in state.members if m.name not in keyword.kwlist]
|
|
|
|
state.collect_second_level_members()
|
|
|
|
state.dump(sys.stdout)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|