# Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # WARNING: DO run scraper from 32-bit command line or else output may be UTF-16. # # Scraping most modules does not require search path. Example: # c:\python3\python -W ignore -B -E scrape_module.py lxml.etree > etree.pyi # # However, some compiled modules do need explicit search path. For example, cv2 is # actually compiled 'cv2.cp36-win_amd64.pyd' under 'site-packages/cv2'. Thus the # compiled module is actually cv2.cv2 (which cv2 imports via *). So for the scraped # stub to work cv2.cv2 has to be scraped but stored in 'native-stubs' as cv2. # # Scraping then requires explicit search path to local cv2.cv2. # c:\python3\python -W ignore -B -E scrape_module.py cv2.cv2 C:\Python3\Lib\site-packages > cv2.pyi # import ast import builtins import importlib import inspect import io import keyword import sys import tokenize import warnings import re if sys.version_info[0] < 3: raise Exception("Python 2 is unsupported") class InspectWarning(UserWarning): pass def get_module_version(module): try: version = getattr(module, "__version__") if isinstance(version, bytes): return version.decode() else: return version except AttributeError: return "unspecified" def print_module_version(module, out): module_name = getattr(module, "__name__") module_version = get_module_version(module) library_name = module_name.split(".")[0] if library_name == module_name: package_name = getattr(module, "__package__", None) if package_name: library_name = package_name.split(".")[0] library = importlib.import_module(library_name) library_version = get_module_version(library) print( "# Python: " + sys.version + "\n# Library: " + library_name + ", version: " + library_version + "\n# Module: " + module_name + ", version: " + module_version, file=out ) def can_eval(s): """Returns True if the string can be evaluated.""" if not s: return False try: ast.parse(s, mode="eval") except SyntaxError: return False else: return True def is_callable(v): """Returns True if v has __call__.""" try: return hasattr(v, "__call__") except Exception: return False def safe_module_name(n): """Returns a module name which should not conflict with any other symbol.""" if n: return "_mod_" + n.replace(".", "_") return n def do_not_inspect(v): """Returns True if this value should not be inspected due to potential bugs.""" # https://github.com/Microsoft/python-language-server/issues/740 # https://github.com/cython/cython/issues/1470 if type(v).__name__ != "fused_cython_function": return False # If a fused function has __defaults__, then attempting to access # __kwdefaults__ will fail if generated before cython 0.29.6. return bool(getattr(v, "__defaults__", False)) class SeenNames(object): """Tracks unique names.""" def __init__(self, s=None): self.seen = set() if s is None else s.copy() def make_unique(self, name): if name not in self.seen: self.seen.add(name) return name n = name + "_" if n not in self.seen: self.seen.add(n) return n i = 0 while True: i += 1 n = name + "_" + str(i) if n not in self.seen: self.seen.add(n) return n raise RuntimeError("Unreachable") class DefaultRepr(object): """ A value whose repr is an exact string representation. For example, ``DefaultRepr("...")`` would be printed in an inspect.Parameter exactly as ``...``, whereas actually using ``...`` as a default value would print as ``Ellipsis``. """ def __init__(self, v): self.v = v def __repr__(self): return self.v ELLIPSIS_DEFAULT = DefaultRepr("...") try: # Fragile; this member isn't officially documented. EXACT_TOKEN_TYPES = tokenize.EXACT_TOKEN_TYPES except AttributeError: # Bare minimum that we need here EXACT_TOKEN_TYPES = { "(": tokenize.LPAR, ")": tokenize.RPAR, "[": tokenize.LSQB, "]": tokenize.RSQB, "{": tokenize.LBRACE, "}": tokenize.RBRACE, ",": tokenize.COMMA, ":": tokenize.COLON, "*": tokenize.STAR, "**": tokenize.DOUBLESTAR, "=": tokenize.EQUAL, } PAREN_TOKEN_MAP = { tokenize.LPAR: tokenize.RPAR, tokenize.LBRACE: tokenize.RBRACE, tokenize.LSQB: tokenize.RSQB, } class DocstringSigParser(object): """Spooky docstring parsing logic.""" def __init__(self, callable, expected_name, defaults=None): self.callable = callable self.name = expected_name self._defaults = defaults def restype(self): doc = getattr(self.callable, "__doc__", None) if not isinstance(doc, str): return None doc = doc.lstrip() first_line = doc.partition("\n")[0].strip() if not "->" in first_line: return None index = first_line.index("->") typeName = first_line[index + 2 :].strip() if typeName.startswith("str"): return "str" if typeName.startswith("float"): return "float" if typeName.startswith("int"): return "int" if typeName.startswith("long"): return "int" if typeName.startswith("list"): return "typing.List[typing.Any]" if typeName.startswith("dict"): return "typing.Dict[typing.Any, typing.Any]" if typeName.startswith("("): return "typing.Tuple[typing.Any, ...]" if typeName.startswith("bool"): return "bool" if "Return a string" in first_line: return "str" return None def argspec(self, doc=None, override_name=None): if not doc: doc = getattr(self.callable, "__doc__", None) if not isinstance(doc, str): return None doc = doc.lstrip() # TODO: Support overloads by reading multiple lines? doc = self._get_first_function_call(doc, override_name) if not doc: return None if override_name: allow_name_mismatch = override_name not in doc else: allow_name_mismatch = False return self._parse_funcdef( doc, allow_name_mismatch, self._defaults, override_name ) def _tokenize(self, expr): if sys.version_info[0] == 3 and sys.version_info[1] <= 2: expr = "# coding: utf-8\n" + expr buf = io.BytesIO(expr.strip().encode("utf-8")) tokens = tokenize.tokenize(buf.readline) return [ (EXACT_TOKEN_TYPES.get(s, tt) if tt == tokenize.OP else tt, s) for tt, s, _, _, _ in tokens ] def _parse_take_expr(self, tokens, *stop_at): nesting = [] expr = [] while tokens: tt, s = tokens[0] if tt == tokenize.LSQB and len(tokens) > 2 and tokens[1][0] in stop_at: return expr if tt in PAREN_TOKEN_MAP: expr.append((tt, s)) nesting.append(PAREN_TOKEN_MAP[tt]) elif nesting and tt == nesting[-1]: expr.append((tt, s)) nesting.pop() elif tt in (tokenize.RPAR, tokenize.RBRACE, tokenize.RSQB): return expr elif not nesting and tt in stop_at: return expr else: expr.append((tt, s)) tokens.pop(0) return expr def _parse_format_arg(self, name, args, defaults): defaults = list(defaults) default_set = set(defaults) seen_names = SeenNames(INVALID_ARGNAMES) parts = [name or "", "("] arg_parts = [] any_default = False for a_names, a_ann, a_def, a_opt in args: if not a_names: continue a_name = "".join(a_names) if a_name in default_set: default_set.discard(a_name) arg_parts.append(seen_names.make_unique(a_name)) if can_eval("".join(a_ann)): # TODO: Fix unqualified typing annotations, rather than omitting them. # arg_parts.append(": ") # arg_parts.extend(a_ann) pass if can_eval("".join(a_def)): arg_parts.append("=") # arg_parts.extend(a_def) arg_parts.extend("...") any_default = True elif a_opt[0] or (any_default and "*" not in a_name and "**" not in a_name): # arg_parts.append("=None") arg_parts.append("=...") any_default = True if a_name.startswith("*"): any_default = True arg_parts.append(", ") if default_set: for a in defaults: if a in default_set: parts.append(a) parts.append(", ") parts.extend(arg_parts) if parts[-1] == ", ": parts.pop() if parts and parts[-1] in ("*", "**"): parts[-1] += seen_names.make_unique("_") parts.append(")") return "".join(parts) def _parse_funcdef(self, expr, allow_name_mismatch, defaults, override_name=None): """Takes a call expression that was part of a docstring and parses the AST as if it were a definition. If the parsed AST matches the callable we are wrapping, returns the node. """ try: tokens = self._tokenize(expr) except (TypeError, tokenize.TokenError): warnings.warn("failed to tokenize " + expr, InspectWarning) return None name = None seen_open_paren = False args = [([], [], [], [False])] optional = False while tokens: tt, s = tokens.pop(0) if tt == tokenize.NAME: if override_name is not None and s == override_name: name = s if name is None: name = s elif seen_open_paren: args[-1][0].append(s) args[-1][3][0] = optional elif tt in (tokenize.STAR, tokenize.DOUBLESTAR): args[-1][0].append(s) elif tt == tokenize.COLON: e = self._parse_take_expr(tokens, tokenize.EQUAL, tokenize.COMMA) args[-1][1].append("".join(i[1] for i in e)) elif tt == tokenize.EQUAL: if not seen_open_paren: name = None continue e = self._parse_take_expr(tokens, tokenize.COMMA) args[-1][2].append("".join(i[1] for i in e)) elif tt == tokenize.COMMA: args.append(([], [], [], [False])) elif tt == tokenize.LSQB: optional = True elif tt == tokenize.RSQB: optional = False elif tt == tokenize.LPAR: seen_open_paren = True elif tt == tokenize.RPAR: break elif s in ("->", "..."): return None # TODO: Handle '/', the positional-only argument separator, when stubs support them. if name and (allow_name_mismatch or name == self.name): return self._parse_format_arg(override_name or name, args, defaults) def _get_first_function_call(self, expr: str, name: str): """Scans the string for the first closing parenthesis, handling nesting, which is the best heuristic we have for an example call at the start of the docstring.""" # Note: line may or may not contain complete (...) and closing ')' may be on another line. # We also prevent going too far into the expression so it does not pick random x() in comments. if "\n\n" not in expr and name not in expr: return None expr = expr.split("\n\n")[0] if not expr or ")" not in expr: return None found = [] n = 0 expr = expr.replace("\r", " ").replace("\n", " ").replace("\t", " ") # See whether string before open paren is valid. openParenIndex = expr.find("(") if openParenIndex < 0: return None header = expr[:openParenIndex].strip() tokens = header.split(" ") tokenLength = len(tokens) if tokenLength == 0: # Nothing before "(" return None if not tokens[tokenLength - 1].isidentifier() and name not in tokens[tokenLength - 1]: # Token before "(" is not valid identifier. return None if tokenLength > 1 and tokens[tokenLength - 2].isidentifier(): # 2 consecutive words separated by a space. probably not a function call. return None expr = expr.replace(" ", "") for i, c in enumerate(expr): if c == ")": n -= 1 if n == 0: return expr[: i + 1] elif c == "(": n += 1 return None SKIP_TYPENAME_FOR_TYPES = bool, str, bytes, int, float STATICMETHOD_TYPES = () CLASSMETHOD_TYPES = (type(float.fromhex),) PROPERTY_TYPES = type(int.real), type(property.fget) INVALID_ARGNAMES = set(keyword.kwlist) # These full names are known to be lies. When we encounter # them while scraping a module, assume that we need to write # out the full type rather than including them by reference. # TODO: Which of these are still needed? LIES_ABOUT_MODULE = frozenset( [ builtins.__name__ + ".weakcallableproxy", builtins.__name__ + ".weakproxy", builtins.__name__ + ".weakref", "ctypes.ArgumentError", "os.stat_result", "os.statvfs_result", "xml.parsers.expat.ExpatError", "numpy.broadcast", "numpy.busdaycalendar", "numpy.dtype", "numpy.flagsobj", "numpy.flatiter", "numpy.ndarray", "numpy.nditer", # These modules contain multiple members that lie about their # module. Always write out all members of these in full. "_asyncio.*", "_bsddb.*", "_decimal.*", "_elementtree.*", "_socket.*", "_sqlite3.*", "_ssl.*", "_testmultiphase.*", ] ) # These symbols have decls but doc strings are not on them. # Make sure we write them down on scraped file. MUST_EMIT_DOCSTRINGS = frozenset( [ "_collections.defaultdict", "_collections.deque", ] ) # These type names cause conflicts with their values, so # we need to forcibly rename them. # TODO: Which of these are still needed? SYS_INFO_TYPES = frozenset( ( "float_info", "hash_info", "int_info", "thread_info", "version_info", ) ) VALUE_REPR_FIX = { float("inf"): "float('inf')", float("-inf"): "float('-inf')", } IMPLICIT_CLASSMETHOD = ("__new__",) # TODO: Canonicalize internal storage as an inspect.Signature. class Signature(object): KNOWN_RESTYPES = { "__abs__": "__T__", "__add__": "__T__", "__and__": "__T__", "__annotations__": "typing.Dict[str, typing.Any]", "__base__": "type", "__bases__": "typing.Tuple[type, ...]", "__bool__": "bool", "__call__": "typing.Any", "__ceil__": "__T__", "__code__": "types.CodeType", "__contains__": "bool", "__del__": "None", "__delattr__": "None", "__delitem__": "None", "__dict__": "typing.Dict[str, typing.Any]", "__dir__": "typing.Iterable[str]", "__divmod__": "typing.Tuple[__T__, __T__]", "__eq__": "bool", "__format__": "str", "__float__": "float", "__floor__": "__T__", "__floordiv__": "int", "__ge__": "bool", "__get__": "__T__", "__getattr__": "typing.Any", "__getattribute__": "typing.Any", "__getitem__": "typing.Any", "__getnewargs__": "typing.Tuple[__T__]", "__getnewargs_ex__": "typing.Tuple[typing.Tuple[typing.Any, ...], typing.Dict[str, typing.Any]]", "__getslice__": "__T__", "__globals__": "typing.Dict[str, typing.Any]", "__gt__": "bool", "__hash__": "int", "__iadd__": "None", "__iand__": "None", "__imul__": "None", "__index__": "int", "__init__": "None", "__init_subclass__": "None", "__int__": "int", "__invert__": "__T__", "__ior__": "None", "__isub__": "None", "__iter__": "__T__", "__ixor__": "None", "__le__": "bool", "__len__": "int", "__length_hint__": "int", "__lshift__": "__T__", "__lt__": "bool", "__mod__": "__T__", "__mul__": "__T__", "__ne__": "bool", "__neg__": "__T__", "__next__": "typing.Any", "__pos__": "__T__", "__pow__": "__T__", "__or__": "__T__", "__radd__": "__T__", "__rand__": "__T__", "__rdivmod__": "typing.Tuple[__T__, __T__]", "__rfloordiv__": "__T__", "__rlshift__": "__T__", "__rmod__": "__T__", "__rmul__": "__T__", "__ror__": "__T__", "__round__": "__T__", "__rpow__": "__T__", "__rrshift__": "__T__", "__rshift__": "__T__", "__rsub__": "__T__", "__rtruediv__": "__T__", "__rxor__": "__T__", "__reduce__": "typing.Union[str, typing.Tuple[typing.Any, ...]]", "__reduce_ex__": "typing.Union[str, typing.Tuple[typing.Any, ...]]", "__repr__": "str", "__set__": "None", "__setattr__": "None", "__setitem__": "None", "__setstate__": "None", "__sizeof__": "int", "__str__": "str", "__sub__": "__T__", "__truediv__": "float", "__trunc__": "__T__", "__xor__": "__T__", "__subclasscheck__": "bool", "__subclasshook__": "bool", } KNOWN_ARGSPECS = { "__contains__": "(self, value: typing.Any)", "__del__": "(self)", "__dir__": "(self)", "__floor__": "(self)", "__format__": "(self, format_spec: str)", "__getitem__": "(self, index: int)", "__getnewargs__": "(self)", "__getnewargs_ex__": "(self)", "__init_subclass__": "(cls)", "__instancecheck__": "(self, instance: typing.Any)", "__length_hint__": "(self)", "__prepare__": "(cls, name: str, bases: typing.Tuple[type, ...], **kwds: typing.Any)", # TODO: ??? "__round__": "(self, ndigits: int = ...)", "__reduce__": "(self)", "__reduce_ex__": "(self, protocol: int)", "__reversed__": "(self)", "__setitem__": "(self, index: typing.Any, value: typing.Any)", "__setstate__": "(self, state: typing.Any)", "__sizeof__": "(self)", "__subclasses__": "(cls)", "__subclasscheck__": "(cls, subclass: typing.Any)", "__subclasshook__": "(cls, subclass: typing.Any)", "__trunc__": "(self)", } def __init__( self, name, callable, scope=None, defaults=None, scope_alias=None, decorators=None, fallback_doc=None, ): self.callable = callable self.name = name self.scope = scope self.decorators = decorators or () self._signature = None self._defaults = defaults or () if scope and "@staticmethod" not in self.decorators: def_arg = ( "cls" if ("@classmethod" in self.decorators or name in IMPLICIT_CLASSMETHOD) else "self" ) if len(self._defaults) == 0 or self._defaults[0] != def_arg: self._defaults = (def_arg,) + self._defaults ds_parser = DocstringSigParser(self.callable, self.name, self._defaults) self.fullsig = None self.restype = None # TODO: Combine this with the check for "See help(type(self))" in MemberInfo. if self.name in ("__init__", "__new__") and fallback_doc: self.fullsig = ds_parser.argspec(doc=fallback_doc, override_name=self.name) elif not hasattr(self.callable, "__call__") and hasattr( self.callable, "__get__" ): # We have a property self.decorators = ("@property",) self.fullsig = self.name + "(" + ", ".join(self._defaults) + ")" if scope == "object" and name == "__init__": self.fullsig = "__init__(self)" self.restype = "None" # TODO: Strip defaults, replace the "restype" with an actual type that is added after the "->". self.fullsig = ( self.fullsig # Disable fromsignature() because it doesn't work as well as argspec # or self._init_argspec_fromsignature() or self._init_argspec_fromargspec() or self._init_argspec_fromknown(scope_alias) or ds_parser.argspec(override_name=self.name) or (self.name + "(" + ", ".join(self._defaults) + ")") ) # If full sig only has args and kwargs, try the ds_parser instead. if self.fullsig and self.fullsig.endswith("*args, **kwargs)"): ds_argspec = ds_parser.argspec(override_name=self.name) if ds_argspec: self.fullsig = ds_argspec self.restype = ( self.restype or self._init_restype_fromsignature() or self._init_restype_fromknown(scope_alias) or ds_parser.restype() ) if self.restype and scope: self.restype = self.restype.replace("__T__", scope) # Special case for 'with' statement and built-ins like open() or memoryview if name == "__enter__" and self.restype == "pass": self.restype = scope def __str__(self): return self.fullsig def _init_argspec_fromsignature(self): if do_not_inspect(self.callable): return None try: sig = inspect.signature(self.callable) except Exception: return None new_args = [] for arg in sig.parameters: p = sig.parameters[arg] if p.default != inspect.Signature.empty: # TODO: Replace deafult with ELLIPSIS_DEFAULT # TODO: Figure out how to qualify things inside the type annotation. try: ast.literal_eval(repr(p.default)) except Exception: p = p.replace(default=None) if p.kind == inspect.Parameter.POSITIONAL_ONLY: p = p.replace(kind=inspect.Parameter.POSITIONAL_OR_KEYWORD) new_args.append(p) sig = sig.replace(parameters=new_args) # TODO: This duplicates return types, since str(sig) contains the return annotation. return self.name + str(sig) def _init_restype_fromsignature(self): if do_not_inspect(self.callable): return None try: sig = inspect.signature(self.callable) except Exception: return None # If signature has a return annotation, it's in the # full signature and we don't need it from here. if not sig or sig.return_annotation == inspect._empty: return None ann = inspect.formatannotation(sig.return_annotation) if not ann or not can_eval(ann): return None return ann def _init_argspec_fromargspec(self): if do_not_inspect(self.callable): return None try: args = inspect.getfullargspec(self.callable) except Exception: return None argn = [] seen_names = SeenNames(INVALID_ARGNAMES) defaults = list(self._defaults) default_set = set(defaults) for a in args.args: if a in default_set: default_set.discard(a) argn.append(seen_names.make_unique(a)) if default_set: argn[:0] = [a for a in defaults if a in default_set] if getattr(args, "varargs", None): argn.append("*" + args.varargs) if getattr(args, "varkw", None): argn.append("**" + args.varkw) if argn and argn[-1] in ("*", "**"): argn[-1] += seen_names.make_unique("_") return self.name + "(" + ", ".join(argn) + ")" def _init_argspec_fromknown(self, scope_alias): spec = None if scope_alias and not spec: spec = self.KNOWN_ARGSPECS.get(scope_alias + "." + self.name) if self.scope and not spec: spec = self.KNOWN_ARGSPECS.get(self.scope + "." + self.name) if not spec: spec = self.KNOWN_ARGSPECS.get(self.name) if not spec: return None return self.name + spec def _init_restype_fromknown(self, scope_alias): restype = None if scope_alias and not restype: restype = self.KNOWN_RESTYPES.get(scope_alias + "." + self.name) if self.scope and not restype: restype = self.KNOWN_RESTYPES.get(self.scope + "." + self.name) if not restype: restype = self.KNOWN_RESTYPES.get(self.name) if not restype: return None return restype class MemberInfo(object): NO_VALUE = object() def __init__( self, name, value, literal=None, type_literal=None, scope=None, module=None, alias=None, fallback_doc=None, scope_alias=None, ): self.name = name self.module = module self.value = value self.literal = literal self.type_literal = type_literal self.members = [] self.values = [] self.need_imports = () self.type_name = None self.scope_name = None self.bases = () self.signature = None self.documentation = getattr(value, "__doc__", None) self.alias = alias self.instance = True if not isinstance(self.documentation, str): self.documentation = None # Special case for __init__ that refers to class docs if self.name == "__init__" and ( not self.documentation or "See help(type(self))" in self.documentation ): self.documentation = fallback_doc if self.name: self.name = self.name.replace("-", "_") value_type = type(value) if issubclass(value_type, type): self.instance = False self.need_imports, type_name = self._get_typename(value, module) if "." in type_name: m, s, n = type_name.rpartition(".") self.literal = safe_module_name(m) + s + n else: self.scope_name = self.type_name = type_name self._collect_bases(value, module, self.type_name) elif is_callable(value): dec = () if scope: if value_type in STATICMETHOD_TYPES: dec += ("@staticmethod",) elif value_type in CLASSMETHOD_TYPES: dec += ("@classmethod",) self.signature = Signature( name, value, scope, scope_alias=scope_alias, decorators=dec, fallback_doc=fallback_doc, ) # Remove the def of the function if it's in the docstring. function_regex = "\s*" + self.name + "\(.*?\s*.*?\)" if self.documentation and re.match(function_regex, self.documentation): # Find the line it's on and remove it and all blank lines before and after it. match = re.search(function_regex, self.documentation) start = match.start() end = match.end() self.documentation = self.documentation[end:] # Remove all blank lines after the function definition but before the # next line of text. i = 1 # This skips the line the function definition was on. lines = self.documentation.split("\n") while i < len(lines) and not lines[i].strip(): i += 1 new_doc = "\n".join(lines[i:]) # This is here just so we can check in the debugger self.documentation = new_doc elif value is not None: if value_type in PROPERTY_TYPES: self.signature = Signature(name, value, scope, scope_alias=scope_alias) if value_type not in (): # SKIP_TYPENAME_FOR_TYPES: self.need_imports, self.type_name = self._get_typename( value_type, module ) self._collect_bases(value_type, module, self.type_name) # if isinstance(value, float) and repr(value) == "nan": # self.literal = "float('nan')" # try: # self.literal = VALUE_REPR_FIX[value] # except Exception: # pass # elif not self.literal: # self.literal = "None" def _collect_bases(self, value_type, module, type_name): try: bases = getattr(value_type, "__bases__", ()) except Exception: pass else: self.bases = [] self.need_imports = list(self.need_imports) for ni, t in (self._get_typename(b, module) for b in bases): if not t: continue if t == type_name and module in ni: continue self.bases.append(t) self.need_imports.extend(ni) @classmethod def _get_typename(cls, value_type, in_module): try: type_name = value_type.__name__.replace("-", "_") module = getattr(value_type, "__module__", None) if module and module != "": if module == in_module: return (module,), type_name fullname = module + "." + type_name if in_module and ( fullname in LIES_ABOUT_MODULE or (in_module + ".*") in LIES_ABOUT_MODULE ): # Treat the type as if it came from the current module return (in_module,), type_name return (module,), fullname return (), type_name except Exception: warnings.warn("could not get type of " + repr(value_type), InspectWarning) raise def _str_from_typename(self, type_name): mod_name, sep, name = type_name.rpartition(".") if mod_name == "builtins": type_name = name else: type_name = safe_module_name(mod_name) + sep + name s = self.name + ": " + type_name # s = s + "()" if not self.instance: # TODO: Handle non-instances pass return s def _lines_with_members(self): if self.bases: split_bases = [n.rpartition(".") for n in self.bases] bases = ",".join( (safe_module_name(n[0]) + n[1] + n[2]) for n in split_bases ) yield "class " + self.name + "(" + bases + "):" else: yield "class " + self.name + ":" if self.documentation: yield " " + repr(self.documentation) if self.members: for mi in self.members: if ( hasattr(mi, "documentation") and mi.documentation != None and not isinstance(mi.documentation, str) ): continue if mi is not MemberInfo.NO_VALUE: yield mi.as_str(" ") else: yield " pass" yield "" def _lines_with_signature(self): seen_decorators = set() for d in self.signature.decorators: d = str(d) if d not in seen_decorators: seen_decorators.add(d) yield d line = "def " + str(self.signature) restype = self.signature.restype if restype is None: restype = "typing.Any" line += " -> " + restype yield line + ":" if self.documentation: yield " " + repr(self.documentation) yield " ..." yield "" def as_str(self, indent=""): if self.literal: literal = indent + self.name + " = " + self.literal # Put doc string next to reference. if self.module + "." + self.name in MUST_EMIT_DOCSTRINGS: literal += "\n" + indent + repr(self.documentation) return literal if self.type_literal: return indent + self.name + ": " + self.type_literal if self.members: return "\n".join(indent + s for s in self._lines_with_members()) if self.signature: return "\n".join(indent + s for s in self._lines_with_signature()) if self.type_name is not None: return indent + self._str_from_typename(self.type_name) if self.value is not None: return indent + self.name + " = " + repr(self.value) return indent + self.name + ": typing.Any" MODULE_MEMBER_SUBSTITUTE = { "__spec__": None, "__loader__": None, } CLASS_MEMBER_SUBSTITUTE = { "__bases__": MemberInfo("__bases__", None, type_literal="typing.Tuple[type, ...]"), "__mro__": MemberInfo("__mro__", None, type_literal="typing.Tuple[type, ...]"), # TODO: Only expose this on object and not every other class? "__dict__": MemberInfo( "__dict__", None, type_literal="typing.Dict[str, typing.Any]" ), "__doc__": None, "__new__": None, } def do_import(module_name, search_path=None): """ Imports a module by name and returns the module. If the import fails, the exception is analyzed for a fix and retried. """ if search_path: sys.path.insert(0, search_path) try: return importlib.import_module(module_name) except Exception: ex_msg = str(sys.exc_info()[1]) warnings.warn("Working around " + ex_msg, InspectWarning) if ex_msg == "This must be an MFC application - try 'import win32ui' first": importlib.import_module("win32ui") elif ( ex_msg == "Could not find TCL routines" or module_name == "matplotlib.backends._tkagg" ): importlib.import_module("tkinter") else: raise finally: if search_path: del sys.path[0] return importlib.import_module(module_name) def mro_contains(mro, name, value): for m in mro: try: mro_value = getattr(m, name) except Exception: pass else: if mro_value is value: return True return False class ScrapeState(object): def __init__(self, module_name, search_path): self.root_module = None self.module_name = module_name self.module = do_import(self.module_name, search_path) self.members = [] def collect_top_level_members(self): self._collect_members(self.module, self.members, MODULE_MEMBER_SUBSTITUTE, None) if self.module_name == "sys": sysinfo = [m for m in self.members if m.type_name in SYS_INFO_TYPES] for m in sysinfo: self.members.append( MemberInfo(m.name, None, literal="__" + m.name + "()") ) m.name = m.scope_name = m.type_name = "__" + m.type_name m_names = set(m.name for m in self.members) undeclared = [] for m in self.members: if ( m.value is not None and m.type_name and "." not in m.type_name and m.type_name not in m_names ): undeclared.append( MemberInfo(m.type_name, type(m.value), module=self.module_name) ) self.members[:0] = undeclared def _should_collect_members(self, member): if self.module_name in member.need_imports and member.name == member.type_name: return True # Support cffi libs if member.type_name == builtins.__name__ + ".CompiledLib": return True return False def collect_second_level_members(self): for mi in self.members: if not self._should_collect_members(mi): continue substitutes = dict(CLASS_MEMBER_SUBSTITUTE) # substitutes["__class__"] = MemberInfo( # "__class__", None, literal=mi.type_name # ) self._collect_members(mi.value, mi.members, substitutes, mi) if mi.scope_name == mi.type_name: continue # When the scope and type names are different, we have a static # class. To emulate this, we add '@staticmethod' decorators to # all members. for mi2 in mi.members: if mi2.signature: mi2.signature.decorators += ("@staticmethod",) def _collect_members(self, mod, members, substitutes, outer_member): """Fills the members attribute with a dictionary containing all members from the module.""" if not mod: raise RuntimeError("failed to import module") if mod is MemberInfo.NO_VALUE: return existing_names = set(m.name for m in members) if outer_member: scope = outer_member.scope_name scope_alias = outer_member.alias else: scope, scope_alias = None, None mod_scope = (self.module_name + "." + scope) if scope else self.module_name fallback_doc = getattr(mod, "__doc__", None) mro = (getattr(mod, "__mro__", None) or ())[1:] for name in dir(mod): if keyword.iskeyword(name): continue try: m = substitutes[name] if m: members.append(m) continue except LookupError: pass try: m = substitutes[mod_scope + "." + name] if m: members.append(m) continue except LookupError: pass if name in existing_names: continue try: value = getattr(mod, name) except AttributeError: warnings.warn( "attribute " + name + " on " + repr(mod) + " was in dir() but not getattr()", InspectWarning, ) except Exception: warnings.warn( "error getting " + name + " for " + repr(mod), InspectWarning ) else: if not self._should_add_value(value): continue if name != "__init__" and mro_contains(mro, name, value): continue members.append( MemberInfo( name, value, scope=scope, module=self.module_name, fallback_doc=fallback_doc, scope_alias=scope_alias, ) ) if not "__getattr__" in existing_names: value = ( self.__getattr__dummy if scope else ScrapeState.__getattr__dummy_module ) members.append( MemberInfo( "__getattr__", value, scope=None, module=self.module_name, ) ) def __getattr__dummy(self, name): pass @classmethod def __getattr__dummy_module(name): pass def _should_add_value(self, value): try: value_type = type(value) mod = getattr(value_type, "__module__", None) name = value_type.__name__ except Exception: warnings.warn("error getting typename", InspectWarning) return False if (mod, name) == (builtins.__name__, "CompiledLib"): # Always allow CFFI lib return True if issubclass(value_type, (type(sys), type(inspect))): # Disallow nested modules return False # By default, include all values return True def dump(self, out): print_module_version(self.module, out) documentation = getattr(self.module, "__doc__", None) if isinstance(documentation, str): print("", file=out) print(repr(documentation), file=out) print("", file=out) print("import typing", file=out) imports = set() for value in self.members: for mod in value.need_imports: imports.add(mod) imports.discard(self.module_name) if imports: for mod in sorted(imports): print("import " + mod + " as " + safe_module_name(mod), file=out) print("", file=out) for value in self.members: s = value.as_str("") try: print(s, file=out) except TypeError: print(repr(s), file=sys.stderr) raise def main(): module_name = sys.argv[1] if len(sys.argv) > 1 else "builtins" search_path = sys.argv[2] if len(sys.argv) > 2 else None state = ScrapeState(module_name, search_path) state.collect_top_level_members() state.members[:] = [m for m in state.members if m.name not in keyword.kwlist] state.collect_second_level_members() state.dump(sys.stdout) if __name__ == "__main__": main()