294 lines
12 KiB
Python
294 lines
12 KiB
Python
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License.
|
|
|
|
import ast
|
|
import json
|
|
import re
|
|
import sys
|
|
import textwrap
|
|
from typing import Iterable
|
|
|
|
|
|
def split_lines(source):
|
|
"""
|
|
Split selection lines in a version-agnostic way.
|
|
|
|
Python grammar only treats \r, \n, and \r\n as newlines.
|
|
But splitlines() in Python 3 has a much larger list: for example, it also includes \v, \f.
|
|
As such, this function will split lines across all Python versions.
|
|
"""
|
|
return re.split(r"[\n\r]+", source)
|
|
|
|
|
|
def _get_statements(selection):
|
|
"""Process a multiline selection into a list of its top-level statements.
|
|
|
|
This will remove empty newlines around and within the selection, dedent it,
|
|
and split it using the result of `ast.parse()`.
|
|
"""
|
|
# Remove blank lines within the selection to prevent the REPL from thinking the block is finished.
|
|
lines = (line for line in split_lines(selection) if line.strip() != "")
|
|
|
|
# Dedent the selection and parse it using the ast module.
|
|
# Note that leading comments in the selection will be discarded during parsing.
|
|
source = textwrap.dedent("\n".join(lines))
|
|
tree = ast.parse(source)
|
|
|
|
# We'll need the dedented lines to rebuild the selection.
|
|
lines = split_lines(source)
|
|
|
|
# Get the line ranges for top-level blocks returned from parsing the dedented text
|
|
# and split the selection accordingly.
|
|
# tree.body is a list of AST objects, which we rely on to extract top-level statements.
|
|
# If we supported Python 3.8+ only we could use the lineno and end_lineno attributes of each object
|
|
# to get the boundaries of each block.
|
|
# However, earlier Python versions only have the lineno attribute, which is the range start position (1-indexed).
|
|
# Therefore, to retrieve the end line of each block in a version-agnostic way we need to do
|
|
# `end = next_block.lineno - 1`
|
|
# for all blocks except the last one, which will will just run until the last line.
|
|
ends = []
|
|
for node in tree.body[1:]:
|
|
line_end = node.lineno - 1
|
|
# Special handling of decorators:
|
|
# In Python 3.8 and higher, decorators are not taken into account in the value returned by lineno,
|
|
# and we have to use the length of the decorator_list array to compute the actual start line.
|
|
# Before that, lineno takes into account decorators, so this offset check is unnecessary.
|
|
# Also, not all AST objects can have decorators.
|
|
if hasattr(node, "decorator_list") and sys.version_info >= (3, 8):
|
|
# Using getattr instead of node.decorator_list or pyright will complain about an unknown member.
|
|
line_end -= len(getattr(node, "decorator_list")) # noqa: B009
|
|
ends.append(line_end)
|
|
ends.append(len(lines))
|
|
|
|
for node, end in zip(tree.body, ends):
|
|
# Given this selection:
|
|
# 1: if (m > 0 and
|
|
# 2: n < 3):
|
|
# 3: print('foo')
|
|
# 4: value = 'bar'
|
|
#
|
|
# The first block would have lineno = 1,and the second block lineno = 4
|
|
start = node.lineno - 1
|
|
|
|
# Special handling of decorators similar to what's above.
|
|
if hasattr(node, "decorator_list") and sys.version_info >= (3, 8):
|
|
# Using getattr instead of node.decorator_list or pyright will complain about an unknown member.
|
|
start -= len(getattr(node, "decorator_list")) # noqa: B009
|
|
block = "\n".join(lines[start:end])
|
|
|
|
# If the block is multiline, add an extra newline character at its end.
|
|
# This way, when joining blocks back together, there will be a blank line between each multiline statement
|
|
# and no blank lines between single-line statements, or it would look like this:
|
|
# >>> x = 22
|
|
# >>>
|
|
# >>> total = x + 30
|
|
# >>>
|
|
# Note that for the multiline parentheses case this newline is redundant,
|
|
# since the closing parenthesis terminates the statement already.
|
|
# This means that for this pattern we'll end up with:
|
|
# >>> x = [
|
|
# ... 1
|
|
# ... ]
|
|
# >>>
|
|
# >>> y = [
|
|
# ... 2
|
|
# ...]
|
|
if end - start > 1:
|
|
block += "\n"
|
|
|
|
yield block
|
|
|
|
|
|
def normalize_lines(selection):
|
|
"""
|
|
Normalize the text selection received from the extension.
|
|
|
|
If it is a single line selection, dedent it and append a newline and
|
|
send it back to the extension.
|
|
Otherwise, sanitize the multiline selection before returning it:
|
|
split it in a list of top-level statements
|
|
and add newlines between each of them so the REPL knows where each block ends.
|
|
"""
|
|
try:
|
|
# Parse the selection into a list of top-level blocks.
|
|
# We don't differentiate between single and multiline statements
|
|
# because it's not a perf bottleneck,
|
|
# and the overhead from splitting and rejoining strings in the multiline case is one-off.
|
|
statements = _get_statements(selection)
|
|
|
|
# Insert a newline between each top-level statement, and append a newline to the selection.
|
|
source = "\n".join(statements) + "\n"
|
|
if selection[-2] == "}" or selection[-2] == "]":
|
|
source = source[:-1]
|
|
except Exception:
|
|
# If there's a problem when parsing statements,
|
|
# append a blank line to end the block and send it as-is.
|
|
source = selection + "\n\n"
|
|
|
|
return source
|
|
|
|
|
|
top_level_nodes = []
|
|
min_key = None
|
|
|
|
|
|
def check_exact_exist(top_level_nodes, start_line, end_line):
|
|
return [
|
|
node
|
|
for node in top_level_nodes
|
|
if node.lineno == start_line and node.end_lineno == end_line
|
|
]
|
|
|
|
|
|
def traverse_file(whole_file_content, start_line, end_line, was_highlighted): # noqa: ARG001
|
|
"""Intended to traverse through a user's given file content and find, collect all appropriate lines that should be sent to the REPL in case of smart selection.
|
|
|
|
This could be exact statement such as just a single line print statement,
|
|
or a multiline dictionary, or differently styled multi-line list comprehension, etc.
|
|
Then call the normalize_lines function to normalize our smartly selected code block.
|
|
"""
|
|
parsed_file_content = None
|
|
|
|
try:
|
|
parsed_file_content = ast.parse(whole_file_content)
|
|
except Exception:
|
|
# Handle case where user is attempting to run code where file contains deprecated Python code.
|
|
# Let typescript side know and show warning message.
|
|
return {
|
|
"normalized_smart_result": "deprecated",
|
|
"which_line_next": 0,
|
|
}
|
|
|
|
smart_code = ""
|
|
should_run_top_blocks = []
|
|
|
|
# Purpose of this loop is to fetch and collect all the
|
|
# AST top level nodes, and its node.body as child nodes.
|
|
# Individual nodes will contain information like
|
|
# the start line, end line and get source segment information
|
|
# that will be used to smartly select, and send normalized code.
|
|
for node in ast.iter_child_nodes(parsed_file_content):
|
|
top_level_nodes.append(node)
|
|
|
|
ast_types_with_nodebody = (
|
|
ast.Module,
|
|
ast.Interactive,
|
|
ast.Expression,
|
|
ast.FunctionDef,
|
|
ast.AsyncFunctionDef,
|
|
ast.ClassDef,
|
|
ast.For,
|
|
ast.AsyncFor,
|
|
ast.While,
|
|
ast.If,
|
|
ast.With,
|
|
ast.AsyncWith,
|
|
ast.Try,
|
|
ast.Lambda,
|
|
ast.IfExp,
|
|
ast.ExceptHandler,
|
|
)
|
|
if isinstance(node, ast_types_with_nodebody) and isinstance(node.body, Iterable):
|
|
top_level_nodes.extend(node.body)
|
|
|
|
exact_nodes = check_exact_exist(top_level_nodes, start_line, end_line)
|
|
|
|
# Just return the exact top level line, if present.
|
|
if len(exact_nodes) > 0:
|
|
which_line_next = 0
|
|
for same_line_node in exact_nodes:
|
|
should_run_top_blocks.append(same_line_node)
|
|
smart_code += f"{ast.get_source_segment(whole_file_content, same_line_node)}\n"
|
|
which_line_next = get_next_block_lineno(should_run_top_blocks)
|
|
return {
|
|
"normalized_smart_result": smart_code,
|
|
"which_line_next": which_line_next,
|
|
}
|
|
|
|
# For each of the nodes in the parsed file content,
|
|
# add the appropriate source code line(s) to be sent to the REPL, dependent on
|
|
# user is trying to send and execute single line/statement or multiple with smart selection.
|
|
for top_node in ast.iter_child_nodes(parsed_file_content):
|
|
if start_line == top_node.lineno and end_line == top_node.end_lineno:
|
|
should_run_top_blocks.append(top_node)
|
|
|
|
smart_code += f"{ast.get_source_segment(whole_file_content, top_node)}\n"
|
|
break # If we found exact match, don't waste computation in parsing extra nodes.
|
|
elif start_line >= top_node.lineno and end_line <= top_node.end_lineno:
|
|
# Case to apply smart selection for multiple line.
|
|
# This is the case for when we have to add multiple lines that should be included in the smart send.
|
|
# For example:
|
|
# 'my_dictionary': {
|
|
# 'Audi': 'Germany',
|
|
# 'BMW': 'Germany',
|
|
# 'Genesis': 'Korea',
|
|
# }
|
|
# with the mouse cursor at 'BMW': 'Germany', should send all of the lines that pertains to my_dictionary.
|
|
|
|
should_run_top_blocks.append(top_node)
|
|
|
|
smart_code += str(ast.get_source_segment(whole_file_content, top_node))
|
|
smart_code += "\n"
|
|
|
|
normalized_smart_result = normalize_lines(smart_code)
|
|
which_line_next = get_next_block_lineno(should_run_top_blocks)
|
|
return {
|
|
"normalized_smart_result": normalized_smart_result,
|
|
"which_line_next": which_line_next,
|
|
}
|
|
|
|
|
|
# Look at the last top block added, find lineno for the next upcoming block,
|
|
# This will be used in calculating lineOffset to move cursor in VS Code.
|
|
def get_next_block_lineno(which_line_next):
|
|
last_ran_lineno = int(which_line_next[-1].end_lineno)
|
|
next_lineno = int(which_line_next[-1].end_lineno)
|
|
|
|
for reverse_node in top_level_nodes:
|
|
if reverse_node.lineno > last_ran_lineno:
|
|
next_lineno = reverse_node.lineno
|
|
break
|
|
return next_lineno
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# Content is being sent from the extension as a JSON object.
|
|
# Decode the data from the raw bytes.
|
|
stdin = sys.stdin if sys.version_info < (3,) else sys.stdin.buffer
|
|
raw = stdin.read()
|
|
contents = json.loads(raw.decode("utf-8"))
|
|
# Empty highlight means user has not explicitly selected specific text.
|
|
empty_highlight = contents.get("emptyHighlight", False)
|
|
|
|
# We also get the activeEditor selection start line and end line from the typescript VS Code side.
|
|
# Remember to add 1 to each of the received since vscode starts line counting from 0 .
|
|
vscode_start_line = contents["startLine"] + 1
|
|
vscode_end_line = contents["endLine"] + 1
|
|
|
|
# Send the normalized code back to the extension in a JSON object.
|
|
data = None
|
|
which_line_next = 0
|
|
|
|
if empty_highlight and contents.get("smartSendSettingsEnabled"):
|
|
result = traverse_file(
|
|
contents["wholeFileContent"],
|
|
vscode_start_line,
|
|
vscode_end_line,
|
|
not empty_highlight,
|
|
)
|
|
normalized = result["normalized_smart_result"]
|
|
which_line_next = result["which_line_next"]
|
|
if normalized == "deprecated":
|
|
data = json.dumps({"normalized": normalized})
|
|
else:
|
|
data = json.dumps(
|
|
{"normalized": normalized, "nextBlockLineno": result["which_line_next"]}
|
|
)
|
|
else:
|
|
normalized = normalize_lines(contents["code"])
|
|
data = json.dumps({"normalized": normalized})
|
|
|
|
stdout = sys.stdout if sys.version_info < (3,) else sys.stdout.buffer
|
|
stdout.write(data.encode("utf-8"))
|
|
stdout.close()
|