-
-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New parser and AST for shell like syntax in command tasks
- This fixes various edge cases with handling of quotes within words. - Single quotes now disable parameter expansions (like in bash). Quoting, escaping, parameter expansions and glob patterns are now close interpreted in a way closer to how bash does it. Exceptions: - Glob patterns are limited to what the python standard library glob module supports. Invalid patterns are ignored - Glob patterns with no results are expanded to nothing - New lines are ignored As of this commit all existing tests pass but more tests are required for the command AST.
- Loading branch information
Showing
8 changed files
with
1,167 additions
and
327 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import re | ||
from glob import escape | ||
from typing import ( | ||
IO, | ||
TYPE_CHECKING, | ||
Dict, | ||
Generic, | ||
Iterable, | ||
Iterator, | ||
List, | ||
Literal, | ||
Mapping, | ||
Optional, | ||
Tuple, | ||
Type, | ||
TypeVar, | ||
Union, | ||
cast, | ||
) | ||
|
||
if TYPE_CHECKING: | ||
from .ast import Line, ParseConfig | ||
|
||
|
||
def parse_poe_cmd(source: Union[IO[str], str], config: Optional["ParseConfig"] = None): | ||
from .ast import Glob, ParseConfig, ParseCursor, PythonGlob, Script | ||
|
||
if not config: | ||
# Poe cmd task content differs from POSIX command lines in that new lines are | ||
# ignored (except in comments) and glob patterns are constrained to what the | ||
# python standard library glob module can support | ||
config = ParseConfig(substitute_nodes={Glob: PythonGlob}, line_seperators=";") | ||
|
||
if isinstance(source, str): | ||
cursor = ParseCursor(((char for char in source))) | ||
else: | ||
cursor = ParseCursor.from_file(source) | ||
|
||
return Script(cursor, config) | ||
|
||
|
||
def resolve_command_tokens( | ||
line: "Line", | ||
env: Mapping[str, str], | ||
config: Optional["ParseConfig"] = None, | ||
) -> Iterator[Tuple[str, bool]]: | ||
""" | ||
Generates a sequence of tokens, and indicates for each whether it includes glob | ||
patterns that are not escaped or quoted. In case there are glob patterns in the | ||
token, any escaped glob characters will have been escaped with []. | ||
""" | ||
from .ast import Glob, ParamExpansion, ParseConfig, ParseCursor, PythonGlob, Script | ||
|
||
if not config: | ||
config = ParseConfig(substitute_nodes={Glob: PythonGlob}) | ||
|
||
glob_pattern = re.compile(cast(Glob, config.resolve_node_cls(Glob)).PATTERN) | ||
|
||
def finalize_token(token_parts): | ||
""" | ||
Determine whether any parts of this token include an active glob. | ||
If so then apply glob escaping to all other parts. | ||
Join the result into a single token string. | ||
""" | ||
includes_glob = any(has_glob for part, has_glob in token_parts) | ||
token = "".join( | ||
( | ||
(escape(token_part) if not has_glob else token_part) | ||
for token_part, has_glob in token_parts | ||
) | ||
if includes_glob | ||
else (token_part for token_part, _ in token_parts) | ||
) | ||
token_parts.clear() | ||
return (token, includes_glob) | ||
|
||
for word in line: | ||
# For each token part indicate whether it is a glob | ||
for segment in word: | ||
token_parts: List[Tuple[str, bool]] = [] | ||
|
||
for element in segment: | ||
if isinstance(element, ParamExpansion): | ||
param_value = env.get(element.param_name, "") | ||
if not param_value: | ||
continue | ||
if segment.is_quoted: | ||
token_parts.append((env.get(element.param_name, ""), False)) | ||
else: | ||
# If the the param expansion it not quoted then: | ||
# - Whitespace inside a substituted param value results in | ||
# a word break, regardless of quotes or backslashes | ||
# - glob patterns should be evaluated | ||
|
||
if param_value[0].isspace() and token_parts: | ||
# param_value starts with a word break | ||
yield finalize_token(token_parts) | ||
|
||
param_words = ( | ||
(word, bool(glob_pattern.match(word))) | ||
for word in param_value.split() | ||
) | ||
|
||
token_parts.append(next(param_words)) | ||
|
||
for param_word in param_words: | ||
if token_parts: | ||
yield finalize_token(token_parts) | ||
token_parts.append(next(param_words)) | ||
|
||
if param_value[-1].isspace() and token_parts: | ||
# param_value ends with a word break | ||
yield finalize_token(token_parts) | ||
|
||
elif isinstance(element, Glob): | ||
token_parts.append((element.content, True)) | ||
|
||
else: | ||
token_parts.append((element.content, False)) | ||
|
||
if token_parts: | ||
yield finalize_token(token_parts) |
Oops, something went wrong.