Source code for cli.preprocessor

""" A module with Preprocessor responsibility.

This module holds the Preprocessor - an entity
that accepts raw string as input, and expands
variables (in the form ``$x``) according to
an environment.

Preprocessing is a common action in programming languages,
so we use it in interpreting Shell commands as well.
"""
import logging
import enum


@enum.unique
class _PreprocessorAutomataState(enum.Enum):
    INITIAL_STATE = 1
    INSIDE_DOUBLE_QUOTES = 2
    INSIDE_SINGLE_QUOTES = 3
    MET_DOLLAR = 4
    READING_VAR_NAME = 5
    MET_DOLLAR_INSIDE_DOUBLE_QUOTES = 6
    READING_VAR_NAME_INSIDE_DOUBLE_QUOTES = 7


[docs]class Preprocessor: """A static class for preprocessing a shell input string. Given a raw string, we want to preprocess it, i.e. substitue things like `$x` into previously assigned value of `x`. """ @staticmethod
[docs] def substitute_environment_variables(raw_str, env): """Do a one-time pass over string and substitute `$x`-like patterns. Args: raw_str (str): an initial, unprocessed string; env (:class:`environment.Environment`): an environment in which this string must be expanded. Returns: str. The processed string. All substrings in single quotes are left untouched. Inside double quotes, nonempty substrings starting with `$` sign and ending in - space symbol - double or single quotes - `$` sign are treated as variable names. The values for these variables are queried from the input `env`. Outside any quotation, similar rules apply: nonempty substrings that start with `$` and end either - before the next space character - before the other `$` sign - at the end of the input string - at the beginning of quotes (single or double) are treated as variable names. Example: If the environment contains:: x=1 long_name=qwe Then the following substitutions apply (nonexistant variables are substituted by an empty string):: echo "123$x" --> echo "1231" echo "123$x " --> echo "1231 " echo "123$xy " --> echo "123 " echo "123$x dfg" --> echo "1231 dfg" echo $long_name'123' --> echo qwe'123' echo $long_name2'123' --> echo '123' echo $x '123' --> echo 1 '123' echo $x"qwe" --> echo 1"qwe" echo $x$long_name --> echo 1qwe echo `$x`"$x" --> echo `$x`"1" """ processed_prefix = '' unprocessed_suffix = raw_str autom_state = _PreprocessorAutomataState.INITIAL_STATE while unprocessed_suffix: next_var_info = Preprocessor._find_next_var(unprocessed_suffix, autom_state) if next_var_info: (nonvar_part, var_name, unprocessed_part, autom_state) = next_var_info processed_prefix += nonvar_part processed_prefix += env.get_var(var_name) unprocessed_suffix = unprocessed_part else: processed_prefix += unprocessed_suffix unprocessed_suffix = '' logging.info('Preprocessor: """{}""" processed ' \ 'to """{}"""'.format(raw_str, processed_prefix)) return processed_prefix
@staticmethod def _find_next_var(raw_str, autom_state): """Return information about the leftmost variable in the string. Args: raw_str (str): an unprocessed string; autom_state (_PreprocessorAutomataState): a current state of the automata. Returns: If `raw_str` contains a variable, return tuple <part of the string before the variable, variable name, part of the string after the variable, automata state> Otherwise, return None. """ read_idx = 0 sz_raw = len(raw_str) var_name = '' while read_idx < sz_raw: cur_char = raw_str[read_idx] if autom_state == _PreprocessorAutomataState.INITIAL_STATE: if cur_char == '"': autom_state = _PreprocessorAutomataState.INSIDE_DOUBLE_QUOTES elif cur_char == "'": autom_state = _PreprocessorAutomataState.INSIDE_SINGLE_QUOTES elif cur_char == '$': autom_state = _PreprocessorAutomataState.MET_DOLLAR else: pass elif autom_state == _PreprocessorAutomataState.INSIDE_DOUBLE_QUOTES: if cur_char == '"': autom_state = _PreprocessorAutomataState.INITIAL_STATE elif cur_char == '$': autom_state = _PreprocessorAutomataState.MET_DOLLAR_INSIDE_DOUBLE_QUOTES else: pass elif autom_state == _PreprocessorAutomataState.INSIDE_SINGLE_QUOTES: if cur_char == "'": autom_state = _PreprocessorAutomataState.INITIAL_STATE else: pass elif autom_state == _PreprocessorAutomataState.MET_DOLLAR: if cur_char == '"': autom_state = _PreprocessorAutomataState.INSIDE_DOUBLE_QUOTES elif cur_char == "'": autom_state = _PreprocessorAutomataState.INSIDE_SINGLE_QUOTES elif cur_char == '$': pass elif cur_char.isspace(): autom_state = _PreprocessorAutomataState.INITIAL_STATE else: var_name = cur_char autom_state = _PreprocessorAutomataState.READING_VAR_NAME elif autom_state == _PreprocessorAutomataState.READING_VAR_NAME: if cur_char in ('"', "'", '$') or cur_char.isspace(): return (raw_str[:read_idx - len(var_name) - 1], var_name, raw_str[read_idx:], _PreprocessorAutomataState.INITIAL_STATE) else: var_name += cur_char elif autom_state == _PreprocessorAutomataState.MET_DOLLAR_INSIDE_DOUBLE_QUOTES: if cur_char == '"': autom_state = _PreprocessorAutomataState.INITIAL_STATE elif cur_char == '$': pass elif cur_char.isspace(): autom_state = _PreprocessorAutomataState.INSIDE_DOUBLE_QUOTES else: var_name = cur_char autom_state = _PreprocessorAutomataState.READING_VAR_NAME_INSIDE_DOUBLE_QUOTES elif autom_state == _PreprocessorAutomataState.READING_VAR_NAME_INSIDE_DOUBLE_QUOTES: if cur_char in ('"', "'", '$') or cur_char.isspace(): return (raw_str[:read_idx - len(var_name) - 1], var_name, raw_str[read_idx:], _PreprocessorAutomataState.INSIDE_DOUBLE_QUOTES) else: var_name += cur_char read_idx += 1 # Automata can't be in `READING_VAR_NAME_INSIDE_DOUBLE_QUOTES` state # if the input string is valid. Preprocessor won't check for validity. if autom_state == _PreprocessorAutomataState.READING_VAR_NAME: return (raw_str[:read_idx - len(var_name) - 1], var_name, '', _PreprocessorAutomataState.INITIAL_STATE) return None